Diffstat (limited to 'fs')
179 files changed, 32778 insertions, 4699 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index 80af05163579..4d90075f54bf 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -103,6 +103,7 @@ if BLOCK
 menu "DOS/FAT/NT Filesystems"
 
 source "fs/fat/Kconfig"
+source "fs/sdfat/Kconfig"
 source "fs/ntfs/Kconfig"
 
 endmenu
@@ -284,4 +285,9 @@ endif # NETWORK_FILESYSTEMS
 source "fs/nls/Kconfig"
 source "fs/dlm/Kconfig"
 
+config FILE_TABLE_DEBUG
+	bool "Enable FILE_TABLE_DEBUG"
+	help
+	  This option enables debugging of open files using a global file table.
+
 endmenu
diff --git a/fs/Makefile b/fs/Makefile
index 4644db462ba9..debf0742b3c9 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -77,6 +77,7 @@ obj-$(CONFIG_HUGETLBFS)	+= hugetlbfs/
 obj-$(CONFIG_CODA_FS)		+= coda/
 obj-$(CONFIG_MINIX_FS)		+= minix/
 obj-$(CONFIG_FAT_FS)		+= fat/
+obj-$(CONFIG_SDFAT_FS)		+= sdfat/
 obj-$(CONFIG_BFS_FS)		+= bfs/
 obj-$(CONFIG_ISO9660_FS)	+= isofs/
 obj-$(CONFIG_HFSPLUS_FS)	+= hfsplus/ # Before hfs to find wrapped HFS+
diff --git a/fs/aio.c b/fs/aio.c
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -263,6 +263,7 @@ static int __init aio_setup(void)
 	aio_mnt = kern_mount(&aio_fs);
 	if (IS_ERR(aio_mnt))
 		panic("Failed to create aio fs mount.");
+	aio_mnt->mnt_flags |= MNT_NOEXEC;
 
 	kiocb_cachep = KMEM_CACHE(aio_kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
 	kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);
@@ -1338,7 +1339,7 @@ static long read_events(struct kioctx *ctx, long min_nr, long nr,
 SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp)
 {
 	struct kioctx *ioctx = NULL;
-	unsigned long ctx;
+	unsigned long ctx = 0;
 	long ret;
 
 	ret = get_user(ctx, ctxp);
@@ -1471,6 +1472,7 @@ rw_common:
 			len = ret;
 
+		get_file(file);
 		if (rw == WRITE)
 			file_start_write(file);
 
@@ -1478,6 +1480,7 @@ rw_common:
 		if (rw == WRITE)
 			file_end_write(file);
+		fput(file);
 		kfree(iovec);
 		break;
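Note on the aio.c hunks above: the get_file()/fput() pair pins the struct file for the whole duration of the request, so io_destroy() or process exit cannot drop the last reference while the I/O is still in flight. A minimal sketch of the pattern, where do_async_rw() is a hypothetical stand-in for the real aio path:

	#include <linux/file.h>
	#include <linux/fs.h>

	/* Sketch only: hold a file reference across an async request. */
	static ssize_t do_async_rw(struct file *file, int rw)
	{
		ssize_t ret = 0;

		get_file(file);			/* +1: file stays alive */
		if (rw == WRITE)
			file_start_write(file);
		/* ... perform the actual read/write here ... */
		if (rw == WRITE)
			file_end_write(file);
		fput(file);			/* may now be the final put */
		return ret;
	}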
diff --git a/fs/block_dev.c b/fs/block_dev.c
index a71d442ef7d0..83c007612a68 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -532,6 +532,7 @@ static void init_once(void *foo)
 #ifdef CONFIG_SYSFS
 	INIT_LIST_HEAD(&bdev->bd_holder_disks);
 #endif
+	bdev->bd_bdi = &noop_backing_dev_info;
 	inode_init_once(&ei->vfs_inode);
 	/* Initialize mutex for freeze. */
 	mutex_init(&bdev->bd_fsfreeze_mutex);
@@ -557,6 +558,12 @@ static void bdev_evict_inode(struct inode *inode)
 	}
 	list_del_init(&bdev->bd_list);
 	spin_unlock(&bdev_lock);
+	/* Detach inode from wb early as bdi_put() may free bdi->wb */
+	inode_detach_wb(inode);
+	if (bdev->bd_bdi != &noop_backing_dev_info) {
+		bdi_put(bdev->bd_bdi);
+		bdev->bd_bdi = &noop_backing_dev_info;
+	}
 }
 
 static const struct super_operations bdev_sops = {
@@ -623,6 +630,21 @@ static int bdev_set(struct inode *inode, void *data)
 
 static LIST_HEAD(all_bdevs);
 
+/*
+ * If there is a bdev inode for this device, unhash it so that it gets evicted
+ * as soon as last inode reference is dropped.
+ */
+void bdev_unhash_inode(dev_t dev)
+{
+	struct inode *inode;
+
+	inode = ilookup5(blockdev_superblock, hash(dev), bdev_test, &dev);
+	if (inode) {
+		remove_inode_hash(inode);
+		iput(inode);
+	}
+}
+
 struct block_device *bdget(dev_t dev)
 {
 	struct block_device *bdev;
@@ -1199,6 +1221,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 		bdev->bd_disk = disk;
 		bdev->bd_queue = disk->queue;
 		bdev->bd_contains = bdev;
+		bdev->bd_inode->i_flags = disk->fops->direct_access ? S_DAX : 0;
 		if (!partno) {
 			ret = -ENXIO;
@@ -1271,6 +1294,9 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 			    (bdev->bd_part->nr_sects % (PAGE_SIZE / 512)))
 				bdev->bd_inode->i_flags &= ~S_DAX;
 		}
+
+		if (bdev->bd_bdi == &noop_backing_dev_info)
+			bdev->bd_bdi = bdi_get(disk->queue->backing_dev_info);
 	} else {
 		if (bdev->bd_contains == bdev) {
 			ret = 0;
@@ -1535,12 +1561,6 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
 		kill_bdev(bdev);
 
 		bdev_write_inode(bdev);
-		/*
-		 * Detaching bdev inode from its wb in __destroy_inode()
-		 * is too late: the queue which embeds its bdi (along with
-		 * root wb) can be gone as soon as we put_disk() below.
-		 */
-		inode_detach_wb(bdev->bd_inode);
 	}
 	if (bdev->bd_contains == bdev) {
 		if (disk->fops->release)
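The block_dev.c changes above make the bdev hold its own counted reference on the queue's backing_dev_info instead of detaching writeback at __blkdev_put() time. A condensed sketch of the resulting lifetime discipline (4.x bdi refcount API, restating the hunks above):

	/* at first open: take a counted reference on the queue's bdi */
	if (bdev->bd_bdi == &noop_backing_dev_info)
		bdev->bd_bdi = bdi_get(disk->queue->backing_dev_info);

	/* at inode eviction: detach writeback first, then drop the
	 * reference, because bdi_put() may free bdi->wb */
	inode_detach_wb(inode);
	if (bdev->bd_bdi != &noop_backing_dev_info) {
		bdi_put(bdev->bd_bdi);
		bdev->bd_bdi = &noop_backing_dev_info;
	}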
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index b28bc7690d4b..42c745dbccfa 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3951,8 +3951,8 @@ retry:
 	if (wbc->sync_mode == WB_SYNC_ALL)
 		tag_pages_for_writeback(mapping, index, end);
 	while (!done && !nr_to_write_done && (index <= end) &&
-	       (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
-			min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
+	       (nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end,
+			tag))) {
 		unsigned i;
 
 		scanned = 1;
@@ -3962,11 +3962,6 @@ retry:
 			if (!PagePrivate(page))
 				continue;
 
-			if (!wbc->range_cyclic && page->index > end) {
-				done = 1;
-				break;
-			}
-
 			spin_lock(&mapping->private_lock);
 			if (!PagePrivate(page)) {
 				spin_unlock(&mapping->private_lock);
@@ -4099,8 +4094,8 @@ retry:
 	if (wbc->sync_mode == WB_SYNC_ALL)
 		tag_pages_for_writeback(mapping, index, end);
 	while (!done && !nr_to_write_done && (index <= end) &&
-	       (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
-			min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
+	       (nr_pages = pagevec_lookup_range_tag(&pvec, mapping,
+			&index, end, tag))) {
 		unsigned i;
 
 		scanned = 1;
@@ -4124,12 +4119,6 @@ retry:
 				continue;
 			}
 
-			if (!wbc->range_cyclic && page->index > end) {
-				done = 1;
-				unlock_page(page);
-				continue;
-			}
-
 			if (wbc->sync_mode != WB_SYNC_NONE) {
 				if (PageWriteback(page))
 					flush_fn(data);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index da4ad006739d..ca30e3670b42 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -6473,7 +6473,8 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
 		goto out_unlock_inode;
 	} else {
 		btrfs_update_inode(trans, root, inode);
-		d_instantiate_new(dentry, inode);
+		unlock_new_inode(inode);
+		d_instantiate(dentry, inode);
 	}
 
 out_unlock:
@@ -6548,7 +6549,8 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
 		goto out_unlock_inode;
 
 	BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
-	d_instantiate_new(dentry, inode);
+	unlock_new_inode(inode);
+	d_instantiate(dentry, inode);
 
 out_unlock:
 	btrfs_end_transaction(trans, root);
@@ -6691,7 +6693,12 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 	if (err)
 		goto out_fail_inode;
 
-	d_instantiate_new(dentry, inode);
+	d_instantiate(dentry, inode);
+	/*
+	 * mkdir is special. We're unlocking after we call d_instantiate
+	 * to avoid a race with nfsd calling d_instantiate.
+	 */
+	unlock_new_inode(inode);
 	drop_on_err = 0;
 
 out_fail:
@@ -9850,7 +9857,8 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
 		goto out_unlock_inode;
 	}
 
-	d_instantiate_new(dentry, inode);
+	unlock_new_inode(inode);
+	d_instantiate(dentry, inode);
 
 out_unlock:
 	btrfs_end_transaction(trans, root);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index ff4df1783219..0cbc7af54d0b 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -3676,7 +3676,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
 		src_offset = btrfs_item_ptr_offset(src, start_slot + i);
 
-		if ((i == (nr - 1)))
+		if (i == nr - 1)
 			last_key = ins_keys[i];
 
 		if (ins_keys[i].type == BTRFS_INODE_ITEM_KEY) {
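Both btrfs writepages loops above (and the ceph one later in this diff) switch to pagevec_lookup_range_tag(), which takes the end index itself and never returns pages past it, so the open-coded "page->index > end" bail-outs can be deleted. The converted loop shape, as a sketch against the pagevec API used in this tree:

	struct pagevec pvec;
	pgoff_t index = start;
	unsigned int nr_pages;

	pagevec_init(&pvec, 0);
	while ((nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index,
						    end, PAGECACHE_TAG_DIRTY))) {
		unsigned int i;

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];
			/* every page here has page->index <= end, so no
			 * manual range check (and no early "done = 1")
			 * is needed */
		}
		pagevec_release(&pvec);
		cond_resched();
	}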
diff --git a/fs/buffer.c b/fs/buffer.c
index f278e27bd8c0..8ee41694a5d4 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -621,6 +621,18 @@ void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
 }
 EXPORT_SYMBOL(mark_buffer_dirty_inode);
 
+#ifdef CONFIG_BLK_DEV_IO_TRACE
+static inline void save_dirty_task(struct page *page)
+{
+	/* Save the task that is dirtying this page */
+	page->tsk_dirty = current;
+}
+#else
+static inline void save_dirty_task(struct page *page)
+{
+}
+#endif
+
 /*
  * Mark the page dirty, and set it dirty in the radix tree, and mark the inode
  * dirty.
@@ -641,6 +653,7 @@ static void __set_page_dirty(struct page *page, struct address_space *mapping,
 		account_page_dirtied(page, mapping, memcg);
 		radix_tree_tag_set(&mapping->page_tree,
 				page_index(page), PAGECACHE_TAG_DIRTY);
+		save_dirty_task(page);
 	}
 	spin_unlock_irqrestore(&mapping->tree_lock, flags);
 }
@@ -1466,12 +1479,48 @@ static bool has_bh_in_lru(int cpu, void *dummy)
 	return 0;
 }
 
+static void __evict_bh_lru(void *arg)
+{
+	struct bh_lru *b = &get_cpu_var(bh_lrus);
+	struct buffer_head *bh = arg;
+	int i;
+
+	for (i = 0; i < BH_LRU_SIZE; i++) {
+		if (b->bhs[i] == bh) {
+			brelse(b->bhs[i]);
+			b->bhs[i] = NULL;
+			goto out;
+		}
+	}
+out:
+	put_cpu_var(bh_lrus);
+}
+
+static bool bh_exists_in_lru(int cpu, void *arg)
+{
+	struct bh_lru *b = per_cpu_ptr(&bh_lrus, cpu);
+	struct buffer_head *bh = arg;
+	int i;
+
+	for (i = 0; i < BH_LRU_SIZE; i++) {
+		if (b->bhs[i] == bh)
+			return 1;
+	}
+
+	return 0;
+}
+
 void invalidate_bh_lrus(void)
 {
 	on_each_cpu_cond(has_bh_in_lru, invalidate_bh_lru, NULL, 1, GFP_KERNEL);
 }
 EXPORT_SYMBOL_GPL(invalidate_bh_lrus);
 
+static void evict_bh_lrus(struct buffer_head *bh)
+{
+	on_each_cpu_cond(bh_exists_in_lru, __evict_bh_lru, bh, 1, GFP_ATOMIC);
+}
+
 void set_bh_page(struct buffer_head *bh,
 		struct page *page, unsigned long offset)
 {
@@ -3199,8 +3248,15 @@ drop_buffers(struct page *page, struct buffer_head **buffers_to_free)
 	do {
 		if (buffer_write_io_error(bh) && page->mapping)
 			set_bit(AS_EIO, &page->mapping->flags);
-		if (buffer_busy(bh))
-			goto failed;
+		if (buffer_busy(bh)) {
+			/*
+			 * Check if the busy failure was due to an
+			 * outstanding LRU reference
+			 */
+			evict_bh_lrus(bh);
+			if (buffer_busy(bh))
+				goto failed;
+		}
 		bh = bh->b_this_page;
 	} while (bh != head);
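evict_bh_lrus() above reuses the on_each_cpu_cond() idiom of invalidate_bh_lrus(): a cheap predicate runs first, so only CPUs whose per-CPU buffer-head LRU actually holds the buffer receive an IPI. The call shape, annotated (GFP_ATOMIC because drop_buffers() may run in atomic context):

	/* on_each_cpu_cond(cond, func, info, wait, gfp) runs func(info)
	 * on every CPU where cond(cpu, info) returned true */
	on_each_cpu_cond(bh_exists_in_lru, /* predicate: does this CPU cache bh? */
			 __evict_bh_lru,   /* IPI handler on matching CPUs */
			 bh,               /* argument passed to both */
			 1,                /* wait for all handlers to finish */
			 GFP_ATOMIC);      /* cpumask allocation context */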
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 26de74684c17..bec37093a3de 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -778,8 +778,7 @@ retry:
 		struct page **pages = NULL;
 		mempool_t *pool = NULL;	/* Becomes non-null if mempool used */
 		struct page *page;
-		int want;
-		u64 offset, len;
+		u64 offset = 0, len = 0;
 		long writeback_stat;
 
 		next = 0;
@@ -788,14 +787,9 @@ retry:
 
 get_more_pages:
 		first = -1;
-		want = min(end - index,
-			   min((pgoff_t)PAGEVEC_SIZE,
-			       max_pages - (pgoff_t)locked_pages) - 1)
-			+ 1;
-		pvec_pages = pagevec_lookup_tag(&pvec, mapping, &index,
-						PAGECACHE_TAG_DIRTY,
-						want);
-		dout("pagevec_lookup_tag got %d\n", pvec_pages);
+		pvec_pages = pagevec_lookup_range_tag(&pvec, mapping, &index,
+						end, PAGECACHE_TAG_DIRTY);
+		dout("pagevec_lookup_range_tag got %d\n", pvec_pages);
 		if (!pvec_pages && !locked_pages)
 			break;
 		for (i = 0; i < pvec_pages && locked_pages < max_pages; i++) {
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 5bc617cb7721..62cc0c22db63 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -3452,13 +3452,13 @@ readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
 	 * should have access to this page, we're safe to simply set
 	 * PG_locked without checking it first.
 	 */
-	__set_page_locked(page);
+	__SetPageLocked(page);
 	rc = add_to_page_cache_locked(page, mapping,
 				      page->index, gfp);
 
 	/* give up if we can't stick it in the cache */
 	if (rc) {
-		__clear_page_locked(page);
+		__ClearPageLocked(page);
 		return rc;
 	}
 
@@ -3479,9 +3479,9 @@ readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
 		if (*bytes + PAGE_CACHE_SIZE > rsize)
 			break;
 
-		__set_page_locked(page);
+		__SetPageLocked(page);
 		if (add_to_page_cache_locked(page, mapping, page->index, gfp)) {
-			__clear_page_locked(page);
+			__ClearPageLocked(page);
 			break;
 		}
 		list_move_tail(&page->lru, tmplist);
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 5af973621c73..2acdc6ddefb5 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -871,6 +871,9 @@ COMPATIBLE_IOCTL(TIOCGPTN)
 COMPATIBLE_IOCTL(TIOCSPTLCK)
 COMPATIBLE_IOCTL(TIOCSERGETLSR)
 COMPATIBLE_IOCTL(TIOCSIG)
+COMPATIBLE_IOCTL(TIOCPMGET)
+COMPATIBLE_IOCTL(TIOCPMPUT)
+COMPATIBLE_IOCTL(TIOCPMACT)
 #ifdef TIOCSRS485
 COMPATIBLE_IOCTL(TIOCSRS485)
 #endif
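The cifs hunks are a plain rename to the newer page-flag accessors (__SetPageLocked/__ClearPageLocked); behavior is unchanged. For reference, a condensed sketch of the add-to-cache idiom they sit in:

	__SetPageLocked(page);	/* page not yet visible to anyone else */
	if (add_to_page_cache_locked(page, mapping, page->index, gfp)) {
		__ClearPageLocked(page);	/* back out on failure */
		return rc;
	}
	/* page is now in the page cache and still locked, ready for read-in */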
diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c
index 0758d32ad01b..0f46cf550907 100644
--- a/fs/crypto/crypto.c
+++ b/fs/crypto/crypto.c
@@ -162,12 +162,8 @@ int fscrypt_do_page_crypto(const struct inode *inode, fscrypt_direction_t rw,
 	}
 
 	req = skcipher_request_alloc(tfm, gfp_flags);
-	if (!req) {
-		printk_ratelimited(KERN_ERR
-			"%s: crypto_request_alloc() failed\n",
-			__func__);
+	if (!req)
 		return -ENOMEM;
-	}
 
 	skcipher_request_set_callback(
 		req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
@@ -184,9 +180,10 @@ int fscrypt_do_page_crypto(const struct inode *inode, fscrypt_direction_t rw,
 		res = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);
 	skcipher_request_free(req);
 	if (res) {
-		printk_ratelimited(KERN_ERR
-			"%s: crypto_skcipher_encrypt() returned %d\n",
-			__func__, res);
+		fscrypt_err(inode->i_sb,
+			    "%scryption failed for inode %lu, block %llu: %d",
+			    (rw == FS_DECRYPT ? "de" : "en"),
+			    inode->i_ino, lblk_num, res);
 		return res;
 	}
 	return 0;
@@ -332,7 +329,6 @@ static int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags)
 		return 0;
 	}
 
-	/* this should eventually be an flag in d_flags */
 	spin_lock(&dentry->d_lock);
 	cached_with_key = dentry->d_flags & DCACHE_ENCRYPTED_WITH_KEY;
 	spin_unlock(&dentry->d_lock);
@@ -359,7 +355,6 @@ static int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags)
 const struct dentry_operations fscrypt_d_ops = {
 	.d_revalidate = fscrypt_d_revalidate,
 };
-EXPORT_SYMBOL(fscrypt_d_ops);
 
 void fscrypt_restore_control_page(struct page *page)
 {
@@ -428,13 +423,43 @@ fail:
 	return res;
 }
 
+void fscrypt_msg(struct super_block *sb, const char *level,
+		 const char *fmt, ...)
+{
+	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
+				      DEFAULT_RATELIMIT_BURST);
+	struct va_format vaf;
+	va_list args;
+
+	if (!__ratelimit(&rs))
+		return;
+
+	va_start(args, fmt);
+	vaf.fmt = fmt;
+	vaf.va = &args;
+	if (sb)
+		printk("%sfscrypt (%s): %pV\n", level, sb->s_id, &vaf);
+	else
+		printk("%sfscrypt: %pV\n", level, &vaf);
+	va_end(args);
+}
+
 /**
  * fscrypt_init() - Set up for fs encryption.
  */
 static int __init fscrypt_init(void)
 {
+	/*
+	 * Use an unbound workqueue to allow bios to be decrypted in parallel
+	 * even when they happen to complete on the same CPU.  This sacrifices
+	 * locality, but it's worthwhile since decryption is CPU-intensive.
+	 *
+	 * Also use a high-priority workqueue to prioritize decryption work,
+	 * which blocks reads from completing, over regular application tasks.
+	 */
 	fscrypt_read_workqueue = alloc_workqueue("fscrypt_read_queue",
-						 WQ_HIGHPRI, 0);
+						 WQ_UNBOUND | WQ_HIGHPRI,
+						 num_online_cpus());
 	if (!fscrypt_read_workqueue)
 		goto fail;
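fscrypt_msg() gives fscrypt a single rate-limited logging path: one shared ratelimit state, an optional super_block for the "fscrypt (sb)" prefix, and %pV to forward the caller's format string. Together with the fscrypt_warn()/fscrypt_err() wrappers added to fscrypt_private.h below, call sites reduce to, for example (the second call is a hypothetical one with no sb context):

	fscrypt_warn(inode->i_sb,
		     "inconsistent encryption contexts: %lu/%lu",
		     d_inode(dir)->i_ino, inode->i_ino);

	/* NULL sb is allowed when no filesystem context is available */
	fscrypt_err(NULL, "error allocating SHA-256 transform: %ld",
		    PTR_ERR(tfm));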
diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c
index b18fa323d1d9..1bdb9f226eec 100644
--- a/fs/crypto/fname.c
+++ b/fs/crypto/fname.c
@@ -58,11 +58,8 @@ int fname_encrypt(struct inode *inode, const struct qstr *iname,
 
 	/* Set up the encryption request */
 	req = skcipher_request_alloc(tfm, GFP_NOFS);
-	if (!req) {
-		printk_ratelimited(KERN_ERR
-			"%s: skcipher_request_alloc() failed\n", __func__);
+	if (!req)
 		return -ENOMEM;
-	}
 	skcipher_request_set_callback(req,
 			CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
 			crypto_req_done, &wait);
@@ -73,8 +70,9 @@ int fname_encrypt(struct inode *inode, const struct qstr *iname,
 	res = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);
 	skcipher_request_free(req);
 	if (res < 0) {
-		printk_ratelimited(KERN_ERR
-				"%s: Error (error code %d)\n", __func__, res);
+		fscrypt_err(inode->i_sb,
+			    "Filename encryption failed for inode %lu: %d",
+			    inode->i_ino, res);
 		return res;
 	}
 
@@ -95,23 +93,14 @@ static int fname_decrypt(struct inode *inode,
 	struct skcipher_request *req = NULL;
 	DECLARE_CRYPTO_WAIT(wait);
 	struct scatterlist src_sg, dst_sg;
-	struct fscrypt_info *ci = inode->i_crypt_info;
-	struct crypto_skcipher *tfm = ci->ci_ctfm;
+	struct crypto_skcipher *tfm = inode->i_crypt_info->ci_ctfm;
 	int res = 0;
 	char iv[FS_CRYPTO_BLOCK_SIZE];
-	unsigned lim;
-
-	lim = inode->i_sb->s_cop->max_namelen(inode);
-	if (iname->len <= 0 || iname->len > lim)
-		return -EIO;
 
 	/* Allocate request */
 	req = skcipher_request_alloc(tfm, GFP_NOFS);
-	if (!req) {
-		printk_ratelimited(KERN_ERR
-			"%s: crypto_request_alloc() failed\n", __func__);
+	if (!req)
 		return -ENOMEM;
-	}
 	skcipher_request_set_callback(req,
 			CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
 			crypto_req_done, &wait);
@@ -126,8 +115,9 @@ static int fname_decrypt(struct inode *inode,
 	res = crypto_wait_req(crypto_skcipher_decrypt(req), &wait);
 	skcipher_request_free(req);
 	if (res < 0) {
-		printk_ratelimited(KERN_ERR
-				"%s: Error (error code %d)\n", __func__, res);
+		fscrypt_err(inode->i_sb,
+			    "Filename decryption failed for inode %lu: %d",
+			    inode->i_ino, res);
 		return res;
 	}
 
@@ -340,12 +330,12 @@ int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname,
 		return 0;
 	}
 	ret = fscrypt_get_encryption_info(dir);
-	if (ret && ret != -EOPNOTSUPP)
+	if (ret)
 		return ret;
 
 	if (dir->i_crypt_info) {
 		if (!fscrypt_fname_encrypted_size(dir, iname->len,
-					dir->i_sb->s_cop->max_namelen(dir),
+					dir->i_sb->s_cop->max_namelen,
 					&fname->crypto_buf.len))
 			return -ENAMETOOLONG;
 		fname->crypto_buf.name = kmalloc(fname->crypto_buf.len,
diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h
index fe6f6524c1aa..92c6c0ace1b1 100644
--- a/fs/crypto/fscrypt_private.h
+++ b/fs/crypto/fscrypt_private.h
@@ -17,15 +17,7 @@
 
 /* Encryption parameters */
 #define FS_IV_SIZE			16
-#define FS_AES_128_ECB_KEY_SIZE		16
-#define FS_AES_128_CBC_KEY_SIZE		16
-#define FS_AES_128_CTS_KEY_SIZE		16
-#define FS_AES_256_GCM_KEY_SIZE		32
-#define FS_AES_256_CBC_KEY_SIZE		32
-#define FS_AES_256_CTS_KEY_SIZE		32
-#define FS_AES_256_XTS_KEY_SIZE		64
-
-#define FS_KEY_DERIVATION_NONCE_SIZE	16
+#define FS_KEY_DERIVATION_NONCE_SIZE	16
 
 /**
  * Encryption context for inode
@@ -101,10 +93,6 @@ static inline bool fscrypt_valid_enc_modes(u32 contents_mode,
 	    filenames_mode == FS_ENCRYPTION_MODE_AES_256_CTS)
 		return true;
 
-	if (contents_mode == FS_ENCRYPTION_MODE_SPECK128_256_XTS &&
-	    filenames_mode == FS_ENCRYPTION_MODE_SPECK128_256_CTS)
-		return true;
-
 	return false;
 }
 
@@ -119,6 +107,15 @@ extern int fscrypt_do_page_crypto(const struct inode *inode,
 				  gfp_t gfp_flags);
 extern struct page *fscrypt_alloc_bounce_page(struct fscrypt_ctx *ctx,
 					      gfp_t gfp_flags);
+extern const struct dentry_operations fscrypt_d_ops;
+
+extern void __printf(3, 4) __cold
+fscrypt_msg(struct super_block *sb, const char *level, const char *fmt, ...);
+
+#define fscrypt_warn(sb, fmt, ...)		\
+	fscrypt_msg(sb, KERN_WARNING, fmt, ##__VA_ARGS__)
+#define fscrypt_err(sb, fmt, ...)		\
+	fscrypt_msg(sb, KERN_ERR, fmt, ##__VA_ARGS__)
 
 /* fname.c */
 extern int fname_encrypt(struct inode *inode, const struct qstr *iname,
diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c
index bc010e4609ef..b5328a0c6364 100644
--- a/fs/crypto/hooks.c
+++ b/fs/crypto/hooks.c
@@ -39,8 +39,9 @@ int fscrypt_file_open(struct inode *inode, struct file *filp)
 	dir = dget_parent(file_dentry(filp));
 	if (IS_ENCRYPTED(d_inode(dir)) &&
 	    !fscrypt_has_permitted_context(d_inode(dir), inode)) {
-		pr_warn_ratelimited("fscrypt: inconsistent encryption contexts: %lu/%lu",
-				    d_inode(dir)->i_ino, inode->i_ino);
+		fscrypt_warn(inode->i_sb,
+			     "inconsistent encryption contexts: %lu/%lu",
+			     d_inode(dir)->i_ino, inode->i_ino);
 		err = -EPERM;
 	}
 	dput(dir);
diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c
index 472f69188a96..3cbe0eff8767 100644
--- a/fs/crypto/keyinfo.c
+++ b/fs/crypto/keyinfo.c
@@ -18,17 +18,16 @@
 
 static struct crypto_shash *essiv_hash_tfm;
 
-/**
- * derive_key_aes() - Derive a key using AES-128-ECB
- * @deriving_key: Encryption key used for derivation.
- * @source_key: Source key to which to apply derivation.
- * @derived_raw_key: Derived raw key.
+/*
+ * Key derivation function.  This generates the derived key by encrypting the
+ * master key with AES-128-ECB using the inode's nonce as the AES key.
  *
- * Return: Zero on success; non-zero otherwise.
+ * The master key must be at least as long as the derived key.  If the master
+ * key is longer, then only the first 'derived_keysize' bytes are used.
  */
-static int derive_key_aes(u8 deriving_key[FS_AES_128_ECB_KEY_SIZE],
-			  const struct fscrypt_key *source_key,
-			  u8 derived_raw_key[FS_MAX_KEY_SIZE])
+static int derive_key_aes(const u8 *master_key,
+			  const struct fscrypt_context *ctx,
+			  u8 *derived_key, unsigned int derived_keysize)
 {
 	int res = 0;
 	struct skcipher_request *req = NULL;
@@ -50,14 +49,13 @@ static int derive_key_aes(const u8 *master_key,
 	skcipher_request_set_callback(req,
 			CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
 			crypto_req_done, &wait);
-	res = crypto_skcipher_setkey(tfm, deriving_key,
-				     FS_AES_128_ECB_KEY_SIZE);
+	res = crypto_skcipher_setkey(tfm, ctx->nonce, sizeof(ctx->nonce));
 	if (res < 0)
 		goto out;
 
-	sg_init_one(&src_sg, source_key->raw, source_key->size);
-	sg_init_one(&dst_sg, derived_raw_key, source_key->size);
-	skcipher_request_set_crypt(req, &src_sg, &dst_sg, source_key->size,
+	sg_init_one(&src_sg, master_key, derived_keysize);
+	sg_init_one(&dst_sg, derived_key, derived_keysize);
+	skcipher_request_set_crypt(req, &src_sg, &dst_sg, derived_keysize,
 				   NULL);
 	res = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);
 out:
@@ -66,103 +64,137 @@ out:
 	return res;
 }
 
-static int validate_user_key(struct fscrypt_info *crypt_info,
-			struct fscrypt_context *ctx, u8 *raw_key,
-			const char *prefix, int min_keysize)
+/*
+ * Search the current task's subscribed keyrings for a "logon" key with
+ * description prefix:descriptor, and if found acquire a read lock on it and
+ * return a pointer to its validated payload in *payload_ret.
+ */
+static struct key *
+find_and_lock_process_key(const char *prefix,
+			  const u8 descriptor[FS_KEY_DESCRIPTOR_SIZE],
+			  unsigned int min_keysize,
+			  const struct fscrypt_key **payload_ret)
 {
 	char *description;
-	struct key *keyring_key;
-	struct fscrypt_key *master_key;
+	struct key *key;
 	const struct user_key_payload *ukp;
-	int res;
+	const struct fscrypt_key *payload;
 
 	description = kasprintf(GFP_NOFS, "%s%*phN", prefix,
-				FS_KEY_DESCRIPTOR_SIZE,
-				ctx->master_key_descriptor);
+				FS_KEY_DESCRIPTOR_SIZE, descriptor);
 	if (!description)
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 
-	keyring_key = request_key(&key_type_logon, description, NULL);
+	key = request_key(&key_type_logon, description, NULL);
 	kfree(description);
-	if (IS_ERR(keyring_key))
-		return PTR_ERR(keyring_key);
-	down_read(&keyring_key->sem);
-
-	if (keyring_key->type != &key_type_logon) {
-		printk_once(KERN_WARNING
-				"%s: key type must be logon\n", __func__);
-		res = -ENOKEY;
-		goto out;
-	}
-	ukp = user_key_payload(keyring_key);
-	if (!ukp) {
-		/* key was revoked before we acquired its semaphore */
-		res = -EKEYREVOKED;
-		goto out;
+	if (IS_ERR(key))
+		return key;
+
+	down_read(&key->sem);
+	ukp = user_key_payload(key);
+
+	if (!ukp) /* was the key revoked before we acquired its semaphore? */
+		goto invalid;
+
+	payload = (const struct fscrypt_key *)ukp->data;
+
+	if (ukp->datalen != sizeof(struct fscrypt_key) ||
+	    payload->size < 1 || payload->size > FS_MAX_KEY_SIZE) {
+		fscrypt_warn(NULL,
+			     "key with description '%s' has invalid payload",
+			     key->description);
+		goto invalid;
 	}
-	if (ukp->datalen != sizeof(struct fscrypt_key)) {
-		res = -EINVAL;
-		goto out;
+
+	if (payload->size < min_keysize) {
+		fscrypt_warn(NULL,
+			     "key with description '%s' is too short (got %u bytes, need %u+ bytes)",
+			     key->description, payload->size, min_keysize);
+		goto invalid;
 	}
-	master_key = (struct fscrypt_key *)ukp->data;
-	BUILD_BUG_ON(FS_AES_128_ECB_KEY_SIZE != FS_KEY_DERIVATION_NONCE_SIZE);
-
-	if (master_key->size < min_keysize || master_key->size > FS_MAX_KEY_SIZE
-	    || master_key->size % AES_BLOCK_SIZE != 0) {
-		printk_once(KERN_WARNING
-				"%s: key size incorrect: %d\n",
-				__func__, master_key->size);
-		res = -ENOKEY;
-		goto out;
+
+	*payload_ret = payload;
+	return key;
+
+invalid:
+	up_read(&key->sem);
+	key_put(key);
+	return ERR_PTR(-ENOKEY);
+}
+
+/* Find the master key, then derive the inode's actual encryption key */
+static int find_and_derive_key(const struct inode *inode,
+			       const struct fscrypt_context *ctx,
+			       u8 *derived_key, unsigned int derived_keysize)
+{
+	struct key *key;
+	const struct fscrypt_key *payload;
+	int err;
+
+	key = find_and_lock_process_key(FS_KEY_DESC_PREFIX,
+					ctx->master_key_descriptor,
+					derived_keysize, &payload);
+	if (key == ERR_PTR(-ENOKEY) && inode->i_sb->s_cop->key_prefix) {
+		key = find_and_lock_process_key(inode->i_sb->s_cop->key_prefix,
+						ctx->master_key_descriptor,
+						derived_keysize, &payload);
 	}
-	res = derive_key_aes(ctx->nonce, master_key, raw_key);
-out:
-	up_read(&keyring_key->sem);
-	key_put(keyring_key);
-	return res;
+	if (IS_ERR(key))
+		return PTR_ERR(key);
+	err = derive_key_aes(payload->raw, ctx, derived_key, derived_keysize);
+	up_read(&key->sem);
+	key_put(key);
+	return err;
 }
 
-static const struct {
+static struct fscrypt_mode {
+	const char *friendly_name;
 	const char *cipher_str;
 	int keysize;
+	bool logged_impl_name;
 } available_modes[] = {
-	[FS_ENCRYPTION_MODE_AES_256_XTS] = { "xts(aes)",
-					     FS_AES_256_XTS_KEY_SIZE },
-	[FS_ENCRYPTION_MODE_AES_256_CTS] = { "cts(cbc(aes))",
-					     FS_AES_256_CTS_KEY_SIZE },
-	[FS_ENCRYPTION_MODE_AES_128_CBC] = { "cbc(aes)",
-					     FS_AES_128_CBC_KEY_SIZE },
-	[FS_ENCRYPTION_MODE_AES_128_CTS] = { "cts(cbc(aes))",
-					     FS_AES_128_CTS_KEY_SIZE },
-	[FS_ENCRYPTION_MODE_SPECK128_256_XTS] = { "xts(speck128)", 64 },
-	[FS_ENCRYPTION_MODE_SPECK128_256_CTS] = { "cts(cbc(speck128))", 32 },
+	[FS_ENCRYPTION_MODE_AES_256_XTS] = {
+		.friendly_name = "AES-256-XTS",
+		.cipher_str = "xts(aes)",
+		.keysize = 64,
+	},
+	[FS_ENCRYPTION_MODE_AES_256_CTS] = {
+		.friendly_name = "AES-256-CTS-CBC",
+		.cipher_str = "cts(cbc(aes))",
+		.keysize = 32,
+	},
+	[FS_ENCRYPTION_MODE_AES_128_CBC] = {
+		.friendly_name = "AES-128-CBC",
+		.cipher_str = "cbc(aes)",
+		.keysize = 16,
+	},
+	[FS_ENCRYPTION_MODE_AES_128_CTS] = {
+		.friendly_name = "AES-128-CTS-CBC",
+		.cipher_str = "cts(cbc(aes))",
+		.keysize = 16,
+	},
 };
 
-static int determine_cipher_type(struct fscrypt_info *ci, struct inode *inode,
-				 const char **cipher_str_ret, int *keysize_ret)
+static struct fscrypt_mode *
+select_encryption_mode(const struct fscrypt_info *ci, const struct inode *inode)
 {
-	u32 mode;
-
 	if (!fscrypt_valid_enc_modes(ci->ci_data_mode, ci->ci_filename_mode)) {
-		pr_warn_ratelimited("fscrypt: inode %lu uses unsupported encryption modes (contents mode %d, filenames mode %d)\n",
-				    inode->i_ino,
-				    ci->ci_data_mode, ci->ci_filename_mode);
-		return -EINVAL;
+		fscrypt_warn(inode->i_sb,
+			     "inode %lu uses unsupported encryption modes (contents mode %d, filenames mode %d)",
+			     inode->i_ino, ci->ci_data_mode,
+			     ci->ci_filename_mode);
+		return ERR_PTR(-EINVAL);
 	}
 
-	if (S_ISREG(inode->i_mode)) {
-		mode = ci->ci_data_mode;
-	} else if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) {
-		mode = ci->ci_filename_mode;
-	} else {
-		WARN_ONCE(1, "fscrypt: filesystem tried to load encryption info for inode %lu, which is not encryptable (file type %d)\n",
-			  inode->i_ino, (inode->i_mode & S_IFMT));
-		return -EINVAL;
-	}
+	if (S_ISREG(inode->i_mode))
+		return &available_modes[ci->ci_data_mode];
+
+	if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
+		return &available_modes[ci->ci_filename_mode];
 
-	*cipher_str_ret = available_modes[mode].cipher_str;
-	*keysize_ret = available_modes[mode].keysize;
-	return 0;
+	WARN_ONCE(1, "fscrypt: filesystem tried to load encryption info for inode %lu, which is not encryptable (file type %d)\n",
+		  inode->i_ino, (inode->i_mode & S_IFMT));
+	return ERR_PTR(-EINVAL);
 }
 
 static void put_crypt_info(struct fscrypt_info *ci)
@@ -185,8 +217,9 @@ static int derive_essiv_salt(const u8 *key, int keysize, u8 *salt)
 
 		tfm = crypto_alloc_shash("sha256", 0, 0);
 		if (IS_ERR(tfm)) {
-			pr_warn_ratelimited("fscrypt: error allocating SHA-256 transform: %ld\n",
-					    PTR_ERR(tfm));
+			fscrypt_warn(NULL,
+				     "error allocating SHA-256 transform: %ld",
+				     PTR_ERR(tfm));
 			return PTR_ERR(tfm);
 		}
 		prev_tfm = cmpxchg(&essiv_hash_tfm, NULL, tfm);
@@ -246,8 +279,7 @@ int fscrypt_get_encryption_info(struct inode *inode)
 	struct fscrypt_info *crypt_info;
 	struct fscrypt_context ctx;
 	struct crypto_skcipher *ctfm;
-	const char *cipher_str;
-	int keysize;
+	struct fscrypt_mode *mode;
 	u8 *raw_key = NULL;
 	int res;
 
@@ -291,57 +323,59 @@ int fscrypt_get_encryption_info(struct inode *inode)
 	memcpy(crypt_info->ci_master_key, ctx.master_key_descriptor,
 	       sizeof(crypt_info->ci_master_key));
 
-	res = determine_cipher_type(crypt_info, inode, &cipher_str, &keysize);
-	if (res)
+	mode = select_encryption_mode(crypt_info, inode);
+	if (IS_ERR(mode)) {
+		res = PTR_ERR(mode);
 		goto out;
+	}
 
 	/*
	 * This cannot be a stack buffer because it is passed to the scatterlist
	 * crypto API as part of key derivation.
	 */
 	res = -ENOMEM;
-	raw_key = kmalloc(FS_MAX_KEY_SIZE, GFP_NOFS);
+	raw_key = kmalloc(mode->keysize, GFP_NOFS);
 	if (!raw_key)
 		goto out;
 
-	res = validate_user_key(crypt_info, &ctx, raw_key, FS_KEY_DESC_PREFIX,
-				keysize);
-	if (res && inode->i_sb->s_cop->key_prefix) {
-		int res2 = validate_user_key(crypt_info, &ctx, raw_key,
-					     inode->i_sb->s_cop->key_prefix,
-					     keysize);
-		if (res2) {
-			if (res2 == -ENOKEY)
-				res = -ENOKEY;
-			goto out;
-		}
-	} else if (res) {
+	res = find_and_derive_key(inode, &ctx, raw_key, mode->keysize);
+	if (res)
 		goto out;
-	}
 
-	ctfm = crypto_alloc_skcipher(cipher_str, 0, 0);
-	if (!ctfm || IS_ERR(ctfm)) {
-		res = ctfm ? PTR_ERR(ctfm) : -ENOMEM;
-		pr_debug("%s: error %d (inode %lu) allocating crypto tfm\n",
-			 __func__, res, inode->i_ino);
+	ctfm = crypto_alloc_skcipher(mode->cipher_str, 0, 0);
+	if (IS_ERR(ctfm)) {
+		res = PTR_ERR(ctfm);
+		fscrypt_warn(inode->i_sb,
+			     "error allocating '%s' transform for inode %lu: %d",
+			     mode->cipher_str, inode->i_ino, res);
 		goto out;
 	}
+	if (unlikely(!mode->logged_impl_name)) {
+		/*
+		 * fscrypt performance can vary greatly depending on which
+		 * crypto algorithm implementation is used.  Help people debug
+		 * performance problems by logging the ->cra_driver_name the
+		 * first time a mode is used.  Note that multiple threads can
+		 * race here, but it doesn't really matter.
+		 */
+		mode->logged_impl_name = true;
+		pr_info("fscrypt: %s using implementation \"%s\"\n",
+			mode->friendly_name,
+			crypto_skcipher_alg(ctfm)->base.cra_driver_name);
+	}
 	crypt_info->ci_ctfm = ctfm;
-	crypto_skcipher_clear_flags(ctfm, ~0);
 	crypto_skcipher_set_flags(ctfm, CRYPTO_TFM_REQ_WEAK_KEY);
-	/*
-	 * if the provided key is longer than keysize, we use the first
-	 * keysize bytes of the derived key only
-	 */
-	res = crypto_skcipher_setkey(ctfm, raw_key, keysize);
+	res = crypto_skcipher_setkey(ctfm, raw_key, mode->keysize);
 	if (res)
 		goto out;
 
 	if (S_ISREG(inode->i_mode) &&
 	    crypt_info->ci_data_mode == FS_ENCRYPTION_MODE_AES_128_CBC) {
-		res = init_essiv_generator(crypt_info, raw_key, keysize);
+		res = init_essiv_generator(crypt_info, raw_key, mode->keysize);
 		if (res) {
-			pr_debug("%s: error %d (inode %lu) allocating essiv tfm\n",
-				 __func__, res, inode->i_ino);
+			fscrypt_warn(inode->i_sb,
+				     "error initializing ESSIV generator for inode %lu: %d",
+				     inode->i_ino, res);
 			goto out;
 		}
 	}
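The derivation above is deliberately simple: the per-inode key is AES-128-ECB of the master key bytes, using the inode's 16-byte nonce as the AES key, truncated to the mode's key size. A hedged sketch of the setup path using the helpers introduced in these hunks (locking, ESSIV, and cleanup elided; setup_key_sketch() itself is hypothetical):

	static int setup_key_sketch(struct inode *inode,
				    const struct fscrypt_context *ctx,
				    struct fscrypt_info *ci)
	{
		struct fscrypt_mode *mode = select_encryption_mode(ci, inode);
		u8 *raw_key;
		int res;

		if (IS_ERR(mode))
			return PTR_ERR(mode);
		raw_key = kmalloc(mode->keysize, GFP_NOFS);
		if (!raw_key)
			return -ENOMEM;
		/* derived = AES-128-ECB(key = ctx->nonce, data = master key) */
		res = find_and_derive_key(inode, ctx, raw_key, mode->keysize);
		if (!res)
			res = crypto_skcipher_setkey(ci->ci_ctfm, raw_key,
						     mode->keysize);
		kzfree(raw_key);	/* wipe the raw key on the way out */
		return res;
	}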
diff --git a/fs/dcache.c b/fs/dcache.c
index 9aa4a81dc9b3..d065854deb23 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1394,7 +1394,7 @@ static enum d_walk_ret select_collect(void *_data, struct dentry *dentry)
 		goto out;
 
 	if (dentry->d_flags & DCACHE_SHRINK_LIST) {
-		data->found++;
+		goto out;
 	} else {
 		if (dentry->d_flags & DCACHE_LRU_LIST)
 			d_lru_del(dentry);
@@ -1892,28 +1892,6 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
 
 EXPORT_SYMBOL(d_instantiate_unique);
 
-/*
- * This should be equivalent to d_instantiate() + unlock_new_inode(),
- * with lockdep-related part of unlock_new_inode() done before
- * anything else.  Use that instead of open-coding d_instantiate()/
- * unlock_new_inode() combinations.
- */
-void d_instantiate_new(struct dentry *entry, struct inode *inode)
-{
-	BUG_ON(!hlist_unhashed(&entry->d_u.d_alias));
-	BUG_ON(!inode);
-	lockdep_annotate_inode_mutex_key(inode);
-	spin_lock(&inode->i_lock);
-	__d_instantiate(entry, inode);
-	WARN_ON(!(inode->i_state & I_NEW));
-	inode->i_state &= ~I_NEW;
-	smp_mb();
-	wake_up_bit(&inode->i_state, __I_NEW);
-	spin_unlock(&inode->i_lock);
-	security_d_instantiate(entry, inode);
-}
-EXPORT_SYMBOL(d_instantiate_new);
-
 /**
  * d_instantiate_no_diralias - instantiate a non-aliased dentry
  * @entry: dentry to complete
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 49c06f3cd952..5afb6e260c84 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -399,6 +399,7 @@ static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio)
 	if (dio->is_async && dio->rw == READ && dio->should_dirty)
 		bio_set_pages_dirty(bio);
 
+	bio->bi_dio_inode = dio->inode;
 	dio->bio_bdev = bio->bi_bdev;
 
 	if (sdio->submit_io) {
@@ -413,6 +414,19 @@ static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio)
 	sdio->logical_offset_in_bio = 0;
 }
 
+struct inode *dio_bio_get_inode(struct bio *bio)
+{
+	struct inode *inode = NULL;
+
+	if (bio == NULL)
+		return NULL;
+
+	inode = bio->bi_dio_inode;
+
+	return inode;
+}
+EXPORT_SYMBOL(dio_bio_get_inode);
+
 /*
  * Release any resources in case of a failure
  */
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index 280460fef066..44de6b82b16f 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -13,7 +13,7 @@
 /* A global variable is a bit ugly, but it keeps the code simple */
 int sysctl_drop_caches;
 
-static void drop_pagecache_sb(struct super_block *sb, void *unused)
+void drop_pagecache_sb(struct super_block *sb, void *unused)
 {
 	struct inode *inode, *toput_inode = NULL;
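dio_bio_get_inode() above lets a lower layer recover the inode a direct-I/O bio was issued for, via the bi_dio_inode field this tree adds to struct bio at submit time. A hypothetical caller sketch:

	/* e.g. in a driver completion or crypto path */
	struct inode *inode = dio_bio_get_inode(bio);

	if (inode)
		pr_debug("DIO bio belongs to inode %lu\n", inode->i_ino);
	else
		pr_debug("not a direct-I/O bio\n"); /* also covers bio == NULL */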
diff --git a/fs/ecryptfs/Makefile b/fs/ecryptfs/Makefile
index 49678a69947d..c29cdd20d08a 100644
--- a/fs/ecryptfs/Makefile
+++ b/fs/ecryptfs/Makefile
@@ -4,7 +4,7 @@
 
 obj-$(CONFIG_ECRYPT_FS) += ecryptfs.o
 
-ecryptfs-y := dentry.o file.o inode.o main.o super.o mmap.o read_write.o \
+ecryptfs-y := dentry.o file.o inode.o main.o super.o mmap.o read_write.o events.o \
 	      crypto.o keystore.o kthread.o debug.o
 
 ecryptfs-$(CONFIG_ECRYPT_FS_MESSAGING) += messaging.o miscdev.o
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index f246f1760ba2..27d1db57f578 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -35,6 +35,7 @@
 #include <linux/scatterlist.h>
 #include <linux/slab.h>
 #include <asm/unaligned.h>
+#include <linux/ecryptfs.h>
 #include "ecryptfs_kernel.h"
 
 #define DECRYPT		0
@@ -350,9 +351,9 @@ static int crypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat,
 	       || !(crypt_stat->flags & ECRYPTFS_STRUCT_INITIALIZED));
 	if (unlikely(ecryptfs_verbosity > 0)) {
 		ecryptfs_printk(KERN_DEBUG, "Key size [%zd]; key:\n",
-				crypt_stat->key_size);
+				ecryptfs_get_key_size_to_enc_data(crypt_stat));
 		ecryptfs_dump_hex(crypt_stat->key,
-				  crypt_stat->key_size);
+				  ecryptfs_get_key_size_to_enc_data(crypt_stat));
 	}
 
 	init_completion(&ecr.completion);
@@ -371,7 +372,7 @@ static int crypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat,
 	/* Consider doing this once, when the file is opened */
 	if (!(crypt_stat->flags & ECRYPTFS_KEY_SET)) {
 		rc = crypto_ablkcipher_setkey(crypt_stat->tfm, crypt_stat->key,
-					      crypt_stat->key_size);
+				ecryptfs_get_key_size_to_enc_data(crypt_stat));
 		if (rc) {
 			ecryptfs_printk(KERN_ERR,
 					"Error setting key; rc = [%d]\n",
@@ -466,6 +467,30 @@ out:
 	return rc;
 }
 
+static void init_ecryption_parameters(bool *hw_crypt, bool *cipher_supported,
+				      struct ecryptfs_crypt_stat *crypt_stat)
+{
+	if (!hw_crypt || !cipher_supported)
+		return;
+
+	*cipher_supported = false;
+	*hw_crypt = false;
+
+	if (get_events() && get_events()->is_cipher_supported_cb) {
+		*cipher_supported =
+			get_events()->is_cipher_supported_cb(crypt_stat);
+		if (*cipher_supported) {
+			/*
+			 * we should apply the external algorithm;
+			 * assume that the is_hw_crypt() callback is supplied
+			 */
+			if (get_events()->is_hw_crypt_cb)
+				*hw_crypt = get_events()->is_hw_crypt_cb();
+		}
+	}
+}
+
 /**
  * ecryptfs_encrypt_page
  * @page: Page mapped from the eCryptfs inode for the file; contains
@@ -491,11 +516,18 @@ int ecryptfs_encrypt_page(struct page *page)
 	loff_t extent_offset;
 	loff_t lower_offset;
 	int rc = 0;
+	bool is_hw_crypt;
+	bool is_cipher_supported;
 
 	ecryptfs_inode = page->mapping->host;
 	crypt_stat =
 		&(ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat);
 	BUG_ON(!(crypt_stat->flags & ECRYPTFS_ENCRYPTED));
+
+	init_ecryption_parameters(&is_hw_crypt,
+				  &is_cipher_supported, crypt_stat);
+
 	enc_extent_page = alloc_page(GFP_USER);
 	if (!enc_extent_page) {
 		rc = -ENOMEM;
 		ecryptfs_printk(KERN_ERR, "Error allocating memory for "
				"encrypted extent\n");
 		goto out;
 	}
-
-	for (extent_offset = 0;
-	     extent_offset < (PAGE_CACHE_SIZE / crypt_stat->extent_size);
-	     extent_offset++) {
-		rc = crypt_extent(crypt_stat, enc_extent_page, page,
-				  extent_offset, ENCRYPT);
-		if (rc) {
-			printk(KERN_ERR "%s: Error encrypting extent; "
-			       "rc = [%d]\n", __func__, rc);
-			goto out;
+	if (is_hw_crypt) {
+		/* no need for encryption */
+	} else {
+		for (extent_offset = 0;
+		     extent_offset <
+				(PAGE_CACHE_SIZE / crypt_stat->extent_size);
+		     extent_offset++) {
+			if (is_cipher_supported) {
+				if (!get_events()->encrypt_cb) {
+					rc = -EPERM;
+					goto out;
+				}
+				rc = get_events()->encrypt_cb(page,
+					enc_extent_page,
+					ecryptfs_inode_to_lower(
+						ecryptfs_inode),
+					extent_offset);
+			} else {
+				rc = crypt_extent(crypt_stat,
+					enc_extent_page, page,
+					extent_offset, ENCRYPT);
+			}
+			if (rc) {
+				ecryptfs_printk(KERN_ERR,
+					"%s: Error encrypting; rc = [%d]\n",
+					__func__, rc);
+				goto out;
+			}
 		}
 	}
 
 	lower_offset = lower_offset_for_page(crypt_stat, page);
-	enc_extent_virt = kmap(enc_extent_page);
+	if (is_hw_crypt)
+		enc_extent_virt = kmap(page);
+	else
+		enc_extent_virt = kmap(enc_extent_page);
+
 	rc = ecryptfs_write_lower(ecryptfs_inode, enc_extent_virt, lower_offset,
 				  PAGE_CACHE_SIZE);
-	kunmap(enc_extent_page);
+	if (!is_hw_crypt)
+		kunmap(enc_extent_page);
+	else
+		kunmap(page);
+
 	if (rc < 0) {
 		ecryptfs_printk(KERN_ERR,
 			"Error attempting to write lower page; rc = [%d]\n",
@@ -559,6 +618,8 @@ int ecryptfs_decrypt_page(struct page *page)
 	unsigned long extent_offset;
 	loff_t lower_offset;
 	int rc = 0;
+	bool is_cipher_supported;
+	bool is_hw_crypt;
 
 	ecryptfs_inode = page->mapping->host;
 	crypt_stat =
@@ -577,13 +638,33 @@ int ecryptfs_decrypt_page(struct page *page)
 		goto out;
 	}
 
+	init_ecryption_parameters(&is_hw_crypt,
+				  &is_cipher_supported, crypt_stat);
+
+	if (is_hw_crypt) {
+		rc = 0;
+		return rc;
+	}
+
 	for (extent_offset = 0;
 	     extent_offset < (PAGE_CACHE_SIZE / crypt_stat->extent_size);
 	     extent_offset++) {
-		rc = crypt_extent(crypt_stat, page, page,
+		if (is_cipher_supported) {
+			if (!get_events()->decrypt_cb) {
+				rc = -EPERM;
+				goto out;
+			}
+
+			rc = get_events()->decrypt_cb(page, page,
+				ecryptfs_inode_to_lower(ecryptfs_inode),
+				extent_offset);
+		} else
+			rc = crypt_extent(crypt_stat, page, page,
 				  extent_offset, DECRYPT);
+
 		if (rc) {
-			printk(KERN_ERR "%s: Error encrypting extent; "
+			ecryptfs_printk(KERN_ERR, "%s: Error decrypting extent; "
 			       "rc = [%d]\n", __func__, rc);
 			goto out;
 		}
@@ -612,7 +693,7 @@ int ecryptfs_init_crypt_ctx(struct ecryptfs_crypt_stat *crypt_stat)
 			"Initializing cipher [%s]; strlen = [%d]; "
 			"key_size_bits = [%zd]\n",
 			crypt_stat->cipher, (int)strlen(crypt_stat->cipher),
-			crypt_stat->key_size << 3);
+			ecryptfs_get_key_size_to_enc_data(crypt_stat) << 3);
 	mutex_lock(&crypt_stat->cs_tfm_mutex);
 	if (crypt_stat->tfm) {
 		rc = 0;
@@ -694,7 +775,7 @@ int ecryptfs_compute_root_iv(struct ecryptfs_crypt_stat *crypt_stat)
 		goto out;
 	}
 	rc = ecryptfs_calculate_md5(dst, crypt_stat, crypt_stat->key,
-				    crypt_stat->key_size);
+			ecryptfs_get_key_size_to_enc_data(crypt_stat));
 	if (rc) {
 		ecryptfs_printk(KERN_WARNING, "Error attempting to compute "
 				"MD5 while generating root IV\n");
@@ -721,6 +802,31 @@ static void ecryptfs_generate_new_key(struct ecryptfs_crypt_stat *crypt_stat)
 	}
 }
 
+static int ecryptfs_generate_new_salt(struct ecryptfs_crypt_stat *crypt_stat)
+{
+	size_t salt_size = 0;
+
+	salt_size = ecryptfs_get_salt_size_for_cipher(crypt_stat);
+
+	if (0 == salt_size)
+		return 0;
+
+	if (!ecryptfs_check_space_for_salt(crypt_stat->key_size, salt_size)) {
+		ecryptfs_printk(KERN_WARNING, "not enough space for salt\n");
+		crypt_stat->flags |= ECRYPTFS_SECURITY_WARNING;
+		return -EINVAL;
+	}
+
+	get_random_bytes(crypt_stat->key + crypt_stat->key_size, salt_size);
+	if (unlikely(ecryptfs_verbosity > 0)) {
+		ecryptfs_printk(KERN_DEBUG, "Generated new session salt:\n");
+		ecryptfs_dump_hex(crypt_stat->key + crypt_stat->key_size,
+				  salt_size);
+	}
+
+	return 0;
+}
+
 /**
  * ecryptfs_copy_mount_wide_flags_to_inode_flags
  * @crypt_stat: The inode's cryptographic context
@@ -823,7 +929,6 @@ int ecryptfs_new_file_context(struct inode *ecryptfs_inode)
 	struct ecryptfs_mount_crypt_stat *mount_crypt_stat =
 	    &ecryptfs_superblock_to_private(
		    ecryptfs_inode->i_sb)->mount_crypt_stat;
-	int cipher_name_len;
 	int rc = 0;
 
 	ecryptfs_set_default_crypt_stat_vals(crypt_stat, mount_crypt_stat);
@@ -837,15 +942,19 @@ int ecryptfs_new_file_context(struct inode *ecryptfs_inode)
 		       "to the inode key sigs; rc = [%d]\n", rc);
 		goto out;
 	}
-	cipher_name_len =
-		strlen(mount_crypt_stat->global_default_cipher_name);
-	memcpy(crypt_stat->cipher,
-	       mount_crypt_stat->global_default_cipher_name,
-	       cipher_name_len);
-	crypt_stat->cipher[cipher_name_len] = '\0';
+	strlcpy(crypt_stat->cipher,
+		mount_crypt_stat->global_default_cipher_name,
+		sizeof(crypt_stat->cipher));
+
+	strlcpy(crypt_stat->cipher_mode,
+		mount_crypt_stat->global_default_cipher_mode,
+		sizeof(crypt_stat->cipher_mode));
+
 	crypt_stat->key_size =
 		mount_crypt_stat->global_default_cipher_key_size;
 	ecryptfs_generate_new_key(crypt_stat);
+	ecryptfs_generate_new_salt(crypt_stat);
+
 	rc = ecryptfs_init_crypt_ctx(crypt_stat);
 	if (rc)
 		ecryptfs_printk(KERN_ERR, "Error initializing cryptographic "
@@ -971,7 +1080,8 @@ ecryptfs_cipher_code_str_map[] = {
 	{"twofish", RFC2440_CIPHER_TWOFISH},
 	{"cast6", RFC2440_CIPHER_CAST_6},
 	{"aes", RFC2440_CIPHER_AES_192},
-	{"aes", RFC2440_CIPHER_AES_256}
+	{"aes", RFC2440_CIPHER_AES_256},
+	{"aes_xts", RFC2440_CIPHER_AES_XTS_256}
 };
 
 /**
@@ -999,6 +1109,11 @@ u8 ecryptfs_code_for_cipher_string(char *cipher_name, size_t key_bytes)
 		case 32:
 			code = RFC2440_CIPHER_AES_256;
 		}
+	} else if (strcmp(cipher_name, "aes_xts") == 0) {
+		switch (key_bytes) {
+		case 32:
+			code = RFC2440_CIPHER_AES_XTS_256;
+		}
 	} else {
 		for (i = 0; i < ARRAY_SIZE(ecryptfs_cipher_code_str_map); i++)
 			if (strcmp(cipher_name, map[i].cipher_str) == 0) {
@@ -1038,9 +1153,24 @@ int ecryptfs_read_and_validate_header_region(struct inode *inode)
 	u8 file_size[ECRYPTFS_SIZE_AND_MARKER_BYTES];
 	u8 *marker = file_size + ECRYPTFS_FILE_SIZE_BYTES;
 	int rc;
+	unsigned int ra_pages_org;
+	struct file *lower_file = NULL;
+
+	if (!inode)
+		return -EIO;
+	lower_file = ecryptfs_inode_to_private(inode)->lower_file;
+	if (!lower_file)
+		return -EIO;
+
+	/* disable the read-ahead mechanism for a while */
+	ra_pages_org = lower_file->f_ra.ra_pages;
+	lower_file->f_ra.ra_pages = 0;
 
 	rc = ecryptfs_read_lower(file_size, 0, ECRYPTFS_SIZE_AND_MARKER_BYTES,
 				 inode);
+	lower_file->f_ra.ra_pages = ra_pages_org;
+	/* restore the read-ahead mechanism */
+
 	if (rc < 0)
 		return rc;
 	else if (rc < ECRYPTFS_SIZE_AND_MARKER_BYTES)
@@ -1434,6 +1564,11 @@ int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry)
 	struct ecryptfs_mount_crypt_stat *mount_crypt_stat =
		&ecryptfs_superblock_to_private(
			ecryptfs_dentry->d_sb)->mount_crypt_stat;
+	unsigned int ra_pages_org;
+	struct file *lower_file =
+		ecryptfs_inode_to_private(ecryptfs_inode)->lower_file;
+
+	if (!lower_file)
+		return -EIO;
 
 	ecryptfs_copy_mount_wide_flags_to_inode_flags(crypt_stat,
						      mount_crypt_stat);
@@ -1445,8 +1580,14 @@ int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry)
			__func__);
 		goto out;
 	}
+	/* disable the read-ahead mechanism */
+	ra_pages_org = lower_file->f_ra.ra_pages;
+	lower_file->f_ra.ra_pages = 0;
+
 	rc = ecryptfs_read_lower(page_virt, 0, crypt_stat->extent_size,
 				 ecryptfs_inode);
+	lower_file->f_ra.ra_pages = ra_pages_org; /* restore it back */
+
 	if (rc >= 0)
 		rc = ecryptfs_read_headers_virt(page_virt, crypt_stat,
						ecryptfs_dentry,
diff --git a/fs/ecryptfs/debug.c b/fs/ecryptfs/debug.c
index 3d2bdf546ec6..ea2c4a97fddc 100644
--- a/fs/ecryptfs/debug.c
+++ b/fs/ecryptfs/debug.c
@@ -119,3 +119,32 @@ void ecryptfs_dump_hex(char *data, int bytes)
 	printk("\n");
 }
 
+void ecryptfs_dump_salt_hex(char *data, int key_size,
+			    const struct ecryptfs_crypt_stat *crypt_stat)
+{
+	size_t salt_size = ecryptfs_get_salt_size_for_cipher(crypt_stat);
+
+	if (0 == salt_size)
+		return;
+
+	if (!ecryptfs_check_space_for_salt(key_size, salt_size))
+		return;
+
+	ecryptfs_printk(KERN_DEBUG, "Decrypted session salt key:\n");
+	ecryptfs_dump_hex(data + key_size, salt_size);
+}
+
+void ecryptfs_dump_cipher(struct ecryptfs_crypt_stat *stat)
+{
+	if (!stat)
+		return;
+
+	if (stat->cipher != NULL)
+		ecryptfs_printk(KERN_DEBUG,
+				"ecryptfs cipher is %s\n", stat->cipher);
+
+	if (stat->cipher_mode != NULL)
+		ecryptfs_printk(KERN_DEBUG, "ecryptfs cipher mode is %s\n",
+				stat->cipher_mode);
+}
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index eae9cdb8af46..f5908e91eb17 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -254,6 +254,7 @@ struct ecryptfs_crypt_stat {
 	struct mutex cs_tfm_mutex;
 	struct mutex cs_hash_tfm_mutex;
 	struct mutex cs_mutex;
+	unsigned char cipher_mode[ECRYPTFS_MAX_CIPHER_NAME_SIZE + 1];
 };
 
 /* inode private data. */
@@ -354,6 +355,8 @@ struct ecryptfs_mount_crypt_stat {
 	unsigned char global_default_fn_cipher_name[
 		ECRYPTFS_MAX_CIPHER_NAME_SIZE + 1];
 	char global_default_fnek_sig[ECRYPTFS_SIG_SIZE_HEX + 1];
+	unsigned char global_default_cipher_mode[ECRYPTFS_MAX_CIPHER_NAME_SIZE
+		+ 1];
 };
 
 /* superblock private data. */
@@ -536,6 +539,53 @@ ecryptfs_dentry_to_lower_path(struct dentry *dentry)
 	return &((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_path;
 }
 
+/**
+ * Given a cipher and mode strings, the function
+ * concatenates them to create a new string of
+ * <cipher>_<mode> format.
+ */
+static inline unsigned char *ecryptfs_get_full_cipher(
+	unsigned char *cipher, unsigned char *mode,
+	unsigned char *final, size_t final_size)
+{
+	memset(final, 0, final_size);
+
+	if (strlen(mode) > 0) {
+		snprintf(final, final_size, "%s_%s", cipher, mode);
+		return final;
+	}
+
+	return cipher;
+}
+
+/**
+ * Given a <cipher>[_<mode>] formatted string, the function
+ * extracts cipher string and/or mode string.
+ * Note: the passed cipher and/or mode strings will be null-terminated.
+ */
+static inline void ecryptfs_parse_full_cipher(
+	char *s, char *cipher, char *mode)
+{
+	char input[2*ECRYPTFS_MAX_CIPHER_NAME_SIZE+1+1];
+	/* +1 for '_'; +1 for '\0' */
+	char *p;
+	char *input_p = input;
+
+	if (s == NULL || cipher == NULL)
+		return;
+
+	memset(input, 0, sizeof(input));
+	strlcpy(input, s, sizeof(input));
+
+	p = strsep(&input_p, "_");
+	strlcpy(cipher, p, ECRYPTFS_MAX_CIPHER_NAME_SIZE + 1);
+
+	/* check if mode is specified */
+	if (input_p != NULL && mode != NULL)
+		strlcpy(mode, input_p, ECRYPTFS_MAX_CIPHER_NAME_SIZE + 1);
+}
+
 #define ecryptfs_printk(type, fmt, arg...) \
 	__ecryptfs_printk(type "%s: " fmt, __func__, ## arg);
 __printf(1, 2)
@@ -584,6 +634,10 @@ int ecryptfs_encrypt_and_encode_filename(
	const char *name, size_t name_size);
 struct dentry *ecryptfs_lower_dentry(struct dentry *this_dentry);
 void ecryptfs_dump_hex(char *data, int bytes);
+void ecryptfs_dump_salt_hex(char *data, int key_size,
+			    const struct ecryptfs_crypt_stat *crypt_stat);
+extern void ecryptfs_dump_cipher(struct ecryptfs_crypt_stat *stat);
+
 int virt_to_scatterlist(const void *addr, int size, struct scatterlist *sg,
			int sg_size);
 int ecryptfs_compute_root_iv(struct ecryptfs_crypt_stat *crypt_stat);
@@ -727,4 +781,33 @@ int ecryptfs_set_f_namelen(long *namelen, long lower_namelen,
 int ecryptfs_derive_iv(char *iv, struct ecryptfs_crypt_stat *crypt_stat,
		       loff_t offset);
 
+void clean_inode_pages(struct address_space *mapping,
+		       pgoff_t start, pgoff_t end);
+
+void ecryptfs_drop_pagecache_sb(struct super_block *sb, void *unused);
+
+void ecryptfs_free_events(void);
+
+void ecryptfs_freepage(struct page *page);
+
+struct ecryptfs_events *get_events(void);
+
+size_t ecryptfs_get_salt_size_for_cipher(
+	const struct ecryptfs_crypt_stat *crypt_stat);
+
+size_t ecryptfs_get_salt_size_for_cipher_mount(
+	const struct ecryptfs_mount_crypt_stat *mount_crypt_stat);
+
+size_t ecryptfs_get_key_size_to_enc_data(
+	const struct ecryptfs_crypt_stat *crypt_stat);
+
+size_t ecryptfs_get_key_size_to_store_key(
+	const struct ecryptfs_crypt_stat *crypt_stat);
+
+size_t ecryptfs_get_key_size_to_restore_key(size_t stored_key_size,
+	const struct ecryptfs_crypt_stat *crypt_stat);
+
+bool ecryptfs_check_space_for_salt(const size_t key_size,
+				   const size_t salt_size);
+
 #endif /* #ifndef ECRYPTFS_KERNEL_H */
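ecryptfs_get_full_cipher() and ecryptfs_parse_full_cipher() above are inverses over the new <cipher>_<mode> naming convention. A small usage sketch, with buffers sized as in the header:

	unsigned char name[] = "aes_xts";
	unsigned char full[2 * ECRYPTFS_MAX_CIPHER_NAME_SIZE + 1];
	char cipher[ECRYPTFS_MAX_CIPHER_NAME_SIZE + 1];
	char mode[ECRYPTFS_MAX_CIPHER_NAME_SIZE + 1];

	/* "aes" + "xts" -> "aes_xts"; with an empty mode string the
	 * original cipher pointer is returned unchanged */
	ecryptfs_get_full_cipher((unsigned char *)"aes",
				 (unsigned char *)"xts",
				 full, sizeof(full));

	/* "aes_xts" -> cipher = "aes", mode = "xts" */
	ecryptfs_parse_full_cipher((char *)name, cipher, mode);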
diff --git a/fs/ecryptfs/events.c b/fs/ecryptfs/events.c
new file mode 100644
index 000000000000..12e26c683cf6
--- /dev/null
+++ b/fs/ecryptfs/events.c
@@ -0,0 +1,393 @@
+/**
+ * eCryptfs: Linux filesystem encryption layer
+ * Copyright (c) 2015-2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/string.h>
+#include <linux/ecryptfs.h>
+#include <linux/mutex.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/pagemap.h>
+#include <linux/random.h>
+#include "ecryptfs_kernel.h"
+
+static DEFINE_MUTEX(events_mutex);
+struct ecryptfs_events *events_ptr = NULL;
+static int handle;
+
+void ecryptfs_free_events(void)
+{
+	mutex_lock(&events_mutex);
+	if (events_ptr != NULL) {
+		kfree(events_ptr);
+		events_ptr = NULL;
+	}
+	mutex_unlock(&events_mutex);
+}
+
+/**
+ * Register to ecryptfs events, by passing callback
+ * functions to be called upon events occurrence.
+ * The function returns a handle to be passed
+ * to the unregister function.
+ */
+int ecryptfs_register_to_events(const struct ecryptfs_events *ops)
+{
+	int ret_value = 0;
+
+	if (!ops)
+		return -EINVAL;
+
+	mutex_lock(&events_mutex);
+
+	if (events_ptr != NULL) {
+		ecryptfs_printk(KERN_ERR,
+				"already registered!\n");
+		ret_value = -EPERM;
+		goto out;
+	}
+	events_ptr =
+		kzalloc(sizeof(struct ecryptfs_events), GFP_KERNEL);
+
+	if (!events_ptr) {
+		ecryptfs_printk(KERN_ERR, "malloc failure\n");
+		ret_value = -ENOMEM;
+		goto out;
+	}
+	/* copy the callbacks */
+	events_ptr->open_cb = ops->open_cb;
+	events_ptr->release_cb = ops->release_cb;
+	events_ptr->encrypt_cb = ops->encrypt_cb;
+	events_ptr->decrypt_cb = ops->decrypt_cb;
+	events_ptr->is_cipher_supported_cb =
+		ops->is_cipher_supported_cb;
+	events_ptr->is_hw_crypt_cb = ops->is_hw_crypt_cb;
+	events_ptr->get_salt_key_size_cb = ops->get_salt_key_size_cb;
+
+	get_random_bytes(&handle, sizeof(handle));
+	ret_value = handle;
+
+out:
+	mutex_unlock(&events_mutex);
+	return ret_value;
+}
+
+/**
+ * Unregister from ecryptfs events.
+ */
+int ecryptfs_unregister_from_events(int user_handle)
+{
+	int ret_value = 0;
+
+	mutex_lock(&events_mutex);
+
+	if (!events_ptr) {
+		ret_value = -EINVAL;
+		goto out;
+	}
+	if (user_handle != handle) {
+		ret_value = ECRYPTFS_INVALID_EVENTS_HANDLE;
+		goto out;
+	}
+
+	kfree(events_ptr);
+	events_ptr = NULL;
+
+out:
+	mutex_unlock(&events_mutex);
+	return ret_value;
+}
+
+/**
+ * This function decides whether the passed file offset
+ * belongs to ecryptfs metadata or not.
+ * The caller must pass ecryptfs data, which was received in one
+ * of the callback invocations.
+ */
+bool ecryptfs_is_page_in_metadata(const void *data, pgoff_t offset)
+{
+	struct ecryptfs_crypt_stat *stat = NULL;
+	bool ret = true;
+
+	if (!data) {
+		ecryptfs_printk(KERN_ERR, "ecryptfs_is_page_in_metadata: invalid data parameter\n");
+		ret = false;
+		goto end;
+	}
+	stat = (struct ecryptfs_crypt_stat *)data;
+
+	if (stat->flags & ECRYPTFS_METADATA_IN_XATTR) {
+		ret = false;
+		goto end;
+	}
+
+	if (offset >= (stat->metadata_size/PAGE_CACHE_SIZE)) {
+		ret = false;
+		goto end;
+	}
+end:
+	return ret;
+}
+
+/**
+ * Given two ecryptfs data, the function
+ * decides whether they are equal.
+ */
+inline bool ecryptfs_is_data_equal(const void *data1, const void *data2)
+{
+	/* pointer comparison */
+	return data1 == data2;
+}
+
+/**
+ * Given ecryptfs data, the function
+ * returns the appropriate key size.
+ */
+size_t ecryptfs_get_key_size(const void *data)
+{
+	struct ecryptfs_crypt_stat *stat = NULL;
+
+	if (!data)
+		return 0;
+
+	stat = (struct ecryptfs_crypt_stat *)data;
+	return stat->key_size;
+}
+
+/**
+ * Given ecryptfs data, the function
+ * returns the appropriate salt size.
+ *
+ * !!! crypt_stat cipher name and mode must be initialized
+ */
+size_t ecryptfs_get_salt_size(const void *data)
+{
+	if (!data) {
+		ecryptfs_printk(KERN_ERR,
+			"ecryptfs_get_salt_size: invalid data parameter\n");
+		return 0;
+	}
+
+	return ecryptfs_get_salt_size_for_cipher(data);
+}
+
+/**
+ * Given ecryptfs data and a cipher string, the function
+ * returns true if the provided cipher and the one in ecryptfs match.
+ */
+bool ecryptfs_cipher_match(const void *data,
+			   const unsigned char *cipher, size_t cipher_size)
+{
+	unsigned char final[2*ECRYPTFS_MAX_CIPHER_NAME_SIZE+1];
+	const unsigned char *ecryptfs_cipher = NULL;
+	struct ecryptfs_crypt_stat *stat = NULL;
+
+	if (!data || !cipher) {
+		ecryptfs_printk(KERN_ERR,
+			"ecryptfs_get_cipher: invalid data parameter\n");
+		return false;
+	}
+
+	if (!cipher_size || cipher_size > sizeof(final)) {
+		ecryptfs_printk(KERN_ERR,
+			"ecryptfs_get_cipher: cipher_size\n");
+		return false;
+	}
+
+	stat = (struct ecryptfs_crypt_stat *)data;
+	ecryptfs_cipher = ecryptfs_get_full_cipher(stat->cipher,
+						   stat->cipher_mode,
+						   final, sizeof(final));
+
+	if (!ecryptfs_cipher) {
+		ecryptfs_printk(KERN_ERR,
+			"ecryptfs_get_cipher: internal error while parsing cipher\n");
+		return false;
+	}
+
+	if (strcmp(ecryptfs_cipher, cipher)) {
+		if (ecryptfs_verbosity > 0)
+			ecryptfs_dump_cipher(stat);
+
+		return false;
+	}
+
+	return true;
+}
+
+/**
+ * Given ecryptfs data, the function
+ * returns the file encryption key.
+ */
+const unsigned char *ecryptfs_get_key(const void *data)
+{
+	struct ecryptfs_crypt_stat *stat = NULL;
+
+	if (!data) {
+		ecryptfs_printk(KERN_ERR,
+			"ecryptfs_get_key: invalid data parameter\n");
+		return NULL;
+	}
+	stat = (struct ecryptfs_crypt_stat *)data;
+	return stat->key;
+}
+
+/**
+ * Given ecryptfs data, the function
+ * returns the file encryption salt.
+ */
+const unsigned char *ecryptfs_get_salt(const void *data)
+{
+	struct ecryptfs_crypt_stat *stat = NULL;
+
+	if (!data) {
+		ecryptfs_printk(KERN_ERR,
+			"ecryptfs_get_salt: invalid data parameter\n");
+		return NULL;
+	}
+	stat = (struct ecryptfs_crypt_stat *)data;
+	return stat->key + ecryptfs_get_salt_size(data);
+}
+
+/**
+ * Returns the ecryptfs events pointer
+ */
+inline struct ecryptfs_events *get_events(void)
+{
+	return events_ptr;
+}
+
+/**
+ * If the external crypto module requires salt in addition to the key,
+ * we store it as part of the key array (if there is enough space).
+ * Checks whether a salt key can fit into the array allocated for the
+ * regular key.
+ */
+bool ecryptfs_check_space_for_salt(const size_t key_size,
+				   const size_t salt_size)
+{
+	if ((salt_size + key_size) > ECRYPTFS_MAX_KEY_BYTES)
+		return false;
+
+	return true;
+}
+
+/*
+ * If there is salt that is used by the external crypto module, it is stored
+ * in the same array where the regular key is. Salt is going to be used by
+ * the external crypto module only, so for all internal crypto operations the
+ * salt should be ignored.
+ *
+ * Get key size in cases where it is going to be used for data encryption
+ * or for all other general purposes
+ */
+size_t ecryptfs_get_key_size_to_enc_data(
+	const struct ecryptfs_crypt_stat *crypt_stat)
+{
+	if (!crypt_stat)
+		return 0;
+
+	return crypt_stat->key_size;
+}
+
+/*
+ * If there is salt that is used by the external crypto module, it is stored
+ * in the same array where the regular key is. Salt is going to be used by
+ * the external crypto module only, but we still need to save and restore it
+ * (in encrypted form) as part of the ecryptfs header along with the regular
+ * key.
+ *
+ * Get key size in cases where it is going to be stored persistently
+ *
+ * !!! crypt_stat cipher name and mode must be initialized
+ */
+size_t ecryptfs_get_key_size_to_store_key(
+	const struct ecryptfs_crypt_stat *crypt_stat)
+{
+	size_t salt_size = 0;
+
+	if (!crypt_stat)
+		return 0;
+
+	salt_size = ecryptfs_get_salt_size(crypt_stat);
+
+	if (!ecryptfs_check_space_for_salt(crypt_stat->key_size, salt_size)) {
+		ecryptfs_printk(KERN_WARNING,
+			"ecryptfs_get_key_size_to_store_key: not enough space for salt\n");
+		return crypt_stat->key_size;
+	}
+
+	return crypt_stat->key_size + salt_size;
+}
+
+/*
+ * If there is salt that is used by the external crypto module, it is stored
+ * in the same array where the regular key is. Salt is going to be used by
+ * the external crypto module only, but we still need to save and restore it
+ * (in encrypted form) as part of the ecryptfs header along with the regular
+ * key.
+ *
+ * Get key size in cases where it is going to be restored from storage
+ *
+ * !!! crypt_stat cipher name and mode must be initialized
+ */
+size_t ecryptfs_get_key_size_to_restore_key(size_t stored_key_size,
+	const struct ecryptfs_crypt_stat *crypt_stat)
+{
+	size_t salt_size = 0;
+
+	if (!crypt_stat)
+		return 0;
+
+	salt_size = ecryptfs_get_salt_size_for_cipher(crypt_stat);
+
+	if (salt_size >= stored_key_size) {
+		ecryptfs_printk(KERN_WARNING,
+			"ecryptfs_get_key_size_to_restore_key: salt %zu >= stored size %zu\n",
+			salt_size, stored_key_size);
+
+		return stored_key_size;
+	}
+
+	return stored_key_size - salt_size;
+}
+
+/**
+ * Given crypt_stat, the function returns the appropriate salt size.
+ */
+size_t ecryptfs_get_salt_size_for_cipher(
+	const struct ecryptfs_crypt_stat *crypt_stat)
+{
+	if (!get_events() || !(get_events()->get_salt_key_size_cb))
+		return 0;
+
+	return get_events()->get_salt_key_size_cb(crypt_stat);
+}
+
+/**
+ * Given mount_crypt_stat, the function returns the appropriate salt size.
+ */ +size_t ecryptfs_get_salt_size_for_cipher_mount( + const struct ecryptfs_mount_crypt_stat *crypt_stat) +{ + if (!get_events() || !(get_events()->get_salt_key_size_cb)) + return 0; + + return get_events()->get_salt_key_size_cb(crypt_stat); +} + diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index 27794b137b24..c93fe5fce41e 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -31,6 +31,7 @@ #include <linux/security.h> #include <linux/compat.h> #include <linux/fs_stack.h> +#include <linux/ecryptfs.h> #include "ecryptfs_kernel.h" /** @@ -196,6 +197,9 @@ static int ecryptfs_open(struct inode *inode, struct file *file) int rc = 0; struct ecryptfs_crypt_stat *crypt_stat = NULL; struct dentry *ecryptfs_dentry = file->f_path.dentry; + int ret; + + /* Private value of ecryptfs_dentry allocated in * ecryptfs_lookup() */ struct ecryptfs_file_info *file_info; @@ -235,12 +239,39 @@ static int ecryptfs_open(struct inode *inode, struct file *file) } ecryptfs_set_file_lower( file, ecryptfs_inode_to_private(inode)->lower_file); + if (d_is_dir(ecryptfs_dentry)) { + ecryptfs_printk(KERN_DEBUG, "This is a directory\n"); + mutex_lock(&crypt_stat->cs_mutex); + crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED); + mutex_unlock(&crypt_stat->cs_mutex); + rc = 0; + goto out; + } + rc = read_or_initialize_metadata(ecryptfs_dentry); if (rc) goto out_put; ecryptfs_printk(KERN_DEBUG, "inode w/ addr = [0x%p], i_ino = " "[0x%.16lx] size: [0x%.16llx]\n", inode, inode->i_ino, (unsigned long long)i_size_read(inode)); + + if (get_events() && get_events()->open_cb) { + + ret = vfs_fsync(file, false); + + if (ret) + ecryptfs_printk(KERN_ERR, + "failed to sync file ret = %d.\n", ret); + + get_events()->open_cb(ecryptfs_inode_to_lower(inode), + crypt_stat); + + if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR) { + truncate_inode_pages(inode->i_mapping, 0); + truncate_inode_pages( + ecryptfs_inode_to_lower(inode)->i_mapping, 0); + } + } goto out; out_put: ecryptfs_put_lower_file(inode); @@ -307,6 +338,7 @@ static int ecryptfs_release(struct inode *inode, struct file *file) ecryptfs_put_lower_file(inode); kmem_cache_free(ecryptfs_file_info_cache, ecryptfs_file_to_private(file)); + return 0; } diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 34897aeb4a66..9e143b0db9e8 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -261,12 +261,15 @@ out: * * Returns zero on success; non-zero on error condition */ + + static int ecryptfs_create(struct inode *directory_inode, struct dentry *ecryptfs_dentry, umode_t mode, bool excl) { struct inode *ecryptfs_inode; int rc; + struct ecryptfs_crypt_stat *crypt_stat; ecryptfs_inode = ecryptfs_do_create(directory_inode, ecryptfs_dentry, mode); @@ -276,6 +279,7 @@ ecryptfs_create(struct inode *directory_inode, struct dentry *ecryptfs_dentry, rc = PTR_ERR(ecryptfs_inode); goto out; } + /* At this point, a file exists on "disk"; we need to make sure * that this on disk file is prepared to be an ecryptfs file */ rc = ecryptfs_initialize_file(ecryptfs_dentry, ecryptfs_inode); @@ -287,7 +291,15 @@ ecryptfs_create(struct inode *directory_inode, struct dentry *ecryptfs_dentry, iput(ecryptfs_inode); goto out; } - d_instantiate_new(ecryptfs_dentry, ecryptfs_inode); + + crypt_stat = &ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat; + if (get_events() && get_events()->open_cb) + get_events()->open_cb( + ecryptfs_inode_to_lower(ecryptfs_inode), + crypt_stat); + + unlock_new_inode(ecryptfs_inode); + d_instantiate(ecryptfs_dentry, ecryptfs_inode); out: return rc; } diff --git 
a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index 37920394c64c..ac9b48aee73f 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c @@ -315,7 +315,8 @@ write_tag_66_packet(char *signature, u8 cipher_code, * | File Encryption Key Size | 1 or 2 bytes | * | File Encryption Key | arbitrary | */ - data_len = (5 + ECRYPTFS_SIG_SIZE_HEX + crypt_stat->key_size); + data_len = (5 + ECRYPTFS_SIG_SIZE_HEX + + ecryptfs_get_key_size_to_store_key(crypt_stat)); *packet = kmalloc(data_len, GFP_KERNEL); message = *packet; if (!message) { @@ -335,8 +336,9 @@ write_tag_66_packet(char *signature, u8 cipher_code, memcpy(&message[i], signature, ECRYPTFS_SIG_SIZE_HEX); i += ECRYPTFS_SIG_SIZE_HEX; /* The encrypted key includes 1 byte cipher code and 2 byte checksum */ - rc = ecryptfs_write_packet_length(&message[i], crypt_stat->key_size + 3, - &packet_size_len); + rc = ecryptfs_write_packet_length(&message[i], + ecryptfs_get_key_size_to_store_key(crypt_stat) + 3, + &packet_size_len); if (rc) { ecryptfs_printk(KERN_ERR, "Error generating tag 66 packet " "header; cannot generate packet length\n"); @@ -344,9 +346,10 @@ write_tag_66_packet(char *signature, u8 cipher_code, } i += packet_size_len; message[i++] = cipher_code; - memcpy(&message[i], crypt_stat->key, crypt_stat->key_size); - i += crypt_stat->key_size; - for (j = 0; j < crypt_stat->key_size; j++) + memcpy(&message[i], crypt_stat->key, + ecryptfs_get_key_size_to_store_key(crypt_stat)); + i += ecryptfs_get_key_size_to_store_key(crypt_stat); + for (j = 0; j < ecryptfs_get_key_size_to_store_key(crypt_stat); j++) checksum += crypt_stat->key[j]; message[i++] = (checksum / 256) % 256; message[i++] = (checksum % 256); @@ -925,6 +928,7 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size, struct ecryptfs_parse_tag_70_packet_silly_stack *s; struct key *auth_tok_key = NULL; int rc = 0; + char full_cipher[ECRYPTFS_MAX_CIPHER_NAME_SIZE]; (*packet_size) = 0; (*filename_size) = 0; @@ -984,12 +988,13 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size, s->fnek_sig_hex[ECRYPTFS_SIG_SIZE_HEX] = '\0'; (*packet_size) += ECRYPTFS_SIG_SIZE; s->cipher_code = data[(*packet_size)++]; - rc = ecryptfs_cipher_code_to_string(s->cipher_string, s->cipher_code); + rc = ecryptfs_cipher_code_to_string(full_cipher, s->cipher_code); if (rc) { printk(KERN_WARNING "%s: Cipher code [%d] is invalid\n", __func__, s->cipher_code); goto out; } + ecryptfs_parse_full_cipher(full_cipher, s->cipher_string, 0); rc = ecryptfs_find_auth_tok_for_sig(&auth_tok_key, &s->auth_tok, mount_crypt_stat, s->fnek_sig_hex); @@ -1158,6 +1163,7 @@ decrypt_pki_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok, char *payload = NULL; size_t payload_len = 0; int rc; + char full_cipher[ECRYPTFS_MAX_CIPHER_NAME_SIZE]; rc = ecryptfs_get_auth_tok_sig(&auth_tok_sig, auth_tok); if (rc) { @@ -1191,21 +1197,31 @@ decrypt_pki_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok, rc); goto out; } - auth_tok->session_key.flags |= ECRYPTFS_CONTAINS_DECRYPTED_KEY; - memcpy(crypt_stat->key, auth_tok->session_key.decrypted_key, - auth_tok->session_key.decrypted_key_size); - crypt_stat->key_size = auth_tok->session_key.decrypted_key_size; - rc = ecryptfs_cipher_code_to_string(crypt_stat->cipher, cipher_code); + + rc = ecryptfs_cipher_code_to_string(full_cipher, cipher_code); if (rc) { ecryptfs_printk(KERN_ERR, "Cipher code [%d] is invalid\n", cipher_code); - goto out; + goto out; } + + auth_tok->session_key.flags |= ECRYPTFS_CONTAINS_DECRYPTED_KEY; + memcpy(crypt_stat->key, 
auth_tok->session_key.decrypted_key, + auth_tok->session_key.decrypted_key_size); + crypt_stat->key_size = ecryptfs_get_key_size_to_restore_key( + auth_tok->session_key.decrypted_key_size, crypt_stat); + + ecryptfs_parse_full_cipher(full_cipher, + crypt_stat->cipher, crypt_stat->cipher_mode); + crypt_stat->flags |= ECRYPTFS_KEY_VALID; if (ecryptfs_verbosity > 0) { ecryptfs_printk(KERN_DEBUG, "Decrypted session key:\n"); ecryptfs_dump_hex(crypt_stat->key, crypt_stat->key_size); + + ecryptfs_dump_salt_hex(crypt_stat->key, crypt_stat->key_size, + crypt_stat); } out: kfree(msg); @@ -1387,6 +1403,7 @@ parse_tag_3_packet(struct ecryptfs_crypt_stat *crypt_stat, struct ecryptfs_auth_tok_list_item *auth_tok_list_item; size_t length_size; int rc = 0; + char full_cipher[ECRYPTFS_MAX_CIPHER_NAME_SIZE]; (*packet_size) = 0; (*new_auth_tok) = NULL; @@ -1460,10 +1477,13 @@ parse_tag_3_packet(struct ecryptfs_crypt_stat *crypt_stat, rc = -EINVAL; goto out_free; } - rc = ecryptfs_cipher_code_to_string(crypt_stat->cipher, + rc = ecryptfs_cipher_code_to_string(full_cipher, (u16)data[(*packet_size)]); if (rc) goto out_free; + ecryptfs_parse_full_cipher(full_cipher, + crypt_stat->cipher, crypt_stat->cipher_mode); + /* A little extra work to differentiate among the AES key * sizes; see RFC2440 */ switch(data[(*packet_size)++]) { @@ -1472,7 +1492,10 @@ parse_tag_3_packet(struct ecryptfs_crypt_stat *crypt_stat, break; default: crypt_stat->key_size = - (*new_auth_tok)->session_key.encrypted_key_size; + ecryptfs_get_key_size_to_restore_key( + (*new_auth_tok)->session_key.encrypted_key_size, + crypt_stat); + } rc = ecryptfs_init_crypt_ctx(crypt_stat); if (rc) @@ -1720,7 +1743,7 @@ decrypt_passphrase_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok, mutex_lock(tfm_mutex); rc = crypto_blkcipher_setkey( desc.tfm, auth_tok->token.password.session_key_encryption_key, - crypt_stat->key_size); + auth_tok->token.password.session_key_encryption_key_bytes); if (unlikely(rc < 0)) { mutex_unlock(tfm_mutex); printk(KERN_ERR "Error setting key for crypto context\n"); @@ -1743,6 +1766,8 @@ decrypt_passphrase_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok, crypt_stat->key_size); ecryptfs_dump_hex(crypt_stat->key, crypt_stat->key_size); + ecryptfs_dump_salt_hex(crypt_stat->key, crypt_stat->key_size, + crypt_stat); } out: return rc; @@ -1979,12 +2004,17 @@ pki_encrypt_session_key(struct key *auth_tok_key, size_t payload_len = 0; struct ecryptfs_message *msg; int rc; + unsigned char final[2*ECRYPTFS_MAX_CIPHER_NAME_SIZE+1]; rc = write_tag_66_packet(auth_tok->token.private_key.signature, - ecryptfs_code_for_cipher_string( - crypt_stat->cipher, - crypt_stat->key_size), - crypt_stat, &payload, &payload_len); + ecryptfs_code_for_cipher_string( + ecryptfs_get_full_cipher( + crypt_stat->cipher, + crypt_stat->cipher_mode, + final, sizeof(final)), + ecryptfs_get_key_size_to_enc_data( + crypt_stat)), + crypt_stat, &payload, &payload_len); up_write(&(auth_tok_key->sem)); key_put(auth_tok_key); if (rc) { @@ -2042,7 +2072,7 @@ write_tag_1_packet(char *dest, size_t *remaining_bytes, ecryptfs_from_hex(key_rec->sig, auth_tok->token.private_key.signature, ECRYPTFS_SIG_SIZE); encrypted_session_key_valid = 0; - for (i = 0; i < crypt_stat->key_size; i++) + for (i = 0; i < ecryptfs_get_key_size_to_store_key(crypt_stat); i++) encrypted_session_key_valid |= auth_tok->session_key.encrypted_key[i]; if (encrypted_session_key_valid) { @@ -2196,6 +2226,7 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes, u8 cipher_code; size_t 
packet_size_length; size_t max_packet_size; + unsigned char final[2*ECRYPTFS_MAX_CIPHER_NAME_SIZE+1]; struct ecryptfs_mount_crypt_stat *mount_crypt_stat = crypt_stat->mount_crypt_stat; struct blkcipher_desc desc = { @@ -2228,13 +2259,14 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes, mount_crypt_stat->global_default_cipher_key_size; if (auth_tok->session_key.encrypted_key_size == 0) auth_tok->session_key.encrypted_key_size = - crypt_stat->key_size; + ecryptfs_get_key_size_to_store_key(crypt_stat); if (crypt_stat->key_size == 24 && strcmp("aes", crypt_stat->cipher) == 0) { memset((crypt_stat->key + 24), 0, 8); auth_tok->session_key.encrypted_key_size = 32; } else - auth_tok->session_key.encrypted_key_size = crypt_stat->key_size; + auth_tok->session_key.encrypted_key_size = + ecryptfs_get_key_size_to_store_key(crypt_stat); key_rec->enc_key_size = auth_tok->session_key.encrypted_key_size; encrypted_session_key_valid = 0; @@ -2258,8 +2290,8 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes, auth_tok->token.password. session_key_encryption_key_bytes); memcpy(session_key_encryption_key, - auth_tok->token.password.session_key_encryption_key, - crypt_stat->key_size); + auth_tok->token.password.session_key_encryption_key, + auth_tok->token.password.session_key_encryption_key_bytes); ecryptfs_printk(KERN_DEBUG, "Cached session key encryption key:\n"); if (ecryptfs_verbosity > 0) @@ -2292,7 +2324,7 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes, } mutex_lock(tfm_mutex); rc = crypto_blkcipher_setkey(desc.tfm, session_key_encryption_key, - crypt_stat->key_size); + auth_tok->token.password.session_key_encryption_key_bytes); if (rc < 0) { mutex_unlock(tfm_mutex); ecryptfs_printk(KERN_ERR, "Error setting key for crypto " @@ -2301,7 +2333,9 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes, } rc = 0; ecryptfs_printk(KERN_DEBUG, "Encrypting [%zd] bytes of the key\n", - crypt_stat->key_size); + crypt_stat->key_size); + ecryptfs_printk(KERN_DEBUG, "Encrypting [%zd] bytes of the salt key\n", + ecryptfs_get_salt_size_for_cipher(crypt_stat)); rc = crypto_blkcipher_encrypt(&desc, dst_sg, src_sg, (*key_rec).enc_key_size); mutex_unlock(tfm_mutex); @@ -2350,8 +2384,10 @@ encrypted_session_key_set: dest[(*packet_size)++] = 0x04; /* version 4 */ /* TODO: Break from RFC2440 so that arbitrary ciphers can be * specified with strings */ - cipher_code = ecryptfs_code_for_cipher_string(crypt_stat->cipher, - crypt_stat->key_size); + cipher_code = ecryptfs_code_for_cipher_string( + ecryptfs_get_full_cipher(crypt_stat->cipher, + crypt_stat->cipher_mode, final, sizeof(final)), + crypt_stat->key_size); if (cipher_code == 0) { ecryptfs_printk(KERN_WARNING, "Unable to generate code for " "cipher [%s]\n", crypt_stat->cipher); diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index cd2a3199a814..7d63228b9657 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -156,16 +156,41 @@ int ecryptfs_get_lower_file(struct dentry *dentry, struct inode *inode) void ecryptfs_put_lower_file(struct inode *inode) { + int ret = 0; struct ecryptfs_inode_info *inode_info; + bool clear_cache_needed = false; inode_info = ecryptfs_inode_to_private(inode); if (atomic_dec_and_mutex_lock(&inode_info->lower_file_count, &inode_info->lower_file_mutex)) { + + if (get_events() && get_events()->is_hw_crypt_cb && + get_events()->is_hw_crypt_cb()) + clear_cache_needed = true; + filemap_write_and_wait(inode->i_mapping); + if (clear_cache_needed) { + ret = vfs_fsync(inode_info->lower_file, false); + + if (ret) + 
pr_err("failed to sync file ret = %d.\n", ret); + } fput(inode_info->lower_file); inode_info->lower_file = NULL; mutex_unlock(&inode_info->lower_file_mutex); + + if (clear_cache_needed) { + truncate_inode_pages_fill_zero(inode->i_mapping, 0); + truncate_inode_pages_fill_zero( + ecryptfs_inode_to_lower(inode)->i_mapping, 0); + } + + if (get_events() && get_events()->release_cb) + get_events()->release_cb( + ecryptfs_inode_to_lower(inode)); } + + } enum { ecryptfs_opt_sig, ecryptfs_opt_ecryptfs_sig, @@ -280,6 +305,7 @@ static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options, char *cipher_key_bytes_src; char *fn_cipher_key_bytes_src; u8 cipher_code; + unsigned char final[2*ECRYPTFS_MAX_CIPHER_NAME_SIZE+1]; *check_ruid = 0; @@ -309,12 +335,14 @@ static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options, case ecryptfs_opt_ecryptfs_cipher: cipher_name_src = args[0].from; cipher_name_dst = - mount_crypt_stat-> - global_default_cipher_name; - strncpy(cipher_name_dst, cipher_name_src, - ECRYPTFS_MAX_CIPHER_NAME_SIZE); - cipher_name_dst[ECRYPTFS_MAX_CIPHER_NAME_SIZE] = '\0'; + mount_crypt_stat->global_default_cipher_name; + + ecryptfs_parse_full_cipher(cipher_name_src, + mount_crypt_stat->global_default_cipher_name, + mount_crypt_stat->global_default_cipher_mode); + cipher_name_set = 1; + break; case ecryptfs_opt_ecryptfs_key_bytes: cipher_key_bytes_src = args[0].from; @@ -411,24 +439,35 @@ static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options, strcpy(mount_crypt_stat->global_default_cipher_name, ECRYPTFS_DEFAULT_CIPHER); } + if ((mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES) && !fn_cipher_name_set) strcpy(mount_crypt_stat->global_default_fn_cipher_name, mount_crypt_stat->global_default_cipher_name); + if (!cipher_key_bytes_set) mount_crypt_stat->global_default_cipher_key_size = 0; + if ((mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES) && !fn_cipher_key_bytes_set) mount_crypt_stat->global_default_fn_cipher_key_bytes = mount_crypt_stat->global_default_cipher_key_size; cipher_code = ecryptfs_code_for_cipher_string( - mount_crypt_stat->global_default_cipher_name, + ecryptfs_get_full_cipher( + mount_crypt_stat->global_default_cipher_name, + mount_crypt_stat->global_default_cipher_mode, + final, sizeof(final)), mount_crypt_stat->global_default_cipher_key_size); if (!cipher_code) { - ecryptfs_printk(KERN_ERR, - "eCryptfs doesn't support cipher: %s", - mount_crypt_stat->global_default_cipher_name); + ecryptfs_printk( + KERN_ERR, + "eCryptfs doesn't support cipher: %s and key size %zu", + ecryptfs_get_full_cipher( + mount_crypt_stat->global_default_cipher_name, + mount_crypt_stat->global_default_cipher_mode, + final, sizeof(final)), + mount_crypt_stat->global_default_cipher_key_size); rc = -EINVAL; goto out; } @@ -488,6 +527,7 @@ static struct file_system_type ecryptfs_fs_type; * @dev_name: The path to mount over * @raw_data: The options passed into the kernel */ + static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data) { @@ -563,6 +603,11 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags ecryptfs_set_superblock_lower(s, path.dentry->d_sb); + + if (get_events() && get_events()->is_hw_crypt_cb && + get_events()->is_hw_crypt_cb()) + drop_pagecache_sb(ecryptfs_superblock_to_lower(s), 0); + /** * Set the POSIX ACL flag based on whether they're enabled in the lower * mount. 
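Aside: the option parsing above funnels the user-visible cipher through ecryptfs_parse_full_cipher() and ecryptfs_get_full_cipher(), which split and rebuild a combined cipher-name/cipher-mode string. The standalone sketch below shows only that split/join idea; the ':' separator, the DEMO_* names, and the buffer sizes are assumptions for illustration (the patch itself only shows the 2*ECRYPTFS_MAX_CIPHER_NAME_SIZE+1 scratch buffer).

#include <stdio.h>
#include <string.h>

#define DEMO_CIPHER_NAME_SIZE 31	/* assumed per-component name limit */

/* Join a base cipher and a mode into one user-visible token, e.g. "aes:xts".
 * buf must hold both components, a separator, and a NUL terminator. */
static const char *demo_get_full_cipher(const char *name, const char *mode,
					char *buf, size_t buf_size)
{
	if (mode && *mode)
		snprintf(buf, buf_size, "%s:%s", name, mode);
	else
		snprintf(buf, buf_size, "%s", name);
	return buf;
}

/* Split "aes:xts" back into name ("aes") and mode ("xts"); a token with
 * no separator keeps the whole string as the name and an empty mode.
 * name and mode must each hold DEMO_CIPHER_NAME_SIZE + 1 bytes. */
static void demo_parse_full_cipher(const char *full, char *name, char *mode)
{
	const char *sep = strchr(full, ':');

	if (!sep) {
		snprintf(name, DEMO_CIPHER_NAME_SIZE + 1, "%s", full);
		mode[0] = '\0';
		return;
	}
	snprintf(name, DEMO_CIPHER_NAME_SIZE + 1, "%.*s",
		 (int)(sep - full), full);
	snprintf(mode, DEMO_CIPHER_NAME_SIZE + 1, "%s", sep + 1);
}

If that separator convention holds, a mount option such as ecryptfs_cipher=aes:xts carries both pieces in one token, while a legacy ecryptfs_cipher=aes leaves the mode empty, which is consistent with how the parsed values are recombined for ecryptfs_code_for_cipher_string() above.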
@@ -901,6 +946,7 @@ static void __exit ecryptfs_exit(void) do_sysfs_unregistration(); unregister_filesystem(&ecryptfs_fs_type); ecryptfs_free_kmem_caches(); + ecryptfs_free_events(); } MODULE_AUTHOR("Michael A. Halcrow <mhalcrow@us.ibm.com>"); diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c index caba848ac763..bdbc72d52438 100644 --- a/fs/ecryptfs/mmap.c +++ b/fs/ecryptfs/mmap.c @@ -552,10 +552,16 @@ static sector_t ecryptfs_bmap(struct address_space *mapping, sector_t block) return rc; } +void ecryptfs_freepage(struct page *page) +{ + zero_user(page, 0, PAGE_CACHE_SIZE); +} + const struct address_space_operations ecryptfs_aops = { .writepage = ecryptfs_writepage, .readpage = ecryptfs_readpage, .write_begin = ecryptfs_write_begin, .write_end = ecryptfs_write_end, .bmap = ecryptfs_bmap, + .freepage = ecryptfs_freepage, }; diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c index afa1b81c3418..25e436ddcf8e 100644 --- a/fs/ecryptfs/super.c +++ b/fs/ecryptfs/super.c @@ -69,6 +69,9 @@ static void ecryptfs_i_callback(struct rcu_head *head) { struct inode *inode = container_of(head, struct inode, i_rcu); struct ecryptfs_inode_info *inode_info; + if (inode == NULL) + return; + inode_info = ecryptfs_inode_to_private(inode); kmem_cache_free(ecryptfs_inode_info_cache, inode_info); @@ -88,9 +91,12 @@ static void ecryptfs_destroy_inode(struct inode *inode) struct ecryptfs_inode_info *inode_info; inode_info = ecryptfs_inode_to_private(inode); + BUG_ON(inode_info->lower_file); + ecryptfs_destroy_crypt_stat(&inode_info->crypt_stat); call_rcu(&inode->i_rcu, ecryptfs_i_callback); + } /** @@ -149,6 +155,9 @@ static int ecryptfs_show_options(struct seq_file *m, struct dentry *root) struct ecryptfs_mount_crypt_stat *mount_crypt_stat = &ecryptfs_superblock_to_private(sb)->mount_crypt_stat; struct ecryptfs_global_auth_tok *walker; + unsigned char final[2*ECRYPTFS_MAX_CIPHER_NAME_SIZE+1]; + + memset(final, 0, sizeof(final)); mutex_lock(&mount_crypt_stat->global_auth_tok_list_mutex); list_for_each_entry(walker, @@ -162,7 +171,10 @@ static int ecryptfs_show_options(struct seq_file *m, struct dentry *root) mutex_unlock(&mount_crypt_stat->global_auth_tok_list_mutex); seq_printf(m, ",ecryptfs_cipher=%s", - mount_crypt_stat->global_default_cipher_name); + ecryptfs_get_full_cipher( + mount_crypt_stat->global_default_cipher_name, + mount_crypt_stat->global_default_cipher_mode, + final, sizeof(final))); if (mount_crypt_stat->global_default_cipher_key_size) seq_printf(m, ",ecryptfs_key_bytes=%zd", diff --git a/fs/eventpoll.c b/fs/eventpoll.c index e6a26429d70c..b484d4500687 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1235,14 +1235,22 @@ static int ep_create_wakeup_source(struct epitem *epi) { struct name_snapshot n; struct wakeup_source *ws; + char task_comm_buf[TASK_COMM_LEN]; + char buf[64]; + + get_task_comm(task_comm_buf, current); if (!epi->ep->ws) { - epi->ep->ws = wakeup_source_register("eventpoll"); + snprintf(buf, sizeof(buf), "epoll_%.*s_epollfd", + (int)sizeof(task_comm_buf), task_comm_buf); + epi->ep->ws = wakeup_source_register(buf); if (!epi->ep->ws) return -ENOMEM; } take_dentry_name_snapshot(&n, epi->ffd.file->f_path.dentry); - ws = wakeup_source_register(n.name); + snprintf(buf, sizeof(buf), "epoll_%.*s_file:%s", + (int)sizeof(task_comm_buf), task_comm_buf, n.name); + ws = wakeup_source_register(buf); release_dentry_name_snapshot(&n); diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index da3d40ef1668..3267a80dbbe2 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -40,7 +40,8 @@ static inline int ext2_add_nondir(struct 
dentry *dentry, struct inode *inode) { int err = ext2_add_link(dentry, inode); if (!err) { - d_instantiate_new(dentry, inode); + unlock_new_inode(inode); + d_instantiate(dentry, inode); return 0; } inode_dec_link_count(inode); @@ -266,7 +267,8 @@ static int ext2_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode) if (err) goto out_fail; - d_instantiate_new(dentry, inode); + unlock_new_inode(inode); + d_instantiate(dentry, inode); out: return err; diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig index 3c8293215603..ebaff5ab93da 100644 --- a/fs/ext4/Kconfig +++ b/fs/ext4/Kconfig @@ -118,10 +118,16 @@ config EXT4_ENCRYPTION decrypted pages in the page cache. config EXT4_FS_ENCRYPTION - bool - default y + bool "Ext4 FS Encryption" + default n depends on EXT4_ENCRYPTION +config EXT4_FS_ICE_ENCRYPTION + bool "Ext4 Encryption with ICE support" + default n + depends on EXT4_FS_ENCRYPTION + depends on PFK + config EXT4_DEBUG bool "EXT4 debugging support" depends on EXT4_FS diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile index f52cf54f0cbc..1cabbd9a9229 100644 --- a/fs/ext4/Makefile +++ b/fs/ext4/Makefile @@ -14,3 +14,5 @@ ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o ext4-$(CONFIG_EXT4_FS_SECURITY) += xattr_security.o ext4-$(CONFIG_EXT4_FS_ENCRYPTION) += crypto_policy.o crypto.o \ crypto_key.o crypto_fname.o + +ext4-$(CONFIG_EXT4_FS_ICE_ENCRYPTION) += ext4_ice.o diff --git a/fs/ext4/crypto.c b/fs/ext4/crypto.c index f6096ee77662..f5099a3386ec 100644 --- a/fs/ext4/crypto.c +++ b/fs/ext4/crypto.c @@ -389,14 +389,12 @@ int ext4_decrypt(struct page *page) page->index, page, page, GFP_NOFS); } -int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex) +int ext4_encrypted_zeroout(struct inode *inode, ext4_lblk_t lblk, + ext4_fsblk_t pblk, ext4_lblk_t len) { struct ext4_crypto_ctx *ctx; struct page *ciphertext_page = NULL; struct bio *bio; - ext4_lblk_t lblk = le32_to_cpu(ex->ee_block); - ext4_fsblk_t pblk = ext4_ext_pblock(ex); - unsigned int len = ext4_ext_get_actual_len(ex); int ret, err = 0; #if 0 @@ -457,17 +455,10 @@ errout: return err; } -bool ext4_valid_enc_modes(uint32_t contents_mode, uint32_t filenames_mode) +bool ext4_valid_contents_enc_mode(uint32_t mode) { - if (contents_mode == EXT4_ENCRYPTION_MODE_AES_256_XTS) { - return (filenames_mode == EXT4_ENCRYPTION_MODE_AES_256_CTS || - filenames_mode == EXT4_ENCRYPTION_MODE_AES_256_HEH); - } - - if (contents_mode == EXT4_ENCRYPTION_MODE_SPECK128_256_XTS) - return filenames_mode == EXT4_ENCRYPTION_MODE_SPECK128_256_CTS; - - return false; + return (mode == EXT4_ENCRYPTION_MODE_AES_256_XTS || + mode == EXT4_ENCRYPTION_MODE_PRIVATE); } /** diff --git a/fs/ext4/crypto_fname.c b/fs/ext4/crypto_fname.c index 5e5afb6ef71a..026716bdbbfc 100644 --- a/fs/ext4/crypto_fname.c +++ b/fs/ext4/crypto_fname.c @@ -42,6 +42,12 @@ static void ext4_dir_crypt_complete(struct crypto_async_request *req, int res) complete(&ecr->completion); } +bool ext4_valid_filenames_enc_mode(uint32_t mode) +{ + return (mode == EXT4_ENCRYPTION_MODE_AES_256_CTS || + mode == EXT4_ENCRYPTION_MODE_AES_256_HEH); +} + static unsigned max_name_len(struct inode *inode) { return S_ISLNK(inode->i_mode) ? 
inode->i_sb->s_blocksize : diff --git a/fs/ext4/crypto_key.c b/fs/ext4/crypto_key.c index 68225223ffd8..d3d6b28ce9b9 100644 --- a/fs/ext4/crypto_key.c +++ b/fs/ext4/crypto_key.c @@ -15,6 +15,7 @@ #include <uapi/linux/keyctl.h> #include "ext4.h" +#include "ext4_ice.h" #include "xattr.h" static void derive_crypt_complete(struct crypto_async_request *req, int rc) @@ -173,6 +174,8 @@ void ext4_free_crypt_info(struct ext4_crypt_info *ci) if (!ci) return; + if (ci->ci_keyring_key) + key_put(ci->ci_keyring_key); crypto_free_ablkcipher(ci->ci_ctfm); kmem_cache_free(ext4_crypt_info_cachep, ci); } @@ -194,7 +197,13 @@ void ext4_free_encryption_info(struct inode *inode, ext4_free_crypt_info(ci); } -int ext4_get_encryption_info(struct inode *inode) +static int ext4_default_data_encryption_mode(void) +{ + return ext4_is_ice_enabled() ? EXT4_ENCRYPTION_MODE_PRIVATE : + EXT4_ENCRYPTION_MODE_AES_256_XTS; +} + +int _ext4_get_encryption_info(struct inode *inode) { struct ext4_inode_info *ei = EXT4_I(inode); struct ext4_crypt_info *crypt_info; @@ -207,24 +216,32 @@ int ext4_get_encryption_info(struct inode *inode) struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); struct crypto_ablkcipher *ctfm; const char *cipher_str; - char raw_key[EXT4_MAX_KEY_SIZE]; - char mode; + int for_fname = 0; + int mode; int res; - if (ei->i_crypt_info) - return 0; - res = ext4_init_crypto(); if (res) return res; +retry: + crypt_info = ACCESS_ONCE(ei->i_crypt_info); + if (crypt_info) { + if (!crypt_info->ci_keyring_key || + key_validate(crypt_info->ci_keyring_key) == 0) + return 0; + ext4_free_encryption_info(inode, crypt_info); + goto retry; + } + res = ext4_xattr_get(inode, EXT4_XATTR_INDEX_ENCRYPTION, EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, &ctx, sizeof(ctx)); if (res < 0) { if (!DUMMY_ENCRYPTION_ENABLED(sbi)) return res; - ctx.contents_encryption_mode = EXT4_ENCRYPTION_MODE_AES_256_XTS; + ctx.contents_encryption_mode = + ext4_default_data_encryption_mode(); ctx.filenames_encryption_mode = EXT4_ENCRYPTION_MODE_AES_256_CTS; ctx.flags = 0; @@ -240,14 +257,15 @@ int ext4_get_encryption_info(struct inode *inode) crypt_info->ci_data_mode = ctx.contents_encryption_mode; crypt_info->ci_filename_mode = ctx.filenames_encryption_mode; crypt_info->ci_ctfm = NULL; + crypt_info->ci_keyring_key = NULL; memcpy(crypt_info->ci_master_key, ctx.master_key_descriptor, sizeof(crypt_info->ci_master_key)); - if (S_ISREG(inode->i_mode)) - mode = crypt_info->ci_data_mode; - else if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) - mode = crypt_info->ci_filename_mode; - else + if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) + for_fname = 1; + else if (!S_ISREG(inode->i_mode)) BUG(); + mode = for_fname ? 
crypt_info->ci_filename_mode : + crypt_info->ci_data_mode; switch (mode) { case EXT4_ENCRYPTION_MODE_AES_256_XTS: cipher_str = "xts(aes)"; @@ -255,15 +273,11 @@ case EXT4_ENCRYPTION_MODE_AES_256_CTS: cipher_str = "cts(cbc(aes))"; break; + case EXT4_ENCRYPTION_MODE_PRIVATE: + cipher_str = "bugon"; + break; case EXT4_ENCRYPTION_MODE_AES_256_HEH: cipher_str = "heh(aes)"; break; - case EXT4_ENCRYPTION_MODE_SPECK128_256_XTS: - cipher_str = "xts(speck128)"; - break; - case EXT4_ENCRYPTION_MODE_SPECK128_256_CTS: - cipher_str = "cts(cbc(speck128))"; - break; default: printk_once(KERN_WARNING "ext4: unsupported key mode %d (ino %u)\n", @@ -272,7 +286,7 @@ goto out; } if (DUMMY_ENCRYPTION_ENABLED(sbi)) { - memset(raw_key, 0x42, EXT4_AES_256_XTS_KEY_SIZE); + memset(crypt_info->ci_raw_key, 0x42, EXT4_AES_256_XTS_KEY_SIZE); goto got_key; } memcpy(full_key_descriptor, EXT4_KEY_DESC_PREFIX, @@ -288,6 +302,7 @@ keyring_key = NULL; goto out; } + crypt_info->ci_keyring_key = keyring_key; if (keyring_key->type != &key_type_logon) { printk_once(KERN_WARNING "ext4: key type must be logon\n"); @@ -318,36 +333,49 @@ up_read(&keyring_key->sem); goto out; } - res = ext4_derive_key(&ctx, master_key->raw, raw_key); + res = ext4_derive_key(&ctx, master_key->raw, + crypt_info->ci_raw_key); up_read(&keyring_key->sem); if (res) goto out; got_key: - ctfm = crypto_alloc_ablkcipher(cipher_str, 0, 0); - if (!ctfm || IS_ERR(ctfm)) { - res = ctfm ? PTR_ERR(ctfm) : -ENOMEM; - printk(KERN_DEBUG - "%s: error %d (inode %u) allocating crypto tfm\n", - __func__, res, (unsigned) inode->i_ino); + if (for_fname || + (crypt_info->ci_data_mode != EXT4_ENCRYPTION_MODE_PRIVATE)) { + ctfm = crypto_alloc_ablkcipher(cipher_str, 0, 0); + if (!ctfm || IS_ERR(ctfm)) { + res = ctfm ? 
PTR_ERR(ctfm) : -ENOMEM; + pr_debug("%s: error %d (inode %u) allocating crypto tfm\n", + __func__, res, (unsigned) inode->i_ino); + goto out; + } + crypt_info->ci_ctfm = ctfm; + crypto_ablkcipher_clear_flags(ctfm, ~0); + crypto_tfm_set_flags(crypto_ablkcipher_tfm(ctfm), + CRYPTO_TFM_REQ_WEAK_KEY); + res = crypto_ablkcipher_setkey(ctfm, crypt_info->ci_raw_key, + ext4_encryption_key_size(mode)); + if (res) + goto out; + memzero_explicit(crypt_info->ci_raw_key, + sizeof(crypt_info->ci_raw_key)); + } else if (!ext4_is_ice_enabled()) { + pr_warn("%s: ICE support not available\n", + __func__); + res = -EINVAL; goto out; } - crypt_info->ci_ctfm = ctfm; - crypto_ablkcipher_clear_flags(ctfm, ~0); - crypto_tfm_set_flags(crypto_ablkcipher_tfm(ctfm), - CRYPTO_TFM_REQ_WEAK_KEY); - res = crypto_ablkcipher_setkey(ctfm, raw_key, - ext4_encryption_key_size(mode)); - if (res) - goto out; + if (cmpxchg(&ei->i_crypt_info, NULL, crypt_info) != NULL) { + ext4_free_crypt_info(crypt_info); + goto retry; + } + return 0; - if (cmpxchg(&ei->i_crypt_info, NULL, crypt_info) == NULL) - crypt_info = NULL; out: if (res == -ENOKEY) res = 0; - key_put(keyring_key); + memzero_explicit(crypt_info->ci_raw_key, + sizeof(crypt_info->ci_raw_key)); ext4_free_crypt_info(crypt_info); - memzero_explicit(raw_key, sizeof(raw_key)); return res; } diff --git a/fs/ext4/crypto_policy.c b/fs/ext4/crypto_policy.c index d4b6f3c06ff0..77bd7bfb6329 100644 --- a/fs/ext4/crypto_policy.c +++ b/fs/ext4/crypto_policy.c @@ -60,12 +60,16 @@ static int ext4_create_encryption_context_from_policy( ctx.format = EXT4_ENCRYPTION_CONTEXT_FORMAT_V1; memcpy(ctx.master_key_descriptor, policy->master_key_descriptor, EXT4_KEY_DESCRIPTOR_SIZE); - if (!ext4_valid_enc_modes(policy->contents_encryption_mode, - policy->filenames_encryption_mode)) { + if (!ext4_valid_contents_enc_mode(policy->contents_encryption_mode)) { printk(KERN_WARNING - "%s: Invalid encryption modes (contents %d, filenames %d)\n", - __func__, policy->contents_encryption_mode, - policy->filenames_encryption_mode); + "%s: Invalid contents encryption mode %d\n", __func__, + policy->contents_encryption_mode); + return -EINVAL; + } + if (!ext4_valid_filenames_enc_mode(policy->filenames_encryption_mode)) { + printk(KERN_WARNING + "%s: Invalid filenames encryption mode %d\n", __func__, + policy->filenames_encryption_mode); return -EINVAL; } if (policy->flags & ~EXT4_POLICY_FLAGS_VALID) diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 7b626e942987..3dc54352c9e7 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -168,8 +168,11 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) index, 1); file->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT; bh = ext4_bread(NULL, inode, map.m_lblk, 0); - if (IS_ERR(bh)) - return PTR_ERR(bh); + if (IS_ERR(bh)) { + err = PTR_ERR(bh); + bh = NULL; + goto errout; + } } if (!bh) { diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 6998e3eaab4c..969beec3ff7e 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -589,8 +589,7 @@ enum { #define EXT4_ENCRYPTION_MODE_AES_256_GCM 2 #define EXT4_ENCRYPTION_MODE_AES_256_CBC 3 #define EXT4_ENCRYPTION_MODE_AES_256_CTS 4 -#define EXT4_ENCRYPTION_MODE_SPECK128_256_XTS 7 -#define EXT4_ENCRYPTION_MODE_SPECK128_256_CTS 8 +#define EXT4_ENCRYPTION_MODE_PRIVATE 127 #define EXT4_ENCRYPTION_MODE_AES_256_HEH 126 #include "ext4_crypto.h" @@ -2273,7 +2272,7 @@ int ext4_get_policy(struct inode *inode, /* crypto.c */ extern struct kmem_cache *ext4_crypt_info_cachep; -bool ext4_valid_enc_modes(uint32_t contents_mode, uint32_t 
filenames_mode); +bool ext4_valid_contents_enc_mode(uint32_t mode); uint32_t ext4_validate_encryption_key_size(uint32_t mode, uint32_t size); extern struct workqueue_struct *ext4_read_workqueue; struct ext4_crypto_ctx *ext4_get_crypto_ctx(struct inode *inode, @@ -2284,7 +2283,8 @@ struct page *ext4_encrypt(struct inode *inode, struct page *plaintext_page, gfp_t gfp_flags); int ext4_decrypt(struct page *page); -int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex); +int ext4_encrypted_zeroout(struct inode *inode, ext4_lblk_t lblk, + ext4_fsblk_t pblk, ext4_lblk_t len); extern const struct dentry_operations ext4_encrypted_d_ops; #ifdef CONFIG_EXT4_FS_ENCRYPTION @@ -2304,6 +2304,7 @@ static inline int ext4_sb_has_crypto(struct super_block *sb) #endif /* crypto_fname.c */ +bool ext4_valid_filenames_enc_mode(uint32_t mode); u32 ext4_fname_crypto_round_up(u32 size, u32 blksize); unsigned ext4_fname_encrypted_size(struct inode *inode, u32 ilen); int ext4_fname_crypto_alloc_buffer(struct inode *inode, @@ -2347,17 +2348,37 @@ static inline void ext4_fname_free_filename(struct ext4_filename *fname) { } /* crypto_key.c */ void ext4_free_crypt_info(struct ext4_crypt_info *ci); void ext4_free_encryption_info(struct inode *inode, struct ext4_crypt_info *ci); +int _ext4_get_encryption_info(struct inode *inode); #ifdef CONFIG_EXT4_FS_ENCRYPTION int ext4_has_encryption_key(struct inode *inode); -int ext4_get_encryption_info(struct inode *inode); +static inline int ext4_get_encryption_info(struct inode *inode) +{ + struct ext4_crypt_info *ci = EXT4_I(inode)->i_crypt_info; + + if (!ci || + (ci->ci_keyring_key && + (ci->ci_keyring_key->flags & ((1 << KEY_FLAG_INVALIDATED) | + (1 << KEY_FLAG_REVOKED) | + (1 << KEY_FLAG_DEAD))))) + return _ext4_get_encryption_info(inode); + return 0; +} static inline struct ext4_crypt_info *ext4_encryption_info(struct inode *inode) { return EXT4_I(inode)->i_crypt_info; } +static inline int ext4_using_hardware_encryption(struct inode *inode) +{ + struct ext4_crypt_info *ci = ext4_encryption_info(inode); + + return S_ISREG(inode->i_mode) && ci && + ci->ci_data_mode == EXT4_ENCRYPTION_MODE_PRIVATE; +} + #else static inline int ext4_has_encryption_key(struct inode *inode) { @@ -2371,6 +2392,10 @@ static inline struct ext4_crypt_info *ext4_encryption_info(struct inode *inode) { return NULL; } +static inline int ext4_using_hardware_encryption(struct inode *inode) +{ + return 0; +} #endif @@ -2542,6 +2567,8 @@ extern int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf); extern qsize_t *ext4_get_reserved_space(struct inode *inode); extern void ext4_da_update_reserve_space(struct inode *inode, int used, int quota_claim); +extern int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk, + ext4_fsblk_t pblk, ext4_lblk_t len); /* indirect.c */ extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, @@ -3045,8 +3072,7 @@ extern int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos, struct page *page); extern int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname, - struct dentry *dentry, - struct inode *inode); + struct inode *dir, struct inode *inode); extern int ext4_try_create_inline_dir(handle_t *handle, struct inode *parent, struct inode *inode); diff --git a/fs/ext4/ext4_crypto.h b/fs/ext4/ext4_crypto.h index f7ba3be9d7ac..e28cc5aab04a 100644 --- a/fs/ext4/ext4_crypto.h +++ b/fs/ext4/ext4_crypto.h @@ -12,6 +12,7 @@ #define _EXT4_CRYPTO_H #include <linux/fs.h> +#include <linux/pfk.h> #define 
EXT4_KEY_DESCRIPTOR_SIZE 8 @@ -63,6 +64,7 @@ struct ext4_encryption_context { #define EXT4_AES_256_CTS_KEY_SIZE 32 #define EXT4_AES_256_HEH_KEY_SIZE 32 #define EXT4_AES_256_XTS_KEY_SIZE 64 +#define EXT4_PRIVATE_KEY_SIZE 64 #define EXT4_MAX_KEY_SIZE 64 #define EXT4_KEY_DESC_PREFIX "ext4:" @@ -80,9 +82,13 @@ struct ext4_crypt_info { char ci_filename_mode; char ci_flags; struct crypto_ablkcipher *ci_ctfm; + struct key *ci_keyring_key; char ci_master_key[EXT4_KEY_DESCRIPTOR_SIZE]; + char ci_raw_key[EXT4_MAX_KEY_SIZE]; }; + + #define EXT4_CTX_REQUIRES_FREE_ENCRYPT_FL 0x00000001 #define EXT4_WRITE_PATH_FL 0x00000002 @@ -115,6 +121,7 @@ static inline int ext4_encryption_key_size(int mode) { switch (mode) { case EXT4_ENCRYPTION_MODE_AES_256_XTS: + case EXT4_ENCRYPTION_MODE_PRIVATE: return EXT4_AES_256_XTS_KEY_SIZE; case EXT4_ENCRYPTION_MODE_AES_256_GCM: return EXT4_AES_256_GCM_KEY_SIZE; @@ -124,10 +131,6 @@ static inline int ext4_encryption_key_size(int mode) return EXT4_AES_256_CTS_KEY_SIZE; case EXT4_ENCRYPTION_MODE_AES_256_HEH: return EXT4_AES_256_HEH_KEY_SIZE; - case EXT4_ENCRYPTION_MODE_SPECK128_256_XTS: - return 64; - case EXT4_ENCRYPTION_MODE_SPECK128_256_CTS: - return 32; default: BUG(); } diff --git a/fs/ext4/ext4_ice.c b/fs/ext4/ext4_ice.c new file mode 100644 index 000000000000..d85bcb8ea1ba --- /dev/null +++ b/fs/ext4/ext4_ice.c @@ -0,0 +1,109 @@ +/* Copyright (c) 2016, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#include "ext4_ice.h" +#include "ext4_crypto.h" + + +/* + * Retrieves encryption key from the inode + */ +char *ext4_get_ice_encryption_key(const struct inode *inode) +{ + struct ext4_crypt_info *ci = NULL; + + if (!inode) + return NULL; + + ci = ext4_encryption_info((struct inode *)inode); + if (!ci) + return NULL; + + return &(ci->ci_raw_key[0]); +} + +/* + * Retrieves encryption salt from the inode + */ +char *ext4_get_ice_encryption_salt(const struct inode *inode) +{ + struct ext4_crypt_info *ci = NULL; + + if (!inode) + return NULL; + + ci = ext4_encryption_info((struct inode *)inode); + if (!ci) + return NULL; + + return &(ci->ci_raw_key[ext4_get_ice_encryption_key_size(inode)]); +} + +/* + * returns true if the cipher mode in inode is AES XTS + */ +int ext4_is_aes_xts_cipher(const struct inode *inode) +{ + struct ext4_crypt_info *ci = NULL; + + ci = ext4_encryption_info((struct inode *)inode); + if (!ci) + return 0; + + return (ci->ci_data_mode == EXT4_ENCRYPTION_MODE_PRIVATE); +} + +/* + * returns true if encryption info in both inodes is equal + */ +int ext4_is_ice_encryption_info_equal(const struct inode *inode1, + const struct inode *inode2) +{ + char *key1 = NULL; + char *key2 = NULL; + char *salt1 = NULL; + char *salt2 = NULL; + + if (!inode1 || !inode2) + return 0; + + if (inode1 == inode2) + return 1; + + /* both do not belong to ice, so we don't care, they are equal for us */ + if (!ext4_should_be_processed_by_ice(inode1) && + !ext4_should_be_processed_by_ice(inode2)) + return 1; + + /* one belongs to ice, the other does not -> not equal */ + if (ext4_should_be_processed_by_ice(inode1) ^ + ext4_should_be_processed_by_ice(inode2)) + return 0; + + key1 = ext4_get_ice_encryption_key(inode1); + key2 = ext4_get_ice_encryption_key(inode2); + salt1 = ext4_get_ice_encryption_salt(inode1); + salt2 = ext4_get_ice_encryption_salt(inode2); + + /* key and salt should not be null by this point */ + if (!key1 || !key2 || !salt1 || !salt2 || + (ext4_get_ice_encryption_key_size(inode1) != + ext4_get_ice_encryption_key_size(inode2)) || + (ext4_get_ice_encryption_salt_size(inode1) != + ext4_get_ice_encryption_salt_size(inode2))) + return 0; + + return ((memcmp(key1, key2, + ext4_get_ice_encryption_key_size(inode1)) == 0) && + (memcmp(salt1, salt2, + ext4_get_ice_encryption_salt_size(inode1)) == 0)); +} diff --git a/fs/ext4/ext4_ice.h b/fs/ext4/ext4_ice.h new file mode 100644 index 000000000000..5257edabd6b2 --- /dev/null +++ b/fs/ext4/ext4_ice.h @@ -0,0 +1,104 @@ +/* Copyright (c) 2016, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#ifndef _EXT4_ICE_H +#define _EXT4_ICE_H + +#include "ext4.h" +#include "ext4_crypto.h" + +#ifdef CONFIG_EXT4_FS_ICE_ENCRYPTION +static inline int ext4_should_be_processed_by_ice(const struct inode *inode) +{ + if (!ext4_encrypted_inode((struct inode *)inode)) + return 0; + + return ext4_using_hardware_encryption((struct inode *)inode); +} + +static inline int ext4_is_ice_enabled(void) +{ + return 1; +} + +int ext4_is_aes_xts_cipher(const struct inode *inode); + +char *ext4_get_ice_encryption_key(const struct inode *inode); +char *ext4_get_ice_encryption_salt(const struct inode *inode); + +int ext4_is_ice_encryption_info_equal(const struct inode *inode1, + const struct inode *inode2); + +static inline size_t ext4_get_ice_encryption_key_size( + const struct inode *inode) +{ + return EXT4_AES_256_XTS_KEY_SIZE / 2; +} + +static inline size_t ext4_get_ice_encryption_salt_size( + const struct inode *inode) +{ + return EXT4_AES_256_XTS_KEY_SIZE / 2; +} + +#else +static inline int ext4_should_be_processed_by_ice(const struct inode *inode) +{ + return 0; +} +static inline int ext4_is_ice_enabled(void) +{ + return 0; +} + +static inline char *ext4_get_ice_encryption_key(const struct inode *inode) +{ + return NULL; +} + +static inline char *ext4_get_ice_encryption_salt(const struct inode *inode) +{ + return NULL; +} + +static inline size_t ext4_get_ice_encryption_key_size( + const struct inode *inode) +{ + return 0; +} + +static inline size_t ext4_get_ice_encryption_salt_size( + const struct inode *inode) +{ + return 0; +} + +static inline int ext4_is_xts_cipher(const struct inode *inode) +{ + return 0; +} + +static inline int ext4_is_ice_encryption_info_equal( + const struct inode *inode1, + const struct inode *inode2) +{ + return 0; +} + +static inline int ext4_is_aes_xts_cipher(const struct inode *inode) +{ + return 0; +} + +#endif + +#endif /* _EXT4_ICE_H */ diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 07ae78ba27a1..4feac4bf90f4 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3156,19 +3156,11 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) { ext4_fsblk_t ee_pblock; unsigned int ee_len; - int ret; ee_len = ext4_ext_get_actual_len(ex); ee_pblock = ext4_ext_pblock(ex); - - if (ext4_encrypted_inode(inode)) - return ext4_encrypted_zeroout(inode, ex); - - ret = sb_issue_zeroout(inode->i_sb, ee_pblock, ee_len, GFP_NOFS); - if (ret > 0) - ret = 0; - - return ret; + return ext4_issue_zeroout(inode, le32_to_cpu(ex->ee_block), ee_pblock, + ee_len); } /* diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 792649dad953..1016a8ddd3b0 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -1024,12 +1024,11 @@ void ext4_show_inline_dir(struct inode *dir, struct buffer_head *bh, */ static int ext4_add_dirent_to_inline(handle_t *handle, struct ext4_filename *fname, - struct dentry *dentry, + struct inode *dir, struct inode *inode, struct ext4_iloc *iloc, void *inline_start, int inline_size) { - struct inode *dir = d_inode(dentry->d_parent); int err; struct ext4_dir_entry_2 *de; @@ -1273,12 +1272,11 @@ out: * the new created block. 
*/ int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname, - struct dentry *dentry, struct inode *inode) + struct inode *dir, struct inode *inode) { int ret, inline_size, no_expand; void *inline_start; struct ext4_iloc iloc; - struct inode *dir = d_inode(dentry->d_parent); ret = ext4_get_inode_loc(dir, &iloc); if (ret) @@ -1292,7 +1290,7 @@ int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname, EXT4_INLINE_DOTDOT_SIZE; inline_size = EXT4_MIN_INLINE_DATA_SIZE - EXT4_INLINE_DOTDOT_SIZE; - ret = ext4_add_dirent_to_inline(handle, fname, dentry, inode, &iloc, + ret = ext4_add_dirent_to_inline(handle, fname, dir, inode, &iloc, inline_start, inline_size); if (ret != -ENOSPC) goto out; @@ -1313,7 +1311,7 @@ int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname, if (inline_size) { inline_start = ext4_get_inline_xattr_pos(dir, &iloc); - ret = ext4_add_dirent_to_inline(handle, fname, dentry, + ret = ext4_add_dirent_to_inline(handle, fname, dir, inode, &iloc, inline_start, inline_size); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 1c27c8993505..4fbede7d7e2f 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -42,6 +42,7 @@ #include "xattr.h" #include "acl.h" #include "truncate.h" +#include "ext4_ice.h" #include <trace/events/ext4.h> #include <trace/events/android_fs.h> @@ -392,6 +393,21 @@ static int __check_block_validity(struct inode *inode, const char *func, return 0; } +int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk, ext4_fsblk_t pblk, + ext4_lblk_t len) +{ + int ret; + + if (ext4_encrypted_inode(inode)) + return ext4_encrypted_zeroout(inode, lblk, pblk, len); + + ret = sb_issue_zeroout(inode->i_sb, pblk, len, GFP_NOFS); + if (ret > 0) + ret = 0; + + return ret; +} + #define check_block_validity(inode, map) \ __check_block_validity((inode), __func__, __LINE__, (map)) @@ -1002,7 +1018,8 @@ static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len, ll_rw_block(READ, 1, &bh); *wait_bh++ = bh; decrypt = ext4_encrypted_inode(inode) && - S_ISREG(inode->i_mode); + S_ISREG(inode->i_mode) && + !ext4_is_ice_enabled(); } } /* @@ -2414,8 +2431,8 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd) mpd->map.m_len = 0; mpd->next_page = index; while (index <= end) { - nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag, - min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); + nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end, + tag); if (nr_pages == 0) goto out; @@ -2423,16 +2440,6 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd) struct page *page = pvec.pages[i]; /* - * At this point, the page may be truncated or - * invalidated (changing page->mapping to NULL), or - * even swizzled back from swapper_space to tmpfs file - * mapping. However, page->index will not change - * because we have a reference on the page. - */ - if (page->index > end) - goto out; - - /* * Accumulated enough dirty pages? This doesn't apply * to WB_SYNC_ALL mode. 
For integrity sync we have to * keep going because someone may be concurrently @@ -3309,7 +3316,9 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter, get_block_func = ext4_get_block_write; dio_flags = DIO_LOCKING; } -#ifdef CONFIG_EXT4_FS_ENCRYPTION +#if defined(CONFIG_EXT4_FS_ENCRYPTION) && \ +!defined(CONFIG_EXT4_FS_ICE_ENCRYPTION) + BUG_ON(ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode)); #endif if (IS_DAX(inode)) @@ -3383,7 +3392,9 @@ static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter, return 0; } -#ifdef CONFIG_EXT4_FS_ENCRYPTION +#if defined(CONFIG_EXT4_FS_ENCRYPTION) && \ +!defined(CONFIG_EXT4_FS_ICE_ENCRYPTION) + if (ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode)) return 0; #endif @@ -3583,7 +3594,8 @@ static int __ext4_block_zero_page_range(handle_t *handle, if (!buffer_uptodate(bh)) goto unlock; if (S_ISREG(inode->i_mode) && - ext4_encrypted_inode(inode)) { + ext4_encrypted_inode(inode) && + !ext4_using_hardware_encryption(inode)) { /* We expect the key to be set. */ BUG_ON(!ext4_has_encryption_key(inode)); BUG_ON(blocksize != PAGE_CACHE_SIZE); @@ -3756,6 +3768,7 @@ int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset, int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) { +#if 0 struct super_block *sb = inode->i_sb; ext4_lblk_t first_block, stop_block; struct address_space *mapping = inode->i_mapping; @@ -3895,6 +3908,12 @@ out_dio: out_mutex: mutex_unlock(&inode->i_mutex); return ret; +#else + /* + * Disabled as per b/28760453 + */ + return -EOPNOTSUPP; +#endif } int ext4_inode_attach_jinode(struct inode *inode) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 8cd2a7e1eef1..313a61626d2d 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -274,7 +274,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, struct ext4_filename *fname, struct ext4_dir_entry_2 **res_dir); static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname, - struct dentry *dentry, struct inode *inode); + struct inode *dir, struct inode *inode); /* checksumming functions */ void initialize_dirent_tail(struct ext4_dir_entry_tail *t, @@ -1931,10 +1931,9 @@ static int add_dirent_to_buf(handle_t *handle, struct ext4_filename *fname, * directory, and adds the dentry to the indexed directory. */ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname, - struct dentry *dentry, + struct inode *dir, struct inode *inode, struct buffer_head *bh) { - struct inode *dir = d_inode(dentry->d_parent); struct buffer_head *bh2; struct dx_root *root; struct dx_frame frames[2], *frame; @@ -2087,8 +2086,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, return retval; if (ext4_has_inline_data(dir)) { - retval = ext4_try_add_inline_entry(handle, &fname, - dentry, inode); + retval = ext4_try_add_inline_entry(handle, &fname, dir, inode); if (retval < 0) goto out; if (retval == 1) { @@ -2098,7 +2096,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, } if (is_dx(dir)) { - retval = ext4_dx_add_entry(handle, &fname, dentry, inode); + retval = ext4_dx_add_entry(handle, &fname, dir, inode); if (!retval || (retval != ERR_BAD_DX_DIR)) goto out; /* Can we just ignore htree data? 
*/ @@ -2127,7 +2125,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, if (blocks == 1 && !dx_fallback && ext4_has_feature_dir_index(sb)) { - retval = make_indexed_dir(handle, &fname, dentry, + retval = make_indexed_dir(handle, &fname, dir, inode, bh); bh = NULL; /* make_indexed_dir releases bh */ goto out; @@ -2162,12 +2160,11 @@ out: * Returns 0 for success, or a negative error value */ static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname, - struct dentry *dentry, struct inode *inode) + struct inode *dir, struct inode *inode) { struct dx_frame frames[2], *frame; struct dx_entry *entries, *at; struct buffer_head *bh; - struct inode *dir = d_inode(dentry->d_parent); struct super_block *sb = dir->i_sb; struct ext4_dir_entry_2 *de; int err; @@ -2418,7 +2415,8 @@ static int ext4_add_nondir(handle_t *handle, int err = ext4_add_entry(handle, dentry, inode); if (!err) { ext4_mark_inode_dirty(handle, inode); - d_instantiate_new(dentry, inode); + unlock_new_inode(inode); + d_instantiate(dentry, inode); return 0; } drop_nlink(inode); @@ -2657,7 +2655,8 @@ out_clear_inode: err = ext4_mark_inode_dirty(handle, dir); if (err) goto out_clear_inode; - d_instantiate_new(dentry, inode); + unlock_new_inode(inode); + d_instantiate(dentry, inode); if (IS_DIRSYNC(dir)) ext4_handle_sync(handle); diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 6ca56f5f72b5..978141e8b800 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -28,6 +28,7 @@ #include "ext4_jbd2.h" #include "xattr.h" #include "acl.h" +#include "ext4_ice.h" static struct kmem_cache *io_end_cachep; @@ -489,7 +490,11 @@ int ext4_bio_write_page(struct ext4_io_submit *io, gfp_t gfp_flags = GFP_NOFS; retry_encrypt: - data_page = ext4_encrypt(inode, page, gfp_flags); + + if (!ext4_using_hardware_encryption(inode)) + data_page = ext4_encrypt(inode, page, gfp_flags); + + if (IS_ERR(data_page)) { ret = PTR_ERR(data_page); if (ret == -ENOMEM && wbc->sync_mode == WB_SYNC_ALL) { diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c index 783e33d839cf..99f1bd8c7f05 100644 --- a/fs/ext4/readpage.c +++ b/fs/ext4/readpage.c @@ -45,6 +45,7 @@ #include <linux/cleancache.h> #include "ext4.h" +#include "ext4_ice.h" #include <trace/events/android_fs.h> /* @@ -63,12 +64,17 @@ static void completion_pages(struct work_struct *work) bio_for_each_segment_all(bv, bio, i) { struct page *page = bv->bv_page; - int ret = ext4_decrypt(page); - if (ret) { - WARN_ON_ONCE(1); - SetPageError(page); - } else + if (ext4_is_ice_enabled()) { SetPageUptodate(page); + } else { + int ret = ext4_decrypt(page); + + if (ret) { + WARN_ON_ONCE(1); + SetPageError(page); + } else + SetPageUptodate(page); + } unlock_page(page); } ext4_release_crypto_ctx(ctx); diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index 92bb650b9fa1..3d7b46d82929 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/acl.c * @@ -7,10 +8,6 @@ * Portions of this code from linux/fs/ext2/acl.c * * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
*/ #include <linux/f2fs_fs.h> #include "f2fs.h" @@ -53,6 +50,9 @@ static struct posix_acl *f2fs_acl_from_disk(const char *value, size_t size) struct f2fs_acl_entry *entry = (struct f2fs_acl_entry *)(hdr + 1); const char *end = value + size; + if (size < sizeof(struct f2fs_acl_header)) + return ERR_PTR(-EINVAL); + if (hdr->a_version != cpu_to_le32(F2FS_ACL_VERSION)) return ERR_PTR(-EINVAL); @@ -160,7 +160,7 @@ static void *f2fs_acl_to_disk(struct f2fs_sb_info *sbi, return (void *)f2fs_acl; fail: - kfree(f2fs_acl); + kvfree(f2fs_acl); return ERR_PTR(-EINVAL); } @@ -190,7 +190,7 @@ static struct posix_acl *__f2fs_get_acl(struct inode *inode, int type, acl = NULL; else acl = ERR_PTR(retval); - kfree(value); + kvfree(value); if (!IS_ERR(acl)) set_cached_acl(inode, type, acl); @@ -243,7 +243,7 @@ static int __f2fs_set_acl(struct inode *inode, int type, error = f2fs_setxattr(inode, name_index, "", value, size, ipage, 0); - kfree(value); + kvfree(value); if (!error) set_cached_acl(inode, type, acl); @@ -288,7 +288,7 @@ static int f2fs_acl_create_masq(struct posix_acl *acl, umode_t *mode_p) /* assert(atomic_read(acl->a_refcount) == 1); */ FOREACH_ACL_ENTRY(pa, acl, pe) { - switch(pa->e_tag) { + switch (pa->e_tag) { case ACL_USER_OBJ: pa->e_perm &= (mode >> 6) | ~S_IRWXO; mode &= (pa->e_perm << 6) | ~S_IRWXU; @@ -329,7 +329,7 @@ static int f2fs_acl_create_masq(struct posix_acl *acl, umode_t *mode_p) } *mode_p = (*mode_p & ~S_IRWXUGO) | mode; - return not_equiv; + return not_equiv; } static int f2fs_acl_create(struct inode *dir, umode_t *mode, @@ -399,12 +399,16 @@ int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage, error = __f2fs_set_acl(inode, ACL_TYPE_DEFAULT, default_acl, ipage); posix_acl_release(default_acl); + } else { + inode->i_default_acl = NULL; } if (acl) { if (!error) error = __f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl, ipage); posix_acl_release(acl); + } else { + inode->i_acl = NULL; } return error; diff --git a/fs/f2fs/acl.h b/fs/f2fs/acl.h index 2c685185c24d..b96823c59b15 100644 --- a/fs/f2fs/acl.h +++ b/fs/f2fs/acl.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/acl.h * @@ -7,10 +8,6 @@ * Portions of this code from linux/fs/ext2/acl.h * * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #ifndef __F2FS_ACL_H__ #define __F2FS_ACL_H__ diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 494253a253f8..7d7d5dae1500 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/checkpoint.c * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
*/ #include <linux/fs.h> #include <linux/bio.h> @@ -24,10 +21,11 @@ #include <trace/events/f2fs.h> static struct kmem_cache *ino_entry_slab; -struct kmem_cache *inode_entry_slab; +struct kmem_cache *f2fs_inode_entry_slab; void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io) { + f2fs_build_fault_attr(sbi, 0, 0); set_ckpt_flags(sbi, CP_ERROR_FLAG); if (!end_io) f2fs_flush_merged_writes(sbi); @@ -36,7 +34,7 @@ void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io) /* * We guarantee no failure on the returned page. */ -struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index) +struct page *f2fs_grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index) { struct address_space *mapping = META_MAPPING(sbi); struct page *page = NULL; @@ -46,7 +44,7 @@ repeat: cond_resched(); goto repeat; } - f2fs_wait_on_page_writeback(page, META, true); + f2fs_wait_on_page_writeback(page, META, true, true); if (!PageUptodate(page)) SetPageUptodate(page); return page; @@ -68,8 +66,9 @@ static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index, .old_blkaddr = index, .new_blkaddr = index, .encrypted_page = NULL, - .is_meta = is_meta, + .is_por = !is_meta, }; + int err; if (unlikely(!is_meta)) fio.op_flags &= ~REQ_META; @@ -84,9 +83,10 @@ repeat: fio.page = page; - if (f2fs_submit_page_bio(&fio)) { + err = f2fs_submit_page_bio(&fio); + if (err) { f2fs_put_page(page, 1); - goto repeat; + return ERR_PTR(err); } lock_page(page); @@ -95,29 +95,67 @@ repeat: goto repeat; } - /* - * if there is any IO error when accessing device, make our filesystem - * readonly and make sure do not write checkpoint with non-uptodate - * meta page. - */ - if (unlikely(!PageUptodate(page))) - f2fs_stop_checkpoint(sbi, false); + if (unlikely(!PageUptodate(page))) { + f2fs_put_page(page, 1); + return ERR_PTR(-EIO); + } out: return page; } -struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index) +struct page *f2fs_get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index) { return __get_meta_page(sbi, index, true); } +struct page *f2fs_get_meta_page_nofail(struct f2fs_sb_info *sbi, pgoff_t index) +{ + struct page *page; + int count = 0; + +retry: + page = __get_meta_page(sbi, index, true); + if (IS_ERR(page)) { + if (PTR_ERR(page) == -EIO && + ++count <= DEFAULT_RETRY_IO_COUNT) + goto retry; + f2fs_stop_checkpoint(sbi, false); + } + return page; +} + /* for POR only */ -struct page *get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index) +struct page *f2fs_get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index) { return __get_meta_page(sbi, index, false); } -bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type) +static bool __is_bitmap_valid(struct f2fs_sb_info *sbi, block_t blkaddr, + int type) +{ + struct seg_entry *se; + unsigned int segno, offset; + bool exist; + + if (type != DATA_GENERIC_ENHANCE && type != DATA_GENERIC_ENHANCE_READ) + return true; + + segno = GET_SEGNO(sbi, blkaddr); + offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr); + se = get_seg_entry(sbi, segno); + + exist = f2fs_test_bit(offset, se->cur_valid_map); + if (!exist && type == DATA_GENERIC_ENHANCE) { + f2fs_err(sbi, "Inconsistent error blkaddr:%u, sit bitmap:%d", + blkaddr, exist); + set_sbi_flag(sbi, SBI_NEED_FSCK); + WARN_ON(1); + } + return exist; +} + +bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, + block_t blkaddr, int type) { switch (type) { case META_NAT: @@ -141,6 +179,25 @@ bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type) blkaddr < MAIN_BLKADDR(sbi))) return 
false; break; + case DATA_GENERIC: + case DATA_GENERIC_ENHANCE: + case DATA_GENERIC_ENHANCE_READ: + if (unlikely(blkaddr >= MAX_BLKADDR(sbi) || + blkaddr < MAIN_BLKADDR(sbi))) { + f2fs_warn(sbi, "access invalid blkaddr:%u", + blkaddr); + set_sbi_flag(sbi, SBI_NEED_FSCK); + WARN_ON(1); + return false; + } else { + return __is_bitmap_valid(sbi, blkaddr, type); + } + break; + case META_GENERIC: + if (unlikely(blkaddr < SEG0_BLKADDR(sbi) || + blkaddr >= MAIN_BLKADDR(sbi))) + return false; + break; default: BUG(); } @@ -151,7 +208,7 @@ bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type) /* * Readahead CP/NAT/SIT/SSA pages */ -int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, +int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type, bool sync) { struct page *page; @@ -164,7 +221,7 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, REQ_RAHEAD, .encrypted_page = NULL, .in_list = false, - .is_meta = (type != META_POR), + .is_por = (type == META_POR), }; struct blk_plug plug; @@ -174,7 +231,7 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, blk_start_plug(&plug); for (; nrpages-- > 0; blkno++) { - if (!is_valid_blkaddr(sbi, blkno, type)) + if (!f2fs_is_valid_blkaddr(sbi, blkno, type)) goto out; switch (type) { @@ -220,7 +277,7 @@ out: return blkno - start; } -void ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index) +void f2fs_ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index) { struct page *page; bool readahead = false; @@ -231,7 +288,7 @@ void ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index) f2fs_put_page(page, 0); if (readahead) - ra_meta_pages(sbi, index, BIO_MAX_PAGES, META_POR, true); + f2fs_ra_meta_pages(sbi, index, BIO_MAX_PAGES, META_POR, true); } static int __f2fs_write_meta_page(struct page *page, @@ -242,22 +299,18 @@ static int __f2fs_write_meta_page(struct page *page, trace_f2fs_writepage(page, META); - if (unlikely(f2fs_cp_error(sbi))) { - dec_page_count(sbi, F2FS_DIRTY_META); - unlock_page(page); - return 0; - } + if (unlikely(f2fs_cp_error(sbi))) + goto redirty_out; if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) goto redirty_out; if (wbc->for_reclaim && page->index < GET_SUM_BLOCK(sbi, 0)) goto redirty_out; - write_meta_page(sbi, page, io_type); + f2fs_do_write_meta_page(sbi, page, io_type); dec_page_count(sbi, F2FS_DIRTY_META); if (wbc->for_reclaim) - f2fs_submit_merged_write_cond(sbi, page->mapping->host, - 0, page->index, META); + f2fs_submit_merged_write_cond(sbi, NULL, page, 0, META); unlock_page(page); @@ -287,8 +340,9 @@ static int f2fs_write_meta_pages(struct address_space *mapping, goto skip_write; /* collect a number of dirty meta pages and write together */ - if (wbc->for_kupdate || - get_pages(sbi, F2FS_DIRTY_META) < nr_pages_to_skip(sbi, META)) + if (wbc->sync_mode != WB_SYNC_ALL && + get_pages(sbi, F2FS_DIRTY_META) < + nr_pages_to_skip(sbi, META)) goto skip_write; /* if locked failed, cp will flush dirty pages instead */ @@ -297,7 +351,7 @@ static int f2fs_write_meta_pages(struct address_space *mapping, trace_f2fs_writepages(mapping->host, wbc, META); diff = nr_pages_to_write(sbi, META, wbc); - written = sync_meta_pages(sbi, META, wbc->nr_to_write, FS_META_IO); + written = f2fs_sync_meta_pages(sbi, META, wbc->nr_to_write, FS_META_IO); mutex_unlock(&sbi->cp_mutex); wbc->nr_to_write = max((long)0, wbc->nr_to_write - written - diff); return 0; @@ -308,13 +362,14 @@ skip_write: return 0; } -long sync_meta_pages(struct 
f2fs_sb_info *sbi, enum page_type type, +long f2fs_sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type, long nr_to_write, enum iostat_type io_type) { struct address_space *mapping = META_MAPPING(sbi); - pgoff_t index = 0, end = ULONG_MAX, prev = ULONG_MAX; + pgoff_t index = 0, prev = ULONG_MAX; struct pagevec pvec; long nwritten = 0; + int nr_pages; struct writeback_control wbc = { .for_reclaim = 0, }; @@ -324,13 +379,9 @@ long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type, blk_start_plug(&plug); - while (index <= end) { - int i, nr_pages; - nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, - PAGECACHE_TAG_DIRTY, - min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); - if (unlikely(nr_pages == 0)) - break; + while ((nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, + PAGECACHE_TAG_DIRTY))) { + int i; for (i = 0; i < nr_pages; i++) { struct page *page = pvec.pages[i]; @@ -354,9 +405,8 @@ continue_unlock: goto continue_unlock; } - f2fs_wait_on_page_writeback(page, META, true); + f2fs_wait_on_page_writeback(page, META, true, true); - BUG_ON(PageWriteback(page)); if (!clear_page_dirty_for_io(page)) goto continue_unlock; @@ -390,7 +440,7 @@ static int f2fs_set_meta_page_dirty(struct page *page) if (!PageDirty(page)) { __set_page_dirty_nobuffers(page); inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_META); - SetPagePrivate(page); + f2fs_set_page_private(page, 0); f2fs_trace_pid(page); return 1; } @@ -461,20 +511,20 @@ static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) spin_unlock(&im->ino_lock); } -void add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) +void f2fs_add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) { /* add new dirty ino entry into list */ __add_ino_entry(sbi, ino, 0, type); } -void remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) +void f2fs_remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) { /* remove dirty ino entry from list */ __remove_ino_entry(sbi, ino, type); } /* mode should be APPEND_INO or UPDATE_INO */ -bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode) +bool f2fs_exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode) { struct inode_management *im = &sbi->im[mode]; struct ino_entry *e; @@ -485,7 +535,7 @@ bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode) return e ? 
true : false; } -void release_ino_entry(struct f2fs_sb_info *sbi, bool all) +void f2fs_release_ino_entry(struct f2fs_sb_info *sbi, bool all) { struct ino_entry *e, *tmp; int i; @@ -504,13 +554,13 @@ void release_ino_entry(struct f2fs_sb_info *sbi, bool all) } } -void set_dirty_device(struct f2fs_sb_info *sbi, nid_t ino, +void f2fs_set_dirty_device(struct f2fs_sb_info *sbi, nid_t ino, unsigned int devidx, int type) { __add_ino_entry(sbi, ino, devidx, type); } -bool is_dirty_device(struct f2fs_sb_info *sbi, nid_t ino, +bool f2fs_is_dirty_device(struct f2fs_sb_info *sbi, nid_t ino, unsigned int devidx, int type) { struct inode_management *im = &sbi->im[type]; @@ -525,20 +575,19 @@ bool is_dirty_device(struct f2fs_sb_info *sbi, nid_t ino, return is_dirty; } -int acquire_orphan_inode(struct f2fs_sb_info *sbi) +int f2fs_acquire_orphan_inode(struct f2fs_sb_info *sbi) { struct inode_management *im = &sbi->im[ORPHAN_INO]; int err = 0; spin_lock(&im->ino_lock); -#ifdef CONFIG_F2FS_FAULT_INJECTION if (time_to_inject(sbi, FAULT_ORPHAN)) { spin_unlock(&im->ino_lock); f2fs_show_injection_info(FAULT_ORPHAN); return -ENOSPC; } -#endif + if (unlikely(im->ino_num >= sbi->max_orphans)) err = -ENOSPC; else @@ -548,7 +597,7 @@ int acquire_orphan_inode(struct f2fs_sb_info *sbi) return err; } -void release_orphan_inode(struct f2fs_sb_info *sbi) +void f2fs_release_orphan_inode(struct f2fs_sb_info *sbi) { struct inode_management *im = &sbi->im[ORPHAN_INO]; @@ -558,14 +607,14 @@ void release_orphan_inode(struct f2fs_sb_info *sbi) spin_unlock(&im->ino_lock); } -void add_orphan_inode(struct inode *inode) +void f2fs_add_orphan_inode(struct inode *inode) { /* add new orphan ino entry into list */ __add_ino_entry(F2FS_I_SB(inode), inode->i_ino, 0, ORPHAN_INO); - update_inode_page(inode); + f2fs_update_inode_page(inode); } -void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) +void f2fs_remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) { /* remove orphan entry from orphan list */ __remove_ino_entry(sbi, ino, ORPHAN_INO); @@ -575,12 +624,7 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) { struct inode *inode; struct node_info ni; - int err = acquire_orphan_inode(sbi); - - if (err) - goto err_out; - - __add_ino_entry(sbi, ino, 0, ORPHAN_INO); + int err; inode = f2fs_iget_retry(sbi->sb, ino); if (IS_ERR(inode)) { @@ -593,34 +637,35 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) } err = dquot_initialize(inode); - if (err) + if (err) { + iput(inode); goto err_out; + } - dquot_initialize(inode); clear_nlink(inode); /* truncate all the data during iput */ iput(inode); - get_node_info(sbi, ino, &ni); + err = f2fs_get_node_info(sbi, ino, &ni); + if (err) + goto err_out; /* ENOMEM was fully retried in f2fs_evict_inode. 
*/ if (ni.blk_addr != NULL_ADDR) { err = -EIO; goto err_out; } - __remove_ino_entry(sbi, ino, ORPHAN_INO); return 0; err_out: set_sbi_flag(sbi, SBI_NEED_FSCK); - f2fs_msg(sbi->sb, KERN_WARNING, - "%s: orphan failed (ino=%x), run fsck to fix.", - __func__, ino); + f2fs_warn(sbi, "%s: orphan failed (ino=%x), run fsck to fix.", + __func__, ino); return err; } -int recover_orphan_inodes(struct f2fs_sb_info *sbi) +int f2fs_recover_orphan_inodes(struct f2fs_sb_info *sbi) { block_t start_blk, orphan_blocks, i, j; unsigned int s_flags = sbi->sb->s_flags; @@ -632,8 +677,13 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi) if (!is_set_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG)) return 0; + if (bdev_read_only(sbi->sb->s_bdev)) { + f2fs_info(sbi, "write access unavailable, skipping orphan cleanup"); + return 0; + } + if (s_flags & MS_RDONLY) { - f2fs_msg(sbi->sb, KERN_INFO, "orphan cleanup on readonly fs"); + f2fs_info(sbi, "orphan cleanup on readonly fs"); sbi->sb->s_flags &= ~MS_RDONLY; } @@ -641,19 +691,28 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi) /* Needed for iput() to work correctly and not trash data */ sbi->sb->s_flags |= MS_ACTIVE; - /* Turn on quotas so that they are updated correctly */ + /* + * Turn on quotas which were not enabled for read-only mounts if + * the filesystem has the quota feature, so that they are updated correctly. + */ quota_enabled = f2fs_enable_quota_files(sbi, s_flags & MS_RDONLY); #endif start_blk = __start_cp_addr(sbi) + 1 + __cp_payload(sbi); orphan_blocks = __start_sum_addr(sbi) - 1 - __cp_payload(sbi); - ra_meta_pages(sbi, start_blk, orphan_blocks, META_CP, true); + f2fs_ra_meta_pages(sbi, start_blk, orphan_blocks, META_CP, true); for (i = 0; i < orphan_blocks; i++) { - struct page *page = get_meta_page(sbi, start_blk + i); + struct page *page; struct f2fs_orphan_block *orphan_blk; + page = f2fs_get_meta_page(sbi, start_blk + i); + if (IS_ERR(page)) { + err = PTR_ERR(page); + goto out; + } + orphan_blk = (struct f2fs_orphan_block *)page_address(page); for (j = 0; j < le32_to_cpu(orphan_blk->entry_count); j++) { nid_t ino = le32_to_cpu(orphan_blk->ino[j]); @@ -668,6 +727,8 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi) /* clear Orphan Flag */ clear_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG); out: + set_sbi_flag(sbi, SBI_IS_RECOVERED); + #ifdef CONFIG_QUOTA /* Turn quotas off */ if (quota_enabled) @@ -701,7 +762,7 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk) /* loop for each orphan inode entry and write them in Journal block */ list_for_each_entry(orphan, head, list) { if (!page) { - page = grab_meta_page(sbi, start_blk++); + page = f2fs_grab_meta_page(sbi, start_blk++); orphan_blk = (struct f2fs_orphan_block *)page_address(page); memset(orphan_blk, 0, sizeof(*orphan_blk)); @@ -735,27 +796,46 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk) } } +static __u32 f2fs_checkpoint_chksum(struct f2fs_sb_info *sbi, + struct f2fs_checkpoint *ckpt) +{ + unsigned int chksum_ofs = le32_to_cpu(ckpt->checksum_offset); + __u32 chksum; + + chksum = f2fs_crc32(sbi, ckpt, chksum_ofs); + if (chksum_ofs < CP_CHKSUM_OFFSET) { + chksum_ofs += sizeof(chksum); + chksum = f2fs_chksum(sbi, chksum, (__u8 *)ckpt + chksum_ofs, + F2FS_BLKSIZE - chksum_ofs); + } + return chksum; +} +
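The f2fs_checkpoint_chksum() helper added above covers the checkpoint block in up to two ranges: the bytes before the embedded checksum slot and, when the slot sits at an offset below CP_CHKSUM_OFFSET, the bytes after it as well, so the slot never checksums itself. A minimal userspace sketch of the same around-the-slot split; the rolling checksum, BLKSIZE, and the offsets here are toy stand-ins for f2fs_crc32()/f2fs_chksum() and the real layout:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define BLKSIZE 64			/* toy block size, not F2FS_BLKSIZE */

/* toy rolling checksum standing in for f2fs_crc32()/f2fs_chksum() */
static uint32_t chk(uint32_t seed, const uint8_t *p, size_t len)
{
	while (len--)
		seed = seed * 31 + *p++;
	return seed;
}

/* checksum every byte of the block except the 4-byte slot at 'ofs' */
static uint32_t block_chksum(const uint8_t *blk, size_t ofs)
{
	uint32_t c = chk(0, blk, ofs);			/* bytes before the slot */

	ofs += sizeof(uint32_t);			/* skip the slot itself */
	return chk(c, blk + ofs, BLKSIZE - ofs);	/* bytes after the slot */
}

int main(void)
{
	uint8_t blk[BLKSIZE];

	memset(blk, 0xab, sizeof(blk));
	printf("chksum: %08x\n", (unsigned)block_chksum(blk, 16));
	return 0;
}

static int get_checkpoint_version(struct f2fs_sb_info *sbi, block_t cp_addr, struct f2fs_checkpoint **cp_block, struct page **cp_page, unsigned long long *version) { - unsigned long blk_size = sbi->blocksize; size_t crc_offset = 0; - __u32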
crc = 0; + __u32 crc; + + *cp_page = f2fs_get_meta_page(sbi, cp_addr); + if (IS_ERR(*cp_page)) + return PTR_ERR(*cp_page); - *cp_page = get_meta_page(sbi, cp_addr); *cp_block = (struct f2fs_checkpoint *)page_address(*cp_page); crc_offset = le32_to_cpu((*cp_block)->checksum_offset); - if (crc_offset > (blk_size - sizeof(__le32))) { - f2fs_msg(sbi->sb, KERN_WARNING, - "invalid crc_offset: %zu", crc_offset); + if (crc_offset < CP_MIN_CHKSUM_OFFSET || + crc_offset > CP_CHKSUM_OFFSET) { + f2fs_put_page(*cp_page, 1); + f2fs_warn(sbi, "invalid crc_offset: %zu", crc_offset); return -EINVAL; } - crc = cur_cp_crc(*cp_block); - if (!f2fs_crc_valid(sbi, crc, *cp_block, crc_offset)) { - f2fs_msg(sbi->sb, KERN_WARNING, "invalid crc value"); + crc = f2fs_checkpoint_chksum(sbi, *cp_block); + if (crc != cur_cp_crc(*cp_block)) { + f2fs_put_page(*cp_page, 1); + f2fs_warn(sbi, "invalid crc value"); return -EINVAL; } @@ -774,14 +854,21 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, err = get_checkpoint_version(sbi, cp_addr, &cp_block, &cp_page_1, version); if (err) - goto invalid_cp1; + return NULL; + + if (le32_to_cpu(cp_block->cp_pack_total_block_count) > + sbi->blocks_per_seg) { + f2fs_warn(sbi, "invalid cp_pack_total_block_count:%u", + le32_to_cpu(cp_block->cp_pack_total_block_count)); + goto invalid_cp; + } pre_version = *version; cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1; err = get_checkpoint_version(sbi, cp_addr, &cp_block, &cp_page_2, version); if (err) - goto invalid_cp2; + goto invalid_cp; cur_version = *version; if (cur_version == pre_version) { @@ -789,14 +876,13 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, f2fs_put_page(cp_page_2, 1); return cp_page_1; } -invalid_cp2: f2fs_put_page(cp_page_2, 1); -invalid_cp1: +invalid_cp: f2fs_put_page(cp_page_1, 1); return NULL; } -int get_valid_checkpoint(struct f2fs_sb_info *sbi) +int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi) { struct f2fs_checkpoint *cp_block; struct f2fs_super_block *fsb = sbi->raw_super; @@ -807,8 +893,10 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi) unsigned int cp_blks = 1 + __cp_payload(sbi); block_t cp_blk_no; int i; + int err; - sbi->ckpt = f2fs_kzalloc(sbi, cp_blks * blk_size, GFP_KERNEL); + sbi->ckpt = f2fs_kzalloc(sbi, array_size(blk_size, cp_blks), + GFP_KERNEL); if (!sbi->ckpt) return -ENOMEM; /* @@ -833,21 +921,24 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi) } else if (cp2) { cur_page = cp2; } else { + err = -EFSCORRUPTED; goto fail_no_cp; } cp_block = (struct f2fs_checkpoint *)page_address(cur_page); memcpy(sbi->ckpt, cp_block, blk_size); - /* Sanity checking of checkpoint */ - if (sanity_check_ckpt(sbi)) - goto free_fail_no_cp; - if (cur_page == cp1) sbi->cur_cp_pack = 1; else sbi->cur_cp_pack = 2; + /* Sanity checking of checkpoint */ + if (f2fs_sanity_check_ckpt(sbi)) { + err = -EFSCORRUPTED; + goto free_fail_no_cp; + } + if (cp_blks <= 1) goto done; @@ -859,7 +950,11 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi) void *sit_bitmap_ptr; unsigned char *ckpt = (unsigned char *)sbi->ckpt; - cur_page = get_meta_page(sbi, cp_blk_no + i); + cur_page = f2fs_get_meta_page(sbi, cp_blk_no + i); + if (IS_ERR(cur_page)) { + err = PTR_ERR(cur_page); + goto free_fail_no_cp; + } sit_bitmap_ptr = page_address(cur_page); memcpy(ckpt + i * blk_size, sit_bitmap_ptr, blk_size); f2fs_put_page(cur_page, 1); @@ -873,8 +968,8 @@ free_fail_no_cp: f2fs_put_page(cp1, 1); f2fs_put_page(cp2, 1); fail_no_cp: - kfree(sbi->ckpt); - return -EINVAL; + 
kvfree(sbi->ckpt); + return err; } static void __add_dirty_inode(struct inode *inode, enum inode_type type) @@ -904,7 +999,7 @@ static void __remove_dirty_inode(struct inode *inode, enum inode_type type) stat_dec_dirty_inode(F2FS_I_SB(inode), type); } -void update_dirty_page(struct inode *inode, struct page *page) +void f2fs_update_dirty_page(struct inode *inode, struct page *page) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); enum inode_type type = S_ISDIR(inode->i_mode) ? DIR_INODE : FILE_INODE; @@ -919,11 +1014,11 @@ void update_dirty_page(struct inode *inode, struct page *page) inode_inc_dirty_pages(inode); spin_unlock(&sbi->inode_lock[type]); - SetPagePrivate(page); + f2fs_set_page_private(page, 0); f2fs_trace_pid(page); } -void remove_dirty_inode(struct inode *inode) +void f2fs_remove_dirty_inode(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); enum inode_type type = S_ISDIR(inode->i_mode) ? DIR_INODE : FILE_INODE; @@ -940,7 +1035,7 @@ void remove_dirty_inode(struct inode *inode) spin_unlock(&sbi->inode_lock[type]); } -int sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type) +int f2fs_sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type) { struct list_head *head; struct inode *inode; @@ -971,22 +1066,18 @@ retry: if (inode) { unsigned long cur_ino = inode->i_ino; - if (is_dir) - F2FS_I(inode)->cp_task = current; + F2FS_I(inode)->cp_task = current; filemap_fdatawrite(inode->i_mapping); - if (is_dir) - F2FS_I(inode)->cp_task = NULL; + F2FS_I(inode)->cp_task = NULL; iput(inode); /* We need to give the CPU to other writers. */ - if (ino == cur_ino) { - congestion_wait(BLK_RW_ASYNC, HZ/50); + if (ino == cur_ino) cond_resched(); - } else { + else ino = cur_ino; - } } else { /* * We should submit bio, since it exists several @@ -1023,7 +1114,7 @@ int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi) /* it's on eviction */ if (is_inode_flag_set(inode, FI_DIRTY_INODE)) - update_inode_page(inode); + f2fs_update_inode_page(inode); iput(inode); } } @@ -1043,6 +1134,28 @@ static void __prepare_cp_block(struct f2fs_sb_info *sbi) ckpt->next_free_nid = cpu_to_le32(last_nid); } +static bool __need_flush_quota(struct f2fs_sb_info *sbi) +{ + bool ret = false; + + if (!is_journalled_quota(sbi)) + return false; + + down_write(&sbi->quota_sem); + if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH)) { + ret = false; + } else if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR)) { + ret = false; + } else if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_FLUSH)) { + clear_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH); + ret = true; + } else if (get_pages(sbi, F2FS_DIRTY_QDATA)) { + ret = true; + } + up_write(&sbi->quota_sem); + return ret; +} +
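block_operations(), introduced by the comment just below, now freezes the filesystem with a retry loop: take the global lock, and if quota, dentry, inode, or node pages were dirtied in the meantime, drop the lock, flush, and try again, bounding only the quota rounds. A toy sketch of that lock-drop-flush-retry shape, with every name invented for illustration:

#include <stdio.h>

static int dirty = 3;		/* pretend three rounds of dirty data exist */

static void lock_all(void)   { /* grab the big lock */ }
static void unlock_all(void) { /* drop it so writers can make progress */ }
static void flush_some(void) { dirty--; }

/* freeze: retry until nothing was dirtied while we were unlocked */
static void freeze(void)
{
retry:
	lock_all();
	if (dirty > 0) {
		unlock_all();	/* cannot flush while holding the lock */
		flush_some();
		goto retry;	/* new dirt may have appeared meanwhile */
	}
	printf("frozen\n");	/* locked with nothing dirty: checkpoint now */
	unlock_all();
}

int main(void) { freeze(); return 0; }

/* * Freeze all the FS-operations for checkpoint.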
*/ @@ -1054,20 +1167,40 @@ static int block_operations(struct f2fs_sb_info *sbi) .for_reclaim = 0, }; struct blk_plug plug; - int err = 0; + int err = 0, cnt = 0; blk_start_plug(&plug); -retry_flush_dents: +retry_flush_quotas: f2fs_lock_all(sbi); + if (__need_flush_quota(sbi)) { + int locked; + + if (++cnt > DEFAULT_RETRY_QUOTA_FLUSH_COUNT) { + set_sbi_flag(sbi, SBI_QUOTA_SKIP_FLUSH); + set_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH); + goto retry_flush_dents; + } + f2fs_unlock_all(sbi); + + /* only failed during mount/umount/freeze/quotactl */ + locked = down_read_trylock(&sbi->sb->s_umount); + f2fs_quota_sync(sbi->sb, -1); + if (locked) + up_read(&sbi->sb->s_umount); + cond_resched(); + goto retry_flush_quotas; + } + +retry_flush_dents: /* write all the dirty dentry pages */ if (get_pages(sbi, F2FS_DIRTY_DENTS)) { f2fs_unlock_all(sbi); - err = sync_dirty_inodes(sbi, DIR_INODE); + err = f2fs_sync_dirty_inodes(sbi, DIR_INODE); if (err) goto out; cond_resched(); - goto retry_flush_dents; + goto retry_flush_quotas; } /* @@ -1083,7 +1216,7 @@ retry_flush_dents: if (err) goto out; cond_resched(); - goto retry_flush_dents; + goto retry_flush_quotas; } retry_flush_nodes: @@ -1091,7 +1224,9 @@ retry_flush_nodes: if (get_pages(sbi, F2FS_DIRTY_NODES)) { up_write(&sbi->node_write); - err = sync_node_pages(sbi, &wbc, false, FS_CP_NODE_IO); + atomic_inc(&sbi->wb_sync_req[NODE]); + err = f2fs_sync_node_pages(sbi, &wbc, false, FS_CP_NODE_IO); + atomic_dec(&sbi->wb_sync_req[NODE]); if (err) { up_write(&sbi->node_change); f2fs_unlock_all(sbi); @@ -1118,7 +1253,7 @@ static void unblock_operations(struct f2fs_sb_info *sbi) f2fs_unlock_all(sbi); } -static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi) +void f2fs_wait_on_all_pages_writeback(struct f2fs_sb_info *sbi) { DEFINE_WAIT(wait); @@ -1128,6 +1263,9 @@ static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi) if (!get_pages(sbi, F2FS_WB_CP_DATA)) break; + if (unlikely(f2fs_cp_error(sbi))) + break; + io_schedule_timeout(5*HZ); } finish_wait(&sbi->cp_wait, &wait); @@ -1166,9 +1304,28 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc) else __clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); - if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) + if (is_sbi_flag_set(sbi, SBI_NEED_FSCK) || + is_sbi_flag_set(sbi, SBI_IS_RESIZEFS)) __set_ckpt_flags(ckpt, CP_FSCK_FLAG); + if (is_sbi_flag_set(sbi, SBI_CP_DISABLED)) + __set_ckpt_flags(ckpt, CP_DISABLED_FLAG); + else + __clear_ckpt_flags(ckpt, CP_DISABLED_FLAG); + + if (is_sbi_flag_set(sbi, SBI_CP_DISABLED_QUICK)) + __set_ckpt_flags(ckpt, CP_DISABLED_QUICK_FLAG); + else + __clear_ckpt_flags(ckpt, CP_DISABLED_QUICK_FLAG); + + if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH)) + __set_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG); + else + __clear_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG); + + if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR)) + __set_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG); + /* set this flag to activate crc|cp_ver for recovery */ __set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG); __clear_ckpt_flags(ckpt, CP_NOCRC_RECOVERY_FLAG); @@ -1185,24 +1342,28 @@ static void commit_checkpoint(struct f2fs_sb_info *sbi, /* * pagevec_lookup_tag and lock_page again will take - * some extra time. Therefore, update_meta_pages and - * sync_meta_pages are combined in this function. + * some extra time. Therefore, f2fs_update_meta_pages and + * f2fs_sync_meta_pages are combined in this function. 
*/ - struct page *page = grab_meta_page(sbi, blk_addr); + struct page *page = f2fs_grab_meta_page(sbi, blk_addr); int err; + f2fs_wait_on_page_writeback(page, META, true, true); + memcpy(page_address(page), src, PAGE_SIZE); - set_page_dirty(page); - f2fs_wait_on_page_writeback(page, META, true); - f2fs_bug_on(sbi, PageWriteback(page)); + set_page_dirty(page); if (unlikely(!clear_page_dirty_for_io(page))) f2fs_bug_on(sbi, 1); /* writeout cp pack 2 page */ err = __f2fs_write_meta_page(page, &wbc, FS_CP_META_IO); - f2fs_bug_on(sbi, err); + if (unlikely(err && f2fs_cp_error(sbi))) { + f2fs_put_page(page, 1); + return; + } + f2fs_bug_on(sbi, err); f2fs_put_page(page, 0); /* submit checkpoint (with barrier if NOBARRIER is not set) */ @@ -1225,17 +1386,15 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) int err; /* Flush all the NAT/SIT pages */ - while (get_pages(sbi, F2FS_DIRTY_META)) { - sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO); - if (unlikely(f2fs_cp_error(sbi))) - return -EIO; - } + f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO); + f2fs_bug_on(sbi, get_pages(sbi, F2FS_DIRTY_META) && + !f2fs_cp_error(sbi)); /* * modify checkpoint * version number is already updated */ - ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi)); + ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi, true)); ckpt->free_segment_count = cpu_to_le32(free_segments(sbi)); for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) { ckpt->cur_node_segno[i] = @@ -1255,7 +1414,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) } /* 2 cp + n data seg summary + orphan inode blocks */ - data_sum_blocks = npages_for_summary_flush(sbi, false); + data_sum_blocks = f2fs_npages_for_summary_flush(sbi, false); spin_lock_irqsave(&sbi->cp_lock, flags); if (data_sum_blocks < NR_CURSEG_DATA_TYPE) __set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG); @@ -1283,7 +1442,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP)); get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP)); - crc32 = f2fs_crc32(sbi, ckpt, le32_to_cpu(ckpt->checksum_offset)); + crc32 = f2fs_checkpoint_chksum(sbi, ckpt); *((__le32 *)((unsigned char *)ckpt + le32_to_cpu(ckpt->checksum_offset))) = cpu_to_le32(crc32); @@ -1300,22 +1459,15 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) blk = start_blk + sbi->blocks_per_seg - nm_i->nat_bits_blocks; for (i = 0; i < nm_i->nat_bits_blocks; i++) - update_meta_page(sbi, nm_i->nat_bits + + f2fs_update_meta_page(sbi, nm_i->nat_bits + (i << F2FS_BLKSIZE_BITS), blk + i); - - /* Flush all the NAT BITS pages */ - while (get_pages(sbi, F2FS_DIRTY_META)) { - sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO); - if (unlikely(f2fs_cp_error(sbi))) - return -EIO; - } } /* write out checkpoint buffer at block 0 */ - update_meta_page(sbi, ckpt, start_blk++); + f2fs_update_meta_page(sbi, ckpt, start_blk++); for (i = 1; i < 1 + cp_payload_blks; i++) - update_meta_page(sbi, (char *)ckpt + i * F2FS_BLKSIZE, + f2fs_update_meta_page(sbi, (char *)ckpt + i * F2FS_BLKSIZE, start_blk++); if (orphan_num) { @@ -1323,7 +1475,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) start_blk += orphan_blocks; } - write_data_summaries(sbi, start_blk); + f2fs_write_data_summaries(sbi, start_blk); start_blk += data_sum_blocks; /* Record write statistics in the hot node summary */ @@ -1334,7 +1486,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) 
seg_i->journal->info.kbytes_written = cpu_to_le64(kbytes_written); if (__remain_node_summaries(cpc->reason)) { - write_node_summaries(sbi, start_blk); + f2fs_write_node_summaries(sbi, start_blk); start_blk += NR_CURSEG_NODE_TYPE; } @@ -1343,13 +1495,12 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) percpu_counter_set(&sbi->alloc_valid_block_count, 0); /* Here, we have one bio having CP pack except cp pack 2 page */ - sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO); + f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO); + f2fs_bug_on(sbi, get_pages(sbi, F2FS_DIRTY_META) && + !f2fs_cp_error(sbi)); /* wait for previous submitted meta pages writeback */ - wait_on_all_pages_writeback(sbi); - - if (unlikely(f2fs_cp_error(sbi))) - return -EIO; + f2fs_wait_on_all_pages_writeback(sbi); /* flush all device cache */ err = f2fs_flush_device_cache(sbi); @@ -1358,15 +1509,28 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* barrier and flush checkpoint cp pack 2 page if it can */ commit_checkpoint(sbi, ckpt, start_blk); - wait_on_all_pages_writeback(sbi); + f2fs_wait_on_all_pages_writeback(sbi); - release_ino_entry(sbi, false); + /* + * invalidate the intermediate page cache borrowed from the meta inode, + * which is used for migration of an encrypted inode's blocks. + */ + if (f2fs_sb_has_encrypt(sbi)) + invalidate_mapping_pages(META_MAPPING(sbi), + MAIN_BLKADDR(sbi), MAX_BLKADDR(sbi) - 1); - if (unlikely(f2fs_cp_error(sbi))) - return -EIO; + f2fs_release_ino_entry(sbi, false); + + f2fs_reset_fsync_node_info(sbi); clear_sbi_flag(sbi, SBI_IS_DIRTY); clear_sbi_flag(sbi, SBI_NEED_CP); + clear_sbi_flag(sbi, SBI_QUOTA_SKIP_FLUSH); + + spin_lock(&sbi->stat_lock); + sbi->unusable_block_count = 0; + spin_unlock(&sbi->stat_lock); + __set_cp_next_pack(sbi); /* @@ -1379,18 +1543,26 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) f2fs_bug_on(sbi, get_pages(sbi, F2FS_DIRTY_DENTS)); - return 0; + return unlikely(f2fs_cp_error(sbi)) ? -EIO : 0; }
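do_checkpoint() above stamps the checkpoint version at both the head and the tail of the pack it writes, and validate_checkpoint()/get_checkpoint_version() earlier accept a pack only when the two stamps agree, which is what lets a torn write be detected and the other pack used instead. A small sketch of that head/tail agreement check (struct pack is illustrative, not the on-disk layout):

#include <stdint.h>
#include <stdio.h>

/* a pack is valid only if the version at its head matches its tail */
struct pack { uint64_t head_ver, tail_ver; };

static int pack_version(const struct pack *p, uint64_t *ver)
{
	if (p->head_ver != p->tail_ver)
		return -1;		/* torn or partial checkpoint */
	*ver = p->head_ver;
	return 0;
}

int main(void)
{
	struct pack a = { 7, 7 }, b = { 8, 5 };	/* b was torn mid-write */
	uint64_t v;

	if (pack_version(&a, &v) == 0)
		printf("pack A valid, version %llu\n", (unsigned long long)v);
	if (pack_version(&b, &v) != 0)
		printf("pack B rejected\n");
	return 0;
}

/* * We guarantee that this checkpoint procedure will not fail.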
*/ -int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) +int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) { struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); unsigned long long ckpt_ver; int err = 0; + if (f2fs_readonly(sbi->sb) || f2fs_hw_is_readonly(sbi)) + return -EROFS; + + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { + if (cpc->reason != CP_PAUSE) + return 0; + f2fs_warn(sbi, "Start checkpoint disabled!"); + } mutex_lock(&sbi->cp_mutex); if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) && @@ -1401,10 +1573,6 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) err = -EIO; goto out; } - if (f2fs_readonly(sbi->sb)) { - err = -EROFS; - goto out; - } trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "start block_ops"); @@ -1418,7 +1586,7 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* this is the case of multiple fstrims without any changes */ if (cpc->reason & CP_DISCARD) { - if (!exist_trim_candidates(sbi, cpc)) { + if (!f2fs_exist_trim_candidates(sbi, cpc)) { unblock_operations(sbi); goto out; } @@ -1426,8 +1594,8 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) if (NM_I(sbi)->dirty_nat_cnt == 0 && SIT_I(sbi)->dirty_sentries == 0 && prefree_segments(sbi) == 0) { - flush_sit_entries(sbi, cpc); - clear_prefree_segments(sbi, cpc); + f2fs_flush_sit_entries(sbi, cpc); + f2fs_clear_prefree_segments(sbi, cpc); unblock_operations(sbi); goto out; } @@ -1442,22 +1610,24 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) ckpt->checkpoint_ver = cpu_to_le64(++ckpt_ver); /* write cached NAT/SIT entries to NAT/SIT area */ - flush_nat_entries(sbi, cpc); - flush_sit_entries(sbi, cpc); + err = f2fs_flush_nat_entries(sbi, cpc); + if (err) + goto stop; + + f2fs_flush_sit_entries(sbi, cpc); /* unlock all the fs_lock[] in do_checkpoint() */ err = do_checkpoint(sbi, cpc); if (err) - release_discard_addrs(sbi); + f2fs_release_discard_addrs(sbi); else - clear_prefree_segments(sbi, cpc); - + f2fs_clear_prefree_segments(sbi, cpc); +stop: unblock_operations(sbi); stat_inc_cp_count(sbi->stat_info); if (cpc->reason & CP_RECOVERY) - f2fs_msg(sbi->sb, KERN_NOTICE, - "checkpoint: version = %llx", ckpt_ver); + f2fs_notice(sbi, "checkpoint: version = %llx", ckpt_ver); /* do checkpoint periodically */ f2fs_update_time(sbi, CP_TIME); @@ -1467,7 +1637,7 @@ out: return err; } -void init_ino_entry_info(struct f2fs_sb_info *sbi) +void f2fs_init_ino_entry_info(struct f2fs_sb_info *sbi) { int i; @@ -1485,23 +1655,23 @@ void init_ino_entry_info(struct f2fs_sb_info *sbi) F2FS_ORPHANS_PER_BLOCK; } -int __init create_checkpoint_caches(void) +int __init f2fs_create_checkpoint_caches(void) { ino_entry_slab = f2fs_kmem_cache_create("f2fs_ino_entry", sizeof(struct ino_entry)); if (!ino_entry_slab) return -ENOMEM; - inode_entry_slab = f2fs_kmem_cache_create("f2fs_inode_entry", + f2fs_inode_entry_slab = f2fs_kmem_cache_create("f2fs_inode_entry", sizeof(struct inode_entry)); - if (!inode_entry_slab) { + if (!f2fs_inode_entry_slab) { kmem_cache_destroy(ino_entry_slab); return -ENOMEM; } return 0; } -void destroy_checkpoint_caches(void) +void f2fs_destroy_checkpoint_caches(void) { kmem_cache_destroy(ino_entry_slab); - kmem_cache_destroy(inode_entry_slab); + kmem_cache_destroy(f2fs_inode_entry_slab); } diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 3d846b027fa1..568aabab15a4 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/data.c * 
* Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/fs.h> #include <linux/f2fs_fs.h> @@ -17,6 +14,7 @@ #include <linux/pagevec.h> #include <linux/blkdev.h> #include <linux/bio.h> +#include <linux/swap.h> #include <linux/prefetch.h> #include <linux/uio.h> #include <linux/cleancache.h> @@ -48,11 +46,30 @@ static bool __is_cp_guaranteed(struct page *page) if (inode->i_ino == F2FS_META_INO(sbi) || inode->i_ino == F2FS_NODE_INO(sbi) || S_ISDIR(inode->i_mode) || + (S_ISREG(inode->i_mode) && + (f2fs_is_atomic_file(inode) || IS_NOQUOTA(inode))) || is_cold_data(page)) return true; return false; } +static enum count_type __read_io_type(struct page *page) +{ + struct address_space *mapping = page_file_mapping(page); + + if (mapping) { + struct inode *inode = mapping->host; + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + + if (inode->i_ino == F2FS_META_INO(sbi)) + return F2FS_RD_META; + + if (inode->i_ino == F2FS_NODE_INO(sbi)) + return F2FS_RD_NODE; + } + return F2FS_RD_DATA; +} + /* postprocessing steps for read bios */ enum bio_post_read_step { STEP_INITIAL = 0, @@ -78,10 +95,12 @@ static void __read_end_io(struct bio *bio) /* PG_error was set if any post_read step failed */ if (bio->bi_error || PageError(page)) { ClearPageUptodate(page); - SetPageError(page); + /* will re-read again later */ + ClearPageError(page); } else { SetPageUptodate(page); } + dec_page_count(F2FS_P_SB(page), __read_io_type(page)); unlock_page(page); } if (bio->bi_private) @@ -124,12 +143,12 @@ static bool f2fs_bio_post_read_required(struct bio *bio) static void f2fs_read_end_io(struct bio *bio) { -#ifdef CONFIG_F2FS_FAULT_INJECTION - if (time_to_inject(F2FS_P_SB(bio->bi_io_vec->bv_page), FAULT_IO)) { - f2fs_show_injection_info(FAULT_IO); + struct page *first_page = bio->bi_io_vec[0].bv_page; + + if (time_to_inject(F2FS_P_SB(bio->bi_io_vec->bv_page), FAULT_READ_IO)) { + f2fs_show_injection_info(FAULT_READ_IO); bio->bi_error = -EIO; } -#endif if (f2fs_bio_post_read_required(bio)) { struct bio_post_read_ctx *ctx = bio->bi_private; @@ -139,6 +158,13 @@ static void f2fs_read_end_io(struct bio *bio) return; } + if (first_page != NULL && + __read_io_type(first_page) == F2FS_RD_DATA) { + trace_android_fs_dataread_end(first_page->mapping->host, + page_offset(first_page), + bio->bi_iter.bi_size); + } + __read_end_io(bio); } @@ -148,6 +174,11 @@ static void f2fs_write_end_io(struct bio *bio) struct bio_vec *bvec; int i; + if (time_to_inject(sbi, FAULT_WRITE_IO)) { + f2fs_show_injection_info(FAULT_WRITE_IO); + bio->bi_error = -EIO; + } + bio_for_each_segment_all(bvec, bio, i) { struct page *page = bvec->bv_page; enum count_type type = WB_DATA_TYPE(page); @@ -175,6 +206,8 @@ static void f2fs_write_end_io(struct bio *bio) page->index != nid_of_node(page)); dec_page_count(sbi, type); + if (f2fs_in_warm_node_list(sbi, page)) + f2fs_del_fsync_node_entry(sbi, page); clear_cold_data(page); end_page_writeback(page); } @@ -194,12 +227,14 @@ struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi, struct block_device *bdev = sbi->sb->s_bdev; int i; - for (i = 0; i < sbi->s_ndevs; i++) { - if (FDEV(i).start_blk <= blk_addr && - FDEV(i).end_blk >= blk_addr) { - blk_addr -= FDEV(i).start_blk; - bdev = FDEV(i).bdev; - break; + if (f2fs_is_multi_device(sbi)) { + for (i = 0; i < sbi->s_ndevs; i++) { + if 
(FDEV(i).start_blk <= blk_addr && + FDEV(i).end_blk >= blk_addr) { + blk_addr -= FDEV(i).start_blk; + bdev = FDEV(i).bdev; + break; + } } } if (bio) { @@ -213,6 +248,9 @@ int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr) { int i; + if (!f2fs_is_multi_device(sbi)) + return 0; + for (i = 0; i < sbi->s_ndevs; i++) if (FDEV(i).start_blk <= blkaddr && FDEV(i).end_blk >= blkaddr) return i; @@ -244,7 +282,7 @@ static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr, } else { bio->bi_end_io = f2fs_write_end_io; bio->bi_private = sbi; - bio->bi_write_hint = io_type_to_rw_hint(sbi, type, temp); + bio->bi_write_hint = f2fs_io_type_to_rw_hint(sbi, type, temp); } if (wbc) wbc_init_bio(wbc, bio); @@ -261,7 +299,7 @@ static inline void __submit_bio(struct f2fs_sb_info *sbi, if (type != DATA && type != NODE) goto submit_io; - if (f2fs_sb_has_blkzoned(sbi->sb) && current->plug) + if (test_opt(sbi, LFS) && current->plug) blk_finish_plug(current->plug); start = bio->bi_iter.bi_size >> F2FS_BLKSIZE_BITS; @@ -274,9 +312,10 @@ static inline void __submit_bio(struct f2fs_sb_info *sbi, for (; start < F2FS_IO_SIZE(sbi); start++) { struct page *page = mempool_alloc(sbi->write_io_dummy, - GFP_NOIO | __GFP_ZERO | __GFP_NOFAIL); + GFP_NOIO | __GFP_NOFAIL); f2fs_bug_on(sbi, !page); + zero_user_segment(page, 0, PAGE_SIZE); SetPagePrivate(page); set_page_private(page, (unsigned long)DUMMY_WRITTEN_PAGE); lock_page(page); @@ -298,6 +337,32 @@ submit_io: submit_bio(bio_op(bio), bio); } +static void __f2fs_submit_read_bio(struct f2fs_sb_info *sbi, + struct bio *bio, enum page_type type) +{ + if (trace_android_fs_dataread_start_enabled() && (type == DATA)) { + struct page *first_page = bio->bi_io_vec[0].bv_page; + + if (first_page != NULL && + __read_io_type(first_page) == F2FS_RD_DATA) { + char *path, pathbuf[MAX_TRACE_PATHBUF_LEN]; + + path = android_fstrace_get_pathname(pathbuf, + MAX_TRACE_PATHBUF_LEN, + first_page->mapping->host); + + trace_android_fs_dataread_start( + first_page->mapping->host, + page_offset(first_page), + bio->bi_iter.bi_size, + current->pid, + path, + current->comm); + } + } + __submit_bio(sbi, bio, type); +} + static void __submit_merged_bio(struct f2fs_bio_info *io) { struct f2fs_io_info *fio = &io->fio; @@ -316,31 +381,30 @@ static void __submit_merged_bio(struct f2fs_bio_info *io) io->bio = NULL; } -static bool __has_merged_page(struct f2fs_bio_info *io, - struct inode *inode, nid_t ino, pgoff_t idx) +static bool __has_merged_page(struct bio *bio, struct inode *inode, + struct page *page, nid_t ino) { struct bio_vec *bvec; struct page *target; int i; - if (!io->bio) + if (!bio) return false; - if (!inode && !ino) + if (!inode && !page && !ino) return true; - bio_for_each_segment_all(bvec, io->bio, i) { + bio_for_each_segment_all(bvec, bio, i) { if (bvec->bv_page->mapping) target = bvec->bv_page; else target = fscrypt_control_page(bvec->bv_page); - if (idx != target->index) - continue; - if (inode && inode == target->mapping->host) return true; + if (page && page == target) + return true; if (ino && ino == ino_of_node(target)) return true; } @@ -348,28 +412,6 @@ static bool __has_merged_page(struct f2fs_bio_info *io, return false; } -static bool has_merged_page(struct f2fs_sb_info *sbi, struct inode *inode, - nid_t ino, pgoff_t idx, enum page_type type) -{ - enum page_type btype = PAGE_TYPE_OF_BIO(type); - enum temp_type temp; - struct f2fs_bio_info *io; - bool ret = false; - - for (temp = HOT; temp < NR_TEMP_TYPE; temp++) { - io = sbi->write_io[btype] + 
temp; - - down_read(&io->io_rwsem); - ret = __has_merged_page(io, inode, ino, idx); - up_read(&io->io_rwsem); - - /* TODO: use HOT temp only for meta pages now. */ - if (ret || btype == META) - break; - } - return ret; -} - static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type, enum temp_type temp) { @@ -391,17 +433,23 @@ static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi, } static void __submit_merged_write_cond(struct f2fs_sb_info *sbi, - struct inode *inode, nid_t ino, pgoff_t idx, - enum page_type type, bool force) + struct inode *inode, struct page *page, + nid_t ino, enum page_type type, bool force) { enum temp_type temp; - - if (!force && !has_merged_page(sbi, inode, ino, idx, type)) - return; + bool ret = true; for (temp = HOT; temp < NR_TEMP_TYPE; temp++) { + if (!force) { + enum page_type btype = PAGE_TYPE_OF_BIO(type); + struct f2fs_bio_info *io = sbi->write_io[btype] + temp; - __f2fs_submit_merged_write(sbi, type, temp); + down_read(&io->io_rwsem); + ret = __has_merged_page(io->bio, inode, page, ino); + up_read(&io->io_rwsem); + } + if (ret) + __f2fs_submit_merged_write(sbi, type, temp); /* TODO: use HOT temp only for meta pages now. */ if (type >= META) @@ -411,14 +459,14 @@ static void __submit_merged_write_cond(struct f2fs_sb_info *sbi, void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type) { - __submit_merged_write_cond(sbi, NULL, 0, 0, type, true); + __submit_merged_write_cond(sbi, NULL, NULL, 0, type, true); } void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi, - struct inode *inode, nid_t ino, pgoff_t idx, - enum page_type type) + struct inode *inode, struct page *page, + nid_t ino, enum page_type type) { - __submit_merged_write_cond(sbi, inode, ino, idx, type, false); + __submit_merged_write_cond(sbi, inode, page, ino, type, false); } void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi) @@ -438,7 +486,11 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) struct page *page = fio->encrypted_page ? fio->encrypted_page : fio->page; - verify_block_addr(fio, fio->new_blkaddr); + if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr, + fio->is_por ? META_POR : (__is_meta_io(fio) ? + META_GENERIC : DATA_GENERIC_ENHANCE))) + return -EFSCORRUPTED; + trace_f2fs_submit_page_bio(page, fio); f2fs_trace_ios(fio, 0); @@ -450,22 +502,80 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) bio_put(bio); return -EFAULT; } + + if (fio->io_wbc && !is_read_io(fio->op)) + wbc_account_io(fio->io_wbc, page, PAGE_SIZE); + bio_set_op_attrs(bio, fio->op, fio->op_flags); - __submit_bio(fio->sbi, bio, fio->type); + inc_page_count(fio->sbi, is_read_io(fio->op) ? + __read_io_type(page): WB_DATA_TYPE(fio->page)); - if (!is_read_io(fio->op)) - inc_page_count(fio->sbi, WB_DATA_TYPE(fio->page)); + __f2fs_submit_read_bio(fio->sbi, bio, fio->type); return 0; } -int f2fs_submit_page_write(struct f2fs_io_info *fio) +int f2fs_merge_page_bio(struct f2fs_io_info *fio) +{ + struct bio *bio = *fio->bio; + struct page *page = fio->encrypted_page ? + fio->encrypted_page : fio->page; + + if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr, + __is_meta_io(fio) ? 
META_GENERIC : DATA_GENERIC)) + return -EFSCORRUPTED; + + trace_f2fs_submit_page_bio(page, fio); + f2fs_trace_ios(fio, 0); + + if (bio && (*fio->last_block + 1 != fio->new_blkaddr || + !__same_bdev(fio->sbi, fio->new_blkaddr, bio))) { + __submit_bio(fio->sbi, bio, fio->type); + bio = NULL; + } +alloc_new: + if (!bio) { + bio = __bio_alloc(fio->sbi, fio->new_blkaddr, fio->io_wbc, + BIO_MAX_PAGES, false, fio->type, fio->temp); + bio_set_op_attrs(bio, fio->op, fio->op_flags); + } + + if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { + __submit_bio(fio->sbi, bio, fio->type); + bio = NULL; + goto alloc_new; + } + + if (fio->io_wbc) + wbc_account_io(fio->io_wbc, page, PAGE_SIZE); + + inc_page_count(fio->sbi, WB_DATA_TYPE(page)); + + *fio->last_block = fio->new_blkaddr; + *fio->bio = bio; + + return 0; +} + +static void f2fs_submit_ipu_bio(struct f2fs_sb_info *sbi, struct bio **bio, + struct page *page) +{ + if (!bio) + return; + + if (!__has_merged_page(*bio, NULL, page, 0)) + return; + + __submit_bio(sbi, *bio, DATA); + *bio = NULL; +} + +void f2fs_submit_page_write(struct f2fs_io_info *fio) { struct f2fs_sb_info *sbi = fio->sbi; enum page_type btype = PAGE_TYPE_OF_BIO(fio->type); struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp; struct page *bio_page; - int err = 0; f2fs_bug_on(sbi, is_read_io(fio->op)); @@ -475,7 +585,7 @@ next: spin_lock(&io->io_lock); if (list_empty(&io->io_list)) { spin_unlock(&io->io_lock); - goto out_fail; + goto out; } fio = list_first_entry(&io->io_list, struct f2fs_io_info, list); @@ -483,9 +593,7 @@ next: spin_unlock(&io->io_lock); } - if (fio->old_blkaddr != NEW_ADDR) - verify_block_addr(fio, fio->old_blkaddr); - verify_block_addr(fio, fio->new_blkaddr); + verify_fio_blkaddr(fio); bio_page = fio->encrypted_page ? 
fio->encrypted_page : fio->page; @@ -502,9 +610,9 @@ alloc_new: if (io->bio == NULL) { if ((fio->type == DATA || fio->type == NODE) && fio->new_blkaddr & F2FS_IO_SIZE_MASK(sbi)) { - err = -EAGAIN; dec_page_count(sbi, WB_DATA_TYPE(bio_page)); - goto out_fail; + fio->retry = true; + goto skip; } io->bio = __bio_alloc(sbi, fio->new_blkaddr, fio->io_wbc, BIO_MAX_PAGES, false, @@ -524,16 +632,18 @@ alloc_new: f2fs_trace_ios(fio, 0); trace_f2fs_submit_page_write(fio->page, fio); - +skip: if (fio->in_list) goto next; -out_fail: +out: + if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) || + f2fs_is_checkpoint_ready(sbi)) + __submit_merged_bio(io); up_write(&io->io_rwsem); - return err; } static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, - unsigned nr_pages) + unsigned nr_pages, unsigned op_flag) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct bio *bio; @@ -545,7 +655,7 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, return ERR_PTR(-ENOMEM); f2fs_target_device(sbi, blkaddr, bio); bio->bi_end_io = f2fs_read_end_io; - bio_set_op_attrs(bio, REQ_OP_READ, 0); + bio_set_op_attrs(bio, REQ_OP_READ, op_flag); if (f2fs_encrypted_file(inode)) post_read_steps |= 1 << STEP_DECRYPT; @@ -558,9 +668,6 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, ctx->bio = bio; ctx->enabled_steps = post_read_steps; bio->bi_private = ctx; - - /* wait the page to be moved by cleaning */ - f2fs_wait_on_block_writeback(sbi, blkaddr); } return bio; @@ -570,16 +677,23 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, static int f2fs_submit_page_read(struct inode *inode, struct page *page, block_t blkaddr) { - struct bio *bio = f2fs_grab_read_bio(inode, blkaddr, 1); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct bio *bio; + bio = f2fs_grab_read_bio(inode, blkaddr, 1, 0); if (IS_ERR(bio)) return PTR_ERR(bio); + /* wait for GCed page writeback via META_MAPPING */ + f2fs_wait_on_block_writeback(inode, blkaddr); + if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { bio_put(bio); return -EFAULT; } - __submit_bio(F2FS_I_SB(inode), bio, DATA); + ClearPageError(page); + inc_page_count(sbi, F2FS_RD_DATA); + __f2fs_submit_read_bio(sbi, bio, DATA); return 0; } @@ -603,9 +717,9 @@ static void __set_data_blkaddr(struct dnode_of_data *dn) * ->node_page * update block addresses in the node page */ -void set_data_blkaddr(struct dnode_of_data *dn) +void f2fs_set_data_blkaddr(struct dnode_of_data *dn) { - f2fs_wait_on_page_writeback(dn->node_page, NODE, true); + f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true); __set_data_blkaddr(dn); if (set_page_dirty(dn->node_page)) dn->node_changed = true; @@ -614,12 +728,12 @@ void set_data_blkaddr(struct dnode_of_data *dn) void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr) { dn->data_blkaddr = blkaddr; - set_data_blkaddr(dn); + f2fs_set_data_blkaddr(dn); f2fs_update_extent_cache(dn); } /* dn->ofs_in_node will be returned with up-to-date last block pointer */ -int reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count) +int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count) { struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); int err; @@ -635,7 +749,7 @@ int reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count) trace_f2fs_reserve_new_blocks(dn->inode, dn->nid, dn->ofs_in_node, count); - f2fs_wait_on_page_writeback(dn->node_page, NODE, true); + f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true); for (; count > 0; 
dn->ofs_in_node++) { block_t blkaddr = datablock_addr(dn->inode, @@ -653,12 +767,12 @@ int reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count) } /* Should keep dn->ofs_in_node unchanged */ -int reserve_new_block(struct dnode_of_data *dn) +int f2fs_reserve_new_block(struct dnode_of_data *dn) { unsigned int ofs_in_node = dn->ofs_in_node; int ret; - ret = reserve_new_blocks(dn, 1); + ret = f2fs_reserve_new_blocks(dn, 1); dn->ofs_in_node = ofs_in_node; return ret; } @@ -668,12 +782,12 @@ int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index) bool need_put = dn->inode_page ? false : true; int err; - err = get_dnode_of_data(dn, index, ALLOC_NODE); + err = f2fs_get_dnode_of_data(dn, index, ALLOC_NODE); if (err) return err; if (dn->data_blkaddr == NULL_ADDR) - err = reserve_new_block(dn); + err = f2fs_reserve_new_block(dn); if (err || need_put) f2fs_put_dnode(dn); return err; @@ -692,7 +806,7 @@ int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index) return f2fs_reserve_block(dn, index); } -struct page *get_read_data_page(struct inode *inode, pgoff_t index, +struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index, int op_flags, bool for_write) { struct address_space *mapping = inode->i_mapping; @@ -707,11 +821,16 @@ struct page *get_read_data_page(struct inode *inode, pgoff_t index, if (f2fs_lookup_extent_cache(inode, index, &ei)) { dn.data_blkaddr = ei.blk + index - ei.fofs; + if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), dn.data_blkaddr, + DATA_GENERIC_ENHANCE_READ)) { + err = -EFSCORRUPTED; + goto put_err; + } goto got_it; } set_new_dnode(&dn, inode, NULL, NULL, 0); - err = get_dnode_of_data(&dn, index, LOOKUP_NODE); + err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE); if (err) goto put_err; f2fs_put_dnode(&dn); @@ -720,6 +839,13 @@ struct page *get_read_data_page(struct inode *inode, pgoff_t index, err = -ENOENT; goto put_err; } + if (dn.data_blkaddr != NEW_ADDR && + !f2fs_is_valid_blkaddr(F2FS_I_SB(inode), + dn.data_blkaddr, + DATA_GENERIC_ENHANCE)) { + err = -EFSCORRUPTED; + goto put_err; + } got_it: if (PageUptodate(page)) { unlock_page(page); @@ -730,7 +856,8 @@ got_it: * A new dentry page is allocated but not able to be written, since its * new inode page couldn't be allocated due to -ENOSPC. * In such a case, its blkaddr can remain NEW_ADDR. - * see, f2fs_add_link -> get_new_data_page -> init_inode_metadata. + * see f2fs_add_link -> f2fs_get_new_data_page -> + * f2fs_init_inode_metadata. */ if (dn.data_blkaddr == NEW_ADDR) { zero_user_segment(page, 0, PAGE_SIZE); @@ -750,7 +877,7 @@ put_err: return ERR_PTR(err); }
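The DATA_GENERIC_ENHANCE checks added to f2fs_get_read_data_page() above follow one rule: a block address read from disk, whether from the extent cache or a dnode, is range-checked before it is ever used, and an out-of-range value marks the filesystem for fsck instead of being dereferenced. A bare-bones userspace version of that validate-before-use gate; the bounds are invented, and EUCLEAN is the Linux errno value that EFSCORRUPTED aliases in the kernel:

#include <errno.h>
#include <stdio.h>

#define MAIN_START 100UL	/* first valid data block (illustrative) */
#define MAX_BLK    1000UL	/* one past the last valid block */

/* reject out-of-range pointers read from disk before trusting them */
static int check_blkaddr(unsigned long blkaddr)
{
	if (blkaddr < MAIN_START || blkaddr >= MAX_BLK)
		return -EUCLEAN;	/* corrupt metadata: flag for fsck */
	return 0;
}

int main(void)
{
	printf("%d %d\n", check_blkaddr(150), check_blkaddr(7));
	return 0;
}

-struct page *find_data_page(struct inode *inode, pgoff_t index) +struct page *f2fs_find_data_page(struct inode *inode, pgoff_t index) { struct address_space *mapping = inode->i_mapping; struct page *page; @@ -760,7 +887,7 @@ struct page *find_data_page(struct inode *inode, pgoff_t index) return page; f2fs_put_page(page, 0); - page = get_read_data_page(inode, index, REQ_SYNC, false); + page = f2fs_get_read_data_page(inode, index, REQ_SYNC, false); if (IS_ERR(page)) return page; @@ -780,13 +907,13 @@ struct page *find_data_page(struct inode *inode, pgoff_t index) * Because the callers, functions in dir.c and GC, should be able to know * whether this page exists or not.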
*/ -struct page *get_lock_data_page(struct inode *inode, pgoff_t index, +struct page *f2fs_get_lock_data_page(struct inode *inode, pgoff_t index, bool for_write) { struct address_space *mapping = inode->i_mapping; struct page *page; repeat: - page = get_read_data_page(inode, index, REQ_SYNC, for_write); + page = f2fs_get_read_data_page(inode, index, REQ_SYNC, for_write); if (IS_ERR(page)) return page; @@ -812,7 +939,7 @@ repeat: * Note that ipage is set only by make_empty_dir, and if any error occurs, * ipage should be released by this function. */ -struct page *get_new_data_page(struct inode *inode, +struct page *f2fs_get_new_data_page(struct inode *inode, struct page *ipage, pgoff_t index, bool new_i_size) { struct address_space *mapping = inode->i_mapping; @@ -851,7 +978,7 @@ struct page *get_new_data_page(struct inode *inode, /* if ipage exists, blkaddr should be NEW_ADDR */ f2fs_bug_on(F2FS_I_SB(inode), ipage); - page = get_lock_data_page(inode, index, true); + page = f2fs_get_lock_data_page(inode, index, true); if (IS_ERR(page)) return page; } @@ -867,35 +994,39 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type) struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); struct f2fs_summary sum; struct node_info ni; - pgoff_t fofs; + block_t old_blkaddr; blkcnt_t count = 1; int err; if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC))) return -EPERM; + err = f2fs_get_node_info(sbi, dn->nid, &ni); + if (err) + return err; + dn->data_blkaddr = datablock_addr(dn->inode, dn->node_page, dn->ofs_in_node); - if (dn->data_blkaddr == NEW_ADDR) + if (dn->data_blkaddr != NULL_ADDR) goto alloc; if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count)))) return err; alloc: - get_node_info(sbi, dn->nid, &ni); set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); - - allocate_data_block(sbi, NULL, dn->data_blkaddr, &dn->data_blkaddr, + old_blkaddr = dn->data_blkaddr; + f2fs_allocate_data_block(sbi, NULL, old_blkaddr, &dn->data_blkaddr, &sum, seg_type, NULL, false); - set_data_blkaddr(dn); - - /* update i_size */ - fofs = start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) + - dn->ofs_in_node; - if (i_size_read(dn->inode) < ((loff_t)(fofs + 1) << PAGE_SHIFT)) - f2fs_i_size_write(dn->inode, - ((loff_t)(fofs + 1) << PAGE_SHIFT)); + if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) + invalidate_mapping_pages(META_MAPPING(sbi), + old_blkaddr, old_blkaddr); + f2fs_set_data_blkaddr(dn); + + /* + * i_size will be updated by direct_IO. Otherwise, we'll get stale + * data from an unwritten block via dio_read. + */ return 0; } @@ -914,6 +1045,9 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from) return err; } + if (direct_io && allow_outplace_dio(inode, iocb, from)) + return 0; + if (is_inode_flag_set(inode, FI_NO_PREALLOC)) return 0; @@ -927,10 +1061,11 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from) map.m_next_pgofs = NULL; map.m_next_extent = NULL; map.m_seg_type = NO_CHECK_TYPE; + map.m_may_create = true; if (direct_io) { - map.m_seg_type = rw_hint_to_seg_type(iocb->ki_hint); - flag = f2fs_force_buffered_io(inode, WRITE) ? + map.m_seg_type = f2fs_rw_hint_to_seg_type(iocb->ki_hint); + flag = f2fs_force_buffered_io(inode, iocb, from) ?
F2FS_GET_BLOCK_PRE_AIO : F2FS_GET_BLOCK_PRE_DIO; goto map_blocks; @@ -955,7 +1090,7 @@ map_blocks: return err; } -static inline void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock) +void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock) { if (flag == F2FS_GET_BLOCK_PRE_AIO) { if (lock) @@ -985,7 +1120,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, unsigned int maxblocks = map->m_len; struct dnode_of_data dn; struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - int mode = create ? ALLOC_NODE : LOOKUP_NODE; + int mode = map->m_may_create ? ALLOC_NODE : LOOKUP_NODE; pgoff_t pgofs, end_offset, end; int err = 0, ofs = 1; unsigned int ofs_in_node, last_ofs_in_node; @@ -1005,21 +1140,30 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, end = pgofs + maxblocks; if (!create && f2fs_lookup_extent_cache(inode, pgofs, &ei)) { + if (test_opt(sbi, LFS) && flag == F2FS_GET_BLOCK_DIO && + map->m_may_create) + goto next_dnode; + map->m_pblk = ei.blk + pgofs - ei.fofs; map->m_len = min((pgoff_t)maxblocks, ei.fofs + ei.len - pgofs); map->m_flags = F2FS_MAP_MAPPED; if (map->m_next_extent) *map->m_next_extent = pgofs + map->m_len; + + /* for hardware encryption, but to avoid potential issues in the future */ + if (flag == F2FS_GET_BLOCK_DIO) + f2fs_wait_on_block_writeback_range(inode, + map->m_pblk, map->m_len); goto out; } next_dnode: - if (create) + if (map->m_may_create) __do_map_lock(sbi, flag, true); /* When reading holes, we need its node page */ set_new_dnode(&dn, inode, NULL, NULL, 0); - err = get_dnode_of_data(&dn, pgofs, mode); + err = f2fs_get_dnode_of_data(&dn, pgofs, mode); if (err) { if (flag == F2FS_GET_BLOCK_BMAP) map->m_pblk = 0; @@ -1027,10 +1171,10 @@ next_dnode: err = 0; if (map->m_next_pgofs) *map->m_next_pgofs = - get_next_page_offset(&dn, pgofs); + f2fs_get_next_page_offset(&dn, pgofs); if (map->m_next_extent) *map->m_next_extent = - get_next_page_offset(&dn, pgofs); + f2fs_get_next_page_offset(&dn, pgofs); } goto unlock_out; } @@ -1043,7 +1187,23 @@ next_dnode: next_block: blkaddr = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node); - if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) { + if (__is_valid_data_blkaddr(blkaddr) && + !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) { + err = -EFSCORRUPTED; + goto sync_out; + } + + if (__is_valid_data_blkaddr(blkaddr)) { + /* use out-of-place update for direct IO under LFS mode */ + if (test_opt(sbi, LFS) && flag == F2FS_GET_BLOCK_DIO && + map->m_may_create) { + err = __allocate_data_block(&dn, map->m_seg_type); + if (!err) { + blkaddr = dn.data_blkaddr; + set_inode_flag(inode, FI_APPEND_WRITE); + } + } + } else { if (create) { if (unlikely(f2fs_cp_error(sbi))) { err = -EIO; @@ -1055,6 +1215,8 @@ next_block: last_ofs_in_node = dn.ofs_in_node; } } else { + WARN_ON(flag != F2FS_GET_BLOCK_PRE_DIO && + flag != F2FS_GET_BLOCK_DIO); err = __allocate_data_block(&dn, map->m_seg_type); if (!err) @@ -1116,7 +1278,7 @@ skip: (pgofs == end || dn.ofs_in_node == end_offset)) { dn.ofs_in_node = ofs_in_node; - err = reserve_new_blocks(&dn, prealloc); + err = f2fs_reserve_new_blocks(&dn, prealloc); if (err) goto sync_out;
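In the next_block hunk above, a block that already has a valid address is no longer overwritten in place under LFS mode with direct IO: __allocate_data_block() picks a fresh block and the mapping moves, an out-of-place update that keeps writes sequential. The same idea as a toy log-structured store in userspace (the array-backed device and all names are invented):

#include <stdio.h>

#define NBLKS 8

static int store[NBLKS];	/* the toy "device" */
static int next_free = 3;	/* blocks 0..2 already hold data */

/* log-structured update: write a fresh block, return the new address */
static int write_out_of_place(int old_blk, int val)
{
	int new_blk = next_free++;	/* append at the log head */

	store[new_blk] = val;
	(void)old_blk;	/* the stale block is reclaimed by a cleaner later */
	return new_blk;
}

int main(void)
{
	int blk = 1;			/* logical block currently mapped to 1 */

	store[blk] = 100;
	blk = write_out_of_place(blk, 200);	/* remap instead of overwrite */
	printf("new addr %d holds %d\n", blk, store[blk]);
	return 0;
}

@@ -1145,13 +1307,19 @@ skip: f2fs_put_dnode(&dn); - if (create) { + if (map->m_may_create) { __do_map_lock(sbi, flag, false); f2fs_balance_fs(sbi, dn.node_changed); } goto next_dnode; sync_out: + + /* for hardware encryption, but to avoid potential issues in the future */ + if (flag == F2FS_GET_BLOCK_DIO && map->m_flags & F2FS_MAP_MAPPED) +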
f2fs_wait_on_block_writeback_range(inode, + map->m_pblk, map->m_len); + if (flag == F2FS_GET_BLOCK_PRECACHE) { if (map->m_flags & F2FS_MAP_MAPPED) { unsigned int ofs = start_pgofs - map->m_lblk; @@ -1165,7 +1333,7 @@ sync_out: } f2fs_put_dnode(&dn); unlock_out: - if (create) { + if (map->m_may_create) { __do_map_lock(sbi, flag, false); f2fs_balance_fs(sbi, dn.node_changed); } @@ -1187,6 +1355,7 @@ bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len) map.m_next_pgofs = NULL; map.m_next_extent = NULL; map.m_seg_type = NO_CHECK_TYPE; + map.m_may_create = false; last_lblk = F2FS_BLK_ALIGN(pos + len); while (map.m_lblk < last_lblk) { @@ -1201,7 +1370,7 @@ bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len) static int __get_data_block(struct inode *inode, sector_t iblock, struct buffer_head *bh, int create, int flag, - pgoff_t *next_pgofs, int seg_type) + pgoff_t *next_pgofs, int seg_type, bool may_write) { struct f2fs_map_blocks map; int err; @@ -1211,6 +1380,7 @@ static int __get_data_block(struct inode *inode, sector_t iblock, map.m_next_pgofs = next_pgofs; map.m_next_extent = NULL; map.m_seg_type = seg_type; + map.m_may_create = may_write; err = f2fs_map_blocks(inode, &map, create, flag); if (!err) { @@ -1227,16 +1397,25 @@ static int get_data_block(struct inode *inode, sector_t iblock, { return __get_data_block(inode, iblock, bh_result, create, flag, next_pgofs, - NO_CHECK_TYPE); + NO_CHECK_TYPE, create); +} + +static int get_data_block_dio_write(struct inode *inode, sector_t iblock, + struct buffer_head *bh_result, int create) +{ + return __get_data_block(inode, iblock, bh_result, create, + F2FS_GET_BLOCK_DIO, NULL, + f2fs_rw_hint_to_seg_type(inode->i_write_hint), + true); } static int get_data_block_dio(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { return __get_data_block(inode, iblock, bh_result, create, - F2FS_GET_BLOCK_DEFAULT, NULL, - rw_hint_to_seg_type( - inode->i_write_hint)); + F2FS_GET_BLOCK_DIO, NULL, + f2fs_rw_hint_to_seg_type(inode->i_write_hint), + false); } static int get_data_block_bmap(struct inode *inode, sector_t iblock, @@ -1248,7 +1427,7 @@ static int get_data_block_bmap(struct inode *inode, sector_t iblock, return __get_data_block(inode, iblock, bh_result, create, F2FS_GET_BLOCK_BMAP, NULL, - NO_CHECK_TYPE); + NO_CHECK_TYPE, create); } static inline sector_t logical_to_blk(struct inode *inode, loff_t offset) @@ -1280,7 +1459,11 @@ static int f2fs_xattr_fiemap(struct inode *inode, if (!page) return -ENOMEM; - get_node_info(sbi, inode->i_ino, &ni); + err = f2fs_get_node_info(sbi, inode->i_ino, &ni); + if (err) { + f2fs_put_page(page, 1); + return err; + } phys = (__u64)blk_to_logical(inode, ni.blk_addr); offset = offsetof(struct f2fs_inode, i_addr) + @@ -1307,7 +1490,11 @@ static int f2fs_xattr_fiemap(struct inode *inode, if (!page) return -ENOMEM; - get_node_info(sbi, xnid, &ni); + err = f2fs_get_node_info(sbi, xnid, &ni); + if (err) { + f2fs_put_page(page, 1); + return err; + } phys = (__u64)blk_to_logical(inode, ni.blk_addr); len = inode->i_sb->s_blocksize; @@ -1416,24 +1603,137 @@ out: return ret; } +static int f2fs_read_single_page(struct inode *inode, struct page *page, + unsigned nr_pages, + struct f2fs_map_blocks *map, + struct bio **bio_ret, + sector_t *last_block_in_bio, + bool is_readahead) +{ + struct bio *bio = *bio_ret; + const unsigned blkbits = inode->i_blkbits; + const unsigned blocksize = 1 << blkbits; + sector_t block_in_file; + sector_t last_block; + sector_t last_block_in_file; + 
sector_t block_nr; + int ret = 0; + + block_in_file = (sector_t)page_index(page); + last_block = block_in_file + nr_pages; + last_block_in_file = (i_size_read(inode) + blocksize - 1) >> + blkbits; + if (last_block > last_block_in_file) + last_block = last_block_in_file; + + /* just zeroing out page which is beyond EOF */ + if (block_in_file >= last_block) + goto zero_out; + /* + * Map blocks using the previous result first. + */ + if ((map->m_flags & F2FS_MAP_MAPPED) && + block_in_file > map->m_lblk && + block_in_file < (map->m_lblk + map->m_len)) + goto got_it; + + /* + * Then do more f2fs_map_blocks() calls until we are + * done with this page. + */ + map->m_lblk = block_in_file; + map->m_len = last_block - block_in_file; + + ret = f2fs_map_blocks(inode, map, 0, F2FS_GET_BLOCK_DEFAULT); + if (ret) + goto out; +got_it: + if ((map->m_flags & F2FS_MAP_MAPPED)) { + block_nr = map->m_pblk + block_in_file - map->m_lblk; + SetPageMappedToDisk(page); + + if (!PageUptodate(page) && (!PageSwapCache(page) && + !cleancache_get_page(page))) { + SetPageUptodate(page); + goto confused; + } + + if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr, + DATA_GENERIC_ENHANCE_READ)) { + ret = -EFSCORRUPTED; + goto out; + } + } else { +zero_out: + zero_user_segment(page, 0, PAGE_SIZE); + if (!PageUptodate(page)) + SetPageUptodate(page); + unlock_page(page); + goto out; + } + + /* + * This page will go to BIO. Do we need to send this + * BIO off first? + */ + if (bio && (*last_block_in_bio != block_nr - 1 || + !__same_bdev(F2FS_I_SB(inode), block_nr, bio))) { +submit_and_realloc: + __f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA); + bio = NULL; + } + if (bio == NULL) { + bio = f2fs_grab_read_bio(inode, block_nr, nr_pages, + is_readahead ? REQ_RAHEAD : 0); + if (IS_ERR(bio)) { + ret = PTR_ERR(bio); + bio = NULL; + goto out; + } + } + + /* + * If the page is under writeback, we need to wait for + * its completion to see the correct decrypted data. + */ + f2fs_wait_on_block_writeback(inode, block_nr); + + if (bio_add_page(bio, page, blocksize, 0) < blocksize) + goto submit_and_realloc; + + inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA); + ClearPageError(page); + *last_block_in_bio = block_nr; + goto out; +confused: + if (bio) { + __f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA); + bio = NULL; + } + unlock_page(page); +out: + *bio_ret = bio; + return ret; +} + /* * This function was originally taken from fs/mpage.c, and customized for f2fs. * Major change was from block_size == page_size in f2fs by default. + * + * Note that the aops->readpages() function is ONLY used for read-ahead. If + * this function ever deviates from doing just read-ahead, it should either + * use ->readpage() or do the necessary surgery to decouple ->readpages() + * from read-ahead. 
*/ static int f2fs_mpage_readpages(struct address_space *mapping, struct list_head *pages, struct page *page, - unsigned nr_pages) + unsigned nr_pages, bool is_readahead) { struct bio *bio = NULL; sector_t last_block_in_bio = 0; struct inode *inode = mapping->host; - const unsigned blkbits = inode->i_blkbits; - const unsigned blocksize = 1 << blkbits; - sector_t block_in_file; - sector_t last_block; - sector_t last_block_in_file; - sector_t block_nr; struct f2fs_map_blocks map; + int ret = 0; map.m_pblk = 0; map.m_lblk = 0; @@ -1442,6 +1742,7 @@ static int f2fs_mpage_readpages(struct address_space *mapping, map.m_next_pgofs = NULL; map.m_next_extent = NULL; map.m_seg_type = NO_CHECK_TYPE; + map.m_may_create = false; for (; nr_pages; nr_pages--) { if (pages) { @@ -1450,103 +1751,30 @@ static int f2fs_mpage_readpages(struct address_space *mapping, prefetchw(&page->flags); list_del(&page->lru); if (add_to_page_cache_lru(page, mapping, - page->index, GFP_KERNEL)) + page_index(page), GFP_KERNEL)) goto next_page; } - block_in_file = (sector_t)page->index; - last_block = block_in_file + nr_pages; - last_block_in_file = (i_size_read(inode) + blocksize - 1) >> - blkbits; - if (last_block > last_block_in_file) - last_block = last_block_in_file; - - /* - * Map blocks using the previous result first. - */ - if ((map.m_flags & F2FS_MAP_MAPPED) && - block_in_file > map.m_lblk && - block_in_file < (map.m_lblk + map.m_len)) - goto got_it; - - /* - * Then do more f2fs_map_blocks() calls until we are - * done with this page. - */ - map.m_flags = 0; - - if (block_in_file < last_block) { - map.m_lblk = block_in_file; - map.m_len = last_block - block_in_file; - - if (f2fs_map_blocks(inode, &map, 0, - F2FS_GET_BLOCK_DEFAULT)) - goto set_error_page; - } -got_it: - if ((map.m_flags & F2FS_MAP_MAPPED)) { - block_nr = map.m_pblk + block_in_file - map.m_lblk; - SetPageMappedToDisk(page); - - if (!PageUptodate(page) && !cleancache_get_page(page)) { - SetPageUptodate(page); - goto confused; - } - } else { + ret = f2fs_read_single_page(inode, page, nr_pages, &map, &bio, + &last_block_in_bio, is_readahead); + if (ret) { + SetPageError(page); zero_user_segment(page, 0, PAGE_SIZE); - if (!PageUptodate(page)) - SetPageUptodate(page); unlock_page(page); - goto next_page; - } - - /* - * This page will go to BIO. Do we need to send this - * BIO off first? - */ - if (bio && (last_block_in_bio != block_nr - 1 || - !__same_bdev(F2FS_I_SB(inode), block_nr, bio))) { -submit_and_realloc: - __submit_bio(F2FS_I_SB(inode), bio, DATA); - bio = NULL; - } - if (bio == NULL) { - bio = f2fs_grab_read_bio(inode, block_nr, nr_pages); - if (IS_ERR(bio)) { - bio = NULL; - goto set_error_page; - } - } - - if (bio_add_page(bio, page, blocksize, 0) < blocksize) - goto submit_and_realloc; - - last_block_in_bio = block_nr; - goto next_page; -set_error_page: - SetPageError(page); - zero_user_segment(page, 0, PAGE_SIZE); - unlock_page(page); - goto next_page; -confused: - if (bio) { - __submit_bio(F2FS_I_SB(inode), bio, DATA); - bio = NULL; } - unlock_page(page); next_page: if (pages) put_page(page); } BUG_ON(pages && !list_empty(pages)); if (bio) - __submit_bio(F2FS_I_SB(inode), bio, DATA); - return 0; + __f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA); + return pages ? 
0 : ret; } static int f2fs_read_data_page(struct file *file, struct page *page) { - struct inode *inode = page->mapping->host; + struct inode *inode = page_file_mapping(page)->host; int ret = -EAGAIN; trace_f2fs_readpage(page, DATA); @@ -1555,7 +1783,8 @@ static int f2fs_read_data_page(struct file *file, struct page *page) if (f2fs_has_inline_data(inode)) ret = f2fs_read_inline_data(inode, page); if (ret == -EAGAIN) - ret = f2fs_mpage_readpages(page->mapping, NULL, page, 1); + ret = f2fs_mpage_readpages(page_file_mapping(page), + NULL, page, 1, false); return ret; } @@ -1572,34 +1801,43 @@ static int f2fs_read_data_pages(struct file *file, if (f2fs_has_inline_data(inode)) return 0; - return f2fs_mpage_readpages(mapping, pages, NULL, nr_pages); + return f2fs_mpage_readpages(mapping, pages, NULL, nr_pages, true); } static int encrypt_one_page(struct f2fs_io_info *fio) { struct inode *inode = fio->page->mapping->host; + struct page *mpage; gfp_t gfp_flags = GFP_NOFS; if (!f2fs_encrypted_file(inode)) return 0; /* wait for GCed page writeback via META_MAPPING */ - f2fs_wait_on_block_writeback(fio->sbi, fio->old_blkaddr); + f2fs_wait_on_block_writeback(inode, fio->old_blkaddr); retry_encrypt: fio->encrypted_page = fscrypt_encrypt_page(inode, fio->page, PAGE_SIZE, 0, fio->page->index, gfp_flags); - if (!IS_ERR(fio->encrypted_page)) - return 0; + if (IS_ERR(fio->encrypted_page)) { + /* flush pending IOs and wait for a while in the ENOMEM case */ + if (PTR_ERR(fio->encrypted_page) == -ENOMEM) { + f2fs_flush_merged_writes(fio->sbi); + congestion_wait(BLK_RW_ASYNC, HZ/50); + gfp_flags |= __GFP_NOFAIL; + goto retry_encrypt; + } + return PTR_ERR(fio->encrypted_page); + } - /* flush pending IOs and wait for a while in the ENOMEM case */ - if (PTR_ERR(fio->encrypted_page) == -ENOMEM) { - f2fs_flush_merged_writes(fio->sbi); - congestion_wait(BLK_RW_ASYNC, HZ/50); - gfp_flags |= __GFP_NOFAIL; - goto retry_encrypt; + mpage = find_lock_page(META_MAPPING(fio->sbi), fio->old_blkaddr); + if (mpage) { + if (PageUptodate(mpage)) + memcpy(page_address(mpage), + page_address(fio->encrypted_page), PAGE_SIZE); + f2fs_put_page(mpage, 1); } - return PTR_ERR(fio->encrypted_page); + return 0; } static inline bool check_inplace_update_policy(struct inode *inode, @@ -1610,12 +1848,12 @@ static inline bool check_inplace_update_policy(struct inode *inode, if (policy & (0x1 << F2FS_IPU_FORCE)) return true; - if (policy & (0x1 << F2FS_IPU_SSR) && need_SSR(sbi)) + if (policy & (0x1 << F2FS_IPU_SSR) && f2fs_need_SSR(sbi)) return true; if (policy & (0x1 << F2FS_IPU_UTIL) && utilization(sbi) > SM_I(sbi)->min_ipu_util) return true; - if (policy & (0x1 << F2FS_IPU_SSR_UTIL) && need_SSR(sbi) && + if (policy & (0x1 << F2FS_IPU_SSR_UTIL) && f2fs_need_SSR(sbi) && utilization(sbi) > SM_I(sbi)->min_ipu_util) return true; @@ -1633,10 +1871,14 @@ static inline bool check_inplace_update_policy(struct inode *inode, is_inode_flag_set(inode, FI_NEED_IPU)) return true; + if (unlikely(fio && is_sbi_flag_set(sbi, SBI_CP_DISABLED) && + !f2fs_is_checkpointed_data(sbi, fio->old_blkaddr))) + return true; + return false; } -bool should_update_inplace(struct inode *inode, struct f2fs_io_info *fio) +bool f2fs_should_update_inplace(struct inode *inode, struct f2fs_io_info *fio) { if (f2fs_is_pinned_file(inode)) return true; @@ -1648,7 +1890,7 @@ bool should_update_inplace(struct inode *inode, struct f2fs_io_info *fio) return check_inplace_update_policy(inode, fio); } -bool should_update_outplace(struct inode *inode, struct f2fs_io_info *fio) +bool 
f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); @@ -1656,6 +1898,8 @@ bool should_update_outplace(struct inode *inode, struct f2fs_io_info *fio) return true; if (S_ISDIR(inode->i_mode)) return true; + if (IS_NOQUOTA(inode)) + return true; if (f2fs_is_atomic_file(inode)) return true; if (fio) { @@ -1663,6 +1907,9 @@ bool should_update_outplace(struct inode *inode, struct f2fs_io_info *fio) return true; if (IS_ATOMIC_WRITTEN_PAGE(fio->page)) return true; + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) && + f2fs_is_checkpointed_data(sbi, fio->old_blkaddr))) + return true; } return false; } @@ -1671,27 +1918,19 @@ static inline bool need_inplace_update(struct f2fs_io_info *fio) { struct inode *inode = fio->page->mapping->host; - if (should_update_outplace(inode, fio)) + if (f2fs_should_update_outplace(inode, fio)) return false; - return should_update_inplace(inode, fio); -} - -static inline bool valid_ipu_blkaddr(struct f2fs_io_info *fio) -{ - if (fio->old_blkaddr == NEW_ADDR) - return false; - if (fio->old_blkaddr == NULL_ADDR) - return false; - return true; + return f2fs_should_update_inplace(inode, fio); } -int do_write_data_page(struct f2fs_io_info *fio) +int f2fs_do_write_data_page(struct f2fs_io_info *fio) { struct page *page = fio->page; struct inode *inode = page->mapping->host; struct dnode_of_data dn; struct extent_info ei = {0,0,0}; + struct node_info ni; bool ipu_force = false; int err = 0; @@ -1700,18 +1939,20 @@ int do_write_data_page(struct f2fs_io_info *fio) f2fs_lookup_extent_cache(inode, page->index, &ei)) { fio->old_blkaddr = ei.blk + page->index - ei.fofs; - if (valid_ipu_blkaddr(fio)) { - ipu_force = true; - fio->need_lock = LOCK_DONE; - goto got_it; - } + if (!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr, + DATA_GENERIC_ENHANCE)) + return -EFSCORRUPTED; + + ipu_force = true; + fio->need_lock = LOCK_DONE; + goto got_it; } /* Deadlock between page->lock and f2fs_lock_op */ if (fio->need_lock == LOCK_REQ && !f2fs_trylock_op(fio->sbi)) return -EAGAIN; - err = get_dnode_of_data(&dn, page->index, LOOKUP_NODE); + err = f2fs_get_dnode_of_data(&dn, page->index, LOOKUP_NODE); if (err) goto out; @@ -1720,14 +1961,23 @@ int do_write_data_page(struct f2fs_io_info *fio) /* This page is already truncated */ if (fio->old_blkaddr == NULL_ADDR) { ClearPageUptodate(page); + clear_cold_data(page); goto out_writepage; } got_it: + if (__is_valid_data_blkaddr(fio->old_blkaddr) && + !f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr, + DATA_GENERIC_ENHANCE)) { + err = -EFSCORRUPTED; + goto out_writepage; + } /* * If current allocation needs SSR, * it is better to do in-place writes for updated data. 
*/ - if (ipu_force || (valid_ipu_blkaddr(fio) && need_inplace_update(fio))) { + if (ipu_force || + (__is_valid_data_blkaddr(fio->old_blkaddr) && + need_inplace_update(fio))) { err = encrypt_one_page(fio); if (err) goto out_writepage; @@ -1737,9 +1987,17 @@ got_it: f2fs_put_dnode(&dn); if (fio->need_lock == LOCK_REQ) f2fs_unlock_op(fio->sbi); - err = rewrite_data_page(fio); + err = f2fs_inplace_write_data(fio); + if (err) { + if (f2fs_encrypted_file(inode)) + fscrypt_pullback_bio_page(&fio->encrypted_page, + true); + if (PageWriteback(page)) + end_page_writeback(page); + } else { + set_inode_flag(inode, FI_UPDATE_WRITE); + } trace_f2fs_do_write_data_page(fio->page, IPU); - set_inode_flag(inode, FI_UPDATE_WRITE); return err; } @@ -1751,6 +2009,12 @@ got_it: fio->need_lock = LOCK_REQ; } + err = f2fs_get_node_info(fio->sbi, dn.nid, &ni); + if (err) + goto out_writepage; + + fio->version = ni.version; + err = encrypt_one_page(fio); if (err) goto out_writepage; @@ -1759,7 +2023,7 @@ got_it: ClearPageError(page); /* LFS mode write path */ - write_data_page(&dn, fio); + f2fs_outplace_write_data(&dn, fio); trace_f2fs_do_write_data_page(page, OPU); set_inode_flag(inode, FI_APPEND_WRITE); if (page->index == 0) @@ -1773,6 +2037,8 @@ out: } static int __write_data_page(struct page *page, bool *submitted, + struct bio **bio, + sector_t *last_block, struct writeback_control *wbc, enum iostat_type io_type) { @@ -1798,6 +2064,8 @@ static int __write_data_page(struct page *page, bool *submitted, .need_lock = LOCK_RETRY, .io_type = io_type, .io_wbc = wbc, + .bio = bio, + .last_block = last_block, }; trace_f2fs_writepage(page, DATA); @@ -1805,6 +2073,12 @@ static int __write_data_page(struct page *page, bool *submitted, /* we should bypass data pages to proceed the kworker jobs */ if (unlikely(f2fs_cp_error(sbi))) { mapping_set_error(page->mapping, -EIO); + /* + * don't drop any dirty dentry pages for keeping the latest + * directory structure. 
+ */ + if (S_ISDIR(inode->i_mode)) + goto redirty_out; goto out; } @@ -1829,13 +2103,13 @@ write: /* we should not write 0'th page having journal header */ if (f2fs_is_volatile_file(inode) && (!page->index || (!wbc->for_reclaim && - available_free_memory(sbi, BASE_CHECK)))) + f2fs_available_free_memory(sbi, BASE_CHECK)))) goto redirty_out; /* Dentry blocks are controlled by checkpoint */ if (S_ISDIR(inode->i_mode)) { fio.need_lock = LOCK_DONE; - err = do_write_data_page(&fio); + err = f2fs_do_write_data_page(&fio); goto done; } @@ -1854,10 +2128,10 @@ write: } if (err == -EAGAIN) { - err = do_write_data_page(&fio); + err = f2fs_do_write_data_page(&fio); if (err == -EAGAIN) { fio.need_lock = LOCK_REQ; - err = do_write_data_page(&fio); + err = f2fs_do_write_data_page(&fio); } } @@ -1876,21 +2150,27 @@ done: out: inode_dec_dirty_pages(inode); - if (err) + if (err) { ClearPageUptodate(page); + clear_cold_data(page); + } if (wbc->for_reclaim) { - f2fs_submit_merged_write_cond(sbi, inode, 0, page->index, DATA); + f2fs_submit_merged_write_cond(sbi, NULL, page, 0, DATA); clear_inode_flag(inode, FI_HOT_DATA); - remove_dirty_inode(inode); + f2fs_remove_dirty_inode(inode); submitted = NULL; } unlock_page(page); - if (!S_ISDIR(inode->i_mode)) + if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode) && + !F2FS_I(inode)->cp_task) { + f2fs_submit_ipu_bio(sbi, bio, page); f2fs_balance_fs(sbi, need_balance_fs); + } if (unlikely(f2fs_cp_error(sbi))) { + f2fs_submit_ipu_bio(sbi, bio, page); f2fs_submit_merged_write(sbi, DATA); submitted = NULL; } @@ -1917,7 +2197,7 @@ redirty_out: static int f2fs_write_data_page(struct page *page, struct writeback_control *wbc) { - return __write_data_page(page, NULL, wbc, FS_DATA_IO); + return __write_data_page(page, NULL, NULL, NULL, wbc, FS_DATA_IO); } /* @@ -1932,15 +2212,18 @@ static int f2fs_write_cache_pages(struct address_space *mapping, int ret = 0; int done = 0; struct pagevec pvec; + struct f2fs_sb_info *sbi = F2FS_M_SB(mapping); + struct bio *bio = NULL; + sector_t last_block; int nr_pages; pgoff_t uninitialized_var(writeback_index); pgoff_t index; pgoff_t end; /* Inclusive */ pgoff_t done_index; - pgoff_t last_idx = ULONG_MAX; int cycled; int range_whole = 0; int tag; + int nwritten = 0; pagevec_init(&pvec, 0); @@ -1976,8 +2259,8 @@ retry: while (!done && (index <= end)) { int i; - nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag, - min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1); + nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end, + tag); if (nr_pages == 0) break; @@ -1985,7 +2268,9 @@ retry: struct page *page = pvec.pages[i]; bool submitted = false; - if (page->index > end) { + /* give a priority to WB_SYNC threads */ + if (atomic_read(&sbi->wb_sync_req[DATA]) && + wbc->sync_mode == WB_SYNC_NONE) { done = 1; break; } @@ -2006,18 +2291,20 @@ continue_unlock: } if (PageWriteback(page)) { - if (wbc->sync_mode != WB_SYNC_NONE) + if (wbc->sync_mode != WB_SYNC_NONE) { f2fs_wait_on_page_writeback(page, - DATA, true); - else + DATA, true, true); + f2fs_submit_ipu_bio(sbi, &bio, page); + } else { goto continue_unlock; + } } - BUG_ON(PageWriteback(page)); if (!clear_page_dirty_for_io(page)) goto continue_unlock; - ret = __write_data_page(page, &submitted, wbc, io_type); + ret = __write_data_page(page, &submitted, &bio, + &last_block, wbc, io_type); if (unlikely(ret)) { /* * keep nr_to_write, since vfs uses this to @@ -2041,12 +2328,10 @@ continue_unlock: done = 1; break; } else if (submitted) { - last_idx = page->index; + nwritten++; } - /* give a 
priority to WB_SYNC threads */ - if ((atomic_read(&F2FS_M_SB(mapping)->wb_sync_req) || - --wbc->nr_to_write <= 0) && + if (--wbc->nr_to_write <= 0 && wbc->sync_mode == WB_SYNC_NONE) { done = 1; break; @@ -2065,14 +2350,34 @@ continue_unlock: if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) mapping->writeback_index = done_index; - if (last_idx != ULONG_MAX) + if (nwritten) f2fs_submit_merged_write_cond(F2FS_M_SB(mapping), mapping->host, - 0, last_idx, DATA); + NULL, 0, DATA); + /* submit cached bio of IPU write */ + if (bio) + __submit_bio(sbi, bio, DATA); return ret; } -int __f2fs_write_data_pages(struct address_space *mapping, +static inline bool __should_serialize_io(struct inode *inode, + struct writeback_control *wbc) +{ + if (!S_ISREG(inode->i_mode)) + return false; + if (IS_NOQUOTA(inode)) + return false; + /* to avoid deadlock in path of data flush */ + if (F2FS_I(inode)->cp_task) + return false; + if (wbc->sync_mode != WB_SYNC_ALL) + return true; + if (get_dirty_pages(inode) >= SM_I(F2FS_I_SB(inode))->min_seq_blocks) + return true; + return false; +} + +static int __f2fs_write_data_pages(struct address_space *mapping, struct writeback_control *wbc, enum iostat_type io_type) { @@ -2080,6 +2385,7 @@ int __f2fs_write_data_pages(struct address_space *mapping, struct inode *inode = mapping->host; struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct blk_plug plug; int ret; + bool locked = false; /* deal with chardevs and other special files */ if (!mapping->a_ops->writepage) @@ -2093,9 +2399,10 @@ int __f2fs_write_data_pages(struct address_space *mapping, if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) goto skip_write; - if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE && + if ((S_ISDIR(inode->i_mode) || IS_NOQUOTA(inode)) && + wbc->sync_mode == WB_SYNC_NONE && get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) && - available_free_memory(sbi, DIRTY_DENTS) + f2fs_available_free_memory(sbi, DIRTY_DENTS)) goto skip_write; /* skip writing during file defragment */ @@ -2106,22 +2413,30 @@ int __f2fs_write_data_pages(struct address_space *mapping, /* to avoid splitting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */ if (wbc->sync_mode == WB_SYNC_ALL) - atomic_inc(&sbi->wb_sync_req); - else if (atomic_read(&sbi->wb_sync_req)) + atomic_inc(&sbi->wb_sync_req[DATA]); + else if (atomic_read(&sbi->wb_sync_req[DATA])) goto skip_write; + if (__should_serialize_io(inode, wbc)) { + mutex_lock(&sbi->writepages); + locked = true; + } + blk_start_plug(&plug); ret = f2fs_write_cache_pages(mapping, wbc, io_type); blk_finish_plug(&plug); + if (locked) + mutex_unlock(&sbi->writepages); + if (wbc->sync_mode == WB_SYNC_ALL) - atomic_dec(&sbi->wb_sync_req); + atomic_dec(&sbi->wb_sync_req[DATA]); /* * if some pages were truncated, we cannot guarantee that its mapping->host * can detect pending bios. 
*/ - remove_dirty_inode(inode); + f2fs_remove_dirty_inode(inode); return ret; skip_write: @@ -2146,10 +2461,15 @@ static void f2fs_write_failed(struct address_space *mapping, loff_t to) loff_t i_size = i_size_read(inode); if (to > i_size) { + down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); down_write(&F2FS_I(inode)->i_mmap_sem); + truncate_pagecache(inode, i_size); - truncate_blocks(inode, i_size, true); + if (!IS_NOQUOTA(inode)) + f2fs_truncate_blocks(inode, i_size, true); + up_write(&F2FS_I(inode)->i_mmap_sem); + up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); } } @@ -2164,6 +2484,7 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi, bool locked = false; struct extent_info ei = {0,0,0}; int err = 0; + int flag; /* * we already allocated all the blocks, so we don't need to get @@ -2173,14 +2494,20 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi, !is_inode_flag_set(inode, FI_NO_PREALLOC)) return 0; + /* f2fs_lock_op avoids race between write CP and convert_inline_page */ + if (f2fs_has_inline_data(inode) && pos + len > MAX_INLINE_DATA(inode)) + flag = F2FS_GET_BLOCK_DEFAULT; + else + flag = F2FS_GET_BLOCK_PRE_AIO; + if (f2fs_has_inline_data(inode) || (pos & PAGE_MASK) >= i_size_read(inode)) { - __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true); + __do_map_lock(sbi, flag, true); locked = true; } restart: /* check inline_data */ - ipage = get_node_page(sbi, inode->i_ino); + ipage = f2fs_get_node_page(sbi, inode->i_ino); if (IS_ERR(ipage)) { err = PTR_ERR(ipage); goto unlock_out; @@ -2190,7 +2517,7 @@ restart: if (f2fs_has_inline_data(inode)) { if (pos + len <= MAX_INLINE_DATA(inode)) { - read_inline_data(page, ipage); + f2fs_do_read_inline_data(page, ipage); set_inode_flag(inode, FI_DATA_EXIST); if (inode->i_nlink) set_inline_node(ipage); @@ -2208,11 +2535,12 @@ restart: dn.data_blkaddr = ei.blk + index - ei.fofs; } else { /* hole case */ - err = get_dnode_of_data(&dn, index, LOOKUP_NODE); + err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE); if (err || dn.data_blkaddr == NULL_ADDR) { f2fs_put_dnode(&dn); __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true); + WARN_ON(flag != F2FS_GET_BLOCK_PRE_AIO); locked = true; goto restart; } @@ -2226,7 +2554,7 @@ out: f2fs_put_dnode(&dn); unlock_out: if (locked) - __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false); + __do_map_lock(sbi, flag, false); return err; } @@ -2254,8 +2582,13 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, } trace_f2fs_write_begin(inode, pos, len, flags); - if (f2fs_is_atomic_file(inode) && - !available_free_memory(sbi, INMEM_PAGES)) { + err = f2fs_is_checkpoint_ready(sbi); + if (err) + goto fail; + + if ((f2fs_is_atomic_file(inode) && + !f2fs_available_free_memory(sbi, INMEM_PAGES)) || + is_inode_flag_set(inode, FI_ATOMIC_REVOKE_REQUEST)) { err = -ENOMEM; drop_atomic = true; goto fail; @@ -2290,7 +2623,8 @@ repeat: if (err) goto fail; - if (need_balance && has_not_enough_free_secs(sbi, 0, 0)) { + if (need_balance && !IS_NOQUOTA(inode) && + has_not_enough_free_secs(sbi, 0, 0)) { unlock_page(page); f2fs_balance_fs(sbi, true); lock_page(page); @@ -2301,11 +2635,7 @@ repeat: } } - f2fs_wait_on_page_writeback(page, DATA, false); - - /* wait for GCed page writeback via META_MAPPING */ - if (f2fs_post_read_required(inode)) - f2fs_wait_on_block_writeback(sbi, blkaddr); + f2fs_wait_on_page_writeback(page, DATA, false, true); if (len == PAGE_SIZE || PageUptodate(page)) return 0; @@ -2319,6 +2649,11 @@ repeat: zero_user_segment(page, 0, PAGE_SIZE); SetPageUptodate(page); } else { + if 
(!f2fs_is_valid_blkaddr(sbi, blkaddr, + DATA_GENERIC_ENHANCE_READ)) { + err = -EFSCORRUPTED; + goto fail; + } err = f2fs_submit_page_read(inode, page, blkaddr); if (err) goto fail; @@ -2339,7 +2674,7 @@ fail: f2fs_put_page(page, 1); f2fs_write_failed(mapping, pos + len); if (drop_atomic) - drop_inmem_pages_all(sbi); + f2fs_drop_inmem_pages_all(sbi, false); return err; } @@ -2380,15 +2715,65 @@ unlock_out: static int check_direct_IO(struct inode *inode, struct iov_iter *iter, loff_t offset) { - unsigned blocksize_mask = inode->i_sb->s_blocksize - 1; + unsigned i_blkbits = READ_ONCE(inode->i_blkbits); + unsigned blkbits = i_blkbits; + unsigned blocksize_mask = (1 << blkbits) - 1; + unsigned long align = offset | iov_iter_alignment(iter); + struct block_device *bdev = inode->i_sb->s_bdev; + + if (align & blocksize_mask) { + if (bdev) + blkbits = blksize_bits(bdev_logical_block_size(bdev)); + blocksize_mask = (1 << blkbits) - 1; + if (align & blocksize_mask) + return -EINVAL; + return 1; + } + return 0; +} - if (offset & blocksize_mask) - return -EINVAL; +static void f2fs_dio_end_io(struct bio *bio) +{ + struct f2fs_private_dio *dio = bio->bi_private; - if (iov_iter_alignment(iter) & blocksize_mask) - return -EINVAL; + dec_page_count(F2FS_I_SB(dio->inode), + dio->write ? F2FS_DIO_WRITE : F2FS_DIO_READ); - return 0; + bio->bi_private = dio->orig_private; + bio->bi_end_io = dio->orig_end_io; + + kvfree(dio); + + bio_endio(bio); +} + +static void f2fs_dio_submit_bio(int rw, struct bio *bio, struct inode *inode, + loff_t file_offset) +{ + struct f2fs_private_dio *dio; + bool write = (rw == REQ_OP_WRITE); + + dio = f2fs_kzalloc(F2FS_I_SB(inode), + sizeof(struct f2fs_private_dio), GFP_NOFS); + if (!dio) + goto out; + + dio->inode = inode; + dio->orig_end_io = bio->bi_end_io; + dio->orig_private = bio->bi_private; + dio->write = write; + + bio->bi_end_io = f2fs_dio_end_io; + bio->bi_private = dio; + + inc_page_count(F2FS_I_SB(inode), + write ? F2FS_DIO_WRITE : F2FS_DIO_READ); + + submit_bio(rw, bio); + return; +out: + bio->bi_error = -EIO; + bio_endio(bio); } static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, @@ -2397,17 +2782,19 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, struct address_space *mapping = iocb->ki_filp->f_mapping; struct inode *inode = mapping->host; struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct f2fs_inode_info *fi = F2FS_I(inode); size_t count = iov_iter_count(iter); int rw = iov_iter_rw(iter); int err; enum rw_hint hint = iocb->ki_hint; int whint_mode = F2FS_OPTION(sbi).whint_mode; + bool do_opu; err = check_direct_IO(inode, iter, offset); if (err) - return err; + return err < 0 ? 
err : 0; - if (f2fs_force_buffered_io(inode, rw)) + if (f2fs_force_buffered_io(inode, iocb, iter)) return 0; if (trace_android_fs_dataread_start_enabled() && @@ -2432,22 +2819,42 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, current->pid, path, current->comm); } + + do_opu = allow_outplace_dio(inode, iocb, iter); + trace_f2fs_direct_IO_enter(inode, offset, count, rw); if (rw == WRITE && whint_mode == WHINT_MODE_OFF) iocb->ki_hint = WRITE_LIFE_NOT_SET; - if (!down_read_trylock(&F2FS_I(inode)->dio_rwsem[rw])) { - if (iocb->ki_flags & IOCB_NOWAIT) { + if (iocb->ki_flags & IOCB_NOWAIT) { + if (!down_read_trylock(&fi->i_gc_rwsem[rw])) { + iocb->ki_hint = hint; + err = -EAGAIN; + goto out; + } + if (do_opu && !down_read_trylock(&fi->i_gc_rwsem[READ])) { + up_read(&fi->i_gc_rwsem[rw]); iocb->ki_hint = hint; err = -EAGAIN; goto out; } - down_read(&F2FS_I(inode)->dio_rwsem[rw]); + } else { + down_read(&fi->i_gc_rwsem[rw]); + if (do_opu) + down_read(&fi->i_gc_rwsem[READ]); } - err = blockdev_direct_IO(iocb, inode, iter, offset, get_data_block_dio); - up_read(&F2FS_I(inode)->dio_rwsem[rw]); + err = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, + iter, offset, + rw == WRITE ? get_data_block_dio_write : + get_data_block_dio, NULL, f2fs_dio_submit_bio, + DIO_LOCKING | DIO_SKIP_HOLES); + + if (do_opu) + up_read(&fi->i_gc_rwsem[READ]); + + up_read(&fi->i_gc_rwsem[rw]); if (rw == WRITE) { if (whint_mode == WHINT_MODE_OFF) @@ -2455,7 +2862,8 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, if (err > 0) { f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_IO, err); - set_inode_flag(inode, FI_UPDATE_WRITE); + if (!do_opu) + set_inode_flag(inode, FI_UPDATE_WRITE); } else if (err < 0) { f2fs_write_failed(mapping, offset + count); } @@ -2490,16 +2898,16 @@ void f2fs_invalidate_page(struct page *page, unsigned int offset, dec_page_count(sbi, F2FS_DIRTY_NODES); } else { inode_dec_dirty_pages(inode); - remove_dirty_inode(inode); + f2fs_remove_dirty_inode(inode); } } - /* This is atomic written page, keep Private */ + clear_cold_data(page); + if (IS_ATOMIC_WRITTEN_PAGE(page)) - return drop_inmem_page(inode, page); + return f2fs_drop_inmem_page(inode, page); - set_page_private(page, 0); - ClearPagePrivate(page); + f2fs_clear_page_private(page); } int f2fs_release_page(struct page *page, gfp_t wait) @@ -2512,24 +2920,25 @@ int f2fs_release_page(struct page *page, gfp_t wait) if (IS_ATOMIC_WRITTEN_PAGE(page)) return 0; - set_page_private(page, 0); - ClearPagePrivate(page); + clear_cold_data(page); + f2fs_clear_page_private(page); return 1; } static int f2fs_set_data_page_dirty(struct page *page) { - struct address_space *mapping = page->mapping; - struct inode *inode = mapping->host; + struct inode *inode = page_file_mapping(page)->host; trace_f2fs_set_page_dirty(page, DATA); if (!PageUptodate(page)) SetPageUptodate(page); + if (PageSwapCache(page)) + return __set_page_dirty_nobuffers(page); if (f2fs_is_atomic_file(inode) && !f2fs_is_commit_atomic_write(inode)) { if (!IS_ATOMIC_WRITTEN_PAGE(page)) { - register_inmem_page(inode, page); + f2fs_register_inmem_page(inode, page); return 1; } /* @@ -2541,7 +2950,7 @@ static int f2fs_set_data_page_dirty(struct page *page) if (!PageDirty(page)) { __set_page_dirty_nobuffers(page); - update_dirty_page(inode, page); + f2fs_update_dirty_page(inode, page); return 1; } return 0; @@ -2581,12 +2990,8 @@ int f2fs_migrate_page(struct address_space *mapping, return -EAGAIN; } - /* - * A reference is expected if PagePrivate 
set when move mapping, - * however F2FS breaks this for maintaining dirty page counts when - * truncating pages. So here adjusting the 'extra_count' make it work. - */ - extra_count = (atomic_written ? 1 : 0) - page_has_private(page); + /* one extra reference was held for atomic_write page */ + extra_count = atomic_written ? 1 : 0; rc = migrate_page_move_mapping(mapping, newpage, page, NULL, mode, extra_count); if (rc != MIGRATEPAGE_SUCCESS) { @@ -2607,9 +3012,10 @@ int f2fs_migrate_page(struct address_space *mapping, get_page(newpage); } - if (PagePrivate(page)) - SetPagePrivate(newpage); - set_page_private(newpage, page_private(page)); + if (PagePrivate(page)) { + f2fs_set_page_private(newpage, page_private(page)); + f2fs_clear_page_private(page); + } migrate_page_copy(newpage, page); @@ -2617,6 +3023,126 @@ int f2fs_migrate_page(struct address_space *mapping, } #endif +#ifdef CONFIG_SWAP +/* Copied from generic_swapfile_activate() to check any holes */ +static int check_swap_activate(struct file *swap_file, unsigned int max) +{ + struct address_space *mapping = swap_file->f_mapping; + struct inode *inode = mapping->host; + unsigned blocks_per_page; + unsigned long page_no; + unsigned blkbits; + sector_t probe_block; + sector_t last_block; + sector_t lowest_block = -1; + sector_t highest_block = 0; + + blkbits = inode->i_blkbits; + blocks_per_page = PAGE_SIZE >> blkbits; + + /* + * Map all the blocks into the extent list. This code doesn't try + * to be very smart. + */ + probe_block = 0; + page_no = 0; + last_block = i_size_read(inode) >> blkbits; + while ((probe_block + blocks_per_page) <= last_block && page_no < max) { + unsigned block_in_page; + sector_t first_block; + + cond_resched(); + + first_block = bmap(inode, probe_block); + if (first_block == 0) + goto bad_bmap; + + /* + * It must be PAGE_SIZE aligned on-disk + */ + if (first_block & (blocks_per_page - 1)) { + probe_block++; + goto reprobe; + } + + for (block_in_page = 1; block_in_page < blocks_per_page; + block_in_page++) { + sector_t block; + + block = bmap(inode, probe_block + block_in_page); + if (block == 0) + goto bad_bmap; + if (block != first_block + block_in_page) { + /* Discontiguity */ + probe_block++; + goto reprobe; + } + } + + first_block >>= (PAGE_SHIFT - blkbits); + if (page_no) { /* exclude the header page */ + if (first_block < lowest_block) + lowest_block = first_block; + if (first_block > highest_block) + highest_block = first_block; + } + + page_no++; + probe_block += blocks_per_page; +reprobe: + continue; + } + return 0; + +bad_bmap: + pr_err("swapon: swapfile has holes\n"); + return -EINVAL; +} + +static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file, + sector_t *span) +{ + struct inode *inode = file_inode(file); + int ret; + + if (!S_ISREG(inode->i_mode)) + return -EINVAL; + + if (f2fs_readonly(F2FS_I_SB(inode)->sb)) + return -EROFS; + + ret = f2fs_convert_inline_inode(inode); + if (ret) + return ret; + + ret = check_swap_activate(file, sis->max); + if (ret) + return ret; + + set_inode_flag(inode, FI_PIN_FILE); + f2fs_precache_extents(inode); + f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); + return 0; +} + +static void f2fs_swap_deactivate(struct file *file) +{ + struct inode *inode = file_inode(file); + + clear_inode_flag(inode, FI_PIN_FILE); +} +#else +static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file, + sector_t *span) +{ + return -EOPNOTSUPP; +} + +static void f2fs_swap_deactivate(struct file *file) +{ +} +#endif + const struct 
address_space_operations f2fs_dblock_aops = { .readpage = f2fs_read_data_page, .readpages = f2fs_read_data_pages, @@ -2629,11 +3155,24 @@ const struct address_space_operations f2fs_dblock_aops = { .releasepage = f2fs_release_page, .direct_IO = f2fs_direct_IO, .bmap = f2fs_bmap, + .swap_activate = f2fs_swap_activate, + .swap_deactivate = f2fs_swap_deactivate, #ifdef CONFIG_MIGRATION .migratepage = f2fs_migrate_page, #endif }; +void f2fs_clear_radix_tree_dirty_tag(struct page *page) +{ + struct address_space *mapping = page_mapping(page); + unsigned long flags; + + spin_lock_irqsave(&mapping->tree_lock, flags); + radix_tree_tag_clear(&mapping->page_tree, page_index(page), + PAGECACHE_TAG_DIRTY); + spin_unlock_irqrestore(&mapping->tree_lock, flags); +} + int __init f2fs_init_post_read_processing(void) { bio_post_read_ctx_cache = KMEM_CACHE(bio_post_read_ctx, 0); diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index a66107b5cfff..9cadcf9f1ecf 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * f2fs debugging statistics * @@ -5,10 +6,6 @@ * http://www.samsung.com/ * Copyright (c) 2012 Linux Foundation * Copyright (c) 2012 Greg Kroah-Hartman <gregkh@linuxfoundation.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/fs.h> @@ -30,8 +27,15 @@ static DEFINE_MUTEX(f2fs_stat_mutex); static void update_general_status(struct f2fs_sb_info *sbi) { struct f2fs_stat_info *si = F2FS_STAT(sbi); + struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); int i; + /* these will be changed if online resize is done */ + si->main_area_segs = le32_to_cpu(raw_super->segment_count_main); + si->main_area_sections = le32_to_cpu(raw_super->section_count); + si->main_area_zones = si->main_area_sections / + le32_to_cpu(raw_super->secs_per_zone); + /* validation check of the segment numbers */ si->hit_largest = atomic64_read(&sbi->read_hit_largest); si->hit_cached = atomic64_read(&sbi->read_hit_cached); @@ -56,13 +60,18 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->vw_cnt = atomic_read(&sbi->vw_cnt); si->max_aw_cnt = atomic_read(&sbi->max_aw_cnt); si->max_vw_cnt = atomic_read(&sbi->max_vw_cnt); + si->nr_dio_read = get_pages(sbi, F2FS_DIO_READ); + si->nr_dio_write = get_pages(sbi, F2FS_DIO_WRITE); si->nr_wb_cp_data = get_pages(sbi, F2FS_WB_CP_DATA); si->nr_wb_data = get_pages(sbi, F2FS_WB_DATA); + si->nr_rd_data = get_pages(sbi, F2FS_RD_DATA); + si->nr_rd_node = get_pages(sbi, F2FS_RD_NODE); + si->nr_rd_meta = get_pages(sbi, F2FS_RD_META); if (SM_I(sbi) && SM_I(sbi)->fcc_info) { si->nr_flushed = atomic_read(&SM_I(sbi)->fcc_info->issued_flush); si->nr_flushing = - atomic_read(&SM_I(sbi)->fcc_info->issing_flush); + atomic_read(&SM_I(sbi)->fcc_info->queued_flush); si->flush_list_empty = llist_empty(&SM_I(sbi)->fcc_info->issue_list); } @@ -70,7 +79,7 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->nr_discarded = atomic_read(&SM_I(sbi)->dcc_info->issued_discard); si->nr_discarding = - atomic_read(&SM_I(sbi)->dcc_info->issing_discard); + atomic_read(&SM_I(sbi)->dcc_info->queued_discard); si->nr_discard_cmd = atomic_read(&SM_I(sbi)->dcc_info->discard_cmd_cnt); si->undiscard_blks = SM_I(sbi)->dcc_info->undiscard_blks; @@ -94,8 +103,10 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->free_secs = free_sections(sbi); si->prefree_count = prefree_segments(sbi); 
si->dirty_count = dirty_segments(sbi); - si->node_pages = NODE_MAPPING(sbi)->nrpages; - si->meta_pages = META_MAPPING(sbi)->nrpages; + if (sbi->node_inode) + si->node_pages = NODE_MAPPING(sbi)->nrpages; + if (sbi->meta_inode) + si->meta_pages = META_MAPPING(sbi)->nrpages; si->nats = NM_I(sbi)->nat_cnt; si->dirty_nats = NM_I(sbi)->dirty_nat_cnt; si->sits = MAIN_SEGS(sbi); @@ -104,6 +115,10 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->avail_nids = NM_I(sbi)->available_nids; si->alloc_nids = NM_I(sbi)->nid_cnt[PREALLOC_NID]; si->bg_gc = sbi->bg_gc; + si->io_skip_bggc = sbi->io_skip_bggc; + si->other_skip_bggc = sbi->other_skip_bggc; + si->skipped_atomic_files[BG_GC] = sbi->skipped_atomic_files[BG_GC]; + si->skipped_atomic_files[FG_GC] = sbi->skipped_atomic_files[FG_GC]; si->util_free = (int)(free_user_blocks(sbi) >> sbi->log_blocks_per_seg) * 100 / (int)(sbi->user_block_count >> sbi->log_blocks_per_seg) / 2; @@ -119,6 +134,9 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->curzone[i] = GET_ZONE_FROM_SEC(sbi, si->cursec[i]); } + for (i = META_CP; i < META_MAX; i++) + si->meta_count[i] = atomic_read(&sbi->meta_count[i]); + for (i = 0; i < 2; i++) { si->segment_count[i] = sbi->segment_count[i]; si->block_count[i] = sbi->block_count[i]; @@ -166,7 +184,6 @@ static void update_sit_info(struct f2fs_sb_info *sbi) static void update_mem_info(struct f2fs_sb_info *sbi) { struct f2fs_stat_info *si = F2FS_STAT(sbi); - unsigned npages; int i; if (si->base_mem) @@ -188,10 +205,9 @@ static void update_mem_info(struct f2fs_sb_info *sbi) si->base_mem += MAIN_SEGS(sbi) * sizeof(struct seg_entry); si->base_mem += f2fs_bitmap_size(MAIN_SEGS(sbi)); si->base_mem += 2 * SIT_VBLOCK_MAP_SIZE * MAIN_SEGS(sbi); - if (f2fs_discard_en(sbi)) - si->base_mem += SIT_VBLOCK_MAP_SIZE * MAIN_SEGS(sbi); + si->base_mem += SIT_VBLOCK_MAP_SIZE * MAIN_SEGS(sbi); si->base_mem += SIT_VBLOCK_MAP_SIZE; - if (sbi->segs_per_sec > 1) + if (__is_large_section(sbi)) si->base_mem += MAIN_SECS(sbi) * sizeof(struct sec_entry); si->base_mem += __bitmap_size(sbi, SIT_BITMAP); @@ -213,7 +229,8 @@ static void update_mem_info(struct f2fs_sb_info *sbi) si->base_mem += sizeof(struct f2fs_nm_info); si->base_mem += __bitmap_size(sbi, NAT_BITMAP); si->base_mem += (NM_I(sbi)->nat_bits_blocks << F2FS_BLKSIZE_BITS); - si->base_mem += NM_I(sbi)->nat_blocks * NAT_ENTRY_BITMAP_SIZE; + si->base_mem += NM_I(sbi)->nat_blocks * + f2fs_bitmap_size(NAT_ENTRY_PER_BLOCK); si->base_mem += NM_I(sbi)->nat_blocks / 8; si->base_mem += NM_I(sbi)->nat_blocks * sizeof(unsigned short); @@ -249,10 +266,14 @@ get_cache: sizeof(struct extent_node); si->page_mem = 0; - npages = NODE_MAPPING(sbi)->nrpages; - si->page_mem += (unsigned long long)npages << PAGE_SHIFT; - npages = META_MAPPING(sbi)->nrpages; - si->page_mem += (unsigned long long)npages << PAGE_SHIFT; + if (sbi->node_inode) { + unsigned npages = NODE_MAPPING(sbi)->nrpages; + si->page_mem += (unsigned long long)npages << PAGE_SHIFT; + } + if (sbi->meta_inode) { + unsigned npages = META_MAPPING(sbi)->nrpages; + si->page_mem += (unsigned long long)npages << PAGE_SHIFT; + } } static int stat_show(struct seq_file *s, void *v) @@ -268,7 +289,8 @@ static int stat_show(struct seq_file *s, void *v) seq_printf(s, "\n=====[ partition info(%pg). #%d, %s, CP: %s]=====\n", si->sbi->sb->s_bdev, i++, f2fs_readonly(si->sbi->sb) ? "RO": "RW", - f2fs_cp_error(si->sbi) ? "Error": "Good"); + is_set_ckpt_flags(si->sbi, CP_DISABLED_FLAG) ? + "Disabled": (f2fs_cp_error(si->sbi) ? 
"Error": "Good")); seq_printf(s, "[SB: 1] [CP: 2] [SIT: %d] [NAT: %d] ", si->sit_area_segs, si->nat_area_segs); seq_printf(s, "[SSA: %d] [MAIN: %d", @@ -330,6 +352,13 @@ static int stat_show(struct seq_file *s, void *v) si->prefree_count, si->free_segs, si->free_secs); seq_printf(s, "CP calls: %d (BG: %d)\n", si->cp_count, si->bg_cp_count); + seq_printf(s, " - cp blocks : %u\n", si->meta_count[META_CP]); + seq_printf(s, " - sit blocks : %u\n", + si->meta_count[META_SIT]); + seq_printf(s, " - nat blocks : %u\n", + si->meta_count[META_NAT]); + seq_printf(s, " - ssa blocks : %u\n", + si->meta_count[META_SSA]); seq_printf(s, "GC calls: %d (BG: %d)\n", si->call_count, si->bg_gc); seq_printf(s, " - data segments : %d (%d)\n", @@ -342,6 +371,12 @@ static int stat_show(struct seq_file *s, void *v) si->bg_data_blks); seq_printf(s, " - node blocks : %d (%d)\n", si->node_blks, si->bg_node_blks); + seq_printf(s, "Skipped : atomic write %llu (%llu)\n", + si->skipped_atomic_files[BG_GC] + + si->skipped_atomic_files[FG_GC], + si->skipped_atomic_files[BG_GC]); + seq_printf(s, "BG skip : IO: %u, Other: %u\n", + si->io_skip_bggc, si->other_skip_bggc); seq_puts(s, "\nExtent Cache:\n"); seq_printf(s, " - Hit Count: L1-1:%llu L1-2:%llu L2:%llu\n", si->hit_largest, si->hit_cached, @@ -353,7 +388,11 @@ static int stat_show(struct seq_file *s, void *v) seq_printf(s, " - Inner Struct Count: tree: %d(%d), node: %d\n", si->ext_tree, si->zombie_tree, si->ext_node); seq_puts(s, "\nBalancing F2FS Async:\n"); - seq_printf(s, " - IO (CP: %4d, Data: %4d, Flush: (%4d %4d %4d), " + seq_printf(s, " - DIO (R: %4d, W: %4d)\n", + si->nr_dio_read, si->nr_dio_write); + seq_printf(s, " - IO_R (Data: %4d, Node: %4d, Meta: %4d\n", + si->nr_rd_data, si->nr_rd_node, si->nr_rd_meta); + seq_printf(s, " - IO_W (CP: %4d, Data: %4d, Flush: (%4d %4d %4d), " "Discard: (%4d %4d)) cmd: %4d undiscard:%4u\n", si->nr_wb_cp_data, si->nr_wb_data, si->nr_flushing, si->nr_flushed, @@ -438,6 +477,7 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi) { struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); struct f2fs_stat_info *si; + int i; si = f2fs_kzalloc(sbi, sizeof(struct f2fs_stat_info), GFP_KERNEL); if (!si) @@ -463,6 +503,8 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi) atomic_set(&sbi->inline_inode, 0); atomic_set(&sbi->inline_dir, 0); atomic_set(&sbi->inplace_count, 0); + for (i = META_CP; i < META_MAX; i++) + atomic_set(&sbi->meta_count[i], 0); atomic_set(&sbi->aw_cnt, 0); atomic_set(&sbi->vw_cnt, 0); @@ -484,33 +526,19 @@ void f2fs_destroy_stats(struct f2fs_sb_info *sbi) list_del(&si->stat_list); mutex_unlock(&f2fs_stat_mutex); - kfree(si); + kvfree(si); } -int __init f2fs_create_root_stats(void) +void __init f2fs_create_root_stats(void) { - struct dentry *file; - f2fs_debugfs_root = debugfs_create_dir("f2fs", NULL); - if (!f2fs_debugfs_root) - return -ENOMEM; - file = debugfs_create_file("status", S_IRUGO, f2fs_debugfs_root, - NULL, &stat_fops); - if (!file) { - debugfs_remove(f2fs_debugfs_root); - f2fs_debugfs_root = NULL; - return -ENOMEM; - } - - return 0; + debugfs_create_file("status", S_IRUGO, f2fs_debugfs_root, NULL, + &stat_fops); } void f2fs_destroy_root_stats(void) { - if (!f2fs_debugfs_root) - return; - debugfs_remove_recursive(f2fs_debugfs_root); f2fs_debugfs_root = NULL; } diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index b7ad932a05ab..d01f376291a3 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/dir.c * * Copyright (c) 2012 Samsung Electronics Co., Ltd. 
* http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/fs.h> #include <linux/f2fs_fs.h> @@ -60,12 +57,12 @@ static unsigned char f2fs_type_by_mode[S_IFMT >> S_SHIFT] = { [S_IFLNK >> S_SHIFT] = F2FS_FT_SYMLINK, }; -void set_de_type(struct f2fs_dir_entry *de, umode_t mode) +static void set_de_type(struct f2fs_dir_entry *de, umode_t mode) { de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT]; } -unsigned char get_de_type(struct f2fs_dir_entry *de) +unsigned char f2fs_get_de_type(struct f2fs_dir_entry *de) { if (de->file_type < F2FS_FT_MAX) return f2fs_filetype_table[de->file_type]; @@ -97,14 +94,14 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page, dentry_blk = (struct f2fs_dentry_block *)page_address(dentry_page); make_dentry_ptr_block(NULL, &d, dentry_blk); - de = find_target_dentry(fname, namehash, max_slots, &d); + de = f2fs_find_target_dentry(fname, namehash, max_slots, &d); if (de) *res_page = dentry_page; return de; } -struct f2fs_dir_entry *find_target_dentry(struct fscrypt_name *fname, +struct f2fs_dir_entry *f2fs_find_target_dentry(struct fscrypt_name *fname, f2fs_hash_t namehash, int *max_slots, struct f2fs_dentry_ptr *d) { @@ -171,7 +168,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, for (; bidx < end_block; bidx++) { /* no need to allocate new dentry pages to all the indices */ - dentry_page = find_data_page(dir, bidx); + dentry_page = f2fs_find_data_page(dir, bidx); if (IS_ERR(dentry_page)) { if (PTR_ERR(dentry_page) == -ENOENT) { room = true; @@ -210,7 +207,7 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir, if (f2fs_has_inline_dentry(dir)) { *res_page = NULL; - de = find_in_inline_dir(dir, fname, res_page); + de = f2fs_find_in_inline_dir(dir, fname, res_page); goto out; } @@ -221,9 +218,8 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir, max_depth = F2FS_I(dir)->i_current_depth; if (unlikely(max_depth > MAX_DIR_HASH_DEPTH)) { - f2fs_msg(F2FS_I_SB(dir)->sb, KERN_WARNING, - "Corrupted max_depth of %lu: %u", - dir->i_ino, max_depth); + f2fs_warn(F2FS_I_SB(dir), "Corrupted max_depth of %lu: %u", + dir->i_ino, max_depth); max_depth = MAX_DIR_HASH_DEPTH; f2fs_i_depth_write(dir, max_depth); } @@ -296,7 +292,7 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, { enum page_type type = f2fs_has_inline_dentry(dir) ? NODE : DATA; lock_page(page); - f2fs_wait_on_page_writeback(page, type, true); + f2fs_wait_on_page_writeback(page, type, true, true); de->ino = cpu_to_le32(inode->i_ino); set_de_type(de, inode->i_mode); set_page_dirty(page); @@ -310,7 +306,7 @@ static void init_dent_inode(const struct qstr *name, struct page *ipage) { struct f2fs_inode *ri; - f2fs_wait_on_page_writeback(ipage, NODE, true); + f2fs_wait_on_page_writeback(ipage, NODE, true, true); /* copy name info
to this inode page */ ri = F2FS_INODE(ipage); @@ -319,7 +315,7 @@ static void init_dent_inode(const struct qstr *name, struct page *ipage) set_page_dirty(ipage); } -void do_make_empty_dir(struct inode *inode, struct inode *parent, +void f2fs_do_make_empty_dir(struct inode *inode, struct inode *parent, struct f2fs_dentry_ptr *d) { struct qstr dot = QSTR_INIT(".", 1); @@ -340,23 +336,23 @@ static int make_empty_dir(struct inode *inode, struct f2fs_dentry_ptr d; if (f2fs_has_inline_dentry(inode)) - return make_empty_inline_dir(inode, parent, page); + return f2fs_make_empty_inline_dir(inode, parent, page); - dentry_page = get_new_data_page(inode, page, 0, true); + dentry_page = f2fs_get_new_data_page(inode, page, 0, true); if (IS_ERR(dentry_page)) return PTR_ERR(dentry_page); dentry_blk = page_address(dentry_page); make_dentry_ptr_block(NULL, &d, dentry_blk); - do_make_empty_dir(inode, parent, &d); + f2fs_do_make_empty_dir(inode, parent, &d); set_page_dirty(dentry_page); f2fs_put_page(dentry_page, 1); return 0; } -struct page *init_inode_metadata(struct inode *inode, struct inode *dir, +struct page *f2fs_init_inode_metadata(struct inode *inode, struct inode *dir, const struct qstr *new_name, const struct qstr *orig_name, struct page *dpage) { @@ -365,7 +361,7 @@ struct page *init_inode_metadata(struct inode *inode, struct inode *dir, int err; if (is_inode_flag_set(inode, FI_NEW_INODE)) { - page = new_inode_page(inode); + page = f2fs_new_inode_page(inode); if (IS_ERR(page)) return page; @@ -395,7 +391,7 @@ struct page *init_inode_metadata(struct inode *inode, struct inode *dir, goto put_error; } } else { - page = get_node_page(F2FS_I_SB(dir), inode->i_ino); + page = f2fs_get_node_page(F2FS_I_SB(dir), inode->i_ino); if (IS_ERR(page)) return page; } @@ -418,19 +414,19 @@ struct page *init_inode_metadata(struct inode *inode, struct inode *dir, * we should remove this inode from orphan list. 
*/ if (inode->i_nlink == 0) - remove_orphan_inode(F2FS_I_SB(dir), inode->i_ino); + f2fs_remove_orphan_inode(F2FS_I_SB(dir), inode->i_ino); f2fs_i_links_write(inode, true); } return page; put_error: clear_nlink(inode); - update_inode(inode, page); + f2fs_update_inode(inode, page); f2fs_put_page(page, 1); return ERR_PTR(err); } -void update_parent_metadata(struct inode *dir, struct inode *inode, +void f2fs_update_parent_metadata(struct inode *dir, struct inode *inode, unsigned int current_depth) { if (inode && is_inode_flag_set(inode, FI_NEW_INODE)) { @@ -448,7 +444,7 @@ void update_parent_metadata(struct inode *dir, struct inode *inode, clear_inode_flag(inode, FI_INC_LINK); } -int room_for_filename(const void *bitmap, int slots, int max_slots) +int f2fs_room_for_filename(const void *bitmap, int slots, int max_slots) { int bit_start = 0; int zero_start, zero_end; @@ -517,12 +513,11 @@ int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name, } start: -#ifdef CONFIG_F2FS_FAULT_INJECTION if (time_to_inject(F2FS_I_SB(dir), FAULT_DIR_DEPTH)) { f2fs_show_injection_info(FAULT_DIR_DEPTH); return -ENOSPC; } -#endif + if (unlikely(current_depth == MAX_DIR_HASH_DEPTH)) return -ENOSPC; @@ -537,12 +532,12 @@ start: (le32_to_cpu(dentry_hash) % nbucket)); for (block = bidx; block <= (bidx + nblock - 1); block++) { - dentry_page = get_new_data_page(dir, NULL, block, true); + dentry_page = f2fs_get_new_data_page(dir, NULL, block, true); if (IS_ERR(dentry_page)) return PTR_ERR(dentry_page); dentry_blk = page_address(dentry_page); - bit_pos = room_for_filename(&dentry_blk->dentry_bitmap, + bit_pos = f2fs_room_for_filename(&dentry_blk->dentry_bitmap, slots, NR_DENTRY_IN_BLOCK); if (bit_pos < NR_DENTRY_IN_BLOCK) goto add_dentry; @@ -554,11 +549,11 @@ start: ++level; goto start; add_dentry: - f2fs_wait_on_page_writeback(dentry_page, DATA, true); + f2fs_wait_on_page_writeback(dentry_page, DATA, true, true); if (inode) { down_write(&F2FS_I(inode)->i_sem); - page = init_inode_metadata(inode, dir, new_name, + page = f2fs_init_inode_metadata(inode, dir, new_name, orig_name, NULL); if (IS_ERR(page)) { err = PTR_ERR(page); @@ -576,7 +571,7 @@ add_dentry: f2fs_put_page(page, 1); } - update_parent_metadata(dir, inode, current_depth); + f2fs_update_parent_metadata(dir, inode, current_depth); fail: if (inode) up_write(&F2FS_I(inode)->i_sem); @@ -586,7 +581,7 @@ fail: return err; } -int __f2fs_do_add_link(struct inode *dir, struct fscrypt_name *fname, +int f2fs_add_dentry(struct inode *dir, struct fscrypt_name *fname, struct inode *inode, nid_t ino, umode_t mode) { struct qstr new_name; @@ -610,7 +605,7 @@ int __f2fs_do_add_link(struct inode *dir, struct fscrypt_name *fname, * Caller should grab and release a rwsem by calling f2fs_lock_op() and * f2fs_unlock_op(). 
*/ -int __f2fs_add_link(struct inode *dir, const struct qstr *name, +int f2fs_do_add_link(struct inode *dir, const struct qstr *name, struct inode *inode, nid_t ino, umode_t mode) { struct fscrypt_name fname; @@ -639,7 +634,7 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name, } else if (IS_ERR(page)) { err = PTR_ERR(page); } else { - err = __f2fs_do_add_link(dir, &fname, inode, ino, mode); + err = f2fs_add_dentry(dir, &fname, inode, ino, mode); } fscrypt_free_filename(&fname); return err; @@ -651,7 +646,7 @@ int f2fs_do_tmpfile(struct inode *inode, struct inode *dir) int err = 0; down_write(&F2FS_I(inode)->i_sem); - page = init_inode_metadata(inode, dir, NULL, NULL, NULL); + page = f2fs_init_inode_metadata(inode, dir, NULL, NULL, NULL); if (IS_ERR(page)) { err = PTR_ERR(page); goto fail; @@ -659,9 +654,9 @@ int f2fs_do_tmpfile(struct inode *inode, struct inode *dir) f2fs_put_page(page, 1); clear_inode_flag(inode, FI_NEW_INODE); + f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); fail: up_write(&F2FS_I(inode)->i_sem); - f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); return err; } @@ -683,9 +678,9 @@ void f2fs_drop_nlink(struct inode *dir, struct inode *inode) up_write(&F2FS_I(inode)->i_sem); if (inode->i_nlink == 0) - add_orphan_inode(inode); + f2fs_add_orphan_inode(inode); else - release_orphan_inode(sbi); + f2fs_release_orphan_inode(sbi); } /* @@ -698,20 +693,18 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, struct f2fs_dentry_block *dentry_blk; unsigned int bit_pos; int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len)); - struct address_space *mapping = page_mapping(page); - unsigned long flags; int i; f2fs_update_time(F2FS_I_SB(dir), REQ_TIME); if (F2FS_OPTION(F2FS_I_SB(dir)).fsync_mode == FSYNC_MODE_STRICT) - add_ino_entry(F2FS_I_SB(dir), dir->i_ino, TRANS_DIR_INO); + f2fs_add_ino_entry(F2FS_I_SB(dir), dir->i_ino, TRANS_DIR_INO); if (f2fs_has_inline_dentry(dir)) return f2fs_delete_inline_entry(dentry, page, dir, inode); lock_page(page); - f2fs_wait_on_page_writeback(page, DATA, true); + f2fs_wait_on_page_writeback(page, DATA, true, true); dentry_blk = page_address(page); bit_pos = dentry - dentry_blk->dentry; @@ -731,17 +724,14 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, f2fs_drop_nlink(dir, inode); if (bit_pos == NR_DENTRY_IN_BLOCK && - !truncate_hole(dir, page->index, page->index + 1)) { - spin_lock_irqsave(&mapping->tree_lock, flags); - radix_tree_tag_clear(&mapping->page_tree, page_index(page), - PAGECACHE_TAG_DIRTY); - spin_unlock_irqrestore(&mapping->tree_lock, flags); - + !f2fs_truncate_hole(dir, page->index, page->index + 1)) { + f2fs_clear_radix_tree_dirty_tag(page); clear_page_dirty_for_io(page); - ClearPagePrivate(page); + f2fs_clear_page_private(page); ClearPageUptodate(page); + clear_cold_data(page); inode_dec_dirty_pages(dir); - remove_dirty_inode(dir); + f2fs_remove_dirty_inode(dir); } f2fs_put_page(page, 1); } @@ -758,7 +748,7 @@ bool f2fs_empty_dir(struct inode *dir) return f2fs_empty_inline_dir(dir); for (bidx = 0; bidx < nblock; bidx++) { - dentry_page = get_lock_data_page(dir, bidx, false); + dentry_page = f2fs_get_lock_data_page(dir, bidx, false); if (IS_ERR(dentry_page)) { if (PTR_ERR(dentry_page) == -ENOENT) continue; @@ -791,9 +781,15 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, struct f2fs_dir_entry *de = NULL; struct fscrypt_str de_name = FSTR_INIT(NULL, 0); struct f2fs_sb_info *sbi = F2FS_I_SB(d->inode); + struct blk_plug plug; + bool readdir_ra = 
sbi->readdir_ra == 1; + int err = 0; bit_pos = ((unsigned long)ctx->pos % d->max); + if (readdir_ra) + blk_start_plug(&plug); + while (bit_pos < d->max) { bit_pos = find_next_bit_le(d->bitmap, d->max, bit_pos); if (bit_pos >= d->max) @@ -803,10 +799,14 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, if (de->name_len == 0) { bit_pos++; ctx->pos = start_pos + bit_pos; + printk_ratelimited( + "%s, invalid namelen(0), ino:%u, run fsck to fix.", + KERN_WARNING, le32_to_cpu(de->ino)); + set_sbi_flag(sbi, SBI_NEED_FSCK); continue; } - d_type = get_de_type(de); + d_type = f2fs_get_de_type(de); de_name.name = d->filename[bit_pos]; de_name.len = le16_to_cpu(de->name_len); @@ -815,37 +815,41 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); if (unlikely(bit_pos > d->max || le16_to_cpu(de->name_len) > F2FS_NAME_LEN)) { - f2fs_msg(F2FS_I_SB(d->inode)->sb, KERN_WARNING, - "%s: corrupted namelen=%d, run fsck to fix.", - __func__, le16_to_cpu(de->name_len)); - set_sbi_flag(F2FS_I_SB(d->inode)->sb->s_fs_info, SBI_NEED_FSCK); - return -EINVAL; + f2fs_warn(sbi, "%s: corrupted namelen=%d, run fsck to fix.", + __func__, le16_to_cpu(de->name_len)); + set_sbi_flag(sbi, SBI_NEED_FSCK); + err = -EFSCORRUPTED; + goto out; } if (f2fs_encrypted_inode(d->inode)) { int save_len = fstr->len; - int err; err = fscrypt_fname_disk_to_usr(d->inode, - (u32)de->hash_code, 0, - &de_name, fstr); + (u32)le32_to_cpu(de->hash_code), + 0, &de_name, fstr); if (err) - return err; + goto out; de_name = *fstr; fstr->len = save_len; } if (!dir_emit(ctx, de_name.name, de_name.len, - le32_to_cpu(de->ino), d_type)) - return 1; + le32_to_cpu(de->ino), d_type)) { + err = 1; + goto out; + } - if (sbi->readdir_ra == 1) - ra_node_page(sbi, le32_to_cpu(de->ino)); + if (readdir_ra) + f2fs_ra_node_page(sbi, le32_to_cpu(de->ino)); ctx->pos = start_pos + bit_pos; } - return 0; +out: + if (readdir_ra) + blk_finish_plug(&plug); + return err; } static int f2fs_readdir(struct file *file, struct dir_context *ctx) @@ -890,7 +894,7 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) page_cache_sync_readahead(inode->i_mapping, ra, file, n, min(npages - n, (pgoff_t)MAX_DIR_RA_PAGES)); - dentry_page = get_lock_data_page(inode, n, false); + dentry_page = f2fs_find_data_page(inode, n); if (IS_ERR(dentry_page)) { err = PTR_ERR(dentry_page); if (err == -ENOENT) { @@ -908,11 +912,11 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) err = f2fs_fill_dentries(ctx, &d, n * NR_DENTRY_IN_BLOCK, &fstr); if (err) { - f2fs_put_page(dentry_page, 1); + f2fs_put_page(dentry_page, 0); break; } - f2fs_put_page(dentry_page, 1); + f2fs_put_page(dentry_page, 0); } out_free: fscrypt_fname_free_buffer(&fstr); diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index d5a861bf2b42..a77022601bed 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * f2fs extent cache support * @@ -5,10 +6,6 @@ * Copyright (c) 2015 Samsung Electronics * Authors: Jaegeuk Kim <jaegeuk@kernel.org> * Chao Yu <chao2.yu@samsung.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
*/ #include <linux/fs.h> @@ -49,7 +46,7 @@ static struct rb_entry *__lookup_rb_tree_slow(struct rb_root *root, return NULL; } -struct rb_entry *__lookup_rb_tree(struct rb_root *root, +struct rb_entry *f2fs_lookup_rb_tree(struct rb_root *root, struct rb_entry *cached_re, unsigned int ofs) { struct rb_entry *re; @@ -61,7 +58,7 @@ struct rb_entry *__lookup_rb_tree(struct rb_root *root, return re; } -struct rb_node **__lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi, +struct rb_node **f2fs_lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi, struct rb_root *root, struct rb_node **parent, unsigned int ofs) { @@ -92,7 +89,7 @@ struct rb_node **__lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi, * in order to simplify the insertion after. * tree must stay unchanged between lookup and insertion. */ -struct rb_entry *__lookup_rb_tree_ret(struct rb_root *root, +struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root *root, struct rb_entry *cached_re, unsigned int ofs, struct rb_entry **prev_entry, @@ -159,7 +156,7 @@ lookup_neighbors: return re; } -bool __check_rb_tree_consistence(struct f2fs_sb_info *sbi, +bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi, struct rb_root *root) { #ifdef CONFIG_F2FS_CHECK_FS @@ -178,10 +175,9 @@ bool __check_rb_tree_consistence(struct f2fs_sb_info *sbi, next_re = rb_entry(next, struct rb_entry, rb_node); if (cur_re->ofs + cur_re->len > next_re->ofs) { - f2fs_msg(sbi->sb, KERN_INFO, "inconsistent rbtree, " - "cur(%u, %u) next(%u, %u)", - cur_re->ofs, cur_re->len, - next_re->ofs, next_re->len); + f2fs_info(sbi, "inconsistent rbtree, cur(%u, %u) next(%u, %u)", + cur_re->ofs, cur_re->len, + next_re->ofs, next_re->len); return false; } @@ -308,14 +304,13 @@ static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi, return count - atomic_read(&et->node_cnt); } -static void __drop_largest_extent(struct inode *inode, +static void __drop_largest_extent(struct extent_tree *et, pgoff_t fofs, unsigned int len) { - struct extent_info *largest = &F2FS_I(inode)->extent_tree->largest; - - if (fofs < largest->fofs + largest->len && fofs + len > largest->fofs) { - largest->len = 0; - f2fs_mark_inode_dirty_sync(inode, true); + if (fofs < et->largest.fofs + et->largest.len && + fofs + len > et->largest.fofs) { + et->largest.len = 0; + et->largest_updated = true; } } @@ -390,7 +385,7 @@ static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs, goto out; } - en = (struct extent_node *)__lookup_rb_tree(&et->root, + en = (struct extent_node *)f2fs_lookup_rb_tree(&et->root, (struct rb_entry *)et->cached_en, pgofs); if (!en) goto out; @@ -416,12 +411,11 @@ out: return ret; } -static struct extent_node *__try_merge_extent_node(struct inode *inode, +static struct extent_node *__try_merge_extent_node(struct f2fs_sb_info *sbi, struct extent_tree *et, struct extent_info *ei, struct extent_node *prev_ex, struct extent_node *next_ex) { - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct extent_node *en = NULL; if (prev_ex && __is_back_mergeable(ei, &prev_ex->ei)) { @@ -443,7 +437,7 @@ static struct extent_node *__try_merge_extent_node(struct inode *inode, if (!en) return NULL; - __try_update_largest_extent(inode, et, en); + __try_update_largest_extent(et, en); spin_lock(&sbi->extent_lock); if (!list_empty(&en->list)) { @@ -454,12 +448,11 @@ static struct extent_node *__try_merge_extent_node(struct inode *inode, return en; } -static struct extent_node *__insert_extent_tree(struct inode *inode, +static struct extent_node *__insert_extent_tree(struct f2fs_sb_info
*sbi, struct extent_tree *et, struct extent_info *ei, struct rb_node **insert_p, struct rb_node *insert_parent) { - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct rb_node **p; struct rb_node *parent = NULL; struct extent_node *en = NULL; @@ -470,13 +463,13 @@ static struct extent_node *__insert_extent_tree(struct inode *inode, goto do_insert; } - p = __lookup_rb_tree_for_insert(sbi, &et->root, &parent, ei->fofs); + p = f2fs_lookup_rb_tree_for_insert(sbi, &et->root, &parent, ei->fofs); do_insert: en = __attach_extent_node(sbi, et, ei, parent, p); if (!en) return NULL; - __try_update_largest_extent(inode, et, en); + __try_update_largest_extent(et, en); /* update in global extent list */ spin_lock(&sbi->extent_lock); @@ -497,6 +490,7 @@ static void f2fs_update_extent_tree_range(struct inode *inode, struct rb_node **insert_p = NULL, *insert_parent = NULL; unsigned int end = fofs + len; unsigned int pos = (unsigned int)fofs; + bool updated = false; if (!et) return; @@ -517,10 +511,10 @@ static void f2fs_update_extent_tree_range(struct inode *inode, * drop largest extent before lookup, in case it's already * been shrunk from extent tree */ - __drop_largest_extent(inode, fofs, len); + __drop_largest_extent(et, fofs, len); /* 1. lookup first extent node in range [fofs, fofs + len - 1] */ - en = (struct extent_node *)__lookup_rb_tree_ret(&et->root, + en = (struct extent_node *)f2fs_lookup_rb_tree_ret(&et->root, (struct rb_entry *)et->cached_en, fofs, (struct rb_entry **)&prev_en, (struct rb_entry **)&next_en, @@ -550,7 +544,7 @@ static void f2fs_update_extent_tree_range(struct inode *inode, set_extent_info(&ei, end, end - dei.fofs + dei.blk, org_end - end); - en1 = __insert_extent_tree(inode, et, &ei, + en1 = __insert_extent_tree(sbi, et, &ei, NULL, NULL); next_en = en1; } else { @@ -570,7 +564,7 @@ static void f2fs_update_extent_tree_range(struct inode *inode, } if (parts) - __try_update_largest_extent(inode, et, en); + __try_update_largest_extent(et, en); else __release_extent_node(sbi, et, en); @@ -590,15 +584,16 @@ static void f2fs_update_extent_tree_range(struct inode *inode, if (blkaddr) { set_extent_info(&ei, fofs, blkaddr, len); - if (!__try_merge_extent_node(inode, et, &ei, prev_en, next_en)) - __insert_extent_tree(inode, et, &ei, + if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en)) + __insert_extent_tree(sbi, et, &ei, insert_p, insert_parent); /* give up extent_cache, if split and small updates happen */ if (dei.len >= 1 && prev.len < F2FS_MIN_EXTENT_LEN && et->largest.len < F2FS_MIN_EXTENT_LEN) { - __drop_largest_extent(inode, 0, UINT_MAX); + et->largest.len = 0; + et->largest_updated = true; set_inode_flag(inode, FI_NO_EXTENT); } } @@ -606,7 +601,15 @@ static void f2fs_update_extent_tree_range(struct inode *inode, if (is_inode_flag_set(inode, FI_NO_EXTENT)) __free_extent_tree(sbi, et); + if (et->largest_updated) { + et->largest_updated = false; + updated = true; + } + write_unlock(&et->lock); + + if (updated) + f2fs_mark_inode_dirty_sync(inode, true); } unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) @@ -705,6 +708,7 @@ void f2fs_drop_extent_tree(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct extent_tree *et = F2FS_I(inode)->extent_tree; + bool updated = false; if (!f2fs_may_extent_tree(inode)) return; @@ -713,8 +717,13 @@ void f2fs_drop_extent_tree(struct inode *inode) write_lock(&et->lock); __free_extent_tree(sbi, et); - __drop_largest_extent(inode, 0, UINT_MAX); + if (et->largest.len) { + et->largest.len = 0; + 
updated = true; + } write_unlock(&et->lock); + if (updated) + f2fs_mark_inode_dirty_sync(inode, true); } void f2fs_destroy_extent_tree(struct inode *inode) @@ -773,7 +782,7 @@ void f2fs_update_extent_cache(struct dnode_of_data *dn) else blkaddr = dn->data_blkaddr; - fofs = start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) + + fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) + dn->ofs_in_node; f2fs_update_extent_tree_range(dn->inode, fofs, blkaddr, 1); } @@ -788,7 +797,7 @@ void f2fs_update_extent_cache_range(struct dnode_of_data *dn, f2fs_update_extent_tree_range(dn->inode, fofs, blkaddr, len); } -void init_extent_cache_info(struct f2fs_sb_info *sbi) +void f2fs_init_extent_cache_info(struct f2fs_sb_info *sbi) { INIT_RADIX_TREE(&sbi->extent_tree_root, GFP_NOIO); mutex_init(&sbi->extent_tree_lock); @@ -800,7 +809,7 @@ void init_extent_cache_info(struct f2fs_sb_info *sbi) atomic_set(&sbi->total_ext_node, 0); } -int __init create_extent_cache(void) +int __init f2fs_create_extent_cache(void) { extent_tree_slab = f2fs_kmem_cache_create("f2fs_extent_tree", sizeof(struct extent_tree)); @@ -815,7 +824,7 @@ int __init create_extent_cache(void) return 0; } -void destroy_extent_cache(void) +void f2fs_destroy_extent_cache(void) { kmem_cache_destroy(extent_node_slab); kmem_cache_destroy(extent_tree_slab); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 97c17b3d984c..ae98cdf21f55 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1,16 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/f2fs.h * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
*/ #ifndef _LINUX_F2FS_H #define _LINUX_F2FS_H +#include <linux/uio.h> #include <linux/types.h> #include <linux/page-flags.h> #include <linux/buffer_head.h> @@ -26,6 +24,7 @@ #include <linux/quotaops.h> #include <crypto/hash.h> #include <linux/writeback.h> +#include <linux/overflow.h> #define __FS_HAS_ENCRYPTION IS_ENABLED(CONFIG_F2FS_FS_ENCRYPTION) #include <linux/fscrypt.h> @@ -42,7 +41,6 @@ } while (0) #endif -#ifdef CONFIG_F2FS_FAULT_INJECTION enum { FAULT_KMALLOC, FAULT_KVMALLOC, @@ -55,18 +53,23 @@ enum { FAULT_DIR_DEPTH, FAULT_EVICT_INODE, FAULT_TRUNCATE, - FAULT_IO, + FAULT_READ_IO, FAULT_CHECKPOINT, + FAULT_DISCARD, + FAULT_WRITE_IO, FAULT_MAX, }; +#ifdef CONFIG_F2FS_FAULT_INJECTION +#define F2FS_ALL_FAULT_TYPE ((1 << FAULT_MAX) - 1) + struct f2fs_fault_info { atomic_t inject_ops; unsigned int inject_rate; unsigned int inject_type; }; -extern char *fault_name[FAULT_MAX]; +extern const char *f2fs_fault_name[FAULT_MAX]; #define IS_FAULT_SET(fi, type) ((fi)->inject_type & (1 << (type))) #endif @@ -98,6 +101,7 @@ extern char *fault_name[FAULT_MAX]; #define F2FS_MOUNT_QUOTA 0x00400000 #define F2FS_MOUNT_INLINE_XATTR_SIZE 0x00800000 #define F2FS_MOUNT_RESERVE_ROOT 0x01000000 +#define F2FS_MOUNT_DISABLE_CHECKPOINT 0x02000000 #define F2FS_OPTION(sbi) ((sbi)->mount_opt) #define clear_opt(sbi, option) (F2FS_OPTION(sbi).opt &= ~F2FS_MOUNT_##option) @@ -135,6 +139,9 @@ struct f2fs_mount_info { int alloc_mode; /* segment allocation policy */ int fsync_mode; /* fsync policy */ bool test_dummy_encryption; /* test dummy encryption */ + block_t unusable_cap; /* Amount of space allowed to be + * unusable when disabling checkpoint + */ }; #define F2FS_FEATURE_ENCRYPT 0x0001 @@ -148,13 +155,15 @@ struct f2fs_mount_info { #define F2FS_FEATURE_INODE_CRTIME 0x0100 #define F2FS_FEATURE_LOST_FOUND 0x0200 #define F2FS_FEATURE_VERITY 0x0400 /* reserved */ +#define F2FS_FEATURE_SB_CHKSUM 0x0800 -#define F2FS_HAS_FEATURE(sb, mask) \ - ((F2FS_SB(sb)->raw_super->feature & cpu_to_le32(mask)) != 0) -#define F2FS_SET_FEATURE(sb, mask) \ - (F2FS_SB(sb)->raw_super->feature |= cpu_to_le32(mask)) -#define F2FS_CLEAR_FEATURE(sb, mask) \ - (F2FS_SB(sb)->raw_super->feature &= ~cpu_to_le32(mask)) +#define __F2FS_HAS_FEATURE(raw_super, mask) \ + ((raw_super->feature & cpu_to_le32(mask)) != 0) +#define F2FS_HAS_FEATURE(sbi, mask) __F2FS_HAS_FEATURE(sbi->raw_super, mask) +#define F2FS_SET_FEATURE(sbi, mask) \ + (sbi->raw_super->feature |= cpu_to_le32(mask)) +#define F2FS_CLEAR_FEATURE(sbi, mask) \ + (sbi->raw_super->feature &= ~cpu_to_le32(mask)) /* bio stuffs */ #define REQ_OP_READ READ @@ -237,15 +246,19 @@ enum { #define CP_RECOVERY 0x00000008 #define CP_DISCARD 0x00000010 #define CP_TRIMMED 0x00000020 +#define CP_PAUSE 0x00000040 #define MAX_DISCARD_BLOCKS(sbi) BLKS_PER_SEC(sbi) #define DEF_MAX_DISCARD_REQUEST 8 /* issue 8 discards per round */ -#define DEF_MAX_DISCARD_LEN 512 /* Max. 
2MB per discard */ #define DEF_MIN_DISCARD_ISSUE_TIME 50 /* 50 ms, if exists */ +#define DEF_MID_DISCARD_ISSUE_TIME 500 /* 500 ms, if device busy */ #define DEF_MAX_DISCARD_ISSUE_TIME 60000 /* 60 s, if no candidates */ #define DEF_DISCARD_URGENT_UTIL 80 /* do more discard over 80% */ #define DEF_CP_INTERVAL 60 /* 60 secs */ #define DEF_IDLE_INTERVAL 5 /* 5 secs */ +#define DEF_DISABLE_INTERVAL 5 /* 5 secs */ +#define DEF_DISABLE_QUICK_INTERVAL 1 /* 1 secs */ +#define DEF_UMOUNT_DISCARD_TIMEOUT 5 /* 5 secs */ struct cp_control { int reason; @@ -255,14 +268,24 @@ struct cp_control { }; /* - * For CP/NAT/SIT/SSA readahead + * indicate meta/data type */ enum { META_CP, META_NAT, META_SIT, META_SSA, + META_MAX, META_POR, + DATA_GENERIC, /* check range only */ + DATA_GENERIC_ENHANCE, /* strong check on range and segment bitmap */ + DATA_GENERIC_ENHANCE_READ, /* + * strong check on range and segment + * bitmap but no warning due to race + * condition of read on truncated area + * by extent_cache + */ + META_GENERIC, }; /* for the list of ino */ @@ -287,6 +310,12 @@ struct inode_entry { struct inode *inode; /* vfs inode pointer */ }; +struct fsync_node_entry { + struct list_head list; /* list head */ + struct page *page; /* warm node page pointer */ + unsigned int seq_id; /* sequence id */ +}; + /* for the bitmap indicate blocks to be discarded */ struct discard_entry { struct list_head list; /* list head */ @@ -300,12 +329,13 @@ struct discard_entry { /* max discard pend list number */ #define MAX_PLIST_NUM 512 #define plist_idx(blk_num) ((blk_num) >= MAX_PLIST_NUM ? \ - (MAX_PLIST_NUM - 1) : (blk_num - 1)) + (MAX_PLIST_NUM - 1) : ((blk_num) - 1)) enum { - D_PREP, - D_SUBMIT, - D_DONE, + D_PREP, /* initial */ + D_PARTIAL, /* partially submitted */ + D_SUBMIT, /* all submitted */ + D_DONE, /* finished */ }; struct discard_info { @@ -330,7 +360,10 @@ struct discard_cmd { struct block_device *bdev; /* bdev */ unsigned short ref; /* reference count */ unsigned char state; /* state */ + unsigned char queued; /* queued discard */ int error; /* bio error */ + spinlock_t lock; /* for state/bio_ref updating */ + unsigned short bio_ref; /* bio reference count */ }; enum { @@ -344,12 +377,15 @@ enum { struct discard_policy { int type; /* type of discard */ unsigned int min_interval; /* used for candidates exist */ + unsigned int mid_interval; /* used for device busy */ unsigned int max_interval; /* used for candidates not exist */ unsigned int max_requests; /* # of discards issued per round */ unsigned int io_aware_gran; /* minimum granularity discard not be aware of I/O */ bool io_aware; /* issue discard in idle time */ bool sync; /* submit discard with REQ_SYNC flag */ + bool ordered; /* issue discard by lba order */ unsigned int granularity; /* discard granularity */ + int timeout; /* discard timeout for put_super */ }; struct discard_cmd_control { @@ -365,10 +401,12 @@ struct discard_cmd_control { unsigned int max_discards; /* max. 
discards to be issued */ unsigned int discard_granularity; /* discard granularity */ unsigned int undiscard_blks; /* # of undiscard blocks */ + unsigned int next_pos; /* next discard position */ atomic_t issued_discard; /* # of issued discard */ - atomic_t issing_discard; /* # of issing discard */ + atomic_t queued_discard; /* # of queued discard */ atomic_t discard_cmd_cnt; /* # of cached cmd count */ struct rb_root root; /* root of discard rb-tree */ + bool rbtree_check; /* config for consistence check */ }; /* for the list of fsync inodes, used only during recovery */ @@ -441,6 +479,7 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal, #define F2FS_IOC_SET_PIN_FILE _IOW(F2FS_IOCTL_MAGIC, 13, __u32) #define F2FS_IOC_GET_PIN_FILE _IOR(F2FS_IOCTL_MAGIC, 14, __u32) #define F2FS_IOC_PRECACHE_EXTENTS _IO(F2FS_IOCTL_MAGIC, 15) +#define F2FS_IOC_RESIZE_FS _IOW(F2FS_IOCTL_MAGIC, 16, __u64) #define F2FS_IOC_SET_ENCRYPTION_POLICY FS_IOC_SET_ENCRYPTION_POLICY #define F2FS_IOC_GET_ENCRYPTION_POLICY FS_IOC_GET_ENCRYPTION_POLICY @@ -455,6 +494,7 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal, #define F2FS_GOING_DOWN_METASYNC 0x1 /* going down with metadata */ #define F2FS_GOING_DOWN_NOSYNC 0x2 /* going down */ #define F2FS_GOING_DOWN_METAFLUSH 0x3 /* going down with meta flush */ +#define F2FS_GOING_DOWN_NEED_FSCK 0x4 /* going down to trigger fsck */ #if defined(__KERNEL__) && defined(CONFIG_COMPAT) /* @@ -490,7 +530,6 @@ struct f2fs_flush_device { /* for inline stuff */ #define DEF_INLINE_RESERVED_SIZE 1 -#define DEF_MIN_INLINE_SIZE 1 static inline int get_extra_isize(struct inode *inode); static inline int get_inline_xattr_addrs(struct inode *inode); #define MAX_INLINE_DATA(inode) (sizeof(__le32) * \ @@ -502,8 +541,8 @@ static inline int get_inline_xattr_addrs(struct inode *inode); #define NR_INLINE_DENTRY(inode) (MAX_INLINE_DATA(inode) * BITS_PER_BYTE / \ ((SIZE_OF_DIR_ENTRY + F2FS_SLOT_LEN) * \ BITS_PER_BYTE + 1)) -#define INLINE_DENTRY_BITMAP_SIZE(inode) ((NR_INLINE_DENTRY(inode) + \ - BITS_PER_BYTE - 1) / BITS_PER_BYTE) +#define INLINE_DENTRY_BITMAP_SIZE(inode) \ + DIV_ROUND_UP(NR_INLINE_DENTRY(inode), BITS_PER_BYTE) #define INLINE_RESERVED_SIZE(inode) (MAX_INLINE_DATA(inode) - \ ((SIZE_OF_DIR_ENTRY + F2FS_SLOT_LEN) * \ NR_INLINE_DENTRY(inode) + \ @@ -565,13 +604,15 @@ enum { */ }; +#define DEFAULT_RETRY_IO_COUNT 8 /* maximum retry read IO count */ + +/* maximum retry quota flush count */ +#define DEFAULT_RETRY_QUOTA_FLUSH_COUNT 8 + #define F2FS_LINK_MAX 0xffffffff /* maximum link count per file */ #define MAX_DIR_RA_PAGES 4 /* maximum ra pages of dir */ -/* vector size for gang look-up from extent cache that consists of radix tree */ -#define EXT_TREE_VEC_SIZE 64 - /* for in-memory extent cache entry */ #define F2FS_MIN_EXTENT_LEN 64 /* minimum extent length */ @@ -591,16 +632,8 @@ struct extent_info { }; struct extent_node { - struct rb_node rb_node; - union { - struct { - unsigned int fofs; - unsigned int len; - u32 blk; - }; - struct extent_info ei; /* extent info */ - - }; + struct rb_node rb_node; /* rb node located in rb-tree */ + struct extent_info ei; /* extent info */ struct list_head list; /* node in global extent list of sbi */ struct extent_tree *et; /* extent tree pointer */ }; @@ -613,6 +646,7 @@ struct extent_tree { struct list_head list; /* to be used by sbi->zombie_list */ rwlock_t lock; /* protect extent info rb-tree */ atomic_t node_cnt; /* # of extent node in rb-tree*/ + bool largest_updated; /* largest extent updated */ }; /* @@ 
-634,6 +668,7 @@ struct f2fs_map_blocks { pgoff_t *m_next_pgofs; /* point next possible non-hole pgofs */ pgoff_t *m_next_extent; /* point to next possible extent */ int m_seg_type; + bool m_may_create; /* indicate it is from write path */ }; /* for flag in get_data_block */ @@ -641,6 +676,7 @@ enum { F2FS_GET_BLOCK_DEFAULT, F2FS_GET_BLOCK_FIEMAP, F2FS_GET_BLOCK_BMAP, + F2FS_GET_BLOCK_DIO, F2FS_GET_BLOCK_PRE_DIO, F2FS_GET_BLOCK_PRE_AIO, F2FS_GET_BLOCK_PRECACHE, @@ -657,6 +693,8 @@ enum { #define FADVISE_HOT_BIT 0x20 #define FADVISE_VERITY_BIT 0x40 /* reserved */ +#define FADVISE_MODIFIABLE_BITS (FADVISE_COLD_BIT | FADVISE_HOT_BIT) + #define file_is_cold(inode) is_file(inode, FADVISE_COLD_BIT) #define file_wrong_pino(inode) is_file(inode, FADVISE_LOST_PINO_BIT) #define file_set_cold(inode) set_file(inode, FADVISE_COLD_BIT) @@ -676,15 +714,20 @@ enum { #define DEF_DIR_LEVEL 0 +enum { + GC_FAILURE_PIN, + GC_FAILURE_ATOMIC, + MAX_GC_FAILURE +}; + struct f2fs_inode_info { struct inode vfs_inode; /* serve a vfs inode */ unsigned long i_flags; /* keep an inode flags for ioctl */ unsigned char i_advise; /* use to give file attribute hints */ unsigned char i_dir_level; /* use for dentry level for large dir */ - union { - unsigned int i_current_depth; /* only for directory depth */ - unsigned short i_gc_failures; /* only for regular file */ - }; + unsigned int i_current_depth; /* only for directory depth */ + /* for gc failure statistic */ + unsigned int i_gc_failures[MAX_GC_FAILURE]; unsigned int i_pino; /* parent inode number */ umode_t i_acl_mode; /* keep file acl mode temporarily */ @@ -712,7 +755,9 @@ struct f2fs_inode_info { struct task_struct *inmem_task; /* store inmemory task */ struct mutex inmem_lock; /* lock for inmemory pages */ struct extent_tree *extent_tree; /* cached extent_tree entry */ - struct rw_semaphore dio_rwsem[2];/* avoid racing between dio and gc */ + + /* avoid racing between foreground op and gc */ + struct rw_semaphore i_gc_rwsem[2]; struct rw_semaphore i_mmap_sem; struct rw_semaphore i_xattr_sem; /* avoid racing between reading and changing EAs */ @@ -748,22 +793,22 @@ static inline void set_extent_info(struct extent_info *ei, unsigned int fofs, } static inline bool __is_discard_mergeable(struct discard_info *back, - struct discard_info *front) + struct discard_info *front, unsigned int max_len) { return (back->lstart + back->len == front->lstart) && - (back->len + front->len < DEF_MAX_DISCARD_LEN); + (back->len + front->len <= max_len); } static inline bool __is_discard_back_mergeable(struct discard_info *cur, - struct discard_info *back) + struct discard_info *back, unsigned int max_len) { - return __is_discard_mergeable(back, cur); + return __is_discard_mergeable(back, cur, max_len); } static inline bool __is_discard_front_mergeable(struct discard_info *cur, - struct discard_info *front) + struct discard_info *front, unsigned int max_len) { - return __is_discard_mergeable(cur, front); + return __is_discard_mergeable(cur, front, max_len); } static inline bool __is_extent_mergeable(struct extent_info *back, @@ -786,12 +831,12 @@ static inline bool __is_front_mergeable(struct extent_info *cur, } extern void f2fs_mark_inode_dirty_sync(struct inode *inode, bool sync); -static inline void __try_update_largest_extent(struct inode *inode, - struct extent_tree *et, struct extent_node *en) +static inline void __try_update_largest_extent(struct extent_tree *et, + struct extent_node *en) { if (en->ei.len > et->largest.len) { et->largest = en->ei; - 
f2fs_mark_inode_dirty_sync(inode, true); + et->largest_updated = true; } } @@ -818,6 +863,7 @@ struct f2fs_nm_info { struct radix_tree_root nat_set_root;/* root of the nat set cache */ struct rw_semaphore nat_tree_lock; /* protect nat_tree_lock */ struct list_head nat_entries; /* cached nat entry list (clean) */ + spinlock_t nat_list_lock; /* protect clean nat entry list */ unsigned int nat_cnt; /* the # of cached nat entries */ unsigned int dirty_nat_cnt; /* total num of nat entries in set */ unsigned int nat_blocks; /* # of nat blocks */ @@ -911,7 +957,7 @@ struct flush_cmd_control { struct task_struct *f2fs_issue_flush; /* flush thread */ wait_queue_head_t flush_wait_queue; /* waiting queue for wake-up */ atomic_t issued_flush; /* # of issued flushes */ - atomic_t issing_flush; /* # of issing flushes */ + atomic_t queued_flush; /* # of queued flushes */ struct llist_head issue_list; /* list for command issue */ struct llist_node *dispatch_list; /* list for command dispatch */ }; @@ -944,6 +990,7 @@ struct f2fs_sm_info { unsigned int ipu_policy; /* in-place-update policy */ unsigned int min_ipu_util; /* in-place-update threshold */ unsigned int min_fsync_blocks; /* threshold for fsync */ + unsigned int min_seq_blocks; /* threshold for sequential blocks */ unsigned int min_hot_blocks; /* threshold for hot block allocation */ unsigned int min_ssr_sections; /* threshold to trigger SSR allocation */ @@ -974,6 +1021,11 @@ enum count_type { F2FS_DIRTY_IMETA, F2FS_WB_CP_DATA, F2FS_WB_DATA, + F2FS_RD_DATA, + F2FS_RD_NODE, + F2FS_RD_META, + F2FS_DIO_WRITE, + F2FS_DIO_READ, NR_COUNT_TYPE, }; @@ -1061,9 +1113,13 @@ struct f2fs_io_info { bool submitted; /* indicate IO submission */ int need_lock; /* indicate we need to lock cp_rwsem */ bool in_list; /* indicate fio is in io_list */ - bool is_meta; /* indicate borrow meta inode mapping or not */ + bool is_por; /* indicate IO is from recovery or not */ + bool retry; /* need to reallocate block address */ enum iostat_type io_type; /* io type */ struct writeback_control *io_wbc; /* writeback control */ + struct bio **bio; /* bio for ipu */ + sector_t *last_block; /* last block number in bio */ + unsigned char version; /* version of the node */ }; #define is_read_io(rw) ((rw) == READ) @@ -1086,8 +1142,8 @@ struct f2fs_dev_info { block_t start_blk; block_t end_blk; #ifdef CONFIG_BLK_DEV_ZONED - unsigned int nr_blkz; /* Total number of zones */ - u8 *blkz_type; /* Array of zones type */ + unsigned int nr_blkz; /* Total number of zones */ + unsigned long *blkz_seq; /* Bitmap indicating sequential zones */ #endif }; @@ -1115,15 +1171,34 @@ enum { SBI_POR_DOING, /* recovery is doing or not */ SBI_NEED_SB_WRITE, /* need to recover superblock */ SBI_NEED_CP, /* need to checkpoint */ + SBI_IS_SHUTDOWN, /* shutdown by ioctl */ + SBI_IS_RECOVERED, /* recovered orphan/data */ + SBI_CP_DISABLED, /* CP was disabled last mount */ + SBI_CP_DISABLED_QUICK, /* CP was disabled quickly */ + SBI_QUOTA_NEED_FLUSH, /* need to flush quota info in CP */ + SBI_QUOTA_SKIP_FLUSH, /* skip flushing quota in current CP */ + SBI_QUOTA_NEED_REPAIR, /* quota file may be corrupted */ + SBI_IS_RESIZEFS, /* resizefs is in process */ }; enum { CP_TIME, REQ_TIME, + DISCARD_TIME, + GC_TIME, + DISABLE_TIME, + UMOUNT_DISCARD_TIMEOUT, MAX_TIME, }; enum { + GC_NORMAL, + GC_IDLE_CB, + GC_IDLE_GREEDY, + GC_URGENT, +}; + +enum { WHINT_MODE_OFF, /* not pass down write hints */ WHINT_MODE_USER, /* try to pass down hints given by users */ WHINT_MODE_FS, /* pass down hints with F2FS policy */ @@ -1154,6 
+1229,7 @@ struct f2fs_sb_info { struct rw_semaphore sb_lock; /* lock for raw super block */ int valid_super_block; /* valid super block no */ unsigned long s_flag; /* flags for sbi */ + struct mutex writepages; /* mutex for writepages() */ #ifdef CONFIG_BLK_DEV_ZONED unsigned int blocks_per_blkz; /* F2FS blocks per zone */ @@ -1169,8 +1245,8 @@ struct f2fs_sb_info { /* for bio operations */ struct f2fs_bio_info *write_io[NR_PAGE_TYPE]; /* for write bios */ - struct mutex wio_mutex[NR_PAGE_TYPE - 1][NR_TEMP_TYPE]; - /* bio ordering for NODE/DATA */ + /* keep migration IO order for LFS mode */ + struct rw_semaphore io_order_lock; mempool_t *write_io_dummy; /* Dummy pages */ /* for checkpoint */ @@ -1188,12 +1264,18 @@ struct f2fs_sb_info { struct inode_management im[MAX_INO_ENTRY]; /* manage inode cache */ + spinlock_t fsync_node_lock; /* for node entry lock */ + struct list_head fsync_node_list; /* node list head */ + unsigned int fsync_seg_id; /* sequence id */ + unsigned int fsync_node_num; /* number of node entries */ + /* for orphan inode, use 0'th array */ unsigned int max_orphans; /* max orphan inodes */ /* for inode management */ struct list_head inode_list[NR_INODE_TYPE]; /* dirty inode list */ spinlock_t inode_lock[NR_INODE_TYPE]; /* for dirty inode list lock */ + struct mutex flush_lock; /* for flush exclusion */ /* for extent tree cache */ struct radix_tree_root extent_tree_root;/* cache extent cache entries */ @@ -1217,11 +1299,11 @@ struct f2fs_sb_info { unsigned int segs_per_sec; /* segments per section */ unsigned int secs_per_zone; /* sections per zone */ unsigned int total_sections; /* total section count */ + struct mutex resize_mutex; /* for resize exclusion */ unsigned int total_node_count; /* total node block count */ unsigned int total_valid_node_count; /* valid node block count */ loff_t max_file_blocks; /* max block index of file */ int dir_level; /* directory level */ - unsigned int trigger_ssr_threshold; /* threshold to trigger ssr */ int readdir_ra; /* readahead inode in readdir */ block_t user_block_count; /* # of user blocks */ @@ -1231,9 +1313,11 @@ struct f2fs_sb_info { block_t reserved_blocks; /* configurable reserved blocks */ block_t current_reserved_blocks; /* current reserved blocks */ - unsigned int nquota_files; /* # of quota sysfile */ + /* Additional tracking for no checkpoint mode */ + block_t unusable_block_count; /* # of blocks saved by last cp */ - u32 s_next_generation; /* for NFS support */ + unsigned int nquota_files; /* # of quota sysfile */ + struct rw_semaphore quota_sem; /* blocking cp for flags */ /* # of pages, see count_type */ atomic_t nr_pages[NR_COUNT_TYPE]; @@ -1241,7 +1325,7 @@ struct f2fs_sb_info { struct percpu_counter alloc_valid_block_count; /* writeback control */ - atomic_t wb_sync_req; /* count # of WB_SYNC threads */ + atomic_t wb_sync_req[META]; /* count # of WB_SYNC threads */ /* valid inode count */ struct percpu_counter total_valid_inode_count; @@ -1252,15 +1336,19 @@ struct f2fs_sb_info { struct mutex gc_mutex; /* mutex for GC */ struct f2fs_gc_kthread *gc_thread; /* GC thread */ unsigned int cur_victim_sec; /* current victim section num */ - - /* threshold for converting bg victims for fg */ - u64 fggc_threshold; + unsigned int gc_mode; /* current GC state */ + unsigned int next_victim_seg[2]; /* next segment in victim section */ + /* for skip statistic */ + unsigned long long skipped_atomic_files[2]; /* FG_GC and BG_GC */ + unsigned long long skipped_gc_rwsem; /* FG_GC only */ /* threshold for gc trials on pinned 
files */ u64 gc_pin_file_threshold; /* maximum # of trials to find a victim segment for SSR and GC */ unsigned int max_victim_search; + /* migration granularity of garbage collection, unit: segment */ + unsigned int migration_granularity; /* * for stat information. @@ -1268,6 +1356,7 @@ struct f2fs_sb_info { */ #ifdef CONFIG_F2FS_STAT_FS struct f2fs_stat_info *stat_info; /* FS status information */ + atomic_t meta_count[META_MAX]; /* # of meta blocks */ unsigned int segment_count[2]; /* # of allocated segments */ unsigned int block_count[2]; /* # of allocated blocks */ atomic_t inplace_count; /* # of inplace update */ @@ -1283,6 +1372,8 @@ struct f2fs_sb_info { atomic_t max_aw_cnt; /* max # of atomic writes */ atomic_t max_vw_cnt; /* max # of volatile writes */ int bg_gc; /* background gc calls */ + unsigned int io_skip_bggc; /* skip background gc for in-flight IO */ + unsigned int other_skip_bggc; /* skip background gc for other reasons */ unsigned int ndirty_inode[NR_INODE_TYPE]; /* # of dirty inodes */ #endif spinlock_t stat_lock; /* lock for stat operations */ @@ -1316,10 +1407,17 @@ struct f2fs_sb_info { __u32 s_chksum_seed; }; +struct f2fs_private_dio { + struct inode *inode; + void *orig_private; + bio_end_io_t *orig_end_io; + bool write; +}; + #ifdef CONFIG_F2FS_FAULT_INJECTION -#define f2fs_show_injection_info(type) \ - printk("%sF2FS-fs : inject %s in %s of %pF\n", \ - KERN_INFO, fault_name[type], \ +#define f2fs_show_injection_info(type) \ + printk_ratelimited("%sF2FS-fs : inject %s in %s of %pF\n", \ + KERN_INFO, f2fs_fault_name[type], \ __func__, __builtin_return_address(0)) static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type) { @@ -1338,8 +1436,25 @@ static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type) } return false; } +#else +#define f2fs_show_injection_info(type) do { } while (0) +static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type) +{ + return false; +} #endif +/* + * Test if the mounted volume is a multi-device volume. + * - For a single regular disk volume, sbi->s_ndevs is 0. + * - For a single zoned disk volume, sbi->s_ndevs is 1. + * - For a multi-device volume, sbi->s_ndevs is always 2 or more. + */ +static inline bool f2fs_is_multi_device(struct f2fs_sb_info *sbi) +{ + return sbi->s_ndevs > 1; +} + /* For write statistics. Suppose sector size is 512 bytes, * and the return value is in kbytes. s is of struct f2fs_sb_info. 
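 *
 * Worked example (editorial): kbytes = sectors * 512 / 1024 = sectors / 2,
 * so the sector delta sampled via part_stat_read() only needs a right
 * shift by one; e.g. a delta of 4096 sectors corresponds to 2048 kbytes.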
*/ @@ -1349,7 +1464,15 @@ static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type) static inline void f2fs_update_time(struct f2fs_sb_info *sbi, int type) { - sbi->last_time[type] = jiffies; + unsigned long now = jiffies; + + sbi->last_time[type] = now; + + /* DISCARD_TIME and GC_TIME are based on REQ_TIME */ + if (type == REQ_TIME) { + sbi->last_time[DISCARD_TIME] = now; + sbi->last_time[GC_TIME] = now; + } } static inline bool f2fs_time_over(struct f2fs_sb_info *sbi, int type) @@ -1359,16 +1482,18 @@ static inline bool f2fs_time_over(struct f2fs_sb_info *sbi, int type) return time_after(jiffies, sbi->last_time[type] + interval); } -static inline bool is_idle(struct f2fs_sb_info *sbi) +static inline unsigned int f2fs_time_to_wait(struct f2fs_sb_info *sbi, + int type) { - struct block_device *bdev = sbi->sb->s_bdev; - struct request_queue *q = bdev_get_queue(bdev); - struct request_list *rl = &q->root_rl; + unsigned long interval = sbi->interval_time[type] * HZ; + unsigned int wait_ms = 0; + long delta; - if (rl->count[BLK_RW_SYNC] || rl->count[BLK_RW_ASYNC]) - return 0; + delta = (sbi->last_time[type] + interval) - jiffies; + if (delta > 0) + wait_ms = jiffies_to_msecs(delta); - return f2fs_time_over(sbi, REQ_TIME); + return wait_ms; } /* @@ -1435,7 +1560,7 @@ static inline struct f2fs_sb_info *F2FS_M_SB(struct address_space *mapping) static inline struct f2fs_sb_info *F2FS_P_SB(struct page *page) { - return F2FS_M_SB(page->mapping); + return F2FS_M_SB(page_file_mapping(page)); } static inline struct f2fs_super_block *F2FS_RAW_SUPER(struct f2fs_sb_info *sbi) @@ -1578,12 +1703,16 @@ static inline void disable_nat_bits(struct f2fs_sb_info *sbi, bool lock) { unsigned long flags; - set_sbi_flag(sbi, SBI_NEED_FSCK); + /* + * In order to re-enable nat_bits we need to call fsck.f2fs by + * set_sbi_flag(sbi, SBI_NEED_FSCK). But it may give huge cost, + * so let's rely on regular fsck or unclean shutdown. + */ if (lock) spin_lock_irqsave(&sbi->cp_lock, flags); __clear_ckpt_flags(F2FS_CKPT(sbi), CP_NAT_BITS_FLAG); - kfree(NM_I(sbi)->nat_bits); + kvfree(NM_I(sbi)->nat_bits); NM_I(sbi)->nat_bits = NULL; if (lock) spin_unlock_irqrestore(&sbi->cp_lock, flags); @@ -1645,18 +1774,6 @@ static inline bool __exist_node_summaries(struct f2fs_sb_info *sbi) } /* - * Check whether the given nid is within node id range. - */ -static inline int check_nid_range(struct f2fs_sb_info *sbi, nid_t nid) -{ - if (unlikely(nid < F2FS_ROOT_INO(sbi))) - return -EINVAL; - if (unlikely(nid >= NM_I(sbi)->max_nid)) - return -EINVAL; - return 0; -} - -/* * Check whether the inode has blocks or not */ static inline int F2FS_HAS_BLOCKS(struct inode *inode) @@ -1702,13 +1819,12 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi, if (ret) return ret; -#ifdef CONFIG_F2FS_FAULT_INJECTION if (time_to_inject(sbi, FAULT_BLOCK)) { f2fs_show_injection_info(FAULT_BLOCK); release = *count; goto enospc; } -#endif + /* * let's increase this in prior to actual block count change in order * for f2fs_sync_file to avoid data races when deciding checkpoint. 
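 *
 * Ordering sketch (editorial, condensed from the function body below):
 *
 *	percpu_counter_add(&sbi->alloc_valid_block_count, (*count));
 *	spin_lock(&sbi->stat_lock);
 *	sbi->total_valid_block_count += (block_t)(*count);
 *	avail_user_block_count = sbi->user_block_count -
 *					sbi->current_reserved_blocks;
 *	...
 *	spin_unlock(&sbi->stat_lock);
 *
 * f2fs_sync_file() samples alloc_valid_block_count when deciding whether
 * a checkpoint is required, hence the per-cpu counter is raised before
 * the locked update of total_valid_block_count.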
@@ -1722,7 +1838,12 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi, if (!__allow_reserved_blocks(sbi, inode, true)) avail_user_block_count -= F2FS_OPTION(sbi).root_reserved_blocks; - + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { + if (avail_user_block_count > sbi->unusable_block_count) + avail_user_block_count -= sbi->unusable_block_count; + else + avail_user_block_count = 0; + } if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) { diff = sbi->total_valid_block_count - avail_user_block_count; if (diff > *count) @@ -1732,22 +1853,38 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi, sbi->total_valid_block_count -= diff; if (!*count) { spin_unlock(&sbi->stat_lock); - percpu_counter_sub(&sbi->alloc_valid_block_count, diff); goto enospc; } } spin_unlock(&sbi->stat_lock); - if (unlikely(release)) + if (unlikely(release)) { + percpu_counter_sub(&sbi->alloc_valid_block_count, release); dquot_release_reservation_block(inode, release); + } f2fs_i_blocks_write(inode, *count, true, true); return 0; enospc: + percpu_counter_sub(&sbi->alloc_valid_block_count, release); dquot_release_reservation_block(inode, release); return -ENOSPC; } +__printf(2, 3) +void f2fs_printk(struct f2fs_sb_info *sbi, const char *fmt, ...); + +#define f2fs_err(sbi, fmt, ...) \ + f2fs_printk(sbi, KERN_ERR fmt, ##__VA_ARGS__) +#define f2fs_warn(sbi, fmt, ...) \ + f2fs_printk(sbi, KERN_WARNING fmt, ##__VA_ARGS__) +#define f2fs_notice(sbi, fmt, ...) \ + f2fs_printk(sbi, KERN_NOTICE fmt, ##__VA_ARGS__) +#define f2fs_info(sbi, fmt, ...) \ + f2fs_printk(sbi, KERN_INFO fmt, ##__VA_ARGS__) +#define f2fs_debug(sbi, fmt, ...) \ + f2fs_printk(sbi, KERN_DEBUG fmt, ##__VA_ARGS__) + static inline void dec_valid_block_count(struct f2fs_sb_info *sbi, struct inode *inode, block_t count) @@ -1756,13 +1893,20 @@ static inline void dec_valid_block_count(struct f2fs_sb_info *sbi, spin_lock(&sbi->stat_lock); f2fs_bug_on(sbi, sbi->total_valid_block_count < (block_t) count); - f2fs_bug_on(sbi, inode->i_blocks < sectors); sbi->total_valid_block_count -= (block_t)count; if (sbi->reserved_blocks && sbi->current_reserved_blocks < sbi->reserved_blocks) sbi->current_reserved_blocks = min(sbi->reserved_blocks, sbi->current_reserved_blocks + count); spin_unlock(&sbi->stat_lock); + if (unlikely(inode->i_blocks < sectors)) { + f2fs_warn(sbi, "Inconsistent i_blocks, ino:%lu, iblocks:%llu, sectors:%llu", + inode->i_ino, + (unsigned long long)inode->i_blocks, + (unsigned long long)sectors); + set_sbi_flag(sbi, SBI_NEED_FSCK); + return; + } f2fs_i_blocks_write(inode, count, false, true); } @@ -1770,11 +1914,12 @@ static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type) { atomic_inc(&sbi->nr_pages[count_type]); - if (count_type == F2FS_DIRTY_DATA || count_type == F2FS_INMEM_PAGES || - count_type == F2FS_WB_CP_DATA || count_type == F2FS_WB_DATA) - return; - - set_sbi_flag(sbi, SBI_IS_DIRTY); + if (count_type == F2FS_DIRTY_DENTS || + count_type == F2FS_DIRTY_NODES || + count_type == F2FS_DIRTY_META || + count_type == F2FS_DIRTY_QDATA || + count_type == F2FS_DIRTY_IMETA) + set_sbi_flag(sbi, SBI_IS_DIRTY); } static inline void inode_inc_dirty_pages(struct inode *inode) @@ -1859,7 +2004,11 @@ static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag) if (is_set_ckpt_flags(sbi, CP_LARGE_NAT_BITMAP_FLAG)) { offset = (flag == SIT_BITMAP) ? 
le32_to_cpu(ckpt->nat_ver_bitmap_bytesize) : 0; - return &ckpt->sit_nat_version_bitmap + offset; + /* + * if large_nat_bitmap feature is enabled, leave checksum + * protection for all nat/sit bitmaps. + */ + return &ckpt->sit_nat_version_bitmap + offset + sizeof(__le32); } if (__cp_payload(sbi) > 0) { @@ -1906,21 +2055,25 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi, struct inode *inode, bool is_inode) { block_t valid_block_count; - unsigned int valid_node_count; - bool quota = inode && !is_inode; + unsigned int valid_node_count, user_block_count; + int err; - if (quota) { - int ret = dquot_reserve_block(inode, 1); - if (ret) - return ret; + if (is_inode) { + if (inode) { + err = dquot_alloc_inode(inode); + if (err) + return err; + } + } else { + err = dquot_reserve_block(inode, 1); + if (err) + return err; } -#ifdef CONFIG_F2FS_FAULT_INJECTION if (time_to_inject(sbi, FAULT_BLOCK)) { f2fs_show_injection_info(FAULT_BLOCK); goto enospc; } -#endif spin_lock(&sbi->stat_lock); @@ -1929,8 +2082,11 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi, if (!__allow_reserved_blocks(sbi, inode, false)) valid_block_count += F2FS_OPTION(sbi).root_reserved_blocks; + user_block_count = sbi->user_block_count; + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) + user_block_count -= sbi->unusable_block_count; - if (unlikely(valid_block_count > sbi->user_block_count)) { + if (unlikely(valid_block_count > user_block_count)) { spin_unlock(&sbi->stat_lock); goto enospc; } @@ -1956,8 +2112,12 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi, return 0; enospc: - if (quota) + if (is_inode) { + if (inode) + dquot_free_inode(inode); + } else { dquot_release_reservation_block(inode, 1); + } return -ENOSPC; } @@ -1968,7 +2128,6 @@ static inline void dec_valid_node_count(struct f2fs_sb_info *sbi, f2fs_bug_on(sbi, !sbi->total_valid_block_count); f2fs_bug_on(sbi, !sbi->total_valid_node_count); - f2fs_bug_on(sbi, !is_inode && !inode->i_blocks); sbi->total_valid_node_count--; sbi->total_valid_block_count--; @@ -1978,8 +2137,18 @@ static inline void dec_valid_node_count(struct f2fs_sb_info *sbi, spin_unlock(&sbi->stat_lock); - if (!is_inode) + if (is_inode) { + dquot_free_inode(inode); + } else { + if (unlikely(inode->i_blocks == 0)) { + f2fs_warn(sbi, "Inconsistent i_blocks, ino:%lu, iblocks:%llu", + inode->i_ino, + (unsigned long long)inode->i_blocks); + set_sbi_flag(sbi, SBI_NEED_FSCK); + return; + } f2fs_i_blocks_write(inode, 1, false, true); + } } static inline unsigned int valid_node_count(struct f2fs_sb_info *sbi) @@ -2005,17 +2174,23 @@ static inline s64 valid_inode_count(struct f2fs_sb_info *sbi) static inline struct page *f2fs_grab_cache_page(struct address_space *mapping, pgoff_t index, bool for_write) { -#ifdef CONFIG_F2FS_FAULT_INJECTION - struct page *page = find_lock_page(mapping, index); + struct page *page; - if (page) - return page; + if (IS_ENABLED(CONFIG_F2FS_FAULT_INJECTION)) { + if (!for_write) + page = find_get_page_flags(mapping, index, + FGP_LOCK | FGP_ACCESSED); + else + page = find_lock_page(mapping, index); + if (page) + return page; - if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_ALLOC)) { - f2fs_show_injection_info(FAULT_PAGE_ALLOC); - return NULL; + if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_ALLOC)) { + f2fs_show_injection_info(FAULT_PAGE_ALLOC); + return NULL; + } } -#endif + if (!for_write) return grab_cache_page(mapping, index); return grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS); @@ -2025,12 +2200,11 @@ 
static inline struct page *f2fs_pagecache_get_page( struct address_space *mapping, pgoff_t index, int fgp_flags, gfp_t gfp_mask) { -#ifdef CONFIG_F2FS_FAULT_INJECTION if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_GET)) { f2fs_show_injection_info(FAULT_PAGE_GET); return NULL; } -#endif + return pagecache_get_page(mapping, index, fgp_flags, gfp_mask); } @@ -2095,15 +2269,37 @@ static inline struct bio *f2fs_bio_alloc(struct f2fs_sb_info *sbi, bio = bio_alloc(GFP_NOIO | __GFP_NOFAIL, npages); return bio; } -#ifdef CONFIG_F2FS_FAULT_INJECTION if (time_to_inject(sbi, FAULT_ALLOC_BIO)) { f2fs_show_injection_info(FAULT_ALLOC_BIO); return NULL; } -#endif + return bio_alloc(GFP_KERNEL, npages); } +static inline bool is_idle(struct f2fs_sb_info *sbi, int type) +{ + if (sbi->gc_mode == GC_URGENT) + return true; + + if (get_pages(sbi, F2FS_RD_DATA) || get_pages(sbi, F2FS_RD_NODE) || + get_pages(sbi, F2FS_RD_META) || get_pages(sbi, F2FS_WB_DATA) || + get_pages(sbi, F2FS_WB_CP_DATA) || + get_pages(sbi, F2FS_DIO_READ) || + get_pages(sbi, F2FS_DIO_WRITE)) + return false; + + if (type != DISCARD_TIME && SM_I(sbi) && SM_I(sbi)->dcc_info && + atomic_read(&SM_I(sbi)->dcc_info->queued_discard)) + return false; + + if (SM_I(sbi) && SM_I(sbi)->fcc_info && + atomic_read(&SM_I(sbi)->fcc_info->queued_flush)) + return false; + + return f2fs_time_over(sbi, type); +} + static inline void f2fs_radix_tree_insert(struct radix_tree_root *root, unsigned long index, void *item) { @@ -2214,9 +2410,27 @@ static inline void f2fs_change_bit(unsigned int nr, char *addr) *addr ^= mask; } -#define F2FS_REG_FLMASK (~(FS_DIRSYNC_FL | FS_TOPDIR_FL)) -#define F2FS_OTHER_FLMASK (FS_NODUMP_FL | FS_NOATIME_FL) -#define F2FS_FL_INHERITED (FS_PROJINHERIT_FL) +/* + * On-disk inode flags (f2fs_inode::i_flags) + */ +#define F2FS_SYNC_FL 0x00000008 /* Synchronous updates */ +#define F2FS_IMMUTABLE_FL 0x00000010 /* Immutable file */ +#define F2FS_APPEND_FL 0x00000020 /* writes to file may only append */ +#define F2FS_NODUMP_FL 0x00000040 /* do not dump file */ +#define F2FS_NOATIME_FL 0x00000080 /* do not update atime */ +#define F2FS_INDEX_FL 0x00001000 /* hash-indexed directory */ +#define F2FS_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ +#define F2FS_PROJINHERIT_FL 0x20000000 /* Create with parents projid */ + +/* Flags that should be inherited by new inodes from their parent. */ +#define F2FS_FL_INHERITED (F2FS_SYNC_FL | F2FS_NODUMP_FL | F2FS_NOATIME_FL | \ + F2FS_DIRSYNC_FL | F2FS_PROJINHERIT_FL) + +/* Flags that are appropriate for regular files (all but dir-specific ones). */ +#define F2FS_REG_FLMASK (~(F2FS_DIRSYNC_FL | F2FS_PROJINHERIT_FL)) + +/* Flags that are appropriate for non-directories/regular files. 
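+ *
+ * Editorial sketch of how f2fs_mask_flags() below applies these masks:
+ * directories keep every flag, regular files are filtered through
+ * F2FS_REG_FLMASK, and everything else (symlinks, device nodes) through
+ * F2FS_OTHER_FLMASK:
+ *
+ *	if (S_ISDIR(mode))
+ *		return flags;
+ *	else if (S_ISREG(mode))
+ *		return flags & F2FS_REG_FLMASK;
+ *	else
+ *		return flags & F2FS_OTHER_FLMASK;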
*/ +#define F2FS_OTHER_FLMASK (F2FS_NODUMP_FL | F2FS_NOATIME_FL) static inline __u32 f2fs_mask_flags(umode_t mode, __u32 flags) { @@ -2259,6 +2473,7 @@ enum { FI_EXTRA_ATTR, /* indicate file has extra attribute */ FI_PROJ_INHERIT, /* indicate file inherits projectid */ FI_PIN_FILE, /* indicate file should not be gced */ + FI_ATOMIC_REVOKE_REQUEST, /* request to drop atomic data */ }; static inline void __mark_inode_dirty_flag(struct inode *inode, @@ -2271,6 +2486,7 @@ static inline void __mark_inode_dirty_flag(struct inode *inode, case FI_NEW_INODE: if (set) return; + /* fall through */ case FI_DATA_EXIST: case FI_INLINE_DOTS: case FI_PIN_FILE: @@ -2357,7 +2573,7 @@ static inline void f2fs_i_depth_write(struct inode *inode, unsigned int depth) static inline void f2fs_i_gc_failures_write(struct inode *inode, unsigned int count) { - F2FS_I(inode)->i_gc_failures = count; + F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN] = count; f2fs_mark_inode_dirty_sync(inode, true); } @@ -2423,9 +2639,18 @@ static inline int f2fs_has_inline_xattr(struct inode *inode) return is_inode_flag_set(inode, FI_INLINE_XATTR); } +#define ALIGN_DOWN(x, a) __ALIGN_KERNEL((x) - ((a) - 1), (a)) + static inline unsigned int addrs_per_inode(struct inode *inode) { - return CUR_ADDRS_PER_INODE(inode) - get_inline_xattr_addrs(inode); + unsigned int addrs = CUR_ADDRS_PER_INODE(inode) - + get_inline_xattr_addrs(inode); + return ALIGN_DOWN(addrs, 1); +} + +static inline unsigned int addrs_per_block(struct inode *inode) +{ + return ALIGN_DOWN(DEF_ADDRS_PER_BLOCK, 1); } static inline void *inline_xattr_addr(struct inode *inode, struct page *page) @@ -2438,7 +2663,9 @@ static inline void *inline_xattr_addr(struct inode *inode, struct page *page) static inline int inline_xattr_size(struct inode *inode) { - return get_inline_xattr_addrs(inode) * sizeof(__le32); + if (f2fs_has_inline_xattr(inode)) + return get_inline_xattr_addrs(inode) * sizeof(__le32); + return 0; } static inline int f2fs_has_inline_data(struct inode *inode) @@ -2573,33 +2800,47 @@ static inline bool is_dot_dotdot(const struct qstr *str) static inline bool f2fs_may_extent_tree(struct inode *inode) { - if (!test_opt(F2FS_I_SB(inode), EXTENT_CACHE) || + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + + if (!test_opt(sbi, EXTENT_CACHE) || is_inode_flag_set(inode, FI_NO_EXTENT)) return false; + /* + * for recovered files during mount do not create extents + * if shrinker is not registered. 
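+ *
+ * (Editorial note: f2fs_join_shrinker() adds the sb to the global list
+ * backing ->s_list and f2fs_leave_shrinker() removes it, so an empty
+ * list means cached extent nodes could never be reclaimed under memory
+ * pressure; the check below therefore bails out early.)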
+ */ + if (list_empty(&sbi->s_list)) + return false; + return S_ISREG(inode->i_mode); } +static inline void *kvmalloc(size_t size, gfp_t flags) +{ + void *ret; + + ret = kmalloc(size, flags | __GFP_NOWARN); + if (!ret) + ret = __vmalloc(size, flags, PAGE_KERNEL); + return ret; +} + static inline void *f2fs_kmalloc(struct f2fs_sb_info *sbi, size_t size, gfp_t flags) { -#ifdef CONFIG_F2FS_FAULT_INJECTION + void *ret; + if (time_to_inject(sbi, FAULT_KMALLOC)) { f2fs_show_injection_info(FAULT_KMALLOC); return NULL; } -#endif - return kmalloc(size, flags); -} -static inline void *kvmalloc(size_t size, gfp_t flags) -{ - void *ret; + ret = kmalloc(size, flags); + if (ret) + return ret; - ret = kmalloc(size, flags | __GFP_NOWARN); - if (!ret) - ret = __vmalloc(size, flags, PAGE_KERNEL); - return ret; + return kvmalloc(size, flags); } static inline void *kvzalloc(size_t size, gfp_t flags) @@ -2621,12 +2862,11 @@ static inline void *f2fs_kzalloc(struct f2fs_sb_info *sbi, static inline void *f2fs_kvmalloc(struct f2fs_sb_info *sbi, size_t size, gfp_t flags) { -#ifdef CONFIG_F2FS_FAULT_INJECTION if (time_to_inject(sbi, FAULT_KVMALLOC)) { f2fs_show_injection_info(FAULT_KVMALLOC); return NULL; } -#endif + return kvmalloc(size, flags); } @@ -2646,7 +2886,7 @@ static inline int get_inline_xattr_addrs(struct inode *inode) return F2FS_I(inode)->i_inline_xattr_size; } -#define get_inode_mode(i) \ +#define f2fs_get_inode_mode(i) \ ((is_inode_flag_set(i, FI_ACL_MODE)) ? \ (F2FS_I(i)->i_acl_mode) : ((i)->i_mode)) @@ -2656,9 +2896,9 @@ static inline int get_inline_xattr_addrs(struct inode *inode) #define F2FS_OLD_ATTRIBUTE_SIZE (offsetof(struct f2fs_inode, i_addr)) #define F2FS_FITS_IN_INODE(f2fs_inode, extra_isize, field) \ - ((offsetof(typeof(*f2fs_inode), field) + \ + ((offsetof(typeof(*(f2fs_inode)), field) + \ sizeof((f2fs_inode)->field)) \ - <= (F2FS_OLD_ATTRIBUTE_SIZE + extra_isize)) \ + <= (F2FS_OLD_ATTRIBUTE_SIZE + (extra_isize))) \ static inline void f2fs_reset_iostat(struct f2fs_sb_info *sbi) { @@ -2685,18 +2925,62 @@ static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi, spin_unlock(&sbi->iostat_lock); } +#define __is_large_section(sbi) ((sbi)->segs_per_sec > 1) + +#define __is_meta_io(fio) (PAGE_TYPE_OF_BIO((fio)->type) == META) + +bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, + block_t blkaddr, int type); +static inline void verify_blkaddr(struct f2fs_sb_info *sbi, + block_t blkaddr, int type) +{ + if (!f2fs_is_valid_blkaddr(sbi, blkaddr, type)) { + f2fs_err(sbi, "invalid blkaddr: %u, type: %d, run fsck to fix.", + blkaddr, type); + f2fs_bug_on(sbi, 1); + } +} + +static inline bool __is_valid_data_blkaddr(block_t blkaddr) +{ + if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) + return false; + return true; +} + +static inline void f2fs_set_page_private(struct page *page, + unsigned long data) +{ + if (PagePrivate(page)) + return; + + get_page(page); + SetPagePrivate(page); + set_page_private(page, data); +} + +static inline void f2fs_clear_page_private(struct page *page) +{ + if (!PagePrivate(page)) + return; + + set_page_private(page, 0); + ClearPagePrivate(page); + f2fs_put_page(page, 0); +} + /* * file.c */ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync); -void truncate_data_blocks(struct dnode_of_data *dn); -int truncate_blocks(struct inode *inode, u64 from, bool lock); +void f2fs_truncate_data_blocks(struct dnode_of_data *dn); +int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock); int f2fs_truncate(struct inode *inode); int 
f2fs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); int f2fs_setattr(struct dentry *dentry, struct iattr *attr); -int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end); -void truncate_data_blocks_range(struct dnode_of_data *dn, int count); +int f2fs_truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end); +void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count); int f2fs_precache_extents(struct inode *inode); long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg); @@ -2710,38 +2994,37 @@ bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct page *page); void f2fs_inode_chksum_set(struct f2fs_sb_info *sbi, struct page *page); struct inode *f2fs_iget(struct super_block *sb, unsigned long ino); struct inode *f2fs_iget_retry(struct super_block *sb, unsigned long ino); -int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink); -void update_inode(struct inode *inode, struct page *node_page); -void update_inode_page(struct inode *inode); +int f2fs_try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink); +void f2fs_update_inode(struct inode *inode, struct page *node_page); +void f2fs_update_inode_page(struct inode *inode); int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc); void f2fs_evict_inode(struct inode *inode); -void handle_failed_inode(struct inode *inode); +void f2fs_handle_failed_inode(struct inode *inode); /* * namei.c */ -int update_extension_list(struct f2fs_sb_info *sbi, const char *name, +int f2fs_update_extension_list(struct f2fs_sb_info *sbi, const char *name, bool hot, bool set); struct dentry *f2fs_get_parent(struct dentry *child); /* * dir.c */ -void set_de_type(struct f2fs_dir_entry *de, umode_t mode); -unsigned char get_de_type(struct f2fs_dir_entry *de); -struct f2fs_dir_entry *find_target_dentry(struct fscrypt_name *fname, +unsigned char f2fs_get_de_type(struct f2fs_dir_entry *de); +struct f2fs_dir_entry *f2fs_find_target_dentry(struct fscrypt_name *fname, f2fs_hash_t namehash, int *max_slots, struct f2fs_dentry_ptr *d); int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, unsigned int start_pos, struct fscrypt_str *fstr); -void do_make_empty_dir(struct inode *inode, struct inode *parent, +void f2fs_do_make_empty_dir(struct inode *inode, struct inode *parent, struct f2fs_dentry_ptr *d); -struct page *init_inode_metadata(struct inode *inode, struct inode *dir, +struct page *f2fs_init_inode_metadata(struct inode *inode, struct inode *dir, const struct qstr *new_name, const struct qstr *orig_name, struct page *dpage); -void update_parent_metadata(struct inode *dir, struct inode *inode, +void f2fs_update_parent_metadata(struct inode *dir, struct inode *inode, unsigned int current_depth); -int room_for_filename(const void *bitmap, int slots, int max_slots); +int f2fs_room_for_filename(const void *bitmap, int slots, int max_slots); void f2fs_drop_nlink(struct inode *dir, struct inode *inode); struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir, struct fscrypt_name *fname, struct page **res_page); @@ -2758,9 +3041,9 @@ void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *d, int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name, const struct qstr *orig_name, struct inode *inode, nid_t ino, umode_t mode); -int __f2fs_do_add_link(struct inode *dir, struct fscrypt_name *fname, +int f2fs_add_dentry(struct inode 
*dir, struct fscrypt_name *fname, struct inode *inode, nid_t ino, umode_t mode); -int __f2fs_add_link(struct inode *dir, const struct qstr *name, +int f2fs_do_add_link(struct inode *dir, const struct qstr *name, struct inode *inode, nid_t ino, umode_t mode); void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, struct inode *dir, struct inode *inode); @@ -2769,7 +3052,7 @@ bool f2fs_empty_dir(struct inode *dir); static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode) { - return __f2fs_add_link(d_inode(dentry->d_parent), &dentry->d_name, + return f2fs_do_add_link(d_inode(dentry->d_parent), &dentry->d_name, inode, inode->i_ino, inode->i_mode); } @@ -2779,12 +3062,11 @@ static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode) int f2fs_inode_dirtied(struct inode *inode, bool sync); void f2fs_inode_synced(struct inode *inode); int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly); +int f2fs_quota_sync(struct super_block *sb, int type); void f2fs_quota_off_umount(struct super_block *sb); int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover); int f2fs_sync_fs(struct super_block *sb, int sync); -extern __printf(3, 4) -void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...); -int sanity_check_ckpt(struct f2fs_sb_info *sbi); +int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi); /* * hash.c @@ -2798,138 +3080,161 @@ f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info, struct dnode_of_data; struct node_info; -bool available_free_memory(struct f2fs_sb_info *sbi, int type); -int need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid); -bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid); -bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino); -void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni); -pgoff_t get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs); -int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode); -int truncate_inode_blocks(struct inode *inode, pgoff_t from); -int truncate_xattr_node(struct inode *inode); -int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino); -int remove_inode_page(struct inode *inode); -struct page *new_inode_page(struct inode *inode); -struct page *new_node_page(struct dnode_of_data *dn, unsigned int ofs); -void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid); -struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid); -struct page *get_node_page_ra(struct page *parent, int start); -void move_node_page(struct page *node_page, int gc_type); -int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, - struct writeback_control *wbc, bool atomic); -int sync_node_pages(struct f2fs_sb_info *sbi, struct writeback_control *wbc, +int f2fs_check_nid_range(struct f2fs_sb_info *sbi, nid_t nid); +bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type); +bool f2fs_in_warm_node_list(struct f2fs_sb_info *sbi, struct page *page); +void f2fs_init_fsync_node_info(struct f2fs_sb_info *sbi); +void f2fs_del_fsync_node_entry(struct f2fs_sb_info *sbi, struct page *page); +void f2fs_reset_fsync_node_info(struct f2fs_sb_info *sbi); +int f2fs_need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid); +bool f2fs_is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid); +bool f2fs_need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino); +int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid, + struct node_info *ni); +pgoff_t f2fs_get_next_page_offset(struct 
dnode_of_data *dn, pgoff_t pgofs); +int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode); +int f2fs_truncate_inode_blocks(struct inode *inode, pgoff_t from); +int f2fs_truncate_xattr_node(struct inode *inode); +int f2fs_wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, + unsigned int seq_id); +int f2fs_remove_inode_page(struct inode *inode); +struct page *f2fs_new_inode_page(struct inode *inode); +struct page *f2fs_new_node_page(struct dnode_of_data *dn, unsigned int ofs); +void f2fs_ra_node_page(struct f2fs_sb_info *sbi, nid_t nid); +struct page *f2fs_get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid); +struct page *f2fs_get_node_page_ra(struct page *parent, int start); +int f2fs_move_node_page(struct page *node_page, int gc_type); +int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, + struct writeback_control *wbc, bool atomic, + unsigned int *seq_id); +int f2fs_sync_node_pages(struct f2fs_sb_info *sbi, + struct writeback_control *wbc, bool do_balance, enum iostat_type io_type); -void build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount); -bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid); -void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid); -void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid); -int try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink); -void recover_inline_xattr(struct inode *inode, struct page *page); -int recover_xattr_data(struct inode *inode, struct page *page); -int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page); -void restore_node_summary(struct f2fs_sb_info *sbi, +int f2fs_build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount); +bool f2fs_alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid); +void f2fs_alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid); +void f2fs_alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid); +int f2fs_try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink); +void f2fs_recover_inline_xattr(struct inode *inode, struct page *page); +int f2fs_recover_xattr_data(struct inode *inode, struct page *page); +int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page); +int f2fs_restore_node_summary(struct f2fs_sb_info *sbi, unsigned int segno, struct f2fs_summary_block *sum); -void flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc); -int build_node_manager(struct f2fs_sb_info *sbi); -void destroy_node_manager(struct f2fs_sb_info *sbi); -int __init create_node_manager_caches(void); -void destroy_node_manager_caches(void); +int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc); +int f2fs_build_node_manager(struct f2fs_sb_info *sbi); +void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi); +int __init f2fs_create_node_manager_caches(void); +void f2fs_destroy_node_manager_caches(void); /* * segment.c */ -bool need_SSR(struct f2fs_sb_info *sbi); -void register_inmem_page(struct inode *inode, struct page *page); -void drop_inmem_pages_all(struct f2fs_sb_info *sbi); -void drop_inmem_pages(struct inode *inode); -void drop_inmem_page(struct inode *inode, struct page *page); -int commit_inmem_pages(struct inode *inode); +bool f2fs_need_SSR(struct f2fs_sb_info *sbi); +void f2fs_register_inmem_page(struct inode *inode, struct page *page); +void f2fs_drop_inmem_pages_all(struct f2fs_sb_info *sbi, bool gc_failure); +void f2fs_drop_inmem_pages(struct inode *inode); +void f2fs_drop_inmem_page(struct inode *inode, struct page *page); +int f2fs_commit_inmem_pages(struct inode *inode); void 
f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need); void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi); int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino); -int create_flush_cmd_control(struct f2fs_sb_info *sbi); +int f2fs_create_flush_cmd_control(struct f2fs_sb_info *sbi); int f2fs_flush_device_cache(struct f2fs_sb_info *sbi); -void destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free); -void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr); -bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr); -void drop_discard_cmd(struct f2fs_sb_info *sbi); -void stop_discard_thread(struct f2fs_sb_info *sbi); -bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi); -void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc); -void release_discard_addrs(struct f2fs_sb_info *sbi); -int npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra); -void allocate_new_segments(struct f2fs_sb_info *sbi); +void f2fs_destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free); +void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr); +bool f2fs_is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr); +void f2fs_drop_discard_cmd(struct f2fs_sb_info *sbi); +void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi); +bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi); +void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi, + struct cp_control *cpc); +void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi); +block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi); +int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable); +void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi); +int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra); +void allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type, + unsigned int start, unsigned int end); +void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi); int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range); -bool exist_trim_candidates(struct f2fs_sb_info *sbi, struct cp_control *cpc); -struct page *get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno); -void update_meta_page(struct f2fs_sb_info *sbi, void *src, block_t blk_addr); -void write_meta_page(struct f2fs_sb_info *sbi, struct page *page, +bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi, + struct cp_control *cpc); +struct page *f2fs_get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno); +void f2fs_update_meta_page(struct f2fs_sb_info *sbi, void *src, + block_t blk_addr); +void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct page *page, enum iostat_type io_type); -void write_node_page(unsigned int nid, struct f2fs_io_info *fio); -void write_data_page(struct dnode_of_data *dn, struct f2fs_io_info *fio); -int rewrite_data_page(struct f2fs_io_info *fio); -void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, +void f2fs_do_write_node_page(unsigned int nid, struct f2fs_io_info *fio); +void f2fs_outplace_write_data(struct dnode_of_data *dn, + struct f2fs_io_info *fio); +int f2fs_inplace_write_data(struct f2fs_io_info *fio); +void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, block_t old_blkaddr, block_t new_blkaddr, bool recover_curseg, bool recover_newaddr); void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn, block_t old_addr, block_t new_addr, unsigned char version, bool recover_curseg, bool recover_newaddr); -void allocate_data_block(struct f2fs_sb_info *sbi, struct 
page *page, +void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, block_t old_blkaddr, block_t *new_blkaddr, struct f2fs_summary *sum, int type, struct f2fs_io_info *fio, bool add_list); void f2fs_wait_on_page_writeback(struct page *page, - enum page_type type, bool ordered); -void f2fs_wait_on_block_writeback(struct f2fs_sb_info *sbi, block_t blkaddr); -void write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk); -void write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk); -int lookup_journal_in_cursum(struct f2fs_journal *journal, int type, + enum page_type type, bool ordered, bool locked); +void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr); +void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr, + block_t len); +void f2fs_write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk); +void f2fs_write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk); +int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type, unsigned int val, int alloc); -void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc); -int build_segment_manager(struct f2fs_sb_info *sbi); -void destroy_segment_manager(struct f2fs_sb_info *sbi); -int __init create_segment_manager_caches(void); -void destroy_segment_manager_caches(void); -int rw_hint_to_seg_type(enum rw_hint hint); -enum rw_hint io_type_to_rw_hint(struct f2fs_sb_info *sbi, enum page_type type, - enum temp_type temp); +void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc); +int f2fs_build_segment_manager(struct f2fs_sb_info *sbi); +void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi); +int __init f2fs_create_segment_manager_caches(void); +void f2fs_destroy_segment_manager_caches(void); +int f2fs_rw_hint_to_seg_type(enum rw_hint hint); +enum rw_hint f2fs_io_type_to_rw_hint(struct f2fs_sb_info *sbi, + enum page_type type, enum temp_type temp); /* * checkpoint.c */ void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io); -struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index); -struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index); -struct page *get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index); -bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type); -int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, +struct page *f2fs_grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index); +struct page *f2fs_get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index); +struct page *f2fs_get_meta_page_nofail(struct f2fs_sb_info *sbi, pgoff_t index); +struct page *f2fs_get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index); +bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, + block_t blkaddr, int type); +int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type, bool sync); -void ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index); -long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type, +void f2fs_ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index); +long f2fs_sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type, long nr_to_write, enum iostat_type io_type); -void add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type); -void remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type); -void release_ino_entry(struct f2fs_sb_info *sbi, bool all); -bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode); -void set_dirty_device(struct f2fs_sb_info *sbi, 
nid_t ino, +void f2fs_add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type); +void f2fs_remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type); +void f2fs_release_ino_entry(struct f2fs_sb_info *sbi, bool all); +bool f2fs_exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode); +void f2fs_set_dirty_device(struct f2fs_sb_info *sbi, nid_t ino, unsigned int devidx, int type); -bool is_dirty_device(struct f2fs_sb_info *sbi, nid_t ino, +bool f2fs_is_dirty_device(struct f2fs_sb_info *sbi, nid_t ino, unsigned int devidx, int type); int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi); -int acquire_orphan_inode(struct f2fs_sb_info *sbi); -void release_orphan_inode(struct f2fs_sb_info *sbi); -void add_orphan_inode(struct inode *inode); -void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino); -int recover_orphan_inodes(struct f2fs_sb_info *sbi); -int get_valid_checkpoint(struct f2fs_sb_info *sbi); -void update_dirty_page(struct inode *inode, struct page *page); -void remove_dirty_inode(struct inode *inode); -int sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type); -int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc); -void init_ino_entry_info(struct f2fs_sb_info *sbi); -int __init create_checkpoint_caches(void); -void destroy_checkpoint_caches(void); +int f2fs_acquire_orphan_inode(struct f2fs_sb_info *sbi); +void f2fs_release_orphan_inode(struct f2fs_sb_info *sbi); +void f2fs_add_orphan_inode(struct inode *inode); +void f2fs_remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino); +int f2fs_recover_orphan_inodes(struct f2fs_sb_info *sbi); +int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi); +void f2fs_update_dirty_page(struct inode *inode, struct page *page); +void f2fs_remove_dirty_inode(struct inode *inode); +int f2fs_sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type); +void f2fs_wait_on_all_pages_writeback(struct f2fs_sb_info *sbi); +int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc); +void f2fs_init_ino_entry_info(struct f2fs_sb_info *sbi); +int __init f2fs_create_checkpoint_caches(void); +void f2fs_destroy_checkpoint_caches(void); /* * data.c @@ -2938,38 +3243,37 @@ int f2fs_init_post_read_processing(void); void f2fs_destroy_post_read_processing(void); void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type); void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi, - struct inode *inode, nid_t ino, pgoff_t idx, - enum page_type type); + struct inode *inode, struct page *page, + nid_t ino, enum page_type type); void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi); int f2fs_submit_page_bio(struct f2fs_io_info *fio); -int f2fs_submit_page_write(struct f2fs_io_info *fio); +int f2fs_merge_page_bio(struct f2fs_io_info *fio); +void f2fs_submit_page_write(struct f2fs_io_info *fio); struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi, block_t blk_addr, struct bio *bio); int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr); -void set_data_blkaddr(struct dnode_of_data *dn); +void f2fs_set_data_blkaddr(struct dnode_of_data *dn); void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr); -int reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count); -int reserve_new_block(struct dnode_of_data *dn); +int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count); +int f2fs_reserve_new_block(struct dnode_of_data *dn); int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index); int f2fs_preallocate_blocks(struct kiocb 
*iocb, struct iov_iter *from); int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index); -struct page *get_read_data_page(struct inode *inode, pgoff_t index, +struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index, int op_flags, bool for_write); -struct page *find_data_page(struct inode *inode, pgoff_t index); -struct page *get_lock_data_page(struct inode *inode, pgoff_t index, +struct page *f2fs_find_data_page(struct inode *inode, pgoff_t index); +struct page *f2fs_get_lock_data_page(struct inode *inode, pgoff_t index, bool for_write); -struct page *get_new_data_page(struct inode *inode, +struct page *f2fs_get_new_data_page(struct inode *inode, struct page *ipage, pgoff_t index, bool new_i_size); -int do_write_data_page(struct f2fs_io_info *fio); +int f2fs_do_write_data_page(struct f2fs_io_info *fio); +void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock); int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int create, int flag); int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len); -bool should_update_inplace(struct inode *inode, struct f2fs_io_info *fio); -bool should_update_outplace(struct inode *inode, struct f2fs_io_info *fio); -int __f2fs_write_data_pages(struct address_space *mapping, - struct writeback_control *wbc, - enum iostat_type io_type); +bool f2fs_should_update_inplace(struct inode *inode, struct f2fs_io_info *fio); +bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio); void f2fs_invalidate_page(struct page *page, unsigned int offset, unsigned int length); int f2fs_release_page(struct page *page, gfp_t wait); @@ -2978,22 +3282,24 @@ int f2fs_migrate_page(struct address_space *mapping, struct page *newpage, struct page *page, enum migrate_mode mode); #endif bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len); +void f2fs_clear_radix_tree_dirty_tag(struct page *page); /* * gc.c */ -int start_gc_thread(struct f2fs_sb_info *sbi); -void stop_gc_thread(struct f2fs_sb_info *sbi); -block_t start_bidx_of_node(unsigned int node_ofs, struct inode *inode); +int f2fs_start_gc_thread(struct f2fs_sb_info *sbi); +void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi); +block_t f2fs_start_bidx_of_node(unsigned int node_ofs, struct inode *inode); int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background, unsigned int segno); -void build_gc_manager(struct f2fs_sb_info *sbi); +void f2fs_build_gc_manager(struct f2fs_sb_info *sbi); +int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count); /* * recovery.c */ -int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only); -bool space_for_roll_forward(struct f2fs_sb_info *sbi); +int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only); +bool f2fs_space_for_roll_forward(struct f2fs_sb_info *sbi); /* * debug.c @@ -3015,6 +3321,9 @@ struct f2fs_stat_info { int free_nids, avail_nids, alloc_nids; int total_count, utilization; int bg_gc, nr_wb_cp_data, nr_wb_data; + int nr_rd_data, nr_rd_node, nr_rd_meta; + int nr_dio_read, nr_dio_write; + unsigned int io_skip_bggc, other_skip_bggc; int nr_flushing, nr_flushed, flush_list_empty; int nr_discarding, nr_discarded; int nr_discard_cmd; @@ -3031,10 +3340,12 @@ struct f2fs_stat_info { int bg_node_segs, bg_data_segs; int tot_blks, data_blks, node_blks; int bg_data_blks, bg_node_blks; + unsigned long long skipped_atomic_files[2]; int curseg[NR_CURSEG_TYPE]; int cursec[NR_CURSEG_TYPE]; int curzone[NR_CURSEG_TYPE]; + unsigned int meta_count[META_MAX]; 
unsigned int segment_count[2]; unsigned int block_count[2]; unsigned int inplace_count; @@ -3050,6 +3361,8 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) #define stat_inc_bg_cp_count(si) ((si)->bg_cp_count++) #define stat_inc_call_count(si) ((si)->call_count++) #define stat_inc_bggc_count(sbi) ((sbi)->bg_gc++) +#define stat_io_skip_bggc_count(sbi) ((sbi)->io_skip_bggc++) +#define stat_other_skip_bggc_count(sbi) ((sbi)->other_skip_bggc++) #define stat_inc_dirty_inode(sbi, type) ((sbi)->ndirty_inode[type]++) #define stat_dec_dirty_inode(sbi, type) ((sbi)->ndirty_inode[type]--) #define stat_inc_total_hit(sbi) (atomic64_inc(&(sbi)->total_hit_ext)) @@ -3086,6 +3399,17 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) if (f2fs_has_inline_dentry(inode)) \ (atomic_dec(&F2FS_I_SB(inode)->inline_dir)); \ } while (0) +#define stat_inc_meta_count(sbi, blkaddr) \ + do { \ + if (blkaddr < SIT_I(sbi)->sit_base_addr) \ + atomic_inc(&(sbi)->meta_count[META_CP]); \ + else if (blkaddr < NM_I(sbi)->nat_blkaddr) \ + atomic_inc(&(sbi)->meta_count[META_SIT]); \ + else if (blkaddr < SM_I(sbi)->ssa_blkaddr) \ + atomic_inc(&(sbi)->meta_count[META_NAT]); \ + else if (blkaddr < SM_I(sbi)->main_blkaddr) \ + atomic_inc(&(sbi)->meta_count[META_SSA]); \ + } while (0) #define stat_inc_seg_type(sbi, curseg) \ ((sbi)->segment_count[(curseg)->alloc_type]++) #define stat_inc_block_count(sbi, curseg) \ @@ -3148,13 +3472,15 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) int f2fs_build_stats(struct f2fs_sb_info *sbi); void f2fs_destroy_stats(struct f2fs_sb_info *sbi); -int __init f2fs_create_root_stats(void); +void __init f2fs_create_root_stats(void); void f2fs_destroy_root_stats(void); #else #define stat_inc_cp_count(si) do { } while (0) #define stat_inc_bg_cp_count(si) do { } while (0) #define stat_inc_call_count(si) do { } while (0) #define stat_inc_bggc_count(si) do { } while (0) +#define stat_io_skip_bggc_count(sbi) do { } while (0) +#define stat_other_skip_bggc_count(sbi) do { } while (0) #define stat_inc_dirty_inode(sbi, type) do { } while (0) #define stat_dec_dirty_inode(sbi, type) do { } while (0) #define stat_inc_total_hit(sb) do { } while (0) @@ -3173,6 +3499,7 @@ void f2fs_destroy_root_stats(void); #define stat_inc_volatile_write(inode) do { } while (0) #define stat_dec_volatile_write(inode) do { } while (0) #define stat_update_max_volatile_write(inode) do { } while (0) +#define stat_inc_meta_count(sbi, blkaddr) do { } while (0) #define stat_inc_seg_type(sbi, curseg) do { } while (0) #define stat_inc_block_count(sbi, curseg) do { } while (0) #define stat_inc_inplace_blocks(sbi) do { } while (0) @@ -3183,7 +3510,7 @@ void f2fs_destroy_root_stats(void); static inline int f2fs_build_stats(struct f2fs_sb_info *sbi) { return 0; } static inline void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { } -static inline int __init f2fs_create_root_stats(void) { return 0; } +static inline void __init f2fs_create_root_stats(void) { } static inline void f2fs_destroy_root_stats(void) { } #endif @@ -3197,29 +3524,31 @@ extern const struct inode_operations f2fs_dir_inode_operations; extern const struct inode_operations f2fs_symlink_inode_operations; extern const struct inode_operations f2fs_encrypted_symlink_inode_operations; extern const struct inode_operations f2fs_special_inode_operations; -extern struct kmem_cache *inode_entry_slab; +extern struct kmem_cache *f2fs_inode_entry_slab; /* * inline.c */ bool f2fs_may_inline_data(struct inode *inode); bool 
f2fs_may_inline_dentry(struct inode *inode); -void read_inline_data(struct page *page, struct page *ipage); -void truncate_inline_inode(struct inode *inode, struct page *ipage, u64 from); +void f2fs_do_read_inline_data(struct page *page, struct page *ipage); +void f2fs_truncate_inline_inode(struct inode *inode, + struct page *ipage, u64 from); int f2fs_read_inline_data(struct inode *inode, struct page *page); int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page); int f2fs_convert_inline_inode(struct inode *inode); int f2fs_write_inline_data(struct inode *inode, struct page *page); -bool recover_inline_data(struct inode *inode, struct page *npage); -struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir, +bool f2fs_recover_inline_data(struct inode *inode, struct page *npage); +struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir, struct fscrypt_name *fname, struct page **res_page); -int make_empty_inline_dir(struct inode *inode, struct inode *parent, +int f2fs_make_empty_inline_dir(struct inode *inode, struct inode *parent, struct page *ipage); int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name, const struct qstr *orig_name, struct inode *inode, nid_t ino, umode_t mode); -void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page, - struct inode *dir, struct inode *inode); +void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, + struct page *page, struct inode *dir, + struct inode *inode); bool f2fs_empty_inline_dir(struct inode *dir); int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx, struct fscrypt_str *fstr); @@ -3240,17 +3569,17 @@ void f2fs_leave_shrinker(struct f2fs_sb_info *sbi); /* * extent_cache.c */ -struct rb_entry *__lookup_rb_tree(struct rb_root *root, +struct rb_entry *f2fs_lookup_rb_tree(struct rb_root *root, struct rb_entry *cached_re, unsigned int ofs); -struct rb_node **__lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi, +struct rb_node **f2fs_lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi, struct rb_root *root, struct rb_node **parent, unsigned int ofs); -struct rb_entry *__lookup_rb_tree_ret(struct rb_root *root, +struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root *root, struct rb_entry *cached_re, unsigned int ofs, struct rb_entry **prev_entry, struct rb_entry **next_entry, struct rb_node ***insert_p, struct rb_node **insert_parent, bool force); -bool __check_rb_tree_consistence(struct f2fs_sb_info *sbi, +bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi, struct rb_root *root); unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink); bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext); @@ -3262,9 +3591,9 @@ bool f2fs_lookup_extent_cache(struct inode *inode, pgoff_t pgofs, void f2fs_update_extent_cache(struct dnode_of_data *dn); void f2fs_update_extent_cache_range(struct dnode_of_data *dn, pgoff_t fofs, block_t blkaddr, unsigned int len); -void init_extent_cache_info(struct f2fs_sb_info *sbi); -int __init create_extent_cache(void); -void destroy_extent_cache(void); +void f2fs_init_extent_cache_info(struct f2fs_sb_info *sbi); +int __init f2fs_create_extent_cache(void); +void f2fs_destroy_extent_cache(void); /* * sysfs.c @@ -3291,7 +3620,7 @@ static inline void f2fs_set_encrypted_inode(struct inode *inode) { #ifdef CONFIG_F2FS_FS_ENCRYPTION file_set_encrypt(inode); - inode->i_flags |= S_ENCRYPTED; + f2fs_set_inode_flags(inode); #endif } @@ -3305,9 +3634,9 @@ static inline bool 
f2fs_post_read_required(struct inode *inode) } #define F2FS_FEATURE_FUNCS(name, flagname) \ -static inline int f2fs_sb_has_##name(struct super_block *sb) \ +static inline int f2fs_sb_has_##name(struct f2fs_sb_info *sbi) \ { \ - return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_##flagname); \ + return F2FS_HAS_FEATURE(sbi, F2FS_FEATURE_##flagname); \ } F2FS_FEATURE_FUNCS(encrypt, ENCRYPT); @@ -3319,28 +3648,66 @@ F2FS_FEATURE_FUNCS(flexible_inline_xattr, FLEXIBLE_INLINE_XATTR); F2FS_FEATURE_FUNCS(quota_ino, QUOTA_INO); F2FS_FEATURE_FUNCS(inode_crtime, INODE_CRTIME); F2FS_FEATURE_FUNCS(lost_found, LOST_FOUND); +F2FS_FEATURE_FUNCS(sb_chksum, SB_CHKSUM); #ifdef CONFIG_BLK_DEV_ZONED -static inline int get_blkz_type(struct f2fs_sb_info *sbi, - struct block_device *bdev, block_t blkaddr) +static inline bool f2fs_blkz_is_seq(struct f2fs_sb_info *sbi, int devi, + block_t blkaddr) { unsigned int zno = blkaddr >> sbi->log_blocks_per_blkz; + + return test_bit(zno, FDEV(devi).blkz_seq); +} +#endif + +static inline bool f2fs_hw_should_discard(struct f2fs_sb_info *sbi) +{ + return f2fs_sb_has_blkzoned(sbi); +} + +static inline bool f2fs_bdev_support_discard(struct block_device *bdev) +{ + return blk_queue_discard(bdev_get_queue(bdev)) || +#ifdef CONFIG_BLK_DEV_ZONED + bdev_is_zoned(bdev); +#else + 0; +#endif +} + +static inline bool f2fs_hw_support_discard(struct f2fs_sb_info *sbi) +{ int i; + if (!f2fs_is_multi_device(sbi)) + return f2fs_bdev_support_discard(sbi->sb->s_bdev); + for (i = 0; i < sbi->s_ndevs; i++) - if (FDEV(i).bdev == bdev) - return FDEV(i).blkz_type[zno]; - return -EINVAL; + if (f2fs_bdev_support_discard(FDEV(i).bdev)) + return true; + return false; } -#endif -static inline bool f2fs_discard_en(struct f2fs_sb_info *sbi) +static inline bool f2fs_realtime_discard_enable(struct f2fs_sb_info *sbi) { - struct request_queue *q = bdev_get_queue(sbi->sb->s_bdev); + return (test_opt(sbi, DISCARD) && f2fs_hw_support_discard(sbi)) || + f2fs_hw_should_discard(sbi); +} + +static inline bool f2fs_hw_is_readonly(struct f2fs_sb_info *sbi) +{ + int i; - return blk_queue_discard(q) || f2fs_sb_has_blkzoned(sbi->sb); + if (!f2fs_is_multi_device(sbi)) + return bdev_read_only(sbi->sb->s_bdev); + + for (i = 0; i < sbi->s_ndevs; i++) + if (bdev_read_only(FDEV(i).bdev)) + return true; + return false; } + static inline void set_opt_mode(struct f2fs_sb_info *sbi, unsigned int mt) { clear_opt(sbi, ADAPTIVE); @@ -3363,15 +3730,78 @@ static inline bool f2fs_may_encrypt(struct inode *inode) return (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)); #else - return 0; + return false; #endif } -static inline bool f2fs_force_buffered_io(struct inode *inode, int rw) +static inline int block_unaligned_IO(struct inode *inode, + struct kiocb *iocb, struct iov_iter *iter) +{ + unsigned int i_blkbits = READ_ONCE(inode->i_blkbits); + unsigned int blocksize_mask = (1 << i_blkbits) - 1; + loff_t offset = iocb->ki_pos; + unsigned long align = offset | iov_iter_alignment(iter); + + return align & blocksize_mask; +} + +static inline int allow_outplace_dio(struct inode *inode, + struct kiocb *iocb, struct iov_iter *iter) { - return (f2fs_post_read_required(inode) || - (rw == WRITE && test_opt(F2FS_I_SB(inode), LFS)) || - F2FS_I_SB(inode)->s_ndevs); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + int rw = iov_iter_rw(iter); + + return (test_opt(sbi, LFS) && (rw == WRITE) && + !block_unaligned_IO(inode, iocb, iter)); } +static inline bool f2fs_force_buffered_io(struct inode *inode, + struct kiocb *iocb, struct iov_iter *iter) +{ + struct 
f2fs_sb_info *sbi = F2FS_I_SB(inode); + int rw = iov_iter_rw(iter); + + if (f2fs_post_read_required(inode)) + return true; + if (f2fs_is_multi_device(sbi)) + return true; + /* + * for blkzoned device, fallback direct IO to buffered IO, so + * all IOs can be serialized by log-structured write. + */ + if (f2fs_sb_has_blkzoned(sbi)) + return true; + if (test_opt(sbi, LFS) && (rw == WRITE) && + block_unaligned_IO(inode, iocb, iter)) + return true; + if (is_sbi_flag_set(F2FS_I_SB(inode), SBI_CP_DISABLED) && + !(inode->i_flags & S_SWAPFILE)) + return true; + + return false; +} + +#ifdef CONFIG_F2FS_FAULT_INJECTION +extern void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate, + unsigned int type); +#else +#define f2fs_build_fault_attr(sbi, rate, type) do { } while (0) +#endif + +static inline bool is_journalled_quota(struct f2fs_sb_info *sbi) +{ +#ifdef CONFIG_QUOTA + if (f2fs_sb_has_quota_ino(sbi)) + return true; + if (F2FS_OPTION(sbi).s_qf_names[USRQUOTA] || + F2FS_OPTION(sbi).s_qf_names[GRPQUOTA] || + F2FS_OPTION(sbi).s_qf_names[PRJQUOTA]) + return true; #endif + return false; +} + +#define EFSBADCRC EBADMSG /* Bad CRC detected */ +#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ + +#endif /* _LINUX_F2FS_H */ diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index eadd80ca1f27..aa8a31be2eb2 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/file.c * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/fs.h> #include <linux/f2fs_fs.h> @@ -44,6 +41,8 @@ static int f2fs_filemap_fault(struct vm_area_struct *vma, err = filemap_fault(vma, vmf); up_read(&F2FS_I(inode)->i_mmap_sem); + trace_f2fs_filemap_fault(inode, vmf->pgoff, (unsigned long)err); + return err; } @@ -53,7 +52,7 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page = vmf->page; struct inode *inode = file_inode(vma->vm_file); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct dnode_of_data dn; + struct dnode_of_data dn = { .node_changed = false }; int err; if (unlikely(f2fs_cp_error(sbi))) { @@ -65,19 +64,6 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, f2fs_bug_on(sbi, f2fs_has_inline_data(inode)); - /* block allocation */ - f2fs_lock_op(sbi); - set_new_dnode(&dn, inode, NULL, NULL, 0); - err = f2fs_reserve_block(&dn, page->index); - if (err) { - f2fs_unlock_op(sbi); - goto out; - } - f2fs_put_dnode(&dn); - f2fs_unlock_op(sbi); - - f2fs_balance_fs(sbi, dn.node_changed); - file_update_time(vma->vm_file); down_read(&F2FS_I(inode)->i_mmap_sem); lock_page(page); @@ -89,16 +75,34 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, goto out_sem; } + /* block allocation */ + __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true); + set_new_dnode(&dn, inode, NULL, NULL, 0); + err = f2fs_get_block(&dn, page->index); + f2fs_put_dnode(&dn); + __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false); + if (err) { + unlock_page(page); + goto out_sem; + } + + /* fill the page */ + f2fs_wait_on_page_writeback(page, DATA, false, true); + + /* wait for GCed page writeback via META_MAPPING */ + f2fs_wait_on_block_writeback(inode, dn.data_blkaddr); + /* * check to see if the page is mapped already (no holes) */ if (PageMappedToDisk(page)) - goto mapped; + goto out_sem; /* page is wholly or 
partially inside EOF */ if (((loff_t)(page->index + 1) << PAGE_SHIFT) > i_size_read(inode)) { - unsigned offset; + loff_t offset; + offset = i_size_read(inode) & ~PAGE_MASK; zero_user_segment(page, offset, PAGE_SIZE); } @@ -107,21 +111,15 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, SetPageUptodate(page); f2fs_update_iostat(sbi, APP_MAPPED_IO, F2FS_BLKSIZE); + f2fs_update_time(sbi, REQ_TIME); trace_f2fs_vm_page_mkwrite(page, DATA); -mapped: - /* fill the page */ - f2fs_wait_on_page_writeback(page, DATA, false); - - /* wait for GCed page writeback via META_MAPPING */ - if (f2fs_post_read_required(inode)) - f2fs_wait_on_block_writeback(sbi, dn.data_blkaddr); - out_sem: up_read(&F2FS_I(inode)->i_mmap_sem); -out: + + f2fs_balance_fs(sbi, dn.node_changed); + sb_end_pagefault(inode->i_sb); - f2fs_update_time(sbi, REQ_TIME); err: return block_page_mkwrite_return(err); } @@ -160,17 +158,18 @@ static inline enum cp_reason_type need_do_checkpoint(struct inode *inode) cp_reason = CP_SB_NEED_CP; else if (file_wrong_pino(inode)) cp_reason = CP_WRONG_PINO; - else if (!space_for_roll_forward(sbi)) + else if (!f2fs_space_for_roll_forward(sbi)) cp_reason = CP_NO_SPC_ROLL; - else if (!is_checkpointed_node(sbi, F2FS_I(inode)->i_pino)) + else if (!f2fs_is_checkpointed_node(sbi, F2FS_I(inode)->i_pino)) cp_reason = CP_NODE_NEED_CP; else if (test_opt(sbi, FASTBOOT)) cp_reason = CP_FASTBOOT_MODE; else if (F2FS_OPTION(sbi).active_logs == 2) cp_reason = CP_SPEC_LOG_NUM; else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT && - need_dentry_mark(sbi, inode->i_ino) && - exist_written_data(sbi, F2FS_I(inode)->i_pino, TRANS_DIR_INO)) + f2fs_need_dentry_mark(sbi, inode->i_ino) && + f2fs_exist_written_data(sbi, F2FS_I(inode)->i_pino, + TRANS_DIR_INO)) cp_reason = CP_RECOVER_DIR; return cp_reason; @@ -181,7 +180,7 @@ static bool need_inode_page_update(struct f2fs_sb_info *sbi, nid_t ino) struct page *i = find_get_page(NODE_MAPPING(sbi), ino); bool ret = false; /* But we need to avoid that there are some inode updates */ - if ((i && PageDirty(i)) || need_inode_block_update(sbi, ino)) + if ((i && PageDirty(i)) || f2fs_need_inode_block_update(sbi, ino)) ret = true; f2fs_put_page(i, 0); return ret; @@ -214,8 +213,10 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end, .nr_to_write = LONG_MAX, .for_reclaim = 0, }; + unsigned int seq_id = 0; - if (unlikely(f2fs_readonly(inode->i_sb))) + if (unlikely(f2fs_readonly(inode->i_sb) || + is_sbi_flag_set(sbi, SBI_CP_DISABLED))) return 0; trace_f2fs_sync_file_enter(inode); @@ -244,14 +245,14 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end, * if there is no written data, don't waste time to write recovery info. 
*/ if (!is_inode_flag_set(inode, FI_APPEND_WRITE) && - !exist_written_data(sbi, ino, APPEND_INO)) { + !f2fs_exist_written_data(sbi, ino, APPEND_INO)) { /* it may call write_inode just prior to fsync */ if (need_inode_page_update(sbi, ino)) goto go_write; if (is_inode_flag_set(inode, FI_UPDATE_WRITE) || - exist_written_data(sbi, ino, UPDATE_INO)) + f2fs_exist_written_data(sbi, ino, UPDATE_INO)) goto flush_out; goto out; } @@ -278,7 +279,9 @@ go_write: goto out; } sync_nodes: - ret = fsync_node_pages(sbi, inode, &wbc, atomic); + atomic_inc(&sbi->wb_sync_req[NODE]); + ret = f2fs_fsync_node_pages(sbi, inode, &wbc, atomic, &seq_id); + atomic_dec(&sbi->wb_sync_req[NODE]); if (ret) goto out; @@ -288,7 +291,7 @@ sync_nodes: goto out; } - if (need_inode_block_update(sbi, ino)) { + if (f2fs_need_inode_block_update(sbi, ino)) { f2fs_mark_inode_dirty_sync(inode, true); f2fs_write_inode(inode, NULL); goto sync_nodes; @@ -303,21 +306,21 @@ sync_nodes: * given fsync mark. */ if (!atomic) { - ret = wait_on_node_pages_writeback(sbi, ino); + ret = f2fs_wait_on_node_pages_writeback(sbi, seq_id); if (ret) goto out; } /* once recovery info is written, don't need to tack this */ - remove_ino_entry(sbi, ino, APPEND_INO); + f2fs_remove_ino_entry(sbi, ino, APPEND_INO); clear_inode_flag(inode, FI_APPEND_WRITE); flush_out: if (!atomic && F2FS_OPTION(sbi).fsync_mode != FSYNC_MODE_NOBARRIER) ret = f2fs_issue_flush(sbi, inode->i_ino); if (!ret) { - remove_ino_entry(sbi, ino, UPDATE_INO); + f2fs_remove_ino_entry(sbi, ino, UPDATE_INO); clear_inode_flag(inode, FI_UPDATE_WRITE); - remove_ino_entry(sbi, ino, FLUSH_INO); + f2fs_remove_ino_entry(sbi, ino, FLUSH_INO); } f2fs_update_time(sbi, REQ_TIME); out: @@ -336,28 +339,29 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) static pgoff_t __get_first_dirty_index(struct address_space *mapping, pgoff_t pgofs, int whence) { - struct pagevec pvec; + struct page *page; int nr_pages; if (whence != SEEK_DATA) return 0; /* find first dirty page index */ - pagevec_init(&pvec, 0); - nr_pages = pagevec_lookup_tag(&pvec, mapping, &pgofs, - PAGECACHE_TAG_DIRTY, 1); - pgofs = nr_pages ? 
pvec.pages[0]->index : ULONG_MAX; - pagevec_release(&pvec); + nr_pages = find_get_pages_tag(mapping, &pgofs, PAGECACHE_TAG_DIRTY, + 1, &page); + if (!nr_pages) + return ULONG_MAX; + pgofs = page->index; + put_page(page); return pgofs; } -static bool __found_offset(block_t blkaddr, pgoff_t dirty, pgoff_t pgofs, - int whence) +static bool __found_offset(struct f2fs_sb_info *sbi, block_t blkaddr, + pgoff_t dirty, pgoff_t pgofs, int whence) { switch (whence) { case SEEK_DATA: if ((blkaddr == NEW_ADDR && dirty == pgofs) || - (blkaddr != NEW_ADDR && blkaddr != NULL_ADDR)) + __is_valid_data_blkaddr(blkaddr)) return true; break; case SEEK_HOLE: @@ -397,13 +401,13 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) for (; data_ofs < isize; data_ofs = (loff_t)pgofs << PAGE_SHIFT) { set_new_dnode(&dn, inode, NULL, NULL, 0); - err = get_dnode_of_data(&dn, pgofs, LOOKUP_NODE); + err = f2fs_get_dnode_of_data(&dn, pgofs, LOOKUP_NODE); if (err && err != -ENOENT) { goto fail; } else if (err == -ENOENT) { /* direct node does not exists */ if (whence == SEEK_DATA) { - pgofs = get_next_page_offset(&dn, pgofs); + pgofs = f2fs_get_next_page_offset(&dn, pgofs); continue; } else { goto found; @@ -417,10 +421,19 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) dn.ofs_in_node++, pgofs++, data_ofs = (loff_t)pgofs << PAGE_SHIFT) { block_t blkaddr; + blkaddr = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node); - if (__found_offset(blkaddr, dirty, pgofs, whence)) { + if (__is_valid_data_blkaddr(blkaddr) && + !f2fs_is_valid_blkaddr(F2FS_I_SB(inode), + blkaddr, DATA_GENERIC_ENHANCE)) { + f2fs_put_dnode(&dn); + goto fail; + } + + if (__found_offset(F2FS_I_SB(inode), blkaddr, dirty, + pgofs, whence)) { f2fs_put_dnode(&dn); goto found; } @@ -491,7 +504,7 @@ static int f2fs_file_open(struct inode *inode, struct file *filp) return dquot_file_open(inode, filp); } -void truncate_data_blocks_range(struct dnode_of_data *dn, int count) +void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count) { struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); struct f2fs_node *raw_node; @@ -507,12 +520,19 @@ void truncate_data_blocks_range(struct dnode_of_data *dn, int count) for (; count > 0; count--, addr++, dn->ofs_in_node++) { block_t blkaddr = le32_to_cpu(*addr); + if (blkaddr == NULL_ADDR) continue; dn->data_blkaddr = NULL_ADDR; - set_data_blkaddr(dn); - invalidate_blocks(sbi, blkaddr); + f2fs_set_data_blkaddr(dn); + + if (__is_valid_data_blkaddr(blkaddr) && + !f2fs_is_valid_blkaddr(sbi, blkaddr, + DATA_GENERIC_ENHANCE)) + continue; + + f2fs_invalidate_blocks(sbi, blkaddr); if (dn->ofs_in_node == 0 && IS_INODE(dn->node_page)) clear_inode_flag(dn->inode, FI_FIRST_BLOCK_WRITTEN); nr_free++; @@ -524,7 +544,7 @@ void truncate_data_blocks_range(struct dnode_of_data *dn, int count) * once we invalidate valid blkaddr in range [ofs, ofs + count], * we will invalidate all blkaddr in the whole range. 
*/ - fofs = start_bidx_of_node(ofs_of_node(dn->node_page), + fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) + ofs; f2fs_update_extent_cache_range(dn, fofs, 0, len); dec_valid_block_count(sbi, dn->inode, nr_free); @@ -536,15 +556,15 @@ void truncate_data_blocks_range(struct dnode_of_data *dn, int count) dn->ofs_in_node, nr_free); } -void truncate_data_blocks(struct dnode_of_data *dn) +void f2fs_truncate_data_blocks(struct dnode_of_data *dn) { - truncate_data_blocks_range(dn, ADDRS_PER_BLOCK); + f2fs_truncate_data_blocks_range(dn, ADDRS_PER_BLOCK(dn->inode)); } static int truncate_partial_data_page(struct inode *inode, u64 from, bool cache_only) { - unsigned offset = from & (PAGE_SIZE - 1); + loff_t offset = from & (PAGE_SIZE - 1); pgoff_t index = from >> PAGE_SHIFT; struct address_space *mapping = inode->i_mapping; struct page *page; @@ -560,11 +580,11 @@ static int truncate_partial_data_page(struct inode *inode, u64 from, return 0; } - page = get_lock_data_page(inode, index, true); + page = f2fs_get_lock_data_page(inode, index, true); if (IS_ERR(page)) return PTR_ERR(page) == -ENOENT ? 0 : PTR_ERR(page); truncate_out: - f2fs_wait_on_page_writeback(page, DATA, true); + f2fs_wait_on_page_writeback(page, DATA, true, true); zero_user(page, offset, PAGE_SIZE - offset); /* An encrypted inode should have a key and truncate the last page. */ @@ -575,7 +595,7 @@ truncate_out: return 0; } -int truncate_blocks(struct inode *inode, u64 from, bool lock) +int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct dnode_of_data dn; @@ -594,21 +614,21 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock) if (lock) f2fs_lock_op(sbi); - ipage = get_node_page(sbi, inode->i_ino); + ipage = f2fs_get_node_page(sbi, inode->i_ino); if (IS_ERR(ipage)) { err = PTR_ERR(ipage); goto out; } if (f2fs_has_inline_data(inode)) { - truncate_inline_inode(inode, ipage, from); + f2fs_truncate_inline_inode(inode, ipage, from); f2fs_put_page(ipage, 1); truncate_page = true; goto out; } set_new_dnode(&dn, inode, ipage, NULL, 0); - err = get_dnode_of_data(&dn, free_from, LOOKUP_NODE_RA); + err = f2fs_get_dnode_of_data(&dn, free_from, LOOKUP_NODE_RA); if (err) { if (err == -ENOENT) goto free_next; @@ -621,13 +641,13 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock) f2fs_bug_on(sbi, count < 0); if (dn.ofs_in_node || IS_INODE(dn.node_page)) { - truncate_data_blocks_range(&dn, count); + f2fs_truncate_data_blocks_range(&dn, count); free_from += count; } f2fs_put_dnode(&dn); free_next: - err = truncate_inode_blocks(inode, free_from); + err = f2fs_truncate_inode_blocks(inode, free_from); out: if (lock) f2fs_unlock_op(sbi); @@ -653,12 +673,11 @@ int f2fs_truncate(struct inode *inode) trace_f2fs_truncate(inode); -#ifdef CONFIG_F2FS_FAULT_INJECTION if (time_to_inject(F2FS_I_SB(inode), FAULT_TRUNCATE)) { f2fs_show_injection_info(FAULT_TRUNCATE); return -EIO; } -#endif + /* we should check inline_data size */ if (!f2fs_may_inline_data(inode)) { err = f2fs_convert_inline_inode(inode); @@ -666,7 +685,7 @@ int f2fs_truncate(struct inode *inode) return err; } - err = truncate_blocks(inode, i_size_read(inode), true); + err = f2fs_truncate_blocks(inode, i_size_read(inode), true); if (err) return err; @@ -685,27 +704,26 @@ int f2fs_getattr(struct vfsmount *mnt, unsigned int flags; if (f2fs_has_extra_attr(inode) && - f2fs_sb_has_inode_crtime(inode->i_sb) && + f2fs_sb_has_inode_crtime(F2FS_I_SB(inode)) && F2FS_FITS_IN_INODE(ri, 
fi->i_extra_isize, i_crtime)) { stat->result_mask |= STATX_BTIME; stat->btime.tv_sec = fi->i_crtime.tv_sec; stat->btime.tv_nsec = fi->i_crtime.tv_nsec; } - flags = fi->i_flags & (FS_FL_USER_VISIBLE | FS_PROJINHERIT_FL); - if (flags & FS_APPEND_FL) + flags = fi->i_flags; + if (flags & F2FS_APPEND_FL) stat->attributes |= STATX_ATTR_APPEND; - if (flags & FS_COMPR_FL) + if (flags & F2FS_COMPR_FL) stat->attributes |= STATX_ATTR_COMPRESSED; - if (f2fs_encrypted_inode(inode)) + if (IS_ENCRYPTED(inode)) stat->attributes |= STATX_ATTR_ENCRYPTED; - if (flags & FS_IMMUTABLE_FL) + if (flags & F2FS_IMMUTABLE_FL) stat->attributes |= STATX_ATTR_IMMUTABLE; - if (flags & FS_NODUMP_FL) + if (flags & F2FS_NODUMP_FL) stat->attributes |= STATX_ATTR_NODUMP; stat->attributes_mask |= (STATX_ATTR_APPEND | - STATX_ATTR_COMPRESSED | STATX_ATTR_ENCRYPTED | STATX_ATTR_IMMUTABLE | STATX_ATTR_NODUMP); @@ -755,7 +773,6 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) { struct inode *inode = d_inode(dentry); int err; - bool size_changed = false; if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) return -EIO; @@ -777,28 +794,47 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) !uid_eq(attr->ia_uid, inode->i_uid)) || (attr->ia_valid & ATTR_GID && !gid_eq(attr->ia_gid, inode->i_gid))) { + f2fs_lock_op(F2FS_I_SB(inode)); err = dquot_transfer(inode, attr); - if (err) + if (err) { + set_sbi_flag(F2FS_I_SB(inode), + SBI_QUOTA_NEED_REPAIR); + f2fs_unlock_op(F2FS_I_SB(inode)); return err; + } + /* + * update uid/gid under lock_op(), so that dquot and inode can + * be updated atomically. + */ + if (attr->ia_valid & ATTR_UID) + inode->i_uid = attr->ia_uid; + if (attr->ia_valid & ATTR_GID) + inode->i_gid = attr->ia_gid; + f2fs_mark_inode_dirty_sync(inode, true); + f2fs_unlock_op(F2FS_I_SB(inode)); } if (attr->ia_valid & ATTR_SIZE) { - if (attr->ia_size <= i_size_read(inode)) { - down_write(&F2FS_I(inode)->i_mmap_sem); - truncate_setsize(inode, attr->ia_size); + bool to_smaller = (attr->ia_size <= i_size_read(inode)); + + down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + down_write(&F2FS_I(inode)->i_mmap_sem); + + truncate_setsize(inode, attr->ia_size); + + if (to_smaller) err = f2fs_truncate(inode); - up_write(&F2FS_I(inode)->i_mmap_sem); - if (err) - return err; - } else { - /* - * do not trim all blocks after i_size if target size is - * larger than i_size. - */ - down_write(&F2FS_I(inode)->i_mmap_sem); - truncate_setsize(inode, attr->ia_size); - up_write(&F2FS_I(inode)->i_mmap_sem); + /* + * do not trim all blocks after i_size if target size is + * larger than i_size. 
+ */ + up_write(&F2FS_I(inode)->i_mmap_sem); + up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + if (err) + return err; + + if (!to_smaller) { /* should convert inline inode here */ if (!f2fs_may_inline_data(inode)) { err = f2fs_convert_inline_inode(inode); @@ -811,14 +847,12 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) down_write(&F2FS_I(inode)->i_sem); F2FS_I(inode)->last_disk_size = i_size_read(inode); up_write(&F2FS_I(inode)->i_sem); - - size_changed = true; } __setattr_copy(inode, attr); if (attr->ia_valid & ATTR_MODE) { - err = posix_acl_chmod(inode, get_inode_mode(inode)); + err = posix_acl_chmod(inode, f2fs_get_inode_mode(inode)); if (err || is_inode_flag_set(inode, FI_ACL_MODE)) { inode->i_mode = F2FS_I(inode)->i_acl_mode; clear_inode_flag(inode, FI_ACL_MODE); @@ -826,7 +860,7 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) } /* file size may changed here */ - f2fs_mark_inode_dirty_sync(inode, size_changed); + f2fs_mark_inode_dirty_sync(inode, true); /* inode change will produce dirty node pages flushed by checkpoint */ f2fs_balance_fs(F2FS_I_SB(inode), true); @@ -860,20 +894,20 @@ static int fill_zero(struct inode *inode, pgoff_t index, f2fs_balance_fs(sbi, true); f2fs_lock_op(sbi); - page = get_new_data_page(inode, NULL, index, false); + page = f2fs_get_new_data_page(inode, NULL, index, false); f2fs_unlock_op(sbi); if (IS_ERR(page)) return PTR_ERR(page); - f2fs_wait_on_page_writeback(page, DATA, true); + f2fs_wait_on_page_writeback(page, DATA, true, true); zero_user(page, start, len); set_page_dirty(page); f2fs_put_page(page, 1); return 0; } -int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end) +int f2fs_truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end) { int err; @@ -882,10 +916,11 @@ int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end) pgoff_t end_offset, count; set_new_dnode(&dn, inode, NULL, NULL, 0); - err = get_dnode_of_data(&dn, pg_start, LOOKUP_NODE); + err = f2fs_get_dnode_of_data(&dn, pg_start, LOOKUP_NODE); if (err) { if (err == -ENOENT) { - pg_start = get_next_page_offset(&dn, pg_start); + pg_start = f2fs_get_next_page_offset(&dn, + pg_start); continue; } return err; @@ -896,7 +931,7 @@ int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end) f2fs_bug_on(F2FS_I_SB(inode), count == 0 || count > end_offset); - truncate_data_blocks_range(&dn, count); + f2fs_truncate_data_blocks_range(&dn, count); f2fs_put_dnode(&dn); pg_start += count; @@ -947,14 +982,19 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len) blk_start = (loff_t)pg_start << PAGE_SHIFT; blk_end = (loff_t)pg_end << PAGE_SHIFT; + + down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); down_write(&F2FS_I(inode)->i_mmap_sem); + truncate_inode_pages_range(mapping, blk_start, blk_end - 1); f2fs_lock_op(sbi); - ret = truncate_hole(inode, pg_start, pg_end); + ret = f2fs_truncate_hole(inode, pg_start, pg_end); f2fs_unlock_op(sbi); + up_write(&F2FS_I(inode)->i_mmap_sem); + up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); } } @@ -970,13 +1010,14 @@ static int __read_out_blkaddrs(struct inode *inode, block_t *blkaddr, next_dnode: set_new_dnode(&dn, inode, NULL, NULL, 0); - ret = get_dnode_of_data(&dn, off, LOOKUP_NODE_RA); + ret = f2fs_get_dnode_of_data(&dn, off, LOOKUP_NODE_RA); if (ret && ret != -ENOENT) { return ret; } else if (ret == -ENOENT) { if (dn.max_level == 0) return -ENOENT; - done = min((pgoff_t)ADDRS_PER_BLOCK - dn.ofs_in_node, len); + done = min((pgoff_t)ADDRS_PER_BLOCK(inode) - 
dn.ofs_in_node, + len); blkaddr += done; do_replace += done; goto next; @@ -987,7 +1028,15 @@ next_dnode: for (i = 0; i < done; i++, blkaddr++, do_replace++, dn.ofs_in_node++) { *blkaddr = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node); - if (!is_checkpointed_data(sbi, *blkaddr)) { + + if (__is_valid_data_blkaddr(*blkaddr) && + !f2fs_is_valid_blkaddr(sbi, *blkaddr, + DATA_GENERIC_ENHANCE)) { + f2fs_put_dnode(&dn); + return -EFSCORRUPTED; + } + + if (!f2fs_is_checkpointed_data(sbi, *blkaddr)) { if (test_opt(sbi, LFS)) { f2fs_put_dnode(&dn); @@ -1020,10 +1069,10 @@ static int __roll_back_blkaddrs(struct inode *inode, block_t *blkaddr, continue; set_new_dnode(&dn, inode, NULL, NULL, 0); - ret = get_dnode_of_data(&dn, off + i, LOOKUP_NODE_RA); + ret = f2fs_get_dnode_of_data(&dn, off + i, LOOKUP_NODE_RA); if (ret) { dec_valid_block_count(sbi, inode, 1); - invalidate_blocks(sbi, *blkaddr); + f2fs_invalidate_blocks(sbi, *blkaddr); } else { f2fs_update_data_blkaddr(&dn, *blkaddr); } @@ -1053,18 +1102,23 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode, pgoff_t ilen; set_new_dnode(&dn, dst_inode, NULL, NULL, 0); - ret = get_dnode_of_data(&dn, dst + i, ALLOC_NODE); + ret = f2fs_get_dnode_of_data(&dn, dst + i, ALLOC_NODE); if (ret) return ret; - get_node_info(sbi, dn.nid, &ni); + ret = f2fs_get_node_info(sbi, dn.nid, &ni); + if (ret) { + f2fs_put_dnode(&dn); + return ret; + } + ilen = min((pgoff_t) ADDRS_PER_PAGE(dn.node_page, dst_inode) - dn.ofs_in_node, len - i); do { dn.data_blkaddr = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node); - truncate_data_blocks_range(&dn, 1); + f2fs_truncate_data_blocks_range(&dn, 1); if (do_replace[i]) { f2fs_i_blocks_write(src_inode, @@ -1087,10 +1141,11 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode, } else { struct page *psrc, *pdst; - psrc = get_lock_data_page(src_inode, src + i, true); + psrc = f2fs_get_lock_data_page(src_inode, + src + i, true); if (IS_ERR(psrc)) return PTR_ERR(psrc); - pdst = get_new_data_page(dst_inode, NULL, dst + i, + pdst = f2fs_get_new_data_page(dst_inode, NULL, dst + i, true); if (IS_ERR(pdst)) { f2fs_put_page(psrc, 1); @@ -1101,7 +1156,8 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode, f2fs_put_page(pdst, 1); f2fs_put_page(psrc, 1); - ret = truncate_hole(src_inode, src + i, src + i + 1); + ret = f2fs_truncate_hole(src_inode, + src + i, src + i + 1); if (ret) return ret; i++; @@ -1120,15 +1176,17 @@ static int __exchange_data_block(struct inode *src_inode, int ret; while (len) { - olen = min((pgoff_t)4 * ADDRS_PER_BLOCK, len); + olen = min((pgoff_t)4 * ADDRS_PER_BLOCK(src_inode), len); src_blkaddr = f2fs_kvzalloc(F2FS_I_SB(src_inode), - sizeof(block_t) * olen, GFP_KERNEL); + array_size(olen, sizeof(block_t)), + GFP_KERNEL); if (!src_blkaddr) return -ENOMEM; do_replace = f2fs_kvzalloc(F2FS_I_SB(src_inode), - sizeof(int) * olen, GFP_KERNEL); + array_size(olen, sizeof(int)), + GFP_KERNEL); if (!do_replace) { kvfree(src_blkaddr); return -ENOMEM; @@ -1154,31 +1212,39 @@ static int __exchange_data_block(struct inode *src_inode, return 0; roll_back: - __roll_back_blkaddrs(src_inode, src_blkaddr, do_replace, src, len); + __roll_back_blkaddrs(src_inode, src_blkaddr, do_replace, src, olen); kvfree(src_blkaddr); kvfree(do_replace); return ret; } -static int f2fs_do_collapse(struct inode *inode, pgoff_t start, pgoff_t end) +static int f2fs_do_collapse(struct inode *inode, loff_t offset, loff_t len) { struct f2fs_sb_info *sbi = 
F2FS_I_SB(inode); - pgoff_t nrpages = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE; + pgoff_t nrpages = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); + pgoff_t start = offset >> PAGE_SHIFT; + pgoff_t end = (offset + len) >> PAGE_SHIFT; int ret; f2fs_balance_fs(sbi, true); - f2fs_lock_op(sbi); - f2fs_drop_extent_tree(inode); + /* avoid gc operation during block exchange */ + down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + down_write(&F2FS_I(inode)->i_mmap_sem); + f2fs_lock_op(sbi); + f2fs_drop_extent_tree(inode); + truncate_pagecache(inode, offset); ret = __exchange_data_block(inode, inode, end, start, nrpages - end, true); f2fs_unlock_op(sbi); + + up_write(&F2FS_I(inode)->i_mmap_sem); + up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); return ret; } static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len) { - pgoff_t pg_start, pg_end; loff_t new_size; int ret; @@ -1193,37 +1259,27 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len) if (ret) return ret; - pg_start = offset >> PAGE_SHIFT; - pg_end = (offset + len) >> PAGE_SHIFT; - - /* avoid gc operation during block exchange */ - down_write(&F2FS_I(inode)->dio_rwsem[WRITE]); - - down_write(&F2FS_I(inode)->i_mmap_sem); /* write out all dirty pages from offset */ ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX); if (ret) - goto out_unlock; - - truncate_pagecache(inode, offset); + return ret; - ret = f2fs_do_collapse(inode, pg_start, pg_end); + ret = f2fs_do_collapse(inode, offset, len); if (ret) - goto out_unlock; + return ret; /* write out all moved pages, if possible */ + down_write(&F2FS_I(inode)->i_mmap_sem); filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX); truncate_pagecache(inode, offset); new_size = i_size_read(inode) - len; truncate_pagecache(inode, new_size); - ret = truncate_blocks(inode, new_size, true); + ret = f2fs_truncate_blocks(inode, new_size, true); + up_write(&F2FS_I(inode)->i_mmap_sem); if (!ret) f2fs_i_size_write(inode, new_size); -out_unlock: - up_write(&F2FS_I(inode)->i_mmap_sem); - up_write(&F2FS_I(inode)->dio_rwsem[WRITE]); return ret; } @@ -1243,7 +1299,7 @@ static int f2fs_do_zero_range(struct dnode_of_data *dn, pgoff_t start, } dn->ofs_in_node = ofs_in_node; - ret = reserve_new_blocks(dn, count); + ret = f2fs_reserve_new_blocks(dn, count); if (ret) return ret; @@ -1252,7 +1308,7 @@ static int f2fs_do_zero_range(struct dnode_of_data *dn, pgoff_t start, dn->data_blkaddr = datablock_addr(dn->inode, dn->node_page, dn->ofs_in_node); /* - * reserve_new_blocks will not guarantee entire block + * f2fs_reserve_new_blocks will not guarantee entire block * allocation. 
*/ if (dn->data_blkaddr == NULL_ADDR) { @@ -1260,9 +1316,9 @@ static int f2fs_do_zero_range(struct dnode_of_data *dn, pgoff_t start, break; } if (dn->data_blkaddr != NEW_ADDR) { - invalidate_blocks(sbi, dn->data_blkaddr); + f2fs_invalidate_blocks(sbi, dn->data_blkaddr); dn->data_blkaddr = NEW_ADDR; - set_data_blkaddr(dn); + f2fs_set_data_blkaddr(dn); } } @@ -1289,12 +1345,9 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, if (ret) return ret; - down_write(&F2FS_I(inode)->i_mmap_sem); ret = filemap_write_and_wait_range(mapping, offset, offset + len - 1); if (ret) - goto out_sem; - - truncate_pagecache_range(inode, offset, offset + len - 1); + return ret; pg_start = ((unsigned long long) offset) >> PAGE_SHIFT; pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT; @@ -1306,7 +1359,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, ret = fill_zero(inode, pg_start, off_start, off_end - off_start); if (ret) - goto out_sem; + return ret; new_size = max_t(loff_t, new_size, offset + len); } else { @@ -1314,7 +1367,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, ret = fill_zero(inode, pg_start++, off_start, PAGE_SIZE - off_start); if (ret) - goto out_sem; + return ret; new_size = max_t(loff_t, new_size, (loff_t)pg_start << PAGE_SHIFT); @@ -1325,12 +1378,21 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, unsigned int end_offset; pgoff_t end; + down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + down_write(&F2FS_I(inode)->i_mmap_sem); + + truncate_pagecache_range(inode, + (loff_t)index << PAGE_SHIFT, + ((loff_t)pg_end << PAGE_SHIFT) - 1); + f2fs_lock_op(sbi); set_new_dnode(&dn, inode, NULL, NULL, 0); - ret = get_dnode_of_data(&dn, index, ALLOC_NODE); + ret = f2fs_get_dnode_of_data(&dn, index, ALLOC_NODE); if (ret) { f2fs_unlock_op(sbi); + up_write(&F2FS_I(inode)->i_mmap_sem); + up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); goto out; } @@ -1339,7 +1401,10 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, ret = f2fs_do_zero_range(&dn, index, end); f2fs_put_dnode(&dn); + f2fs_unlock_op(sbi); + up_write(&F2FS_I(inode)->i_mmap_sem); + up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); f2fs_balance_fs(sbi, dn.node_changed); @@ -1367,9 +1432,6 @@ out: else f2fs_i_size_write(inode, new_size); } -out_sem: - up_write(&F2FS_I(inode)->i_mmap_sem); - return ret; } @@ -1398,25 +1460,26 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) f2fs_balance_fs(sbi, true); - /* avoid gc operation during block exchange */ - down_write(&F2FS_I(inode)->dio_rwsem[WRITE]); - down_write(&F2FS_I(inode)->i_mmap_sem); - ret = truncate_blocks(inode, i_size_read(inode), true); + ret = f2fs_truncate_blocks(inode, i_size_read(inode), true); + up_write(&F2FS_I(inode)->i_mmap_sem); if (ret) - goto out; + return ret; /* write out all dirty pages from offset */ ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX); if (ret) - goto out; - - truncate_pagecache(inode, offset); + return ret; pg_start = offset >> PAGE_SHIFT; pg_end = (offset + len) >> PAGE_SHIFT; delta = pg_end - pg_start; - idx = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE; + idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); + + /* avoid gc operation during block exchange */ + down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + down_write(&F2FS_I(inode)->i_mmap_sem); + truncate_pagecache(inode, offset); while (!ret && idx > pg_start) { nr = idx - pg_start; @@ -1431,16 +1494,17 @@ static 
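/*
 * Several hunks in this file replace the open-coded rounding-up division
 * `(x + PAGE_SIZE - 1) / PAGE_SIZE` with the kernel's DIV_ROUND_UP().
 * A sketch of the equivalence, valid for n >= 0 and d > 0 (and n small
 * enough that n + d - 1 does not overflow the type):
 */
#include <assert.h>

#define DIV_ROUND_UP_SKETCH(n, d) (((n) + (d) - 1) / (d))

static void div_round_up_demo(void)
{
	/* 4097 bytes span two 4096-byte pages ... */
	assert(DIV_ROUND_UP_SKETCH(4097UL, 4096UL) == 2);
	/* ... while an exact multiple does not round up. */
	assert(DIV_ROUND_UP_SKETCH(8192UL, 4096UL) == 2);
}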
int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) idx + delta, nr, false); f2fs_unlock_op(sbi); } + up_write(&F2FS_I(inode)->i_mmap_sem); + up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); /* write out all moved pages, if possible */ + down_write(&F2FS_I(inode)->i_mmap_sem); filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX); truncate_pagecache(inode, offset); + up_write(&F2FS_I(inode)->i_mmap_sem); if (!ret) f2fs_i_size_write(inode, new_size); -out: - up_write(&F2FS_I(inode)->i_mmap_sem); - up_write(&F2FS_I(inode)->dio_rwsem[WRITE]); return ret; } @@ -1449,7 +1513,8 @@ static int expand_inode_data(struct inode *inode, loff_t offset, { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_map_blocks map = { .m_next_pgofs = NULL, - .m_next_extent = NULL, .m_seg_type = NO_CHECK_TYPE }; + .m_next_extent = NULL, .m_seg_type = NO_CHECK_TYPE, + .m_may_create = true }; pgoff_t pg_end; loff_t new_size = i_size_read(inode); loff_t off_end; @@ -1473,7 +1538,12 @@ static int expand_inode_data(struct inode *inode, loff_t offset, if (off_end) map.m_len++; - err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_AIO); + if (f2fs_is_pinned_file(inode)) + map.m_seg_type = CURSEG_COLD_DATA; + + err = f2fs_map_blocks(inode, &map, 1, (f2fs_is_pinned_file(inode) ? + F2FS_GET_BLOCK_PRE_DIO : + F2FS_GET_BLOCK_PRE_AIO)); if (err) { pgoff_t last_off; @@ -1483,7 +1553,7 @@ static int expand_inode_data(struct inode *inode, loff_t offset, last_off = map.m_lblk + map.m_len - 1; /* update new size to the failed position */ - new_size = (last_off == pg_end) ? offset + len: + new_size = (last_off == pg_end) ? offset + len : (loff_t)(last_off + 1) << PAGE_SHIFT; } else { new_size = ((loff_t)pg_end << PAGE_SHIFT) + off_end; @@ -1563,13 +1633,13 @@ static int f2fs_release_file(struct inode *inode, struct file *filp) /* some remained atomic pages should discarded */ if (f2fs_is_atomic_file(inode)) - drop_inmem_pages(inode); + f2fs_drop_inmem_pages(inode); if (f2fs_is_volatile_file(inode)) { - clear_inode_flag(inode, FI_VOLATILE_FILE); - stat_dec_volatile_write(inode); set_inode_flag(inode, FI_DROP_CACHE); filemap_fdatawrite(inode->i_mapping); clear_inode_flag(inode, FI_DROP_CACHE); + clear_inode_flag(inode, FI_VOLATILE_FILE); + stat_dec_volatile_write(inode); } return 0; } @@ -1586,63 +1656,156 @@ static int f2fs_file_flush(struct file *file, fl_owner_t id) */ if (f2fs_is_atomic_file(inode) && F2FS_I(inode)->inmem_task == current) - drop_inmem_pages(inode); + f2fs_drop_inmem_pages(inode); + return 0; +} + +static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask) +{ + struct f2fs_inode_info *fi = F2FS_I(inode); + + /* Is it quota file? Do not allow user to mess with it */ + if (IS_NOQUOTA(inode)) + return -EPERM; + + fi->i_flags = iflags | (fi->i_flags & ~mask); + + if (fi->i_flags & F2FS_PROJINHERIT_FL) + set_inode_flag(inode, FI_PROJ_INHERIT); + else + clear_inode_flag(inode, FI_PROJ_INHERIT); + + inode->i_ctime = current_time(inode); + f2fs_set_inode_flags(inode); + f2fs_mark_inode_dirty_sync(inode, true); return 0; } +/* FS_IOC_GETFLAGS and FS_IOC_SETFLAGS support */ + +/* + * To make a new on-disk f2fs i_flag gettable via FS_IOC_GETFLAGS, add an entry + * for it to f2fs_fsflags_map[], and add its FS_*_FL equivalent to + * F2FS_GETTABLE_FS_FL. To also make it settable via FS_IOC_SETFLAGS, also add + * its FS_*_FL equivalent to F2FS_SETTABLE_FS_FL. 
+ */ + +static const struct { + u32 iflag; + u32 fsflag; +} f2fs_fsflags_map[] = { + { F2FS_SYNC_FL, FS_SYNC_FL }, + { F2FS_IMMUTABLE_FL, FS_IMMUTABLE_FL }, + { F2FS_APPEND_FL, FS_APPEND_FL }, + { F2FS_NODUMP_FL, FS_NODUMP_FL }, + { F2FS_NOATIME_FL, FS_NOATIME_FL }, + { F2FS_INDEX_FL, FS_INDEX_FL }, + { F2FS_DIRSYNC_FL, FS_DIRSYNC_FL }, + { F2FS_PROJINHERIT_FL, FS_PROJINHERIT_FL }, +}; + +#define F2FS_GETTABLE_FS_FL ( \ + FS_SYNC_FL | \ + FS_IMMUTABLE_FL | \ + FS_APPEND_FL | \ + FS_NODUMP_FL | \ + FS_NOATIME_FL | \ + FS_INDEX_FL | \ + FS_DIRSYNC_FL | \ + FS_PROJINHERIT_FL | \ + FS_ENCRYPT_FL | \ + FS_INLINE_DATA_FL | \ + FS_NOCOW_FL) + +#define F2FS_SETTABLE_FS_FL ( \ + FS_SYNC_FL | \ + FS_IMMUTABLE_FL | \ + FS_APPEND_FL | \ + FS_NODUMP_FL | \ + FS_NOATIME_FL | \ + FS_DIRSYNC_FL | \ + FS_PROJINHERIT_FL) + +/* Convert f2fs on-disk i_flags to FS_IOC_{GET,SET}FLAGS flags */ +static inline u32 f2fs_iflags_to_fsflags(u32 iflags) +{ + u32 fsflags = 0; + int i; + + for (i = 0; i < ARRAY_SIZE(f2fs_fsflags_map); i++) + if (iflags & f2fs_fsflags_map[i].iflag) + fsflags |= f2fs_fsflags_map[i].fsflag; + + return fsflags; +} + +/* Convert FS_IOC_{GET,SET}FLAGS flags to f2fs on-disk i_flags */ +static inline u32 f2fs_fsflags_to_iflags(u32 fsflags) +{ + u32 iflags = 0; + int i; + + for (i = 0; i < ARRAY_SIZE(f2fs_fsflags_map); i++) + if (fsflags & f2fs_fsflags_map[i].fsflag) + iflags |= f2fs_fsflags_map[i].iflag; + + return iflags; +} + static int f2fs_ioc_getflags(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); struct f2fs_inode_info *fi = F2FS_I(inode); - unsigned int flags = fi->i_flags & FS_FL_USER_VISIBLE; - return put_user(flags, (int __user *)arg); + u32 fsflags = f2fs_iflags_to_fsflags(fi->i_flags); + + if (IS_ENCRYPTED(inode)) + fsflags |= FS_ENCRYPT_FL; + if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) + fsflags |= FS_INLINE_DATA_FL; + if (is_inode_flag_set(inode, FI_PIN_FILE)) + fsflags |= FS_NOCOW_FL; + + fsflags &= F2FS_GETTABLE_FS_FL; + + return put_user(fsflags, (int __user *)arg); } static int f2fs_ioc_setflags(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); struct f2fs_inode_info *fi = F2FS_I(inode); - unsigned int flags; - unsigned int oldflags; + u32 fsflags, old_fsflags; + u32 iflags; int ret; if (!inode_owner_or_capable(inode)) return -EACCES; - if (get_user(flags, (int __user *)arg)) + if (get_user(fsflags, (int __user *)arg)) return -EFAULT; + if (fsflags & ~F2FS_GETTABLE_FS_FL) + return -EOPNOTSUPP; + fsflags &= F2FS_SETTABLE_FS_FL; + + iflags = f2fs_fsflags_to_iflags(fsflags); + if (f2fs_mask_flags(inode->i_mode, iflags) != iflags) + return -EOPNOTSUPP; + ret = mnt_want_write_file(filp); if (ret) return ret; inode_lock(inode); - /* Is it quota file? 
Do not allow user to mess with it */ - if (IS_NOQUOTA(inode)) { - ret = -EPERM; - goto unlock_out; - } - - flags = f2fs_mask_flags(inode->i_mode, flags); - - oldflags = fi->i_flags; - - if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) { - if (!capable(CAP_LINUX_IMMUTABLE)) { - ret = -EPERM; - goto unlock_out; - } - } - - flags = flags & FS_FL_USER_MODIFIABLE; - flags |= oldflags & ~FS_FL_USER_MODIFIABLE; - fi->i_flags = flags; + old_fsflags = f2fs_iflags_to_fsflags(fi->i_flags); + ret = vfs_ioc_setflags_prepare(inode, old_fsflags, fsflags); + if (ret) + goto out; - inode->i_ctime = current_time(inode); - f2fs_set_inode_flags(inode); - f2fs_mark_inode_dirty_sync(inode, false); -unlock_out: + ret = f2fs_setflags_common(inode, iflags, + f2fs_fsflags_to_iflags(F2FS_SETTABLE_FS_FL)); +out: inode_unlock(inode); mnt_drop_write_file(filp); return ret; @@ -1658,6 +1821,8 @@ static int f2fs_ioc_getversion(struct file *filp, unsigned long arg) static int f2fs_ioc_start_atomic_write(struct file *filp) { struct inode *inode = file_inode(filp); + struct f2fs_inode_info *fi = F2FS_I(inode); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); int ret; if (!inode_owner_or_capable(inode)) @@ -1672,31 +1837,42 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) inode_lock(inode); - if (f2fs_is_atomic_file(inode)) + if (f2fs_is_atomic_file(inode)) { + if (is_inode_flag_set(inode, FI_ATOMIC_REVOKE_REQUEST)) + ret = -EINVAL; goto out; + } ret = f2fs_convert_inline_inode(inode); if (ret) goto out; - set_inode_flag(inode, FI_ATOMIC_FILE); - set_inode_flag(inode, FI_HOT_DATA); - f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); + down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); - if (!get_dirty_pages(inode)) - goto inc_stat; - - f2fs_msg(F2FS_I_SB(inode)->sb, KERN_WARNING, - "Unexpected flush for atomic writes: ino=%lu, npages=%u", - inode->i_ino, get_dirty_pages(inode)); + /* + * Should wait end_io to count F2FS_WB_CP_DATA correctly by + * f2fs_is_atomic_file. 
+ */ + if (get_dirty_pages(inode)) + f2fs_warn(F2FS_I_SB(inode), "Unexpected flush for atomic writes: ino=%lu, npages=%u", + inode->i_ino, get_dirty_pages(inode)); ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); if (ret) { - clear_inode_flag(inode, FI_ATOMIC_FILE); - clear_inode_flag(inode, FI_HOT_DATA); + up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); goto out; } -inc_stat: + spin_lock(&sbi->inode_lock[ATOMIC_FILE]); + if (list_empty(&fi->inmem_ilist)) + list_add_tail(&fi->inmem_ilist, &sbi->inode_list[ATOMIC_FILE]); + spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); + + /* add inode in inmem_list first and set atomic_file */ + set_inode_flag(inode, FI_ATOMIC_FILE); + clear_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST); + up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + + f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); F2FS_I(inode)->inmem_task = current; stat_inc_atomic_write(inode); stat_update_max_atomic_write(inode); @@ -1718,29 +1894,31 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp) if (ret) return ret; - inode_lock(inode); + f2fs_balance_fs(F2FS_I_SB(inode), true); - down_write(&F2FS_I(inode)->dio_rwsem[WRITE]); + inode_lock(inode); - if (f2fs_is_volatile_file(inode)) + if (f2fs_is_volatile_file(inode)) { + ret = -EINVAL; goto err_out; + } if (f2fs_is_atomic_file(inode)) { - ret = commit_inmem_pages(inode); + ret = f2fs_commit_inmem_pages(inode); if (ret) goto err_out; ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true); - if (!ret) { - clear_inode_flag(inode, FI_ATOMIC_FILE); - clear_inode_flag(inode, FI_HOT_DATA); - stat_dec_atomic_write(inode); - } + if (!ret) + f2fs_drop_inmem_pages(inode); } else { ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 1, false); } err_out: - up_write(&F2FS_I(inode)->dio_rwsem[WRITE]); + if (is_inode_flag_set(inode, FI_ATOMIC_REVOKE_REQUEST)) { + clear_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST); + ret = -EINVAL; + } inode_unlock(inode); mnt_drop_write_file(filp); return ret; @@ -1825,13 +2003,15 @@ static int f2fs_ioc_abort_volatile_write(struct file *filp) inode_lock(inode); if (f2fs_is_atomic_file(inode)) - drop_inmem_pages(inode); + f2fs_drop_inmem_pages(inode); if (f2fs_is_volatile_file(inode)) { clear_inode_flag(inode, FI_VOLATILE_FILE); stat_dec_volatile_write(inode); ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true); } + clear_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST); + inode_unlock(inode); mnt_drop_write_file(filp); @@ -1845,7 +2025,7 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct super_block *sb = sbi->sb; __u32 in; - int ret; + int ret = 0; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -1853,9 +2033,11 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) if (get_user(in, (__u32 __user *)arg)) return -EFAULT; - ret = mnt_want_write_file(filp); - if (ret) - return ret; + if (in != F2FS_GOING_DOWN_FULLSYNC) { + ret = mnt_want_write_file(filp); + if (ret) + return ret; + } switch (in) { case F2FS_GOING_DOWN_FULLSYNC: @@ -1866,6 +2048,7 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) } if (sb) { f2fs_stop_checkpoint(sbi, false); + set_sbi_flag(sbi, SBI_IS_SHUTDOWN); thaw_bdev(sb->s_bdev, sb); } break; @@ -1875,28 +2058,42 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) if (ret) goto out; f2fs_stop_checkpoint(sbi, false); + set_sbi_flag(sbi, SBI_IS_SHUTDOWN); break; case F2FS_GOING_DOWN_NOSYNC: f2fs_stop_checkpoint(sbi, false); + set_sbi_flag(sbi, SBI_IS_SHUTDOWN); break; case 
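/*
 * The FS_IOC_{GET,SET}FLAGS rework a few hunks above trades ad-hoc
 * masking for a table-driven translation between on-disk f2fs i_flags
 * and the generic FS_*_FL bits, so the two encodings can only drift if
 * the table does. A self-contained sketch of that pattern; the flag
 * values below are illustrative, not f2fs's real ones:
 */
#include <stddef.h>
#include <stdint.h>

struct flag_map { uint32_t internal; uint32_t external; };

static const struct flag_map fmap[] = {
	{ 0x0008, 0x00000008 },		/* e.g. a SYNC-style bit */
	{ 0x0010, 0x00000010 },		/* e.g. an IMMUTABLE-style bit */
};

static uint32_t flags_to_external(uint32_t internal)
{
	uint32_t out = 0;
	size_t i;

	for (i = 0; i < sizeof(fmap) / sizeof(fmap[0]); i++)
		if (internal & fmap[i].internal)
			out |= fmap[i].external;
	return out;
}
/* The reverse direction walks the same table with the fields swapped. */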
F2FS_GOING_DOWN_METAFLUSH: - sync_meta_pages(sbi, META, LONG_MAX, FS_META_IO); + f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_META_IO); f2fs_stop_checkpoint(sbi, false); + set_sbi_flag(sbi, SBI_IS_SHUTDOWN); break; + case F2FS_GOING_DOWN_NEED_FSCK: + set_sbi_flag(sbi, SBI_NEED_FSCK); + set_sbi_flag(sbi, SBI_CP_DISABLED_QUICK); + set_sbi_flag(sbi, SBI_IS_DIRTY); + /* do checkpoint only */ + ret = f2fs_sync_fs(sb, 1); + goto out; default: ret = -EINVAL; goto out; } - stop_gc_thread(sbi); - stop_discard_thread(sbi); + f2fs_stop_gc_thread(sbi); + f2fs_stop_discard_thread(sbi); - drop_discard_cmd(sbi); + f2fs_drop_discard_cmd(sbi); clear_opt(sbi, DISCARD); f2fs_update_time(sbi, REQ_TIME); out: - mnt_drop_write_file(filp); + if (in != F2FS_GOING_DOWN_FULLSYNC) + mnt_drop_write_file(filp); + + trace_f2fs_shutdown(sbi, in, ret); + return ret; } @@ -1911,7 +2108,7 @@ static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (!blk_queue_discard(q)) + if (!f2fs_hw_support_discard(F2FS_SB(sb))) return -EOPNOTSUPP; if (copy_from_user(&range, (struct fstrim_range __user *)arg, @@ -1950,7 +2147,7 @@ static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); - if (!f2fs_sb_has_encrypt(inode->i_sb)) + if (!f2fs_sb_has_encrypt(F2FS_I_SB(inode))) return -EOPNOTSUPP; f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); @@ -1960,7 +2157,7 @@ static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg) static int f2fs_ioc_get_encryption_policy(struct file *filp, unsigned long arg) { - if (!f2fs_sb_has_encrypt(file_inode(filp)->i_sb)) + if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp)))) return -EOPNOTSUPP; return fscrypt_ioctl_get_policy(filp, (void __user *)arg); } @@ -1971,7 +2168,7 @@ static int f2fs_ioc_get_encryption_pwsalt(struct file *filp, unsigned long arg) struct f2fs_sb_info *sbi = F2FS_I_SB(inode); int err; - if (!f2fs_sb_has_encrypt(inode->i_sb)) + if (!f2fs_sb_has_encrypt(sbi)) return -EOPNOTSUPP; err = mnt_want_write_file(filp); @@ -2055,15 +2252,15 @@ static int f2fs_ioc_gc_range(struct file *filp, unsigned long arg) if (f2fs_readonly(sbi->sb)) return -EROFS; + end = range.start + range.len; + if (range.start < MAIN_BLKADDR(sbi) || end >= MAX_BLKADDR(sbi)) { + return -EINVAL; + } + ret = mnt_want_write_file(filp); if (ret) return ret; - end = range.start + range.len; - if (range.start < MAIN_BLKADDR(sbi) || end >= MAX_BLKADDR(sbi)) { - ret = -EINVAL; - goto out; - } do_more: if (!range.sync) { if (!mutex_trylock(&sbi->gc_mutex)) { @@ -2075,7 +2272,7 @@ do_more: } ret = f2fs_gc(sbi, range.sync, true, GET_SEGNO(sbi, range.start)); - range.start += sbi->blocks_per_seg; + range.start += BLKS_PER_SEC(sbi); if (range.start <= end) goto do_more; out: @@ -2095,6 +2292,11 @@ static int f2fs_ioc_write_checkpoint(struct file *filp, unsigned long arg) if (f2fs_readonly(sbi->sb)) return -EROFS; + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { + f2fs_info(sbi, "Skipping Checkpoint. 
Checkpoints currently disabled."); + return -EINVAL; + } + ret = mnt_want_write_file(filp); if (ret) return ret; @@ -2111,8 +2313,9 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, { struct inode *inode = file_inode(filp); struct f2fs_map_blocks map = { .m_next_extent = NULL, - .m_seg_type = NO_CHECK_TYPE }; - struct extent_info ei = {0,0,0}; + .m_seg_type = NO_CHECK_TYPE , + .m_may_create = false }; + struct extent_info ei = {0, 0, 0}; pgoff_t pg_start, pg_end, next_pgofs; unsigned int blk_per_seg = sbi->blocks_per_seg; unsigned int total = 0, sec_num; @@ -2121,7 +2324,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, int err; /* if in-place-update policy is enabled, don't waste time here */ - if (should_update_inplace(inode, NULL)) + if (f2fs_should_update_inplace(inode, NULL)) return -EINVAL; pg_start = range->start >> PAGE_SHIFT; @@ -2179,7 +2382,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, if (!fragmented) goto out; - sec_num = (total + BLKS_PER_SEC(sbi) - 1) / BLKS_PER_SEC(sbi); + sec_num = DIV_ROUND_UP(total, BLKS_PER_SEC(sbi)); /* * make sure there are enough free section for LFS allocation, this can @@ -2216,7 +2419,7 @@ do_map: while (idx < map.m_lblk + map.m_len && cnt < blk_per_seg) { struct page *page; - page = get_lock_data_page(inode, idx, true); + page = f2fs_get_lock_data_page(inode, idx, true); if (IS_ERR(page)) { err = PTR_ERR(page); goto clear_out; @@ -2327,15 +2530,10 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in, } inode_lock(src); - down_write(&F2FS_I(src)->dio_rwsem[WRITE]); if (src != dst) { ret = -EBUSY; if (!inode_trylock(dst)) goto out; - if (!down_write_trylock(&F2FS_I(dst)->dio_rwsem[WRITE])) { - inode_unlock(dst); - goto out; - } } ret = -EINVAL; @@ -2380,6 +2578,14 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in, goto out_unlock; f2fs_balance_fs(sbi, true); + + down_write(&F2FS_I(src)->i_gc_rwsem[WRITE]); + if (src != dst) { + ret = -EBUSY; + if (!down_write_trylock(&F2FS_I(dst)->i_gc_rwsem[WRITE])) + goto out_src; + } + f2fs_lock_op(sbi); ret = __exchange_data_block(src, dst, pos_in >> F2FS_BLKSIZE_BITS, pos_out >> F2FS_BLKSIZE_BITS, @@ -2392,13 +2598,15 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in, f2fs_i_size_write(dst, dst_osize); } f2fs_unlock_op(sbi); + + if (src != dst) + up_write(&F2FS_I(dst)->i_gc_rwsem[WRITE]); +out_src: + up_write(&F2FS_I(src)->i_gc_rwsem[WRITE]); out_unlock: - if (src != dst) { - up_write(&F2FS_I(dst)->dio_rwsem[WRITE]); + if (src != dst) inode_unlock(dst); - } out: - up_write(&F2FS_I(src)->dio_rwsem[WRITE]); inode_unlock(src); return ret; } @@ -2461,16 +2669,17 @@ static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg) if (f2fs_readonly(sbi->sb)) return -EROFS; + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) + return -EINVAL; + if (copy_from_user(&range, (struct f2fs_flush_device __user *)arg, sizeof(range))) return -EFAULT; - if (sbi->s_ndevs <= 1 || sbi->s_ndevs - 1 <= range.dev_num || - sbi->segs_per_sec != 1) { - f2fs_msg(sbi->sb, KERN_WARNING, - "Can't flush %u in %d for segs_per_sec %u != 1\n", - range.dev_num, sbi->s_ndevs, - sbi->segs_per_sec); + if (!f2fs_is_multi_device(sbi) || sbi->s_ndevs - 1 <= range.dev_num || + __is_large_section(sbi)) { + f2fs_warn(sbi, "Can't flush %u in %d for segs_per_sec %u != 1", + range.dev_num, sbi->s_ndevs, sbi->segs_per_sec); return -EINVAL; } @@ -2525,12 +2734,13 @@ int f2fs_pin_file_control(struct inode *inode, bool inc) /* Use i_gc_failures for 
normal file as a risk signal. */ if (inc) - f2fs_i_gc_failures_write(inode, fi->i_gc_failures + 1); + f2fs_i_gc_failures_write(inode, + fi->i_gc_failures[GC_FAILURE_PIN] + 1); - if (fi->i_gc_failures > sbi->gc_pin_file_threshold) { - f2fs_msg(sbi->sb, KERN_WARNING, - "%s: Enable GC = ino %lx after %x GC trials\n", - __func__, inode->i_ino, fi->i_gc_failures); + if (fi->i_gc_failures[GC_FAILURE_PIN] > sbi->gc_pin_file_threshold) { + f2fs_warn(sbi, "%s: Enable GC = ino %lx after %x GC trials", + __func__, inode->i_ino, + fi->i_gc_failures[GC_FAILURE_PIN]); clear_inode_flag(inode, FI_PIN_FILE); return -EAGAIN; } @@ -2543,9 +2753,6 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) __u32 pin; int ret = 0; - if (!inode_owner_or_capable(inode)) - return -EACCES; - if (get_user(pin, (__u32 __user *)arg)) return -EFAULT; @@ -2561,14 +2768,14 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) inode_lock(inode); - if (should_update_outplace(inode, NULL)) { + if (f2fs_should_update_outplace(inode, NULL)) { ret = -EINVAL; goto out; } if (!pin) { clear_inode_flag(inode, FI_PIN_FILE); - F2FS_I(inode)->i_gc_failures = 1; + f2fs_i_gc_failures_write(inode, 0); goto done; } @@ -2581,7 +2788,7 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) goto out; set_inode_flag(inode, FI_PIN_FILE); - ret = F2FS_I(inode)->i_gc_failures; + ret = F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN]; done: f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); out: @@ -2596,7 +2803,7 @@ static int f2fs_ioc_get_pin_file(struct file *filp, unsigned long arg) __u32 pin = 0; if (is_inode_flag_set(inode, FI_PIN_FILE)) - pin = F2FS_I(inode)->i_gc_failures; + pin = F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN]; return put_user(pin, (u32 __user *)arg); } @@ -2615,14 +2822,15 @@ int f2fs_precache_extents(struct inode *inode) map.m_next_pgofs = NULL; map.m_next_extent = &m_next_extent; map.m_seg_type = NO_CHECK_TYPE; + map.m_may_create = false; end = F2FS_I_SB(inode)->max_file_blocks; while (map.m_lblk < end) { map.m_len = end - map.m_lblk; - down_write(&fi->dio_rwsem[WRITE]); + down_write(&fi->i_gc_rwsem[WRITE]); err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_PRECACHE); - up_write(&fi->dio_rwsem[WRITE]); + up_write(&fi->i_gc_rwsem[WRITE]); if (err) return err; @@ -2637,6 +2845,27 @@ static int f2fs_ioc_precache_extents(struct file *filp, unsigned long arg) return f2fs_precache_extents(file_inode(filp)); } +static int f2fs_ioc_resize_fs(struct file *filp, unsigned long arg) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(filp)); + __u64 block_count; + int ret; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (f2fs_readonly(sbi->sb)) + return -EROFS; + + if (copy_from_user(&block_count, (void __user *)arg, + sizeof(block_count))) + return -EFAULT; + + ret = f2fs_resize_fs(sbi, block_count); + + return ret; +} + long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp))))) @@ -2689,6 +2918,8 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return f2fs_ioc_set_pin_file(filp, arg); case F2FS_IOC_PRECACHE_EXTENTS: return f2fs_ioc_precache_extents(filp, arg); + case F2FS_IOC_RESIZE_FS: + return f2fs_ioc_resize_fs(filp, arg); default: return -ENOTTY; } @@ -2698,18 +2929,23 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); - struct blk_plug plug; ssize_t ret; - if 
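/*
 * The f2fs_file_write_iter() changes that begin here preserve the
 * IOCB_NOWAIT contract: try the inode lock, and only block when the
 * caller did not ask for non-blocking I/O. A loose userspace analogue
 * with POSIX mutexes (names are illustrative):
 */
#include <errno.h>
#include <pthread.h>
#include <stdbool.h>

static int write_locked(pthread_mutex_t *lock, bool nowait)
{
	if (pthread_mutex_trylock(lock) != 0) {
		if (nowait)
			return -EAGAIN;	/* caller is expected to retry */
		pthread_mutex_lock(lock);	/* blocking path */
	}
	/* ... perform the write under the lock ... */
	pthread_mutex_unlock(lock);
	return 0;
}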
(unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) - return -EIO; + if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) { + ret = -EIO; + goto out; + } - if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT)) - return -EINVAL; + if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT)) { + ret = -EINVAL; + goto out; + } if (!inode_trylock(inode)) { - if (iocb->ki_flags & IOCB_NOWAIT) - return -EAGAIN; + if (iocb->ki_flags & IOCB_NOWAIT) { + ret = -EAGAIN; + goto out; + } inode_lock(inode); } @@ -2722,16 +2958,16 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) if (iov_iter_fault_in_readable(from, iov_iter_count(from))) set_inode_flag(inode, FI_NO_PREALLOC); - if ((iocb->ki_flags & IOCB_NOWAIT) && - (iocb->ki_flags & IOCB_DIRECT)) { - if (!f2fs_overwrite_io(inode, iocb->ki_pos, + if ((iocb->ki_flags & IOCB_NOWAIT)) { + if (!f2fs_overwrite_io(inode, iocb->ki_pos, iov_iter_count(from)) || - f2fs_has_inline_data(inode) || - f2fs_force_buffered_io(inode, WRITE)) { - inode_unlock(inode); - return -EAGAIN; - } - + f2fs_has_inline_data(inode) || + f2fs_force_buffered_io(inode, iocb, from)) { + clear_inode_flag(inode, FI_NO_PREALLOC); + inode_unlock(inode); + ret = -EAGAIN; + goto out; + } } else { preallocated = true; target_size = iocb->ki_pos + iov_iter_count(from); @@ -2740,12 +2976,11 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) if (err) { clear_inode_flag(inode, FI_NO_PREALLOC); inode_unlock(inode); - return err; + ret = err; + goto out; } } - blk_start_plug(&plug); ret = __generic_file_write_iter(iocb, from); - blk_finish_plug(&plug); clear_inode_flag(inode, FI_NO_PREALLOC); /* if we couldn't write data, we should deallocate blocks. */ @@ -2756,7 +2991,9 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) f2fs_update_iostat(F2FS_I_SB(inode), APP_WRITE_IO, ret); } inode_unlock(inode); - +out: + trace_f2fs_file_write_iter(inode, iocb->ki_pos, + iov_iter_count(from), ret); if (ret > 0) { ssize_t err; @@ -2799,6 +3036,7 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case F2FS_IOC_GET_PIN_FILE: case F2FS_IOC_SET_PIN_FILE: case F2FS_IOC_PRECACHE_EXTENTS: + case F2FS_IOC_RESIZE_FS: break; default: return -ENOIOCTLCMD; diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index d9b799d38fc6..bac60511c9a2 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/gc.c * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
*/ #include <linux/fs.h> #include <linux/module.h> @@ -43,25 +40,28 @@ static int gc_thread_func(void *data) if (gc_th->gc_wake) gc_th->gc_wake = 0; - if (try_to_freeze()) + if (try_to_freeze()) { + stat_other_skip_bggc_count(sbi); continue; + } if (kthread_should_stop()) break; if (sbi->sb->s_writers.frozen >= SB_FREEZE_WRITE) { increase_sleep_time(gc_th, &wait_ms); + stat_other_skip_bggc_count(sbi); continue; } -#ifdef CONFIG_F2FS_FAULT_INJECTION if (time_to_inject(sbi, FAULT_CHECKPOINT)) { f2fs_show_injection_info(FAULT_CHECKPOINT); f2fs_stop_checkpoint(sbi, false); } -#endif - if (!sb_start_write_trylock(sbi->sb)) + if (!sb_start_write_trylock(sbi->sb)) { + stat_other_skip_bggc_count(sbi); continue; + } /* * [GC triggering condition] @@ -76,18 +76,21 @@ static int gc_thread_func(void *data) * invalidated soon after by user update or deletion. * So, I'd like to wait some time to collect dirty segments. */ - if (gc_th->gc_urgent) { + if (sbi->gc_mode == GC_URGENT) { wait_ms = gc_th->urgent_sleep_time; mutex_lock(&sbi->gc_mutex); goto do_gc; } - if (!mutex_trylock(&sbi->gc_mutex)) + if (!mutex_trylock(&sbi->gc_mutex)) { + stat_other_skip_bggc_count(sbi); goto next; + } - if (!is_idle(sbi)) { + if (!is_idle(sbi, GC_TIME)) { increase_sleep_time(gc_th, &wait_ms); mutex_unlock(&sbi->gc_mutex); + stat_io_skip_bggc_count(sbi); goto next; } @@ -114,7 +117,7 @@ next: return 0; } -int start_gc_thread(struct f2fs_sb_info *sbi) +int f2fs_start_gc_thread(struct f2fs_sb_info *sbi) { struct f2fs_gc_kthread *gc_th; dev_t dev = sbi->sb->s_bdev->bd_dev; @@ -131,8 +134,6 @@ int start_gc_thread(struct f2fs_sb_info *sbi) gc_th->max_sleep_time = DEF_GC_THREAD_MAX_SLEEP_TIME; gc_th->no_gc_sleep_time = DEF_GC_THREAD_NOGC_SLEEP_TIME; - gc_th->gc_idle = 0; - gc_th->gc_urgent = 0; gc_th->gc_wake= 0; sbi->gc_thread = gc_th; @@ -141,38 +142,36 @@ int start_gc_thread(struct f2fs_sb_info *sbi) "f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev)); if (IS_ERR(gc_th->f2fs_gc_task)) { err = PTR_ERR(gc_th->f2fs_gc_task); - kfree(gc_th); + kvfree(gc_th); sbi->gc_thread = NULL; } out: return err; } -void stop_gc_thread(struct f2fs_sb_info *sbi) +void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi) { struct f2fs_gc_kthread *gc_th = sbi->gc_thread; if (!gc_th) return; kthread_stop(gc_th->f2fs_gc_task); - kfree(gc_th); + kvfree(gc_th); sbi->gc_thread = NULL; } -static int select_gc_type(struct f2fs_gc_kthread *gc_th, int gc_type) +static int select_gc_type(struct f2fs_sb_info *sbi, int gc_type) { int gc_mode = (gc_type == BG_GC) ? 
GC_CB : GC_GREEDY; - if (!gc_th) - return gc_mode; - - if (gc_th->gc_idle) { - if (gc_th->gc_idle == 1) - gc_mode = GC_CB; - else if (gc_th->gc_idle == 2) - gc_mode = GC_GREEDY; - } - if (gc_th->gc_urgent) + switch (sbi->gc_mode) { + case GC_IDLE_CB: + gc_mode = GC_CB; + break; + case GC_IDLE_GREEDY: + case GC_URGENT: gc_mode = GC_GREEDY; + break; + } return gc_mode; } @@ -187,7 +186,7 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type, p->max_search = dirty_i->nr_dirty[type]; p->ofs_unit = 1; } else { - p->gc_mode = select_gc_type(sbi->gc_thread, gc_type); + p->gc_mode = select_gc_type(sbi, gc_type); p->dirty_segmap = dirty_i->dirty_segmap[DIRTY]; p->max_search = dirty_i->nr_dirty[DIRTY]; p->ofs_unit = sbi->segs_per_sec; @@ -195,7 +194,7 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type, /* we need to check every dirty segments in the FG_GC case */ if (gc_type != FG_GC && - (sbi->gc_thread && !sbi->gc_thread->gc_urgent) && + (sbi->gc_mode != GC_URGENT) && p->max_search > sbi->max_victim_search) p->max_search = sbi->max_victim_search; @@ -234,10 +233,6 @@ static unsigned int check_bg_victims(struct f2fs_sb_info *sbi) for_each_set_bit(secno, dirty_i->victim_secmap, MAIN_SECS(sbi)) { if (sec_usage_check(sbi, secno)) continue; - - if (no_fggc_candidate(sbi, secno)) - continue; - clear_bit(secno, dirty_i->victim_secmap); return GET_SEG_FROM_SEC(sbi, secno); } @@ -316,10 +311,11 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, struct sit_info *sm = SIT_I(sbi); struct victim_sel_policy p; unsigned int secno, last_victim; - unsigned int last_segment = MAIN_SEGS(sbi); + unsigned int last_segment; unsigned int nsearched = 0; mutex_lock(&dirty_i->seglist_lock); + last_segment = MAIN_SECS(sbi) * sbi->segs_per_sec; p.alloc_mode = alloc_mode; select_policy(sbi, gc_type, type, &p); @@ -328,8 +324,7 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, p.min_cost = get_max_cost(sbi, &p); if (*result != NULL_SEGNO) { - if (IS_DATASEG(get_seg_entry(sbi, *result)->type) && - get_valid_blocks(sbi, *result, false) && + if (get_valid_blocks(sbi, *result, false) && !sec_usage_check(sbi, GET_SEC_FROM_SEG(sbi, *result))) p.min_segno = *result; goto out; @@ -338,6 +333,22 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, if (p.max_search == 0) goto out; + if (__is_large_section(sbi) && p.alloc_mode == LFS) { + if (sbi->next_victim_seg[BG_GC] != NULL_SEGNO) { + p.min_segno = sbi->next_victim_seg[BG_GC]; + *result = p.min_segno; + sbi->next_victim_seg[BG_GC] = NULL_SEGNO; + goto got_result; + } + if (gc_type == FG_GC && + sbi->next_victim_seg[FG_GC] != NULL_SEGNO) { + p.min_segno = sbi->next_victim_seg[FG_GC]; + *result = p.min_segno; + sbi->next_victim_seg[FG_GC] = NULL_SEGNO; + goto got_result; + } + } + last_victim = sm->last_victim[p.gc_mode]; if (p.alloc_mode == LFS && gc_type == FG_GC) { p.min_segno = check_bg_victims(sbi); @@ -375,10 +386,12 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, if (sec_usage_check(sbi, secno)) goto next; - if (gc_type == BG_GC && test_bit(secno, dirty_i->victim_secmap)) + /* Don't touch checkpointed data */ + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) && + get_ckpt_valid_blocks(sbi, segno) && + p.alloc_mode != SSR)) goto next; - if (gc_type == FG_GC && p.alloc_mode == LFS && - no_fggc_candidate(sbi, secno)) + if (gc_type == BG_GC && test_bit(secno, dirty_i->victim_secmap)) goto next; cost = get_gc_cost(sbi, segno, &p); @@ -393,12 +406,15 @@ next: sm->last_victim[p.gc_mode] = last_victim + 1; 
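/*
 * get_victim_by_default() above resumes scanning from
 * sm->last_victim[p.gc_mode] and wraps modulo the search space, so
 * successive GC passes cover all candidate segments rather than
 * rescanning the front of the list each time. A minimal sketch of such
 * a resumable circular scan (pick_victim is a hypothetical helper):
 */
#include <stdbool.h>

static int pick_victim(const bool *dirty, unsigned int nsegs,
			unsigned int *cursor)
{
	unsigned int i;

	for (i = 0; i < nsegs; i++) {
		unsigned int segno = (*cursor + i) % nsegs;

		if (dirty[segno]) {
			*cursor = (segno + 1) % nsegs; /* resume point */
			return (int)segno;
		}
	}
	return -1;	/* no dirty segment found */
}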
else sm->last_victim[p.gc_mode] = segno + 1; - sm->last_victim[p.gc_mode] %= MAIN_SEGS(sbi); + sm->last_victim[p.gc_mode] %= + (MAIN_SECS(sbi) * sbi->segs_per_sec); break; } } if (p.min_segno != NULL_SEGNO) { got_it: + *result = (p.min_segno / p.ofs_unit) * p.ofs_unit; +got_result: if (p.alloc_mode == LFS) { secno = GET_SEC_FROM_SEG(sbi, p.min_segno); if (gc_type == FG_GC) @@ -406,13 +422,13 @@ got_it: else set_bit(secno, dirty_i->victim_secmap); } - *result = (p.min_segno / p.ofs_unit) * p.ofs_unit; + } +out: + if (p.min_segno != NULL_SEGNO) trace_f2fs_get_victim(sbi->sb, type, gc_type, &p, sbi->cur_victim_sec, prefree_segments(sbi), free_segments(sbi)); - } -out: mutex_unlock(&dirty_i->seglist_lock); return (p.min_segno == NULL_SEGNO) ? 0 : 1; @@ -440,7 +456,7 @@ static void add_gc_inode(struct gc_inode_list *gc_list, struct inode *inode) iput(inode); return; } - new_ie = f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS); + new_ie = f2fs_kmem_cache_alloc(f2fs_inode_entry_slab, GFP_NOFS); new_ie->inode = inode; f2fs_radix_tree_insert(&gc_list->iroot, inode->i_ino, new_ie); @@ -454,7 +470,7 @@ static void put_gc_inode(struct gc_inode_list *gc_list) radix_tree_delete(&gc_list->iroot, ie->inode->i_ino); iput(ie->inode); list_del(&ie->list); - kmem_cache_free(inode_entry_slab, ie); + kmem_cache_free(f2fs_inode_entry_slab, ie); } } @@ -477,65 +493,81 @@ static int check_valid_map(struct f2fs_sb_info *sbi, * On validity, copy that node with cold status, otherwise (invalid node) * ignore that. */ -static void gc_node_segment(struct f2fs_sb_info *sbi, +static int gc_node_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, unsigned int segno, int gc_type) { struct f2fs_summary *entry; block_t start_addr; int off; int phase = 0; + bool fggc = (gc_type == FG_GC); + int submitted = 0; start_addr = START_BLOCK(sbi, segno); next_step: entry = sum; + if (fggc && phase == 2) + atomic_inc(&sbi->wb_sync_req[NODE]); + for (off = 0; off < sbi->blocks_per_seg; off++, entry++) { nid_t nid = le32_to_cpu(entry->nid); struct page *node_page; struct node_info ni; + int err; /* stop BG_GC if there is not enough free sections. */ if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0)) - return; + return submitted; if (check_valid_map(sbi, segno, off) == 0) continue; if (phase == 0) { - ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), 1, + f2fs_ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), 1, META_NAT, true); continue; } if (phase == 1) { - ra_node_page(sbi, nid); + f2fs_ra_node_page(sbi, nid); continue; } /* phase == 2 */ - node_page = get_node_page(sbi, nid); + node_page = f2fs_get_node_page(sbi, nid); if (IS_ERR(node_page)) continue; - /* block may become invalid during get_node_page */ + /* block may become invalid during f2fs_get_node_page */ if (check_valid_map(sbi, segno, off) == 0) { f2fs_put_page(node_page, 1); continue; } - get_node_info(sbi, nid, &ni); + if (f2fs_get_node_info(sbi, nid, &ni)) { + f2fs_put_page(node_page, 1); + continue; + } + if (ni.blk_addr != start_addr + off) { f2fs_put_page(node_page, 1); continue; } - move_node_page(node_page, gc_type); + err = f2fs_move_node_page(node_page, gc_type); + if (!err && gc_type == FG_GC) + submitted++; stat_inc_node_blk_count(sbi, 1, gc_type); } if (++phase < 3) goto next_step; + + if (fggc) + atomic_dec(&sbi->wb_sync_req[NODE]); + return submitted; } /* @@ -545,7 +577,7 @@ next_step: * as indirect or double indirect node blocks, are given, it must be a caller's * bug. 
*/ -block_t start_bidx_of_node(unsigned int node_ofs, struct inode *inode) +block_t f2fs_start_bidx_of_node(unsigned int node_ofs, struct inode *inode) { unsigned int indirect_blks = 2 * NIDS_PER_BLOCK + 4; unsigned int bidx; @@ -562,7 +594,7 @@ block_t start_bidx_of_node(unsigned int node_ofs, struct inode *inode) int dec = (node_ofs - indirect_blks - 3) / (NIDS_PER_BLOCK + 1); bidx = node_ofs - 5 - dec; } - return bidx * ADDRS_PER_BLOCK + ADDRS_PER_INODE(inode); + return bidx * ADDRS_PER_BLOCK(inode) + ADDRS_PER_INODE(inode); } static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, @@ -576,16 +608,18 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, nid = le32_to_cpu(sum->nid); ofs_in_node = le16_to_cpu(sum->ofs_in_node); - node_page = get_node_page(sbi, nid); + node_page = f2fs_get_node_page(sbi, nid); if (IS_ERR(node_page)) return false; - get_node_info(sbi, nid, dni); + if (f2fs_get_node_info(sbi, nid, dni)) { + f2fs_put_page(node_page, 1); + return false; + } if (sum->version != dni->version) { - f2fs_msg(sbi->sb, KERN_WARNING, - "%s: valid data with mismatched node version.", - __func__); + f2fs_warn(sbi, "%s: valid data with mismatched node version.", + __func__); set_sbi_flag(sbi, SBI_NEED_FSCK); } @@ -598,12 +632,95 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, return true; } +static int ra_data_block(struct inode *inode, pgoff_t index) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct address_space *mapping = inode->i_mapping; + struct dnode_of_data dn; + struct page *page; + struct extent_info ei = {0, 0, 0}; + struct f2fs_io_info fio = { + .sbi = sbi, + .ino = inode->i_ino, + .type = DATA, + .temp = COLD, + .op = REQ_OP_READ, + .op_flags = 0, + .encrypted_page = NULL, + .in_list = false, + .retry = false, + }; + int err; + + page = f2fs_grab_cache_page(mapping, index, true); + if (!page) + return -ENOMEM; + + if (f2fs_lookup_extent_cache(inode, index, &ei)) { + dn.data_blkaddr = ei.blk + index - ei.fofs; + if (unlikely(!f2fs_is_valid_blkaddr(sbi, dn.data_blkaddr, + DATA_GENERIC_ENHANCE_READ))) { + err = -EFSCORRUPTED; + goto put_page; + } + goto got_it; + } + + set_new_dnode(&dn, inode, NULL, NULL, 0); + err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE); + if (err) + goto put_page; + f2fs_put_dnode(&dn); + + if (!__is_valid_data_blkaddr(dn.data_blkaddr)) { + err = -ENOENT; + goto put_page; + } + if (unlikely(!f2fs_is_valid_blkaddr(sbi, dn.data_blkaddr, + DATA_GENERIC_ENHANCE))) { + err = -EFSCORRUPTED; + goto put_page; + } +got_it: + /* read page */ + fio.page = page; + fio.new_blkaddr = fio.old_blkaddr = dn.data_blkaddr; + + /* + * don't cache encrypted data into meta inode until previous dirty + * data were writebacked to avoid racing between GC and flush. + */ + f2fs_wait_on_page_writeback(page, DATA, true, true); + + f2fs_wait_on_block_writeback(inode, dn.data_blkaddr); + + fio.encrypted_page = f2fs_pagecache_get_page(META_MAPPING(sbi), + dn.data_blkaddr, + FGP_LOCK | FGP_CREAT, GFP_NOFS); + if (!fio.encrypted_page) { + err = -ENOMEM; + goto put_page; + } + + err = f2fs_submit_page_bio(&fio); + if (err) + goto put_encrypted_page; + f2fs_put_page(fio.encrypted_page, 0); + f2fs_put_page(page, 1); + return 0; +put_encrypted_page: + f2fs_put_page(fio.encrypted_page, 1); +put_page: + f2fs_put_page(page, 1); + return err; +} + /* * Move data block via META_MAPPING while keeping locked data page. * This can be used to move blocks, aka LBAs, directly on disk. 
*/ -static void move_data_block(struct inode *inode, block_t bidx, - unsigned int segno, int off) +static int move_data_block(struct inode *inode, block_t bidx, + int gc_type, unsigned int segno, int off) { struct f2fs_io_info fio = { .sbi = F2FS_I_SB(inode), @@ -614,37 +731,47 @@ static void move_data_block(struct inode *inode, block_t bidx, .op_flags = REQ_SYNC, .encrypted_page = NULL, .in_list = false, + .retry = false, }; struct dnode_of_data dn; struct f2fs_summary sum; struct node_info ni; - struct page *page; + struct page *page, *mpage; block_t newaddr; - int err; + int err = 0; + bool lfs_mode = test_opt(fio.sbi, LFS); /* do not read out */ page = f2fs_grab_cache_page(inode->i_mapping, bidx, false); if (!page) - return; + return -ENOMEM; - if (!check_valid_map(F2FS_I_SB(inode), segno, off)) + if (!check_valid_map(F2FS_I_SB(inode), segno, off)) { + err = -ENOENT; goto out; + } - if (f2fs_is_atomic_file(inode)) + if (f2fs_is_atomic_file(inode)) { + F2FS_I(inode)->i_gc_failures[GC_FAILURE_ATOMIC]++; + F2FS_I_SB(inode)->skipped_atomic_files[gc_type]++; + err = -EAGAIN; goto out; + } if (f2fs_is_pinned_file(inode)) { f2fs_pin_file_control(inode, true); + err = -EAGAIN; goto out; } set_new_dnode(&dn, inode, NULL, NULL, 0); - err = get_dnode_of_data(&dn, bidx, LOOKUP_NODE); + err = f2fs_get_dnode_of_data(&dn, bidx, LOOKUP_NODE); if (err) goto out; if (unlikely(dn.data_blkaddr == NULL_ADDR)) { ClearPageUptodate(page); + err = -ENOENT; goto put_out; } @@ -652,43 +779,66 @@ static void move_data_block(struct inode *inode, block_t bidx, * don't cache encrypted data into meta inode until previous dirty * data were writebacked to avoid racing between GC and flush. */ - f2fs_wait_on_page_writeback(page, DATA, true); + f2fs_wait_on_page_writeback(page, DATA, true, true); + + f2fs_wait_on_block_writeback(inode, dn.data_blkaddr); + + err = f2fs_get_node_info(fio.sbi, dn.nid, &ni); + if (err) + goto put_out; - get_node_info(fio.sbi, dn.nid, &ni); set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version); /* read page */ fio.page = page; fio.new_blkaddr = fio.old_blkaddr = dn.data_blkaddr; - allocate_data_block(fio.sbi, NULL, fio.old_blkaddr, &newaddr, + if (lfs_mode) + down_write(&fio.sbi->io_order_lock); + + mpage = f2fs_grab_cache_page(META_MAPPING(fio.sbi), + fio.old_blkaddr, false); + if (!mpage) + goto up_out; + + fio.encrypted_page = mpage; + + /* read source block in mpage */ + if (!PageUptodate(mpage)) { + err = f2fs_submit_page_bio(&fio); + if (err) { + f2fs_put_page(mpage, 1); + goto up_out; + } + lock_page(mpage); + if (unlikely(mpage->mapping != META_MAPPING(fio.sbi) || + !PageUptodate(mpage))) { + err = -EIO; + f2fs_put_page(mpage, 1); + goto up_out; + } + } + + f2fs_allocate_data_block(fio.sbi, NULL, fio.old_blkaddr, &newaddr, &sum, CURSEG_COLD_DATA, NULL, false); fio.encrypted_page = f2fs_pagecache_get_page(META_MAPPING(fio.sbi), newaddr, FGP_LOCK | FGP_CREAT, GFP_NOFS); if (!fio.encrypted_page) { err = -ENOMEM; + f2fs_put_page(mpage, 1); goto recover_block; } - err = f2fs_submit_page_bio(&fio); - if (err) - goto put_page_out; - - /* write page */ - lock_page(fio.encrypted_page); - - if (unlikely(fio.encrypted_page->mapping != META_MAPPING(fio.sbi))) { - err = -EIO; - goto put_page_out; - } - if (unlikely(!PageUptodate(fio.encrypted_page))) { - err = -EIO; - goto put_page_out; - } + /* write target block */ + f2fs_wait_on_page_writeback(fio.encrypted_page, DATA, true, true); + memcpy(page_address(fio.encrypted_page), + page_address(mpage), PAGE_SIZE); + f2fs_put_page(mpage, 1); + 
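/*
 * The reworked move_data_block() above reads the source block into a
 * META_MAPPING page, copies it into the page bound to the newly
 * allocated address, writes that out, and only then invalidates the
 * stale cache entry. A very loose userspace analogue of "migrate one
 * block by copying" over a plain file descriptor:
 */
#include <sys/types.h>
#include <stdint.h>
#include <unistd.h>

#define BLKSZ 4096

static int move_block(int fd, off_t from, off_t to)
{
	uint8_t buf[BLKSZ];

	if (pread(fd, buf, BLKSZ, from) != BLKSZ)
		return -1;
	if (pwrite(fd, buf, BLKSZ, to) != BLKSZ)
		return -1;
	/*
	 * The real GC path must also repoint the node that referenced
	 * the old address and invalidate the old block; this sketch
	 * only shows the copy step.
	 */
	return 0;
}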
invalidate_mapping_pages(META_MAPPING(fio.sbi), + fio.old_blkaddr, fio.old_blkaddr); set_page_dirty(fio.encrypted_page); - f2fs_wait_on_page_writeback(fio.encrypted_page, DATA, true); if (clear_page_dirty_for_io(fio.encrypted_page)) dec_page_count(fio.sbi, F2FS_DIRTY_META); @@ -696,13 +846,14 @@ static void move_data_block(struct inode *inode, block_t bidx, ClearPageError(page); /* allocate block address */ - f2fs_wait_on_page_writeback(dn.node_page, NODE, true); + f2fs_wait_on_page_writeback(dn.node_page, NODE, true, true); fio.op = REQ_OP_WRITE; fio.op_flags = REQ_SYNC | REQ_NOIDLE; fio.new_blkaddr = newaddr; - err = f2fs_submit_page_write(&fio); - if (err) { + f2fs_submit_page_write(&fio); + if (fio.retry) { + err = -EAGAIN; if (PageWriteback(fio.encrypted_page)) end_page_writeback(fio.encrypted_page); goto put_page_out; @@ -718,37 +869,51 @@ put_page_out: f2fs_put_page(fio.encrypted_page, 1); recover_block: if (err) - __f2fs_replace_block(fio.sbi, &sum, newaddr, fio.old_blkaddr, + f2fs_do_replace_block(fio.sbi, &sum, newaddr, fio.old_blkaddr, true, true); +up_out: + if (lfs_mode) + up_write(&fio.sbi->io_order_lock); put_out: f2fs_put_dnode(&dn); out: f2fs_put_page(page, 1); + return err; } -static void move_data_page(struct inode *inode, block_t bidx, int gc_type, +static int move_data_page(struct inode *inode, block_t bidx, int gc_type, unsigned int segno, int off) { struct page *page; + int err = 0; - page = get_lock_data_page(inode, bidx, true); + page = f2fs_get_lock_data_page(inode, bidx, true); if (IS_ERR(page)) - return; + return PTR_ERR(page); - if (!check_valid_map(F2FS_I_SB(inode), segno, off)) + if (!check_valid_map(F2FS_I_SB(inode), segno, off)) { + err = -ENOENT; goto out; + } - if (f2fs_is_atomic_file(inode)) + if (f2fs_is_atomic_file(inode)) { + F2FS_I(inode)->i_gc_failures[GC_FAILURE_ATOMIC]++; + F2FS_I_SB(inode)->skipped_atomic_files[gc_type]++; + err = -EAGAIN; goto out; + } if (f2fs_is_pinned_file(inode)) { if (gc_type == FG_GC) f2fs_pin_file_control(inode, true); + err = -EAGAIN; goto out; } if (gc_type == BG_GC) { - if (PageWriteback(page)) + if (PageWriteback(page)) { + err = -EAGAIN; goto out; + } set_page_dirty(page); set_cold_data(page); } else { @@ -766,26 +931,32 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type, .io_type = FS_GC_DATA_IO, }; bool is_dirty = PageDirty(page); - int err; retry: + f2fs_wait_on_page_writeback(page, DATA, true, true); + set_page_dirty(page); - f2fs_wait_on_page_writeback(page, DATA, true); if (clear_page_dirty_for_io(page)) { inode_dec_dirty_pages(inode); - remove_dirty_inode(inode); + f2fs_remove_dirty_inode(inode); } set_cold_data(page); - err = do_write_data_page(&fio); - if (err == -ENOMEM && is_dirty) { - congestion_wait(BLK_RW_ASYNC, HZ/50); - goto retry; + err = f2fs_do_write_data_page(&fio); + if (err) { + clear_cold_data(page); + if (err == -ENOMEM) { + congestion_wait(BLK_RW_ASYNC, HZ/50); + goto retry; + } + if (is_dirty) + set_page_dirty(page); } } out: f2fs_put_page(page, 1); + return err; } /* @@ -795,7 +966,7 @@ out: * If the parent node is not valid or the data block address is different, * the victim data block is ignored. 
*/ -static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, +static int gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, struct gc_inode_list *gc_list, unsigned int segno, int gc_type) { struct super_block *sb = sbi->sb; @@ -803,6 +974,7 @@ static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, block_t start_addr; int off; int phase = 0; + int submitted = 0; start_addr = START_BLOCK(sbi, segno); @@ -819,19 +991,19 @@ next_step: /* stop BG_GC if there is not enough free sections. */ if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0)) - return; + return submitted; if (check_valid_map(sbi, segno, off) == 0) continue; if (phase == 0) { - ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), 1, + f2fs_ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), 1, META_NAT, true); continue; } if (phase == 1) { - ra_node_page(sbi, nid); + f2fs_ra_node_page(sbi, nid); continue; } @@ -840,7 +1012,7 @@ next_step: continue; if (phase == 2) { - ra_node_page(sbi, dni.ino); + f2fs_ra_node_page(sbi, dni.ino); continue; } @@ -851,23 +1023,31 @@ next_step: if (IS_ERR(inode) || is_bad_inode(inode)) continue; - /* if inode uses special I/O path, let's go phase 3 */ - if (f2fs_post_read_required(inode)) { - add_gc_inode(gc_list, inode); + if (!down_write_trylock( + &F2FS_I(inode)->i_gc_rwsem[WRITE])) { + iput(inode); + sbi->skipped_gc_rwsem++; continue; } - if (!down_write_trylock( - &F2FS_I(inode)->dio_rwsem[WRITE])) { - iput(inode); + start_bidx = f2fs_start_bidx_of_node(nofs, inode) + + ofs_in_node; + + if (f2fs_post_read_required(inode)) { + int err = ra_data_block(inode, start_bidx); + + up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + if (err) { + iput(inode); + continue; + } + add_gc_inode(gc_list, inode); continue; } - start_bidx = start_bidx_of_node(nofs, inode); - data_page = get_read_data_page(inode, - start_bidx + ofs_in_node, REQ_RAHEAD, - true); - up_write(&F2FS_I(inode)->dio_rwsem[WRITE]); + data_page = f2fs_get_read_data_page(inode, + start_bidx, REQ_RAHEAD, true); + up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); if (IS_ERR(data_page)) { iput(inode); continue; @@ -883,13 +1063,15 @@ next_step: if (inode) { struct f2fs_inode_info *fi = F2FS_I(inode); bool locked = false; + int err; if (S_ISREG(inode->i_mode)) { - if (!down_write_trylock(&fi->dio_rwsem[READ])) + if (!down_write_trylock(&fi->i_gc_rwsem[READ])) continue; if (!down_write_trylock( - &fi->dio_rwsem[WRITE])) { - up_write(&fi->dio_rwsem[READ]); + &fi->i_gc_rwsem[WRITE])) { + sbi->skipped_gc_rwsem++; + up_write(&fi->i_gc_rwsem[READ]); continue; } locked = true; @@ -898,17 +1080,22 @@ next_step: inode_dio_wait(inode); } - start_bidx = start_bidx_of_node(nofs, inode) + start_bidx = f2fs_start_bidx_of_node(nofs, inode) + ofs_in_node; if (f2fs_post_read_required(inode)) - move_data_block(inode, start_bidx, segno, off); + err = move_data_block(inode, start_bidx, + gc_type, segno, off); else - move_data_page(inode, start_bidx, gc_type, + err = move_data_page(inode, start_bidx, gc_type, segno, off); + if (!err && (gc_type == FG_GC || + f2fs_post_read_required(inode))) + submitted++; + if (locked) { - up_write(&fi->dio_rwsem[WRITE]); - up_write(&fi->dio_rwsem[READ]); + up_write(&fi->i_gc_rwsem[WRITE]); + up_write(&fi->i_gc_rwsem[READ]); } stat_inc_data_blk_count(sbi, 1, gc_type); @@ -917,6 +1104,8 @@ next_step: if (++phase < 5) goto next_step; + + return submitted; } static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim, @@ -941,18 +1130,34 @@ static int do_garbage_collect(struct 
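/*
 * move_data_page() above retries only on -ENOMEM, backing off with
 * congestion_wait() before each new attempt, and propagates every other
 * error to the caller. The shape of that loop, sketched in userspace
 * (do_write stands in for f2fs_do_write_data_page):
 */
#include <errno.h>
#include <time.h>

static int write_with_backoff(int (*do_write)(void))
{
	struct timespec backoff = { 0, 20 * 1000 * 1000 };	/* ~HZ/50 */
	int err;

	for (;;) {
		err = do_write();
		if (err != -ENOMEM)
			return err;	/* success, or a hard error */
		nanosleep(&backoff, NULL);	/* transient; wait, retry */
	}
}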
f2fs_sb_info *sbi, struct blk_plug plug; unsigned int segno = start_segno; unsigned int end_segno = start_segno + sbi->segs_per_sec; - int seg_freed = 0; + int seg_freed = 0, migrated = 0; unsigned char type = IS_DATASEG(get_seg_entry(sbi, segno)->type) ? SUM_TYPE_DATA : SUM_TYPE_NODE; + int submitted = 0; + + if (__is_large_section(sbi)) + end_segno = rounddown(end_segno, sbi->segs_per_sec); /* readahead multi ssa blocks those have contiguous address */ - if (sbi->segs_per_sec > 1) - ra_meta_pages(sbi, GET_SUM_BLOCK(sbi, segno), - sbi->segs_per_sec, META_SSA, true); + if (__is_large_section(sbi)) + f2fs_ra_meta_pages(sbi, GET_SUM_BLOCK(sbi, segno), + end_segno - segno, META_SSA, true); /* reference all summary page */ while (segno < end_segno) { - sum_page = get_sum_page(sbi, segno++); + sum_page = f2fs_get_sum_page(sbi, segno++); + if (IS_ERR(sum_page)) { + int err = PTR_ERR(sum_page); + + end_segno = segno - 1; + for (segno = start_segno; segno < end_segno; segno++) { + sum_page = find_get_page(META_MAPPING(sbi), + GET_SUM_BLOCK(sbi, segno)); + f2fs_put_page(sum_page, 0); + f2fs_put_page(sum_page, 0); + } + return err; + } unlock_page(sum_page); } @@ -965,13 +1170,22 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, GET_SUM_BLOCK(sbi, segno)); f2fs_put_page(sum_page, 0); - if (get_valid_blocks(sbi, segno, false) == 0 || - !PageUptodate(sum_page) || - unlikely(f2fs_cp_error(sbi))) - goto next; + if (get_valid_blocks(sbi, segno, false) == 0) + goto freed; + if (__is_large_section(sbi) && + migrated >= sbi->migration_granularity) + goto skip; + if (!PageUptodate(sum_page) || unlikely(f2fs_cp_error(sbi))) + goto skip; sum = page_address(sum_page); - f2fs_bug_on(sbi, type != GET_SUM_TYPE((&sum->footer))); + if (type != GET_SUM_TYPE((&sum->footer))) { + f2fs_err(sbi, "Inconsistent segment (%u) type [%d, %d] in SSA and SIT", + segno, type, GET_SUM_TYPE((&sum->footer))); + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_stop_checkpoint(sbi, false); + goto skip; + } /* * this is to avoid deadlock: @@ -981,21 +1195,27 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, * - lock_page(sum_page) */ if (type == SUM_TYPE_NODE) - gc_node_segment(sbi, sum->entries, segno, gc_type); - else - gc_data_segment(sbi, sum->entries, gc_list, segno, + submitted += gc_node_segment(sbi, sum->entries, segno, gc_type); + else + submitted += gc_data_segment(sbi, sum->entries, gc_list, + segno, gc_type); stat_inc_seg_count(sbi, type, gc_type); +freed: if (gc_type == FG_GC && get_valid_blocks(sbi, segno, false) == 0) seg_freed++; -next: + migrated++; + + if (__is_large_section(sbi) && segno + 1 < end_segno) + sbi->next_victim_seg[gc_type] = segno + 1; +skip: f2fs_put_page(sum_page, 0); } - if (gc_type == FG_GC) + if (submitted) f2fs_submit_merged_write(sbi, (type == SUM_TYPE_NODE) ? NODE : DATA); @@ -1018,6 +1238,9 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, .ilist = LIST_HEAD_INIT(gc_list.ilist), .iroot = RADIX_TREE_INIT(GFP_NOFS), }; + unsigned long long last_skipped = sbi->skipped_atomic_files[FG_GC]; + unsigned long long first_skipped; + unsigned int skipped_round = 0, round = 0; trace_f2fs_gc_begin(sbi->sb, sync, background, get_pages(sbi, F2FS_DIRTY_NODES), @@ -1029,6 +1252,8 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, prefree_segments(sbi)); cpc.reason = __get_cp_reason(sbi); + sbi->skipped_gc_rwsem = 0; + first_skipped = last_skipped; gc_more: if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE))) { ret = -EINVAL; @@ -1045,8 +1270,9 @@ gc_more: * threshold, we can make them free by checkpoint. 
Then, we * secure free segments which doesn't need fggc any more. */ - if (prefree_segments(sbi)) { - ret = write_checkpoint(sbi, &cpc); + if (prefree_segments(sbi) && + !is_sbi_flag_set(sbi, SBI_CP_DISABLED)) { + ret = f2fs_write_checkpoint(sbi, &cpc); if (ret) goto stop; } @@ -1069,17 +1295,36 @@ gc_more: sec_freed++; total_freed += seg_freed; + if (gc_type == FG_GC) { + if (sbi->skipped_atomic_files[FG_GC] > last_skipped || + sbi->skipped_gc_rwsem) + skipped_round++; + last_skipped = sbi->skipped_atomic_files[FG_GC]; + round++; + } + if (gc_type == FG_GC) sbi->cur_victim_sec = NULL_SEGNO; - if (!sync) { - if (has_not_enough_free_secs(sbi, sec_freed, 0)) { + if (sync) + goto stop; + + if (has_not_enough_free_secs(sbi, sec_freed, 0)) { + if (skipped_round <= MAX_SKIP_GC_COUNT || + skipped_round * 2 < round) { segno = NULL_SEGNO; goto gc_more; } - if (gc_type == FG_GC) - ret = write_checkpoint(sbi, &cpc); + if (first_skipped < last_skipped && + (last_skipped - first_skipped) > + sbi->skipped_gc_rwsem) { + f2fs_drop_inmem_pages_all(sbi, true); + segno = NULL_SEGNO; + goto gc_more; + } + if (gc_type == FG_GC && !is_sbi_flag_set(sbi, SBI_CP_DISABLED)) + ret = f2fs_write_checkpoint(sbi, &cpc); } stop: SIT_I(sbi)->last_victim[ALLOC_NEXT] = 0; @@ -1103,23 +1348,187 @@ stop: return ret; } -void build_gc_manager(struct f2fs_sb_info *sbi) +void f2fs_build_gc_manager(struct f2fs_sb_info *sbi) { - u64 main_count, resv_count, ovp_count; - DIRTY_I(sbi)->v_ops = &default_v_ops; - /* threshold of # of valid blocks in a section for victims of FG_GC */ - main_count = SM_I(sbi)->main_segments << sbi->log_blocks_per_seg; - resv_count = SM_I(sbi)->reserved_segments << sbi->log_blocks_per_seg; - ovp_count = SM_I(sbi)->ovp_segments << sbi->log_blocks_per_seg; - - sbi->fggc_threshold = div64_u64((main_count - ovp_count) * - BLKS_PER_SEC(sbi), (main_count - resv_count)); sbi->gc_pin_file_threshold = DEF_GC_FAILED_PINNED_FILES; /* give warm/cold data area from slower device */ - if (sbi->s_ndevs && sbi->segs_per_sec == 1) + if (f2fs_is_multi_device(sbi) && !__is_large_section(sbi)) SIT_I(sbi)->last_victim[ALLOC_NEXT] = GET_SEGNO(sbi, FDEV(0).end_blk) + 1; } + +static int free_segment_range(struct f2fs_sb_info *sbi, unsigned int start, + unsigned int end) +{ + int type; + unsigned int segno, next_inuse; + int err = 0; + + /* Move out cursegs from the target range */ + for (type = CURSEG_HOT_DATA; type < NR_CURSEG_TYPE; type++) + allocate_segment_for_resize(sbi, type, start, end); + + /* do GC to move out valid blocks in the range */ + for (segno = start; segno <= end; segno += sbi->segs_per_sec) { + struct gc_inode_list gc_list = { + .ilist = LIST_HEAD_INIT(gc_list.ilist), + .iroot = RADIX_TREE_INIT(GFP_NOFS), + }; + + mutex_lock(&sbi->gc_mutex); + do_garbage_collect(sbi, segno, &gc_list, FG_GC); + mutex_unlock(&sbi->gc_mutex); + put_gc_inode(&gc_list); + + if (get_valid_blocks(sbi, segno, true)) + return -EAGAIN; + } + + err = f2fs_sync_fs(sbi->sb, 1); + if (err) + return err; + + next_inuse = find_next_inuse(FREE_I(sbi), end + 1, start); + if (next_inuse <= end) { + f2fs_err(sbi, "segno %u should be free but still inuse!", + next_inuse); + f2fs_bug_on(sbi, 1); + } + return err; +} + +static void update_sb_metadata(struct f2fs_sb_info *sbi, int secs) +{ + struct f2fs_super_block *raw_sb = F2FS_RAW_SUPER(sbi); + int section_count = le32_to_cpu(raw_sb->section_count); + int segment_count = le32_to_cpu(raw_sb->segment_count); + int segment_count_main = le32_to_cpu(raw_sb->segment_count_main); + long long block_count 
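/*
 * update_sb_metadata() here adjusts counters that live little-endian on
 * disk: each field is converted to CPU byte order, updated, and
 * converted back before it is written out. A userspace sketch of that
 * round-trip using glibc's <endian.h>; the struct is illustrative:
 */
#include <endian.h>
#include <stdint.h>

struct raw_sb_sketch {
	uint32_t section_count;	/* stored little-endian */
};

static void add_sections(struct raw_sb_sketch *raw, int secs)
{
	uint32_t cur = le32toh(raw->section_count);	/* disk -> CPU */

	raw->section_count = htole32(cur + (uint32_t)secs); /* CPU -> disk */
}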
= le64_to_cpu(raw_sb->block_count); + int segs = secs * sbi->segs_per_sec; + + raw_sb->section_count = cpu_to_le32(section_count + secs); + raw_sb->segment_count = cpu_to_le32(segment_count + segs); + raw_sb->segment_count_main = cpu_to_le32(segment_count_main + segs); + raw_sb->block_count = cpu_to_le64(block_count + + (long long)segs * sbi->blocks_per_seg); +} + +static void update_fs_metadata(struct f2fs_sb_info *sbi, int secs) +{ + int segs = secs * sbi->segs_per_sec; + long long user_block_count = + le64_to_cpu(F2FS_CKPT(sbi)->user_block_count); + + SM_I(sbi)->segment_count = (int)SM_I(sbi)->segment_count + segs; + MAIN_SEGS(sbi) = (int)MAIN_SEGS(sbi) + segs; + FREE_I(sbi)->free_sections = (int)FREE_I(sbi)->free_sections + secs; + FREE_I(sbi)->free_segments = (int)FREE_I(sbi)->free_segments + segs; + F2FS_CKPT(sbi)->user_block_count = cpu_to_le64(user_block_count + + (long long)segs * sbi->blocks_per_seg); +} + +int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count) +{ + __u64 old_block_count, shrunk_blocks; + unsigned int secs; + int gc_mode, gc_type; + int err = 0; + __u32 rem; + + old_block_count = le64_to_cpu(F2FS_RAW_SUPER(sbi)->block_count); + if (block_count > old_block_count) + return -EINVAL; + + /* new fs size should align to section size */ + div_u64_rem(block_count, BLKS_PER_SEC(sbi), &rem); + if (rem) + return -EINVAL; + + if (block_count == old_block_count) + return 0; + + if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) { + f2fs_err(sbi, "Should run fsck to repair first."); + return -EFSCORRUPTED; + } + + if (test_opt(sbi, DISABLE_CHECKPOINT)) { + f2fs_err(sbi, "Checkpoint should be enabled."); + return -EINVAL; + } + + freeze_bdev(sbi->sb->s_bdev); + + shrunk_blocks = old_block_count - block_count; + secs = div_u64(shrunk_blocks, BLKS_PER_SEC(sbi)); + spin_lock(&sbi->stat_lock); + if (shrunk_blocks + valid_user_blocks(sbi) + + sbi->current_reserved_blocks + sbi->unusable_block_count + + F2FS_OPTION(sbi).root_reserved_blocks > sbi->user_block_count) + err = -ENOSPC; + else + sbi->user_block_count -= shrunk_blocks; + spin_unlock(&sbi->stat_lock); + if (err) { + thaw_bdev(sbi->sb->s_bdev, sbi->sb); + return err; + } + + mutex_lock(&sbi->resize_mutex); + set_sbi_flag(sbi, SBI_IS_RESIZEFS); + + mutex_lock(&DIRTY_I(sbi)->seglist_lock); + + MAIN_SECS(sbi) -= secs; + + for (gc_mode = 0; gc_mode < MAX_GC_POLICY; gc_mode++) + if (SIT_I(sbi)->last_victim[gc_mode] >= + MAIN_SECS(sbi) * sbi->segs_per_sec) + SIT_I(sbi)->last_victim[gc_mode] = 0; + + for (gc_type = BG_GC; gc_type <= FG_GC; gc_type++) + if (sbi->next_victim_seg[gc_type] >= + MAIN_SECS(sbi) * sbi->segs_per_sec) + sbi->next_victim_seg[gc_type] = NULL_SEGNO; + + mutex_unlock(&DIRTY_I(sbi)->seglist_lock); + + err = free_segment_range(sbi, MAIN_SECS(sbi) * sbi->segs_per_sec, + MAIN_SEGS(sbi) - 1); + if (err) + goto out; + + update_sb_metadata(sbi, -secs); + + err = f2fs_commit_super(sbi, false); + if (err) { + update_sb_metadata(sbi, secs); + goto out; + } + + update_fs_metadata(sbi, -secs); + clear_sbi_flag(sbi, SBI_IS_RESIZEFS); + err = f2fs_sync_fs(sbi->sb, 1); + if (err) { + update_fs_metadata(sbi, secs); + update_sb_metadata(sbi, secs); + f2fs_commit_super(sbi, false); + } +out: + if (err) { + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_err(sbi, "resize_fs failed, should run fsck to repair!"); + + MAIN_SECS(sbi) += secs; + spin_lock(&sbi->stat_lock); + sbi->user_block_count += shrunk_blocks; + spin_unlock(&sbi->stat_lock); + } + clear_sbi_flag(sbi, SBI_IS_RESIZEFS); + mutex_unlock(&sbi->resize_mutex); + 
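/*
 * Note the unwind ordering in f2fs_resize_fs() above: on any failure
 * after the shrink has started, the in-memory counters (MAIN_SECS,
 * user_block_count) are restored and SBI_NEED_FSCK is set so a later
 * fsck can repair partially updated metadata; only once the in-memory
 * state is consistent again is the block device thawed below.
 */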
thaw_bdev(sbi->sb->s_bdev, sbi->sb); + return err; +} diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index b0045d4c8d1e..bbac9d3787bd 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/gc.h * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #define GC_THREAD_MIN_WB_PAGES 1 /* * a threshold to determine @@ -36,8 +33,6 @@ struct f2fs_gc_kthread { unsigned int no_gc_sleep_time; /* for changing gc mode */ - unsigned int gc_idle; - unsigned int gc_urgent; unsigned int gc_wake; }; diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c index eb2e031ea887..cc82f142f811 100644 --- a/fs/f2fs/hash.c +++ b/fs/f2fs/hash.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/hash.c * @@ -7,10 +8,6 @@ * Portions of this code from linux/fs/ext3/hash.c * * Copyright (C) 2002 by Theodore Ts'o - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/types.h> #include <linux/fs.h> diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index ac951ee9b20b..aef822b1fa9d 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -1,11 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/inline.c * Copyright (c) 2013, Intel Corporation * Authors: Huajun Li <huajun.li@intel.com> * Haicheng Li <haicheng.li@intel.com> - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
*/ #include <linux/fs.h> @@ -43,7 +41,7 @@ bool f2fs_may_inline_dentry(struct inode *inode) return true; } -void read_inline_data(struct page *page, struct page *ipage) +void f2fs_do_read_inline_data(struct page *page, struct page *ipage) { struct inode *inode = page->mapping->host; void *src_addr, *dst_addr; @@ -65,7 +63,8 @@ void read_inline_data(struct page *page, struct page *ipage) SetPageUptodate(page); } -void truncate_inline_inode(struct inode *inode, struct page *ipage, u64 from) +void f2fs_truncate_inline_inode(struct inode *inode, + struct page *ipage, u64 from) { void *addr; @@ -74,7 +73,7 @@ void truncate_inline_inode(struct inode *inode, struct page *ipage, u64 from) addr = inline_data_addr(inode, ipage); - f2fs_wait_on_page_writeback(ipage, NODE, true); + f2fs_wait_on_page_writeback(ipage, NODE, true, true); memset(addr + from, 0, MAX_INLINE_DATA(inode) - from); set_page_dirty(ipage); @@ -97,7 +96,7 @@ int f2fs_read_inline_data(struct inode *inode, struct page *page) path, current->comm); } - ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino); + ipage = f2fs_get_node_page(F2FS_I_SB(inode), inode->i_ino); if (IS_ERR(ipage)) { trace_android_fs_dataread_end(inode, page_offset(page), PAGE_SIZE); @@ -115,7 +114,7 @@ int f2fs_read_inline_data(struct inode *inode, struct page *page) if (page->index) zero_user_segment(page, 0, PAGE_SIZE); else - read_inline_data(page, ipage); + f2fs_do_read_inline_data(page, ipage); if (!PageUptodate(page)) SetPageUptodate(page); @@ -138,6 +137,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) .encrypted_page = NULL, .io_type = FS_DATA_IO, }; + struct node_info ni; int dirty, err; if (!f2fs_exist_data(dn->inode)) @@ -147,9 +147,25 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) if (err) return err; + err = f2fs_get_node_info(fio.sbi, dn->nid, &ni); + if (err) { + f2fs_put_dnode(dn); + return err; + } + + fio.version = ni.version; + + if (unlikely(dn->data_blkaddr != NEW_ADDR)) { + f2fs_put_dnode(dn); + set_sbi_flag(fio.sbi, SBI_NEED_FSCK); + f2fs_warn(fio.sbi, "%s: corrupted inline inode ino=%lx, i_addr[0]:0x%x, run fsck to fix.", + __func__, dn->inode->i_ino, dn->data_blkaddr); + return -EFSCORRUPTED; + } + f2fs_bug_on(F2FS_P_SB(page), PageWriteback(page)); - read_inline_data(page, dn->inode_page); + f2fs_do_read_inline_data(page, dn->inode_page); set_page_dirty(page); /* clear dirty state */ @@ -160,18 +176,18 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) ClearPageError(page); fio.old_blkaddr = dn->data_blkaddr; set_inode_flag(dn->inode, FI_HOT_DATA); - write_data_page(dn, &fio); - f2fs_wait_on_page_writeback(page, DATA, true); + f2fs_outplace_write_data(dn, &fio); + f2fs_wait_on_page_writeback(page, DATA, true, true); if (dirty) { inode_dec_dirty_pages(dn->inode); - remove_dirty_inode(dn->inode); + f2fs_remove_dirty_inode(dn->inode); } /* this converted inline_data should be recovered. 
*/ set_inode_flag(dn->inode, FI_APPEND_WRITE); /* clear inline data and flag after data writeback */ - truncate_inline_inode(dn->inode, dn->inode_page, 0); + f2fs_truncate_inline_inode(dn->inode, dn->inode_page, 0); clear_inline_node(dn->inode_page); clear_out: stat_dec_inline_inode(dn->inode); @@ -196,7 +212,7 @@ int f2fs_convert_inline_inode(struct inode *inode) f2fs_lock_op(sbi); - ipage = get_node_page(sbi, inode->i_ino); + ipage = f2fs_get_node_page(sbi, inode->i_ino); if (IS_ERR(ipage)) { err = PTR_ERR(ipage); goto out; @@ -222,12 +238,10 @@ int f2fs_write_inline_data(struct inode *inode, struct page *page) { void *src_addr, *dst_addr; struct dnode_of_data dn; - struct address_space *mapping = page_mapping(page); - unsigned long flags; int err; set_new_dnode(&dn, inode, NULL, NULL, 0); - err = get_dnode_of_data(&dn, 0, LOOKUP_NODE); + err = f2fs_get_dnode_of_data(&dn, 0, LOOKUP_NODE); if (err) return err; @@ -238,17 +252,14 @@ int f2fs_write_inline_data(struct inode *inode, struct page *page) f2fs_bug_on(F2FS_I_SB(inode), page->index); - f2fs_wait_on_page_writeback(dn.inode_page, NODE, true); + f2fs_wait_on_page_writeback(dn.inode_page, NODE, true, true); src_addr = kmap_atomic(page); dst_addr = inline_data_addr(inode, dn.inode_page); memcpy(dst_addr, src_addr, MAX_INLINE_DATA(inode)); kunmap_atomic(src_addr); set_page_dirty(dn.inode_page); - spin_lock_irqsave(&mapping->tree_lock, flags); - radix_tree_tag_clear(&mapping->page_tree, page_index(page), - PAGECACHE_TAG_DIRTY); - spin_unlock_irqrestore(&mapping->tree_lock, flags); + f2fs_clear_radix_tree_dirty_tag(page); set_inode_flag(inode, FI_APPEND_WRITE); set_inode_flag(inode, FI_DATA_EXIST); @@ -258,7 +269,7 @@ int f2fs_write_inline_data(struct inode *inode, struct page *page) return 0; } -bool recover_inline_data(struct inode *inode, struct page *npage) +bool f2fs_recover_inline_data(struct inode *inode, struct page *npage) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_inode *ri = NULL; @@ -279,10 +290,10 @@ bool recover_inline_data(struct inode *inode, struct page *npage) if (f2fs_has_inline_data(inode) && ri && (ri->i_inline & F2FS_INLINE_DATA)) { process_inline: - ipage = get_node_page(sbi, inode->i_ino); + ipage = f2fs_get_node_page(sbi, inode->i_ino); f2fs_bug_on(sbi, IS_ERR(ipage)); - f2fs_wait_on_page_writeback(ipage, NODE, true); + f2fs_wait_on_page_writeback(ipage, NODE, true, true); src_addr = inline_data_addr(inode, npage); dst_addr = inline_data_addr(inode, ipage); @@ -297,20 +308,20 @@ process_inline: } if (f2fs_has_inline_data(inode)) { - ipage = get_node_page(sbi, inode->i_ino); + ipage = f2fs_get_node_page(sbi, inode->i_ino); f2fs_bug_on(sbi, IS_ERR(ipage)); - truncate_inline_inode(inode, ipage, 0); + f2fs_truncate_inline_inode(inode, ipage, 0); clear_inode_flag(inode, FI_INLINE_DATA); f2fs_put_page(ipage, 1); } else if (ri && (ri->i_inline & F2FS_INLINE_DATA)) { - if (truncate_blocks(inode, 0, false)) + if (f2fs_truncate_blocks(inode, 0, false)) return false; goto process_inline; } return false; } -struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir, +struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir, struct fscrypt_name *fname, struct page **res_page) { struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); @@ -321,7 +332,7 @@ struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir, void *inline_dentry; f2fs_hash_t namehash; - ipage = get_node_page(sbi, dir->i_ino); + ipage = f2fs_get_node_page(sbi, dir->i_ino); if (IS_ERR(ipage)) { *res_page = ipage; return NULL; @@ -332,7 +343,7 
@@ struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir, inline_dentry = inline_data_addr(dir, ipage); make_dentry_ptr_inline(dir, &d, inline_dentry); - de = find_target_dentry(fname, namehash, NULL, &d); + de = f2fs_find_target_dentry(fname, namehash, NULL, &d); unlock_page(ipage); if (de) *res_page = ipage; @@ -342,7 +353,7 @@ struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir, return de; } -int make_empty_inline_dir(struct inode *inode, struct inode *parent, +int f2fs_make_empty_inline_dir(struct inode *inode, struct inode *parent, struct page *ipage) { struct f2fs_dentry_ptr d; @@ -351,7 +362,7 @@ int make_empty_inline_dir(struct inode *inode, struct inode *parent, inline_dentry = inline_data_addr(inode, ipage); make_dentry_ptr_inline(inode, &d, inline_dentry); - do_make_empty_dir(inode, parent, &d); + f2fs_do_make_empty_dir(inode, parent, &d); set_page_dirty(ipage); @@ -385,8 +396,16 @@ static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage, if (err) goto out; - f2fs_wait_on_page_writeback(page, DATA, true); - zero_user_segment(page, MAX_INLINE_DATA(dir), PAGE_SIZE); + if (unlikely(dn.data_blkaddr != NEW_ADDR)) { + f2fs_put_dnode(&dn); + set_sbi_flag(F2FS_P_SB(page), SBI_NEED_FSCK); + f2fs_warn(F2FS_P_SB(page), "%s: corrupted inline inode ino=%lx, i_addr[0]:0x%x, run fsck to fix.", + __func__, dir->i_ino, dn.data_blkaddr); + err = -EFSCORRUPTED; + goto out; + } + + f2fs_wait_on_page_writeback(page, DATA, true, true); dentry_blk = page_address(page); @@ -410,11 +429,19 @@ static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage, set_page_dirty(page); /* clear inline dir and flag after data writeback */ - truncate_inline_inode(dir, ipage, 0); + f2fs_truncate_inline_inode(dir, ipage, 0); stat_dec_inline_dir(dir); clear_inode_flag(dir, FI_INLINE_DENTRY); + /* + * should retrieve reserved space which was used to keep + * inline_dentry's structure for backward compatibility. + */ + if (!f2fs_sb_has_flexible_inline_xattr(F2FS_I_SB(dir)) && + !f2fs_has_inline_xattr(dir)) + F2FS_I(dir)->i_inline_xattr_size = 0; + f2fs_i_depth_write(dir, 1); if (i_size_read(dir) < PAGE_SIZE) f2fs_i_size_write(dir, PAGE_SIZE); @@ -453,7 +480,7 @@ static int f2fs_add_inline_entries(struct inode *dir, void *inline_dentry) new_name.len = le16_to_cpu(de->name_len); ino = le32_to_cpu(de->ino); - fake_mode = get_de_type(de) << S_SHIFT; + fake_mode = f2fs_get_de_type(de) << S_SHIFT; err = f2fs_add_regular_entry(dir, &new_name, NULL, NULL, ino, fake_mode); @@ -465,8 +492,8 @@ static int f2fs_add_inline_entries(struct inode *dir, void *inline_dentry) return 0; punch_dentry_pages: truncate_inode_pages(&dir->i_data, 0); - truncate_blocks(dir, 0, false); - remove_dirty_inode(dir); + f2fs_truncate_blocks(dir, 0, false); + f2fs_remove_dirty_inode(dir); return err; } @@ -484,7 +511,7 @@ static int f2fs_move_rehashed_dirents(struct inode *dir, struct page *ipage, } memcpy(backup_dentry, inline_dentry, MAX_INLINE_DATA(dir)); - truncate_inline_inode(dir, ipage, 0); + f2fs_truncate_inline_inode(dir, ipage, 0); unlock_page(ipage); @@ -496,17 +523,27 @@ static int f2fs_move_rehashed_dirents(struct inode *dir, struct page *ipage, stat_dec_inline_dir(dir); clear_inode_flag(dir, FI_INLINE_DENTRY); - kfree(backup_dentry); + + /* + * should retrieve reserved space which was used to keep + * inline_dentry's structure for backward compatibility. 
+ */ + if (!f2fs_sb_has_flexible_inline_xattr(F2FS_I_SB(dir)) && + !f2fs_has_inline_xattr(dir)) + F2FS_I(dir)->i_inline_xattr_size = 0; + + kvfree(backup_dentry); return 0; recover: lock_page(ipage); + f2fs_wait_on_page_writeback(ipage, NODE, true, true); memcpy(inline_dentry, backup_dentry, MAX_INLINE_DATA(dir)); f2fs_i_depth_write(dir, 0); f2fs_i_size_write(dir, MAX_INLINE_DATA(dir)); set_page_dirty(ipage); f2fs_put_page(ipage, 1); - kfree(backup_dentry); + kvfree(backup_dentry); return err; } @@ -533,14 +570,14 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name, struct page *page = NULL; int err = 0; - ipage = get_node_page(sbi, dir->i_ino); + ipage = f2fs_get_node_page(sbi, dir->i_ino); if (IS_ERR(ipage)) return PTR_ERR(ipage); inline_dentry = inline_data_addr(dir, ipage); make_dentry_ptr_inline(dir, &d, inline_dentry); - bit_pos = room_for_filename(d.bitmap, slots, d.max); + bit_pos = f2fs_room_for_filename(d.bitmap, slots, d.max); if (bit_pos >= d.max) { err = f2fs_convert_inline_dir(dir, ipage, inline_dentry); if (err) @@ -551,7 +588,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name, if (inode) { down_write(&F2FS_I(inode)->i_sem); - page = init_inode_metadata(inode, dir, new_name, + page = f2fs_init_inode_metadata(inode, dir, new_name, orig_name, ipage); if (IS_ERR(page)) { err = PTR_ERR(page); @@ -559,7 +596,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name, } } - f2fs_wait_on_page_writeback(ipage, NODE, true); + f2fs_wait_on_page_writeback(ipage, NODE, true, true); name_hash = f2fs_dentry_hash(new_name, NULL); f2fs_update_dentry(ino, mode, &d, new_name, name_hash, bit_pos); @@ -572,7 +609,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name, f2fs_put_page(page, 1); } - update_parent_metadata(dir, inode, 0); + f2fs_update_parent_metadata(dir, inode, 0); fail: if (inode) up_write(&F2FS_I(inode)->i_sem); @@ -591,7 +628,7 @@ void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page, int i; lock_page(page); - f2fs_wait_on_page_writeback(page, NODE, true); + f2fs_wait_on_page_writeback(page, NODE, true, true); inline_dentry = inline_data_addr(dir, page); make_dentry_ptr_inline(dir, &d, inline_dentry); @@ -618,7 +655,7 @@ bool f2fs_empty_inline_dir(struct inode *dir) void *inline_dentry; struct f2fs_dentry_ptr d; - ipage = get_node_page(sbi, dir->i_ino); + ipage = f2fs_get_node_page(sbi, dir->i_ino); if (IS_ERR(ipage)) return false; @@ -649,10 +686,16 @@ int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx, if (ctx->pos == d.max) return 0; - ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino); + ipage = f2fs_get_node_page(F2FS_I_SB(inode), inode->i_ino); if (IS_ERR(ipage)) return PTR_ERR(ipage); + /* + * f2fs_readdir was protected by inode.i_rwsem, it is safe to access + * ipage without page's lock held. + */ + unlock_page(ipage); + inline_dentry = inline_data_addr(inode, ipage); make_dentry_ptr_inline(inode, &d, inline_dentry); @@ -661,7 +704,7 @@ int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx, if (!err) ctx->pos = d.max; - f2fs_put_page(ipage, 1); + f2fs_put_page(ipage, 0); return err < 0 ? 
err : 0; } @@ -675,7 +718,7 @@ int f2fs_inline_data_fiemap(struct inode *inode, struct page *ipage; int err = 0; - ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino); + ipage = f2fs_get_node_page(F2FS_I_SB(inode), inode->i_ino); if (IS_ERR(ipage)) return PTR_ERR(ipage); @@ -691,7 +734,10 @@ int f2fs_inline_data_fiemap(struct inode *inode, ilen = start + len; ilen -= start; - get_node_info(F2FS_I_SB(inode), inode->i_ino, &ni); + err = f2fs_get_node_info(F2FS_I_SB(inode), inode->i_ino, &ni); + if (err) + goto out; + byteaddr = (__u64)ni.blk_addr << inode->i_sb->s_blocksize_bits; byteaddr += (char *)inline_data_addr(inode, ipage) - (char *)F2FS_INODE(ipage); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 51846fc54fbd..2f02199047a9 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/inode.c * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/fs.h> #include <linux/f2fs_fs.h> @@ -17,6 +14,7 @@ #include "f2fs.h" #include "node.h" #include "segment.h" +#include "xattr.h" #include <trace/events/f2fs.h> @@ -36,15 +34,15 @@ void f2fs_set_inode_flags(struct inode *inode) unsigned int flags = F2FS_I(inode)->i_flags; unsigned int new_fl = 0; - if (flags & FS_SYNC_FL) + if (flags & F2FS_SYNC_FL) new_fl |= S_SYNC; - if (flags & FS_APPEND_FL) + if (flags & F2FS_APPEND_FL) new_fl |= S_APPEND; - if (flags & FS_IMMUTABLE_FL) + if (flags & F2FS_IMMUTABLE_FL) new_fl |= S_IMMUTABLE; - if (flags & FS_NOATIME_FL) + if (flags & F2FS_NOATIME_FL) new_fl |= S_NOATIME; - if (flags & FS_DIRSYNC_FL) + if (flags & F2FS_DIRSYNC_FL) new_fl |= S_DIRSYNC; if (f2fs_encrypted_inode(inode)) new_fl |= S_ENCRYPTED; @@ -68,13 +66,16 @@ static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri) } } -static bool __written_first_block(struct f2fs_inode *ri) +static int __written_first_block(struct f2fs_sb_info *sbi, + struct f2fs_inode *ri) { block_t addr = le32_to_cpu(ri->i_addr[offset_in_addr(ri)]); - if (addr != NEW_ADDR && addr != NULL_ADDR) - return true; - return false; + if (!__is_valid_data_blkaddr(addr)) + return 1; + if (!f2fs_is_valid_blkaddr(sbi, addr, DATA_GENERIC_ENHANCE)) + return -EFSCORRUPTED; + return 0; } static void __set_inode_rdev(struct inode *inode, struct f2fs_inode *ri) @@ -103,7 +104,7 @@ static void __recover_inline_status(struct inode *inode, struct page *ipage) while (start < end) { if (*start++) { - f2fs_wait_on_page_writeback(ipage, NODE, true); + f2fs_wait_on_page_writeback(ipage, NODE, true, true); set_inode_flag(inode, FI_DATA_EXIST); set_raw_inline(inode, F2FS_INODE(ipage)); @@ -117,15 +118,15 @@ static void __recover_inline_status(struct inode *inode, struct page *ipage) static bool f2fs_enable_inode_chksum(struct f2fs_sb_info *sbi, struct page *page) { struct f2fs_inode *ri = &F2FS_NODE(page)->i; - int extra_isize = le32_to_cpu(ri->i_extra_isize); - if (!f2fs_sb_has_inode_chksum(sbi->sb)) + if (!f2fs_sb_has_inode_chksum(sbi)) return false; - if (!RAW_IS_INODE(F2FS_NODE(page)) || !(ri->i_inline & F2FS_EXTRA_ATTR)) + if (!IS_INODE(page) || !(ri->i_inline & F2FS_EXTRA_ATTR)) return false; - if (!F2FS_FITS_IN_INODE(ri, extra_isize, i_inode_checksum)) + if (!F2FS_FITS_IN_INODE(ri, le16_to_cpu(ri->i_extra_isize), + i_inode_checksum)) return false; return true; @@ -159,8 +160,15 @@ 
bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct page *page) struct f2fs_inode *ri; __u32 provided, calculated; + if (unlikely(is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN))) + return true; + +#ifdef CONFIG_F2FS_CHECK_FS + if (!f2fs_enable_inode_chksum(sbi, page)) +#else if (!f2fs_enable_inode_chksum(sbi, page) || PageDirty(page) || PageWriteback(page)) +#endif return true; ri = &F2FS_NODE(page)->i; @@ -168,9 +176,8 @@ bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct page *page) calculated = f2fs_inode_chksum(sbi, page); if (provided != calculated) - f2fs_msg(sbi->sb, KERN_WARNING, - "checksum invalid, ino = %x, %x vs. %x", - ino_of_node(page), provided, calculated); + f2fs_warn(sbi, "checksum invalid, nid = %lu, ino_of_node = %x, %x vs. %x", + page->index, ino_of_node(page), provided, calculated); return provided == calculated; } @@ -185,6 +192,99 @@ void f2fs_inode_chksum_set(struct f2fs_sb_info *sbi, struct page *page) ri->i_inode_checksum = cpu_to_le32(f2fs_inode_chksum(sbi, page)); } +static bool sanity_check_inode(struct inode *inode, struct page *node_page) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct f2fs_inode_info *fi = F2FS_I(inode); + unsigned long long iblocks; + + iblocks = le64_to_cpu(F2FS_INODE(node_page)->i_blocks); + if (!iblocks) { + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_warn(sbi, "%s: corrupted inode i_blocks i_ino=%lx iblocks=%llu, run fsck to fix.", + __func__, inode->i_ino, iblocks); + return false; + } + + if (ino_of_node(node_page) != nid_of_node(node_page)) { + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_warn(sbi, "%s: corrupted inode footer i_ino=%lx, ino,nid: [%u, %u] run fsck to fix.", + __func__, inode->i_ino, + ino_of_node(node_page), nid_of_node(node_page)); + return false; + } + + if (f2fs_sb_has_flexible_inline_xattr(sbi) + && !f2fs_has_extra_attr(inode)) { + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_warn(sbi, "%s: corrupted inode ino=%lx, run fsck to fix.", + __func__, inode->i_ino); + return false; + } + + if (f2fs_has_extra_attr(inode) && + !f2fs_sb_has_extra_attr(sbi)) { + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_warn(sbi, "%s: inode (ino=%lx) is with extra_attr, but extra_attr feature is off", + __func__, inode->i_ino); + return false; + } + + if (fi->i_extra_isize > F2FS_TOTAL_EXTRA_ATTR_SIZE || + fi->i_extra_isize % sizeof(__le32)) { + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_warn(sbi, "%s: inode (ino=%lx) has corrupted i_extra_isize: %d, max: %zu", + __func__, inode->i_ino, fi->i_extra_isize, + F2FS_TOTAL_EXTRA_ATTR_SIZE); + return false; + } + + if (f2fs_has_extra_attr(inode) && + f2fs_sb_has_flexible_inline_xattr(sbi) && + f2fs_has_inline_xattr(inode) && + (!fi->i_inline_xattr_size || + fi->i_inline_xattr_size > MAX_INLINE_XATTR_SIZE)) { + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_warn(sbi, "%s: inode (ino=%lx) has corrupted i_inline_xattr_size: %d, max: %zu", + __func__, inode->i_ino, fi->i_inline_xattr_size, + MAX_INLINE_XATTR_SIZE); + return false; + } + + if (F2FS_I(inode)->extent_tree) { + struct extent_info *ei = &F2FS_I(inode)->extent_tree->largest; + + if (ei->len && + (!f2fs_is_valid_blkaddr(sbi, ei->blk, + DATA_GENERIC_ENHANCE) || + !f2fs_is_valid_blkaddr(sbi, ei->blk + ei->len - 1, + DATA_GENERIC_ENHANCE))) { + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_warn(sbi, "%s: inode (ino=%lx) extent info [%u, %u, %u] is incorrect, run fsck to fix", + __func__, inode->i_ino, + ei->blk, ei->fofs, ei->len); + return false; + } + } + + if (f2fs_has_inline_data(inode) && + (!S_ISREG(inode->i_mode) && 
!S_ISLNK(inode->i_mode))) { + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_warn(sbi, "%s: inode (ino=%lx, mode=%u) should not have inline_data, run fsck to fix", + __func__, inode->i_ino, inode->i_mode); + return false; + } + + if (f2fs_has_inline_dentry(inode) && !S_ISDIR(inode->i_mode)) { + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_warn(sbi, "%s: inode (ino=%lx, mode=%u) should not have inline_dentry, run fsck to fix", + __func__, inode->i_ino, inode->i_mode); + return false; + } + + return true; +} + static int do_read_inode(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); @@ -192,16 +292,13 @@ static int do_read_inode(struct inode *inode) struct page *node_page; struct f2fs_inode *ri; projid_t i_projid; + int err; /* Check if ino is within scope */ - if (check_nid_range(sbi, inode->i_ino)) { - f2fs_msg(inode->i_sb, KERN_ERR, "bad inode number: %lu", - (unsigned long) inode->i_ino); - WARN_ON(1); + if (f2fs_check_nid_range(sbi, inode->i_ino)) return -EINVAL; - } - node_page = get_node_page(sbi, inode->i_ino); + node_page = f2fs_get_node_page(sbi, inode->i_ino); if (IS_ERR(node_page)) return PTR_ERR(node_page); @@ -221,10 +318,15 @@ static int do_read_inode(struct inode *inode) inode->i_ctime.tv_nsec = le32_to_cpu(ri->i_ctime_nsec); inode->i_mtime.tv_nsec = le32_to_cpu(ri->i_mtime_nsec); inode->i_generation = le32_to_cpu(ri->i_generation); - - fi->i_current_depth = le32_to_cpu(ri->i_current_depth); + if (S_ISDIR(inode->i_mode)) + fi->i_current_depth = le32_to_cpu(ri->i_current_depth); + else if (S_ISREG(inode->i_mode)) + fi->i_gc_failures[GC_FAILURE_PIN] = + le16_to_cpu(ri->i_gc_failures); fi->i_xattr_nid = le32_to_cpu(ri->i_xattr_nid); fi->i_flags = le32_to_cpu(ri->i_flags); + if (S_ISREG(inode->i_mode)) + fi->i_flags &= ~F2FS_PROJINHERIT_FL; fi->flags = 0; fi->i_advise = ri->i_advise; fi->i_pino = le32_to_cpu(ri->i_pino); @@ -238,8 +340,7 @@ static int do_read_inode(struct inode *inode) fi->i_extra_isize = f2fs_has_extra_attr(inode) ? 
le16_to_cpu(ri->i_extra_isize) : 0; - if (f2fs_sb_has_flexible_inline_xattr(sbi->sb)) { - f2fs_bug_on(sbi, !f2fs_has_extra_attr(inode)); + if (f2fs_sb_has_flexible_inline_xattr(sbi)) { fi->i_inline_xattr_size = le16_to_cpu(ri->i_inline_xattr_size); } else if (f2fs_has_inline_xattr(inode) || f2fs_has_inline_dentry(inode)) { @@ -255,30 +356,48 @@ static int do_read_inode(struct inode *inode) fi->i_inline_xattr_size = 0; } + if (!sanity_check_inode(inode, node_page)) { + f2fs_put_page(node_page, 1); + return -EFSCORRUPTED; + } + /* check data exist */ if (f2fs_has_inline_data(inode) && !f2fs_exist_data(inode)) __recover_inline_status(inode, node_page); + /* try to recover cold bit for non-dir inode */ + if (!S_ISDIR(inode->i_mode) && !is_cold_node(node_page)) { + set_cold_node(node_page, false); + set_page_dirty(node_page); + } + /* get rdev by using inline_info */ __get_inode_rdev(inode, ri); - if (__written_first_block(ri)) - set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN); + if (S_ISREG(inode->i_mode)) { + err = __written_first_block(sbi, ri); + if (err < 0) { + f2fs_put_page(node_page, 1); + return err; + } + if (!err) + set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN); + } - if (!need_inode_block_update(sbi, inode->i_ino)) + if (!f2fs_need_inode_block_update(sbi, inode->i_ino)) fi->last_disk_size = inode->i_size; - if (fi->i_flags & FS_PROJINHERIT_FL) + if (fi->i_flags & F2FS_PROJINHERIT_FL) set_inode_flag(inode, FI_PROJ_INHERIT); - if (f2fs_has_extra_attr(inode) && f2fs_sb_has_project_quota(sbi->sb) && + if (f2fs_has_extra_attr(inode) && f2fs_sb_has_project_quota(sbi) && F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_projid)) i_projid = (projid_t)le32_to_cpu(ri->i_projid); else i_projid = F2FS_DEF_PROJID; fi->i_projid = make_kprojid(&init_user_ns, i_projid); - if (f2fs_has_extra_attr(inode) && f2fs_sb_has_inode_crtime(sbi->sb) && + if (f2fs_has_extra_attr(inode) && f2fs_sb_has_inode_crtime(sbi) && F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_crtime)) { fi->i_crtime.tv_sec = le64_to_cpu(ri->i_crtime); fi->i_crtime.tv_nsec = le32_to_cpu(ri->i_crtime_nsec); @@ -320,10 +439,10 @@ struct inode *f2fs_iget(struct super_block *sb, unsigned long ino) make_now: if (ino == F2FS_NODE_INO(sbi)) { inode->i_mapping->a_ops = &f2fs_node_aops; - mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO); + mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); } else if (ino == F2FS_META_INO(sbi)) { inode->i_mapping->a_ops = &f2fs_meta_aops; - mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO); + mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); } else if (S_ISREG(inode->i_mode)) { inode->i_op = &f2fs_file_inode_operations; inode->i_fop = &f2fs_file_operations; @@ -354,6 +473,7 @@ make_now: return inode; bad_inode: + f2fs_inode_synced(inode); iget_failed(inode); trace_f2fs_iget_exit(inode, ret); return ERR_PTR(ret); @@ -373,12 +493,12 @@ retry: return inode; } -void update_inode(struct inode *inode, struct page *node_page) +void f2fs_update_inode(struct inode *inode, struct page *node_page) { struct f2fs_inode *ri; struct extent_tree *et = F2FS_I(inode)->extent_tree; - f2fs_wait_on_page_writeback(node_page, NODE, true); + f2fs_wait_on_page_writeback(node_page, NODE, true, true); set_page_dirty(node_page); f2fs_inode_synced(inode); @@ -408,7 +528,12 @@ void update_inode(struct inode *inode, struct page *node_page) ri->i_atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec); ri->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); ri->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); - ri->i_current_depth = 
cpu_to_le32(F2FS_I(inode)->i_current_depth); + if (S_ISDIR(inode->i_mode)) + ri->i_current_depth = + cpu_to_le32(F2FS_I(inode)->i_current_depth); + else if (S_ISREG(inode->i_mode)) + ri->i_gc_failures = + cpu_to_le16(F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN]); ri->i_xattr_nid = cpu_to_le32(F2FS_I(inode)->i_xattr_nid); ri->i_flags = cpu_to_le32(F2FS_I(inode)->i_flags); ri->i_pino = cpu_to_le32(F2FS_I(inode)->i_pino); @@ -418,11 +543,11 @@ void update_inode(struct inode *inode, struct page *node_page) if (f2fs_has_extra_attr(inode)) { ri->i_extra_isize = cpu_to_le16(F2FS_I(inode)->i_extra_isize); - if (f2fs_sb_has_flexible_inline_xattr(F2FS_I_SB(inode)->sb)) + if (f2fs_sb_has_flexible_inline_xattr(F2FS_I_SB(inode))) ri->i_inline_xattr_size = cpu_to_le16(F2FS_I(inode)->i_inline_xattr_size); - if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)->sb) && + if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)) && F2FS_FITS_IN_INODE(ri, F2FS_I(inode)->i_extra_isize, i_projid)) { projid_t i_projid; @@ -432,7 +557,7 @@ void update_inode(struct inode *inode, struct page *node_page) ri->i_projid = cpu_to_le32(i_projid); } - if (f2fs_sb_has_inode_crtime(F2FS_I_SB(inode)->sb) && + if (f2fs_sb_has_inode_crtime(F2FS_I_SB(inode)) && F2FS_FITS_IN_INODE(ri, F2FS_I(inode)->i_extra_isize, i_crtime)) { ri->i_crtime = @@ -452,14 +577,18 @@ void update_inode(struct inode *inode, struct page *node_page) F2FS_I(inode)->i_disk_time[1] = inode->i_ctime; F2FS_I(inode)->i_disk_time[2] = inode->i_mtime; F2FS_I(inode)->i_disk_time[3] = F2FS_I(inode)->i_crtime; + +#ifdef CONFIG_F2FS_CHECK_FS + f2fs_inode_chksum_set(F2FS_I_SB(inode), node_page); +#endif } -void update_inode_page(struct inode *inode) +void f2fs_update_inode_page(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct page *node_page; retry: - node_page = get_node_page(sbi, inode->i_ino); + node_page = f2fs_get_node_page(sbi, inode->i_ino); if (IS_ERR(node_page)) { int err = PTR_ERR(node_page); if (err == -ENOMEM) { @@ -470,7 +599,7 @@ retry: } return; } - update_inode(inode, node_page); + f2fs_update_inode(inode, node_page); f2fs_put_page(node_page, 1); } @@ -485,11 +614,14 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) if (!is_inode_flag_set(inode, FI_DIRTY_INODE)) return 0; + if (f2fs_is_checkpoint_ready(sbi)) + return -ENOSPC; + /* * We need to balance fs here to prevent from producing dirty node pages * during the urgent cleaning time when runing out of free sections. 
*/ - update_inode_page(inode); + f2fs_update_inode_page(inode); if (wbc && wbc->nr_to_write) f2fs_balance_fs(sbi, true); return 0; @@ -506,7 +638,7 @@ void f2fs_evict_inode(struct inode *inode) /* some remained atomic pages should discarded */ if (f2fs_is_atomic_file(inode)) - drop_inmem_pages(inode); + f2fs_drop_inmem_pages(inode); trace_f2fs_evict_inode(inode); truncate_inode_pages_final(&inode->i_data); @@ -516,18 +648,22 @@ void f2fs_evict_inode(struct inode *inode) goto out_clear; f2fs_bug_on(sbi, get_dirty_pages(inode)); - remove_dirty_inode(inode); + f2fs_remove_dirty_inode(inode); f2fs_destroy_extent_tree(inode); if (inode->i_nlink || is_bad_inode(inode)) goto no_delete; - dquot_initialize(inode); + err = dquot_initialize(inode); + if (err) { + err = 0; + set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); + } - remove_ino_entry(sbi, inode->i_ino, APPEND_INO); - remove_ino_entry(sbi, inode->i_ino, UPDATE_INO); - remove_ino_entry(sbi, inode->i_ino, FLUSH_INO); + f2fs_remove_ino_entry(sbi, inode->i_ino, APPEND_INO); + f2fs_remove_ino_entry(sbi, inode->i_ino, UPDATE_INO); + f2fs_remove_ino_entry(sbi, inode->i_ino, FLUSH_INO); sb_start_intwrite(inode->i_sb); set_inode_flag(inode, FI_NO_ALLOC); @@ -536,15 +672,14 @@ retry: if (F2FS_HAS_BLOCKS(inode)) err = f2fs_truncate(inode); -#ifdef CONFIG_F2FS_FAULT_INJECTION if (time_to_inject(sbi, FAULT_EVICT_INODE)) { f2fs_show_injection_info(FAULT_EVICT_INODE); err = -EIO; } -#endif + if (!err) { f2fs_lock_op(sbi); - err = remove_inode_page(inode); + err = f2fs_remove_inode_page(inode); f2fs_unlock_op(sbi); if (err == -ENOENT) err = 0; @@ -556,9 +691,10 @@ retry: goto retry; } - if (err) - update_inode_page(inode); - dquot_free_inode(inode); + if (err) { + f2fs_update_inode_page(inode); + set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); + } sb_end_intwrite(inode->i_sb); no_delete: dquot_drop(inode); @@ -567,7 +703,8 @@ no_delete: stat_dec_inline_dir(inode); stat_dec_inline_inode(inode); - if (likely(!is_set_ckpt_flags(sbi, CP_ERROR_FLAG))) + if (likely(!is_set_ckpt_flags(sbi, CP_ERROR_FLAG) && + !is_sbi_flag_set(sbi, SBI_CP_DISABLED))) f2fs_bug_on(sbi, is_inode_flag_set(inode, FI_DIRTY_INODE)); else f2fs_inode_synced(inode); @@ -580,16 +717,19 @@ no_delete: invalidate_mapping_pages(NODE_MAPPING(sbi), xnid, xnid); if (inode->i_nlink) { if (is_inode_flag_set(inode, FI_APPEND_WRITE)) - add_ino_entry(sbi, inode->i_ino, APPEND_INO); + f2fs_add_ino_entry(sbi, inode->i_ino, APPEND_INO); if (is_inode_flag_set(inode, FI_UPDATE_WRITE)) - add_ino_entry(sbi, inode->i_ino, UPDATE_INO); + f2fs_add_ino_entry(sbi, inode->i_ino, UPDATE_INO); } if (is_inode_flag_set(inode, FI_FREE_NID)) { - alloc_nid_failed(sbi, inode->i_ino); + f2fs_alloc_nid_failed(sbi, inode->i_ino); clear_inode_flag(inode, FI_FREE_NID); } else { - f2fs_bug_on(sbi, err && - !exist_written_data(sbi, inode->i_ino, ORPHAN_INO)); + /* + * If xattr nid is corrupted, we can reach out error condition, + * err & !f2fs_exist_written_data(sbi, inode->i_ino, ORPHAN_INO)). + * In that case, f2fs_check_nid_range() is enough to give a clue. 
+ */ } out_clear: fscrypt_put_encryption_info(inode, NULL); @@ -597,10 +737,11 @@ out_clear: } /* caller should call f2fs_lock_op() */ -void handle_failed_inode(struct inode *inode) +void f2fs_handle_failed_inode(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct node_info ni; + int err; /* * clear nlink of inode in order to release resource of inode @@ -612,7 +753,7 @@ void handle_failed_inode(struct inode *inode) * we must call this to avoid inode being remained as dirty, resulting * in a panic when flushing dirty inodes in gdirty_list. */ - update_inode_page(inode); + f2fs_update_inode_page(inode); f2fs_inode_synced(inode); /* don't make bad inode, since it becomes a regular file. */ @@ -623,22 +764,27 @@ void handle_failed_inode(struct inode *inode) * so we can prevent losing this orphan when encoutering checkpoint * and following suddenly power-off. */ - get_node_info(sbi, inode->i_ino, &ni); + err = f2fs_get_node_info(sbi, inode->i_ino, &ni); + if (err) { + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_warn(sbi, "May loss orphan inode, run fsck to fix."); + goto out; + } if (ni.blk_addr != NULL_ADDR) { - int err = acquire_orphan_inode(sbi); + err = f2fs_acquire_orphan_inode(sbi); if (err) { set_sbi_flag(sbi, SBI_NEED_FSCK); - f2fs_msg(sbi->sb, KERN_WARNING, - "Too many orphan inodes, run fsck to fix."); + f2fs_warn(sbi, "Too many orphan inodes, run fsck to fix."); } else { - add_orphan_inode(inode); + f2fs_add_orphan_inode(inode); } - alloc_nid_done(sbi, inode->i_ino); + f2fs_alloc_nid_done(sbi, inode->i_ino); } else { set_inode_flag(inode, FI_FREE_NID); } +out: f2fs_unlock_op(sbi); /* iput will drop the inode object */ diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 0355891dbbf8..94902b82d4ec 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -1,24 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/namei.c * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
*/ #include <linux/fs.h> #include <linux/f2fs_fs.h> #include <linux/pagemap.h> #include <linux/sched.h> #include <linux/ctype.h> +#include <linux/random.h> #include <linux/dcache.h> #include <linux/namei.h> #include <linux/quotaops.h> #include "f2fs.h" #include "node.h" +#include "segment.h" #include "xattr.h" #include "acl.h" #include <trace/events/f2fs.h> @@ -37,7 +36,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) return ERR_PTR(-ENOMEM); f2fs_lock_op(sbi); - if (!alloc_nid(sbi, &ino)) { + if (!f2fs_alloc_nid(sbi, &ino)) { f2fs_unlock_op(sbi); err = -ENOSPC; goto fail; @@ -52,7 +51,10 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) inode->i_blocks = 0; inode->i_mtime = inode->i_atime = inode->i_ctime = F2FS_I(inode)->i_crtime = current_time(inode); - inode->i_generation = sbi->s_next_generation++; + inode->i_generation = prandom_u32(); + + if (S_ISDIR(inode->i_mode)) + F2FS_I(inode)->i_current_depth = 1; err = insert_inode_locked(inode); if (err) { @@ -60,8 +62,8 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) goto fail; } - if (f2fs_sb_has_project_quota(sbi->sb) && - (F2FS_I(dir)->i_flags & FS_PROJINHERIT_FL)) + if (f2fs_sb_has_project_quota(sbi) && + (F2FS_I(dir)->i_flags & F2FS_PROJINHERIT_FL)) F2FS_I(inode)->i_projid = F2FS_I(dir)->i_projid; else F2FS_I(inode)->i_projid = make_kprojid(&init_user_ns, @@ -71,10 +73,6 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) if (err) goto fail_drop; - err = dquot_alloc_inode(inode); - if (err) - goto fail_drop; - set_inode_flag(inode, FI_NEW_INODE); /* If the directory encrypted, then we should encrypt the inode. */ @@ -82,7 +80,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) f2fs_may_encrypt(inode)) f2fs_set_encrypted_inode(inode); - if (f2fs_sb_has_extra_attr(sbi->sb)) { + if (f2fs_sb_has_extra_attr(sbi)) { set_inode_flag(inode, FI_EXTRA_ATTR); F2FS_I(inode)->i_extra_isize = F2FS_TOTAL_EXTRA_ATTR_SIZE; } @@ -95,7 +93,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) if (f2fs_may_inline_dentry(inode)) set_inode_flag(inode, FI_INLINE_DENTRY); - if (f2fs_sb_has_flexible_inline_xattr(sbi->sb)) { + if (f2fs_sb_has_flexible_inline_xattr(sbi)) { f2fs_bug_on(sbi, !f2fs_has_extra_attr(inode)); if (f2fs_has_inline_xattr(inode)) xattr_size = F2FS_OPTION(sbi).inline_xattr_size; @@ -116,11 +114,13 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) f2fs_mask_flags(mode, F2FS_I(dir)->i_flags & F2FS_FL_INHERITED); if (S_ISDIR(inode->i_mode)) - F2FS_I(inode)->i_flags |= FS_INDEX_FL; + F2FS_I(inode)->i_flags |= F2FS_INDEX_FL; - if (F2FS_I(inode)->i_flags & FS_PROJINHERIT_FL) + if (F2FS_I(inode)->i_flags & F2FS_PROJINHERIT_FL) set_inode_flag(inode, FI_PROJ_INHERIT); + f2fs_set_inode_flags(inode); + trace_f2fs_new_inode(inode, 0); return inode; @@ -143,7 +143,7 @@ fail_drop: return ERR_PTR(err); } -static int is_extension_exist(const unsigned char *s, const char *sub) +static inline int is_extension_exist(const unsigned char *s, const char *sub) { size_t slen = strlen(s); size_t sublen = strlen(sub); @@ -181,19 +181,22 @@ static inline void set_file_temperature(struct f2fs_sb_info *sbi, struct inode * hot_count = sbi->raw_super->hot_ext_count; for (i = 0; i < cold_count + hot_count; i++) { - if (!is_extension_exist(name, extlist[i])) - continue; - if (i < cold_count) - file_set_cold(inode); - else - file_set_hot(inode); - break; + if (is_extension_exist(name, extlist[i])) + break; } 
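/*
 * The rewritten scan above only records where the first matching
 * extension was found; the cold/hot decision itself is made after
 * sb_lock is dropped, based on whether that index landed in the cold
 * or the hot part of the extension list.
 */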
up_read(&sbi->sb_lock); + + if (i == cold_count + hot_count) + return; + + if (i < cold_count) + file_set_cold(inode); + else + file_set_hot(inode); } -int update_extension_list(struct f2fs_sb_info *sbi, const char *name, +int f2fs_update_extension_list(struct f2fs_sb_info *sbi, const char *name, bool hot, bool set) { __u8 (*extlist)[F2FS_EXTENSION_LEN] = sbi->raw_super->extension_list; @@ -243,7 +246,7 @@ int update_extension_list(struct f2fs_sb_info *sbi, const char *name, return -EINVAL; if (hot) { - strncpy(extlist[count], name, strlen(name)); + memcpy(extlist[count], name, strlen(name)); sbi->raw_super->hot_ext_count = hot_count + 1; } else { char buf[F2FS_MAX_EXTENSION][F2FS_EXTENSION_LEN]; @@ -251,7 +254,7 @@ int update_extension_list(struct f2fs_sb_info *sbi, const char *name, memcpy(buf, &extlist[cold_count], F2FS_EXTENSION_LEN * hot_count); memset(extlist[cold_count], 0, F2FS_EXTENSION_LEN); - strncpy(extlist[cold_count], name, strlen(name)); + memcpy(extlist[cold_count], name, strlen(name)); memcpy(&extlist[cold_count + 1], buf, F2FS_EXTENSION_LEN * hot_count); sbi->raw_super->extension_count = cpu_to_le32(cold_count + 1); @@ -269,6 +272,9 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, if (unlikely(f2fs_cp_error(sbi))) return -EIO; + err = f2fs_is_checkpoint_ready(sbi); + if (err) + return err; err = dquot_initialize(dir); if (err) @@ -292,9 +298,10 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, goto out; f2fs_unlock_op(sbi); - alloc_nid_done(sbi, ino); + f2fs_alloc_nid_done(sbi, ino); - d_instantiate_new(dentry, inode); + d_instantiate(dentry, inode); + unlock_new_inode(inode); if (IS_DIRSYNC(dir)) f2fs_sync_fs(sbi->sb, 1); @@ -302,7 +309,7 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, f2fs_balance_fs(sbi, true); return 0; out: - handle_failed_inode(inode); + f2fs_handle_failed_inode(inode); return err; } @@ -315,6 +322,9 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir, if (unlikely(f2fs_cp_error(sbi))) return -EIO; + err = f2fs_is_checkpoint_ready(sbi); + if (err) + return err; err = fscrypt_prepare_link(old_dentry, dir, dentry); if (err) @@ -376,9 +386,8 @@ static int __recover_dot_dentries(struct inode *dir, nid_t pino) int err = 0; if (f2fs_readonly(sbi->sb)) { - f2fs_msg(sbi->sb, KERN_INFO, - "skip recovering inline_dots inode (ino:%lu, pino:%u) " - "in readonly mountpoint", dir->i_ino, pino); + f2fs_info(sbi, "skip recovering inline_dots inode (ino:%lu, pino:%u) in readonly mountpoint", + dir->i_ino, pino); return 0; } @@ -397,7 +406,7 @@ static int __recover_dot_dentries(struct inode *dir, nid_t pino) err = PTR_ERR(page); goto out; } else { - err = __f2fs_add_link(dir, &dot, NULL, dir->i_ino, S_IFDIR); + err = f2fs_do_add_link(dir, &dot, NULL, dir->i_ino, S_IFDIR); if (err) goto out; } @@ -408,7 +417,7 @@ static int __recover_dot_dentries(struct inode *dir, nid_t pino) else if (IS_ERR(page)) err = PTR_ERR(page); else - err = __f2fs_add_link(dir, &dotdot, NULL, pino, S_IFDIR); + err = f2fs_do_add_link(dir, &dotdot, NULL, pino, S_IFDIR); out: if (!err) clear_inode_flag(dir, FI_INLINE_DOTS); @@ -471,9 +480,8 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, if (f2fs_encrypted_inode(dir) && (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) && !fscrypt_has_permitted_context(dir, inode)) { - f2fs_msg(inode->i_sb, KERN_WARNING, - "Inconsistent encryption contexts: %lu/%lu", - dir->i_ino, inode->i_ino); + 
f2fs_warn(F2FS_I_SB(inode), "Inconsistent encryption contexts: %lu/%lu", + dir->i_ino, inode->i_ino); err = -EPERM; goto out_iput; } @@ -520,7 +528,7 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) f2fs_balance_fs(sbi, true); f2fs_lock_op(sbi); - err = acquire_orphan_inode(sbi); + err = f2fs_acquire_orphan_inode(sbi); if (err) { f2fs_unlock_op(sbi); f2fs_put_page(page, 0); @@ -558,6 +566,9 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, if (unlikely(f2fs_cp_error(sbi))) return -EIO; + err = f2fs_is_checkpoint_ready(sbi); + if (err) + return err; err = fscrypt_prepare_symlink(dir, symname, len, dir->i_sb->s_blocksize, &disk_link); @@ -582,9 +593,9 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, f2fs_lock_op(sbi); err = f2fs_add_link(dentry, inode); if (err) - goto out_handle_failed_inode; + goto out_f2fs_handle_failed_inode; f2fs_unlock_op(sbi); - alloc_nid_done(sbi, inode->i_ino); + f2fs_alloc_nid_done(sbi, inode->i_ino); err = fscrypt_encrypt_symlink(inode, symname, len, &disk_link); if (err) @@ -593,7 +604,8 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, err = page_symlink(inode, disk_link.name, disk_link.len); err_out: - d_instantiate_new(dentry, inode); + d_instantiate(dentry, inode); + unlock_new_inode(inode); /* * Let's flush symlink data in order to avoid broken symlink as much as @@ -617,11 +629,11 @@ err_out: f2fs_balance_fs(sbi, true); goto out_free_encrypted_link; -out_handle_failed_inode: - handle_failed_inode(inode); +out_f2fs_handle_failed_inode: + f2fs_handle_failed_inode(inode); out_free_encrypted_link: if (disk_link.name != (unsigned char *)symname) - kfree(disk_link.name); + kvfree(disk_link.name); return err; } @@ -654,9 +666,10 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) goto out_fail; f2fs_unlock_op(sbi); - alloc_nid_done(sbi, inode->i_ino); + f2fs_alloc_nid_done(sbi, inode->i_ino); - d_instantiate_new(dentry, inode); + d_instantiate(dentry, inode); + unlock_new_inode(inode); if (IS_DIRSYNC(dir)) f2fs_sync_fs(sbi->sb, 1); @@ -666,7 +679,7 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) out_fail: clear_inode_flag(inode, FI_INC_LINK); - handle_failed_inode(inode); + f2fs_handle_failed_inode(inode); return err; } @@ -687,6 +700,9 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry, if (unlikely(f2fs_cp_error(sbi))) return -EIO; + err = f2fs_is_checkpoint_ready(sbi); + if (err) + return err; err = dquot_initialize(dir); if (err) @@ -705,9 +721,10 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry, goto out; f2fs_unlock_op(sbi); - alloc_nid_done(sbi, inode->i_ino); + f2fs_alloc_nid_done(sbi, inode->i_ino); - d_instantiate_new(dentry, inode); + d_instantiate(dentry, inode); + unlock_new_inode(inode); if (IS_DIRSYNC(dir)) f2fs_sync_fs(sbi->sb, 1); @@ -715,7 +732,7 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry, f2fs_balance_fs(sbi, true); return 0; out: - handle_failed_inode(inode); + f2fs_handle_failed_inode(inode); return err; } @@ -744,7 +761,7 @@ static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry, } f2fs_lock_op(sbi); - err = acquire_orphan_inode(sbi); + err = f2fs_acquire_orphan_inode(sbi); if (err) goto out; @@ -756,8 +773,8 @@ static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry, * add this non-linked tmpfile to orphan list, in this way we could * remove all unused data of tmpfile after abnormal power-off. 
*/ - add_orphan_inode(inode); - alloc_nid_done(sbi, inode->i_ino); + f2fs_add_orphan_inode(inode); + f2fs_alloc_nid_done(sbi, inode->i_ino); if (whiteout) { f2fs_i_links_write(inode, false); @@ -773,9 +790,9 @@ static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry, return 0; release_out: - release_orphan_inode(sbi); + f2fs_release_orphan_inode(sbi); out: - handle_failed_inode(inode); + f2fs_handle_failed_inode(inode); return err; } @@ -817,10 +834,13 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, struct f2fs_dir_entry *old_entry; struct f2fs_dir_entry *new_entry; bool is_old_inline = f2fs_has_inline_dentry(old_dir); - int err = -ENOENT; + int err; if (unlikely(f2fs_cp_error(sbi))) return -EIO; + err = f2fs_is_checkpoint_ready(sbi); + if (err) + return err; if (is_inode_flag_set(new_dir, FI_PROJ_INHERIT) && (!projid_eq(F2FS_I(new_dir)->i_projid, @@ -841,6 +861,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, goto out; } + err = -ENOENT; old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page); if (!old_entry) { if (IS_ERR(old_page)) @@ -882,7 +903,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, f2fs_lock_op(sbi); - err = acquire_orphan_inode(sbi); + err = f2fs_acquire_orphan_inode(sbi); if (err) goto put_out_dir; @@ -896,9 +917,9 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, up_write(&F2FS_I(new_inode)->i_sem); if (!new_inode->i_nlink) - add_orphan_inode(new_inode); + f2fs_add_orphan_inode(new_inode); else - release_orphan_inode(sbi); + f2fs_release_orphan_inode(sbi); } else { f2fs_balance_fs(sbi, true); @@ -966,13 +987,19 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, f2fs_put_page(old_dir_page, 0); f2fs_i_links_write(old_dir, false); } - if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT) - add_ino_entry(sbi, new_dir->i_ino, TRANS_DIR_INO); + if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT) { + f2fs_add_ino_entry(sbi, new_dir->i_ino, TRANS_DIR_INO); + if (S_ISDIR(old_inode->i_mode)) + f2fs_add_ino_entry(sbi, old_inode->i_ino, + TRANS_DIR_INO); + } f2fs_unlock_op(sbi); if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir)) f2fs_sync_fs(sbi->sb, 1); + + f2fs_update_time(sbi, REQ_TIME); return 0; put_out_dir: @@ -1002,10 +1029,13 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, struct f2fs_dir_entry *old_dir_entry = NULL, *new_dir_entry = NULL; struct f2fs_dir_entry *old_entry, *new_entry; int old_nlink = 0, new_nlink = 0; - int err = -ENOENT; + int err; if (unlikely(f2fs_cp_error(sbi))) return -EIO; + err = f2fs_is_checkpoint_ready(sbi); + if (err) + return err; if ((is_inode_flag_set(new_dir, FI_PROJ_INHERIT) && !projid_eq(F2FS_I(new_dir)->i_projid, @@ -1023,6 +1053,7 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, if (err) goto out; + err = -ENOENT; old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page); if (!old_entry) { if (IS_ERR(old_page)) @@ -1118,14 +1149,16 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, f2fs_mark_inode_dirty_sync(new_dir, false); if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT) { - add_ino_entry(sbi, old_dir->i_ino, TRANS_DIR_INO); - add_ino_entry(sbi, new_dir->i_ino, TRANS_DIR_INO); + f2fs_add_ino_entry(sbi, old_dir->i_ino, TRANS_DIR_INO); + f2fs_add_ino_entry(sbi, new_dir->i_ino, TRANS_DIR_INO); } f2fs_unlock_op(sbi); if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir)) 
f2fs_sync_fs(sbi->sb, 1); + + f2fs_update_time(sbi, REQ_TIME); return 0; out_new_dir: if (new_dir_entry) { diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 50592991b6cc..508e684c8dbd 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/node.c * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/fs.h> #include <linux/f2fs_fs.h> @@ -23,13 +20,28 @@ #include "trace.h" #include <trace/events/f2fs.h> -#define on_build_free_nids(nmi) mutex_is_locked(&(nm_i)->build_lock) +#define on_f2fs_build_free_nids(nmi) mutex_is_locked(&(nm_i)->build_lock) static struct kmem_cache *nat_entry_slab; static struct kmem_cache *free_nid_slab; static struct kmem_cache *nat_entry_set_slab; +static struct kmem_cache *fsync_node_entry_slab; + +/* + * Check whether the given nid is within node id range. + */ +int f2fs_check_nid_range(struct f2fs_sb_info *sbi, nid_t nid) +{ + if (unlikely(nid < F2FS_ROOT_INO(sbi) || nid >= NM_I(sbi)->max_nid)) { + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_warn(sbi, "%s: out-of-range nid=%x, run fsck to fix.", + __func__, nid); + return -EFSCORRUPTED; + } + return 0; +} -bool available_free_memory(struct f2fs_sb_info *sbi, int type) +bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type) { struct f2fs_nm_info *nm_i = NM_I(sbi); struct sysinfo val; @@ -87,44 +99,35 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type) static void clear_node_page_dirty(struct page *page) { - struct address_space *mapping = page->mapping; - unsigned int long flags; - if (PageDirty(page)) { - spin_lock_irqsave(&mapping->tree_lock, flags); - radix_tree_tag_clear(&mapping->page_tree, - page_index(page), - PAGECACHE_TAG_DIRTY); - spin_unlock_irqrestore(&mapping->tree_lock, flags); - + f2fs_clear_radix_tree_dirty_tag(page); clear_page_dirty_for_io(page); - dec_page_count(F2FS_M_SB(mapping), F2FS_DIRTY_NODES); + dec_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES); } ClearPageUptodate(page); } static struct page *get_current_nat_page(struct f2fs_sb_info *sbi, nid_t nid) { - pgoff_t index = current_nat_addr(sbi, nid); - return get_meta_page(sbi, index); + return f2fs_get_meta_page_nofail(sbi, current_nat_addr(sbi, nid)); } static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid) { struct page *src_page; struct page *dst_page; - pgoff_t src_off; pgoff_t dst_off; void *src_addr; void *dst_addr; struct f2fs_nm_info *nm_i = NM_I(sbi); - src_off = current_nat_addr(sbi, nid); - dst_off = next_nat_addr(sbi, src_off); + dst_off = next_nat_addr(sbi, current_nat_addr(sbi, nid)); /* get current nat block page with lock */ - src_page = get_meta_page(sbi, src_off); - dst_page = grab_meta_page(sbi, dst_off); + src_page = get_current_nat_page(sbi, nid); + if (IS_ERR(src_page)) + return src_page; + dst_page = f2fs_grab_meta_page(sbi, dst_off); f2fs_bug_on(sbi, PageDirty(src_page)); src_addr = page_address(src_page); @@ -169,14 +172,30 @@ static struct nat_entry *__init_nat_entry(struct f2fs_nm_info *nm_i, if (raw_ne) node_info_from_raw_nat(&ne->ni, raw_ne); + + spin_lock(&nm_i->nat_list_lock); list_add_tail(&ne->list, &nm_i->nat_entries); + spin_unlock(&nm_i->nat_list_lock); + nm_i->nat_cnt++; return ne; } static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n) { - 
return radix_tree_lookup(&nm_i->nat_root, n); + struct nat_entry *ne; + + ne = radix_tree_lookup(&nm_i->nat_root, n); + + /* for recent accessed nat entry, move it to tail of lru list */ + if (ne && !get_nat_flag(ne, IS_DIRTY)) { + spin_lock(&nm_i->nat_list_lock); + if (!list_empty(&ne->list)) + list_move_tail(&ne->list, &nm_i->nat_entries); + spin_unlock(&nm_i->nat_list_lock); + } + + return ne; } static unsigned int __gang_lookup_nat_cache(struct f2fs_nm_info *nm_i, @@ -187,7 +206,6 @@ static unsigned int __gang_lookup_nat_cache(struct f2fs_nm_info *nm_i, static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct nat_entry *e) { - list_del(&e->list); radix_tree_delete(&nm_i->nat_root, nat_get_nid(e)); nm_i->nat_cnt--; __free_nat_entry(e); @@ -238,16 +256,21 @@ static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i, nm_i->dirty_nat_cnt++; set_nat_flag(ne, IS_DIRTY, true); refresh_list: + spin_lock(&nm_i->nat_list_lock); if (new_ne) list_del_init(&ne->list); else list_move_tail(&ne->list, &head->entry_list); + spin_unlock(&nm_i->nat_list_lock); } static void __clear_nat_cache_dirty(struct f2fs_nm_info *nm_i, struct nat_entry_set *set, struct nat_entry *ne) { + spin_lock(&nm_i->nat_list_lock); list_move_tail(&ne->list, &nm_i->nat_entries); + spin_unlock(&nm_i->nat_list_lock); + set_nat_flag(ne, IS_DIRTY, false); set->entry_cnt--; nm_i->dirty_nat_cnt--; @@ -260,7 +283,73 @@ static unsigned int __gang_lookup_nat_set(struct f2fs_nm_info *nm_i, start, nr); } -int need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid) +bool f2fs_in_warm_node_list(struct f2fs_sb_info *sbi, struct page *page) +{ + return NODE_MAPPING(sbi) == page->mapping && + IS_DNODE(page) && is_cold_node(page); +} + +void f2fs_init_fsync_node_info(struct f2fs_sb_info *sbi) +{ + spin_lock_init(&sbi->fsync_node_lock); + INIT_LIST_HEAD(&sbi->fsync_node_list); + sbi->fsync_seg_id = 0; + sbi->fsync_node_num = 0; +} + +static unsigned int f2fs_add_fsync_node_entry(struct f2fs_sb_info *sbi, + struct page *page) +{ + struct fsync_node_entry *fn; + unsigned long flags; + unsigned int seq_id; + + fn = f2fs_kmem_cache_alloc(fsync_node_entry_slab, GFP_NOFS); + + get_page(page); + fn->page = page; + INIT_LIST_HEAD(&fn->list); + + spin_lock_irqsave(&sbi->fsync_node_lock, flags); + list_add_tail(&fn->list, &sbi->fsync_node_list); + fn->seq_id = sbi->fsync_seg_id++; + seq_id = fn->seq_id; + sbi->fsync_node_num++; + spin_unlock_irqrestore(&sbi->fsync_node_lock, flags); + + return seq_id; +} + +void f2fs_del_fsync_node_entry(struct f2fs_sb_info *sbi, struct page *page) +{ + struct fsync_node_entry *fn; + unsigned long flags; + + spin_lock_irqsave(&sbi->fsync_node_lock, flags); + list_for_each_entry(fn, &sbi->fsync_node_list, list) { + if (fn->page == page) { + list_del(&fn->list); + sbi->fsync_node_num--; + spin_unlock_irqrestore(&sbi->fsync_node_lock, flags); + kmem_cache_free(fsync_node_entry_slab, fn); + put_page(page); + return; + } + } + spin_unlock_irqrestore(&sbi->fsync_node_lock, flags); + f2fs_bug_on(sbi, 1); +} + +void f2fs_reset_fsync_node_info(struct f2fs_sb_info *sbi) +{ + unsigned long flags; + + spin_lock_irqsave(&sbi->fsync_node_lock, flags); + sbi->fsync_seg_id = 0; + spin_unlock_irqrestore(&sbi->fsync_node_lock, flags); +} + +int f2fs_need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid) { struct f2fs_nm_info *nm_i = NM_I(sbi); struct nat_entry *e; @@ -277,7 +366,7 @@ int need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid) return need; } -bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid) +bool 
f2fs_is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid) { struct f2fs_nm_info *nm_i = NM_I(sbi); struct nat_entry *e; @@ -291,7 +380,7 @@ bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid) return is_cp; } -bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino) +bool f2fs_need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino) { struct f2fs_nm_info *nm_i = NM_I(sbi); struct nat_entry *e; @@ -364,8 +453,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, new_blkaddr == NULL_ADDR); f2fs_bug_on(sbi, nat_get_blkaddr(e) == NEW_ADDR && new_blkaddr == NEW_ADDR); - f2fs_bug_on(sbi, nat_get_blkaddr(e) != NEW_ADDR && - nat_get_blkaddr(e) != NULL_ADDR && + f2fs_bug_on(sbi, __is_valid_data_blkaddr(nat_get_blkaddr(e)) && new_blkaddr == NEW_ADDR); /* increment version no as node is removed */ @@ -376,7 +464,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, /* change address */ nat_set_blkaddr(e, new_blkaddr); - if (new_blkaddr == NEW_ADDR || new_blkaddr == NULL_ADDR) + if (!__is_valid_data_blkaddr(new_blkaddr)) set_nat_flag(e, IS_CHECKPOINTED, false); __set_nat_cache_dirty(nm_i, e); @@ -391,7 +479,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, up_write(&nm_i->nat_tree_lock); } -int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) +int f2fs_try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) { struct f2fs_nm_info *nm_i = NM_I(sbi); int nr = nr_shrink; @@ -399,13 +487,25 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) if (!down_write_trylock(&nm_i->nat_tree_lock)) return 0; - while (nr_shrink && !list_empty(&nm_i->nat_entries)) { + spin_lock(&nm_i->nat_list_lock); + while (nr_shrink) { struct nat_entry *ne; + + if (list_empty(&nm_i->nat_entries)) + break; + ne = list_first_entry(&nm_i->nat_entries, struct nat_entry, list); + list_del(&ne->list); + spin_unlock(&nm_i->nat_list_lock); + __del_from_nat_cache(nm_i, ne); nr_shrink--; + + spin_lock(&nm_i->nat_list_lock); } + spin_unlock(&nm_i->nat_list_lock); + up_write(&nm_i->nat_tree_lock); return nr - nr_shrink; } @@ -413,7 +513,8 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) /* * This function always returns success */ -void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni) +int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid, + struct node_info *ni) { struct f2fs_nm_info *nm_i = NM_I(sbi); struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); @@ -424,6 +525,7 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni) struct f2fs_nat_entry ne; struct nat_entry *e; pgoff_t index; + block_t blkaddr; int i; ni->nid = nid; @@ -436,14 +538,14 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni) ni->blk_addr = nat_get_blkaddr(e); ni->version = nat_get_version(e); up_read(&nm_i->nat_tree_lock); - return; + return 0; } memset(&ne, 0, sizeof(struct f2fs_nat_entry)); /* Check current segment summary */ down_read(&curseg->journal_rwsem); - i = lookup_journal_in_cursum(journal, NAT_JOURNAL, nid, 0); + i = f2fs_lookup_journal_in_cursum(journal, NAT_JOURNAL, nid, 0); if (i >= 0) { ne = nat_in_journal(journal, i); node_info_from_raw_nat(ni, &ne); @@ -458,20 +560,29 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni) index = current_nat_addr(sbi, nid); up_read(&nm_i->nat_tree_lock); - page = get_meta_page(sbi, index); + page = f2fs_get_meta_page(sbi, index); + if (IS_ERR(page)) + return 
PTR_ERR(page); + nat_blk = (struct f2fs_nat_block *)page_address(page); ne = nat_blk->entries[nid - start_nid]; node_info_from_raw_nat(ni, &ne); f2fs_put_page(page, 1); cache: + blkaddr = le32_to_cpu(ne.block_addr); + if (__is_valid_data_blkaddr(blkaddr) && + !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) + return -EFAULT; + /* cache nat entry */ cache_nat_entry(sbi, nid, &ne); + return 0; } /* * readahead MAX_RA_NODE number of node pages. */ -static void ra_node_pages(struct page *parent, int start, int n) +static void f2fs_ra_node_pages(struct page *parent, int start, int n) { struct f2fs_sb_info *sbi = F2FS_P_SB(parent); struct blk_plug plug; @@ -485,18 +596,18 @@ static void ra_node_pages(struct page *parent, int start, int n) end = min(end, NIDS_PER_BLOCK); for (i = start; i < end; i++) { nid = get_nid(parent, i, false); - ra_node_page(sbi, nid); + f2fs_ra_node_page(sbi, nid); } blk_finish_plug(&plug); } -pgoff_t get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs) +pgoff_t f2fs_get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs) { const long direct_index = ADDRS_PER_INODE(dn->inode); - const long direct_blks = ADDRS_PER_BLOCK; - const long indirect_blks = ADDRS_PER_BLOCK * NIDS_PER_BLOCK; - unsigned int skipped_unit = ADDRS_PER_BLOCK; + const long direct_blks = ADDRS_PER_BLOCK(dn->inode); + const long indirect_blks = ADDRS_PER_BLOCK(dn->inode) * NIDS_PER_BLOCK; + unsigned int skipped_unit = ADDRS_PER_BLOCK(dn->inode); int cur_level = dn->cur_level; int max_level = dn->max_level; pgoff_t base = 0; @@ -530,9 +641,9 @@ static int get_node_path(struct inode *inode, long block, int offset[4], unsigned int noffset[4]) { const long direct_index = ADDRS_PER_INODE(inode); - const long direct_blks = ADDRS_PER_BLOCK; + const long direct_blks = ADDRS_PER_BLOCK(inode); const long dptrs_per_blk = NIDS_PER_BLOCK; - const long indirect_blks = ADDRS_PER_BLOCK * NIDS_PER_BLOCK; + const long indirect_blks = ADDRS_PER_BLOCK(inode) * NIDS_PER_BLOCK; const long dindirect_blks = indirect_blks * NIDS_PER_BLOCK; int n = 0; int level = 0; @@ -606,7 +717,7 @@ got: * f2fs_unlock_op() only if ro is not set RDONLY_NODE. * In the case of RDONLY_NODE, we don't need to care about mutex. 
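ADDRS_PER_BLOCK becomes a per-inode quantity in the hunks above because extended attributes can shrink the address space of the inode block itself; get_node_path then derives the tree depth from the same four ranges (in-inode, direct, indirect, double-indirect). A simplified model of that depth computation follows; the constants mirror the common 4KB-block geometry but are illustrative, and the model collapses the paired DIR1/DIR2 and IND1/IND2 cases into one level each:

#include <stdio.h>

#define DIRECT_INDEX   923L          /* addresses stored in the inode itself */
#define ADDRS_PER_BLK 1018L          /* addresses per direct node block */
#define NIDS_PER_BLK  1018L          /* node ids per indirect block */

/* Depth needed to reach file block 'block': 0 = inode, 1 = direct node,
 * 2 = indirect, 3 = double indirect. */
static int node_level(long block)
{
	if (block < DIRECT_INDEX) return 0;
	block -= DIRECT_INDEX;
	if (block < 2 * ADDRS_PER_BLK) return 1;                 /* DIR1 + DIR2 */
	block -= 2 * ADDRS_PER_BLK;
	if (block < 2 * ADDRS_PER_BLK * NIDS_PER_BLK) return 2;  /* IND1 + IND2 */
	return 3;
}

int main(void)
{
	long probes[] = { 0, 922, 923, 2958, 2959, 2080607 };
	for (unsigned i = 0; i < sizeof(probes) / sizeof(probes[0]); i++)
		printf("block %ld -> level %d\n", probes[i], node_level(probes[i]));
	return 0;
}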
*/ -int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) +int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) { struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); struct page *npage[4]; @@ -625,7 +736,7 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) npage[0] = dn->inode_page; if (!npage[0]) { - npage[0] = get_node_page(sbi, nids[0]); + npage[0] = f2fs_get_node_page(sbi, nids[0]); if (IS_ERR(npage[0])) return PTR_ERR(npage[0]); } @@ -649,24 +760,24 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) if (!nids[i] && mode == ALLOC_NODE) { /* alloc new node */ - if (!alloc_nid(sbi, &(nids[i]))) { + if (!f2fs_alloc_nid(sbi, &(nids[i]))) { err = -ENOSPC; goto release_pages; } dn->nid = nids[i]; - npage[i] = new_node_page(dn, noffset[i]); + npage[i] = f2fs_new_node_page(dn, noffset[i]); if (IS_ERR(npage[i])) { - alloc_nid_failed(sbi, nids[i]); + f2fs_alloc_nid_failed(sbi, nids[i]); err = PTR_ERR(npage[i]); goto release_pages; } set_nid(parent, offset[i - 1], nids[i], i == 1); - alloc_nid_done(sbi, nids[i]); + f2fs_alloc_nid_done(sbi, nids[i]); done = true; } else if (mode == LOOKUP_NODE_RA && i == level && level > 1) { - npage[i] = get_node_page_ra(parent, offset[i - 1]); + npage[i] = f2fs_get_node_page_ra(parent, offset[i - 1]); if (IS_ERR(npage[i])) { err = PTR_ERR(npage[i]); goto release_pages; @@ -681,7 +792,7 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) } if (!done) { - npage[i] = get_node_page(sbi, nids[i]); + npage[i] = f2fs_get_node_page(sbi, nids[i]); if (IS_ERR(npage[i])) { err = PTR_ERR(npage[i]); f2fs_put_page(npage[0], 0); @@ -715,21 +826,24 @@ release_out: return err; } -static void truncate_node(struct dnode_of_data *dn) +static int truncate_node(struct dnode_of_data *dn) { struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); struct node_info ni; + int err; pgoff_t index; - get_node_info(sbi, dn->nid, &ni); + err = f2fs_get_node_info(sbi, dn->nid, &ni); + if (err) + return err; /* Deallocate node address */ - invalidate_blocks(sbi, ni.blk_addr); + f2fs_invalidate_blocks(sbi, ni.blk_addr); dec_valid_node_count(sbi, dn->inode, dn->nid == dn->inode->i_ino); set_node_addr(sbi, &ni, NULL_ADDR, false); if (dn->nid == dn->inode->i_ino) { - remove_orphan_inode(sbi, dn->nid); + f2fs_remove_orphan_inode(sbi, dn->nid); dec_valid_inode_count(sbi); f2fs_inode_synced(dn->inode); } @@ -745,17 +859,20 @@ static void truncate_node(struct dnode_of_data *dn) dn->node_page = NULL; trace_f2fs_truncate_node(dn->inode, dn->nid, ni.blk_addr); + + return 0; } static int truncate_dnode(struct dnode_of_data *dn) { struct page *page; + int err; if (dn->nid == 0) return 1; /* get direct node */ - page = get_node_page(F2FS_I_SB(dn->inode), dn->nid); + page = f2fs_get_node_page(F2FS_I_SB(dn->inode), dn->nid); if (IS_ERR(page) && PTR_ERR(page) == -ENOENT) return 1; else if (IS_ERR(page)) @@ -764,8 +881,11 @@ static int truncate_dnode(struct dnode_of_data *dn) /* Make dnode_of_data for parameter */ dn->node_page = page; dn->ofs_in_node = 0; - truncate_data_blocks(dn); - truncate_node(dn); + f2fs_truncate_data_blocks(dn); + err = truncate_node(dn); + if (err) + return err; + return 1; } @@ -785,13 +905,13 @@ static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs, trace_f2fs_truncate_nodes_enter(dn->inode, dn->nid, dn->data_blkaddr); - page = get_node_page(F2FS_I_SB(dn->inode), dn->nid); + page = f2fs_get_node_page(F2FS_I_SB(dn->inode), dn->nid); if (IS_ERR(page)) { 
trace_f2fs_truncate_nodes_exit(dn->inode, PTR_ERR(page)); return PTR_ERR(page); } - ra_node_pages(page, ofs, NIDS_PER_BLOCK); + f2fs_ra_node_pages(page, ofs, NIDS_PER_BLOCK); rn = F2FS_NODE(page); if (depth < 3) { @@ -830,7 +950,9 @@ static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs, if (!ofs) { /* remove current indirect node */ dn->node_page = page; - truncate_node(dn); + ret = truncate_node(dn); + if (ret) + goto out_err; freed++; } else { f2fs_put_page(page, 1); @@ -861,7 +983,7 @@ static int truncate_partial_nodes(struct dnode_of_data *dn, /* get indirect nodes in the path */ for (i = 0; i < idx + 1; i++) { /* reference count'll be increased */ - pages[i] = get_node_page(F2FS_I_SB(dn->inode), nid[i]); + pages[i] = f2fs_get_node_page(F2FS_I_SB(dn->inode), nid[i]); if (IS_ERR(pages[i])) { err = PTR_ERR(pages[i]); idx = i - 1; @@ -870,7 +992,7 @@ static int truncate_partial_nodes(struct dnode_of_data *dn, nid[i + 1] = get_nid(pages[i], offset[i + 1], false); } - ra_node_pages(pages[idx], offset[idx + 1], NIDS_PER_BLOCK); + f2fs_ra_node_pages(pages[idx], offset[idx + 1], NIDS_PER_BLOCK); /* free direct nodes linked to a partial indirect node */ for (i = offset[idx + 1]; i < NIDS_PER_BLOCK; i++) { @@ -888,7 +1010,9 @@ static int truncate_partial_nodes(struct dnode_of_data *dn, if (offset[idx + 1] == 0) { dn->node_page = pages[idx]; dn->nid = nid[idx]; - truncate_node(dn); + err = truncate_node(dn); + if (err) + goto fail; } else { f2fs_put_page(pages[idx], 1); } @@ -907,7 +1031,7 @@ fail: /* * All the block addresses of data and nodes should be nullified. */ -int truncate_inode_blocks(struct inode *inode, pgoff_t from) +int f2fs_truncate_inode_blocks(struct inode *inode, pgoff_t from) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); int err = 0, cont = 1; @@ -923,7 +1047,7 @@ int truncate_inode_blocks(struct inode *inode, pgoff_t from) if (level < 0) return level; - page = get_node_page(sbi, inode->i_ino); + page = f2fs_get_node_page(sbi, inode->i_ino); if (IS_ERR(page)) { trace_f2fs_truncate_inode_blocks_exit(inode, PTR_ERR(page)); return PTR_ERR(page); @@ -987,7 +1111,7 @@ skip_partial: ri->i_nid[offset[0] - NODE_DIR1_BLOCK]) { lock_page(page); BUG_ON(page->mapping != NODE_MAPPING(sbi)); - f2fs_wait_on_page_writeback(page, NODE, true); + f2fs_wait_on_page_writeback(page, NODE, true, true); ri->i_nid[offset[0] - NODE_DIR1_BLOCK] = 0; set_page_dirty(page); unlock_page(page); @@ -1003,24 +1127,30 @@ fail: } /* caller must lock inode page */ -int truncate_xattr_node(struct inode *inode) +int f2fs_truncate_xattr_node(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); nid_t nid = F2FS_I(inode)->i_xattr_nid; struct dnode_of_data dn; struct page *npage; + int err; if (!nid) return 0; - npage = get_node_page(sbi, nid); + npage = f2fs_get_node_page(sbi, nid); if (IS_ERR(npage)) return PTR_ERR(npage); + set_new_dnode(&dn, inode, NULL, npage, nid); + err = truncate_node(&dn); + if (err) { + f2fs_put_page(npage, 1); + return err; + } + f2fs_i_xnid_write(inode, 0); - set_new_dnode(&dn, inode, NULL, npage, nid); - truncate_node(&dn); return 0; } @@ -1028,17 +1158,17 @@ int truncate_xattr_node(struct inode *inode) * Caller should grab and release a rwsem by calling f2fs_lock_op() and * f2fs_unlock_op(). 
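truncate_node can now fail (it must resolve the nid through f2fs_get_node_info first), so every truncation path above propagates the error instead of pressing on; f2fs_truncate_xattr_node in particular clears i_xattr_nid only after the node was truncated successfully, so a failure leaves the inode still pointing at its xattr block rather than leaking it. A condensed, runnable sketch of that order-of-operations, with hypothetical helper names:

#include <stdio.h>

/* Hypothetical stand-in: truncate_node() may now return an error. */
static int truncate_node_stub(int fail) { return fail ? -5 /* -EIO */ : 0; }

static int truncate_xattr(int *i_xattr_nid, int fail)
{
	int err;

	if (!*i_xattr_nid)
		return 0;
	err = truncate_node_stub(fail);
	if (err)
		return err;    /* i_xattr_nid stays set: metadata remains consistent */
	*i_xattr_nid = 0;      /* cleared only once the block is really gone */
	return 0;
}

int main(void)
{
	int xnid = 42;
	printf("err=%d xnid=%d\n", truncate_xattr(&xnid, 1), xnid); /* err=-5 xnid=42 */
	printf("err=%d xnid=%d\n", truncate_xattr(&xnid, 0), xnid); /* err=0  xnid=0  */
	return 0;
}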
*/ -int remove_inode_page(struct inode *inode) +int f2fs_remove_inode_page(struct inode *inode) { struct dnode_of_data dn; int err; set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino); - err = get_dnode_of_data(&dn, 0, LOOKUP_NODE); + err = f2fs_get_dnode_of_data(&dn, 0, LOOKUP_NODE); if (err) return err; - err = truncate_xattr_node(inode); + err = f2fs_truncate_xattr_node(inode); if (err) { f2fs_put_dnode(&dn); return err; @@ -1047,18 +1177,30 @@ int remove_inode_page(struct inode *inode) /* remove potential inline_data blocks */ if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) - truncate_data_blocks_range(&dn, 1); + f2fs_truncate_data_blocks_range(&dn, 1); /* 0 is possible, after f2fs_new_inode() has failed */ - f2fs_bug_on(F2FS_I_SB(inode), - inode->i_blocks != 0 && inode->i_blocks != 8); + if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) { + f2fs_put_dnode(&dn); + return -EIO; + } + + if (unlikely(inode->i_blocks != 0 && inode->i_blocks != 8)) { + f2fs_warn(F2FS_I_SB(inode), "Inconsistent i_blocks, ino:%lu, iblocks:%llu", + inode->i_ino, (unsigned long long)inode->i_blocks); + set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK); + } /* will put inode & node pages */ - truncate_node(&dn); + err = truncate_node(&dn); + if (err) { + f2fs_put_dnode(&dn); + return err; + } return 0; } -struct page *new_inode_page(struct inode *inode) +struct page *f2fs_new_inode_page(struct inode *inode) { struct dnode_of_data dn; @@ -1066,10 +1208,10 @@ struct page *new_inode_page(struct inode *inode) set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino); /* caller should f2fs_put_page(page, 1); */ - return new_node_page(&dn, 0); + return f2fs_new_node_page(&dn, 0); } -struct page *new_node_page(struct dnode_of_data *dn, unsigned int ofs) +struct page *f2fs_new_node_page(struct dnode_of_data *dn, unsigned int ofs) { struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); struct node_info new_ni; @@ -1087,7 +1229,11 @@ struct page *new_node_page(struct dnode_of_data *dn, unsigned int ofs) goto fail; #ifdef CONFIG_F2FS_CHECK_FS - get_node_info(sbi, dn->nid, &new_ni); + err = f2fs_get_node_info(sbi, dn->nid, &new_ni); + if (err) { + dec_valid_node_count(sbi, dn->inode, !ofs); + goto fail; + } f2fs_bug_on(sbi, new_ni.blk_addr != NULL_ADDR); #endif new_ni.nid = dn->nid; @@ -1097,7 +1243,7 @@ struct page *new_node_page(struct dnode_of_data *dn, unsigned int ofs) new_ni.version = 0; set_node_addr(sbi, &new_ni, NEW_ADDR, false); - f2fs_wait_on_page_writeback(page, NODE, true); + f2fs_wait_on_page_writeback(page, NODE, true, true); fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true); set_cold_node(page, S_ISDIR(dn->inode->i_mode)); if (!PageUptodate(page)) @@ -1135,13 +1281,22 @@ static int read_node_page(struct page *page, int op_flags) .page = page, .encrypted_page = NULL, }; + int err; - if (PageUptodate(page)) + if (PageUptodate(page)) { + if (!f2fs_inode_chksum_verify(sbi, page)) { + ClearPageUptodate(page); + return -EFSBADCRC; + } return LOCKED_PAGE; + } - get_node_info(sbi, page->index, &ni); + err = f2fs_get_node_info(sbi, page->index, &ni); + if (err) + return err; - if (unlikely(ni.blk_addr == NULL_ADDR)) { + if (unlikely(ni.blk_addr == NULL_ADDR) || + is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN)) { ClearPageUptodate(page); return -ENOENT; } @@ -1153,14 +1308,15 @@ static int read_node_page(struct page *page, int op_flags) /* * Readahead a node page */ -void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid) +void f2fs_ra_node_page(struct f2fs_sb_info *sbi, nid_t nid) { struct page *apage; 
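Note the asymmetry that f2fs_check_nid_range introduces around here: the readahead path simply returns on an out-of-range nid, since readahead is purely advisory, while the synchronous __get_node_page path below turns the same check into -EINVAL for its caller. The same pattern in miniature, with an invented check_nid() helper and bound:

#include <stdio.h>
#include <errno.h>

#define MAX_NID 1000u

/* Hypothetical stand-in for f2fs_check_nid_range(). */
static int check_nid(unsigned nid)
{
	return (nid == 0 || nid >= MAX_NID) ? -EINVAL : 0;
}

/* Readahead: opportunistic, so a bad nid is silently ignored. */
static void ra_node(unsigned nid)
{
	if (check_nid(nid))
		return;
	printf("readahead nid %u\n", nid);
}

/* Synchronous lookup: the caller must see the failure. */
static int get_node(unsigned nid)
{
	if (check_nid(nid))
		return -EINVAL;
	printf("read nid %u\n", nid);
	return 0;
}

int main(void)
{
	ra_node(5000);                                /* no output, no error */
	printf("get_node -> %d\n", get_node(5000));   /* -22 */
	return 0;
}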
int err; if (!nid) return; - f2fs_bug_on(sbi, check_nid_range(sbi, nid)); + if (f2fs_check_nid_range(sbi, nid)) + return; rcu_read_lock(); apage = radix_tree_lookup(&NODE_MAPPING(sbi)->page_tree, nid); @@ -1184,7 +1340,8 @@ static struct page *__get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid, if (!nid) return ERR_PTR(-ENOENT); - f2fs_bug_on(sbi, check_nid_range(sbi, nid)); + if (f2fs_check_nid_range(sbi, nid)) + return ERR_PTR(-EINVAL); repeat: page = f2fs_grab_cache_page(NODE_MAPPING(sbi), nid, false); if (!page) @@ -1200,7 +1357,7 @@ repeat: } if (parent) - ra_node_pages(parent, start + 1, MAX_RA_NODE); + f2fs_ra_node_pages(parent, start + 1, MAX_RA_NODE); lock_page(page); @@ -1215,16 +1372,15 @@ repeat: } if (!f2fs_inode_chksum_verify(sbi, page)) { - err = -EBADMSG; + err = -EFSBADCRC; goto out_err; } page_hit: if(unlikely(nid != nid_of_node(page))) { - f2fs_msg(sbi->sb, KERN_WARNING, "inconsistent node block, " - "nid:%lu, node_footer[nid:%u,ino:%u,ofs:%u,cpver:%llu,blkaddr:%u]", - nid, nid_of_node(page), ino_of_node(page), - ofs_of_node(page), cpver_of_node(page), - next_blkaddr_of_node(page)); + f2fs_warn(sbi, "inconsistent node block, nid:%lu, node_footer[nid:%u,ino:%u,ofs:%u,cpver:%llu,blkaddr:%u]", + nid, nid_of_node(page), ino_of_node(page), + ofs_of_node(page), cpver_of_node(page), + next_blkaddr_of_node(page)); err = -EINVAL; out_err: ClearPageUptodate(page); @@ -1234,12 +1390,12 @@ out_err: return page; } -struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid) +struct page *f2fs_get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid) { return __get_node_page(sbi, nid, NULL, 0); } -struct page *get_node_page_ra(struct page *parent, int start) +struct page *f2fs_get_node_page_ra(struct page *parent, int start) { struct f2fs_sb_info *sbi = F2FS_P_SB(parent); nid_t nid = get_nid(parent, start, false); @@ -1274,7 +1430,7 @@ static void flush_inline_data(struct f2fs_sb_info *sbi, nid_t ino) ret = f2fs_write_inline_data(inode, page); inode_dec_dirty_pages(inode); - remove_dirty_inode(inode); + f2fs_remove_dirty_inode(inode); if (ret) set_page_dirty(page); page_out: @@ -1285,21 +1441,17 @@ iput_out: static struct page *last_fsync_dnode(struct f2fs_sb_info *sbi, nid_t ino) { - pgoff_t index, end; + pgoff_t index; struct pagevec pvec; struct page *last_page = NULL; + int nr_pages; pagevec_init(&pvec, 0); index = 0; - end = ULONG_MAX; - - while (index <= end) { - int i, nr_pages; - nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index, - PAGECACHE_TAG_DIRTY, - min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); - if (nr_pages == 0) - break; + + while ((nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index, + PAGECACHE_TAG_DIRTY))) { + int i; for (i = 0; i < nr_pages; i++) { struct page *page = pvec.pages[i]; @@ -1345,7 +1497,7 @@ continue_unlock: static int __write_node_page(struct page *page, bool atomic, bool *submitted, struct writeback_control *wbc, bool do_balance, - enum iostat_type io_type) + enum iostat_type io_type, unsigned int *seq_id) { struct f2fs_sb_info *sbi = F2FS_P_SB(page); nid_t nid; @@ -1362,22 +1514,27 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, .io_type = io_type, .io_wbc = wbc, }; + unsigned int seq; trace_f2fs_writepage(page, NODE); - if (unlikely(f2fs_cp_error(sbi))) { - dec_page_count(sbi, F2FS_DIRTY_NODES); - unlock_page(page); - return 0; - } + if (unlikely(f2fs_cp_error(sbi))) + goto redirty_out; if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) goto redirty_out; + if (wbc->sync_mode == 
WB_SYNC_NONE && + IS_DNODE(page) && is_cold_node(page)) + goto redirty_out; + /* get old block addr of this node page */ nid = nid_of_node(page); f2fs_bug_on(sbi, page->index != nid); + if (f2fs_get_node_info(sbi, nid, &ni)) + goto redirty_out; + if (wbc->for_reclaim) { if (!down_read_trylock(&sbi->node_write)) goto redirty_out; @@ -1385,8 +1542,6 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, down_read(&sbi->node_write); } - get_node_info(sbi, nid, &ni); - /* This page is already truncated */ if (unlikely(ni.blk_addr == NULL_ADDR)) { ClearPageUptodate(page); @@ -1396,20 +1551,33 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, return 0; } + if (__is_valid_data_blkaddr(ni.blk_addr) && + !f2fs_is_valid_blkaddr(sbi, ni.blk_addr, + DATA_GENERIC_ENHANCE)) { + up_read(&sbi->node_write); + goto redirty_out; + } + if (atomic && !test_opt(sbi, NOBARRIER)) fio.op_flags |= WRITE_FLUSH_FUA; set_page_writeback(page); ClearPageError(page); + + if (f2fs_in_warm_node_list(sbi, page)) { + seq = f2fs_add_fsync_node_entry(sbi, page); + if (seq_id) + *seq_id = seq; + } + fio.old_blkaddr = ni.blk_addr; - write_node_page(nid, &fio); + f2fs_do_write_node_page(nid, &fio); set_node_addr(sbi, &ni, fio.new_blkaddr, is_fsync_dnode(page)); dec_page_count(sbi, F2FS_DIRTY_NODES); up_read(&sbi->node_write); if (wbc->for_reclaim) { - f2fs_submit_merged_write_cond(sbi, page->mapping->host, 0, - page->index, NODE); + f2fs_submit_merged_write_cond(sbi, NULL, page, 0, NODE); submitted = NULL; } @@ -1431,8 +1599,10 @@ redirty_out: return AOP_WRITEPAGE_ACTIVATE; } -void move_node_page(struct page *node_page, int gc_type) +int f2fs_move_node_page(struct page *node_page, int gc_type) { + int err = 0; + if (gc_type == FG_GC) { struct writeback_control wbc = { .sync_mode = WB_SYNC_ALL, @@ -1440,16 +1610,20 @@ void move_node_page(struct page *node_page, int gc_type) .for_reclaim = 0, }; + f2fs_wait_on_page_writeback(node_page, NODE, true, true); + set_page_dirty(node_page); - f2fs_wait_on_page_writeback(node_page, NODE, true); - f2fs_bug_on(F2FS_P_SB(node_page), PageWriteback(node_page)); - if (!clear_page_dirty_for_io(node_page)) + if (!clear_page_dirty_for_io(node_page)) { + err = -EAGAIN; goto out_page; + } if (__write_node_page(node_page, false, NULL, - &wbc, false, FS_GC_NODE_IO)) + &wbc, false, FS_GC_NODE_IO, NULL)) { + err = -EAGAIN; unlock_page(node_page); + } goto release_page; } else { /* set page dirty and write it */ @@ -1460,24 +1634,28 @@ out_page: unlock_page(node_page); release_page: f2fs_put_page(node_page, 0); + return err; } static int f2fs_write_node_page(struct page *page, struct writeback_control *wbc) { - return __write_node_page(page, false, NULL, wbc, false, FS_NODE_IO); + return __write_node_page(page, false, NULL, wbc, false, + FS_NODE_IO, NULL); } -int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, - struct writeback_control *wbc, bool atomic) +int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, + struct writeback_control *wbc, bool atomic, + unsigned int *seq_id) { - pgoff_t index, end; - pgoff_t last_idx = ULONG_MAX; + pgoff_t index; struct pagevec pvec; int ret = 0; struct page *last_page = NULL; bool marked = false; nid_t ino = inode->i_ino; + int nr_pages; + int nwritten = 0; if (atomic) { last_page = last_fsync_dnode(sbi, ino); @@ -1487,15 +1665,10 @@ int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, retry: pagevec_init(&pvec, 0); index = 0; - end = ULONG_MAX; - - while (index 
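__write_node_page above now resolves the node_info, and hence the old block address, before it commits to the write, and redirties the page when that address fails f2fs_is_valid_blkaddr(..., DATA_GENERIC_ENHANCE), so a corrupted NAT entry can no longer steer a node write through an arbitrary block. A toy version of that pre-write gate, with invented bounds standing in for the main-area check:

#include <stdio.h>
#include <stdbool.h>

/* Invented layout: the main area spans [MAIN_START, MAIN_END). */
#define MAIN_START 512u
#define MAIN_END   4096u
#define NULL_ADDR  0u
#define NEW_ADDR   0xFFFFFFFFu

static bool blkaddr_ok(unsigned blkaddr)
{
	if (blkaddr == NULL_ADDR || blkaddr == NEW_ADDR)
		return true;                 /* not yet allocated: nothing to verify */
	return blkaddr >= MAIN_START && blkaddr < MAIN_END;
}

static int write_node_page(unsigned old_blkaddr)
{
	if (!blkaddr_ok(old_blkaddr))
		return -1;                   /* redirty: do not issue the I/O */
	printf("write over blkaddr %u\n", old_blkaddr);
	return 0;
}

int main(void)
{
	write_node_page(1024);                          /* fine */
	printf("bogus -> %d\n", write_node_page(7));    /* rejected */
	return 0;
}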
<= end) { - int i, nr_pages; - nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index, - PAGECACHE_TAG_DIRTY, - min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); - if (nr_pages == 0) - break; + + while ((nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index, + PAGECACHE_TAG_DIRTY))) { + int i; for (i = 0; i < nr_pages; i++) { struct page *page = pvec.pages[i]; @@ -1528,8 +1701,7 @@ continue_unlock: goto continue_unlock; } - f2fs_wait_on_page_writeback(page, NODE, true); - BUG_ON(PageWriteback(page)); + f2fs_wait_on_page_writeback(page, NODE, true, true); set_fsync_mark(page, 0); set_dentry_mark(page, 0); @@ -1539,9 +1711,9 @@ continue_unlock: if (IS_INODE(page)) { if (is_inode_flag_set(inode, FI_DIRTY_INODE)) - update_inode(inode, page); + f2fs_update_inode(inode, page); set_dentry_mark(page, - need_dentry_mark(sbi, ino)); + f2fs_need_dentry_mark(sbi, ino)); } /* may be written by other thread */ if (!PageDirty(page)) @@ -1554,13 +1726,13 @@ continue_unlock: ret = __write_node_page(page, atomic && page == last_page, &submitted, wbc, true, - FS_NODE_IO); + FS_NODE_IO, seq_id); if (ret) { unlock_page(page); f2fs_put_page(last_page, 0); break; } else if (submitted) { - last_idx = page->index; + nwritten++; } if (page == last_page) { @@ -1576,48 +1748,51 @@ continue_unlock: break; } if (!ret && atomic && !marked) { - f2fs_msg(sbi->sb, KERN_DEBUG, - "Retry to write fsync mark: ino=%u, idx=%lx", - ino, last_page->index); + f2fs_debug(sbi, "Retry to write fsync mark: ino=%u, idx=%lx", + ino, last_page->index); lock_page(last_page); - f2fs_wait_on_page_writeback(last_page, NODE, true); + f2fs_wait_on_page_writeback(last_page, NODE, true, true); set_page_dirty(last_page); unlock_page(last_page); goto retry; } out: - if (last_idx != ULONG_MAX) - f2fs_submit_merged_write_cond(sbi, NULL, ino, last_idx, NODE); + if (nwritten) + f2fs_submit_merged_write_cond(sbi, NULL, NULL, ino, NODE); return ret ? -EIO: 0; } -int sync_node_pages(struct f2fs_sb_info *sbi, struct writeback_control *wbc, +int f2fs_sync_node_pages(struct f2fs_sb_info *sbi, + struct writeback_control *wbc, bool do_balance, enum iostat_type io_type) { - pgoff_t index, end; + pgoff_t index; struct pagevec pvec; int step = 0; int nwritten = 0; int ret = 0; + int nr_pages, done = 0; pagevec_init(&pvec, 0); next_step: index = 0; - end = ULONG_MAX; - - while (index <= end) { - int i, nr_pages; - nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index, - PAGECACHE_TAG_DIRTY, - min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); - if (nr_pages == 0) - break; + + while (!done && (nr_pages = pagevec_lookup_tag(&pvec, + NODE_MAPPING(sbi), &index, PAGECACHE_TAG_DIRTY))) { + int i; for (i = 0; i < nr_pages; i++) { struct page *page = pvec.pages[i]; bool submitted = false; + /* give a priority to WB_SYNC threads */ + if (atomic_read(&sbi->wb_sync_req[NODE]) && + wbc->sync_mode == WB_SYNC_NONE) { + done = 1; + break; + } + /* * flushing sequence with step: * 0. 
indirect nodes @@ -1633,7 +1808,9 @@ next_step: !is_cold_node(page))) continue; lock_node: - if (!trylock_page(page)) + if (wbc->sync_mode == WB_SYNC_ALL) + lock_page(page); + else if (!trylock_page(page)) continue; if (unlikely(page->mapping != NODE_MAPPING(sbi))) { @@ -1655,9 +1832,8 @@ continue_unlock: goto lock_node; } - f2fs_wait_on_page_writeback(page, NODE, true); + f2fs_wait_on_page_writeback(page, NODE, true, true); - BUG_ON(PageWriteback(page)); if (!clear_page_dirty_for_io(page)) goto continue_unlock; @@ -1665,7 +1841,7 @@ continue_unlock: set_dentry_mark(page, 0); ret = __write_node_page(page, false, &submitted, - wbc, do_balance, io_type); + wbc, do_balance, io_type, NULL); if (ret) unlock_page(page); else if (submitted) @@ -1684,10 +1860,12 @@ continue_unlock: } if (step < 2) { + if (wbc->sync_mode == WB_SYNC_NONE && step == 1) + goto out; step++; goto next_step; } - +out: if (nwritten) f2fs_submit_merged_write(sbi, NODE); @@ -1696,37 +1874,40 @@ continue_unlock: return ret; } -int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino) +int f2fs_wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, + unsigned int seq_id) { - pgoff_t index = 0, end = ULONG_MAX; - struct pagevec pvec; + struct fsync_node_entry *fn; + struct page *page; + struct list_head *head = &sbi->fsync_node_list; + unsigned long flags; + unsigned int cur_seq_id = 0; int ret2 = 0, ret = 0; - pagevec_init(&pvec, 0); - - while (index <= end) { - int i, nr_pages; - nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index, - PAGECACHE_TAG_WRITEBACK, - min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); - if (nr_pages == 0) + while (seq_id && cur_seq_id < seq_id) { + spin_lock_irqsave(&sbi->fsync_node_lock, flags); + if (list_empty(head)) { + spin_unlock_irqrestore(&sbi->fsync_node_lock, flags); + break; + } + fn = list_first_entry(head, struct fsync_node_entry, list); + if (fn->seq_id > seq_id) { + spin_unlock_irqrestore(&sbi->fsync_node_lock, flags); break; + } + cur_seq_id = fn->seq_id; + page = fn->page; + get_page(page); + spin_unlock_irqrestore(&sbi->fsync_node_lock, flags); - for (i = 0; i < nr_pages; i++) { - struct page *page = pvec.pages[i]; + f2fs_wait_on_page_writeback(page, NODE, true, false); + if (TestClearPageError(page)) + ret = -EIO; - /* until radix tree lookup accepts end_index */ - if (unlikely(page->index > end)) - continue; + put_page(page); - if (ino && ino_of_node(page) == ino) { - f2fs_wait_on_page_writeback(page, NODE, true); - if (TestClearPageError(page)) - ret = -EIO; - } - } - pagevec_release(&pvec); - cond_resched(); + if (ret) + break; } if (unlikely(test_and_clear_bit(AS_ENOSPC, &NODE_MAPPING(sbi)->flags))) @@ -1735,6 +1916,7 @@ int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino) ret2 = -EIO; if (!ret) ret = ret2; + return ret; } @@ -1752,17 +1934,26 @@ static int f2fs_write_node_pages(struct address_space *mapping, f2fs_balance_fs_bg(sbi); /* collect a number of dirty node pages and write together */ - if (get_pages(sbi, F2FS_DIRTY_NODES) < nr_pages_to_skip(sbi, NODE)) + if (wbc->sync_mode != WB_SYNC_ALL && + get_pages(sbi, F2FS_DIRTY_NODES) < + nr_pages_to_skip(sbi, NODE)) + goto skip_write; + + if (wbc->sync_mode == WB_SYNC_ALL) + atomic_inc(&sbi->wb_sync_req[NODE]); + else if (atomic_read(&sbi->wb_sync_req[NODE])) goto skip_write; trace_f2fs_writepages(mapping->host, wbc, NODE); diff = nr_pages_to_write(sbi, NODE, wbc); - wbc->sync_mode = WB_SYNC_NONE; blk_start_plug(&plug); - sync_node_pages(sbi, wbc, true, FS_NODE_IO); + 
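The wb_sync_req[NODE] counter introduced above lets WB_SYNC_ALL writeback elbow out background writeback: a sync pass raises the counter for its whole run, and any WB_SYNC_NONE pass that observes it non-zero skips entirely (or, inside the page loop, stops early). The handshake in miniature, using C11 atomics in place of the kernel's atomic_t:

#include <stdio.h>
#include <stdatomic.h>
#include <stdbool.h>

static atomic_int wb_sync_req;

static void write_pages(bool sync)
{
	if (!sync && atomic_load(&wb_sync_req)) {
		puts("background pass: yielding to sync");
		return;
	}
	if (sync)
		atomic_fetch_add(&wb_sync_req, 1);
	puts(sync ? "sync pass: writing" : "background pass: writing");
	if (sync)
		atomic_fetch_sub(&wb_sync_req, 1);
}

int main(void)
{
	write_pages(false);                  /* runs: no sync in flight */
	atomic_fetch_add(&wb_sync_req, 1);   /* pretend a sync is mid-flight */
	write_pages(false);                  /* yields */
	atomic_fetch_sub(&wb_sync_req, 1);
	write_pages(true);
	return 0;
}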
f2fs_sync_node_pages(sbi, wbc, true, FS_NODE_IO); blk_finish_plug(&plug); wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff); + + if (wbc->sync_mode == WB_SYNC_ALL) + atomic_dec(&sbi->wb_sync_req[NODE]); return 0; skip_write: @@ -1777,10 +1968,14 @@ static int f2fs_set_node_page_dirty(struct page *page) if (!PageUptodate(page)) SetPageUptodate(page); +#ifdef CONFIG_F2FS_CHECK_FS + if (IS_INODE(page)) + f2fs_inode_chksum_set(F2FS_P_SB(page), page); +#endif if (!PageDirty(page)) { __set_page_dirty_nobuffers(page); inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES); - SetPagePrivate(page); + f2fs_set_page_private(page, 0); f2fs_trace_pid(page); return 1; } @@ -1895,6 +2090,9 @@ static bool add_free_nid(struct f2fs_sb_info *sbi, if (unlikely(nid == 0)) return false; + if (unlikely(f2fs_check_nid_range(sbi, nid))) + return false; + i = f2fs_kmem_cache_alloc(free_nid_slab, GFP_NOFS); i->nid = nid; i->state = FREE_NID; @@ -1908,20 +2106,20 @@ static bool add_free_nid(struct f2fs_sb_info *sbi, * Thread A Thread B * - f2fs_create * - f2fs_new_inode - * - alloc_nid + * - f2fs_alloc_nid * - __insert_nid_to_list(PREALLOC_NID) * - f2fs_balance_fs_bg - * - build_free_nids - * - __build_free_nids + * - f2fs_build_free_nids + * - __f2fs_build_free_nids * - scan_nat_page * - add_free_nid * - __lookup_nat_cache * - f2fs_add_link - * - init_inode_metadata - * - new_inode_page - * - new_node_page + * - f2fs_init_inode_metadata + * - f2fs_new_inode_page + * - f2fs_new_node_page * - set_node_addr - * - alloc_nid_done + * - f2fs_alloc_nid_done * - __remove_nid_from_list(PREALLOC_NID) * - __insert_nid_to_list(FREE_NID) */ @@ -1971,7 +2169,7 @@ static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid) kmem_cache_free(free_nid_slab, i); } -static void scan_nat_page(struct f2fs_sb_info *sbi, +static int scan_nat_page(struct f2fs_sb_info *sbi, struct page *nat_page, nid_t start_nid) { struct f2fs_nm_info *nm_i = NM_I(sbi); @@ -1989,7 +2187,10 @@ static void scan_nat_page(struct f2fs_sb_info *sbi, break; blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr); - f2fs_bug_on(sbi, blk_addr == NEW_ADDR); + + if (blk_addr == NEW_ADDR) + return -EINVAL; + if (blk_addr == NULL_ADDR) { add_free_nid(sbi, start_nid, true, true); } else { @@ -1998,6 +2199,8 @@ static void scan_nat_page(struct f2fs_sb_info *sbi, spin_unlock(&NM_I(sbi)->nid_list_lock); } } + + return 0; } static void scan_curseg_cache(struct f2fs_sb_info *sbi) @@ -2053,10 +2256,11 @@ out: up_read(&nm_i->nat_tree_lock); } -static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount) +static int __f2fs_build_free_nids(struct f2fs_sb_info *sbi, + bool sync, bool mount) { struct f2fs_nm_info *nm_i = NM_I(sbi); - int i = 0; + int i = 0, ret; nid_t nid = nm_i->next_scan_nid; if (unlikely(nid >= nm_i->max_nid)) @@ -2064,21 +2268,21 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount) /* Enough entries */ if (nm_i->nid_cnt[FREE_NID] >= NAT_ENTRY_PER_BLOCK) - return; + return 0; - if (!sync && !available_free_memory(sbi, FREE_NIDS)) - return; + if (!sync && !f2fs_available_free_memory(sbi, FREE_NIDS)) + return 0; if (!mount) { /* try to find free nids in free_nid_bitmap */ scan_free_nid_bits(sbi); if (nm_i->nid_cnt[FREE_NID] >= NAT_ENTRY_PER_BLOCK) - return; + return 0; } /* readahead nat pages to be scanned */ - ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES, + f2fs_ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES, META_NAT, true); down_read(&nm_i->nat_tree_lock); @@ -2088,8 +2292,19 @@ static 
void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount) nm_i->nat_block_bitmap)) { struct page *page = get_current_nat_page(sbi, nid); - scan_nat_page(sbi, page, nid); - f2fs_put_page(page, 1); + if (IS_ERR(page)) { + ret = PTR_ERR(page); + } else { + ret = scan_nat_page(sbi, page, nid); + f2fs_put_page(page, 1); + } + + if (ret) { + up_read(&nm_i->nat_tree_lock); + f2fs_bug_on(sbi, !mount); + f2fs_err(sbi, "NAT is corrupt, run fsck to fix it"); + return ret; + } } nid += (NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK)); @@ -2108,15 +2323,21 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount) up_read(&nm_i->nat_tree_lock); - ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid), + f2fs_ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid), nm_i->ra_nid_pages, META_NAT, false); + + return 0; } -void build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount) +int f2fs_build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount) { + int ret; + mutex_lock(&NM_I(sbi)->build_lock); - __build_free_nids(sbi, sync, mount); + ret = __f2fs_build_free_nids(sbi, sync, mount); mutex_unlock(&NM_I(sbi)->build_lock); + + return ret; } /* @@ -2124,17 +2345,16 @@ void build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount) * from second parameter of this function. * The returned nid could be used ino as well as nid when inode is created. */ -bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid) +bool f2fs_alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid) { struct f2fs_nm_info *nm_i = NM_I(sbi); struct free_nid *i = NULL; retry: -#ifdef CONFIG_F2FS_FAULT_INJECTION if (time_to_inject(sbi, FAULT_ALLOC_NID)) { f2fs_show_injection_info(FAULT_ALLOC_NID); return false; } -#endif + spin_lock(&nm_i->nid_list_lock); if (unlikely(nm_i->available_nids == 0)) { @@ -2142,8 +2362,8 @@ retry: return false; } - /* We should not use stale free nids created by build_free_nids */ - if (nm_i->nid_cnt[FREE_NID] && !on_build_free_nids(nm_i)) { + /* We should not use stale free nids created by f2fs_build_free_nids */ + if (nm_i->nid_cnt[FREE_NID] && !on_f2fs_build_free_nids(nm_i)) { f2fs_bug_on(sbi, list_empty(&nm_i->free_nid_list)); i = list_first_entry(&nm_i->free_nid_list, struct free_nid, list); @@ -2160,14 +2380,15 @@ retry: spin_unlock(&nm_i->nid_list_lock); /* Let's scan nat pages and its caches to get free nids */ - build_free_nids(sbi, true, false); - goto retry; + if (!f2fs_build_free_nids(sbi, true, false)) + goto retry; + return false; } /* - * alloc_nid() should be called prior to this function. + * f2fs_alloc_nid() should be called prior to this function. */ -void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid) +void f2fs_alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid) { struct f2fs_nm_info *nm_i = NM_I(sbi); struct free_nid *i; @@ -2182,9 +2403,9 @@ void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid) } /* - * alloc_nid() should be called prior to this function. + * f2fs_alloc_nid() should be called prior to this function. 
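The f2fs_alloc_nid retry above is the behavioural fix hiding in the renames: the old code jumped back to retry unconditionally after rebuilding free nids, so a NAT corrupt enough that no free nid can ever be produced meant an infinite loop; now the retry happens only while f2fs_build_free_nids itself reports success. The shape of that loop, reduced to a runnable sketch with a toy backing pool:

#include <stdio.h>
#include <stdbool.h>

/* Toy backing state: one refill's worth of nids, then "corruption". */
static int refills_left = 1;
static unsigned pool;

static int build_free_nids(void)
{
	if (refills_left-- > 0) { pool = 1; return 0; }
	return -117;   /* stand-in for -EFSCORRUPTED */
}

static bool take_free_nid(unsigned *nid)
{
	if (!pool) return false;
	*nid = pool--;
	return true;
}

static bool alloc_nid(unsigned *nid)
{
	for (;;) {
		if (take_free_nid(nid))
			return true;
		if (build_free_nids())   /* a failed rebuild now ends the retry loop */
			return false;
	}
}

int main(void)
{
	unsigned nid;
	printf("first alloc: %s\n", alloc_nid(&nid) ? "ok" : "failed");
	printf("after NAT corruption: %s\n", alloc_nid(&nid) ? "ok" : "failed");
	return 0;
}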
*/ -void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid) +void f2fs_alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid) { struct f2fs_nm_info *nm_i = NM_I(sbi); struct free_nid *i; @@ -2197,7 +2418,7 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid) i = __lookup_free_nid_list(nm_i, nid); f2fs_bug_on(sbi, !i); - if (!available_free_memory(sbi, FREE_NIDS)) { + if (!f2fs_available_free_memory(sbi, FREE_NIDS)) { __remove_free_nid(sbi, i, PREALLOC_NID); need_free = true; } else { @@ -2214,7 +2435,7 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid) kmem_cache_free(free_nid_slab, i); } -int try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink) +int f2fs_try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink) { struct f2fs_nm_info *nm_i = NM_I(sbi); struct free_nid *i, *next; @@ -2242,14 +2463,14 @@ int try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink) return nr - nr_shrink; } -void recover_inline_xattr(struct inode *inode, struct page *page) +void f2fs_recover_inline_xattr(struct inode *inode, struct page *page) { void *src_addr, *dst_addr; size_t inline_size; struct page *ipage; struct f2fs_inode *ri; - ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino); + ipage = f2fs_get_node_page(F2FS_I_SB(inode), inode->i_ino); f2fs_bug_on(F2FS_I_SB(inode), IS_ERR(ipage)); ri = F2FS_INODE(page); @@ -2264,14 +2485,14 @@ void recover_inline_xattr(struct inode *inode, struct page *page) src_addr = inline_xattr_addr(inode, page); inline_size = inline_xattr_size(inode); - f2fs_wait_on_page_writeback(ipage, NODE, true); + f2fs_wait_on_page_writeback(ipage, NODE, true, true); memcpy(dst_addr, src_addr, inline_size); update_inode: - update_inode(inode, ipage); + f2fs_update_inode(inode, ipage); f2fs_put_page(ipage, 1); } -int recover_xattr_data(struct inode *inode, struct page *page) +int f2fs_recover_xattr_data(struct inode *inode, struct page *page) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); nid_t prev_xnid = F2FS_I(inode)->i_xattr_nid; @@ -2279,30 +2500,34 @@ int recover_xattr_data(struct inode *inode, struct page *page) struct dnode_of_data dn; struct node_info ni; struct page *xpage; + int err; if (!prev_xnid) goto recover_xnid; /* 1: invalidate the previous xattr nid */ - get_node_info(sbi, prev_xnid, &ni); - invalidate_blocks(sbi, ni.blk_addr); + err = f2fs_get_node_info(sbi, prev_xnid, &ni); + if (err) + return err; + + f2fs_invalidate_blocks(sbi, ni.blk_addr); dec_valid_node_count(sbi, inode, false); set_node_addr(sbi, &ni, NULL_ADDR, false); recover_xnid: /* 2: update xattr nid in inode */ - if (!alloc_nid(sbi, &new_xnid)) + if (!f2fs_alloc_nid(sbi, &new_xnid)) return -ENOSPC; set_new_dnode(&dn, inode, NULL, NULL, new_xnid); - xpage = new_node_page(&dn, XATTR_NODE_OFFSET); + xpage = f2fs_new_node_page(&dn, XATTR_NODE_OFFSET); if (IS_ERR(xpage)) { - alloc_nid_failed(sbi, new_xnid); + f2fs_alloc_nid_failed(sbi, new_xnid); return PTR_ERR(xpage); } - alloc_nid_done(sbi, new_xnid); - update_inode_page(inode); + f2fs_alloc_nid_done(sbi, new_xnid); + f2fs_update_inode_page(inode); /* 3: update and set xattr node page dirty */ memcpy(F2FS_NODE(xpage), F2FS_NODE(page), VALID_XATTR_BLOCK_SIZE); @@ -2313,14 +2538,17 @@ recover_xnid: return 0; } -int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) +int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) { struct f2fs_inode *src, *dst; nid_t ino = ino_of_node(page); struct node_info old_ni, new_ni; struct page *ipage; + int err; - get_node_info(sbi, ino, &old_ni); + err = 
f2fs_get_node_info(sbi, ino, &old_ni); + if (err) + return err; if (unlikely(old_ni.blk_addr != NULL_ADDR)) return -EINVAL; @@ -2337,7 +2565,7 @@ retry: if (!PageUptodate(ipage)) SetPageUptodate(ipage); fill_node_footer(ipage, ino, ino, 0, true); - set_cold_node(page, false); + set_cold_node(ipage, false); src = F2FS_INODE(page); dst = F2FS_INODE(ipage); @@ -2351,15 +2579,22 @@ retry: if (dst->i_inline & F2FS_EXTRA_ATTR) { dst->i_extra_isize = src->i_extra_isize; - if (f2fs_sb_has_flexible_inline_xattr(sbi->sb) && + if (f2fs_sb_has_flexible_inline_xattr(sbi) && F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize), i_inline_xattr_size)) dst->i_inline_xattr_size = src->i_inline_xattr_size; - if (f2fs_sb_has_project_quota(sbi->sb) && + if (f2fs_sb_has_project_quota(sbi) && F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize), i_projid)) dst->i_projid = src->i_projid; + + if (f2fs_sb_has_inode_crtime(sbi) && + F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize), + i_crtime_nsec)) { + dst->i_crtime = src->i_crtime; + dst->i_crtime_nsec = src->i_crtime_nsec; + } } new_ni = old_ni; @@ -2374,7 +2609,7 @@ retry: return 0; } -void restore_node_summary(struct f2fs_sb_info *sbi, +int f2fs_restore_node_summary(struct f2fs_sb_info *sbi, unsigned int segno, struct f2fs_summary_block *sum) { struct f2fs_node *rn; @@ -2391,10 +2626,13 @@ void restore_node_summary(struct f2fs_sb_info *sbi, nrpages = min(last_offset - i, BIO_MAX_PAGES); /* readahead node pages */ - ra_meta_pages(sbi, addr, nrpages, META_POR, true); + f2fs_ra_meta_pages(sbi, addr, nrpages, META_POR, true); for (idx = addr; idx < addr + nrpages; idx++) { - struct page *page = get_tmp_page(sbi, idx); + struct page *page = f2fs_get_tmp_page(sbi, idx); + + if (IS_ERR(page)) + return PTR_ERR(page); rn = F2FS_NODE(page); sum_entry->nid = rn->footer.nid; @@ -2407,6 +2645,7 @@ void restore_node_summary(struct f2fs_sb_info *sbi, invalidate_mapping_pages(META_MAPPING(sbi), addr, addr + nrpages); } + return 0; } static void remove_nats_in_journal(struct f2fs_sb_info *sbi) @@ -2483,7 +2722,7 @@ static void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid, i = 1; } for (; i < NAT_ENTRY_PER_BLOCK; i++) { - if (nat_blk->entries[i].block_addr != NULL_ADDR) + if (le32_to_cpu(nat_blk->entries[i].block_addr) != NULL_ADDR) valid++; } if (valid == 0) { @@ -2499,7 +2738,7 @@ static void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid, __clear_bit_le(nat_index, nm_i->full_nat_bits); } -static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, +static int __flush_nat_entry_set(struct f2fs_sb_info *sbi, struct nat_entry_set *set, struct cp_control *cpc) { struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); @@ -2523,6 +2762,9 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, down_write(&curseg->journal_rwsem); } else { page = get_next_nat_page(sbi, start_nid); + if (IS_ERR(page)) + return PTR_ERR(page); + nat_blk = page_address(page); f2fs_bug_on(sbi, !nat_blk); } @@ -2536,7 +2778,7 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, f2fs_bug_on(sbi, nat_get_blkaddr(ne) == NEW_ADDR); if (to_journal) { - offset = lookup_journal_in_cursum(journal, + offset = f2fs_lookup_journal_in_cursum(journal, NAT_JOURNAL, nid, 1); f2fs_bug_on(sbi, offset < 0); raw_ne = &nat_in_journal(journal, offset); @@ -2568,12 +2810,13 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set); kmem_cache_free(nat_entry_set_slab, set); } + return 0; } /* * This function 
is called during the checkpointing process. */ -void flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) +int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) { struct f2fs_nm_info *nm_i = NM_I(sbi); struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); @@ -2583,9 +2826,17 @@ void flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) unsigned int found; nid_t set_idx = 0; LIST_HEAD(sets); + int err = 0; + + /* during unmount, let's flush nat_bits before checking dirty_nat_cnt */ + if (enabled_nat_bits(sbi, cpc)) { + down_write(&nm_i->nat_tree_lock); + remove_nats_in_journal(sbi); + up_write(&nm_i->nat_tree_lock); + } if (!nm_i->dirty_nat_cnt) - return; + return 0; down_write(&nm_i->nat_tree_lock); @@ -2608,11 +2859,16 @@ void flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) } /* flush dirty nats in nat entry set */ - list_for_each_entry_safe(set, tmp, &sets, set_list) - __flush_nat_entry_set(sbi, set, cpc); + list_for_each_entry_safe(set, tmp, &sets, set_list) { + err = __flush_nat_entry_set(sbi, set, cpc); + if (err) + break; + } up_write(&nm_i->nat_tree_lock); /* Allow dirty nats by node block allocation in write_begin */ + + return err; } static int __get_nat_bitmaps(struct f2fs_sb_info *sbi) @@ -2636,7 +2892,11 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi) nat_bits_addr = __start_cp_addr(sbi) + sbi->blocks_per_seg - nm_i->nat_bits_blocks; for (i = 0; i < nm_i->nat_bits_blocks; i++) { - struct page *page = get_meta_page(sbi, nat_bits_addr++); + struct page *page; + + page = f2fs_get_meta_page(sbi, nat_bits_addr++); + if (IS_ERR(page)) + return PTR_ERR(page); memcpy(nm_i->nat_bits + (i << F2FS_BLKSIZE_BITS), page_address(page), F2FS_BLKSIZE); @@ -2652,7 +2912,7 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi) nm_i->full_nat_bits = nm_i->nat_bits + 8; nm_i->empty_nat_bits = nm_i->full_nat_bits + nat_bits_bytes; - f2fs_msg(sbi->sb, KERN_NOTICE, "Found nat_bits in checkpoint"); + f2fs_notice(sbi, "Found nat_bits in checkpoint"); return 0; } @@ -2720,6 +2980,7 @@ static int init_node_manager(struct f2fs_sb_info *sbi) INIT_RADIX_TREE(&nm_i->nat_root, GFP_NOIO); INIT_RADIX_TREE(&nm_i->nat_set_root, GFP_NOIO); INIT_LIST_HEAD(&nm_i->nat_entries); + spin_lock_init(&nm_i->nat_list_lock); mutex_init(&nm_i->build_lock); spin_lock_init(&nm_i->nid_list_lock); @@ -2755,15 +3016,17 @@ static int init_free_nid_cache(struct f2fs_sb_info *sbi) struct f2fs_nm_info *nm_i = NM_I(sbi); int i; - nm_i->free_nid_bitmap = f2fs_kzalloc(sbi, nm_i->nat_blocks * - sizeof(unsigned char *), GFP_KERNEL); + nm_i->free_nid_bitmap = + f2fs_kzalloc(sbi, array_size(sizeof(unsigned char *), + nm_i->nat_blocks), + GFP_KERNEL); if (!nm_i->free_nid_bitmap) return -ENOMEM; for (i = 0; i < nm_i->nat_blocks; i++) { nm_i->free_nid_bitmap[i] = f2fs_kvzalloc(sbi, - NAT_ENTRY_BITMAP_SIZE_ALIGNED, GFP_KERNEL); - if (!nm_i->free_nid_bitmap) + f2fs_bitmap_size(NAT_ENTRY_PER_BLOCK), GFP_KERNEL); + if (!nm_i->free_nid_bitmap[i]) return -ENOMEM; } @@ -2772,14 +3035,16 @@ static int init_free_nid_cache(struct f2fs_sb_info *sbi) if (!nm_i->nat_block_bitmap) return -ENOMEM; - nm_i->free_nid_count = f2fs_kvzalloc(sbi, nm_i->nat_blocks * - sizeof(unsigned short), GFP_KERNEL); + nm_i->free_nid_count = + f2fs_kvzalloc(sbi, array_size(sizeof(unsigned short), + nm_i->nat_blocks), + GFP_KERNEL); if (!nm_i->free_nid_count) return -ENOMEM; return 0; } -int build_node_manager(struct f2fs_sb_info *sbi) +int f2fs_build_node_manager(struct f2fs_sb_info *sbi) 
{ int err; @@ -2799,11 +3064,10 @@ int build_node_manager(struct f2fs_sb_info *sbi) /* load free nid status from nat_bits table */ load_free_nid_bitmap(sbi); - build_free_nids(sbi, true, true); - return 0; + return f2fs_build_free_nids(sbi, true, true); } -void destroy_node_manager(struct f2fs_sb_info *sbi) +void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi) { struct f2fs_nm_info *nm_i = NM_I(sbi); struct free_nid *i, *next_i; @@ -2835,8 +3099,13 @@ void destroy_node_manager(struct f2fs_sb_info *sbi) unsigned idx; nid = nat_get_nid(natvec[found - 1]) + 1; - for (idx = 0; idx < found; idx++) + for (idx = 0; idx < found; idx++) { + spin_lock(&nm_i->nat_list_lock); + list_del(&natvec[idx]->list); + spin_unlock(&nm_i->nat_list_lock); + __del_from_nat_cache(nm_i, natvec[idx]); + } } f2fs_bug_on(sbi, nm_i->nat_cnt); @@ -2862,20 +3131,20 @@ void destroy_node_manager(struct f2fs_sb_info *sbi) for (i = 0; i < nm_i->nat_blocks; i++) kvfree(nm_i->free_nid_bitmap[i]); - kfree(nm_i->free_nid_bitmap); + kvfree(nm_i->free_nid_bitmap); } kvfree(nm_i->free_nid_count); - kfree(nm_i->nat_bitmap); - kfree(nm_i->nat_bits); + kvfree(nm_i->nat_bitmap); + kvfree(nm_i->nat_bits); #ifdef CONFIG_F2FS_CHECK_FS - kfree(nm_i->nat_bitmap_mir); + kvfree(nm_i->nat_bitmap_mir); #endif sbi->nm_info = NULL; - kfree(nm_i); + kvfree(nm_i); } -int __init create_node_manager_caches(void) +int __init f2fs_create_node_manager_caches(void) { nat_entry_slab = f2fs_kmem_cache_create("nat_entry", sizeof(struct nat_entry)); @@ -2891,8 +3160,15 @@ int __init create_node_manager_caches(void) sizeof(struct nat_entry_set)); if (!nat_entry_set_slab) goto destroy_free_nid; + + fsync_node_entry_slab = f2fs_kmem_cache_create("fsync_node_entry", + sizeof(struct fsync_node_entry)); + if (!fsync_node_entry_slab) + goto destroy_nat_entry_set; return 0; +destroy_nat_entry_set: + kmem_cache_destroy(nat_entry_set_slab); destroy_free_nid: kmem_cache_destroy(free_nid_slab); destroy_nat_entry: @@ -2901,8 +3177,9 @@ fail: return -ENOMEM; } -void destroy_node_manager_caches(void) +void f2fs_destroy_node_manager_caches(void) { + kmem_cache_destroy(fsync_node_entry_slab); kmem_cache_destroy(nat_entry_set_slab); kmem_cache_destroy(free_nid_slab); kmem_cache_destroy(nat_entry_slab); diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index b95e49e4a928..e05af5df5648 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/node.h * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
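The kfree-to-kvfree conversions in f2fs_destroy_node_manager above are not cosmetic: free_nid_bitmap and friends come from f2fs_kvzalloc, which may fall back to vmalloc, and vmalloc-backed memory must never be handed to kfree. kvfree dispatches on the allocation itself; the userspace caricature below models that invariant with an explicit tag rather than a real address check:

#include <stdio.h>
#include <stdlib.h>

/* Model: each allocation remembers which allocator produced it, and the
 * free routine dispatches on that tag - the property kvfree() provides. */
enum src { KMALLOC, VMALLOC };

struct hdr { enum src src; };

static void *alloc_tagged(size_t n, enum src s)
{
	struct hdr *h = malloc(sizeof(*h) + n);
	h->src = s;
	return h + 1;
}

static void kvfree_model(void *p)
{
	struct hdr *h = (struct hdr *)p - 1;
	printf("%s-backed buffer freed by the matching allocator\n",
	       h->src == VMALLOC ? "vmalloc" : "kmalloc");
	free(h);
}

int main(void)
{
	void *small = alloc_tagged(64, KMALLOC);     /* kvmalloc small -> kmalloc */
	void *big = alloc_tagged(1 << 20, VMALLOC);  /* large may fall back to vmalloc */
	kvfree_model(small);
	kvfree_model(big);
	return 0;
}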
*/ /* start node id of a node block dedicated to the given node id */ #define START_NID(nid) (((nid) / NAT_ENTRY_PER_BLOCK) * NAT_ENTRY_PER_BLOCK) @@ -135,6 +132,11 @@ static inline bool excess_cached_nats(struct f2fs_sb_info *sbi) return NM_I(sbi)->nat_cnt >= DEF_NAT_CACHE_THRESHOLD; } +static inline bool excess_dirty_nodes(struct f2fs_sb_info *sbi) +{ + return get_pages(sbi, F2FS_DIRTY_NODES) >= sbi->blocks_per_seg * 8; +} + enum mem_type { FREE_NIDS, /* indicates the free nid list */ NAT_ENTRIES, /* indicates the cached nat entry */ @@ -359,7 +361,7 @@ static inline int set_nid(struct page *p, int off, nid_t nid, bool i) { struct f2fs_node *rn = F2FS_NODE(p); - f2fs_wait_on_page_writeback(p, NODE, true); + f2fs_wait_on_page_writeback(p, NODE, true, true); if (i) rn->i.i_nid[off - NODE_DIR1_BLOCK] = cpu_to_le32(nid); @@ -444,6 +446,10 @@ static inline void set_mark(struct page *page, int mark, int type) else flag &= ~(0x1 << type); rn->footer.flag = cpu_to_le32(flag); + +#ifdef CONFIG_F2FS_CHECK_FS + f2fs_inode_chksum_set(F2FS_P_SB(page), page); +#endif } #define set_dentry_mark(page, mark) set_mark(page, mark, DENT_BIT_SHIFT) #define set_fsync_mark(page, mark) set_mark(page, mark, FSYNC_BIT_SHIFT) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index c2355b486669..5d0c77054252 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/recovery.c * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/fs.h> #include <linux/f2fs_fs.h> @@ -47,7 +44,7 @@ static struct kmem_cache *fsync_entry_slab; -bool space_for_roll_forward(struct f2fs_sb_info *sbi) +bool f2fs_space_for_roll_forward(struct f2fs_sb_info *sbi) { s64 nalloc = percpu_counter_sum_positive(&sbi->alloc_valid_block_count); @@ -99,8 +96,12 @@ err_out: return ERR_PTR(err); } -static void del_fsync_inode(struct fsync_inode_entry *entry) +static void del_fsync_inode(struct fsync_inode_entry *entry, int drop) { + if (drop) { + /* inode should not be recovered, drop it */ + f2fs_inode_synced(entry->inode); + } iput(entry->inode); list_del(&entry->list); kmem_cache_free(fsync_entry_slab, entry); @@ -162,7 +163,7 @@ retry: goto out_put; } - err = acquire_orphan_inode(F2FS_I_SB(inode)); + err = f2fs_acquire_orphan_inode(F2FS_I_SB(inode)); if (err) { iput(einode); goto out_put; @@ -173,7 +174,7 @@ retry: } else if (IS_ERR(page)) { err = PTR_ERR(page); } else { - err = __f2fs_do_add_link(dir, &fname, inode, + err = f2fs_add_dentry(dir, &fname, inode, inode->i_ino, inode->i_mode); } if (err == -ENOMEM) @@ -187,10 +188,36 @@ out: name = "<encrypted>"; else name = raw_inode->i_name; - f2fs_msg(inode->i_sb, KERN_NOTICE, - "%s: ino = %x, name = %s, dir = %lx, err = %d", - __func__, ino_of_node(ipage), name, - IS_ERR(dir) ? 0 : dir->i_ino, err); + f2fs_notice(F2FS_I_SB(inode), "%s: ino = %x, name = %s, dir = %lx, err = %d", + __func__, ino_of_node(ipage), name, + IS_ERR(dir) ? 
0 : dir->i_ino, err); + return err; +} + +static int recover_quota_data(struct inode *inode, struct page *page) +{ + struct f2fs_inode *raw = F2FS_INODE(page); + struct iattr attr; + uid_t i_uid = le32_to_cpu(raw->i_uid); + gid_t i_gid = le32_to_cpu(raw->i_gid); + int err; + + memset(&attr, 0, sizeof(attr)); + + attr.ia_uid = make_kuid(&init_user_ns, i_uid); + attr.ia_gid = make_kgid(&init_user_ns, i_gid); + + if (!uid_eq(attr.ia_uid, inode->i_uid)) + attr.ia_valid |= ATTR_UID; + if (!gid_eq(attr.ia_gid, inode->i_gid)) + attr.ia_valid |= ATTR_GID; + + if (!attr.ia_valid) + return 0; + + err = dquot_transfer(inode, &attr); + if (err) + set_sbi_flag(F2FS_I_SB(inode), SBI_QUOTA_NEED_REPAIR); return err; } @@ -204,16 +231,35 @@ static void recover_inline_flags(struct inode *inode, struct f2fs_inode *ri) set_inode_flag(inode, FI_DATA_EXIST); else clear_inode_flag(inode, FI_DATA_EXIST); - if (!(ri->i_inline & F2FS_INLINE_DOTS)) - clear_inode_flag(inode, FI_INLINE_DOTS); } -static void recover_inode(struct inode *inode, struct page *page) +static int recover_inode(struct inode *inode, struct page *page) { struct f2fs_inode *raw = F2FS_INODE(page); char *name; + int err; inode->i_mode = le16_to_cpu(raw->i_mode); + + err = recover_quota_data(inode, page); + if (err) + return err; + + i_uid_write(inode, le32_to_cpu(raw->i_uid)); + i_gid_write(inode, le32_to_cpu(raw->i_gid)); + + if (raw->i_inline & F2FS_EXTRA_ATTR) { + if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)) && + F2FS_FITS_IN_INODE(raw, le16_to_cpu(raw->i_extra_isize), + i_projid)) { + projid_t i_projid; + + i_projid = (projid_t)le32_to_cpu(raw->i_projid); + F2FS_I(inode)->i_projid = + make_kprojid(&init_user_ns, i_projid); + } + } + f2fs_i_size_write(inode, le64_to_cpu(raw->i_size)); inode->i_atime.tv_sec = le64_to_cpu(raw->i_atime); inode->i_ctime.tv_sec = le64_to_cpu(raw->i_ctime); @@ -223,17 +269,23 @@ static void recover_inode(struct inode *inode, struct page *page) inode->i_mtime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec); F2FS_I(inode)->i_advise = raw->i_advise; + F2FS_I(inode)->i_flags = le32_to_cpu(raw->i_flags); + f2fs_set_inode_flags(inode); + F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN] = + le16_to_cpu(raw->i_gc_failures); recover_inline_flags(inode, raw); + f2fs_mark_inode_dirty_sync(inode, true); + if (file_enc_name(inode)) name = "<encrypted>"; else name = F2FS_INODE(page)->i_name; - f2fs_msg(inode->i_sb, KERN_NOTICE, - "recover_inode: ino = %x, name = %s, inline = %x", - ino_of_node(page), name, raw->i_inline); + f2fs_notice(F2FS_I_SB(inode), "recover_inode: ino = %x, name = %s, inline = %x", + ino_of_node(page), name, raw->i_inline); + return 0; } static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, @@ -243,8 +295,8 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, struct page *page = NULL; block_t blkaddr; unsigned int loop_cnt = 0; - unsigned int free_blocks = sbi->user_block_count - - valid_user_blocks(sbi); + unsigned int free_blocks = MAIN_SEGS(sbi) * sbi->blocks_per_seg - + valid_user_blocks(sbi); int err = 0; /* get node pages in the current segment */ @@ -254,13 +306,19 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, while (1) { struct fsync_inode_entry *entry; - if (!is_valid_blkaddr(sbi, blkaddr, META_POR)) + if (!f2fs_is_valid_blkaddr(sbi, blkaddr, META_POR)) return 0; - page = get_tmp_page(sbi, blkaddr); + page = f2fs_get_tmp_page(sbi, blkaddr); + if (IS_ERR(page)) { + err = PTR_ERR(page); + break; + } - if 
(!is_recoverable_dnode(page)) + if (!is_recoverable_dnode(page)) { + f2fs_put_page(page, 1); break; + } if (!is_fsync_dnode(page)) goto next; @@ -271,9 +329,11 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, if (!check_only && IS_INODE(page) && is_dent_dnode(page)) { - err = recover_inode_page(sbi, page); - if (err) + err = f2fs_recover_inode_page(sbi, page); + if (err) { + f2fs_put_page(page, 1); break; + } quota_inode = true; } @@ -289,6 +349,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, err = 0; goto next; } + f2fs_put_page(page, 1); break; } } @@ -300,10 +361,10 @@ next: /* sanity check in order to detect looped node chain */ if (++loop_cnt >= free_blocks || blkaddr == next_blkaddr_of_node(page)) { - f2fs_msg(sbi->sb, KERN_NOTICE, - "%s: detect looped node chain, " - "blkaddr:%u, next:%u", - __func__, blkaddr, next_blkaddr_of_node(page)); + f2fs_notice(sbi, "%s: detect looped node chain, blkaddr:%u, next:%u", + __func__, blkaddr, + next_blkaddr_of_node(page)); + f2fs_put_page(page, 1); err = -EINVAL; break; } @@ -312,18 +373,17 @@ next: blkaddr = next_blkaddr_of_node(page); f2fs_put_page(page, 1); - ra_meta_pages_cond(sbi, blkaddr); + f2fs_ra_meta_pages_cond(sbi, blkaddr); } - f2fs_put_page(page, 1); return err; } -static void destroy_fsync_dnodes(struct list_head *head) +static void destroy_fsync_dnodes(struct list_head *head, int drop) { struct fsync_inode_entry *entry, *tmp; list_for_each_entry_safe(entry, tmp, head, list) - del_fsync_inode(entry); + del_fsync_inode(entry, drop); } static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi, @@ -355,7 +415,9 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi, } } - sum_page = get_sum_page(sbi, segno); + sum_page = f2fs_get_sum_page(sbi, segno); + if (IS_ERR(sum_page)) + return PTR_ERR(sum_page); sum_node = (struct f2fs_summary_block *)page_address(sum_page); sum = sum_node->entries[blkoff]; f2fs_put_page(sum_page, 1); @@ -375,7 +437,7 @@ got_it: } /* Get the node page */ - node_page = get_node_page(sbi, nid); + node_page = f2fs_get_node_page(sbi, nid); if (IS_ERR(node_page)) return PTR_ERR(node_page); @@ -400,7 +462,8 @@ got_it: inode = dn->inode; } - bidx = start_bidx_of_node(offset, inode) + le16_to_cpu(sum.ofs_in_node); + bidx = f2fs_start_bidx_of_node(offset, inode) + + le16_to_cpu(sum.ofs_in_node); /* * if inode page is locked, unlock temporarily, but its reference @@ -410,11 +473,11 @@ got_it: unlock_page(dn->inode_page); set_new_dnode(&tdn, inode, NULL, NULL, 0); - if (get_dnode_of_data(&tdn, bidx, LOOKUP_NODE)) + if (f2fs_get_dnode_of_data(&tdn, bidx, LOOKUP_NODE)) goto out; if (tdn.data_blkaddr == blkaddr) - truncate_data_blocks_range(&tdn, 1); + f2fs_truncate_data_blocks_range(&tdn, 1); f2fs_put_dnode(&tdn); out: @@ -427,7 +490,7 @@ out: truncate_out: if (datablock_addr(tdn.inode, tdn.node_page, tdn.ofs_in_node) == blkaddr) - truncate_data_blocks_range(&tdn, 1); + f2fs_truncate_data_blocks_range(&tdn, 1); if (dn->inode->i_ino == nid && !dn->inode_page_locked) unlock_page(dn->inode_page); return 0; @@ -443,25 +506,25 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, /* step 1: recover xattr */ if (IS_INODE(page)) { - recover_inline_xattr(inode, page); + f2fs_recover_inline_xattr(inode, page); } else if (f2fs_has_xattr_block(ofs_of_node(page))) { - err = recover_xattr_data(inode, page); + err = f2fs_recover_xattr_data(inode, page); if (!err) recovered++; goto out; } /* step 2: recover inline data */ - if 
(recover_inline_data(inode, page)) + if (f2fs_recover_inline_data(inode, page)) goto out; /* step 3: recover data indices */ - start = start_bidx_of_node(ofs_of_node(page), inode); + start = f2fs_start_bidx_of_node(ofs_of_node(page), inode); end = start + ADDRS_PER_PAGE(page, inode); set_new_dnode(&dn, inode, NULL, NULL, 0); retry_dn: - err = get_dnode_of_data(&dn, start, ALLOC_NODE); + err = f2fs_get_dnode_of_data(&dn, start, ALLOC_NODE); if (err) { if (err == -ENOMEM) { congestion_wait(BLK_RW_ASYNC, HZ/50); @@ -470,17 +533,19 @@ retry_dn: goto out; } - f2fs_wait_on_page_writeback(dn.node_page, NODE, true); + f2fs_wait_on_page_writeback(dn.node_page, NODE, true, true); + + err = f2fs_get_node_info(sbi, dn.nid, &ni); + if (err) + goto err; - get_node_info(sbi, dn.nid, &ni); f2fs_bug_on(sbi, ni.ino != ino_of_node(page)); if (ofs_of_node(dn.node_page) != ofs_of_node(page)) { - f2fs_msg(sbi->sb, KERN_WARNING, - "Inconsistent ofs_of_node, ino:%lu, ofs:%u, %u", - inode->i_ino, ofs_of_node(dn.node_page), - ofs_of_node(page)); - err = -EFAULT; + f2fs_warn(sbi, "Inconsistent ofs_of_node, ino:%lu, ofs:%u, %u", + inode->i_ino, ofs_of_node(dn.node_page), + ofs_of_node(page)); + err = -EFSCORRUPTED; goto err; } @@ -490,13 +555,25 @@ retry_dn: src = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node); dest = datablock_addr(dn.inode, page, dn.ofs_in_node); + if (__is_valid_data_blkaddr(src) && + !f2fs_is_valid_blkaddr(sbi, src, META_POR)) { + err = -EFSCORRUPTED; + goto err; + } + + if (__is_valid_data_blkaddr(dest) && + !f2fs_is_valid_blkaddr(sbi, dest, META_POR)) { + err = -EFSCORRUPTED; + goto err; + } + /* skip recovering if dest is the same as src */ if (src == dest) continue; /* dest is invalid, just invalidate src block */ if (dest == NULL_ADDR) { - truncate_data_blocks_range(&dn, 1); + f2fs_truncate_data_blocks_range(&dn, 1); continue; } @@ -510,20 +587,19 @@ retry_dn: * and then reserve one new block in dnode page. */ if (dest == NEW_ADDR) { - truncate_data_blocks_range(&dn, 1); - reserve_new_block(&dn); + f2fs_truncate_data_blocks_range(&dn, 1); + f2fs_reserve_new_block(&dn); continue; } /* dest is valid block, try to recover from src to dest */ - if (is_valid_blkaddr(sbi, dest, META_POR)) { + if (f2fs_is_valid_blkaddr(sbi, dest, META_POR)) { if (src == NULL_ADDR) { - err = reserve_new_block(&dn); -#ifdef CONFIG_F2FS_FAULT_INJECTION - while (err) - err = reserve_new_block(&dn); -#endif + err = f2fs_reserve_new_block(&dn); + while (err && + IS_ENABLED(CONFIG_F2FS_FAULT_INJECTION)) + err = f2fs_reserve_new_block(&dn); /* We should not get -ENOSPC */ f2fs_bug_on(sbi, err); if (err) @@ -554,16 +630,14 @@ retry_prev: err: f2fs_put_dnode(&dn); out: - f2fs_msg(sbi->sb, KERN_NOTICE, - "recover_data: ino = %lx (i_size: %s) recovered = %d, err = %d", - inode->i_ino, - file_keep_isize(inode) ? "keep" : "recover", - recovered, err); + f2fs_notice(sbi, "recover_data: ino = %lx (i_size: %s) recovered = %d, err = %d", + inode->i_ino, file_keep_isize(inode) ? 
"keep" : "recover", + recovered, err); return err; } static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, - struct list_head *dir_list) + struct list_head *tmp_inode_list, struct list_head *dir_list) { struct curseg_info *curseg; struct page *page = NULL; @@ -577,12 +651,16 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, while (1) { struct fsync_inode_entry *entry; - if (!is_valid_blkaddr(sbi, blkaddr, META_POR)) + if (!f2fs_is_valid_blkaddr(sbi, blkaddr, META_POR)) break; - ra_meta_pages_cond(sbi, blkaddr); + f2fs_ra_meta_pages_cond(sbi, blkaddr); - page = get_tmp_page(sbi, blkaddr); + page = f2fs_get_tmp_page(sbi, blkaddr); + if (IS_ERR(page)) { + err = PTR_ERR(page); + break; + } if (!is_recoverable_dnode(page)) { f2fs_put_page(page, 1); @@ -597,8 +675,13 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, * In this case, we can lose the latest inode(x). * So, call recover_inode for the inode update. */ - if (IS_INODE(page)) - recover_inode(entry->inode, page); + if (IS_INODE(page)) { + err = recover_inode(entry->inode, page); + if (err) { + f2fs_put_page(page, 1); + break; + } + } if (entry->last_dentry == blkaddr) { err = recover_dentry(entry->inode, page, dir_list); if (err) { @@ -613,20 +696,20 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, } if (entry->blkaddr == blkaddr) - del_fsync_inode(entry); + list_move_tail(&entry->list, tmp_inode_list); next: /* check next segment */ blkaddr = next_blkaddr_of_node(page); f2fs_put_page(page, 1); } if (!err) - allocate_new_segments(sbi); + f2fs_allocate_new_segments(sbi); return err; } -int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) +int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) { - struct list_head inode_list; + struct list_head inode_list, tmp_inode_list; struct list_head dir_list; int err; int ret = 0; @@ -637,7 +720,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) #endif if (s_flags & MS_RDONLY) { - f2fs_msg(sbi->sb, KERN_INFO, "orphan cleanup on readonly fs"); + f2fs_info(sbi, "recover fsync data on readonly fs"); sbi->sb->s_flags &= ~MS_RDONLY; } @@ -656,6 +739,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) } INIT_LIST_HEAD(&inode_list); + INIT_LIST_HEAD(&tmp_inode_list); INIT_LIST_HEAD(&dir_list); /* prevent checkpoint */ @@ -674,11 +758,16 @@ int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) need_writecp = true; /* step #2: recover data */ - err = recover_data(sbi, &inode_list, &dir_list); + err = recover_data(sbi, &inode_list, &tmp_inode_list, &dir_list); if (!err) f2fs_bug_on(sbi, !list_empty(&inode_list)); + else { + /* restore s_flags to let iput() trash data */ + sbi->sb->s_flags = s_flags; + } skip: - destroy_fsync_dnodes(&inode_list); + destroy_fsync_dnodes(&inode_list, err); + destroy_fsync_dnodes(&tmp_inode_list, err); /* truncate meta pages to be used by the recovery */ truncate_inode_pages_range(META_MAPPING(sbi), @@ -687,19 +776,23 @@ skip: if (err) { truncate_inode_pages_final(NODE_MAPPING(sbi)); truncate_inode_pages_final(META_MAPPING(sbi)); + } else { + clear_sbi_flag(sbi, SBI_POR_DOING); } - - clear_sbi_flag(sbi, SBI_POR_DOING); mutex_unlock(&sbi->cp_mutex); /* let's drop all the directory inodes for clean checkpoint */ - destroy_fsync_dnodes(&dir_list); + destroy_fsync_dnodes(&dir_list, err); + + if (need_writecp) { + set_sbi_flag(sbi, SBI_IS_RECOVERED); - if (!err && need_writecp) { - 
struct cp_control cpc = { - .reason = CP_RECOVERY, - }; - err = write_checkpoint(sbi, &cpc); + if (!err) { + struct cp_control cpc = { + .reason = CP_RECOVERY, + }; + err = f2fs_write_checkpoint(sbi, &cpc); + } } kmem_cache_destroy(fsync_entry_slab); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 9abe8284a91d..7ddd8664487b 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/segment.c * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/fs.h> #include <linux/f2fs_fs.h> @@ -169,7 +166,7 @@ found: return result - size + __reverse_ffz(tmp); } -bool need_SSR(struct f2fs_sb_info *sbi) +bool f2fs_need_SSR(struct f2fs_sb_info *sbi) { int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES); int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS); @@ -177,23 +174,22 @@ bool need_SSR(struct f2fs_sb_info *sbi) if (test_opt(sbi, LFS)) return false; - if (sbi->gc_thread && sbi->gc_thread->gc_urgent) + if (sbi->gc_mode == GC_URGENT) + return true; + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) return true; return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs + SM_I(sbi)->min_ssr_sections + reserved_sections(sbi)); } -void register_inmem_page(struct inode *inode, struct page *page) +void f2fs_register_inmem_page(struct inode *inode, struct page *page) { - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct f2fs_inode_info *fi = F2FS_I(inode); struct inmem_pages *new; f2fs_trace_pid(page); - set_page_private(page, (unsigned long)ATOMIC_WRITTEN_PAGE); - SetPagePrivate(page); + f2fs_set_page_private(page, (unsigned long)ATOMIC_WRITTEN_PAGE); new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS); @@ -202,21 +198,18 @@ void register_inmem_page(struct inode *inode, struct page *page) INIT_LIST_HEAD(&new->list); /* increase reference count with clean state */ - mutex_lock(&fi->inmem_lock); get_page(page); - list_add_tail(&new->list, &fi->inmem_pages); - spin_lock(&sbi->inode_lock[ATOMIC_FILE]); - if (list_empty(&fi->inmem_ilist)) - list_add_tail(&fi->inmem_ilist, &sbi->inode_list[ATOMIC_FILE]); - spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); + mutex_lock(&F2FS_I(inode)->inmem_lock); + list_add_tail(&new->list, &F2FS_I(inode)->inmem_pages); inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES); - mutex_unlock(&fi->inmem_lock); + mutex_unlock(&F2FS_I(inode)->inmem_lock); trace_f2fs_register_inmem_page(page, INMEM); } static int __revoke_inmem_pages(struct inode *inode, - struct list_head *head, bool drop, bool recover) + struct list_head *head, bool drop, bool recover, + bool trylock) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct inmem_pages *cur, *tmp; @@ -228,7 +221,18 @@ static int __revoke_inmem_pages(struct inode *inode, if (drop) trace_f2fs_commit_inmem_page(page, INMEM_DROP); - lock_page(page); + if (trylock) { + /* + * to avoid deadlock in between page lock and + * inmem_lock. 
+ */ + if (!trylock_page(page)) + continue; + } else { + lock_page(page); + } + + f2fs_wait_on_page_writeback(page, DATA, true, true); if (recover) { struct dnode_of_data dn; @@ -237,7 +241,8 @@ static int __revoke_inmem_pages(struct inode *inode, trace_f2fs_commit_inmem_page(page, INMEM_REVOKE); retry: set_new_dnode(&dn, inode, NULL, NULL, 0); - err = get_dnode_of_data(&dn, page->index, LOOKUP_NODE); + err = f2fs_get_dnode_of_data(&dn, page->index, + LOOKUP_NODE); if (err) { if (err == -ENOMEM) { congestion_wait(BLK_RW_ASYNC, HZ/50); @@ -247,9 +252,15 @@ retry: err = -EAGAIN; goto next; } - get_node_info(sbi, dn.nid, &ni); + + err = f2fs_get_node_info(sbi, dn.nid, &ni); + if (err) { + f2fs_put_dnode(&dn); + return err; + } + if (cur->old_addr == NEW_ADDR) { - invalidate_blocks(sbi, dn.data_blkaddr); + f2fs_invalidate_blocks(sbi, dn.data_blkaddr); f2fs_update_data_blkaddr(&dn, NEW_ADDR); } else f2fs_replace_block(sbi, &dn, dn.data_blkaddr, @@ -258,10 +269,11 @@ retry: } next: /* we don't need to invalidate this in the sccessful status */ - if (drop || recover) + if (drop || recover) { ClearPageUptodate(page); - set_page_private(page, 0); - ClearPagePrivate(page); + clear_cold_data(page); + } + f2fs_clear_page_private(page); f2fs_put_page(page, 1); list_del(&cur->list); @@ -271,7 +283,7 @@ next: return err; } -void drop_inmem_pages_all(struct f2fs_sb_info *sbi) +void f2fs_drop_inmem_pages_all(struct f2fs_sb_info *sbi, bool gc_failure) { struct list_head *head = &sbi->inode_list[ATOMIC_FILE]; struct inode *inode; @@ -287,33 +299,45 @@ next: spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); if (inode) { - drop_inmem_pages(inode); + if (gc_failure) { + if (fi->i_gc_failures[GC_FAILURE_ATOMIC]) + goto drop; + goto skip; + } +drop: + set_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST); + f2fs_drop_inmem_pages(inode); iput(inode); } +skip: congestion_wait(BLK_RW_ASYNC, HZ/50); cond_resched(); goto next; } -void drop_inmem_pages(struct inode *inode) +void f2fs_drop_inmem_pages(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_inode_info *fi = F2FS_I(inode); - mutex_lock(&fi->inmem_lock); - __revoke_inmem_pages(inode, &fi->inmem_pages, true, false); + while (!list_empty(&fi->inmem_pages)) { + mutex_lock(&fi->inmem_lock); + __revoke_inmem_pages(inode, &fi->inmem_pages, + true, false, true); + mutex_unlock(&fi->inmem_lock); + } + + clear_inode_flag(inode, FI_ATOMIC_FILE); + fi->i_gc_failures[GC_FAILURE_ATOMIC] = 0; + stat_dec_atomic_write(inode); + spin_lock(&sbi->inode_lock[ATOMIC_FILE]); if (!list_empty(&fi->inmem_ilist)) list_del_init(&fi->inmem_ilist); spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); - mutex_unlock(&fi->inmem_lock); - - clear_inode_flag(inode, FI_ATOMIC_FILE); - clear_inode_flag(inode, FI_HOT_DATA); - stat_dec_atomic_write(inode); } -void drop_inmem_page(struct inode *inode, struct page *page) +void f2fs_drop_inmem_page(struct inode *inode, struct page *page) { struct f2fs_inode_info *fi = F2FS_I(inode); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); @@ -328,7 +352,7 @@ void drop_inmem_page(struct inode *inode, struct page *page) break; } - f2fs_bug_on(sbi, !cur || cur->page != page); + f2fs_bug_on(sbi, list_empty(head) || cur->page != page); list_del(&cur->list); mutex_unlock(&fi->inmem_lock); @@ -336,15 +360,13 @@ void drop_inmem_page(struct inode *inode, struct page *page) kmem_cache_free(inmem_entry_slab, cur); ClearPageUptodate(page); - set_page_private(page, 0); - ClearPagePrivate(page); + f2fs_clear_page_private(page); f2fs_put_page(page, 0); 
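/*
 * Editor's note, a hedged sketch, not part of the commit: the new
 * trylock mode of __revoke_inmem_pages(), used by
 * f2fs_drop_inmem_pages() above, breaks a potential ABBA deadlock
 * between the page lock and fi->inmem_lock:
 *
 *   drop path:        mutex_lock(&fi->inmem_lock) -> lock_page(page)
 *   invalidate path:  lock_page(page) -> mutex_lock(&fi->inmem_lock)
 *                     (assumed ordering; f2fs_drop_inmem_page() here
 *                     takes inmem_lock and its caller appears to hold
 *                     the page lock already)
 *
 * With trylock_page(), the drop path simply skips a contended page,
 * releases inmem_lock, and retries from the top of its while loop, so
 * neither path can wait on the other forever.
 */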
trace_f2fs_commit_inmem_page(page, INMEM_INVALIDATE); } -static int __commit_inmem_pages(struct inode *inode, - struct list_head *revoke_list) +static int __f2fs_commit_inmem_pages(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_inode_info *fi = F2FS_I(inode); @@ -357,9 +379,12 @@ static int __commit_inmem_pages(struct inode *inode, .op_flags = REQ_SYNC | REQ_PRIO, .io_type = FS_DATA_IO, }; - pgoff_t last_idx = ULONG_MAX; + struct list_head revoke_list; + bool submit_bio = false; int err = 0; + INIT_LIST_HEAD(&revoke_list); + list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) { struct page *page = cur->page; @@ -367,18 +392,19 @@ static int __commit_inmem_pages(struct inode *inode, if (page->mapping == inode->i_mapping) { trace_f2fs_commit_inmem_page(page, INMEM); + f2fs_wait_on_page_writeback(page, DATA, true, true); + set_page_dirty(page); - f2fs_wait_on_page_writeback(page, DATA, true); if (clear_page_dirty_for_io(page)) { inode_dec_dirty_pages(inode); - remove_dirty_inode(inode); + f2fs_remove_dirty_inode(inode); } retry: fio.page = page; fio.old_blkaddr = NULL_ADDR; fio.encrypted_page = NULL; fio.need_lock = LOCK_DONE; - err = do_write_data_page(&fio); + err = f2fs_do_write_data_page(&fio); if (err) { if (err == -ENOMEM) { congestion_wait(BLK_RW_ASYNC, HZ/50); @@ -390,62 +416,60 @@ retry: } /* record old blkaddr for revoking */ cur->old_addr = fio.old_blkaddr; - last_idx = page->index; + submit_bio = true; } unlock_page(page); - list_move_tail(&cur->list, revoke_list); + list_move_tail(&cur->list, &revoke_list); } - if (last_idx != ULONG_MAX) - f2fs_submit_merged_write_cond(sbi, inode, 0, last_idx, DATA); + if (submit_bio) + f2fs_submit_merged_write_cond(sbi, inode, NULL, 0, DATA); - if (!err) - __revoke_inmem_pages(inode, revoke_list, false, false); + if (err) { + /* + * try to revoke all committed pages, but still we could fail + * due to no memory or other reason, if that happened, EAGAIN + * will be returned, which means in such case, transaction is + * already not integrity, caller should use journal to do the + * recovery or rewrite & commit last transaction. For other + * error number, revoking was done by filesystem itself. + */ + err = __revoke_inmem_pages(inode, &revoke_list, + false, true, false); + + /* drop all uncommitted pages */ + __revoke_inmem_pages(inode, &fi->inmem_pages, + true, false, false); + } else { + __revoke_inmem_pages(inode, &revoke_list, + false, false, false); + } return err; } -int commit_inmem_pages(struct inode *inode) +int f2fs_commit_inmem_pages(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_inode_info *fi = F2FS_I(inode); - struct list_head revoke_list; int err; - INIT_LIST_HEAD(&revoke_list); f2fs_balance_fs(sbi, true); - f2fs_lock_op(sbi); + down_write(&fi->i_gc_rwsem[WRITE]); + + f2fs_lock_op(sbi); set_inode_flag(inode, FI_ATOMIC_COMMIT); mutex_lock(&fi->inmem_lock); - err = __commit_inmem_pages(inode, &revoke_list); - if (err) { - int ret; - /* - * try to revoke all committed pages, but still we could fail - * due to no memory or other reason, if that happened, EAGAIN - * will be returned, which means in such case, transaction is - * already not integrity, caller should use journal to do the - * recovery or rewrite & commit last transaction. For other - * error number, revoking was done by filesystem itself. 
- */ - ret = __revoke_inmem_pages(inode, &revoke_list, false, true); - if (ret) - err = ret; - - /* drop all uncommitted pages */ - __revoke_inmem_pages(inode, &fi->inmem_pages, true, false); - } - spin_lock(&sbi->inode_lock[ATOMIC_FILE]); - if (!list_empty(&fi->inmem_ilist)) - list_del_init(&fi->inmem_ilist); - spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); + err = __f2fs_commit_inmem_pages(inode); mutex_unlock(&fi->inmem_lock); clear_inode_flag(inode, FI_ATOMIC_COMMIT); f2fs_unlock_op(sbi); + up_write(&fi->i_gc_rwsem[WRITE]); + return err; } @@ -455,17 +479,18 @@ int commit_inmem_pages(struct inode *inode) */ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) { -#ifdef CONFIG_F2FS_FAULT_INJECTION if (time_to_inject(sbi, FAULT_CHECKPOINT)) { f2fs_show_injection_info(FAULT_CHECKPOINT); f2fs_stop_checkpoint(sbi, false); } -#endif /* balance_fs_bg is able to be pending */ if (need && excess_cached_nats(sbi)) f2fs_balance_fs_bg(sbi); + if (f2fs_is_checkpoint_ready(sbi)) + return; + /* * We should do GC or end up with checkpoint, if there are so many dirty * dir/node pages without enough free segments. @@ -482,33 +507,39 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) return; /* try to shrink extent cache when there is no enough memory */ - if (!available_free_memory(sbi, EXTENT_CACHE)) + if (!f2fs_available_free_memory(sbi, EXTENT_CACHE)) f2fs_shrink_extent_tree(sbi, EXTENT_CACHE_SHRINK_NUMBER); /* check the # of cached NAT entries */ - if (!available_free_memory(sbi, NAT_ENTRIES)) - try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK); + if (!f2fs_available_free_memory(sbi, NAT_ENTRIES)) + f2fs_try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK); - if (!available_free_memory(sbi, FREE_NIDS)) - try_to_free_nids(sbi, MAX_FREE_NIDS); + if (!f2fs_available_free_memory(sbi, FREE_NIDS)) + f2fs_try_to_free_nids(sbi, MAX_FREE_NIDS); else - build_free_nids(sbi, false, false); + f2fs_build_free_nids(sbi, false, false); - if (!is_idle(sbi) && !excess_dirty_nats(sbi)) + if (!is_idle(sbi, REQ_TIME) && + (!excess_dirty_nats(sbi) && !excess_dirty_nodes(sbi))) return; /* checkpoint is the only way to shrink partial cached entries */ - if (!available_free_memory(sbi, NAT_ENTRIES) || - !available_free_memory(sbi, INO_ENTRIES) || + if (!f2fs_available_free_memory(sbi, NAT_ENTRIES) || + !f2fs_available_free_memory(sbi, INO_ENTRIES) || excess_prefree_segs(sbi) || excess_dirty_nats(sbi) || + excess_dirty_nodes(sbi) || f2fs_time_over(sbi, CP_TIME)) { if (test_opt(sbi, DATA_FLUSH)) { struct blk_plug plug; + mutex_lock(&sbi->flush_lock); + blk_start_plug(&plug); - sync_dirty_inodes(sbi, FILE_INODE); + f2fs_sync_dirty_inodes(sbi, FILE_INODE); blk_finish_plug(&plug); + + mutex_unlock(&sbi->flush_lock); } f2fs_sync_fs(sbi->sb, true); stat_inc_bg_cp_count(sbi->stat_info); @@ -518,9 +549,13 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) static int __submit_flush_wait(struct f2fs_sb_info *sbi, struct block_device *bdev) { - struct bio *bio = f2fs_bio_alloc(sbi, 0, true); + struct bio *bio; int ret; + bio = f2fs_bio_alloc(sbi, 0, false); + if (!bio) + return -ENOMEM; + bio->bi_rw = REQ_OP_WRITE; bio->bi_bdev = bdev; ret = submit_bio_wait(WRITE_FLUSH, bio); @@ -536,11 +571,11 @@ static int submit_flush_wait(struct f2fs_sb_info *sbi, nid_t ino) int ret = 0; int i; - if (!sbi->s_ndevs) + if (!f2fs_is_multi_device(sbi)) return __submit_flush_wait(sbi, sbi->sb->s_bdev); for (i = 0; i < sbi->s_ndevs; i++) { - if (!is_dirty_device(sbi, ino, i, FLUSH_INO)) + if (!f2fs_is_dirty_device(sbi, ino, i, FLUSH_INO)) continue; ret = 
__submit_flush_wait(sbi, FDEV(i).bdev); if (ret) @@ -597,14 +632,17 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino) return 0; if (!test_opt(sbi, FLUSH_MERGE)) { + atomic_inc(&fcc->queued_flush); ret = submit_flush_wait(sbi, ino); + atomic_dec(&fcc->queued_flush); atomic_inc(&fcc->issued_flush); return ret; } - if (atomic_inc_return(&fcc->issing_flush) == 1 || sbi->s_ndevs > 1) { + if (atomic_inc_return(&fcc->queued_flush) == 1 || + f2fs_is_multi_device(sbi)) { ret = submit_flush_wait(sbi, ino); - atomic_dec(&fcc->issing_flush); + atomic_dec(&fcc->queued_flush); atomic_inc(&fcc->issued_flush); return ret; @@ -623,14 +661,14 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino) if (fcc->f2fs_issue_flush) { wait_for_completion(&cmd.wait); - atomic_dec(&fcc->issing_flush); + atomic_dec(&fcc->queued_flush); } else { struct llist_node *list; list = llist_del_all(&fcc->issue_list); if (!list) { wait_for_completion(&cmd.wait); - atomic_dec(&fcc->issing_flush); + atomic_dec(&fcc->queued_flush); } else { struct flush_cmd *tmp, *next; @@ -639,7 +677,7 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino) llist_for_each_entry_safe(tmp, next, list, llnode) { if (tmp == &cmd) { cmd.ret = ret; - atomic_dec(&fcc->issing_flush); + atomic_dec(&fcc->queued_flush); continue; } tmp->ret = ret; @@ -651,7 +689,7 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino) return cmd.ret; } -int create_flush_cmd_control(struct f2fs_sb_info *sbi) +int f2fs_create_flush_cmd_control(struct f2fs_sb_info *sbi) { dev_t dev = sbi->sb->s_bdev->bd_dev; struct flush_cmd_control *fcc; @@ -668,7 +706,7 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi) if (!fcc) return -ENOMEM; atomic_set(&fcc->issued_flush, 0); - atomic_set(&fcc->issing_flush, 0); + atomic_set(&fcc->queued_flush, 0); init_waitqueue_head(&fcc->flush_wait_queue); init_llist_head(&fcc->issue_list); SM_I(sbi)->fcc_info = fcc; @@ -680,7 +718,7 @@ init_thread: "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev)); if (IS_ERR(fcc->f2fs_issue_flush)) { err = PTR_ERR(fcc->f2fs_issue_flush); - kfree(fcc); + kvfree(fcc); SM_I(sbi)->fcc_info = NULL; return err; } @@ -688,7 +726,7 @@ init_thread: return err; } -void destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free) +void f2fs_destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free) { struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info; @@ -699,7 +737,7 @@ void destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free) kthread_stop(flush_thread); } if (free) { - kfree(fcc); + kvfree(fcc); SM_I(sbi)->fcc_info = NULL; } } @@ -708,7 +746,7 @@ int f2fs_flush_device_cache(struct f2fs_sb_info *sbi) { int ret = 0, i; - if (!sbi->s_ndevs) + if (!f2fs_is_multi_device(sbi)) return 0; for (i = 1; i < sbi->s_ndevs; i++) { @@ -780,7 +818,7 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); - unsigned short valid_blocks; + unsigned short valid_blocks, ckpt_valid_blocks; if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno)) return; @@ -788,8 +826,10 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) mutex_lock(&dirty_i->seglist_lock); valid_blocks = get_valid_blocks(sbi, segno, false); + ckpt_valid_blocks = get_ckpt_valid_blocks(sbi, segno); - if (valid_blocks == 0) { + if (valid_blocks == 0 && (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) || + ckpt_valid_blocks == sbi->blocks_per_seg)) { 
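/*
 * Editor's note, illustrative and not part of the commit: under
 * checkpoint=disable (SBI_CP_DISABLED) a crash rolls the filesystem
 * back to the last checkpoint, so blocks invalidated since then must
 * not be reused; that is presumably why the test above also consults
 * get_ckpt_valid_blocks() before treating a segment as prefree.
 * The helpers added below account for this reserved space:
 * f2fs_get_unusable_blocks() counts the holes (invalid blocks) in
 * every dirty segment, separately for DATA and NODE, keeps the larger
 * of the two sums, and subtracts the overprovision holes. With
 * made-up numbers, holes[DATA] = 1000, holes[NODE] = 400 and
 * ovp_holes = 512:
 *
 *   unusable = max(1000, 400) - 512 = 488 blocks
 *
 * f2fs_disable_cp_again() then refuses to keep checkpointing disabled
 * once that figure exceeds F2FS_OPTION(sbi).unusable_cap.
 */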
__locate_dirty_segment(sbi, segno, PRE); __remove_dirty_segment(sbi, segno, DIRTY); } else if (valid_blocks < sbi->blocks_per_seg) { @@ -802,6 +842,82 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) mutex_unlock(&dirty_i->seglist_lock); } +/* This moves currently empty dirty blocks to prefree. Must hold seglist_lock */ +void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi) +{ + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + unsigned int segno; + + mutex_lock(&dirty_i->seglist_lock); + for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) { + if (get_valid_blocks(sbi, segno, false)) + continue; + if (IS_CURSEG(sbi, segno)) + continue; + __locate_dirty_segment(sbi, segno, PRE); + __remove_dirty_segment(sbi, segno, DIRTY); + } + mutex_unlock(&dirty_i->seglist_lock); +} + +block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi) +{ + int ovp_hole_segs = + (overprovision_segments(sbi) - reserved_segments(sbi)); + block_t ovp_holes = ovp_hole_segs << sbi->log_blocks_per_seg; + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + block_t holes[2] = {0, 0}; /* DATA and NODE */ + block_t unusable; + struct seg_entry *se; + unsigned int segno; + + mutex_lock(&dirty_i->seglist_lock); + for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) { + se = get_seg_entry(sbi, segno); + if (IS_NODESEG(se->type)) + holes[NODE] += sbi->blocks_per_seg - se->valid_blocks; + else + holes[DATA] += sbi->blocks_per_seg - se->valid_blocks; + } + mutex_unlock(&dirty_i->seglist_lock); + + unusable = holes[DATA] > holes[NODE] ? holes[DATA] : holes[NODE]; + if (unusable > ovp_holes) + return unusable - ovp_holes; + return 0; +} + +int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable) +{ + int ovp_hole_segs = + (overprovision_segments(sbi) - reserved_segments(sbi)); + if (unusable > F2FS_OPTION(sbi).unusable_cap) + return -EAGAIN; + if (is_sbi_flag_set(sbi, SBI_CP_DISABLED_QUICK) && + dirty_segments(sbi) > ovp_hole_segs) + return -EAGAIN; + return 0; +} + +/* This is only used by SBI_CP_DISABLED */ +static unsigned int get_free_segment(struct f2fs_sb_info *sbi) +{ + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + unsigned int segno = 0; + + mutex_lock(&dirty_i->seglist_lock); + for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) { + if (get_valid_blocks(sbi, segno, false)) + continue; + if (get_ckpt_valid_blocks(sbi, segno)) + continue; + mutex_unlock(&dirty_i->seglist_lock); + return segno; + } + mutex_unlock(&dirty_i->seglist_lock); + return NULL_SEGNO; +} + static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi, struct block_device *bdev, block_t lstart, block_t start, block_t len) @@ -822,9 +938,12 @@ static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi, dc->len = len; dc->ref = 0; dc->state = D_PREP; + dc->queued = 0; dc->error = 0; init_completion(&dc->wait); list_add_tail(&dc->list, pend_list); + spin_lock_init(&dc->lock); + dc->bio_ref = 0; atomic_inc(&dcc->discard_cmd_cnt); dcc->undiscard_blks += len; @@ -851,7 +970,7 @@ static void __detach_discard_cmd(struct discard_cmd_control *dcc, struct discard_cmd *dc) { if (dc->state == D_DONE) - atomic_dec(&dcc->issing_discard); + atomic_sub(dc->queued, &dcc->queued_discard); list_del(&dc->list); rb_erase(&dc->rb_node, &dcc->root); @@ -866,28 +985,43 @@ static void __remove_discard_cmd(struct f2fs_sb_info *sbi, struct discard_cmd *dc) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + unsigned long flags; 
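/*
 * Editor's note, grounded in the hunks below rather than the commit
 * message: a discard_cmd may now be split into several bios when its
 * length exceeds the device's max_discard_sectors limit (see
 * __submit_discard_cmd() later in this diff). dc->bio_ref counts the
 * splits still in flight, and D_PARTIAL marks a command whose last
 * split has not been issued yet. The coordination is roughly:
 *
 *   submit: dc->bio_ref++ per split bio; the final split sets
 *           dc->state = D_SUBMIT
 *   endio:  dc->bio_ref--; when it hits zero with state D_SUBMIT,
 *           set D_DONE and complete_all(&dc->wait)
 *   remove: the check just below bails out while dc->bio_ref != 0,
 *           so a command with in-flight splits is never freed under
 *           its own endio handler
 */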
trace_f2fs_remove_discard(dc->bdev, dc->start, dc->len); + spin_lock_irqsave(&dc->lock, flags); + if (dc->bio_ref) { + spin_unlock_irqrestore(&dc->lock, flags); + return; + } + spin_unlock_irqrestore(&dc->lock, flags); + f2fs_bug_on(sbi, dc->ref); if (dc->error == -EOPNOTSUPP) dc->error = 0; if (dc->error) - f2fs_msg(sbi->sb, KERN_INFO, - "Issue discard(%u, %u, %u) failed, ret: %d", - dc->lstart, dc->start, dc->len, dc->error); + printk_ratelimited( + "%sF2FS-fs: Issue discard(%u, %u, %u) failed, ret: %d", + KERN_INFO, dc->lstart, dc->start, dc->len, dc->error); __detach_discard_cmd(dcc, dc); } static void f2fs_submit_discard_endio(struct bio *bio) { struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private; + unsigned long flags; dc->error = bio->bi_error; - dc->state = D_DONE; - complete_all(&dc->wait); + + spin_lock_irqsave(&dc->lock, flags); + dc->bio_ref--; + if (!dc->bio_ref && dc->state == D_SUBMIT) { + dc->state = D_DONE; + complete_all(&dc->wait); + } + spin_unlock_irqrestore(&dc->lock, flags); bio_put(bio); } @@ -1006,69 +1140,147 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi, /* common policy */ dpolicy->type = discard_type; dpolicy->sync = true; + dpolicy->ordered = false; dpolicy->granularity = granularity; dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST; dpolicy->io_aware_gran = MAX_PLIST_NUM; + dpolicy->timeout = 0; if (discard_type == DPOLICY_BG) { dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME; + dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME; dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME; dpolicy->io_aware = true; dpolicy->sync = false; + dpolicy->ordered = true; if (utilization(sbi) > DEF_DISCARD_URGENT_UTIL) { dpolicy->granularity = 1; dpolicy->max_interval = DEF_MIN_DISCARD_ISSUE_TIME; } } else if (discard_type == DPOLICY_FORCE) { dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME; + dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME; dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME; dpolicy->io_aware = false; } else if (discard_type == DPOLICY_FSTRIM) { dpolicy->io_aware = false; } else if (discard_type == DPOLICY_UMOUNT) { + dpolicy->max_requests = UINT_MAX; dpolicy->io_aware = false; + /* we need to issue all to keep CP_TRIMMED_FLAG */ + dpolicy->granularity = 1; } } - +static void __update_discard_tree_range(struct f2fs_sb_info *sbi, + struct block_device *bdev, block_t lstart, + block_t start, block_t len); /* this function is copied from blkdev_issue_discard from block/blk-lib.c */ -static void __submit_discard_cmd(struct f2fs_sb_info *sbi, +static int __submit_discard_cmd(struct f2fs_sb_info *sbi, struct discard_policy *dpolicy, - struct discard_cmd *dc) + struct discard_cmd *dc, + unsigned int *issued) { + struct block_device *bdev = dc->bdev; + struct request_queue *q = bdev_get_queue(bdev); + unsigned int max_discard_blocks = + SECTOR_TO_BLOCK(q->limits.max_discard_sectors); struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ? &(dcc->fstrim_list) : &(dcc->wait_list); - struct bio *bio = NULL; int flag = dpolicy->sync ? 
REQ_SYNC : 0; + block_t lstart, start, len, total_len; + int err = 0; if (dc->state != D_PREP) - return; + return 0; - trace_f2fs_issue_discard(dc->bdev, dc->start, dc->len); + if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) + return 0; - dc->error = __blkdev_issue_discard(dc->bdev, - SECTOR_FROM_BLOCK(dc->start), - SECTOR_FROM_BLOCK(dc->len), - GFP_NOFS, 0, &bio); - if (!dc->error) { - /* should keep before submission to avoid D_DONE right away */ - dc->state = D_SUBMIT; - atomic_inc(&dcc->issued_discard); - atomic_inc(&dcc->issing_discard); - if (bio) { - bio->bi_private = dc; - bio->bi_end_io = f2fs_submit_discard_endio; - submit_bio(flag, bio); - list_move_tail(&dc->list, wait_list); - __check_sit_bitmap(sbi, dc->start, dc->start + dc->len); + trace_f2fs_issue_discard(bdev, dc->start, dc->len); + + lstart = dc->lstart; + start = dc->start; + len = dc->len; + total_len = len; + + dc->len = 0; - f2fs_update_iostat(sbi, FS_DISCARD, 1); + while (total_len && *issued < dpolicy->max_requests && !err) { + struct bio *bio = NULL; + unsigned long flags; + bool last = true; + + if (len > max_discard_blocks) { + len = max_discard_blocks; + last = false; } - } else { - __remove_discard_cmd(sbi, dc); + + (*issued)++; + if (*issued == dpolicy->max_requests) + last = true; + + dc->len += len; + + if (time_to_inject(sbi, FAULT_DISCARD)) { + f2fs_show_injection_info(FAULT_DISCARD); + err = -EIO; + goto submit; + } + err = __blkdev_issue_discard(bdev, + SECTOR_FROM_BLOCK(start), + SECTOR_FROM_BLOCK(len), + GFP_NOFS, 0, &bio); +submit: + if (err) { + spin_lock_irqsave(&dc->lock, flags); + if (dc->state == D_PARTIAL) + dc->state = D_SUBMIT; + spin_unlock_irqrestore(&dc->lock, flags); + + break; + } + + f2fs_bug_on(sbi, !bio); + + /* + * should keep before submission to avoid D_DONE + * right away + */ + spin_lock_irqsave(&dc->lock, flags); + if (last) + dc->state = D_SUBMIT; + else + dc->state = D_PARTIAL; + dc->bio_ref++; + spin_unlock_irqrestore(&dc->lock, flags); + + atomic_inc(&dcc->queued_discard); + dc->queued++; + list_move_tail(&dc->list, wait_list); + + /* sanity check on discard range */ + __check_sit_bitmap(sbi, lstart, lstart + len); + + bio->bi_private = dc; + bio->bi_end_io = f2fs_submit_discard_endio; + submit_bio(flag, bio); + + atomic_inc(&dcc->issued_discard); + + f2fs_update_iostat(sbi, FS_DISCARD, 1); + + lstart += len; + start += len; + total_len -= len; + len = total_len; } + + if (!err && len) + __update_discard_tree_range(sbi, bdev, lstart, start, len); + return err; } static struct discard_cmd *__insert_discard_tree(struct f2fs_sb_info *sbi, @@ -1088,7 +1300,7 @@ static struct discard_cmd *__insert_discard_tree(struct f2fs_sb_info *sbi, goto do_insert; } - p = __lookup_rb_tree_for_insert(sbi, &dcc->root, &parent, lstart); + p = f2fs_lookup_rb_tree_for_insert(sbi, &dcc->root, &parent, lstart); do_insert: dc = __attach_discard_cmd(sbi, bdev, lstart, start, len, parent, p); if (!dc) @@ -1149,11 +1361,12 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi, struct discard_cmd *dc; struct discard_info di = {0}; struct rb_node **insert_p = NULL, *insert_parent = NULL; + struct request_queue *q = bdev_get_queue(bdev); + unsigned int max_discard_blocks = + SECTOR_TO_BLOCK(q->limits.max_discard_sectors); block_t end = lstart + len; - mutex_lock(&dcc->cmd_lock); - - dc = (struct discard_cmd *)__lookup_rb_tree_ret(&dcc->root, + dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root, NULL, lstart, (struct rb_entry **)&prev_dc, (struct rb_entry **)&next_dc, @@ -1192,7 +1405,8 @@ 
static void __update_discard_tree_range(struct f2fs_sb_info *sbi, if (prev_dc && prev_dc->state == D_PREP && prev_dc->bdev == bdev && - __is_discard_back_mergeable(&di, &prev_dc->di)) { + __is_discard_back_mergeable(&di, &prev_dc->di, + max_discard_blocks)) { prev_dc->di.len += di.len; dcc->undiscard_blks += di.len; __relocate_discard_cmd(dcc, prev_dc); @@ -1203,7 +1417,8 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi, if (next_dc && next_dc->state == D_PREP && next_dc->bdev == bdev && - __is_discard_front_mergeable(&di, &next_dc->di)) { + __is_discard_front_mergeable(&di, &next_dc->di, + max_discard_blocks)) { next_dc->di.lstart = di.lstart; next_dc->di.len += di.len; next_dc->di.start = di.start; @@ -1226,8 +1441,6 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi, node = rb_next(&prev_dc->rb_node); next_dc = rb_entry_safe(node, struct discard_cmd, rb_node); } - - mutex_unlock(&dcc->cmd_lock); } static int __queue_discard_cmd(struct f2fs_sb_info *sbi, @@ -1235,17 +1448,82 @@ static int __queue_discard_cmd(struct f2fs_sb_info *sbi, { block_t lblkstart = blkstart; + if (!f2fs_bdev_support_discard(bdev)) + return 0; + trace_f2fs_queue_discard(bdev, blkstart, blklen); - if (sbi->s_ndevs) { + if (f2fs_is_multi_device(sbi)) { int devi = f2fs_target_device_index(sbi, blkstart); blkstart -= FDEV(devi).start_blk; } + mutex_lock(&SM_I(sbi)->dcc_info->cmd_lock); __update_discard_tree_range(sbi, bdev, lblkstart, blkstart, blklen); + mutex_unlock(&SM_I(sbi)->dcc_info->cmd_lock); return 0; } +static unsigned int __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi, + struct discard_policy *dpolicy) +{ + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + struct discard_cmd *prev_dc = NULL, *next_dc = NULL; + struct rb_node **insert_p = NULL, *insert_parent = NULL; + struct discard_cmd *dc; + struct blk_plug plug; + unsigned int pos = dcc->next_pos; + unsigned int issued = 0; + bool io_interrupted = false; + + mutex_lock(&dcc->cmd_lock); + dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root, + NULL, pos, + (struct rb_entry **)&prev_dc, + (struct rb_entry **)&next_dc, + &insert_p, &insert_parent, true); + if (!dc) + dc = next_dc; + + blk_start_plug(&plug); + + while (dc) { + struct rb_node *node; + int err = 0; + + if (dc->state != D_PREP) + goto next; + + if (dpolicy->io_aware && !is_idle(sbi, DISCARD_TIME)) { + io_interrupted = true; + break; + } + + dcc->next_pos = dc->lstart + dc->len; + err = __submit_discard_cmd(sbi, dpolicy, dc, &issued); + + if (issued >= dpolicy->max_requests) + break; +next: + node = rb_next(&dc->rb_node); + if (err) + __remove_discard_cmd(sbi, dc); + dc = rb_entry_safe(node, struct discard_cmd, rb_node); + } + + blk_finish_plug(&plug); + + if (!dc) + dcc->next_pos = 0; + + mutex_unlock(&dcc->cmd_lock); + + if (!issued && io_interrupted) + issued = -1; + + return issued; +} + static int __issue_discard_cmd(struct f2fs_sb_info *sbi, struct discard_policy *dpolicy) { @@ -1253,39 +1531,55 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi, struct list_head *pend_list; struct discard_cmd *dc, *tmp; struct blk_plug plug; - int i, iter = 0, issued = 0; + int i, issued = 0; bool io_interrupted = false; + if (dpolicy->timeout != 0) + f2fs_update_time(sbi, dpolicy->timeout); + for (i = MAX_PLIST_NUM - 1; i >= 0; i--) { + if (dpolicy->timeout != 0 && + f2fs_time_over(sbi, dpolicy->timeout)) + break; + if (i + 1 < dpolicy->granularity) break; + + if (i < DEFAULT_DISCARD_GRANULARITY && dpolicy->ordered) + return 
__issue_discard_cmd_orderly(sbi, dpolicy); + pend_list = &dcc->pend_list[i]; mutex_lock(&dcc->cmd_lock); if (list_empty(pend_list)) goto next; - f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, &dcc->root)); + if (unlikely(dcc->rbtree_check)) + f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi, + &dcc->root)); blk_start_plug(&plug); list_for_each_entry_safe(dc, tmp, pend_list, list) { f2fs_bug_on(sbi, dc->state != D_PREP); + if (dpolicy->timeout != 0 && + f2fs_time_over(sbi, dpolicy->timeout)) + break; + if (dpolicy->io_aware && i < dpolicy->io_aware_gran && - !is_idle(sbi)) { + !is_idle(sbi, DISCARD_TIME)) { io_interrupted = true; - goto skip; + break; } - __submit_discard_cmd(sbi, dpolicy, dc); - issued++; -skip: - if (++iter >= dpolicy->max_requests) + __submit_discard_cmd(sbi, dpolicy, dc, &issued); + + if (issued >= dpolicy->max_requests) break; } blk_finish_plug(&plug); next: mutex_unlock(&dcc->cmd_lock); - if (iter >= dpolicy->max_requests) + if (issued >= dpolicy->max_requests || io_interrupted) break; } @@ -1317,7 +1611,7 @@ static bool __drop_discard_cmd(struct f2fs_sb_info *sbi) return dropped; } -void drop_discard_cmd(struct f2fs_sb_info *sbi) +void f2fs_drop_discard_cmd(struct f2fs_sb_info *sbi) { __drop_discard_cmd(sbi); } @@ -1383,21 +1677,22 @@ next: return trimmed; } -static void __wait_all_discard_cmd(struct f2fs_sb_info *sbi, +static unsigned int __wait_all_discard_cmd(struct f2fs_sb_info *sbi, struct discard_policy *dpolicy) { struct discard_policy dp; + unsigned int discard_blks; - if (dpolicy) { - __wait_discard_cmd_range(sbi, dpolicy, 0, UINT_MAX); - return; - } + if (dpolicy) + return __wait_discard_cmd_range(sbi, dpolicy, 0, UINT_MAX); /* wait all */ __init_discard_policy(sbi, &dp, DPOLICY_FSTRIM, 1); - __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX); + discard_blks = __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX); __init_discard_policy(sbi, &dp, DPOLICY_UMOUNT, 1); - __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX); + discard_blks += __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX); + + return discard_blks; } /* This should be covered by global mutex, &sit_i->sentry_lock */ @@ -1408,7 +1703,8 @@ static void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) bool need_wait = false; mutex_lock(&dcc->cmd_lock); - dc = (struct discard_cmd *)__lookup_rb_tree(&dcc->root, NULL, blkaddr); + dc = (struct discard_cmd *)f2fs_lookup_rb_tree(&dcc->root, + NULL, blkaddr); if (dc) { if (dc->state == D_PREP) { __punch_discard_cmd(sbi, dc, blkaddr); @@ -1423,7 +1719,7 @@ static void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) __wait_one_discard_bio(sbi, dc); } -void stop_discard_thread(struct f2fs_sb_info *sbi) +void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; @@ -1436,7 +1732,7 @@ void stop_discard_thread(struct f2fs_sb_info *sbi) } /* This comes from f2fs_put_super */ -bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi) +bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; struct discard_policy dpolicy; @@ -1444,11 +1740,14 @@ bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi) __init_discard_policy(sbi, &dpolicy, DPOLICY_UMOUNT, dcc->discard_granularity); + dpolicy.timeout = UMOUNT_DISCARD_TIMEOUT; __issue_discard_cmd(sbi, &dpolicy); dropped = __drop_discard_cmd(sbi); /* just to make sure there is no pending discard commands */ __wait_all_discard_cmd(sbi, NULL); + + f2fs_bug_on(sbi, 
atomic_read(&dcc->discard_cmd_cnt)); return dropped; } @@ -1471,25 +1770,38 @@ static int issue_discard_thread(void *data) kthread_should_stop() || freezing(current) || dcc->discard_wake, msecs_to_jiffies(wait_ms)); + + if (dcc->discard_wake) + dcc->discard_wake = 0; + + /* clean up pending candidates before going to sleep */ + if (atomic_read(&dcc->queued_discard)) + __wait_all_discard_cmd(sbi, NULL); + if (try_to_freeze()) continue; if (f2fs_readonly(sbi->sb)) continue; if (kthread_should_stop()) return 0; + if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) { + wait_ms = dpolicy.max_interval; + continue; + } - if (dcc->discard_wake) - dcc->discard_wake = 0; - - if (sbi->gc_thread && sbi->gc_thread->gc_urgent) + if (sbi->gc_mode == GC_URGENT) __init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE, 1); sb_start_intwrite(sbi->sb); issued = __issue_discard_cmd(sbi, &dpolicy); - if (issued) { + if (issued > 0) { __wait_all_discard_cmd(sbi, &dpolicy); wait_ms = dpolicy.min_interval; + } else if (issued == -1){ + wait_ms = f2fs_time_to_wait(sbi, DISCARD_TIME); + if (!wait_ms) + wait_ms = dpolicy.mid_interval; } else { wait_ms = dpolicy.max_interval; } @@ -1508,42 +1820,34 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi, block_t lblkstart = blkstart; int devi = 0; - if (sbi->s_ndevs) { + if (f2fs_is_multi_device(sbi)) { devi = f2fs_target_device_index(sbi, blkstart); + if (blkstart < FDEV(devi).start_blk || + blkstart > FDEV(devi).end_blk) { + f2fs_err(sbi, "Invalid block %x", blkstart); + return -EIO; + } blkstart -= FDEV(devi).start_blk; } - /* - * We need to know the type of the zone: for conventional zones, - * use regular discard if the drive supports it. For sequential - * zones, reset the zone write pointer. - */ - switch (get_blkz_type(sbi, bdev, blkstart)) { - - case BLK_ZONE_TYPE_CONVENTIONAL: - if (!blk_queue_discard(bdev_get_queue(bdev))) - return 0; - return __queue_discard_cmd(sbi, bdev, lblkstart, blklen); - case BLK_ZONE_TYPE_SEQWRITE_REQ: - case BLK_ZONE_TYPE_SEQWRITE_PREF: + /* For sequential zones, reset the zone write pointer */ + if (f2fs_blkz_is_seq(sbi, devi, blkstart)) { sector = SECTOR_FROM_BLOCK(blkstart); nr_sects = SECTOR_FROM_BLOCK(blklen); if (sector & (bdev_zone_sectors(bdev) - 1) || nr_sects != bdev_zone_sectors(bdev)) { - f2fs_msg(sbi->sb, KERN_INFO, - "(%d) %s: Unaligned discard attempted (block %x + %x)", - devi, sbi->s_ndevs ? FDEV(devi).path: "", - blkstart, blklen); + f2fs_err(sbi, "(%d) %s: Unaligned zone reset attempted (block %x + %x)", + devi, sbi->s_ndevs ? FDEV(devi).path : "", + blkstart, blklen); return -EIO; } trace_f2fs_issue_reset_zone(bdev, blkstart); - return blkdev_reset_zones(bdev, sector, - nr_sects, GFP_NOFS); - default: - /* Unknown zone type: broken device ? 
*/ - return -EIO; + return blkdev_reset_zones(bdev, sector, nr_sects, GFP_NOFS); } + + /* For conventional zones, use regular discard if supported */ + return __queue_discard_cmd(sbi, bdev, lblkstart, blklen); } #endif @@ -1551,8 +1855,7 @@ static int __issue_discard_async(struct f2fs_sb_info *sbi, struct block_device *bdev, block_t blkstart, block_t blklen) { #ifdef CONFIG_BLK_DEV_ZONED - if (f2fs_sb_has_blkzoned(sbi->sb) && - bdev_zoned_model(bdev) != BLK_ZONED_NONE) + if (f2fs_sb_has_blkzoned(sbi) && bdev_is_zoned(bdev)) return __f2fs_issue_discard_zone(sbi, bdev, blkstart, blklen); #endif return __queue_discard_cmd(sbi, bdev, blkstart, blklen); @@ -1614,11 +1917,11 @@ static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc, struct list_head *head = &SM_I(sbi)->dcc_info->entry_list; int i; - if (se->valid_blocks == max_blocks || !f2fs_discard_en(sbi)) + if (se->valid_blocks == max_blocks || !f2fs_hw_support_discard(sbi)) return false; if (!force) { - if (!test_opt(sbi, DISCARD) || !se->valid_blocks || + if (!f2fs_realtime_discard_enable(sbi) || !se->valid_blocks || SM_I(sbi)->dcc_info->nr_discards >= SM_I(sbi)->dcc_info->max_discards) return false; @@ -1658,20 +1961,24 @@ static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc, return false; } -void release_discard_addrs(struct f2fs_sb_info *sbi) +static void release_discard_addr(struct discard_entry *entry) +{ + list_del(&entry->list); + kmem_cache_free(discard_entry_slab, entry); +} + +void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi) { struct list_head *head = &(SM_I(sbi)->dcc_info->entry_list); struct discard_entry *entry, *this; /* drop caches */ - list_for_each_entry_safe(entry, this, head, list) { - list_del(&entry->list); - kmem_cache_free(discard_entry_slab, entry); - } + list_for_each_entry_safe(entry, this, head, list) + release_discard_addr(entry); } /* - * Should call clear_prefree_segments after checkpoint is done. + * Should call f2fs_clear_prefree_segments after checkpoint is done. 
*/ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi) { @@ -1684,7 +1991,8 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi) mutex_unlock(&dirty_i->seglist_lock); } -void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc) +void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi, + struct cp_control *cpc) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; struct list_head *head = &dcc->entry_list; @@ -1694,30 +2002,39 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc) unsigned int start = 0, end = -1; unsigned int secno, start_segno; bool force = (cpc->reason & CP_DISCARD); + bool need_align = test_opt(sbi, LFS) && __is_large_section(sbi); mutex_lock(&dirty_i->seglist_lock); while (1) { int i; + + if (need_align && end != -1) + end--; start = find_next_bit(prefree_map, MAIN_SEGS(sbi), end + 1); if (start >= MAIN_SEGS(sbi)) break; end = find_next_zero_bit(prefree_map, MAIN_SEGS(sbi), start + 1); - for (i = start; i < end; i++) - clear_bit(i, prefree_map); + if (need_align) { + start = rounddown(start, sbi->segs_per_sec); + end = roundup(end, sbi->segs_per_sec); + } - dirty_i->nr_dirty[PRE] -= end - start; + for (i = start; i < end; i++) { + if (test_and_clear_bit(i, prefree_map)) + dirty_i->nr_dirty[PRE]--; + } - if (!test_opt(sbi, DISCARD)) + if (!f2fs_realtime_discard_enable(sbi)) continue; if (force && start >= cpc->trim_start && (end - 1) <= cpc->trim_end) continue; - if (!test_opt(sbi, LFS) || sbi->segs_per_sec == 1) { + if (!test_opt(sbi, LFS) || !__is_large_section(sbi)) { f2fs_issue_discard(sbi, START_BLOCK(sbi, start), (end - start) << sbi->log_blocks_per_seg); continue; @@ -1749,7 +2066,7 @@ find_next: sbi->blocks_per_seg, cur_pos); len = next_pos - cur_pos; - if (f2fs_sb_has_blkzoned(sbi->sb) || + if (f2fs_sb_has_blkzoned(sbi) || (force && len < cpc->trim_minlen)) goto skip; @@ -1767,9 +2084,8 @@ skip: if (cur_pos < sbi->blocks_per_seg) goto find_next; - list_del(&entry->list); + release_discard_addr(entry); dcc->nr_discards -= total_len; - kmem_cache_free(discard_entry_slab, entry); } wake_up_discard_thread(sbi, false); @@ -1798,12 +2114,14 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi) INIT_LIST_HEAD(&dcc->fstrim_list); mutex_init(&dcc->cmd_lock); atomic_set(&dcc->issued_discard, 0); - atomic_set(&dcc->issing_discard, 0); + atomic_set(&dcc->queued_discard, 0); atomic_set(&dcc->discard_cmd_cnt, 0); dcc->nr_discards = 0; dcc->max_discards = MAIN_SEGS(sbi) << sbi->log_blocks_per_seg; dcc->undiscard_blks = 0; + dcc->next_pos = 0; dcc->root = RB_ROOT; + dcc->rbtree_check = false; init_waitqueue_head(&dcc->discard_wait_queue); SM_I(sbi)->dcc_info = dcc; @@ -1812,7 +2130,7 @@ init_thread: "f2fs_discard-%u:%u", MAJOR(dev), MINOR(dev)); if (IS_ERR(dcc->f2fs_issue_discard)) { err = PTR_ERR(dcc->f2fs_issue_discard); - kfree(dcc); + kvfree(dcc); SM_I(sbi)->dcc_info = NULL; return err; } @@ -1827,9 +2145,9 @@ static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi) if (!dcc) return; - stop_discard_thread(sbi); + f2fs_stop_discard_thread(sbi); - kfree(dcc); + kvfree(dcc); SM_I(sbi)->dcc_info = NULL; } @@ -1874,8 +2192,9 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) (new_vblocks > sbi->blocks_per_seg))); se->valid_blocks = new_vblocks; - se->mtime = get_mtime(sbi); - SIT_I(sbi)->max_mtime = se->mtime; + se->mtime = get_mtime(sbi, false); + if (se->mtime > SIT_I(sbi)->max_mtime) + SIT_I(sbi)->max_mtime = se->mtime; /* Update valid 
block bitmap */ if (del > 0) { @@ -1884,26 +2203,25 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) mir_exist = f2fs_test_and_set_bit(offset, se->cur_valid_map_mir); if (unlikely(exist != mir_exist)) { - f2fs_msg(sbi->sb, KERN_ERR, "Inconsistent error " - "when setting bitmap, blk:%u, old bit:%d", - blkaddr, exist); + f2fs_err(sbi, "Inconsistent error when setting bitmap, blk:%u, old bit:%d", + blkaddr, exist); f2fs_bug_on(sbi, 1); } #endif if (unlikely(exist)) { - f2fs_msg(sbi->sb, KERN_ERR, - "Bitmap was wrongly set, blk:%u", blkaddr); + f2fs_err(sbi, "Bitmap was wrongly set, blk:%u", + blkaddr); f2fs_bug_on(sbi, 1); se->valid_blocks--; del = 0; } - if (f2fs_discard_en(sbi) && - !f2fs_test_and_set_bit(offset, se->discard_map)) + if (!f2fs_test_and_set_bit(offset, se->discard_map)) sbi->discard_blks--; /* don't overwrite by SSR to keep node chain */ - if (IS_NODESEG(se->type)) { + if (IS_NODESEG(se->type) && + !is_sbi_flag_set(sbi, SBI_CP_DISABLED)) { if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map)) se->ckpt_valid_blocks++; } @@ -1913,22 +2231,32 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) mir_exist = f2fs_test_and_clear_bit(offset, se->cur_valid_map_mir); if (unlikely(exist != mir_exist)) { - f2fs_msg(sbi->sb, KERN_ERR, "Inconsistent error " - "when clearing bitmap, blk:%u, old bit:%d", - blkaddr, exist); + f2fs_err(sbi, "Inconsistent error when clearing bitmap, blk:%u, old bit:%d", + blkaddr, exist); f2fs_bug_on(sbi, 1); } #endif if (unlikely(!exist)) { - f2fs_msg(sbi->sb, KERN_ERR, - "Bitmap was wrongly cleared, blk:%u", blkaddr); + f2fs_err(sbi, "Bitmap was wrongly cleared, blk:%u", + blkaddr); f2fs_bug_on(sbi, 1); se->valid_blocks++; del = 0; + } else if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { + /* + * If checkpoints are off, we must not reuse data that + * was used in the previous checkpoint. If it was used + * before, we must track that to know how much space we + * really have. 
+ */ + if (f2fs_test_bit(offset, se->ckpt_valid_map)) { + spin_lock(&sbi->stat_lock); + sbi->unusable_block_count++; + spin_unlock(&sbi->stat_lock); + } } - if (f2fs_discard_en(sbi) && - f2fs_test_and_clear_bit(offset, se->discard_map)) + if (f2fs_test_and_clear_bit(offset, se->discard_map)) sbi->discard_blks++; } if (!f2fs_test_bit(offset, se->ckpt_valid_map)) @@ -1939,11 +2267,11 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) /* update total number of valid blocks to be written in ckpt area */ SIT_I(sbi)->written_valid_blocks += del; - if (sbi->segs_per_sec > 1) + if (__is_large_section(sbi)) get_sec_entry(sbi, segno)->valid_blocks += del; } -void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr) +void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr) { unsigned int segno = GET_SEGNO(sbi, addr); struct sit_info *sit_i = SIT_I(sbi); @@ -1952,6 +2280,8 @@ void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr) if (addr == NEW_ADDR) return; + invalidate_mapping_pages(META_MAPPING(sbi), addr, addr); + /* add it into sit main buffer */ down_write(&sit_i->sentry_lock); @@ -1963,14 +2293,14 @@ void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr) up_write(&sit_i->sentry_lock); } -bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr) +bool f2fs_is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr) { struct sit_info *sit_i = SIT_I(sbi); unsigned int segno, offset; struct seg_entry *se; bool is_cp = false; - if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) + if (!__is_valid_data_blkaddr(blkaddr)) return true; down_read(&sit_i->sentry_lock); @@ -2002,7 +2332,7 @@ static void __add_sum_entry(struct f2fs_sb_info *sbi, int type, /* * Calculate the number of current summary pages for writing */ -int npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra) +int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra) { int valid_sum_count = 0; int i, sum_in_page; @@ -2032,14 +2362,15 @@ int npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra) /* * Caller should put this summary page */ -struct page *get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno) +struct page *f2fs_get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno) { - return get_meta_page(sbi, GET_SUM_BLOCK(sbi, segno)); + return f2fs_get_meta_page_nofail(sbi, GET_SUM_BLOCK(sbi, segno)); } -void update_meta_page(struct f2fs_sb_info *sbi, void *src, block_t blk_addr) +void f2fs_update_meta_page(struct f2fs_sb_info *sbi, + void *src, block_t blk_addr) { - struct page *page = grab_meta_page(sbi, blk_addr); + struct page *page = f2fs_grab_meta_page(sbi, blk_addr); memcpy(page_address(page), src, PAGE_SIZE); set_page_dirty(page); @@ -2049,18 +2380,19 @@ void update_meta_page(struct f2fs_sb_info *sbi, void *src, block_t blk_addr) static void write_sum_page(struct f2fs_sb_info *sbi, struct f2fs_summary_block *sum_blk, block_t blk_addr) { - update_meta_page(sbi, (void *)sum_blk, blk_addr); + f2fs_update_meta_page(sbi, (void *)sum_blk, blk_addr); } static void write_current_sum_page(struct f2fs_sb_info *sbi, int type, block_t blk_addr) { struct curseg_info *curseg = CURSEG_I(sbi, type); - struct page *page = grab_meta_page(sbi, blk_addr); + struct page *page = f2fs_grab_meta_page(sbi, blk_addr); struct f2fs_summary_block *src = curseg->sum_blk; struct f2fs_summary_block *dst; dst = (struct f2fs_summary_block *)page_address(page); + memset(dst, 0, PAGE_SIZE); mutex_lock(&curseg->curseg_mutex); @@ -2201,9 +2533,12 @@ 
static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified) static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type) { /* if segs_per_sec is large than 1, we need to keep original policy. */ - if (sbi->segs_per_sec != 1) + if (__is_large_section(sbi)) return CURSEG_I(sbi, type)->segno; + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) + return 0; + if (test_opt(sbi, NOHEAP) && (type == CURSEG_HOT_DATA || IS_NODESEG(type))) return 0; @@ -2300,7 +2635,8 @@ static void change_curseg(struct f2fs_sb_info *sbi, int type) curseg->alloc_type = SSR; __next_free_blkoff(sbi, curseg, 0); - sum_page = get_sum_page(sbi, new_segno); + sum_page = f2fs_get_sum_page(sbi, new_segno); + f2fs_bug_on(sbi, IS_ERR(sum_page)); sum_node = (struct f2fs_summary_block *)page_address(sum_page); memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE); f2fs_put_page(sum_page, 1); @@ -2314,7 +2650,7 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type) int i, cnt; bool reversed = false; - /* need_SSR() already forces to do this */ + /* f2fs_need_SSR() already forces to do this */ if (v_ops->get_victim(sbi, &segno, BG_GC, type, SSR)) { curseg->next_segno = segno; return 1; @@ -2347,6 +2683,15 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type) return 1; } } + + /* find valid_blocks=0 in dirty list */ + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { + segno = get_free_segment(sbi); + if (segno != NULL_SEGNO) { + curseg->next_segno = segno; + return 1; + } + } return 0; } @@ -2364,9 +2709,10 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi, else if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) && type == CURSEG_WARM_NODE) new_curseg(sbi, type, false); - else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type)) + else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type) && + likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED))) new_curseg(sbi, type, false); - else if (need_SSR(sbi) && get_ssr_segment(sbi, type)) + else if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type)) change_curseg(sbi, type); else new_curseg(sbi, type, false); @@ -2374,7 +2720,40 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi, stat_inc_seg_type(sbi, curseg); } -void allocate_new_segments(struct f2fs_sb_info *sbi) +void allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type, + unsigned int start, unsigned int end) +{ + struct curseg_info *curseg = CURSEG_I(sbi, type); + unsigned int segno; + + down_read(&SM_I(sbi)->curseg_lock); + mutex_lock(&curseg->curseg_mutex); + down_write(&SIT_I(sbi)->sentry_lock); + + segno = CURSEG_I(sbi, type)->segno; + if (segno < start || segno > end) + goto unlock; + + if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type)) + change_curseg(sbi, type); + else + new_curseg(sbi, type, true); + + stat_inc_seg_type(sbi, curseg); + + locate_dirty_segment(sbi, segno); +unlock: + up_write(&SIT_I(sbi)->sentry_lock); + + if (segno != curseg->segno) + f2fs_notice(sbi, "For resize: curseg of type %d: %u ==> %u", + type, segno, curseg->segno); + + mutex_unlock(&curseg->curseg_mutex); + up_read(&SM_I(sbi)->curseg_lock); +} + +void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi) { struct curseg_info *curseg; unsigned int old_segno; @@ -2396,7 +2775,8 @@ static const struct segment_allocation default_salloc_ops = { .allocate_segment = allocate_segment_by_default, }; -bool exist_trim_candidates(struct f2fs_sb_info *sbi, struct cp_control *cpc) +bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi, + struct 
cp_control *cpc) { __u64 trim_start = cpc->trim_start; bool has_candidate = false; @@ -2414,7 +2794,7 @@ bool exist_trim_candidates(struct f2fs_sb_info *sbi, struct cp_control *cpc) return has_candidate; } -static void __issue_discard_cmd_range(struct f2fs_sb_info *sbi, +static unsigned int __issue_discard_cmd_range(struct f2fs_sb_info *sbi, struct discard_policy *dpolicy, unsigned int start, unsigned int end) { @@ -2424,14 +2804,17 @@ static void __issue_discard_cmd_range(struct f2fs_sb_info *sbi, struct discard_cmd *dc; struct blk_plug plug; int issued; + unsigned int trimmed = 0; next: issued = 0; mutex_lock(&dcc->cmd_lock); - f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, &dcc->root)); + if (unlikely(dcc->rbtree_check)) + f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi, + &dcc->root)); - dc = (struct discard_cmd *)__lookup_rb_tree_ret(&dcc->root, + dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root, NULL, start, (struct rb_entry **)&prev_dc, (struct rb_entry **)&next_dc, @@ -2443,6 +2826,7 @@ next: while (dc && dc->lstart <= end) { struct rb_node *node; + int err = 0; if (dc->len < dpolicy->granularity) goto skip; @@ -2452,19 +2836,24 @@ next: goto skip; } - __submit_discard_cmd(sbi, dpolicy, dc); + err = __submit_discard_cmd(sbi, dpolicy, dc, &issued); - if (++issued >= dpolicy->max_requests) { + if (issued >= dpolicy->max_requests) { start = dc->lstart + dc->len; + if (err) + __remove_discard_cmd(sbi, dc); + blk_finish_plug(&plug); mutex_unlock(&dcc->cmd_lock); - __wait_all_discard_cmd(sbi, NULL); + trimmed += __wait_all_discard_cmd(sbi, NULL); congestion_wait(BLK_RW_ASYNC, HZ/50); goto next; } skip: node = rb_next(&dc->rb_node); + if (err) + __remove_discard_cmd(sbi, dc); dc = rb_entry_safe(node, struct discard_cmd, rb_node); if (fatal_signal_pending(current)) @@ -2473,6 +2862,8 @@ skip: blk_finish_plug(&plug); mutex_unlock(&dcc->cmd_lock); + + return trimmed; } int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) @@ -2485,23 +2876,27 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) struct discard_policy dpolicy; unsigned long long trimmed = 0; int err = 0; + bool need_align = test_opt(sbi, LFS) && __is_large_section(sbi); if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize) return -EINVAL; - if (end <= MAIN_BLKADDR(sbi)) + if (end < MAIN_BLKADDR(sbi)) goto out; if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) { - f2fs_msg(sbi->sb, KERN_WARNING, - "Found FS corruption, run fsck to fix."); - goto out; + f2fs_warn(sbi, "Found FS corruption, run fsck to fix."); + return -EFSCORRUPTED; } /* start/end segment number in main_area */ start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start); end_segno = (end >= MAX_BLKADDR(sbi)) ? 
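/*
 * Editor's note: worked example, not part of the patch. Under mode=lfs
 * with a multi-segment section (need_align above), f2fs_trim_fs() snaps
 * the trim window outward to whole sections so no section is trimmed
 * partially. With segs_per_sec == 4 and a request covering segments
 * 5..13:
 *
 *     start_segno = rounddown(5, 4)         = 4
 *     end_segno   = roundup(13 + 1, 4) - 1  = 15
 *
 * rounddown() and roundup() are the usual helpers from linux/kernel.h;
 * both endpoints move away from the requested range, never into it.
 */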
MAIN_SEGS(sbi) - 1 : GET_SEGNO(sbi, end); + if (need_align) { + start_segno = rounddown(start_segno, sbi->segs_per_sec); + end_segno = roundup(end_segno + 1, sbi->segs_per_sec) - 1; + } cpc.reason = CP_DISCARD; cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen)); @@ -2512,29 +2907,32 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) goto out; mutex_lock(&sbi->gc_mutex); - err = write_checkpoint(sbi, &cpc); + err = f2fs_write_checkpoint(sbi, &cpc); mutex_unlock(&sbi->gc_mutex); if (err) goto out; - start_block = START_BLOCK(sbi, start_segno); - end_block = START_BLOCK(sbi, end_segno + 1); - - __init_discard_policy(sbi, &dpolicy, DPOLICY_FSTRIM, cpc.trim_minlen); - __issue_discard_cmd_range(sbi, &dpolicy, start_block, end_block); - /* * We filed discard candidates, but actually we don't need to wait for * all of them, since they'll be issued in idle time along with runtime * discard option. User configuration looks like using runtime discard * or periodic fstrim instead of it. */ - if (!test_opt(sbi, DISCARD)) { - trimmed = __wait_discard_cmd_range(sbi, &dpolicy, + if (f2fs_realtime_discard_enable(sbi)) + goto out; + + start_block = START_BLOCK(sbi, start_segno); + end_block = START_BLOCK(sbi, end_segno + 1); + + __init_discard_policy(sbi, &dpolicy, DPOLICY_FSTRIM, cpc.trim_minlen); + trimmed = __issue_discard_cmd_range(sbi, &dpolicy, + start_block, end_block); + + trimmed += __wait_discard_cmd_range(sbi, &dpolicy, start_block, end_block); - range->len = F2FS_BLK_TO_BYTES(trimmed); - } out: + if (!err) + range->len = F2FS_BLK_TO_BYTES(trimmed); return err; } @@ -2546,7 +2944,7 @@ static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type) return false; } -int rw_hint_to_seg_type(enum rw_hint hint) +int f2fs_rw_hint_to_seg_type(enum rw_hint hint) { switch (hint) { case WRITE_LIFE_SHORT: @@ -2619,7 +3017,7 @@ int rw_hint_to_seg_type(enum rw_hint hint) * WRITE_LIFE_LONG " WRITE_LIFE_LONG */ -enum rw_hint io_type_to_rw_hint(struct f2fs_sb_info *sbi, +enum rw_hint f2fs_io_type_to_rw_hint(struct f2fs_sb_info *sbi, enum page_type type, enum temp_type temp) { if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_USER) { @@ -2686,9 +3084,11 @@ static int __get_segment_type_6(struct f2fs_io_info *fio) if (is_cold_data(fio->page) || file_is_cold(inode)) return CURSEG_COLD_DATA; if (file_is_hot(inode) || - is_inode_flag_set(inode, FI_HOT_DATA)) + is_inode_flag_set(inode, FI_HOT_DATA) || + f2fs_is_atomic_file(inode) || + f2fs_is_volatile_file(inode)) return CURSEG_HOT_DATA; - /* rw_hint_to_seg_type(inode->i_write_hint); */ + /* f2fs_rw_hint_to_seg_type(inode->i_write_hint); */ return CURSEG_WARM_DATA; } else { if (IS_DNODE(fio->page)) @@ -2725,7 +3125,7 @@ static int __get_segment_type(struct f2fs_io_info *fio) return type; } -void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, +void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, block_t old_blkaddr, block_t *new_blkaddr, struct f2fs_summary *sum, int type, struct f2fs_io_info *fio, bool add_list) @@ -2785,6 +3185,7 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, INIT_LIST_HEAD(&fio->list); fio->in_list = true; + fio->retry = false; io = sbi->write_io[fio->type] + fio->temp; spin_lock(&io->io_lock); list_add_tail(&fio->list, &io->io_list); @@ -2801,13 +3202,13 @@ static void update_device_state(struct f2fs_io_info *fio) struct f2fs_sb_info *sbi = fio->sbi; unsigned int devidx; - if (!sbi->s_ndevs) + if (!f2fs_is_multi_device(sbi)) return; devidx = 
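/*
 * Editor's note: illustrative summary, not part of the patch. The
 * __get_segment_type_6() hunk above widens the hot-data bucket, so the
 * temperature decision for data pages becomes approximately:
 *
 *     cold: is_cold_data(page) || file_is_cold(inode)
 *     hot:  file_is_hot(inode) || FI_HOT_DATA ||
 *           f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode)
 *     warm: everything else
 *
 * Atomic and volatile files are presumably routed to CURSEG_HOT_DATA
 * because their blocks are rewritten often; mixing them into warm logs
 * would keep dirtying otherwise stable segments.
 */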
f2fs_target_device_index(sbi, fio->new_blkaddr); /* update device state for fsync */ - set_dirty_device(sbi, fio->ino, devidx, FLUSH_INO); + f2fs_set_dirty_device(sbi, fio->ino, devidx, FLUSH_INO); /* update device state for checkpoint */ if (!f2fs_test_bit(devidx, (char *)&sbi->dirty_device)) { @@ -2820,23 +3221,31 @@ static void update_device_state(struct f2fs_io_info *fio) static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) { int type = __get_segment_type(fio); - int err; + bool keep_order = (test_opt(fio->sbi, LFS) && type == CURSEG_COLD_DATA); + if (keep_order) + down_read(&fio->sbi->io_order_lock); reallocate: - allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr, + f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr, &fio->new_blkaddr, sum, type, fio, true); + if (GET_SEGNO(fio->sbi, fio->old_blkaddr) != NULL_SEGNO) + invalidate_mapping_pages(META_MAPPING(fio->sbi), + fio->old_blkaddr, fio->old_blkaddr); /* writeout dirty page into bdev */ - err = f2fs_submit_page_write(fio); - if (err == -EAGAIN) { + f2fs_submit_page_write(fio); + if (fio->retry) { fio->old_blkaddr = fio->new_blkaddr; goto reallocate; - } else if (!err) { - update_device_state(fio); } + + update_device_state(fio); + + if (keep_order) + up_read(&fio->sbi->io_order_lock); } -void write_meta_page(struct f2fs_sb_info *sbi, struct page *page, +void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct page *page, enum iostat_type io_type) { struct f2fs_io_info fio = { @@ -2859,10 +3268,11 @@ void write_meta_page(struct f2fs_sb_info *sbi, struct page *page, ClearPageError(page); f2fs_submit_page_write(&fio); + stat_inc_meta_count(sbi, page->index); f2fs_update_iostat(sbi, io_type, F2FS_BLKSIZE); } -void write_node_page(unsigned int nid, struct f2fs_io_info *fio) +void f2fs_do_write_node_page(unsigned int nid, struct f2fs_io_info *fio) { struct f2fs_summary sum; @@ -2872,40 +3282,49 @@ void write_node_page(unsigned int nid, struct f2fs_io_info *fio) f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE); } -void write_data_page(struct dnode_of_data *dn, struct f2fs_io_info *fio) +void f2fs_outplace_write_data(struct dnode_of_data *dn, + struct f2fs_io_info *fio) { struct f2fs_sb_info *sbi = fio->sbi; struct f2fs_summary sum; - struct node_info ni; f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR); - get_node_info(sbi, dn->nid, &ni); - set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); + set_summary(&sum, dn->nid, dn->ofs_in_node, fio->version); do_write_page(&sum, fio); f2fs_update_data_blkaddr(dn, fio->new_blkaddr); f2fs_update_iostat(sbi, fio->io_type, F2FS_BLKSIZE); } -int rewrite_data_page(struct f2fs_io_info *fio) +int f2fs_inplace_write_data(struct f2fs_io_info *fio) { int err; struct f2fs_sb_info *sbi = fio->sbi; + unsigned int segno; fio->new_blkaddr = fio->old_blkaddr; /* i/o temperature is needed for passing down write hints */ __get_segment_type(fio); - f2fs_bug_on(sbi, !IS_DATASEG(get_seg_entry(sbi, - GET_SEGNO(sbi, fio->new_blkaddr))->type)); + segno = GET_SEGNO(sbi, fio->new_blkaddr); + + if (!IS_DATASEG(get_seg_entry(sbi, segno)->type)) { + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_warn(sbi, "%s: incorrect segment(%u) type, run fsck to fix.", + __func__, segno); + return -EFSCORRUPTED; + } stat_inc_inplace_blocks(fio->sbi); - err = f2fs_submit_page_bio(fio); - if (!err) + if (fio->bio) + err = f2fs_merge_page_bio(fio); + else + err = f2fs_submit_page_bio(fio); + if (!err) { update_device_state(fio); - - f2fs_update_iostat(fio->sbi, fio->io_type, 
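/*
 * Editor's note: illustrative sketch, not part of the patch. The
 * do_write_page() hunk above replaces the old -EAGAIN handshake with an
 * fio->retry flag and, for cold data in LFS mode, takes io_order_lock so
 * retried allocations cannot be reordered against concurrent writers:
 *
 *     if (keep_order)
 *             down_read(&fio->sbi->io_order_lock);
 * reallocate:
 *     f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr,
 *                              &fio->new_blkaddr, sum, type, fio, true);
 *     f2fs_submit_page_write(fio);
 *     if (fio->retry) {
 *             fio->old_blkaddr = fio->new_blkaddr;
 *             goto reallocate;
 *     }
 *     update_device_state(fio);
 *     if (keep_order)
 *             up_read(&fio->sbi->io_order_lock);
 *
 * The invalidate_mapping_pages() call drops any stale META_MAPPING copy
 * cached at the old block address once that address is reused.
 */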
F2FS_BLKSIZE); + f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE); + } return err; } @@ -2922,7 +3341,7 @@ static inline int __f2fs_get_curseg(struct f2fs_sb_info *sbi, return i; } -void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, +void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, block_t old_blkaddr, block_t new_blkaddr, bool recover_curseg, bool recover_newaddr) { @@ -2977,8 +3396,11 @@ void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, if (!recover_curseg || recover_newaddr) update_sit_entry(sbi, new_blkaddr, 1); - if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) + if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) { + invalidate_mapping_pages(META_MAPPING(sbi), + old_blkaddr, old_blkaddr); update_sit_entry(sbi, old_blkaddr, -1); + } locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr)); locate_dirty_segment(sbi, GET_SEGNO(sbi, new_blkaddr)); @@ -3007,42 +3429,56 @@ void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn, set_summary(&sum, dn->nid, dn->ofs_in_node, version); - __f2fs_replace_block(sbi, &sum, old_addr, new_addr, + f2fs_do_replace_block(sbi, &sum, old_addr, new_addr, recover_curseg, recover_newaddr); f2fs_update_data_blkaddr(dn, new_addr); } void f2fs_wait_on_page_writeback(struct page *page, - enum page_type type, bool ordered) + enum page_type type, bool ordered, bool locked) { if (PageWriteback(page)) { struct f2fs_sb_info *sbi = F2FS_P_SB(page); - f2fs_submit_merged_write_cond(sbi, page->mapping->host, - 0, page->index, type); - if (ordered) + f2fs_submit_merged_write_cond(sbi, NULL, page, 0, type); + if (ordered) { wait_on_page_writeback(page); - else + f2fs_bug_on(sbi, locked && PageWriteback(page)); + } else { wait_for_stable_page(page); + } } } -void f2fs_wait_on_block_writeback(struct f2fs_sb_info *sbi, block_t blkaddr) +void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr) { + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct page *cpage; - if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) + if (!f2fs_post_read_required(inode)) + return; + + if (!__is_valid_data_blkaddr(blkaddr)) return; cpage = find_lock_page(META_MAPPING(sbi), blkaddr); if (cpage) { - f2fs_wait_on_page_writeback(cpage, DATA, true); + f2fs_wait_on_page_writeback(cpage, DATA, true, true); f2fs_put_page(cpage, 1); } } -static void read_compacted_summaries(struct f2fs_sb_info *sbi) +void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr, + block_t len) +{ + block_t i; + + for (i = 0; i < len; i++) + f2fs_wait_on_block_writeback(inode, blkaddr + i); +} + +static int read_compacted_summaries(struct f2fs_sb_info *sbi) { struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); struct curseg_info *seg_i; @@ -3053,7 +3489,9 @@ static void read_compacted_summaries(struct f2fs_sb_info *sbi) start = start_sum_block(sbi); - page = get_meta_page(sbi, start++); + page = f2fs_get_meta_page(sbi, start++); + if (IS_ERR(page)) + return PTR_ERR(page); kaddr = (unsigned char *)page_address(page); /* Step 1: restore nat cache */ @@ -3093,12 +3531,15 @@ static void read_compacted_summaries(struct f2fs_sb_info *sbi) f2fs_put_page(page, 1); page = NULL; - page = get_meta_page(sbi, start++); + page = f2fs_get_meta_page(sbi, start++); + if (IS_ERR(page)) + return PTR_ERR(page); kaddr = (unsigned char *)page_address(page); offset = 0; } } f2fs_put_page(page, 1); + return 0; } static int read_normal_summaries(struct f2fs_sb_info *sbi, int type) @@ -3110,6 +3551,7 @@ static int 
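/*
 * Editor's note: pattern summary, not part of the patch. Summary loading
 * previously assumed meta reads always succeeded; with
 * f2fs_get_meta_page() now returning an ERR_PTR on failure, each reader
 * follows the same propagate-don't-panic pattern, and
 * read_compacted_summaries() grows an int return so mount can fail
 * cleanly:
 *
 *     page = f2fs_get_meta_page(sbi, blk_addr);
 *     if (IS_ERR(page))
 *             return PTR_ERR(page);
 */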
read_normal_summaries(struct f2fs_sb_info *sbi, int type) unsigned short blk_off; unsigned int segno = 0; block_t blk_addr = 0; + int err = 0; /* get segment number and block addr */ if (IS_DATASEG(type)) { @@ -3132,7 +3574,9 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type) blk_addr = GET_SUM_BLOCK(sbi, segno); } - new = get_meta_page(sbi, blk_addr); + new = f2fs_get_meta_page(sbi, blk_addr); + if (IS_ERR(new)) + return PTR_ERR(new); sum = (struct f2fs_summary_block *)page_address(new); if (IS_NODESEG(type)) { @@ -3144,7 +3588,9 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type) ns->ofs_in_node = 0; } } else { - restore_node_summary(sbi, segno, sum); + err = f2fs_restore_node_summary(sbi, segno, sum); + if (err) + goto out; } } @@ -3164,8 +3610,9 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type) curseg->alloc_type = ckpt->alloc_type[type]; curseg->next_blkoff = blk_off; mutex_unlock(&curseg->curseg_mutex); +out: f2fs_put_page(new, 1); - return 0; + return err; } static int restore_curseg_summaries(struct f2fs_sb_info *sbi) @@ -3176,19 +3623,21 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi) int err; if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG)) { - int npages = npages_for_summary_flush(sbi, true); + int npages = f2fs_npages_for_summary_flush(sbi, true); if (npages >= 2) - ra_meta_pages(sbi, start_sum_block(sbi), npages, + f2fs_ra_meta_pages(sbi, start_sum_block(sbi), npages, META_CP, true); /* restore for compacted data summary */ - read_compacted_summaries(sbi); + err = read_compacted_summaries(sbi); + if (err) + return err; type = CURSEG_HOT_NODE; } if (__exist_node_summaries(sbi)) - ra_meta_pages(sbi, sum_blk_addr(sbi, NR_CURSEG_TYPE, type), + f2fs_ra_meta_pages(sbi, sum_blk_addr(sbi, NR_CURSEG_TYPE, type), NR_CURSEG_TYPE - type, META_CP, true); for (; type <= CURSEG_COLD_NODE; type++) { @@ -3199,8 +3648,11 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi) /* sanity check for summary blocks */ if (nats_in_cursum(nat_j) > NAT_JOURNAL_ENTRIES || - sits_in_cursum(sit_j) > SIT_JOURNAL_ENTRIES) + sits_in_cursum(sit_j) > SIT_JOURNAL_ENTRIES) { + f2fs_err(sbi, "invalid journal entries nats %u sits %u\n", + nats_in_cursum(nat_j), sits_in_cursum(sit_j)); return -EINVAL; + } return 0; } @@ -3214,8 +3666,9 @@ static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr) int written_size = 0; int i, j; - page = grab_meta_page(sbi, blkaddr++); + page = f2fs_grab_meta_page(sbi, blkaddr++); kaddr = (unsigned char *)page_address(page); + memset(kaddr, 0, PAGE_SIZE); /* Step 1: write nat cache */ seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA); @@ -3238,8 +3691,9 @@ static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr) for (j = 0; j < blkoff; j++) { if (!page) { - page = grab_meta_page(sbi, blkaddr++); + page = f2fs_grab_meta_page(sbi, blkaddr++); kaddr = (unsigned char *)page_address(page); + memset(kaddr, 0, PAGE_SIZE); written_size = 0; } summary = (struct f2fs_summary *)(kaddr + written_size); @@ -3274,7 +3728,7 @@ static void write_normal_summaries(struct f2fs_sb_info *sbi, write_current_sum_page(sbi, i, blkaddr + (i - type)); } -void write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk) +void f2fs_write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk) { if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG)) write_compacted_summaries(sbi, start_blk); @@ -3282,12 +3736,12 @@ void write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk) 
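/*
 * Editor's note: rationale sketch, not part of the patch. The memset()
 * added after each f2fs_grab_meta_page() below matters because a grabbed
 * meta page may hold stale contents, and a compacted summary block is
 * only partially filled:
 *
 *     page = f2fs_grab_meta_page(sbi, blkaddr++);
 *     kaddr = (unsigned char *)page_address(page);
 *     memset(kaddr, 0, PAGE_SIZE);   - unused tail hits disk as zeros
 *
 * The same zeroing is added to seg_info_to_sit_page() in segment.h for
 * SIT blocks.
 */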
write_normal_summaries(sbi, start_blk, CURSEG_HOT_DATA); } -void write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk) +void f2fs_write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk) { write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE); } -int lookup_journal_in_cursum(struct f2fs_journal *journal, int type, +int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type, unsigned int val, int alloc) { int i; @@ -3312,7 +3766,7 @@ int lookup_journal_in_cursum(struct f2fs_journal *journal, int type, static struct page *get_current_sit_page(struct f2fs_sb_info *sbi, unsigned int segno) { - return get_meta_page(sbi, current_sit_addr(sbi, segno)); + return f2fs_get_meta_page_nofail(sbi, current_sit_addr(sbi, segno)); } static struct page *get_next_sit_page(struct f2fs_sb_info *sbi, @@ -3325,7 +3779,7 @@ static struct page *get_next_sit_page(struct f2fs_sb_info *sbi, src_off = current_sit_addr(sbi, start); dst_off = next_sit_addr(sbi, src_off); - page = grab_meta_page(sbi, dst_off); + page = f2fs_grab_meta_page(sbi, dst_off); seg_info_to_sit_page(sbi, page, start); set_page_dirty(page); @@ -3421,7 +3875,7 @@ static void remove_sits_in_journal(struct f2fs_sb_info *sbi) * CP calls this function, which flushes SIT entries including sit_journal, * and moves prefree segs to free segs. */ -void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) +void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) { struct sit_info *sit_i = SIT_I(sbi); unsigned long *bitmap = sit_i->dirty_sentries_bitmap; @@ -3429,7 +3883,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) struct f2fs_journal *journal = curseg->journal; struct sit_entry_set *ses, *tmp; struct list_head *head = &SM_I(sbi)->sit_entry_set; - bool to_journal = true; + bool to_journal = !is_sbi_flag_set(sbi, SBI_IS_RESIZEFS); struct seg_entry *se; down_write(&sit_i->sentry_lock); @@ -3448,7 +3902,8 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) * entries, remove all entries from journal and add and account * them in sit entry set. 
*/ - if (!__has_cursum_space(journal, sit_i->dirty_sentries, SIT_JOURNAL)) + if (!__has_cursum_space(journal, sit_i->dirty_sentries, SIT_JOURNAL) || + !to_journal) remove_sits_in_journal(sbi); /* @@ -3480,6 +3935,11 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) int offset, sit_offset; se = get_seg_entry(sbi, segno); +#ifdef CONFIG_F2FS_CHECK_FS + if (memcmp(se->cur_valid_map, se->cur_valid_map_mir, + SIT_VBLOCK_MAP_SIZE)) + f2fs_bug_on(sbi, 1); +#endif /* add discard candidates */ if (!(cpc->reason & CP_DISCARD)) { @@ -3488,17 +3948,21 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) } if (to_journal) { - offset = lookup_journal_in_cursum(journal, + offset = f2fs_lookup_journal_in_cursum(journal, SIT_JOURNAL, segno, 1); f2fs_bug_on(sbi, offset < 0); segno_in_journal(journal, offset) = cpu_to_le32(segno); seg_info_to_raw_sit(se, &sit_in_journal(journal, offset)); + check_block_count(sbi, segno, + &sit_in_journal(journal, offset)); } else { sit_offset = SIT_ENTRY_OFFSET(sit_i, segno); seg_info_to_raw_sit(se, &raw_sit->entries[sit_offset]); + check_block_count(sbi, segno, + &raw_sit->entries[sit_offset]); } __clear_bit(segno, bitmap); @@ -3546,8 +4010,10 @@ static int build_sit_info(struct f2fs_sb_info *sbi) SM_I(sbi)->sit_info = sit_i; - sit_i->sentries = f2fs_kvzalloc(sbi, MAIN_SEGS(sbi) * - sizeof(struct seg_entry), GFP_KERNEL); + sit_i->sentries = + f2fs_kvzalloc(sbi, array_size(sizeof(struct seg_entry), + MAIN_SEGS(sbi)), + GFP_KERNEL); if (!sit_i->sentries) return -ENOMEM; @@ -3573,22 +4039,22 @@ static int build_sit_info(struct f2fs_sb_info *sbi) return -ENOMEM; #endif - if (f2fs_discard_en(sbi)) { - sit_i->sentries[start].discard_map - = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, - GFP_KERNEL); - if (!sit_i->sentries[start].discard_map) - return -ENOMEM; - } + sit_i->sentries[start].discard_map + = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, + GFP_KERNEL); + if (!sit_i->sentries[start].discard_map) + return -ENOMEM; } sit_i->tmp_map = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); if (!sit_i->tmp_map) return -ENOMEM; - if (sbi->segs_per_sec > 1) { - sit_i->sec_entries = f2fs_kvzalloc(sbi, MAIN_SECS(sbi) * - sizeof(struct sec_entry), GFP_KERNEL); + if (__is_large_section(sbi)) { + sit_i->sec_entries = + f2fs_kvzalloc(sbi, array_size(sizeof(struct sec_entry), + MAIN_SECS(sbi)), + GFP_KERNEL); if (!sit_i->sec_entries) return -ENOMEM; } @@ -3664,7 +4130,8 @@ static int build_curseg(struct f2fs_sb_info *sbi) struct curseg_info *array; int i; - array = f2fs_kzalloc(sbi, sizeof(*array) * NR_CURSEG_TYPE, GFP_KERNEL); + array = f2fs_kzalloc(sbi, array_size(NR_CURSEG_TYPE, sizeof(*array)), + GFP_KERNEL); if (!array) return -ENOMEM; @@ -3697,9 +4164,10 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) unsigned int i, start, end; unsigned int readed, start_blk = 0; int err = 0; + block_t total_node_blocks = 0; do { - readed = ra_meta_pages(sbi, start_blk, BIO_MAX_PAGES, + readed = f2fs_ra_meta_pages(sbi, start_blk, BIO_MAX_PAGES, META_SIT, true); start = start_blk * sit_i->sents_per_block; @@ -3711,6 +4179,8 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) se = &sit_i->sentries[start]; page = get_current_sit_page(sbi, start); + if (IS_ERR(page)) + return PTR_ERR(page); sit_blk = (struct f2fs_sit_block *)page_address(page); sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)]; f2fs_put_page(page, 1); @@ -3719,23 +4189,23 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) if (err) return err; seg_info_from_raw_sit(se, 
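/*
 * Editor's note: API note, not part of the patch. The allocations above
 * switch from an open-coded "count * size" to array_size() from
 * linux/overflow.h, which saturates to SIZE_MAX if the multiplication
 * overflows, so the request fails instead of silently under-allocating:
 *
 *     sit_i->sentries = f2fs_kvzalloc(sbi,
 *                     array_size(sizeof(struct seg_entry), MAIN_SEGS(sbi)),
 *                     GFP_KERNEL);
 *     if (!sit_i->sentries)
 *             return -ENOMEM;
 */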
&sit); + if (IS_NODESEG(se->type)) + total_node_blocks += se->valid_blocks; /* build discard map only one time */ - if (f2fs_discard_en(sbi)) { - if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) { - memset(se->discard_map, 0xff, - SIT_VBLOCK_MAP_SIZE); - } else { - memcpy(se->discard_map, - se->cur_valid_map, - SIT_VBLOCK_MAP_SIZE); - sbi->discard_blks += - sbi->blocks_per_seg - - se->valid_blocks; - } + if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) { + memset(se->discard_map, 0xff, + SIT_VBLOCK_MAP_SIZE); + } else { + memcpy(se->discard_map, + se->cur_valid_map, + SIT_VBLOCK_MAP_SIZE); + sbi->discard_blks += + sbi->blocks_per_seg - + se->valid_blocks; } - if (sbi->segs_per_sec > 1) + if (__is_large_section(sbi)) get_sec_entry(sbi, start)->valid_blocks += se->valid_blocks; } @@ -3747,33 +4217,53 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) unsigned int old_valid_blocks; start = le32_to_cpu(segno_in_journal(journal, i)); + if (start >= MAIN_SEGS(sbi)) { + f2fs_err(sbi, "Wrong journal entry on segno %u", + start); + set_sbi_flag(sbi, SBI_NEED_FSCK); + err = -EFSCORRUPTED; + break; + } + se = &sit_i->sentries[start]; sit = sit_in_journal(journal, i); old_valid_blocks = se->valid_blocks; + if (IS_NODESEG(se->type)) + total_node_blocks -= old_valid_blocks; err = check_block_count(sbi, start, &sit); if (err) break; seg_info_from_raw_sit(se, &sit); + if (IS_NODESEG(se->type)) + total_node_blocks += se->valid_blocks; - if (f2fs_discard_en(sbi)) { - if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) { - memset(se->discard_map, 0xff, - SIT_VBLOCK_MAP_SIZE); - } else { - memcpy(se->discard_map, se->cur_valid_map, - SIT_VBLOCK_MAP_SIZE); - sbi->discard_blks += old_valid_blocks - - se->valid_blocks; - } + if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) { + memset(se->discard_map, 0xff, SIT_VBLOCK_MAP_SIZE); + } else { + memcpy(se->discard_map, se->cur_valid_map, + SIT_VBLOCK_MAP_SIZE); + sbi->discard_blks += old_valid_blocks; + sbi->discard_blks -= se->valid_blocks; } - if (sbi->segs_per_sec > 1) + if (__is_large_section(sbi)) { get_sec_entry(sbi, start)->valid_blocks += - se->valid_blocks - old_valid_blocks; + se->valid_blocks; + get_sec_entry(sbi, start)->valid_blocks -= + old_valid_blocks; + } } up_read(&curseg->journal_rwsem); + + if (!err && total_node_blocks != valid_node_count(sbi)) { + f2fs_err(sbi, "SIT is corrupted node# %u vs %u", + total_node_blocks, valid_node_count(sbi)); + set_sbi_flag(sbi, SBI_NEED_FSCK); + err = -EFSCORRUPTED; + } + return err; } @@ -3885,13 +4375,13 @@ static int sanity_check_curseg(struct f2fs_sb_info *sbi) if (!f2fs_test_bit(blkofs, se->cur_valid_map)) continue; out: - f2fs_msg(sbi->sb, KERN_ERR, - "Current segment's next free block offset is " - "inconsistent with bitmap, logtype:%u, " - "segno:%u, type:%u, next_blkoff:%u, blkofs:%u", - i, curseg->segno, curseg->alloc_type, - curseg->next_blkoff, blkofs); - return -EINVAL; + f2fs_err(sbi, + "Current segment's next free block offset is " + "inconsistent with bitmap, logtype:%u, " + "segno:%u, type:%u, next_blkoff:%u, blkofs:%u", + i, curseg->segno, curseg->alloc_type, + curseg->next_blkoff, blkofs); + return -EFSCORRUPTED; } } return 0; @@ -3907,7 +4397,7 @@ static void init_min_max_mtime(struct f2fs_sb_info *sbi) down_write(&sit_i->sentry_lock); - sit_i->min_mtime = LLONG_MAX; + sit_i->min_mtime = ULLONG_MAX; for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) { unsigned int i; @@ -3921,11 +4411,11 @@ static void init_min_max_mtime(struct f2fs_sb_info *sbi) if (sit_i->min_mtime > mtime) 
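/*
 * Editor's note: invariant summary, not part of the patch. While loading
 * SIT entries above, build_sit_entries() now keeps a running
 * total_node_blocks over node-type segments (subtracting the stale count
 * and adding the journalled one when a SIT journal entry overrides a
 * segment), and finally cross-checks it against the checkpoint:
 *
 *     if (!err && total_node_blocks != valid_node_count(sbi)) {
 *             set_sbi_flag(sbi, SBI_NEED_FSCK);
 *             err = -EFSCORRUPTED;
 *     }
 *
 * A mismatch means the SIT and the checkpoint disagree about how many
 * node blocks exist, which only fsck can reconcile.
 */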
sit_i->min_mtime = mtime; } - sit_i->max_mtime = get_mtime(sbi); + sit_i->max_mtime = get_mtime(sbi, false); up_write(&sit_i->sentry_lock); } -int build_segment_manager(struct f2fs_sb_info *sbi) +int f2fs_build_segment_manager(struct f2fs_sb_info *sbi) { struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); @@ -3954,6 +4444,7 @@ int build_segment_manager(struct f2fs_sb_info *sbi) sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC; sm_info->min_ipu_util = DEF_MIN_IPU_UTIL; sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS; + sm_info->min_seq_blocks = sbi->blocks_per_seg * sbi->segs_per_sec; sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS; sm_info->min_ssr_sections = reserved_sections(sbi); @@ -3962,7 +4453,7 @@ int build_segment_manager(struct f2fs_sb_info *sbi) init_rwsem(&sm_info->curseg_lock); if (!f2fs_readonly(sbi->sb)) { - err = create_flush_cmd_control(sbi); + err = f2fs_create_flush_cmd_control(sbi); if (err) return err; } @@ -4030,7 +4521,7 @@ static void destroy_dirty_segmap(struct f2fs_sb_info *sbi) destroy_victim_secmap(sbi); SM_I(sbi)->dirty_info = NULL; - kfree(dirty_i); + kvfree(dirty_i); } static void destroy_curseg(struct f2fs_sb_info *sbi) @@ -4042,10 +4533,10 @@ static void destroy_curseg(struct f2fs_sb_info *sbi) return; SM_I(sbi)->curseg_array = NULL; for (i = 0; i < NR_CURSEG_TYPE; i++) { - kfree(array[i].sum_blk); - kfree(array[i].journal); + kvfree(array[i].sum_blk); + kvfree(array[i].journal); } - kfree(array); + kvfree(array); } static void destroy_free_segmap(struct f2fs_sb_info *sbi) @@ -4056,7 +4547,7 @@ static void destroy_free_segmap(struct f2fs_sb_info *sbi) SM_I(sbi)->free_info = NULL; kvfree(free_i->free_segmap); kvfree(free_i->free_secmap); - kfree(free_i); + kvfree(free_i); } static void destroy_sit_info(struct f2fs_sb_info *sbi) @@ -4069,45 +4560,45 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi) if (sit_i->sentries) { for (start = 0; start < MAIN_SEGS(sbi); start++) { - kfree(sit_i->sentries[start].cur_valid_map); + kvfree(sit_i->sentries[start].cur_valid_map); #ifdef CONFIG_F2FS_CHECK_FS - kfree(sit_i->sentries[start].cur_valid_map_mir); + kvfree(sit_i->sentries[start].cur_valid_map_mir); #endif - kfree(sit_i->sentries[start].ckpt_valid_map); - kfree(sit_i->sentries[start].discard_map); + kvfree(sit_i->sentries[start].ckpt_valid_map); + kvfree(sit_i->sentries[start].discard_map); } } - kfree(sit_i->tmp_map); + kvfree(sit_i->tmp_map); kvfree(sit_i->sentries); kvfree(sit_i->sec_entries); kvfree(sit_i->dirty_sentries_bitmap); SM_I(sbi)->sit_info = NULL; - kfree(sit_i->sit_bitmap); + kvfree(sit_i->sit_bitmap); #ifdef CONFIG_F2FS_CHECK_FS - kfree(sit_i->sit_bitmap_mir); + kvfree(sit_i->sit_bitmap_mir); #endif - kfree(sit_i); + kvfree(sit_i); } -void destroy_segment_manager(struct f2fs_sb_info *sbi) +void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi) { struct f2fs_sm_info *sm_info = SM_I(sbi); if (!sm_info) return; - destroy_flush_cmd_control(sbi, true); + f2fs_destroy_flush_cmd_control(sbi, true); destroy_discard_cmd_control(sbi); destroy_dirty_segmap(sbi); destroy_curseg(sbi); destroy_free_segmap(sbi); destroy_sit_info(sbi); sbi->sm_info = NULL; - kfree(sm_info); + kvfree(sm_info); } -int __init create_segment_manager_caches(void) +int __init f2fs_create_segment_manager_caches(void) { discard_entry_slab = f2fs_kmem_cache_create("discard_entry", sizeof(struct discard_entry)); @@ -4140,7 +4631,7 @@ fail: return -ENOMEM; } -void destroy_segment_manager_caches(void) +void 
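/*
 * Editor's note: rationale, not part of the patch. The destroy paths
 * above switch kfree() to kvfree() because tables sized by MAIN_SEGS()
 * or MAIN_SECS() come from f2fs_kvzalloc(), which may fall back to
 * vmalloc() on large filesystems; kvfree() frees either kind correctly:
 *
 *     kvfree(sit_i->sentries);      - possibly vmalloc-backed
 *     kvfree(sit_i->tmp_map);       - kmalloc-backed, kvfree still safe
 *
 * Using kvfree() uniformly keeps the teardown code independent of which
 * allocator actually satisfied each request.
 */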
f2fs_destroy_segment_manager_caches(void) { kmem_cache_destroy(sit_entry_set_slab); kmem_cache_destroy(discard_cmd_slab); diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index d28073cff8cf..b74602813a05 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/segment.h * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/blkdev.h> #include <linux/backing-dev.h> @@ -85,7 +82,7 @@ (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) & ((sbi)->blocks_per_seg - 1)) #define GET_SEGNO(sbi, blk_addr) \ - ((((blk_addr) == NULL_ADDR) || ((blk_addr) == NEW_ADDR)) ? \ + ((!__is_valid_data_blkaddr(blk_addr)) ? \ NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi), \ GET_SEGNO_FROM_SEG0(sbi, blk_addr))) #define BLKS_PER_SEC(sbi) \ @@ -112,7 +109,7 @@ #define START_SEGNO(segno) \ (SIT_BLOCK_OFFSET(segno) * SIT_ENTRY_PER_BLOCK) #define SIT_BLK_CNT(sbi) \ - ((MAIN_SEGS(sbi) + SIT_ENTRY_PER_BLOCK - 1) / SIT_ENTRY_PER_BLOCK) + DIV_ROUND_UP(MAIN_SEGS(sbi), SIT_ENTRY_PER_BLOCK) #define f2fs_bitmap_size(nr) \ (BITS_TO_LONGS(nr) * sizeof(unsigned long)) @@ -215,6 +212,8 @@ struct segment_allocation { #define IS_DUMMY_WRITTEN_PAGE(page) \ (page_private(page) == (unsigned long)DUMMY_WRITTEN_PAGE) +#define MAX_SKIP_GC_COUNT 16 + struct inmem_pages { struct list_head list; struct page *page; @@ -334,12 +333,18 @@ static inline unsigned int get_valid_blocks(struct f2fs_sb_info *sbi, * In order to get # of valid blocks in a section instantly from many * segments, f2fs manages two counting structures separately. 
*/ - if (use_section && sbi->segs_per_sec > 1) + if (use_section && __is_large_section(sbi)) return get_sec_entry(sbi, segno)->valid_blocks; else return get_seg_entry(sbi, segno)->valid_blocks; } +static inline unsigned int get_ckpt_valid_blocks(struct f2fs_sb_info *sbi, + unsigned int segno) +{ + return get_seg_entry(sbi, segno)->ckpt_valid_blocks; +} + static inline void seg_info_from_raw_sit(struct seg_entry *se, struct f2fs_sit_entry *rs) { @@ -375,6 +380,7 @@ static inline void seg_info_to_sit_page(struct f2fs_sb_info *sbi, int i; raw_sit = (struct f2fs_sit_block *)page_address(page); + memset(raw_sit, 0, PAGE_SIZE); for (i = 0; i < end - start; i++) { rs = &raw_sit->entries[i]; se = get_seg_entry(sbi, start + i); @@ -576,6 +582,15 @@ static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, reserved_sections(sbi) + needed); } +static inline int f2fs_is_checkpoint_ready(struct f2fs_sb_info *sbi) +{ + if (likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED))) + return 0; + if (likely(!has_not_enough_free_secs(sbi, 0, 0))) + return 0; + return -ENOSPC; +} + static inline bool excess_prefree_segs(struct f2fs_sb_info *sbi) { return prefree_segments(sbi) > SM_I(sbi)->rec_prefree_segments; @@ -641,17 +656,15 @@ static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno) f2fs_bug_on(sbi, segno > TOTAL_SEGS(sbi) - 1); } -static inline void verify_block_addr(struct f2fs_io_info *fio, block_t blk_addr) +static inline void verify_fio_blkaddr(struct f2fs_io_info *fio) { struct f2fs_sb_info *sbi = fio->sbi; - if (PAGE_TYPE_OF_BIO(fio->type) == META && - (!is_read_io(fio->op) || fio->is_meta)) - BUG_ON(blk_addr < SEG0_BLKADDR(sbi) || - blk_addr >= MAIN_BLKADDR(sbi)); - else - BUG_ON(blk_addr < MAIN_BLKADDR(sbi) || - blk_addr >= MAX_BLKADDR(sbi)); + if (__is_valid_data_blkaddr(fio->old_blkaddr)) + verify_blkaddr(sbi, fio->old_blkaddr, __is_meta_io(fio) ? + META_GENERIC : DATA_GENERIC); + verify_blkaddr(sbi, fio->new_blkaddr, __is_meta_io(fio) ? + META_GENERIC : DATA_GENERIC_ENHANCE); } /* @@ -680,21 +693,19 @@ static inline int check_block_count(struct f2fs_sb_info *sbi, } while (cur_pos < sbi->blocks_per_seg); if (unlikely(GET_SIT_VBLOCKS(raw_sit) != valid_blocks)) { - f2fs_msg(sbi->sb, KERN_ERR, - "Mismatch valid blocks %d vs. %d", - GET_SIT_VBLOCKS(raw_sit), valid_blocks); + f2fs_err(sbi, "Mismatch valid blocks %d vs. 
%d", + GET_SIT_VBLOCKS(raw_sit), valid_blocks); set_sbi_flag(sbi, SBI_NEED_FSCK); - return -EINVAL; + return -EFSCORRUPTED; } /* check segment usage, and check boundary of a given segment number */ if (unlikely(GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg || segno > TOTAL_SEGS(sbi) - 1)) { - f2fs_msg(sbi->sb, KERN_ERR, - "Wrong valid blocks %d or segno %u", - GET_SIT_VBLOCKS(raw_sit), segno); + f2fs_err(sbi, "Wrong valid blocks %d or segno %u", + GET_SIT_VBLOCKS(raw_sit), segno); set_sbi_flag(sbi, SBI_NEED_FSCK); - return -EINVAL; + return -EFSCORRUPTED; } return 0; } @@ -744,11 +755,23 @@ static inline void set_to_next_sit(struct sit_info *sit_i, unsigned int start) #endif } -static inline unsigned long long get_mtime(struct f2fs_sb_info *sbi) +static inline unsigned long long get_mtime(struct f2fs_sb_info *sbi, + bool base_time) { struct sit_info *sit_i = SIT_I(sbi); - return sit_i->elapsed_time + CURRENT_TIME_SEC.tv_sec - - sit_i->mounted_time; + time64_t diff, now = ktime_get_real_seconds(); + + if (now >= sit_i->mounted_time) + return sit_i->elapsed_time + now - sit_i->mounted_time; + + /* system time is set to the past */ + if (!base_time) { + diff = sit_i->mounted_time - now; + if (sit_i->elapsed_time >= diff) + return sit_i->elapsed_time - diff; + return 0; + } + return sit_i->elapsed_time; } static inline void set_summary(struct f2fs_summary *sum, nid_t nid, @@ -772,15 +795,6 @@ static inline block_t sum_blk_addr(struct f2fs_sb_info *sbi, int base, int type) - (base + 1) + type; } -static inline bool no_fggc_candidate(struct f2fs_sb_info *sbi, - unsigned int secno) -{ - if (get_valid_blocks(sbi, GET_SEG_FROM_SEC(sbi, secno), true) > - sbi->fggc_threshold) - return true; - return false; -} - static inline bool sec_usage_check(struct f2fs_sb_info *sbi, unsigned int secno) { if (IS_CURSEC(sbi, secno) || (sbi->cur_victim_sec == secno)) @@ -849,7 +863,7 @@ static inline void wake_up_discard_thread(struct f2fs_sb_info *sbi, bool force) } } mutex_unlock(&dcc->cmd_lock); - if (!wakeup) + if (!wakeup || !is_idle(sbi, DISCARD_TIME)) return; wake_up: dcc->discard_wake = 1; diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c index 0b5664a1a6cc..a467aca29cfe 100644 --- a/fs/f2fs/shrinker.c +++ b/fs/f2fs/shrinker.c @@ -1,13 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0 /* * f2fs shrinker support * the basic infra was copied from fs/ubifs/shrinker.c * * Copyright (c) 2015 Motorola Mobility * Copyright (c) 2015 Jaegeuk Kim <jaegeuk@kernel.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
*/ #include <linux/fs.h> #include <linux/f2fs_fs.h> @@ -109,11 +106,11 @@ unsigned long f2fs_shrink_scan(struct shrinker *shrink, /* shrink clean nat cache entries */ if (freed < nr) - freed += try_to_free_nats(sbi, nr - freed); + freed += f2fs_try_to_free_nats(sbi, nr - freed); /* shrink free nids cache entries */ if (freed < nr) - freed += try_to_free_nids(sbi, nr - freed); + freed += f2fs_try_to_free_nids(sbi, nr - freed); spin_lock(&f2fs_list_lock); p = p->next; @@ -138,6 +135,6 @@ void f2fs_leave_shrinker(struct f2fs_sb_info *sbi) f2fs_shrink_extent_tree(sbi, __count_extent_cache(sbi)); spin_lock(&f2fs_list_lock); - list_del(&sbi->s_list); + list_del_init(&sbi->s_list); spin_unlock(&f2fs_list_lock); } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index ebe4668c4997..281ec7da8128 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/super.c * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/module.h> #include <linux/init.h> @@ -41,7 +38,7 @@ static struct kmem_cache *f2fs_inode_cachep; #ifdef CONFIG_F2FS_FAULT_INJECTION -char *fault_name[FAULT_MAX] = { +const char *f2fs_fault_name[FAULT_MAX] = { [FAULT_KMALLOC] = "kmalloc", [FAULT_KVMALLOC] = "kvmalloc", [FAULT_PAGE_ALLOC] = "page alloc", @@ -53,22 +50,27 @@ char *fault_name[FAULT_MAX] = { [FAULT_DIR_DEPTH] = "too big dir depth", [FAULT_EVICT_INODE] = "evict_inode fail", [FAULT_TRUNCATE] = "truncate fail", - [FAULT_IO] = "IO error", + [FAULT_READ_IO] = "read IO error", [FAULT_CHECKPOINT] = "checkpoint error", + [FAULT_DISCARD] = "discard error", + [FAULT_WRITE_IO] = "write IO error", }; -static void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, - unsigned int rate) +void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate, + unsigned int type) { struct f2fs_fault_info *ffi = &F2FS_OPTION(sbi).fault_info; if (rate) { atomic_set(&ffi->inject_ops, 0); ffi->inject_rate = rate; - ffi->inject_type = (1 << FAULT_MAX) - 1; - } else { - memset(ffi, 0, sizeof(struct f2fs_fault_info)); } + + if (type) + ffi->inject_type = type; + + if (!rate && !type) + memset(ffi, 0, sizeof(struct f2fs_fault_info)); } #endif @@ -113,6 +115,7 @@ enum { Opt_mode, Opt_io_size_bits, Opt_fault_injection, + Opt_fault_type, Opt_lazytime, Opt_nolazytime, Opt_quota, @@ -133,6 +136,10 @@ enum { Opt_alloc, Opt_fsync, Opt_test_dummy_encryption, + Opt_checkpoint_disable, + Opt_checkpoint_disable_cap, + Opt_checkpoint_disable_cap_perc, + Opt_checkpoint_enable, Opt_err, }; @@ -170,6 +177,7 @@ static match_table_t f2fs_tokens = { {Opt_mode, "mode=%s"}, {Opt_io_size_bits, "io_bits=%u"}, {Opt_fault_injection, "fault_injection=%u"}, + {Opt_fault_type, "fault_type=%u"}, {Opt_lazytime, "lazytime"}, {Opt_nolazytime, "nolazytime"}, {Opt_quota, "quota"}, @@ -190,44 +198,52 @@ static match_table_t f2fs_tokens = { {Opt_alloc, "alloc_mode=%s"}, {Opt_fsync, "fsync_mode=%s"}, {Opt_test_dummy_encryption, "test_dummy_encryption"}, + {Opt_checkpoint_disable, "checkpoint=disable"}, + {Opt_checkpoint_disable_cap, "checkpoint=disable:%u"}, + {Opt_checkpoint_disable_cap_perc, "checkpoint=disable:%u%%"}, + {Opt_checkpoint_enable, "checkpoint=enable"}, {Opt_err, NULL}, }; -void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...) 
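/*
 * Editor's note: usage summary, not part of the patch. The reworked
 * f2fs_build_fault_attr(sbi, rate, type) lets the injection rate and the
 * fault-type bitmask be set independently, matching the two mount
 * options added below:
 *
 *     mount -o fault_injection=1000   - all FAULT_* types, roughly one
 *                                       injection per 1000 checked calls
 *     mount -o fault_type=2           - bitmask over the FAULT_* indexes,
 *                                       here BIT(FAULT_KVMALLOC) only,
 *                                       leaving the rate unchanged
 *
 * Calling it as f2fs_build_fault_attr(sbi, 0, 0) clears the whole
 * fault_info, which is what default_options() now does unconditionally.
 */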
+void f2fs_printk(struct f2fs_sb_info *sbi, const char *fmt, ...) { struct va_format vaf; va_list args; + int level; va_start(args, fmt); - vaf.fmt = fmt; + + level = printk_get_level(fmt); + vaf.fmt = printk_skip_level(fmt); vaf.va = &args; - printk_ratelimited("%sF2FS-fs (%s): %pV\n", level, sb->s_id, &vaf); + printk("%c%cF2FS-fs (%s): %pV\n", + KERN_SOH_ASCII, level, sbi->sb->s_id, &vaf); + va_end(args); } static inline void limit_reserve_root(struct f2fs_sb_info *sbi) { - block_t limit = (sbi->user_block_count << 1) / 1000; + block_t limit = min((sbi->user_block_count << 1) / 1000, + sbi->user_block_count - sbi->reserved_blocks); /* limit is 0.2% */ if (test_opt(sbi, RESERVE_ROOT) && F2FS_OPTION(sbi).root_reserved_blocks > limit) { F2FS_OPTION(sbi).root_reserved_blocks = limit; - f2fs_msg(sbi->sb, KERN_INFO, - "Reduce reserved blocks for root = %u", - F2FS_OPTION(sbi).root_reserved_blocks); + f2fs_info(sbi, "Reduce reserved blocks for root = %u", + F2FS_OPTION(sbi).root_reserved_blocks); } if (!test_opt(sbi, RESERVE_ROOT) && (!uid_eq(F2FS_OPTION(sbi).s_resuid, make_kuid(&init_user_ns, F2FS_DEF_RESUID)) || !gid_eq(F2FS_OPTION(sbi).s_resgid, make_kgid(&init_user_ns, F2FS_DEF_RESGID)))) - f2fs_msg(sbi->sb, KERN_INFO, - "Ignore s_resuid=%u, s_resgid=%u w/o reserve_root", - from_kuid_munged(&init_user_ns, - F2FS_OPTION(sbi).s_resuid), - from_kgid_munged(&init_user_ns, - F2FS_OPTION(sbi).s_resgid)); + f2fs_info(sbi, "Ignore s_resuid=%u, s_resgid=%u w/o reserve_root", + from_kuid_munged(&init_user_ns, + F2FS_OPTION(sbi).s_resuid), + from_kgid_munged(&init_user_ns, + F2FS_OPTION(sbi).s_resgid)); } static void init_once(void *foo) @@ -248,42 +264,36 @@ static int f2fs_set_qf_name(struct super_block *sb, int qtype, int ret = -EINVAL; if (sb_any_quota_loaded(sb) && !F2FS_OPTION(sbi).s_qf_names[qtype]) { - f2fs_msg(sb, KERN_ERR, - "Cannot change journaled " - "quota options when quota turned on"); + f2fs_err(sbi, "Cannot change journaled quota options when quota turned on"); return -EINVAL; } - if (f2fs_sb_has_quota_ino(sb)) { - f2fs_msg(sb, KERN_INFO, - "QUOTA feature is enabled, so ignore qf_name"); + if (f2fs_sb_has_quota_ino(sbi)) { + f2fs_info(sbi, "QUOTA feature is enabled, so ignore qf_name"); return 0; } qname = match_strdup(args); if (!qname) { - f2fs_msg(sb, KERN_ERR, - "Not enough memory for storing quotafile name"); - return -EINVAL; + f2fs_err(sbi, "Not enough memory for storing quotafile name"); + return -ENOMEM; } if (F2FS_OPTION(sbi).s_qf_names[qtype]) { if (strcmp(F2FS_OPTION(sbi).s_qf_names[qtype], qname) == 0) ret = 0; else - f2fs_msg(sb, KERN_ERR, - "%s quota file already specified", + f2fs_err(sbi, "%s quota file already specified", QTYPE2NAME(qtype)); goto errout; } if (strchr(qname, '/')) { - f2fs_msg(sb, KERN_ERR, - "quotafile must be on filesystem root"); + f2fs_err(sbi, "quotafile must be on filesystem root"); goto errout; } F2FS_OPTION(sbi).s_qf_names[qtype] = qname; set_opt(sbi, QUOTA); return 0; errout: - kfree(qname); + kvfree(qname); return ret; } @@ -292,11 +302,10 @@ static int f2fs_clear_qf_name(struct super_block *sb, int qtype) struct f2fs_sb_info *sbi = F2FS_SB(sb); if (sb_any_quota_loaded(sb) && F2FS_OPTION(sbi).s_qf_names[qtype]) { - f2fs_msg(sb, KERN_ERR, "Cannot change journaled quota options" - " when quota turned on"); + f2fs_err(sbi, "Cannot change journaled quota options when quota turned on"); return -EINVAL; } - kfree(F2FS_OPTION(sbi).s_qf_names[qtype]); + kvfree(F2FS_OPTION(sbi).s_qf_names[qtype]); F2FS_OPTION(sbi).s_qf_names[qtype] = NULL; return 0; 
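/*
 * Editor's note: a sketch under assumptions, not part of the patch. The
 * new f2fs_printk() above expects the level encoded in fmt itself via
 * printk_get_level()/printk_skip_level(), so the per-level helpers used
 * throughout this diff (f2fs_err, f2fs_warn, f2fs_info, ...) are
 * presumably thin macros in f2fs.h along these lines (their definitions
 * are not shown in this hunk):
 *
 *     #define f2fs_err(sbi, fmt, ...)                         \
 *             f2fs_printk(sbi, KERN_ERR fmt, ##__VA_ARGS__)
 *     #define f2fs_info(sbi, fmt, ...)                        \
 *             f2fs_printk(sbi, KERN_INFO fmt, ##__VA_ARGS__)
 *
 * KERN_ERR expands to an SOH prefix plus a level digit, which is exactly
 * what the printk_get_level() call strips back out.
 */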
} @@ -308,9 +317,8 @@ static int f2fs_check_quota_options(struct f2fs_sb_info *sbi) * 'grpquota' mount options are allowed even without quota feature * to support legacy quotas in quota files. */ - if (test_opt(sbi, PRJQUOTA) && !f2fs_sb_has_project_quota(sbi->sb)) { - f2fs_msg(sbi->sb, KERN_ERR, "Project quota feature not enabled. " - "Cannot enable project quota enforcement."); + if (test_opt(sbi, PRJQUOTA) && !f2fs_sb_has_project_quota(sbi)) { + f2fs_err(sbi, "Project quota feature not enabled. Cannot enable project quota enforcement."); return -1; } if (F2FS_OPTION(sbi).s_qf_names[USRQUOTA] || @@ -330,29 +338,20 @@ static int f2fs_check_quota_options(struct f2fs_sb_info *sbi) if (test_opt(sbi, GRPQUOTA) || test_opt(sbi, USRQUOTA) || test_opt(sbi, PRJQUOTA)) { - f2fs_msg(sbi->sb, KERN_ERR, "old and new quota " - "format mixing"); + f2fs_err(sbi, "old and new quota format mixing"); return -1; } if (!F2FS_OPTION(sbi).s_jquota_fmt) { - f2fs_msg(sbi->sb, KERN_ERR, "journaled quota format " - "not specified"); + f2fs_err(sbi, "journaled quota format not specified"); return -1; } } - if (f2fs_sb_has_quota_ino(sbi->sb) && F2FS_OPTION(sbi).s_jquota_fmt) { - f2fs_msg(sbi->sb, KERN_INFO, - "QUOTA feature is enabled, so ignore jquota_fmt"); + if (f2fs_sb_has_quota_ino(sbi) && F2FS_OPTION(sbi).s_jquota_fmt) { + f2fs_info(sbi, "QUOTA feature is enabled, so ignore jquota_fmt"); F2FS_OPTION(sbi).s_jquota_fmt = 0; } - if (f2fs_sb_has_quota_ino(sbi->sb) && f2fs_readonly(sbi->sb)) { - f2fs_msg(sbi->sb, KERN_INFO, - "Filesystem with quota feature cannot be mounted RDWR " - "without CONFIG_QUOTA"); - return -1; - } return 0; } #endif @@ -360,7 +359,6 @@ static int f2fs_check_quota_options(struct f2fs_sb_info *sbi) static int parse_options(struct super_block *sb, char *options) { struct f2fs_sb_info *sbi = F2FS_SB(sb); - struct request_queue *q; substring_t args[MAX_OPT_ARGS]; char *p, *name; int arg = 0; @@ -400,10 +398,10 @@ static int parse_options(struct super_block *sb, char *options) set_opt(sbi, BG_GC); set_opt(sbi, FORCE_FG_GC); } else { - kfree(name); + kvfree(name); return -EINVAL; } - kfree(name); + kvfree(name); break; case Opt_disable_roll_forward: set_opt(sbi, DISABLE_ROLL_FORWARD); @@ -415,19 +413,11 @@ static int parse_options(struct super_block *sb, char *options) return -EINVAL; break; case Opt_discard: - q = bdev_get_queue(sb->s_bdev); - if (blk_queue_discard(q)) { - set_opt(sbi, DISCARD); - } else if (!f2fs_sb_has_blkzoned(sb)) { - f2fs_msg(sb, KERN_WARNING, - "mounting with \"discard\" option, but " - "the device does not support discard"); - } + set_opt(sbi, DISCARD); break; case Opt_nodiscard: - if (f2fs_sb_has_blkzoned(sb)) { - f2fs_msg(sb, KERN_WARNING, - "discard is required for zoned block devices"); + if (f2fs_sb_has_blkzoned(sbi)) { + f2fs_warn(sbi, "discard is required for zoned block devices"); return -EINVAL; } clear_opt(sbi, DISCARD); @@ -459,20 +449,16 @@ static int parse_options(struct super_block *sb, char *options) break; #else case Opt_user_xattr: - f2fs_msg(sb, KERN_INFO, - "user_xattr options not supported"); + f2fs_info(sbi, "user_xattr options not supported"); break; case Opt_nouser_xattr: - f2fs_msg(sb, KERN_INFO, - "nouser_xattr options not supported"); + f2fs_info(sbi, "nouser_xattr options not supported"); break; case Opt_inline_xattr: - f2fs_msg(sb, KERN_INFO, - "inline_xattr options not supported"); + f2fs_info(sbi, "inline_xattr options not supported"); break; case Opt_noinline_xattr: - f2fs_msg(sb, KERN_INFO, - "noinline_xattr options not supported"); + 
f2fs_info(sbi, "noinline_xattr options not supported"); break; #endif #ifdef CONFIG_F2FS_FS_POSIX_ACL @@ -484,10 +470,10 @@ static int parse_options(struct super_block *sb, char *options) break; #else case Opt_acl: - f2fs_msg(sb, KERN_INFO, "acl options not supported"); + f2fs_info(sbi, "acl options not supported"); break; case Opt_noacl: - f2fs_msg(sb, KERN_INFO, "noacl options not supported"); + f2fs_info(sbi, "noacl options not supported"); break; #endif case Opt_active_logs: @@ -537,9 +523,8 @@ static int parse_options(struct super_block *sb, char *options) if (args->from && match_int(args, &arg)) return -EINVAL; if (test_opt(sbi, RESERVE_ROOT)) { - f2fs_msg(sb, KERN_INFO, - "Preserve previous reserve_root=%u", - F2FS_OPTION(sbi).root_reserved_blocks); + f2fs_info(sbi, "Preserve previous reserve_root=%u", + F2FS_OPTION(sbi).root_reserved_blocks); } else { F2FS_OPTION(sbi).root_reserved_blocks = arg; set_opt(sbi, RESERVE_ROOT); @@ -550,8 +535,7 @@ static int parse_options(struct super_block *sb, char *options) return -EINVAL; uid = make_kuid(current_user_ns(), arg); if (!uid_valid(uid)) { - f2fs_msg(sb, KERN_ERR, - "Invalid uid value %d", arg); + f2fs_err(sbi, "Invalid uid value %d", arg); return -EINVAL; } F2FS_OPTION(sbi).s_resuid = uid; @@ -561,8 +545,7 @@ static int parse_options(struct super_block *sb, char *options) return -EINVAL; gid = make_kgid(current_user_ns(), arg); if (!gid_valid(gid)) { - f2fs_msg(sb, KERN_ERR, - "Invalid gid value %d", arg); + f2fs_err(sbi, "Invalid gid value %d", arg); return -EINVAL; } F2FS_OPTION(sbi).s_resgid = gid; @@ -574,11 +557,9 @@ static int parse_options(struct super_block *sb, char *options) return -ENOMEM; if (strlen(name) == 8 && !strncmp(name, "adaptive", 8)) { - if (f2fs_sb_has_blkzoned(sb)) { - f2fs_msg(sb, KERN_WARNING, - "adaptive mode is not allowed with " - "zoned block device feature"); - kfree(name); + if (f2fs_sb_has_blkzoned(sbi)) { + f2fs_warn(sbi, "adaptive mode is not allowed with zoned block device feature"); + kvfree(name); return -EINVAL; } set_opt_mode(sbi, F2FS_MOUNT_ADAPTIVE); @@ -586,33 +567,44 @@ static int parse_options(struct super_block *sb, char *options) !strncmp(name, "lfs", 3)) { set_opt_mode(sbi, F2FS_MOUNT_LFS); } else { - kfree(name); + kvfree(name); return -EINVAL; } - kfree(name); + kvfree(name); break; case Opt_io_size_bits: if (args->from && match_int(args, &arg)) return -EINVAL; - if (arg > __ilog2_u32(BIO_MAX_PAGES)) { - f2fs_msg(sb, KERN_WARNING, - "Not support %d, larger than %d", - 1 << arg, BIO_MAX_PAGES); + if (arg <= 0 || arg > __ilog2_u32(BIO_MAX_PAGES)) { + f2fs_warn(sbi, "Not support %d, larger than %d", + 1 << arg, BIO_MAX_PAGES); return -EINVAL; } F2FS_OPTION(sbi).write_io_size_bits = arg; break; +#ifdef CONFIG_F2FS_FAULT_INJECTION case Opt_fault_injection: if (args->from && match_int(args, &arg)) return -EINVAL; -#ifdef CONFIG_F2FS_FAULT_INJECTION - f2fs_build_fault_attr(sbi, arg); + f2fs_build_fault_attr(sbi, arg, F2FS_ALL_FAULT_TYPE); set_opt(sbi, FAULT_INJECTION); + break; + + case Opt_fault_type: + if (args->from && match_int(args, &arg)) + return -EINVAL; + f2fs_build_fault_attr(sbi, 0, arg); + set_opt(sbi, FAULT_INJECTION); + break; #else - f2fs_msg(sb, KERN_INFO, - "FAULT_INJECTION was not selected"); -#endif + case Opt_fault_injection: + f2fs_info(sbi, "fault_injection options not supported"); break; + + case Opt_fault_type: + f2fs_info(sbi, "fault_type options not supported"); + break; +#endif case Opt_lazytime: sb->s_flags |= MS_LAZYTIME; break; @@ -690,8 +682,7 @@ static int 
parse_options(struct super_block *sb, char *options) case Opt_jqfmt_vfsv0: case Opt_jqfmt_vfsv1: case Opt_noquota: - f2fs_msg(sb, KERN_INFO, - "quota operations not supported"); + f2fs_info(sbi, "quota operations not supported"); break; #endif case Opt_whint: @@ -708,10 +699,10 @@ static int parse_options(struct super_block *sb, char *options) !strncmp(name, "fs-based", 8)) { F2FS_OPTION(sbi).whint_mode = WHINT_MODE_FS; } else { - kfree(name); + kvfree(name); return -EINVAL; } - kfree(name); + kvfree(name); break; case Opt_alloc: name = match_strdup(&args[0]); @@ -725,10 +716,10 @@ static int parse_options(struct super_block *sb, char *options) !strncmp(name, "reuse", 5)) { F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_REUSE; } else { - kfree(name); + kvfree(name); return -EINVAL; } - kfree(name); + kvfree(name); break; case Opt_fsync: name = match_strdup(&args[0]); @@ -745,71 +736,104 @@ static int parse_options(struct super_block *sb, char *options) F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_NOBARRIER; } else { - kfree(name); + kvfree(name); return -EINVAL; } - kfree(name); + kvfree(name); break; case Opt_test_dummy_encryption: #ifdef CONFIG_F2FS_FS_ENCRYPTION - if (!f2fs_sb_has_encrypt(sb)) { - f2fs_msg(sb, KERN_ERR, "Encrypt feature is off"); + if (!f2fs_sb_has_encrypt(sbi)) { + f2fs_err(sbi, "Encrypt feature is off"); return -EINVAL; } F2FS_OPTION(sbi).test_dummy_encryption = true; - f2fs_msg(sb, KERN_INFO, - "Test dummy encryption mode enabled"); + f2fs_info(sbi, "Test dummy encryption mode enabled"); #else - f2fs_msg(sb, KERN_INFO, - "Test dummy encryption mount option ignored"); + f2fs_info(sbi, "Test dummy encryption mount option ignored"); #endif break; + case Opt_checkpoint_disable_cap_perc: + if (args->from && match_int(args, &arg)) + return -EINVAL; + if (arg < 0 || arg > 100) + return -EINVAL; + if (arg == 100) + F2FS_OPTION(sbi).unusable_cap = + sbi->user_block_count; + else + F2FS_OPTION(sbi).unusable_cap = + (sbi->user_block_count / 100) * arg; + set_opt(sbi, DISABLE_CHECKPOINT); + break; + case Opt_checkpoint_disable_cap: + if (args->from && match_int(args, &arg)) + return -EINVAL; + F2FS_OPTION(sbi).unusable_cap = arg; + set_opt(sbi, DISABLE_CHECKPOINT); + break; + case Opt_checkpoint_disable: + set_opt(sbi, DISABLE_CHECKPOINT); + break; + case Opt_checkpoint_enable: + clear_opt(sbi, DISABLE_CHECKPOINT); + break; default: - f2fs_msg(sb, KERN_ERR, - "Unrecognized mount option \"%s\" or missing value", - p); + f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value", + p); return -EINVAL; } } #ifdef CONFIG_QUOTA if (f2fs_check_quota_options(sbi)) return -EINVAL; +#else + if (f2fs_sb_has_quota_ino(sbi) && !f2fs_readonly(sbi->sb)) { + f2fs_info(sbi, "Filesystem with quota feature cannot be mounted RDWR without CONFIG_QUOTA"); + return -EINVAL; + } + if (f2fs_sb_has_project_quota(sbi) && !f2fs_readonly(sbi->sb)) { + f2fs_err(sbi, "Filesystem with project quota feature cannot be mounted RDWR without CONFIG_QUOTA"); + return -EINVAL; + } #endif if (F2FS_IO_SIZE_BITS(sbi) && !test_opt(sbi, LFS)) { - f2fs_msg(sb, KERN_ERR, - "Should set mode=lfs with %uKB-sized IO", - F2FS_IO_SIZE_KB(sbi)); + f2fs_err(sbi, "Should set mode=lfs with %uKB-sized IO", + F2FS_IO_SIZE_KB(sbi)); return -EINVAL; } if (test_opt(sbi, INLINE_XATTR_SIZE)) { - if (!f2fs_sb_has_extra_attr(sb) || - !f2fs_sb_has_flexible_inline_xattr(sb)) { - f2fs_msg(sb, KERN_ERR, - "extra_attr or flexible_inline_xattr " - "feature is off"); + int min_size, max_size; + + if (!f2fs_sb_has_extra_attr(sbi) || + 
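/*
 * Editor's note: worked example, not part of the patch. For
 * checkpoint=disable:n% above, the unusable-space cap is computed as
 * (user_block_count / 100) * n, e.g. with user_block_count = 26214400
 * (a 100 GiB volume of 4 KiB blocks) and n = 30:
 *
 *     unusable_cap = (26214400 / 100) * 30 = 7864320 blocks (~30 GiB)
 *
 * The n == 100 branch assigns user_block_count directly so the integer
 * division cannot shave off the remainder blocks.
 */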
!f2fs_sb_has_flexible_inline_xattr(sbi)) { + f2fs_err(sbi, "extra_attr or flexible_inline_xattr feature is off"); return -EINVAL; } if (!test_opt(sbi, INLINE_XATTR)) { - f2fs_msg(sb, KERN_ERR, - "inline_xattr_size option should be " - "set with inline_xattr option"); + f2fs_err(sbi, "inline_xattr_size option should be set with inline_xattr option"); return -EINVAL; } - if (!F2FS_OPTION(sbi).inline_xattr_size || - F2FS_OPTION(sbi).inline_xattr_size >= - DEF_ADDRS_PER_INODE - - F2FS_TOTAL_EXTRA_ATTR_SIZE - - DEF_INLINE_RESERVED_SIZE - - DEF_MIN_INLINE_SIZE) { - f2fs_msg(sb, KERN_ERR, - "inline xattr size is out of range"); + + min_size = sizeof(struct f2fs_xattr_header) / sizeof(__le32); + max_size = MAX_INLINE_XATTR_SIZE; + + if (F2FS_OPTION(sbi).inline_xattr_size < min_size || + F2FS_OPTION(sbi).inline_xattr_size > max_size) { + f2fs_err(sbi, "inline xattr size is out of range: %d ~ %d", + min_size, max_size); return -EINVAL; } } + if (test_opt(sbi, DISABLE_CHECKPOINT) && test_opt(sbi, LFS)) { + f2fs_err(sbi, "LFS not compatible with checkpoint=disable\n"); + return -EINVAL; + } + /* Not pass down write hints if the number of active logs is lesser * than NR_CURSEG_TYPE. */ @@ -830,15 +854,14 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) /* Initialize f2fs-specific inode info */ atomic_set(&fi->dirty_pages, 0); - fi->i_current_depth = 1; init_rwsem(&fi->i_sem); INIT_LIST_HEAD(&fi->dirty_list); INIT_LIST_HEAD(&fi->gdirty_list); INIT_LIST_HEAD(&fi->inmem_ilist); INIT_LIST_HEAD(&fi->inmem_pages); mutex_init(&fi->inmem_lock); - init_rwsem(&fi->dio_rwsem[READ]); - init_rwsem(&fi->dio_rwsem[WRITE]); + init_rwsem(&fi->i_gc_rwsem[READ]); + init_rwsem(&fi->i_gc_rwsem[WRITE]); init_rwsem(&fi->i_mmap_sem); init_rwsem(&fi->i_xattr_sem); @@ -866,7 +889,7 @@ static int f2fs_drop_inode(struct inode *inode) /* some remained atomic pages should discarded */ if (f2fs_is_atomic_file(inode)) - drop_inmem_pages(inode); + f2fs_drop_inmem_pages(inode); /* should remain fi->extent_tree for writepage */ f2fs_destroy_extent_node(inode); @@ -874,6 +897,10 @@ static int f2fs_drop_inode(struct inode *inode) sb_start_intwrite(inode->i_sb); f2fs_i_size_write(inode, 0); + f2fs_submit_merged_write_cond(F2FS_I_SB(inode), + inode, NULL, 0, DATA); + truncate_inode_pages_final(inode->i_mapping); + if (F2FS_HAS_BLOCKS(inode)) f2fs_truncate(inode); @@ -976,10 +1003,10 @@ static void destroy_device_list(struct f2fs_sb_info *sbi) for (i = 0; i < sbi->s_ndevs; i++) { blkdev_put(FDEV(i).bdev, FMODE_EXCL); #ifdef CONFIG_BLK_DEV_ZONED - kfree(FDEV(i).blkz_type); + kvfree(FDEV(i).blkz_seq); #endif } - kfree(sbi->devs); + kvfree(sbi->devs); } static void f2fs_put_super(struct super_block *sb) @@ -998,32 +1025,30 @@ static void f2fs_put_super(struct super_block *sb) * But, the previous checkpoint was not done by umount, it needs to do * clean checkpoint again. 
*/ - if (is_sbi_flag_set(sbi, SBI_IS_DIRTY) || - !is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) { + if ((is_sbi_flag_set(sbi, SBI_IS_DIRTY) || + !is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG))) { struct cp_control cpc = { .reason = CP_UMOUNT, }; - write_checkpoint(sbi, &cpc); + f2fs_write_checkpoint(sbi, &cpc); } /* be sure to wait for any on-going discard commands */ - dropped = f2fs_wait_discard_bios(sbi); + dropped = f2fs_issue_discard_timeout(sbi); - if (f2fs_discard_en(sbi) && !sbi->discard_blks && !dropped) { + if ((f2fs_hw_support_discard(sbi) || f2fs_hw_should_discard(sbi)) && + !sbi->discard_blks && !dropped) { struct cp_control cpc = { .reason = CP_UMOUNT | CP_TRIMMED, }; - write_checkpoint(sbi, &cpc); + f2fs_write_checkpoint(sbi, &cpc); } - /* write_checkpoint can update stat informaion */ - f2fs_destroy_stats(sbi); - /* * normally superblock is clean, so we need to release this. * In addition, EIO will skip do checkpoint, we need this as well. */ - release_ino_entry(sbi, true); + f2fs_release_ino_entry(sbi, true); f2fs_leave_shrinker(sbi); mutex_unlock(&sbi->umount_mutex); @@ -1031,33 +1056,46 @@ static void f2fs_put_super(struct super_block *sb) /* our cp_error case, we can wait for any writeback page */ f2fs_flush_merged_writes(sbi); + f2fs_wait_on_all_pages_writeback(sbi); + + f2fs_bug_on(sbi, sbi->fsync_node_num); + iput(sbi->node_inode); + sbi->node_inode = NULL; + iput(sbi->meta_inode); + sbi->meta_inode = NULL; + + /* + * iput() can update stat information, if f2fs_write_checkpoint() + * above failed with error. + */ + f2fs_destroy_stats(sbi); /* destroy f2fs internal modules */ - destroy_node_manager(sbi); - destroy_segment_manager(sbi); + f2fs_destroy_node_manager(sbi); + f2fs_destroy_segment_manager(sbi); - kfree(sbi->ckpt); + kvfree(sbi->ckpt); f2fs_unregister_sysfs(sbi); sb->s_fs_info = NULL; if (sbi->s_chksum_driver) crypto_free_shash(sbi->s_chksum_driver); - kfree(sbi->raw_super); + kvfree(sbi->raw_super); destroy_device_list(sbi); if (sbi->write_io_dummy) mempool_destroy(sbi->write_io_dummy); #ifdef CONFIG_QUOTA for (i = 0; i < MAXQUOTAS; i++) - kfree(F2FS_OPTION(sbi).s_qf_names[i]); + kvfree(F2FS_OPTION(sbi).s_qf_names[i]); #endif destroy_percpu_info(sbi); for (i = 0; i < NR_PAGE_TYPE; i++) - kfree(sbi->write_io[i]); - kfree(sbi); + kvfree(sbi->write_io[i]); + kvfree(sbi); } int f2fs_sync_fs(struct super_block *sb, int sync) @@ -1067,6 +1105,8 @@ int f2fs_sync_fs(struct super_block *sb, int sync) if (unlikely(f2fs_cp_error(sbi))) return 0; + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) + return 0; trace_f2fs_sync_fs(sb, sync); @@ -1079,7 +1119,7 @@ int f2fs_sync_fs(struct super_block *sb, int sync) cpc.reason = __get_cp_reason(sbi); mutex_lock(&sbi->gc_mutex); - err = write_checkpoint(sbi, &cpc); + err = f2fs_write_checkpoint(sbi, &cpc); mutex_unlock(&sbi->gc_mutex); } f2fs_trace_ios(NULL, 1); @@ -1166,6 +1206,14 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_blocks = total_count - start_count; buf->f_bfree = user_block_count - valid_user_blocks(sbi) - sbi->current_reserved_blocks; + + spin_lock(&sbi->stat_lock); + if (unlikely(buf->f_bfree <= sbi->unusable_block_count)) + buf->f_bfree = 0; + else + buf->f_bfree -= sbi->unusable_block_count; + spin_unlock(&sbi->stat_lock); + if (buf->f_bfree > F2FS_OPTION(sbi).root_reserved_blocks) buf->f_bavail = buf->f_bfree - F2FS_OPTION(sbi).root_reserved_blocks; @@ -1250,6 +1298,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, ",disable_roll_forward"); if 
(test_opt(sbi, DISCARD)) seq_puts(seq, ",discard"); + else + seq_puts(seq, ",nodiscard"); if (test_opt(sbi, NOHEAP)) seq_puts(seq, ",no_heap"); else @@ -1310,11 +1360,15 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) from_kgid_munged(&init_user_ns, F2FS_OPTION(sbi).s_resgid)); if (F2FS_IO_SIZE_BITS(sbi)) - seq_printf(seq, ",io_size=%uKB", F2FS_IO_SIZE_KB(sbi)); + seq_printf(seq, ",io_bits=%u", + F2FS_OPTION(sbi).write_io_size_bits); #ifdef CONFIG_F2FS_FAULT_INJECTION - if (test_opt(sbi, FAULT_INJECTION)) + if (test_opt(sbi, FAULT_INJECTION)) { seq_printf(seq, ",fault_injection=%u", F2FS_OPTION(sbi).fault_info.inject_rate); + seq_printf(seq, ",fault_type=%u", + F2FS_OPTION(sbi).fault_info.inject_type); + } #endif #ifdef CONFIG_QUOTA if (test_opt(sbi, QUOTA)) @@ -1341,10 +1395,15 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) else if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE) seq_printf(seq, ",alloc_mode=%s", "reuse"); + if (test_opt(sbi, DISABLE_CHECKPOINT)) + seq_printf(seq, ",checkpoint=disable:%u", + F2FS_OPTION(sbi).unusable_cap); if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_POSIX) seq_printf(seq, ",fsync_mode=%s", "posix"); else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT) seq_printf(seq, ",fsync_mode=%s", "strict"); + else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_NOBARRIER) + seq_printf(seq, ",fsync_mode=%s", "nobarrier"); return 0; } @@ -1357,7 +1416,8 @@ static void default_options(struct f2fs_sb_info *sbi) F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_DEFAULT; F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_POSIX; F2FS_OPTION(sbi).test_dummy_encryption = false; - sbi->readdir_ra = 1; + F2FS_OPTION(sbi).s_resuid = make_kuid(&init_user_ns, F2FS_DEF_RESUID); + F2FS_OPTION(sbi).s_resgid = make_kgid(&init_user_ns, F2FS_DEF_RESGID); set_opt(sbi, BG_GC); set_opt(sbi, INLINE_XATTR); @@ -1366,13 +1426,14 @@ static void default_options(struct f2fs_sb_info *sbi) set_opt(sbi, EXTENT_CACHE); set_opt(sbi, NOHEAP); sbi->sb->s_flags |= MS_LAZYTIME; + clear_opt(sbi, DISABLE_CHECKPOINT); + F2FS_OPTION(sbi).unusable_cap = 0; set_opt(sbi, FLUSH_MERGE); - if (f2fs_sb_has_blkzoned(sbi->sb)) { + set_opt(sbi, DISCARD); + if (f2fs_sb_has_blkzoned(sbi)) set_opt_mode(sbi, F2FS_MOUNT_LFS); - set_opt(sbi, DISCARD); - } else { + else set_opt_mode(sbi, F2FS_MOUNT_ADAPTIVE); - } #ifdef CONFIG_F2FS_FS_XATTR set_opt(sbi, XATTR_USER); @@ -1381,14 +1442,82 @@ static void default_options(struct f2fs_sb_info *sbi) set_opt(sbi, POSIX_ACL); #endif -#ifdef CONFIG_F2FS_FAULT_INJECTION - f2fs_build_fault_attr(sbi, 0); -#endif + f2fs_build_fault_attr(sbi, 0, 0); } #ifdef CONFIG_QUOTA static int f2fs_enable_quotas(struct super_block *sb); #endif + +static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi) +{ + unsigned int s_flags = sbi->sb->s_flags; + struct cp_control cpc; + int err = 0; + int ret; + block_t unusable; + + if (s_flags & MS_RDONLY) { + f2fs_err(sbi, "checkpoint=disable on readonly fs"); + return -EINVAL; + } + sbi->sb->s_flags |= MS_ACTIVE; + + f2fs_update_time(sbi, DISABLE_TIME); + + while (!f2fs_time_over(sbi, DISABLE_TIME)) { + mutex_lock(&sbi->gc_mutex); + err = f2fs_gc(sbi, true, false, NULL_SEGNO); + if (err == -ENODATA) { + err = 0; + break; + } + if (err && err != -EAGAIN) + break; + } + + ret = sync_filesystem(sbi->sb); + if (ret || err) { + err = ret ? 
ret: err; + goto restore_flag; + } + + unusable = f2fs_get_unusable_blocks(sbi); + if (f2fs_disable_cp_again(sbi, unusable)) { + err = -EAGAIN; + goto restore_flag; + } + + mutex_lock(&sbi->gc_mutex); + cpc.reason = CP_PAUSE; + set_sbi_flag(sbi, SBI_CP_DISABLED); + err = f2fs_write_checkpoint(sbi, &cpc); + if (err) + goto out_unlock; + + spin_lock(&sbi->stat_lock); + sbi->unusable_block_count = unusable; + spin_unlock(&sbi->stat_lock); + +out_unlock: + mutex_unlock(&sbi->gc_mutex); +restore_flag: + sbi->sb->s_flags = s_flags; /* Restore MS_RDONLY status */ + return err; +} + +static void f2fs_enable_checkpoint(struct f2fs_sb_info *sbi) +{ + mutex_lock(&sbi->gc_mutex); + f2fs_dirty_to_prefree(sbi); + + clear_sbi_flag(sbi, SBI_CP_DISABLED); + set_sbi_flag(sbi, SBI_IS_DIRTY); + mutex_unlock(&sbi->gc_mutex); + + f2fs_sync_fs(sbi->sb, 1); +} + static int f2fs_remount(struct super_block *sb, int *flags, char *data) { struct f2fs_sb_info *sbi = F2FS_SB(sb); @@ -1398,6 +1527,8 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) bool need_restart_gc = false; bool need_stop_gc = false; bool no_extent_cache = !test_opt(sbi, EXTENT_CACHE); + bool disable_checkpoint = test_opt(sbi, DISABLE_CHECKPOINT); + bool checkpoint_changed; #ifdef CONFIG_QUOTA int i, j; #endif @@ -1418,7 +1549,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) GFP_KERNEL); if (!org_mount_opt.s_qf_names[i]) { for (j = 0; j < i; j++) - kfree(org_mount_opt.s_qf_names[j]); + kvfree(org_mount_opt.s_qf_names[j]); return -ENOMEM; } } else { @@ -1430,8 +1561,8 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) /* recover superblocks we couldn't write due to previous RO mount */ if (!(*flags & MS_RDONLY) && is_sbi_flag_set(sbi, SBI_NEED_SB_WRITE)) { err = f2fs_commit_super(sbi, false); - f2fs_msg(sb, KERN_INFO, - "Try to recover all the superblocks, ret: %d", err); + f2fs_info(sbi, "Try to recover all the superblocks, ret: %d", + err); if (!err) clear_sbi_flag(sbi, SBI_NEED_SB_WRITE); } @@ -1442,6 +1573,8 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) err = parse_options(sb, data); if (err) goto restore_opts; + checkpoint_changed = + disable_checkpoint != test_opt(sbi, DISABLE_CHECKPOINT); /* * Previous and new state of filesystem is RO, @@ -1460,7 +1593,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) sb->s_flags &= ~MS_RDONLY; if (sb_any_quota_suspended(sb)) { dquot_resume(sb, -1); - } else if (f2fs_sb_has_quota_ino(sb)) { + } else if (f2fs_sb_has_quota_ino(sbi)) { err = f2fs_enable_quotas(sb); if (err) goto restore_opts; @@ -1470,8 +1603,13 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) /* disallow enable/disable extent_cache dynamically */ if (no_extent_cache == !!test_opt(sbi, EXTENT_CACHE)) { err = -EINVAL; - f2fs_msg(sbi->sb, KERN_WARNING, - "switch extent_cache option is not allowed"); + f2fs_warn(sbi, "switch extent_cache option is not allowed"); + goto restore_opts; + } + + if ((*flags & MS_RDONLY) && test_opt(sbi, DISABLE_CHECKPOINT)) { + err = -EINVAL; + f2fs_warn(sbi, "disabling checkpoint not compatible with read-only"); goto restore_opts; } @@ -1482,11 +1620,11 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) */ if ((*flags & MS_RDONLY) || !test_opt(sbi, BG_GC)) { if (sbi->gc_thread) { - stop_gc_thread(sbi); + f2fs_stop_gc_thread(sbi); need_restart_gc = true; } } else if (!sbi->gc_thread) { - err = start_gc_thread(sbi); + err = 
f2fs_start_gc_thread(sbi); if (err) goto restore_opts; need_stop_gc = true; @@ -1503,15 +1641,25 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) clear_sbi_flag(sbi, SBI_IS_CLOSE); } + if (checkpoint_changed) { + if (test_opt(sbi, DISABLE_CHECKPOINT)) { + err = f2fs_disable_checkpoint(sbi); + if (err) + goto restore_gc; + } else { + f2fs_enable_checkpoint(sbi); + } + } + /* * We stop issue flush thread if FS is mounted as RO * or if flush_merge is not passed in mount option. */ if ((*flags & MS_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) { clear_opt(sbi, FLUSH_MERGE); - destroy_flush_cmd_control(sbi, false); + f2fs_destroy_flush_cmd_control(sbi, false); } else { - err = create_flush_cmd_control(sbi); + err = f2fs_create_flush_cmd_control(sbi); if (err) goto restore_gc; } @@ -1519,27 +1667,27 @@ skip: #ifdef CONFIG_QUOTA /* Release old quota file names */ for (i = 0; i < MAXQUOTAS; i++) - kfree(org_mount_opt.s_qf_names[i]); + kvfree(org_mount_opt.s_qf_names[i]); #endif /* Update the POSIXACL Flag */ sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | (test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0); limit_reserve_root(sbi); + *flags = (*flags & ~MS_LAZYTIME) | (sb->s_flags & MS_LAZYTIME); return 0; restore_gc: if (need_restart_gc) { - if (start_gc_thread(sbi)) - f2fs_msg(sbi->sb, KERN_WARNING, - "background gc thread has stopped"); + if (f2fs_start_gc_thread(sbi)) + f2fs_warn(sbi, "background gc thread has stopped"); } else if (need_stop_gc) { - stop_gc_thread(sbi); + f2fs_stop_gc_thread(sbi); } restore_opts: #ifdef CONFIG_QUOTA F2FS_OPTION(sbi).s_jquota_fmt = org_mount_opt.s_jquota_fmt; for (i = 0; i < MAXQUOTAS; i++) { - kfree(F2FS_OPTION(sbi).s_qf_names[i]); + kvfree(F2FS_OPTION(sbi).s_qf_names[i]); F2FS_OPTION(sbi).s_qf_names[i] = org_mount_opt.s_qf_names[i]; } #endif @@ -1578,6 +1726,7 @@ repeat: congestion_wait(BLK_RW_ASYNC, HZ/50); goto repeat; } + set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR); return PTR_ERR(page); } @@ -1589,6 +1738,7 @@ repeat: } if (unlikely(!PageUptodate(page))) { f2fs_put_page(page, 1); + set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR); return -EIO; } @@ -1630,6 +1780,7 @@ retry: congestion_wait(BLK_RW_ASYNC, HZ/50); goto retry; } + set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR); break; } @@ -1666,6 +1817,11 @@ static qsize_t *f2fs_get_reserved_space(struct inode *inode) static int f2fs_quota_on_mount(struct f2fs_sb_info *sbi, int type) { + if (is_set_ckpt_flags(sbi, CP_QUOTA_NEED_FSCK_FLAG)) { + f2fs_err(sbi, "quota sysfile may be corrupted, skip loading it"); + return 0; + } + return dquot_quota_on_mount(sbi->sb, F2FS_OPTION(sbi).s_qf_names[type], F2FS_OPTION(sbi).s_jquota_fmt, type); } @@ -1675,11 +1831,10 @@ int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly) int enabled = 0; int i, err; - if (f2fs_sb_has_quota_ino(sbi->sb) && rdonly) { + if (f2fs_sb_has_quota_ino(sbi) && rdonly) { err = f2fs_enable_quotas(sbi->sb); if (err) { - f2fs_msg(sbi->sb, KERN_ERR, - "Cannot turn on quota_ino: %d", err); + f2fs_err(sbi, "Cannot turn on quota_ino: %d", err); return 0; } return 1; @@ -1692,8 +1847,8 @@ int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly) enabled = 1; continue; } - f2fs_msg(sbi->sb, KERN_ERR, - "Cannot turn on quotas: %d on %d", err, i); + f2fs_err(sbi, "Cannot turn on quotas: %d on %d", + err, i); } } return enabled; @@ -1706,7 +1861,7 @@ static int f2fs_quota_enable(struct super_block *sb, int type, int format_id, unsigned long qf_inum; int err; - BUG_ON(!f2fs_sb_has_quota_ino(sb)); + 
BUG_ON(!f2fs_sb_has_quota_ino(F2FS_SB(sb))); qf_inum = f2fs_qf_ino(sb, type); if (!qf_inum) @@ -1714,8 +1869,7 @@ static int f2fs_quota_enable(struct super_block *sb, int type, int format_id, qf_inode = f2fs_iget(sb, qf_inum); if (IS_ERR(qf_inode)) { - f2fs_msg(sb, KERN_ERR, - "Bad quota inode %u:%lu", type, qf_inum); + f2fs_err(F2FS_SB(sb), "Bad quota inode %u:%lu", type, qf_inum); return PTR_ERR(qf_inode); } @@ -1728,15 +1882,22 @@ static int f2fs_quota_enable(struct super_block *sb, int type, int format_id, static int f2fs_enable_quotas(struct super_block *sb) { + struct f2fs_sb_info *sbi = F2FS_SB(sb); int type, err = 0; unsigned long qf_inum; bool quota_mopt[MAXQUOTAS] = { - test_opt(F2FS_SB(sb), USRQUOTA), - test_opt(F2FS_SB(sb), GRPQUOTA), - test_opt(F2FS_SB(sb), PRJQUOTA), + test_opt(sbi, USRQUOTA), + test_opt(sbi, GRPQUOTA), + test_opt(sbi, PRJQUOTA), }; + if (is_set_ckpt_flags(F2FS_SB(sb), CP_QUOTA_NEED_FSCK_FLAG)) { + f2fs_err(sbi, "quota file may be corrupted, skip loading it"); + return 0; + } + sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE; + for (type = 0; type < MAXQUOTAS; type++) { qf_inum = f2fs_qf_ino(sb, type); if (qf_inum) { @@ -1744,12 +1905,12 @@ static int f2fs_enable_quotas(struct super_block *sb) DQUOT_USAGE_ENABLED | (quota_mopt[type] ? DQUOT_LIMITS_ENABLED : 0)); if (err) { - f2fs_msg(sb, KERN_ERR, - "Failed to enable quota tracking " - "(type=%d, err=%d). Please run " - "fsck to fix.", type, err); + f2fs_err(sbi, "Failed to enable quota tracking (type=%d, err=%d). Please run fsck to fix.", + type, err); for (type--; type >= 0; type--) dquot_quota_off(sb, type); + set_sbi_flag(F2FS_SB(sb), + SBI_QUOTA_NEED_REPAIR); return err; } } @@ -1757,35 +1918,65 @@ static int f2fs_enable_quotas(struct super_block *sb) return 0; } -static int f2fs_quota_sync(struct super_block *sb, int type) +int f2fs_quota_sync(struct super_block *sb, int type) { + struct f2fs_sb_info *sbi = F2FS_SB(sb); struct quota_info *dqopt = sb_dqopt(sb); int cnt; int ret; + /* + * do_quotactl + * f2fs_quota_sync + * down_read(quota_sem) + * dquot_writeback_dquots() + * f2fs_dquot_commit + * block_operation + * down_read(quota_sem) + */ + f2fs_lock_op(sbi); + + down_read(&sbi->quota_sem); ret = dquot_writeback_dquots(sb, type); if (ret) - return ret; + goto out; /* * Now when everything is written we can discard the pagecache so * that userspace sees the changes. 
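
The call-chain comment above is the motivation for the new f2fs_lock_op() call: the quota-sync path and the dquot write-back path can both end up wanting the checkpoint lock and sbi->quota_sem, so they have to agree on an acquisition order. A rough single-threaded userspace sketch of that discipline follows; the pthread rwlocks, function names, and the checkpoint-side body are stand-ins for illustration, not the actual kernel paths.

    #include <pthread.h>

    /* stand-ins: cp_lock ~ f2fs_lock_op()/block_operations(),
     * quota_lock ~ sbi->quota_sem */
    static pthread_rwlock_t cp_lock = PTHREAD_RWLOCK_INITIALIZER;
    static pthread_rwlock_t quota_lock = PTHREAD_RWLOCK_INITIALIZER;

    static void quota_sync_path(void)
    {
        /* checkpoint lock first, as f2fs_quota_sync() now does */
        pthread_rwlock_rdlock(&cp_lock);
        pthread_rwlock_rdlock(&quota_lock);
        /* ... dquot_writeback_dquots() work happens here ... */
        pthread_rwlock_unlock(&quota_lock);
        pthread_rwlock_unlock(&cp_lock);
    }

    static void checkpoint_path(void)
    {
        /* same order on the checkpoint side, so no ABBA deadlock */
        pthread_rwlock_wrlock(&cp_lock);
        pthread_rwlock_rdlock(&quota_lock);
        /* ... quota state flushed under the checkpoint lock ... */
        pthread_rwlock_unlock(&quota_lock);
        pthread_rwlock_unlock(&cp_lock);
    }

    int main(void)
    {
        quota_sync_path();
        checkpoint_path();
        return 0;
    }

The exact kernel call chains differ, but the invariant the comment describes is the same: never take quota_sem before the checkpoint lock.
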
*/ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { + struct address_space *mapping; + if (type != -1 && cnt != type) continue; if (!sb_has_quota_active(sb, cnt)) continue; - ret = filemap_write_and_wait(dqopt->files[cnt]->i_mapping); + mapping = dqopt->files[cnt]->i_mapping; + + ret = filemap_fdatawrite(mapping); if (ret) - return ret; + goto out; + + /* if we are using journalled quota */ + if (is_journalled_quota(sbi)) + continue; + + ret = filemap_fdatawait(mapping); + if (ret) + set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR); inode_lock(dqopt->files[cnt]); truncate_inode_pages(&dqopt->files[cnt]->i_data, 0); inode_unlock(dqopt->files[cnt]); } - return 0; +out: + if (ret) + set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR); + up_read(&sbi->quota_sem); + f2fs_unlock_op(sbi); + return ret; } static int f2fs_quota_on(struct super_block *sb, int type, int format_id, @@ -1805,9 +1996,8 @@ static int f2fs_quota_on(struct super_block *sb, int type, int format_id, inode = d_inode(path->dentry); inode_lock(inode); - F2FS_I(inode)->i_flags |= FS_NOATIME_FL | FS_IMMUTABLE_FL; - inode_set_flags(inode, S_NOATIME | S_IMMUTABLE, - S_NOATIME | S_IMMUTABLE); + F2FS_I(inode)->i_flags |= F2FS_NOATIME_FL | F2FS_IMMUTABLE_FL; + f2fs_set_inode_flags(inode); inode_unlock(inode); f2fs_mark_inode_dirty_sync(inode, false); @@ -1822,15 +2012,17 @@ static int f2fs_quota_off(struct super_block *sb, int type) if (!inode || !igrab(inode)) return dquot_quota_off(sb, type); - f2fs_quota_sync(sb, type); + err = f2fs_quota_sync(sb, type); + if (err) + goto out_put; err = dquot_quota_off(sb, type); - if (err || f2fs_sb_has_quota_ino(sb)) + if (err || f2fs_sb_has_quota_ino(F2FS_SB(sb))) goto out_put; inode_lock(inode); - F2FS_I(inode)->i_flags &= ~(FS_NOATIME_FL | FS_IMMUTABLE_FL); - inode_set_flags(inode, 0, S_NOATIME | S_IMMUTABLE); + F2FS_I(inode)->i_flags &= ~(F2FS_NOATIME_FL | F2FS_IMMUTABLE_FL); + f2fs_set_inode_flags(inode); inode_unlock(inode); f2fs_mark_inode_dirty_sync(inode, false); out_put: @@ -1841,9 +2033,105 @@ out_put: void f2fs_quota_off_umount(struct super_block *sb) { int type; + int err; + + for (type = 0; type < MAXQUOTAS; type++) { + err = f2fs_quota_off(sb, type); + if (err) { + int ret = dquot_quota_off(sb, type); - for (type = 0; type < MAXQUOTAS; type++) - f2fs_quota_off(sb, type); + f2fs_err(F2FS_SB(sb), "Fail to turn off disk quota (type: %d, err: %d, ret:%d), Please run fsck to fix it.", + type, err, ret); + set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR); + } + } + /* + * In case of checkpoint=disable, we must flush quota blocks. + * This can cause NULL exception for node_inode in end_io, since + * put_super already dropped it. 
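
f2fs_quota_off_umount() above follows a belt-and-braces teardown pattern: try the filesystem-aware path, fall back to the generic dquot path on failure, and record that fsck is needed if anything complained. A standalone sketch of that pattern, with invented stubs in place of f2fs_quota_off()/dquot_quota_off() and a plain int for the SBI_QUOTA_NEED_REPAIR flag:

    #include <errno.h>
    #include <stdio.h>

    static int need_repair;    /* stand-in for SBI_QUOTA_NEED_REPAIR */

    /* invented stubs: fs-aware path fails for type 1, generic one succeeds */
    static int fs_quota_off(int type)      { return type == 1 ? -EIO : 0; }
    static int generic_quota_off(int type) { (void)type; return 0; }

    static void quota_off_all(int nr_types)
    {
        for (int type = 0; type < nr_types; type++) {
            int err = fs_quota_off(type);

            if (err) {
                int ret = generic_quota_off(type);

                fprintf(stderr,
                    "quota off failed (type:%d err:%d ret:%d), run fsck\n",
                    type, err, ret);
                need_repair = 1;
            }
        }
    }

    int main(void)
    {
        quota_off_all(3);
        return need_repair;    /* non-zero: image flagged for repair */
    }
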
+ */ + sync_filesystem(sb); +} + +static void f2fs_truncate_quota_inode_pages(struct super_block *sb) +{ + struct quota_info *dqopt = sb_dqopt(sb); + int type; + + for (type = 0; type < MAXQUOTAS; type++) { + if (!dqopt->files[type]) + continue; + f2fs_inode_synced(dqopt->files[type]); + } +} + +static int f2fs_dquot_commit(struct dquot *dquot) +{ + struct f2fs_sb_info *sbi = F2FS_SB(dquot->dq_sb); + int ret; + + down_read(&sbi->quota_sem); + ret = dquot_commit(dquot); + if (ret < 0) + set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); + up_read(&sbi->quota_sem); + return ret; +} + +static int f2fs_dquot_acquire(struct dquot *dquot) +{ + struct f2fs_sb_info *sbi = F2FS_SB(dquot->dq_sb); + int ret; + + down_read(&sbi->quota_sem); + ret = dquot_acquire(dquot); + if (ret < 0) + set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); + up_read(&sbi->quota_sem); + return ret; +} + +static int f2fs_dquot_release(struct dquot *dquot) +{ + struct f2fs_sb_info *sbi = F2FS_SB(dquot->dq_sb); + int ret; + + down_read(&sbi->quota_sem); + ret = dquot_release(dquot); + if (ret < 0) + set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); + up_read(&sbi->quota_sem); + return ret; +} + +static int f2fs_dquot_mark_dquot_dirty(struct dquot *dquot) +{ + struct super_block *sb = dquot->dq_sb; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + int ret; + + down_read(&sbi->quota_sem); + ret = dquot_mark_dquot_dirty(dquot); + + /* if we are using journalled quota */ + if (is_journalled_quota(sbi)) + set_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH); + + up_read(&sbi->quota_sem); + return ret; +} + +static int f2fs_dquot_commit_info(struct super_block *sb, int type) +{ + struct f2fs_sb_info *sbi = F2FS_SB(sb); + int ret; + + down_read(&sbi->quota_sem); + ret = dquot_commit_info(sb, type); + if (ret < 0) + set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); + up_read(&sbi->quota_sem); + return ret; } #if 0 @@ -1856,11 +2144,11 @@ static int f2fs_get_projid(struct inode *inode, kprojid_t *projid) static const struct dquot_operations f2fs_quota_operations = { .get_reserved_space = f2fs_get_reserved_space, - .write_dquot = dquot_commit, - .acquire_dquot = dquot_acquire, - .release_dquot = dquot_release, - .mark_dirty = dquot_mark_dquot_dirty, - .write_info = dquot_commit_info, + .write_dquot = f2fs_dquot_commit, + .acquire_dquot = f2fs_dquot_acquire, + .release_dquot = f2fs_dquot_release, + .mark_dirty = f2fs_dquot_mark_dquot_dirty, + .write_info = f2fs_dquot_commit_info, .alloc_dquot = dquot_alloc, .destroy_dquot = dquot_destroy, #if 0 @@ -1879,6 +2167,11 @@ static const struct quotactl_ops f2fs_quotactl_ops = { .set_dqblk = dquot_set_dqblk, }; #else +int f2fs_quota_sync(struct super_block *sb, int type) +{ + return 0; +} + void f2fs_quota_off_umount(struct super_block *sb) { } @@ -1924,7 +2217,7 @@ static int f2fs_set_context(struct inode *inode, const void *ctx, size_t len, * if LOST_FOUND feature is enabled. * */ - if (f2fs_sb_has_lost_found(sbi->sb) && + if (f2fs_sb_has_lost_found(sbi) && inode->i_ino == F2FS_ROOT_INO(sbi)) return -EPERM; @@ -1938,19 +2231,13 @@ static bool f2fs_dummy_context(struct inode *inode) return DUMMY_ENCRYPTION_ENABLED(F2FS_I_SB(inode)); } -static unsigned f2fs_max_namelen(struct inode *inode) -{ - return S_ISLNK(inode->i_mode) ? 
- inode->i_sb->s_blocksize : F2FS_NAME_LEN; -} - static const struct fscrypt_operations f2fs_cryptops = { .key_prefix = "f2fs:", .get_context = f2fs_get_context, .set_context = f2fs_set_context, .dummy_context = f2fs_dummy_context, .empty_dir = f2fs_empty_dir, - .max_namelen = f2fs_max_namelen, + .max_namelen = F2FS_NAME_LEN, }; #endif @@ -1960,7 +2247,7 @@ static struct inode *f2fs_nfs_get_inode(struct super_block *sb, struct f2fs_sb_info *sbi = F2FS_SB(sb); struct inode *inode; - if (check_nid_range(sbi, ino)) + if (f2fs_check_nid_range(sbi, ino)) return ERR_PTR(-ESTALE); /* @@ -2002,7 +2289,7 @@ static const struct export_operations f2fs_export_ops = { static loff_t max_file_blocks(void) { loff_t result = 0; - loff_t leaf_count = ADDRS_PER_BLOCK; + loff_t leaf_count = DEF_ADDRS_PER_BLOCK; /* * note: previously, result is equal to (DEF_ADDRS_PER_INODE - @@ -2063,55 +2350,49 @@ static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi, (segment_count << log_blocks_per_seg); if (segment0_blkaddr != cp_blkaddr) { - f2fs_msg(sb, KERN_INFO, - "Mismatch start address, segment0(%u) cp_blkaddr(%u)", - segment0_blkaddr, cp_blkaddr); + f2fs_info(sbi, "Mismatch start address, segment0(%u) cp_blkaddr(%u)", + segment0_blkaddr, cp_blkaddr); return true; } if (cp_blkaddr + (segment_count_ckpt << log_blocks_per_seg) != sit_blkaddr) { - f2fs_msg(sb, KERN_INFO, - "Wrong CP boundary, start(%u) end(%u) blocks(%u)", - cp_blkaddr, sit_blkaddr, - segment_count_ckpt << log_blocks_per_seg); + f2fs_info(sbi, "Wrong CP boundary, start(%u) end(%u) blocks(%u)", + cp_blkaddr, sit_blkaddr, + segment_count_ckpt << log_blocks_per_seg); return true; } if (sit_blkaddr + (segment_count_sit << log_blocks_per_seg) != nat_blkaddr) { - f2fs_msg(sb, KERN_INFO, - "Wrong SIT boundary, start(%u) end(%u) blocks(%u)", - sit_blkaddr, nat_blkaddr, - segment_count_sit << log_blocks_per_seg); + f2fs_info(sbi, "Wrong SIT boundary, start(%u) end(%u) blocks(%u)", + sit_blkaddr, nat_blkaddr, + segment_count_sit << log_blocks_per_seg); return true; } if (nat_blkaddr + (segment_count_nat << log_blocks_per_seg) != ssa_blkaddr) { - f2fs_msg(sb, KERN_INFO, - "Wrong NAT boundary, start(%u) end(%u) blocks(%u)", - nat_blkaddr, ssa_blkaddr, - segment_count_nat << log_blocks_per_seg); + f2fs_info(sbi, "Wrong NAT boundary, start(%u) end(%u) blocks(%u)", + nat_blkaddr, ssa_blkaddr, + segment_count_nat << log_blocks_per_seg); return true; } if (ssa_blkaddr + (segment_count_ssa << log_blocks_per_seg) != main_blkaddr) { - f2fs_msg(sb, KERN_INFO, - "Wrong SSA boundary, start(%u) end(%u) blocks(%u)", - ssa_blkaddr, main_blkaddr, - segment_count_ssa << log_blocks_per_seg); + f2fs_info(sbi, "Wrong SSA boundary, start(%u) end(%u) blocks(%u)", + ssa_blkaddr, main_blkaddr, + segment_count_ssa << log_blocks_per_seg); return true; } if (main_end_blkaddr > seg_end_blkaddr) { - f2fs_msg(sb, KERN_INFO, - "Wrong MAIN_AREA boundary, start(%u) end(%u) block(%u)", - main_blkaddr, - segment0_blkaddr + - (segment_count << log_blocks_per_seg), - segment_count_main << log_blocks_per_seg); + f2fs_info(sbi, "Wrong MAIN_AREA boundary, start(%u) end(%u) block(%u)", + main_blkaddr, + segment0_blkaddr + + (segment_count << log_blocks_per_seg), + segment_count_main << log_blocks_per_seg); return true; } else if (main_end_blkaddr < seg_end_blkaddr) { int err = 0; @@ -2128,12 +2409,11 @@ static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi, err = __f2fs_commit_super(bh, NULL); res = err ? 
"failed" : "done"; } - f2fs_msg(sb, KERN_INFO, - "Fix alignment : %s, start(%u) end(%u) block(%u)", - res, main_blkaddr, - segment0_blkaddr + - (segment_count << log_blocks_per_seg), - segment_count_main << log_blocks_per_seg); + f2fs_info(sbi, "Fix alignment : %s, start(%u) end(%u) block(%u)", + res, main_blkaddr, + segment0_blkaddr + + (segment_count << log_blocks_per_seg), + segment_count_main << log_blocks_per_seg); if (err) return true; } @@ -2143,41 +2423,56 @@ static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi, static int sanity_check_raw_super(struct f2fs_sb_info *sbi, struct buffer_head *bh) { + block_t segment_count, segs_per_sec, secs_per_zone; + block_t total_sections, blocks_per_seg; struct f2fs_super_block *raw_super = (struct f2fs_super_block *) (bh->b_data + F2FS_SUPER_OFFSET); - struct super_block *sb = sbi->sb; unsigned int blocksize; + size_t crc_offset = 0; + __u32 crc = 0; - if (F2FS_SUPER_MAGIC != le32_to_cpu(raw_super->magic)) { - f2fs_msg(sb, KERN_INFO, - "Magic Mismatch, valid(0x%x) - read(0x%x)", - F2FS_SUPER_MAGIC, le32_to_cpu(raw_super->magic)); - return 1; + if (le32_to_cpu(raw_super->magic) != F2FS_SUPER_MAGIC) { + f2fs_info(sbi, "Magic Mismatch, valid(0x%x) - read(0x%x)", + F2FS_SUPER_MAGIC, le32_to_cpu(raw_super->magic)); + return -EINVAL; + } + + /* Check checksum_offset and crc in superblock */ + if (__F2FS_HAS_FEATURE(raw_super, F2FS_FEATURE_SB_CHKSUM)) { + crc_offset = le32_to_cpu(raw_super->checksum_offset); + if (crc_offset != + offsetof(struct f2fs_super_block, crc)) { + f2fs_info(sbi, "Invalid SB checksum offset: %zu", + crc_offset); + return -EFSCORRUPTED; + } + crc = le32_to_cpu(raw_super->crc); + if (!f2fs_crc_valid(sbi, crc, raw_super, crc_offset)) { + f2fs_info(sbi, "Invalid SB checksum value: %u", crc); + return -EFSCORRUPTED; + } } /* Currently, support only 4KB page cache size */ if (F2FS_BLKSIZE != PAGE_SIZE) { - f2fs_msg(sb, KERN_INFO, - "Invalid page_cache_size (%lu), supports only 4KB\n", - PAGE_SIZE); - return 1; + f2fs_info(sbi, "Invalid page_cache_size (%lu), supports only 4KB", + PAGE_SIZE); + return -EFSCORRUPTED; } /* Currently, support only 4KB block size */ blocksize = 1 << le32_to_cpu(raw_super->log_blocksize); if (blocksize != F2FS_BLKSIZE) { - f2fs_msg(sb, KERN_INFO, - "Invalid blocksize (%u), supports only 4KB\n", - blocksize); - return 1; + f2fs_info(sbi, "Invalid blocksize (%u), supports only 4KB", + blocksize); + return -EFSCORRUPTED; } /* check log blocks per segment */ if (le32_to_cpu(raw_super->log_blocks_per_seg) != 9) { - f2fs_msg(sb, KERN_INFO, - "Invalid log blocks per segment (%u)\n", - le32_to_cpu(raw_super->log_blocks_per_seg)); - return 1; + f2fs_info(sbi, "Invalid log blocks per segment (%u)", + le32_to_cpu(raw_super->log_blocks_per_seg)); + return -EFSCORRUPTED; } /* Currently, support 512/1024/2048/4096 bytes sector size */ @@ -2185,47 +2480,102 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, F2FS_MAX_LOG_SECTOR_SIZE || le32_to_cpu(raw_super->log_sectorsize) < F2FS_MIN_LOG_SECTOR_SIZE) { - f2fs_msg(sb, KERN_INFO, "Invalid log sectorsize (%u)", - le32_to_cpu(raw_super->log_sectorsize)); - return 1; + f2fs_info(sbi, "Invalid log sectorsize (%u)", + le32_to_cpu(raw_super->log_sectorsize)); + return -EFSCORRUPTED; } if (le32_to_cpu(raw_super->log_sectors_per_block) + le32_to_cpu(raw_super->log_sectorsize) != F2FS_MAX_LOG_SECTOR_SIZE) { - f2fs_msg(sb, KERN_INFO, - "Invalid log sectors per block(%u) log sectorsize(%u)", - le32_to_cpu(raw_super->log_sectors_per_block), - 
le32_to_cpu(raw_super->log_sectorsize)); - return 1; + f2fs_info(sbi, "Invalid log sectors per block(%u) log sectorsize(%u)", + le32_to_cpu(raw_super->log_sectors_per_block), + le32_to_cpu(raw_super->log_sectorsize)); + return -EFSCORRUPTED; + } + + segment_count = le32_to_cpu(raw_super->segment_count); + segs_per_sec = le32_to_cpu(raw_super->segs_per_sec); + secs_per_zone = le32_to_cpu(raw_super->secs_per_zone); + total_sections = le32_to_cpu(raw_super->section_count); + + /* blocks_per_seg should be 512, given the above check */ + blocks_per_seg = 1 << le32_to_cpu(raw_super->log_blocks_per_seg); + + if (segment_count > F2FS_MAX_SEGMENT || + segment_count < F2FS_MIN_SEGMENTS) { + f2fs_info(sbi, "Invalid segment count (%u)", segment_count); + return -EFSCORRUPTED; + } + + if (total_sections > segment_count || + total_sections < F2FS_MIN_SEGMENTS || + segs_per_sec > segment_count || !segs_per_sec) { + f2fs_info(sbi, "Invalid segment/section count (%u, %u x %u)", + segment_count, total_sections, segs_per_sec); + return -EFSCORRUPTED; + } + + if ((segment_count / segs_per_sec) < total_sections) { + f2fs_info(sbi, "Small segment_count (%u < %u * %u)", + segment_count, segs_per_sec, total_sections); + return -EFSCORRUPTED; + } + + if (segment_count > (le64_to_cpu(raw_super->block_count) >> 9)) { + f2fs_info(sbi, "Wrong segment_count / block_count (%u > %llu)", + segment_count, le64_to_cpu(raw_super->block_count)); + return -EFSCORRUPTED; + } + + if (secs_per_zone > total_sections || !secs_per_zone) { + f2fs_info(sbi, "Wrong secs_per_zone / total_sections (%u, %u)", + secs_per_zone, total_sections); + return -EFSCORRUPTED; + } + if (le32_to_cpu(raw_super->extension_count) > F2FS_MAX_EXTENSION || + raw_super->hot_ext_count > F2FS_MAX_EXTENSION || + (le32_to_cpu(raw_super->extension_count) + + raw_super->hot_ext_count) > F2FS_MAX_EXTENSION) { + f2fs_info(sbi, "Corrupted extension count (%u + %u > %u)", + le32_to_cpu(raw_super->extension_count), + raw_super->hot_ext_count, + F2FS_MAX_EXTENSION); + return -EFSCORRUPTED; + } + + if (le32_to_cpu(raw_super->cp_payload) > + (blocks_per_seg - F2FS_CP_PACKS)) { + f2fs_info(sbi, "Insane cp_payload (%u > %u)", + le32_to_cpu(raw_super->cp_payload), + blocks_per_seg - F2FS_CP_PACKS); + return -EFSCORRUPTED; } /* check reserved ino info */ if (le32_to_cpu(raw_super->node_ino) != 1 || le32_to_cpu(raw_super->meta_ino) != 2 || le32_to_cpu(raw_super->root_ino) != 3) { - f2fs_msg(sb, KERN_INFO, - "Invalid Fs Meta Ino: node(%u) meta(%u) root(%u)", - le32_to_cpu(raw_super->node_ino), - le32_to_cpu(raw_super->meta_ino), - le32_to_cpu(raw_super->root_ino)); - return 1; + f2fs_info(sbi, "Invalid Fs Meta Ino: node(%u) meta(%u) root(%u)", + le32_to_cpu(raw_super->node_ino), + le32_to_cpu(raw_super->meta_ino), + le32_to_cpu(raw_super->root_ino)); + return -EFSCORRUPTED; } if (le32_to_cpu(raw_super->segment_count) > F2FS_MAX_SEGMENT) { - f2fs_msg(sb, KERN_INFO, - "Invalid segment count (%u)", + f2fs_info(sbi, "Invalid segment count (%u)", le32_to_cpu(raw_super->segment_count)); return 1; } /* check CP/SIT/NAT/SSA/MAIN_AREA area boundary */ if (sanity_check_area_boundary(sbi, bh)) - return 1; + return -EFSCORRUPTED; return 0; } -int sanity_check_ckpt(struct f2fs_sb_info *sbi) +int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi) { unsigned int total, fsmeta; struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); @@ -2235,6 +2585,10 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi) unsigned int sit_segs, nat_segs; unsigned int sit_bitmap_size, nat_bitmap_size; unsigned 
int log_blocks_per_seg; + unsigned int segment_count_main; + unsigned int cp_pack_start_sum, cp_payload; + block_t user_block_count, valid_user_blocks; + block_t avail_node_count, valid_node_count; int i, j; total = le32_to_cpu(raw_super->segment_count); @@ -2254,8 +2608,33 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi) if (unlikely(fsmeta < F2FS_MIN_SEGMENTS || ovp_segments == 0 || reserved_segments == 0)) { - f2fs_msg(sbi->sb, KERN_ERR, - "Wrong layout: check mkfs.f2fs version"); + f2fs_err(sbi, "Wrong layout: check mkfs.f2fs version"); + return 1; + } + + user_block_count = le64_to_cpu(ckpt->user_block_count); + segment_count_main = le32_to_cpu(raw_super->segment_count_main); + log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg); + if (!user_block_count || user_block_count >= + segment_count_main << log_blocks_per_seg) { + f2fs_err(sbi, "Wrong user_block_count: %u", + user_block_count); + return 1; + } + + valid_user_blocks = le64_to_cpu(ckpt->valid_block_count); + if (valid_user_blocks > user_block_count) { + f2fs_err(sbi, "Wrong valid_user_blocks: %u, user_block_count: %u", + valid_user_blocks, user_block_count); + return 1; + } + + valid_node_count = le32_to_cpu(ckpt->valid_node_count); + avail_node_count = sbi->total_node_count - sbi->nquota_files - + F2FS_RESERVED_NODE_NUM; + if (valid_node_count > avail_node_count) { + f2fs_err(sbi, "Wrong valid_node_count: %u, avail_node_count: %u", + valid_node_count, avail_node_count); return 1; } @@ -2269,10 +2648,9 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi) for (j = i + 1; j < NR_CURSEG_NODE_TYPE; j++) { if (le32_to_cpu(ckpt->cur_node_segno[i]) == le32_to_cpu(ckpt->cur_node_segno[j])) { - f2fs_msg(sbi->sb, KERN_ERR, - "Node segment (%u, %u) has the same " - "segno: %u", i, j, - le32_to_cpu(ckpt->cur_node_segno[i])); + f2fs_err(sbi, "Node segment (%u, %u) has the same segno: %u", + i, j, + le32_to_cpu(ckpt->cur_node_segno[i])); return 1; } } @@ -2284,10 +2662,9 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi) for (j = i + 1; j < NR_CURSEG_DATA_TYPE; j++) { if (le32_to_cpu(ckpt->cur_data_segno[i]) == le32_to_cpu(ckpt->cur_data_segno[j])) { - f2fs_msg(sbi->sb, KERN_ERR, - "Data segment (%u, %u) has the same " - "segno: %u", i, j, - le32_to_cpu(ckpt->cur_data_segno[i])); + f2fs_err(sbi, "Data segment (%u, %u) has the same segno: %u", + i, j, + le32_to_cpu(ckpt->cur_data_segno[i])); return 1; } } @@ -2296,7 +2673,7 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi) for (j = 0; j < NR_CURSEG_DATA_TYPE; j++) { if (le32_to_cpu(ckpt->cur_node_segno[i]) == le32_to_cpu(ckpt->cur_data_segno[j])) { - f2fs_msg(sbi->sb, KERN_ERR, + f2fs_err(sbi, "Node segment (%u) and Data segment (%u)" " has the same segno: %u", i, j, le32_to_cpu(ckpt->cur_node_segno[i])); @@ -2311,14 +2688,32 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi) if (sit_bitmap_size != ((sit_segs / 2) << log_blocks_per_seg) / 8 || nat_bitmap_size != ((nat_segs / 2) << log_blocks_per_seg) / 8) { - f2fs_msg(sbi->sb, KERN_ERR, - "Wrong bitmap size: sit: %u, nat:%u", - sit_bitmap_size, nat_bitmap_size); + f2fs_err(sbi, "Wrong bitmap size: sit: %u, nat:%u", + sit_bitmap_size, nat_bitmap_size); + return 1; + } + + cp_pack_start_sum = __start_sum_addr(sbi); + cp_payload = __cp_payload(sbi); + if (cp_pack_start_sum < cp_payload + 1 || + cp_pack_start_sum > blocks_per_seg - 1 - + NR_CURSEG_TYPE) { + f2fs_err(sbi, "Wrong cp_pack_start_sum: %u", + cp_pack_start_sum); + return 1; + } + + if (__is_set_ckpt_flags(ckpt, CP_LARGE_NAT_BITMAP_FLAG) && + 
le32_to_cpu(ckpt->checksum_offset) != CP_MIN_CHKSUM_OFFSET) { + f2fs_warn(sbi, "using deprecated layout of large_nat_bitmap, " + "please run fsck v1.13.0 or higher to repair, chksum_offset: %u, " + "fixed with patch: \"f2fs-tools: relocate chksum_offset for large_nat_bitmap feature\"", + le32_to_cpu(ckpt->checksum_offset)); return 1; } if (unlikely(f2fs_cp_error(sbi))) { - f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck"); + f2fs_err(sbi, "A bug case: need to run fsck"); return 1; } return 0; @@ -2327,7 +2722,7 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi) static void init_sb_info(struct f2fs_sb_info *sbi) { struct f2fs_super_block *raw_super = sbi->raw_super; - int i, j; + int i; sbi->log_sectors_per_block = le32_to_cpu(raw_super->log_sectors_per_block); @@ -2345,23 +2740,30 @@ static void init_sb_info(struct f2fs_sb_info *sbi) sbi->node_ino_num = le32_to_cpu(raw_super->node_ino); sbi->meta_ino_num = le32_to_cpu(raw_super->meta_ino); sbi->cur_victim_sec = NULL_SECNO; + sbi->next_victim_seg[BG_GC] = NULL_SEGNO; + sbi->next_victim_seg[FG_GC] = NULL_SEGNO; sbi->max_victim_search = DEF_MAX_VICTIM_SEARCH; + sbi->migration_granularity = sbi->segs_per_sec; sbi->dir_level = DEF_DIR_LEVEL; sbi->interval_time[CP_TIME] = DEF_CP_INTERVAL; sbi->interval_time[REQ_TIME] = DEF_IDLE_INTERVAL; + sbi->interval_time[DISCARD_TIME] = DEF_IDLE_INTERVAL; + sbi->interval_time[GC_TIME] = DEF_IDLE_INTERVAL; + sbi->interval_time[DISABLE_TIME] = DEF_DISABLE_INTERVAL; + sbi->interval_time[UMOUNT_DISCARD_TIMEOUT] = + DEF_UMOUNT_DISCARD_TIMEOUT; clear_sbi_flag(sbi, SBI_NEED_FSCK); for (i = 0; i < NR_COUNT_TYPE; i++) atomic_set(&sbi->nr_pages[i], 0); - atomic_set(&sbi->wb_sync_req, 0); + for (i = 0; i < META; i++) + atomic_set(&sbi->wb_sync_req[i], 0); INIT_LIST_HEAD(&sbi->s_list); mutex_init(&sbi->umount_mutex); - for (i = 0; i < NR_PAGE_TYPE - 1; i++) - for (j = HOT; j < NR_TEMP_TYPE; j++) - mutex_init(&sbi->wio_mutex[i][j]); + init_rwsem(&sbi->io_order_lock); spin_lock_init(&sbi->cp_lock); sbi->dirty_device = 0; @@ -2378,8 +2780,12 @@ static int init_percpu_info(struct f2fs_sb_info *sbi) if (err) return err; - return percpu_counter_init(&sbi->total_valid_inode_count, 0, + err = percpu_counter_init(&sbi->total_valid_inode_count, 0, GFP_KERNEL); + if (err) + percpu_counter_destroy(&sbi->alloc_valid_block_count); + + return err; } #ifdef CONFIG_BLK_DEV_ZONED @@ -2393,7 +2799,7 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi) unsigned int n = 0; int err = -EIO; - if (!f2fs_sb_has_blkzoned(sbi->sb)) + if (!f2fs_sb_has_blkzoned(sbi)) return 0; if (sbi->blocks_per_blkz && sbi->blocks_per_blkz != @@ -2409,15 +2815,19 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi) if (nr_sectors & (bdev_zone_sectors(bdev) - 1)) FDEV(devi).nr_blkz++; - FDEV(devi).blkz_type = f2fs_kmalloc(sbi, FDEV(devi).nr_blkz, - GFP_KERNEL); - if (!FDEV(devi).blkz_type) + FDEV(devi).blkz_seq = f2fs_kzalloc(sbi, + BITS_TO_LONGS(FDEV(devi).nr_blkz) + * sizeof(unsigned long), + GFP_KERNEL); + if (!FDEV(devi).blkz_seq) return -ENOMEM; #define F2FS_REPORT_NR_ZONES 4096 - zones = f2fs_kzalloc(sbi, sizeof(struct blk_zone) * - F2FS_REPORT_NR_ZONES, GFP_KERNEL); + zones = f2fs_kzalloc(sbi, + array_size(F2FS_REPORT_NR_ZONES, + sizeof(struct blk_zone)), + GFP_KERNEL); if (!zones) return -ENOMEM; @@ -2436,13 +2846,14 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi) } for (i = 0; i < nr_zones; i++) { - FDEV(devi).blkz_type[n] = zones[i].type; + if (zones[i].type != BLK_ZONE_TYPE_CONVENTIONAL) + set_bit(n, 
FDEV(devi).blkz_seq); sector += zones[i].len; n++; } } - kfree(zones); + kvfree(zones); return err; } @@ -2471,18 +2882,17 @@ static int read_raw_super_block(struct f2fs_sb_info *sbi, for (block = 0; block < 2; block++) { bh = sb_bread(sb, block); if (!bh) { - f2fs_msg(sb, KERN_ERR, "Unable to read %dth superblock", - block + 1); + f2fs_err(sbi, "Unable to read %dth superblock", + block + 1); err = -EIO; continue; } /* sanity checking of raw super */ - if (sanity_check_raw_super(sbi, bh)) { - f2fs_msg(sb, KERN_ERR, - "Can't find valid F2FS filesystem in %dth superblock", - block + 1); - err = -EINVAL; + err = sanity_check_raw_super(sbi, bh); + if (err) { + f2fs_err(sbi, "Can't find valid F2FS filesystem in %dth superblock", + block + 1); brelse(bh); continue; } @@ -2502,7 +2912,7 @@ static int read_raw_super_block(struct f2fs_sb_info *sbi, /* No valid superblock */ if (!*raw_super) - kfree(super); + kvfree(super); else err = 0; @@ -2512,6 +2922,7 @@ static int read_raw_super_block(struct f2fs_sb_info *sbi, int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover) { struct buffer_head *bh; + __u32 crc = 0; int err; if ((recover && f2fs_readonly(sbi->sb)) || @@ -2520,6 +2931,13 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover) return -EROFS; } + /* we should update superblock crc here */ + if (!recover && f2fs_sb_has_sb_chksum(sbi)) { + crc = f2fs_crc32(sbi, F2FS_RAW_SUPER(sbi), + offsetof(struct f2fs_super_block, crc)); + F2FS_RAW_SUPER(sbi)->crc = cpu_to_le32(crc); + } + /* write back-up superblock first */ bh = sb_bread(sbi->sb, sbi->valid_super_block ? 0 : 1); if (!bh) @@ -2561,8 +2979,10 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi) * Initialize multiple devices information, or single * zoned block device information. */ - sbi->devs = f2fs_kzalloc(sbi, sizeof(struct f2fs_dev_info) * - max_devices, GFP_KERNEL); + sbi->devs = f2fs_kzalloc(sbi, + array_size(max_devices, + sizeof(struct f2fs_dev_info)), + GFP_KERNEL); if (!sbi->devs) return -ENOMEM; @@ -2604,37 +3024,33 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi) #ifdef CONFIG_BLK_DEV_ZONED if (bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HM && - !f2fs_sb_has_blkzoned(sbi->sb)) { - f2fs_msg(sbi->sb, KERN_ERR, - "Zoned block device feature not enabled\n"); + !f2fs_sb_has_blkzoned(sbi)) { + f2fs_err(sbi, "Zoned block device feature not enabled\n"); return -EINVAL; } if (bdev_zoned_model(FDEV(i).bdev) != BLK_ZONED_NONE) { if (init_blkz_info(sbi, i)) { - f2fs_msg(sbi->sb, KERN_ERR, - "Failed to initialize F2FS blkzone information"); + f2fs_err(sbi, "Failed to initialize F2FS blkzone information"); return -EINVAL; } if (max_devices == 1) break; - f2fs_msg(sbi->sb, KERN_INFO, - "Mount Device [%2d]: %20s, %8u, %8x - %8x (zone: %s)", - i, FDEV(i).path, - FDEV(i).total_segments, - FDEV(i).start_blk, FDEV(i).end_blk, - bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HA ? - "Host-aware" : "Host-managed"); + f2fs_info(sbi, "Mount Device [%2d]: %20s, %8u, %8x - %8x (zone: %s)", + i, FDEV(i).path, + FDEV(i).total_segments, + FDEV(i).start_blk, FDEV(i).end_blk, + bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HA ? 
+ "Host-aware" : "Host-managed"); continue; } #endif - f2fs_msg(sbi->sb, KERN_INFO, - "Mount Device [%2d]: %20s, %8u, %8x - %8x", - i, FDEV(i).path, - FDEV(i).total_segments, - FDEV(i).start_blk, FDEV(i).end_blk); - } - f2fs_msg(sbi->sb, KERN_INFO, - "IO Block Size: %8d KB", F2FS_IO_SIZE_KB(sbi)); + f2fs_info(sbi, "Mount Device [%2d]: %20s, %8u, %8x - %8x", + i, FDEV(i).path, + FDEV(i).total_segments, + FDEV(i).start_blk, FDEV(i).end_blk); + } + f2fs_info(sbi, + "IO Block Size: %8d KB", F2FS_IO_SIZE_KB(sbi)); return 0; } @@ -2648,6 +3064,8 @@ static void f2fs_tuning_parameters(struct f2fs_sb_info *sbi) sm_i->dcc_info->discard_granularity = 1; sm_i->ipu_policy = 1 << F2FS_IPU_FORCE; } + + sbi->readdir_ra = 1; } static int f2fs_fill_super(struct super_block *sb, void *data, int silent) @@ -2656,10 +3074,11 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) struct f2fs_super_block *raw_super; struct inode *root; int err; - bool retry = true, need_fsck = false; + bool skip_recovery = false, need_fsck = false; char *options = NULL; int recovery, i, valid_super_block; struct curseg_info *seg_i; + int retry_cnt = 1; try_onemore: err = -EINVAL; @@ -2677,7 +3096,7 @@ try_onemore: /* Load the checksum driver */ sbi->s_chksum_driver = crypto_alloc_shash("crc32", 0, 0); if (IS_ERR(sbi->s_chksum_driver)) { - f2fs_msg(sb, KERN_ERR, "Cannot load crc32 driver."); + f2fs_err(sbi, "Cannot load crc32 driver."); err = PTR_ERR(sbi->s_chksum_driver); sbi->s_chksum_driver = NULL; goto free_sbi; @@ -2685,7 +3104,7 @@ try_onemore: /* set a block size */ if (unlikely(!sb_set_blocksize(sb, F2FS_BLKSIZE))) { - f2fs_msg(sb, KERN_ERR, "unable to set blocksize"); + f2fs_err(sbi, "unable to set blocksize"); goto free_sbi; } @@ -2697,11 +3116,8 @@ try_onemore: sb->s_fs_info = sbi; sbi->raw_super = raw_super; - F2FS_OPTION(sbi).s_resuid = make_kuid(&init_user_ns, F2FS_DEF_RESUID); - F2FS_OPTION(sbi).s_resgid = make_kgid(&init_user_ns, F2FS_DEF_RESGID); - /* precompute checksum seed for metadata */ - if (f2fs_sb_has_inode_chksum(sb)) + if (f2fs_sb_has_inode_chksum(sbi)) sbi->s_chksum_seed = f2fs_chksum(sbi, ~0, raw_super->uuid, sizeof(raw_super->uuid)); @@ -2711,9 +3127,8 @@ try_onemore: * devices, but mandatory for host-managed zoned block devices. 
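
The blkz_seq change above swaps one type byte per zone for one bit per zone: all the write path later needs to know is whether a zone is sequential-write. A userspace sketch of the same bookkeeping, with BITS_TO_LONGS and set_bit reimplemented locally and the zone_type values invented for the example:

    #include <limits.h>
    #include <string.h>

    #define BITS_PER_LONG     (sizeof(unsigned long) * CHAR_BIT)
    #define BITS_TO_LONGS(n)  (((n) + BITS_PER_LONG - 1) / BITS_PER_LONG)

    enum zone_type { ZONE_CONVENTIONAL, ZONE_SEQ_REQUIRED, ZONE_SEQ_PREFERRED };

    static void set_bit_ul(unsigned n, unsigned long *map)
    {
        map[n / BITS_PER_LONG] |= 1UL << (n % BITS_PER_LONG);
    }

    /* mirrors init_blkz_info() above: set a bit for every non-conventional zone */
    static void build_seq_bitmap(const enum zone_type *zones, unsigned nr,
                                 unsigned long *seq_map)
    {
        memset(seq_map, 0, BITS_TO_LONGS(nr) * sizeof(unsigned long));
        for (unsigned i = 0; i < nr; i++)
            if (zones[i] != ZONE_CONVENTIONAL)
                set_bit_ul(i, seq_map);
    }

    int main(void)
    {
        enum zone_type zones[] = { ZONE_CONVENTIONAL, ZONE_SEQ_REQUIRED,
                                   ZONE_SEQ_PREFERRED };
        unsigned long seq_map[BITS_TO_LONGS(3)];

        build_seq_bitmap(zones, 3, seq_map);
        return 0;
    }

For a device with tens of thousands of zones this shrinks the per-device zone metadata roughly eightfold and turns the later type comparison into a single bit test.
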
*/ #ifndef CONFIG_BLK_DEV_ZONED - if (f2fs_sb_has_blkzoned(sb)) { - f2fs_msg(sb, KERN_ERR, - "Zoned block device support is not enabled\n"); + if (f2fs_sb_has_blkzoned(sbi)) { + f2fs_err(sbi, "Zoned block device support is not enabled"); err = -EOPNOTSUPP; goto free_sb_buf; } @@ -2734,17 +3149,13 @@ try_onemore: sb->s_maxbytes = sbi->max_file_blocks << le32_to_cpu(raw_super->log_blocksize); sb->s_max_links = F2FS_LINK_MAX; - get_random_bytes(&sbi->s_next_generation, sizeof(u32)); #ifdef CONFIG_QUOTA sb->dq_op = &f2fs_quota_operations; - if (f2fs_sb_has_quota_ino(sb)) - sb->s_qcop = &dquot_quotactl_sysfile_ops; - else - sb->s_qcop = &f2fs_quotactl_ops; + sb->s_qcop = &f2fs_quotactl_ops; sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ; - if (f2fs_sb_has_quota_ino(sbi->sb)) { + if (f2fs_sb_has_quota_ino(sbi)) { for (i = 0; i < MAXQUOTAS; i++) { if (f2fs_qf_ino(sbi->sb, i)) sbi->nquota_files++; @@ -2768,7 +3179,9 @@ try_onemore: /* init f2fs-specific super block info */ sbi->valid_super_block = valid_super_block; mutex_init(&sbi->gc_mutex); + mutex_init(&sbi->writepages); mutex_init(&sbi->cp_mutex); + mutex_init(&sbi->resize_mutex); init_rwsem(&sbi->node_write); init_rwsem(&sbi->node_change); @@ -2784,12 +3197,14 @@ try_onemore: int n = (i == META) ? 1: NR_TEMP_TYPE; int j; - sbi->write_io[i] = f2fs_kmalloc(sbi, - n * sizeof(struct f2fs_bio_info), - GFP_KERNEL); + sbi->write_io[i] = + f2fs_kmalloc(sbi, + array_size(n, + sizeof(struct f2fs_bio_info)), + GFP_KERNEL); if (!sbi->write_io[i]) { err = -ENOMEM; - goto free_options; + goto free_bio_info; } for (j = HOT; j < n; j++) { @@ -2802,6 +3217,7 @@ try_onemore: } init_rwsem(&sbi->cp_rwsem); + init_rwsem(&sbi->quota_sem); init_waitqueue_head(&sbi->cp_wait); init_sb_info(sbi); @@ -2821,21 +3237,31 @@ try_onemore: /* get an inode for meta space */ sbi->meta_inode = f2fs_iget(sb, F2FS_META_INO(sbi)); if (IS_ERR(sbi->meta_inode)) { - f2fs_msg(sb, KERN_ERR, "Failed to read F2FS meta data inode"); + f2fs_err(sbi, "Failed to read F2FS meta data inode"); err = PTR_ERR(sbi->meta_inode); goto free_io_dummy; } - err = get_valid_checkpoint(sbi); + err = f2fs_get_valid_checkpoint(sbi); if (err) { - f2fs_msg(sb, KERN_ERR, "Failed to get valid F2FS checkpoint"); + f2fs_err(sbi, "Failed to get valid F2FS checkpoint"); goto free_meta_inode; } + if (__is_set_ckpt_flags(F2FS_CKPT(sbi), CP_QUOTA_NEED_FSCK_FLAG)) + set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); + if (__is_set_ckpt_flags(F2FS_CKPT(sbi), CP_DISABLED_QUICK_FLAG)) { + set_sbi_flag(sbi, SBI_CP_DISABLED_QUICK); + sbi->interval_time[DISABLE_TIME] = DEF_DISABLE_QUICK_INTERVAL; + } + + if (__is_set_ckpt_flags(F2FS_CKPT(sbi), CP_FSCK_FLAG)) + set_sbi_flag(sbi, SBI_NEED_FSCK); + /* Initialize device list */ err = f2fs_scan_devices(sbi); if (err) { - f2fs_msg(sb, KERN_ERR, "Failed to find devices"); + f2fs_err(sbi, "Failed to find devices"); goto free_devices; } @@ -2855,22 +3281,25 @@ try_onemore: INIT_LIST_HEAD(&sbi->inode_list[i]); spin_lock_init(&sbi->inode_lock[i]); } + mutex_init(&sbi->flush_lock); + + f2fs_init_extent_cache_info(sbi); - init_extent_cache_info(sbi); + f2fs_init_ino_entry_info(sbi); - init_ino_entry_info(sbi); + f2fs_init_fsync_node_info(sbi); /* setup f2fs internal modules */ - err = build_segment_manager(sbi); + err = f2fs_build_segment_manager(sbi); if (err) { - f2fs_msg(sb, KERN_ERR, - "Failed to initialize F2FS segment manager"); + f2fs_err(sbi, "Failed to initialize F2FS segment manager (%d)", + err); goto free_sm; } - err = build_node_manager(sbi); + err = 
f2fs_build_node_manager(sbi); if (err) { - f2fs_msg(sb, KERN_ERR, - "Failed to initialize F2FS node manager"); + f2fs_err(sbi, "Failed to initialize F2FS node manager (%d)", + err); goto free_nm; } @@ -2885,28 +3314,29 @@ try_onemore: sbi->kbytes_written = le64_to_cpu(seg_i->journal->info.kbytes_written); - build_gc_manager(sbi); + f2fs_build_gc_manager(sbi); + + err = f2fs_build_stats(sbi); + if (err) + goto free_nm; /* get an inode for node space */ sbi->node_inode = f2fs_iget(sb, F2FS_NODE_INO(sbi)); if (IS_ERR(sbi->node_inode)) { - f2fs_msg(sb, KERN_ERR, "Failed to read node inode"); + f2fs_err(sbi, "Failed to read node inode"); err = PTR_ERR(sbi->node_inode); - goto free_nm; + goto free_stats; } - err = f2fs_build_stats(sbi); - if (err) - goto free_node_inode; - /* read root inode and dentry */ root = f2fs_iget(sb, F2FS_ROOT_INO(sbi)); if (IS_ERR(root)) { - f2fs_msg(sb, KERN_ERR, "Failed to read root inode"); + f2fs_err(sbi, "Failed to read root inode"); err = PTR_ERR(root); - goto free_stats; + goto free_node_inode; } - if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { + if (!S_ISDIR(root->i_mode) || !root->i_blocks || + !root->i_size || !root->i_nlink) { iput(root); err = -EINVAL; goto free_node_inode; @@ -2915,7 +3345,7 @@ try_onemore: sb->s_root = d_make_root(root); /* allocate root dentry */ if (!sb->s_root) { err = -ENOMEM; - goto free_root_inode; + goto free_node_inode; } err = f2fs_register_sysfs(sbi); @@ -2923,152 +3353,168 @@ try_onemore: goto free_root_inode; #ifdef CONFIG_QUOTA - /* - * Turn on quotas which were not enabled for read-only mounts if - * filesystem has quota feature, so that they are updated correctly. - */ - if (f2fs_sb_has_quota_ino(sb) && !f2fs_readonly(sb)) { + /* Enable quota usage during mount */ + if (f2fs_sb_has_quota_ino(sbi) && !f2fs_readonly(sb)) { err = f2fs_enable_quotas(sb); - if (err) { - f2fs_msg(sb, KERN_ERR, - "Cannot turn on quotas: error %d", err); - goto free_sysfs; - } + if (err) + f2fs_err(sbi, "Cannot turn on quotas: error %d", err); } #endif /* if there are nt orphan nodes free them */ - err = recover_orphan_inodes(sbi); + err = f2fs_recover_orphan_inodes(sbi); if (err) goto free_meta; + if (unlikely(is_set_ckpt_flags(sbi, CP_DISABLED_FLAG))) + goto reset_checkpoint; + /* recover fsynced data */ if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) { /* * mount should be failed, when device has readonly mode, and * previous checkpoint was not done by clean system shutdown. 
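
The root-inode validation above now also rejects a zero link count, so a corrupted root is refused before d_make_root() ever sees it. Reduced to a predicate, the check looks like the sketch below; the struct is an invented stand-in for the handful of inode fields fill_super inspects.

    #include <stdbool.h>
    #include <stdint.h>

    struct root_check {
        bool is_dir;            /* S_ISDIR(root->i_mode) */
        uint64_t i_blocks;
        uint64_t i_size;
        unsigned int i_nlink;   /* the newly added test */
    };

    static bool root_inode_valid(const struct root_check *r)
    {
        return r->is_dir && r->i_blocks && r->i_size && r->i_nlink;
    }

    int main(void)
    {
        struct root_check bad = { .is_dir = true, .i_blocks = 8,
                                  .i_size = 4096, .i_nlink = 0 };
        return root_inode_valid(&bad) ? 0 : 22;    /* 22 == EINVAL */
    }
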
*/ - if (bdev_read_only(sb->s_bdev) && - !is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) { - err = -EROFS; - goto free_meta; + if (f2fs_hw_is_readonly(sbi)) { + if (!is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) { + err = -EROFS; + f2fs_err(sbi, "Need to recover fsync data, but write access unavailable"); + goto free_meta; + } + f2fs_info(sbi, "write access unavailable, skipping recovery"); + goto reset_checkpoint; } if (need_fsck) set_sbi_flag(sbi, SBI_NEED_FSCK); - if (!retry) - goto skip_recovery; + if (skip_recovery) + goto reset_checkpoint; - err = recover_fsync_data(sbi, false); + err = f2fs_recover_fsync_data(sbi, false); if (err < 0) { + if (err != -ENOMEM) + skip_recovery = true; need_fsck = true; - f2fs_msg(sb, KERN_ERR, - "Cannot recover all fsync data errno=%d", err); + f2fs_err(sbi, "Cannot recover all fsync data errno=%d", + err); goto free_meta; } } else { - err = recover_fsync_data(sbi, true); + err = f2fs_recover_fsync_data(sbi, true); if (!f2fs_readonly(sb) && err > 0) { err = -EINVAL; - f2fs_msg(sb, KERN_ERR, - "Need to recover fsync data"); + f2fs_err(sbi, "Need to recover fsync data"); goto free_meta; } } -skip_recovery: - /* recover_fsync_data() cleared this already */ +reset_checkpoint: + /* f2fs_recover_fsync_data() cleared this already */ clear_sbi_flag(sbi, SBI_POR_DOING); + if (test_opt(sbi, DISABLE_CHECKPOINT)) { + err = f2fs_disable_checkpoint(sbi); + if (err) + goto sync_free_meta; + } else if (is_set_ckpt_flags(sbi, CP_DISABLED_FLAG)) { + f2fs_enable_checkpoint(sbi); + } + /* * If filesystem is not mounted as read-only then * do start the gc_thread. */ if (test_opt(sbi, BG_GC) && !f2fs_readonly(sb)) { /* After POR, we can run background GC thread.*/ - err = start_gc_thread(sbi); + err = f2fs_start_gc_thread(sbi); if (err) - goto free_meta; + goto sync_free_meta; } - kfree(options); + kvfree(options); /* recover broken superblock */ if (recovery) { err = f2fs_commit_super(sbi, true); - f2fs_msg(sb, KERN_INFO, - "Try to recover %dth superblock, ret: %d", - sbi->valid_super_block ? 1 : 2, err); + f2fs_info(sbi, "Try to recover %dth superblock, ret: %d", + sbi->valid_super_block ? 1 : 2, err); } f2fs_join_shrinker(sbi); f2fs_tuning_parameters(sbi); - f2fs_msg(sbi->sb, KERN_NOTICE, "Mounted with checkpoint version = %llx", - cur_cp_version(F2FS_CKPT(sbi))); + f2fs_notice(sbi, "Mounted with checkpoint version = %llx", + cur_cp_version(F2FS_CKPT(sbi))); f2fs_update_time(sbi, CP_TIME); f2fs_update_time(sbi, REQ_TIME); + clear_sbi_flag(sbi, SBI_CP_DISABLED_QUICK); return 0; +sync_free_meta: + /* safe to flush all the data */ + sync_filesystem(sbi->sb); + retry_cnt = 0; + free_meta: #ifdef CONFIG_QUOTA - if (f2fs_sb_has_quota_ino(sb) && !f2fs_readonly(sb)) + f2fs_truncate_quota_inode_pages(sb); + if (f2fs_sb_has_quota_ino(sbi) && !f2fs_readonly(sb)) f2fs_quota_off_umount(sbi->sb); #endif - f2fs_sync_inode_meta(sbi); /* - * Some dirty meta pages can be produced by recover_orphan_inodes() + * Some dirty meta pages can be produced by f2fs_recover_orphan_inodes() * failed by EIO. Then, iput(node_inode) can trigger balance_fs_bg() - * followed by write_checkpoint() through f2fs_write_node_pages(), which - * falls into an infinite loop in sync_meta_pages(). + * followed by f2fs_write_checkpoint() through f2fs_write_node_pages(), which + * falls into an infinite loop in f2fs_sync_meta_pages(). 
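
retry_cnt and skip_recovery replace the old retry boolean in fill_super: exactly one extra mount attempt is granted, and it is spent only when fsync-data recovery failed for a reason other than -ENOMEM, in which case the second pass runs with recovery skipped and need_fsck set so the image is flagged for fsck. A minimal standalone model of that policy; try_fill_super() here is an invented stub, not the kernel function.

    #include <stdbool.h>
    #include <errno.h>
    #include <stdio.h>

    /* invented stub: fail the first attempt in recovery with -EIO */
    static int try_fill_super(bool skip_recovery, int *recovery_err)
    {
        static int attempts;

        if (attempts++ == 0 && !skip_recovery) {
            *recovery_err = -EIO;
            return -EIO;
        }
        return 0;
    }

    static int mount_with_retry(void)
    {
        int retry_cnt = 1;            /* give only one more chance */
        bool skip_recovery = false;

        for (;;) {
            int recovery_err = 0;
            int err = try_fill_super(skip_recovery, &recovery_err);

            if (!err)
                return 0;
            if (recovery_err && recovery_err != -ENOMEM)
                skip_recovery = true; /* retry without roll-forward */
            if (retry_cnt > 0 && skip_recovery) {
                retry_cnt--;
                continue;
            }
            return err;
        }
    }

    int main(void)
    {
        printf("%d\n", mount_with_retry()); /* 0: second attempt succeeds */
        return 0;
    }

-ENOMEM is excluded because retrying cannot help a transient allocation failure become a clean recovery; the sync_free_meta unwind path additionally zeroes retry_cnt so a mount that already flushed data is not retried.
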
*/ truncate_inode_pages_final(META_MAPPING(sbi)); -#ifdef CONFIG_QUOTA -free_sysfs: -#endif + /* evict some inodes being cached by GC */ + evict_inodes(sb); f2fs_unregister_sysfs(sbi); free_root_inode: dput(sb->s_root); sb->s_root = NULL; -free_stats: - f2fs_destroy_stats(sbi); free_node_inode: - release_ino_entry(sbi, true); + f2fs_release_ino_entry(sbi, true); truncate_inode_pages_final(NODE_MAPPING(sbi)); iput(sbi->node_inode); + sbi->node_inode = NULL; +free_stats: + f2fs_destroy_stats(sbi); free_nm: - destroy_node_manager(sbi); + f2fs_destroy_node_manager(sbi); free_sm: - destroy_segment_manager(sbi); + f2fs_destroy_segment_manager(sbi); free_devices: destroy_device_list(sbi); - kfree(sbi->ckpt); + kvfree(sbi->ckpt); free_meta_inode: make_bad_inode(sbi->meta_inode); iput(sbi->meta_inode); + sbi->meta_inode = NULL; free_io_dummy: mempool_destroy(sbi->write_io_dummy); free_percpu: destroy_percpu_info(sbi); free_bio_info: for (i = 0; i < NR_PAGE_TYPE; i++) - kfree(sbi->write_io[i]); + kvfree(sbi->write_io[i]); free_options: #ifdef CONFIG_QUOTA for (i = 0; i < MAXQUOTAS; i++) - kfree(F2FS_OPTION(sbi).s_qf_names[i]); + kvfree(F2FS_OPTION(sbi).s_qf_names[i]); #endif - kfree(options); + kvfree(options); free_sb_buf: - kfree(raw_super); + kvfree(raw_super); free_sbi: if (sbi->s_chksum_driver) crypto_free_shash(sbi->s_chksum_driver); - kfree(sbi); + kvfree(sbi); /* give only one another chance */ - if (retry) { - retry = false; + if (retry_cnt > 0 && skip_recovery) { + retry_cnt--; shrink_dcache_sb(sb); goto try_onemore; } @@ -3084,9 +3530,22 @@ static struct dentry *f2fs_mount(struct file_system_type *fs_type, int flags, static void kill_f2fs_super(struct super_block *sb) { if (sb->s_root) { - set_sbi_flag(F2FS_SB(sb), SBI_IS_CLOSE); - stop_gc_thread(F2FS_SB(sb)); - stop_discard_thread(F2FS_SB(sb)); + struct f2fs_sb_info *sbi = F2FS_SB(sb); + + set_sbi_flag(sbi, SBI_IS_CLOSE); + f2fs_stop_gc_thread(sbi); + f2fs_stop_discard_thread(sbi); + + if (is_sbi_flag_set(sbi, SBI_IS_DIRTY) || + !is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) { + struct cp_control cpc = { + .reason = CP_UMOUNT, + }; + f2fs_write_checkpoint(sbi, &cpc); + } + + if (is_sbi_flag_set(sbi, SBI_IS_RECOVERED) && f2fs_readonly(sb)) + sb->s_flags &= ~MS_RDONLY; } kill_block_super(sb); } @@ -3135,16 +3594,16 @@ static int __init init_f2fs_fs(void) err = init_inodecache(); if (err) goto fail; - err = create_node_manager_caches(); + err = f2fs_create_node_manager_caches(); if (err) goto free_inodecache; - err = create_segment_manager_caches(); + err = f2fs_create_segment_manager_caches(); if (err) goto free_node_manager_caches; - err = create_checkpoint_caches(); + err = f2fs_create_checkpoint_caches(); if (err) goto free_segment_manager_caches; - err = create_extent_cache(); + err = f2fs_create_extent_cache(); if (err) goto free_checkpoint_caches; err = f2fs_init_sysfs(); @@ -3156,9 +3615,7 @@ static int __init init_f2fs_fs(void) err = register_filesystem(&f2fs_fs_type); if (err) goto free_shrinker; - err = f2fs_create_root_stats(); - if (err) - goto free_filesystem; + f2fs_create_root_stats(); err = f2fs_init_post_read_processing(); if (err) goto free_root_stats; @@ -3166,20 +3623,19 @@ static int __init init_f2fs_fs(void) free_root_stats: f2fs_destroy_root_stats(); -free_filesystem: unregister_filesystem(&f2fs_fs_type); free_shrinker: unregister_shrinker(&f2fs_shrinker_info); free_sysfs: f2fs_exit_sysfs(); free_extent_cache: - destroy_extent_cache(); + f2fs_destroy_extent_cache(); free_checkpoint_caches: - destroy_checkpoint_caches(); + 
f2fs_destroy_checkpoint_caches(); free_segment_manager_caches: - destroy_segment_manager_caches(); + f2fs_destroy_segment_manager_caches(); free_node_manager_caches: - destroy_node_manager_caches(); + f2fs_destroy_node_manager_caches(); free_inodecache: destroy_inodecache(); fail: @@ -3193,10 +3649,10 @@ static void __exit exit_f2fs_fs(void) unregister_filesystem(&f2fs_fs_type); unregister_shrinker(&f2fs_shrinker_info); f2fs_exit_sysfs(); - destroy_extent_cache(); - destroy_checkpoint_caches(); - destroy_segment_manager_caches(); - destroy_node_manager_caches(); + f2fs_destroy_extent_cache(); + f2fs_destroy_checkpoint_caches(); + f2fs_destroy_segment_manager_caches(); + f2fs_destroy_node_manager_caches(); destroy_inodecache(); f2fs_destroy_trace_ios(); } diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 2c53de9251be..9e910b15bb4b 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -1,14 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 /* * f2fs sysfs interface * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ * Copyright (c) 2017 Chao Yu <chao@kernel.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ +#include <linux/compiler.h> #include <linux/proc_fs.h> #include <linux/f2fs_fs.h> #include <linux/seq_file.h> @@ -70,6 +68,20 @@ static ssize_t dirty_segments_show(struct f2fs_attr *a, (unsigned long long)(dirty_segments(sbi))); } +static ssize_t unusable_show(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, char *buf) +{ + block_t unusable; + + if (test_opt(sbi, DISABLE_CHECKPOINT)) + unusable = sbi->unusable_block_count; + else + unusable = f2fs_get_unusable_blocks(sbi); + return snprintf(buf, PAGE_SIZE, "%llu\n", + (unsigned long long)unusable); +} + + static ssize_t lifetime_write_kbytes_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { @@ -92,33 +104,36 @@ static ssize_t features_show(struct f2fs_attr *a, if (!sb->s_bdev->bd_part) return snprintf(buf, PAGE_SIZE, "0\n"); - if (f2fs_sb_has_encrypt(sb)) + if (f2fs_sb_has_encrypt(sbi)) len += snprintf(buf, PAGE_SIZE - len, "%s", "encryption"); - if (f2fs_sb_has_blkzoned(sb)) + if (f2fs_sb_has_blkzoned(sbi)) len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "blkzoned"); - if (f2fs_sb_has_extra_attr(sb)) + if (f2fs_sb_has_extra_attr(sbi)) len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "extra_attr"); - if (f2fs_sb_has_project_quota(sb)) + if (f2fs_sb_has_project_quota(sbi)) len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "projquota"); - if (f2fs_sb_has_inode_chksum(sb)) + if (f2fs_sb_has_inode_chksum(sbi)) len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "inode_checksum"); - if (f2fs_sb_has_flexible_inline_xattr(sb)) + if (f2fs_sb_has_flexible_inline_xattr(sbi)) len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "flexible_inline_xattr"); - if (f2fs_sb_has_quota_ino(sb)) + if (f2fs_sb_has_quota_ino(sbi)) len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "quota_ino"); - if (f2fs_sb_has_inode_crtime(sb)) + if (f2fs_sb_has_inode_crtime(sbi)) len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "inode_crtime"); - if (f2fs_sb_has_lost_found(sb)) + if (f2fs_sb_has_lost_found(sbi)) len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? 
", " : "", "lost_found"); + if (f2fs_sb_has_sb_chksum(sbi)) + len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len ? ", " : "", "sb_checksum"); len += snprintf(buf + len, PAGE_SIZE - len, "\n"); return len; } @@ -147,13 +162,13 @@ static ssize_t f2fs_sbi_show(struct f2fs_attr *a, int len = 0, i; len += snprintf(buf + len, PAGE_SIZE - len, - "cold file extenstion:\n"); + "cold file extension:\n"); for (i = 0; i < cold_count; i++) len += snprintf(buf + len, PAGE_SIZE - len, "%s\n", extlist[i]); len += snprintf(buf + len, PAGE_SIZE - len, - "hot file extenstion:\n"); + "hot file extension:\n"); for (i = cold_count; i < cold_count + hot_count; i++) len += snprintf(buf + len, PAGE_SIZE - len, "%s\n", extlist[i]); @@ -165,7 +180,7 @@ static ssize_t f2fs_sbi_show(struct f2fs_attr *a, return snprintf(buf, PAGE_SIZE, "%u\n", *ui); } -static ssize_t f2fs_sbi_store(struct f2fs_attr *a, +static ssize_t __sbi_store(struct f2fs_attr *a, struct f2fs_sb_info *sbi, const char *buf, size_t count) { @@ -201,13 +216,13 @@ static ssize_t f2fs_sbi_store(struct f2fs_attr *a, down_write(&sbi->sb_lock); - ret = update_extension_list(sbi, name, hot, set); + ret = f2fs_update_extension_list(sbi, name, hot, set); if (ret) goto out; ret = f2fs_commit_super(sbi, false); if (ret) - update_extension_list(sbi, name, hot, !set); + f2fs_update_extension_list(sbi, name, hot, !set); out: up_write(&sbi->sb_lock); return ret ? ret : count; @@ -221,6 +236,8 @@ out: #ifdef CONFIG_F2FS_FAULT_INJECTION if (a->struct_type == FAULT_INFO_TYPE && t >= (1 << FAULT_MAX)) return -EINVAL; + if (a->struct_type == FAULT_INFO_RATE && t >= UINT_MAX) + return -EINVAL; #endif if (a->struct_type == RESERVED_BLOCKS) { spin_lock(&sbi->stat_lock); @@ -245,22 +262,70 @@ out: return count; } + if (!strcmp(a->attr.name, "migration_granularity")) { + if (t == 0 || t > sbi->segs_per_sec) + return -EINVAL; + } + if (!strcmp(a->attr.name, "trim_sections")) return -EINVAL; - *ui = t; + if (!strcmp(a->attr.name, "gc_urgent")) { + if (t >= 1) { + sbi->gc_mode = GC_URGENT; + if (sbi->gc_thread) { + sbi->gc_thread->gc_wake = 1; + wake_up_interruptible_all( + &sbi->gc_thread->gc_wait_queue_head); + wake_up_discard_thread(sbi, true); + } + } else { + sbi->gc_mode = GC_NORMAL; + } + return count; + } + if (!strcmp(a->attr.name, "gc_idle")) { + if (t == GC_IDLE_CB) + sbi->gc_mode = GC_IDLE_CB; + else if (t == GC_IDLE_GREEDY) + sbi->gc_mode = GC_IDLE_GREEDY; + else + sbi->gc_mode = GC_NORMAL; + return count; + } + - if (!strcmp(a->attr.name, "iostat_enable") && *ui == 0) - f2fs_reset_iostat(sbi); - if (!strcmp(a->attr.name, "gc_urgent") && t == 1 && sbi->gc_thread) { - sbi->gc_thread->gc_wake = 1; - wake_up_interruptible_all(&sbi->gc_thread->gc_wait_queue_head); - wake_up_discard_thread(sbi, true); + if (!strcmp(a->attr.name, "iostat_enable")) { + sbi->iostat_enable = !!t; + if (!sbi->iostat_enable) + f2fs_reset_iostat(sbi); + return count; } + *ui = (unsigned int)t; + return count; } +static ssize_t f2fs_sbi_store(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, + const char *buf, size_t count) +{ + ssize_t ret; + bool gc_entry = (!strcmp(a->attr.name, "gc_urgent") || + a->struct_type == GC_THREAD); + + if (gc_entry) { + if (!down_read_trylock(&sbi->sb->s_umount)) + return -EAGAIN; + } + ret = __sbi_store(a, sbi, buf, count); + if (gc_entry) + up_read(&sbi->sb->s_umount); + + return ret; +} + static ssize_t f2fs_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) { @@ -299,6 +364,7 @@ enum feat_id { FEAT_QUOTA_INO, FEAT_INODE_CRTIME, 
FEAT_LOST_FOUND, + FEAT_SB_CHECKSUM, }; static ssize_t f2fs_feature_show(struct f2fs_attr *a, @@ -315,6 +381,7 @@ static ssize_t f2fs_feature_show(struct f2fs_attr *a, case FEAT_QUOTA_INO: case FEAT_INODE_CRTIME: case FEAT_LOST_FOUND: + case FEAT_SB_CHECKSUM: return snprintf(buf, PAGE_SIZE, "supported\n"); } return 0; @@ -349,8 +416,8 @@ F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_urgent_sleep_time, F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_min_sleep_time, min_sleep_time); F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_max_sleep_time, max_sleep_time); F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time); -F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_idle, gc_idle); -F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_urgent, gc_urgent); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_idle, gc_mode); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_urgent, gc_mode); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments); F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_small_discards, max_discards); F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, discard_granularity, discard_granularity); @@ -359,15 +426,22 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks); +F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_seq_blocks, min_seq_blocks); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_hot_blocks, min_hot_blocks); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ssr_sections, min_ssr_sections); F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh); F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ra_nid_pages, ra_nid_pages); F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, dirty_nats_ratio, dirty_nats_ratio); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, migration_granularity, migration_granularity); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, cp_interval, interval_time[CP_TIME]); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, idle_interval, interval_time[REQ_TIME]); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, discard_idle_interval, + interval_time[DISCARD_TIME]); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_idle_interval, interval_time[GC_TIME]); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, + umount_discard_timeout, interval_time[UMOUNT_DISCARD_TIMEOUT]); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_enable, iostat_enable); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, readdir_ra, readdir_ra); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_pin_file_thresh, gc_pin_file_threshold); @@ -380,6 +454,7 @@ F2FS_GENERAL_RO_ATTR(dirty_segments); F2FS_GENERAL_RO_ATTR(lifetime_write_kbytes); F2FS_GENERAL_RO_ATTR(features); F2FS_GENERAL_RO_ATTR(current_reserved_blocks); +F2FS_GENERAL_RO_ATTR(unusable); #ifdef CONFIG_F2FS_FS_ENCRYPTION F2FS_FEATURE_RO_ATTR(encryption, FEAT_CRYPTO); @@ -395,6 +470,7 @@ F2FS_FEATURE_RO_ATTR(flexible_inline_xattr, FEAT_FLEXIBLE_INLINE_XATTR); F2FS_FEATURE_RO_ATTR(quota_ino, FEAT_QUOTA_INO); F2FS_FEATURE_RO_ATTR(inode_crtime, FEAT_INODE_CRTIME); F2FS_FEATURE_RO_ATTR(lost_found, FEAT_LOST_FOUND); +F2FS_FEATURE_RO_ATTR(sb_checksum, FEAT_SB_CHECKSUM); #define ATTR_LIST(name) (&f2fs_attr_##name.attr) static struct attribute *f2fs_attrs[] = { @@ -411,15 +487,20 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(ipu_policy), ATTR_LIST(min_ipu_util), ATTR_LIST(min_fsync_blocks), + ATTR_LIST(min_seq_blocks), 
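
The f2fs_sbi_store() wrapper introduced above takes sbi->sb->s_umount, and only with a trylock, for the GC-related attributes: a store that races with an in-flight umount fails fast with -EAGAIN instead of sleeping on a superblock that is being torn down. From userspace that surfaces as a retryable write error. Below is a minimal sketch of driving the gc_urgent knob with that in mind; the sysfs path (and the sda1 device name in it) is illustrative, not taken from the patch:

    #include <errno.h>
    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    /* Write "1" (switch to GC_URGENT and wake the GC thread) or "0"
     * (back to GC_NORMAL). EAGAIN means the store lost the race with an
     * umount -- the kernel's down_read_trylock() on s_umount failed. */
    static int set_gc_urgent(const char *path, int on)
    {
        int fd, tries, err = -EAGAIN;

        fd = open(path, O_WRONLY | O_CLOEXEC);
        if (fd < 0)
            return -errno;
        for (tries = 0; tries < 5; tries++) {
            if (write(fd, on ? "1" : "0", 1) == 1) {
                err = 0;
                break;
            }
            err = -errno;
            if (err != -EAGAIN)
                break;
            usleep(10000);   /* back off while the umount settles */
        }
        close(fd);
        return err;
    }

    int main(void)
    {
        if (set_gc_urgent("/sys/fs/f2fs/sda1/gc_urgent", 1) < 0)
            fprintf(stderr, "gc_urgent write failed\n");
        return 0;
    }
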
ATTR_LIST(min_hot_blocks), ATTR_LIST(min_ssr_sections), ATTR_LIST(max_victim_search), + ATTR_LIST(migration_granularity), ATTR_LIST(dir_level), ATTR_LIST(ram_thresh), ATTR_LIST(ra_nid_pages), ATTR_LIST(dirty_nats_ratio), ATTR_LIST(cp_interval), ATTR_LIST(idle_interval), + ATTR_LIST(discard_idle_interval), + ATTR_LIST(gc_idle_interval), + ATTR_LIST(umount_discard_timeout), ATTR_LIST(iostat_enable), ATTR_LIST(readdir_ra), ATTR_LIST(gc_pin_file_thresh), @@ -429,6 +510,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(inject_type), #endif ATTR_LIST(dirty_segments), + ATTR_LIST(unusable), ATTR_LIST(lifetime_write_kbytes), ATTR_LIST(features), ATTR_LIST(reserved_blocks), @@ -451,6 +533,7 @@ static struct attribute *f2fs_feat_attrs[] = { ATTR_LIST(quota_ino), ATTR_LIST(inode_crtime), ATTR_LIST(lost_found), + ATTR_LIST(sb_checksum), NULL, }; @@ -482,7 +565,8 @@ static struct kobject f2fs_feat = { .kset = &f2fs_kset, }; -static int segment_info_seq_show(struct seq_file *seq, void *offset) +static int __maybe_unused segment_info_seq_show(struct seq_file *seq, + void *offset) { struct super_block *sb = seq->private; struct f2fs_sb_info *sbi = F2FS_SB(sb); @@ -498,8 +582,7 @@ static int segment_info_seq_show(struct seq_file *seq, void *offset) if ((i % 10) == 0) seq_printf(seq, "%-10d", i); - seq_printf(seq, "%d|%-3u", se->type, - get_valid_blocks(sbi, i, false)); + seq_printf(seq, "%d|%-3u", se->type, se->valid_blocks); if ((i % 10) == 9 || i == (total_segs - 1)) seq_putc(seq, '\n'); else @@ -509,7 +592,8 @@ static int segment_info_seq_show(struct seq_file *seq, void *offset) return 0; } -static int segment_bits_seq_show(struct seq_file *seq, void *offset) +static int __maybe_unused segment_bits_seq_show(struct seq_file *seq, + void *offset) { struct super_block *sb = seq->private; struct f2fs_sb_info *sbi = F2FS_SB(sb); @@ -524,8 +608,7 @@ static int segment_bits_seq_show(struct seq_file *seq, void *offset) struct seg_entry *se = get_seg_entry(sbi, i); seq_printf(seq, "%-10d", i); - seq_printf(seq, "%d|%-3u|", se->type, - get_valid_blocks(sbi, i, false)); + seq_printf(seq, "%d|%-3u|", se->type, se->valid_blocks); for (j = 0; j < SIT_VBLOCK_MAP_SIZE; j++) seq_printf(seq, " %.2x", se->cur_valid_map[j]); seq_putc(seq, '\n'); @@ -533,7 +616,8 @@ static int segment_bits_seq_show(struct seq_file *seq, void *offset) return 0; } -static int iostat_info_seq_show(struct seq_file *seq, void *offset) +static int __maybe_unused iostat_info_seq_show(struct seq_file *seq, + void *offset) { struct super_block *sb = seq->private; struct f2fs_sb_info *sbi = F2FS_SB(sb); @@ -575,6 +659,28 @@ static int iostat_info_seq_show(struct seq_file *seq, void *offset) return 0; } +static int __maybe_unused victim_bits_seq_show(struct seq_file *seq, + void *offset) +{ + struct super_block *sb = seq->private; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + int i; + + seq_puts(seq, "format: victim_secmap bitmaps\n"); + + for (i = 0; i < MAIN_SECS(sbi); i++) { + if ((i % 10) == 0) + seq_printf(seq, "%-10d", i); + seq_printf(seq, "%d", test_bit(i, dirty_i->victim_secmap) ? 
1 : 0); + if ((i % 10) == 9 || i == (MAIN_SECS(sbi) - 1)) + seq_putc(seq, '\n'); + else + seq_putc(seq, ' '); + } + return 0; +} + #define F2FS_PROC_FILE_DEF(_name) \ static int _name##_open_fs(struct inode *inode, struct file *file) \ { \ @@ -591,6 +697,7 @@ static const struct file_operations f2fs_seq_##_name##_fops = { \ F2FS_PROC_FILE_DEF(segment_info); F2FS_PROC_FILE_DEF(segment_bits); F2FS_PROC_FILE_DEF(iostat_info); +F2FS_PROC_FILE_DEF(victim_bits); int __init f2fs_init_sysfs(void) { @@ -641,6 +748,8 @@ int f2fs_register_sysfs(struct f2fs_sb_info *sbi) &f2fs_seq_segment_bits_fops, sb); proc_create_data("iostat_info", S_IRUGO, sbi->s_proc, &f2fs_seq_iostat_info_fops, sb); + proc_create_data("victim_bits", S_IRUGO, sbi->s_proc, + &f2fs_seq_victim_bits_fops, sb); } return 0; } @@ -651,6 +760,7 @@ void f2fs_unregister_sysfs(struct f2fs_sb_info *sbi) remove_proc_entry("iostat_info", sbi->s_proc); remove_proc_entry("segment_info", sbi->s_proc); remove_proc_entry("segment_bits", sbi->s_proc); + remove_proc_entry("victim_bits", sbi->s_proc); remove_proc_entry(sbi->sb->s_id, f2fs_proc_root); } kobject_del(&sbi->s_kobj); diff --git a/fs/f2fs/trace.c b/fs/f2fs/trace.c index cb696791044b..d0ab533a9ce8 100644 --- a/fs/f2fs/trace.c +++ b/fs/f2fs/trace.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * f2fs IO tracer * * Copyright (c) 2014 Motorola Mobility * Copyright (c) 2014 Jaegeuk Kim <jaegeuk@kernel.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/fs.h> #include <linux/f2fs_fs.h> @@ -17,7 +14,7 @@ #include "trace.h" static RADIX_TREE(pids, GFP_ATOMIC); -static struct mutex pids_lock; +static spinlock_t pids_lock; static struct last_io_info last_io; static inline void __print_last_io(void) @@ -65,7 +62,7 @@ retry: if (radix_tree_preload(GFP_NOFS)) return; - mutex_lock(&pids_lock); + spin_lock(&pids_lock); p = radix_tree_lookup(&pids, pid); if (p == current) goto out; @@ -83,7 +80,7 @@ retry: MAJOR(inode->i_sb->s_dev), MINOR(inode->i_sb->s_dev), pid, current->comm); out: - mutex_unlock(&pids_lock); + spin_unlock(&pids_lock); radix_tree_preload_end(); } @@ -128,7 +125,7 @@ void f2fs_trace_ios(struct f2fs_io_info *fio, int flush) void f2fs_build_trace_ios(void) { - mutex_init(&pids_lock); + spin_lock_init(&pids_lock); } #define PIDVEC_SIZE 128 @@ -156,7 +153,7 @@ void f2fs_destroy_trace_ios(void) pid_t next_pid = 0; unsigned int found; - mutex_lock(&pids_lock); + spin_lock(&pids_lock); while ((found = gang_lookup_pids(pid, next_pid, PIDVEC_SIZE))) { unsigned idx; @@ -164,5 +161,5 @@ void f2fs_destroy_trace_ios(void) for (idx = 0; idx < found; idx++) radix_tree_delete(&pids, pid[idx]); } - mutex_unlock(&pids_lock); + spin_unlock(&pids_lock); } diff --git a/fs/f2fs/trace.h b/fs/f2fs/trace.h index 67db24ac1e85..e8075fc5b228 100644 --- a/fs/f2fs/trace.h +++ b/fs/f2fs/trace.h @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * f2fs IO tracer * * Copyright (c) 2014 Motorola Mobility * Copyright (c) 2014 Jaegeuk Kim <jaegeuk@kernel.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
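
The trace.c hunks above turn pids_lock from a mutex into a spinlock, presumably so the pid bookkeeping can be reached from IO contexts where sleeping on a mutex is unsafe; the one step that genuinely can sleep, allocating radix-tree nodes, was already hoisted out of the critical section via radix_tree_preload(). A minimal sketch of that split, with illustrative names:

    #include <linux/radix-tree.h>
    #include <linux/spinlock.h>

    static DEFINE_SPINLOCK(tracked_lock);
    static RADIX_TREE(tracked, GFP_ATOMIC);  /* inserts run under a spinlock */

    static int track(unsigned long id, void *item)
    {
        int err;

        /* Stock the per-cpu node pool; this may sleep, so it must come
         * before the spinlock. On success preemption is disabled. */
        err = radix_tree_preload(GFP_NOFS);
        if (err)
            return err;

        spin_lock(&tracked_lock);
        /* Atomic-safe now: the insert draws on the preloaded nodes. */
        err = radix_tree_insert(&tracked, id, item);
        spin_unlock(&tracked_lock);

        radix_tree_preload_end();            /* re-enables preemption */
        return err;
    }
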
*/ #ifndef __F2FS_TRACE_H__ #define __F2FS_TRACE_H__ diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 116be979b897..d48af3b5857b 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/xattr.c * @@ -13,10 +14,6 @@ * suggestion of Luka Renko <luka.renko@hermes.si>. * xattr consolidation Copyright (c) 2004 James Morris <jmorris@redhat.com>, * Red Hat Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/rwsem.h> #include <linux/f2fs_fs.h> @@ -38,9 +35,6 @@ static size_t f2fs_xattr_generic_list(const struct xattr_handler *handler, return -EOPNOTSUPP; break; case F2FS_XATTR_INDEX_TRUSTED: - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - break; case F2FS_XATTR_INDEX_SECURITY: break; default: @@ -69,9 +63,6 @@ static int f2fs_xattr_generic_get(const struct xattr_handler *handler, return -EOPNOTSUPP; break; case F2FS_XATTR_INDEX_TRUSTED: - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - break; case F2FS_XATTR_INDEX_SECURITY: break; default: @@ -142,6 +133,8 @@ static int f2fs_xattr_advise_set(const struct xattr_handler *handler, size_t size, int flags) { struct inode *inode = d_inode(dentry); + unsigned char old_advise = F2FS_I(inode)->i_advise; + unsigned char new_advise; if (strcmp(name, "") != 0) return -EINVAL; @@ -150,7 +143,14 @@ static int f2fs_xattr_advise_set(const struct xattr_handler *handler, if (value == NULL) return -EINVAL; - F2FS_I(inode)->i_advise |= *(char *)value; + new_advise = *(char *)value; + if (new_advise & ~FADVISE_MODIFIABLE_BITS) + return -EINVAL; + + new_advise = new_advise & FADVISE_MODIFIABLE_BITS; + new_advise |= old_advise & ~FADVISE_MODIFIABLE_BITS; + + F2FS_I(inode)->i_advise = new_advise; f2fs_mark_inode_dirty_sync(inode, true); return 0; } @@ -248,12 +248,17 @@ static inline const struct xattr_handler *f2fs_xattr_handler(int index) return handler; } -static struct f2fs_xattr_entry *__find_xattr(void *base_addr, int index, - size_t len, const char *name) +static struct f2fs_xattr_entry *__find_xattr(void *base_addr, + void *last_base_addr, int index, + size_t len, const char *name) { struct f2fs_xattr_entry *entry; list_for_each_xattr(entry, base_addr) { + if ((void *)(entry) + sizeof(__u32) > last_base_addr || + (void *)XATTR_NEXT_ENTRY(entry) > last_base_addr) + return NULL; + if (entry->e_name_index != index) continue; if (entry->e_name_len != len) @@ -270,11 +275,11 @@ static struct f2fs_xattr_entry *__find_inline_xattr(struct inode *inode, { struct f2fs_xattr_entry *entry; unsigned int inline_size = inline_xattr_size(inode); + void *max_addr = base_addr + inline_size; list_for_each_xattr(entry, base_addr) { - if ((void *)entry + sizeof(__u32) > base_addr + inline_size || - (void *)XATTR_NEXT_ENTRY(entry) + sizeof(__u32) > - base_addr + inline_size) { + if ((void *)entry + sizeof(__u32) > max_addr || + (void *)XATTR_NEXT_ENTRY(entry) > max_addr) { *last_addr = entry; return NULL; } @@ -285,6 +290,13 @@ static struct f2fs_xattr_entry *__find_inline_xattr(struct inode *inode, if (!memcmp(entry->e_name, name, len)) break; } + + /* inline xattr header or entry across max inline xattr size */ + if (IS_XATTR_LAST_ENTRY(entry) && + (void *)entry + sizeof(__u32) > max_addr) { + *last_addr = entry; + return NULL; + } return entry; } @@ -299,7 +311,7 @@ static int read_inline_xattr(struct inode *inode, struct page *ipage, if (ipage) { 
inline_addr = inline_xattr_addr(inode, ipage); } else { - page = get_node_page(sbi, inode->i_ino); + page = f2fs_get_node_page(sbi, inode->i_ino); if (IS_ERR(page)) return PTR_ERR(page); @@ -320,7 +332,7 @@ static int read_xattr_block(struct inode *inode, void *txattr_addr) void *xattr_addr; /* The inode already has an extended attribute block. */ - xpage = get_node_page(sbi, xnid); + xpage = f2fs_get_node_page(sbi, xnid); if (IS_ERR(xpage)) return PTR_ERR(xpage); @@ -334,22 +346,24 @@ static int read_xattr_block(struct inode *inode, void *txattr_addr) static int lookup_all_xattrs(struct inode *inode, struct page *ipage, unsigned int index, unsigned int len, const char *name, struct f2fs_xattr_entry **xe, - void **base_addr) + void **base_addr, int *base_size) { - void *cur_addr, *txattr_addr, *last_addr = NULL; + void *cur_addr, *txattr_addr, *last_txattr_addr; + void *last_addr = NULL; nid_t xnid = F2FS_I(inode)->i_xattr_nid; - unsigned int size = xnid ? VALID_XATTR_BLOCK_SIZE : 0; unsigned int inline_size = inline_xattr_size(inode); int err = 0; - if (!size && !inline_size) + if (!xnid && !inline_size) return -ENODATA; - txattr_addr = f2fs_kzalloc(F2FS_I_SB(inode), - inline_size + size + XATTR_PADDING_SIZE, GFP_NOFS); + *base_size = XATTR_SIZE(xnid, inode) + XATTR_PADDING_SIZE; + txattr_addr = f2fs_kzalloc(F2FS_I_SB(inode), *base_size, GFP_NOFS); if (!txattr_addr) return -ENOMEM; + last_txattr_addr = (void *)txattr_addr + XATTR_SIZE(xnid, inode); + /* read from inline xattr */ if (inline_size) { err = read_inline_xattr(inode, ipage, txattr_addr); @@ -358,8 +372,10 @@ static int lookup_all_xattrs(struct inode *inode, struct page *ipage, *xe = __find_inline_xattr(inode, txattr_addr, &last_addr, index, len, name); - if (*xe) + if (*xe) { + *base_size = inline_size; goto check; + } } /* read from xattr node block */ @@ -374,7 +390,14 @@ static int lookup_all_xattrs(struct inode *inode, struct page *ipage, else cur_addr = txattr_addr; - *xe = __find_xattr(cur_addr, index, len, name); + *xe = __find_xattr(cur_addr, last_txattr_addr, index, len, name); + if (!*xe) { + f2fs_err(F2FS_I_SB(inode), "inode (%lu) has corrupted xattr", + inode->i_ino); + set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK); + err = -EFSCORRUPTED; + goto out; + } check: if (IS_XATTR_LAST_ENTRY(*xe)) { err = -ENODATA; @@ -384,7 +407,7 @@ check: *base_addr = txattr_addr; return 0; out: - kzfree(txattr_addr); + kvfree(txattr_addr); return err; } @@ -427,7 +450,7 @@ static int read_all_xattrs(struct inode *inode, struct page *ipage, *base_addr = txattr_addr; return 0; fail: - kzfree(txattr_addr); + kvfree(txattr_addr); return err; } @@ -444,7 +467,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize, int err = 0; if (hsize > inline_size && !F2FS_I(inode)->i_xattr_nid) - if (!alloc_nid(sbi, &new_nid)) + if (!f2fs_alloc_nid(sbi, &new_nid)) return -ENOSPC; /* write to inline xattr */ @@ -452,20 +475,20 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize, if (ipage) { inline_addr = inline_xattr_addr(inode, ipage); } else { - in_page = get_node_page(sbi, inode->i_ino); + in_page = f2fs_get_node_page(sbi, inode->i_ino); if (IS_ERR(in_page)) { - alloc_nid_failed(sbi, new_nid); + f2fs_alloc_nid_failed(sbi, new_nid); return PTR_ERR(in_page); } inline_addr = inline_xattr_addr(inode, in_page); } f2fs_wait_on_page_writeback(ipage ? 
ipage : in_page, - NODE, true); + NODE, true, true); /* no need to use xattr node block */ if (hsize <= inline_size) { - err = truncate_xattr_node(inode); - alloc_nid_failed(sbi, new_nid); + err = f2fs_truncate_xattr_node(inode); + f2fs_alloc_nid_failed(sbi, new_nid); if (err) { f2fs_put_page(in_page, 1); return err; @@ -478,24 +501,24 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize, /* write to xattr node block */ if (F2FS_I(inode)->i_xattr_nid) { - xpage = get_node_page(sbi, F2FS_I(inode)->i_xattr_nid); + xpage = f2fs_get_node_page(sbi, F2FS_I(inode)->i_xattr_nid); if (IS_ERR(xpage)) { err = PTR_ERR(xpage); - alloc_nid_failed(sbi, new_nid); + f2fs_alloc_nid_failed(sbi, new_nid); goto in_page_out; } f2fs_bug_on(sbi, new_nid); - f2fs_wait_on_page_writeback(xpage, NODE, true); + f2fs_wait_on_page_writeback(xpage, NODE, true, true); } else { struct dnode_of_data dn; set_new_dnode(&dn, inode, NULL, NULL, new_nid); - xpage = new_node_page(&dn, XATTR_NODE_OFFSET); + xpage = f2fs_new_node_page(&dn, XATTR_NODE_OFFSET); if (IS_ERR(xpage)) { err = PTR_ERR(xpage); - alloc_nid_failed(sbi, new_nid); + f2fs_alloc_nid_failed(sbi, new_nid); goto in_page_out; } - alloc_nid_done(sbi, new_nid); + f2fs_alloc_nid_done(sbi, new_nid); } xattr_addr = page_address(xpage); @@ -520,6 +543,7 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name, int error = 0; unsigned int size, len; void *base_addr = NULL; + int base_size; if (name == NULL) return -EINVAL; @@ -530,7 +554,7 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name, down_read(&F2FS_I(inode)->i_xattr_sem); error = lookup_all_xattrs(inode, ipage, index, len, name, - &entry, &base_addr); + &entry, &base_addr, &base_size); up_read(&F2FS_I(inode)->i_xattr_sem); if (error) return error; @@ -544,11 +568,16 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name, if (buffer) { char *pval = entry->e_name + entry->e_name_len; + + if (base_size - (pval - (char *)base_addr) < size) { + error = -ERANGE; + goto out; + } memcpy(buffer, pval, size); } error = size; out: - kzfree(base_addr); + kvfree(base_addr); return error; } @@ -587,7 +616,7 @@ ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) } error = buffer_size - rest; cleanup: - kzfree(base_addr); + kvfree(base_addr); return error; } @@ -605,7 +634,8 @@ static int __f2fs_setxattr(struct inode *inode, int index, struct page *ipage, int flags) { struct f2fs_xattr_entry *here, *last; - void *base_addr; + void *base_addr, *last_base_addr; + nid_t xnid = F2FS_I(inode)->i_xattr_nid; int found, newsize; size_t len; __u32 new_hsize; @@ -629,8 +659,17 @@ static int __f2fs_setxattr(struct inode *inode, int index, if (error) return error; + last_base_addr = (void *)base_addr + XATTR_SIZE(xnid, inode); + /* find entry with wanted name. */ - here = __find_xattr(base_addr, index, len, name); + here = __find_xattr(base_addr, last_base_addr, index, len, name); + if (!here) { + f2fs_err(F2FS_I_SB(inode), "inode (%lu) has corrupted xattr", + inode->i_ino); + set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK); + error = -EFSCORRUPTED; + goto exit; + } found = IS_XATTR_LAST_ENTRY(here) ? 
0 : 1; @@ -718,7 +757,7 @@ static int __f2fs_setxattr(struct inode *inode, int index, if (!error && S_ISDIR(inode->i_mode)) set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_CP); exit: - kzfree(base_addr); + kvfree(base_addr); return error; } @@ -733,7 +772,7 @@ int f2fs_setxattr(struct inode *inode, int index, const char *name, if (err) return err; - /* this case is only from init_inode_metadata */ + /* this case is only from f2fs_init_inode_metadata */ if (ipage) return __f2fs_setxattr(inode, index, name, value, size, ipage, flags); diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h index 08a4840d6d7d..2909c9241145 100644 --- a/fs/f2fs/xattr.h +++ b/fs/f2fs/xattr.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/xattr.h * @@ -9,10 +10,6 @@ * On-disk format of extended attributes for the ext2 filesystem. * * (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #ifndef __F2FS_XATTR_H__ #define __F2FS_XATTR_H__ @@ -74,6 +71,8 @@ struct f2fs_xattr_entry { entry = XATTR_NEXT_ENTRY(entry)) #define VALID_XATTR_BLOCK_SIZE (PAGE_SIZE - sizeof(struct node_footer)) #define XATTR_PADDING_SIZE (sizeof(__u32)) +#define XATTR_SIZE(x,i) (((x) ? VALID_XATTR_BLOCK_SIZE : 0) + \ + (inline_xattr_size(i))) #define MIN_OFFSET(i) XATTR_ALIGN(inline_xattr_size(i) + \ VALID_XATTR_BLOCK_SIZE) @@ -81,6 +80,12 @@ struct f2fs_xattr_entry { sizeof(struct f2fs_xattr_header) - \ sizeof(struct f2fs_xattr_entry)) +#define MAX_INLINE_XATTR_SIZE \ + (DEF_ADDRS_PER_INODE - \ + F2FS_TOTAL_EXTRA_ATTR_SIZE / sizeof(__le32) - \ + DEF_INLINE_RESERVED_SIZE - \ + MIN_INLINE_DENTRY_SIZE / sizeof(__le32)) + /* * On-disk structure of f2fs_xattr * We use inline xattrs space + 1 block for xattr. 
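
The thread running through the xattr hunks above is bounds enforcement: __find_xattr() now takes a last_base_addr computed from the new XATTR_SIZE() macro, and an entry whose header, or whose advance to the next entry, crosses that limit makes the lookup fail with -EFSCORRUPTED and SBI_NEED_FSCK set, rather than walking off the end of the buffer. A self-contained sketch of the same bounded walk follows; the entry layout is a hypothetical stand-in for f2fs_xattr_entry, not its real on-disk format:

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    struct xentry {            /* hypothetical; index 0 terminates the list */
        uint8_t  index;
        uint8_t  name_len;
        uint16_t value_size;
        char     name[];       /* name bytes, then value bytes */
    };

    #define XENTRY_NEXT(e) ((struct xentry *)((char *)(e) + \
            sizeof(struct xentry) + (e)->name_len + (e)->value_size))

    /* Returns the matching entry, or NULL for both "not found" and "list
     * runs past end"; the real code distinguishes the two so that only
     * the corrupt case is flagged for fsck. */
    static struct xentry *find_bounded(void *base, void *end, uint8_t index,
                                       size_t len, const char *name)
    {
        struct xentry *e = base;

        for (;;) {
            /* The header must fit before it may be dereferenced. */
            if ((char *)e + sizeof(*e) > (char *)end)
                return NULL;                  /* corrupted chain */
            if (e->index == 0)
                return NULL;                  /* clean terminator */
            /* The advance must stay in bounds too. */
            if ((char *)XENTRY_NEXT(e) > (char *)end)
                return NULL;                  /* corrupted chain */
            if (e->index == index && e->name_len == len &&
                !memcmp(e->name, name, len))
                return e;
            e = XENTRY_NEXT(e);
        }
    }
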
diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c index be8529739d23..94fdb8efd93e 100644 --- a/fs/fat/fatent.c +++ b/fs/fat/fatent.c @@ -92,7 +92,8 @@ static int fat12_ent_bread(struct super_block *sb, struct fat_entry *fatent, err_brelse: brelse(bhs[0]); err: - fat_msg(sb, KERN_ERR, "FAT read failed (blocknr %llu)", (llu)blocknr); + fat_msg_ratelimit(sb, KERN_ERR, + "FAT read failed (blocknr %llu)", (llu)blocknr); return -EIO; } @@ -105,8 +106,8 @@ static int fat_ent_bread(struct super_block *sb, struct fat_entry *fatent, fatent->fat_inode = MSDOS_SB(sb)->fat_inode; fatent->bhs[0] = sb_bread(sb, blocknr); if (!fatent->bhs[0]) { - fat_msg(sb, KERN_ERR, "FAT read failed (blocknr %llu)", - (llu)blocknr); + fat_msg_ratelimit(sb, KERN_ERR, + "FAT read failed (blocknr %llu)", (llu)blocknr); return -EIO; } fatent->nr_bhs = 1; diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 944fff1ef536..79375a009306 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -775,8 +775,9 @@ retry: fat_get_blknr_offset(sbi, i_pos, &blocknr, &offset); bh = sb_bread(sb, blocknr); if (!bh) { - fat_msg(sb, KERN_ERR, "unable to read inode block " - "for updating (i_pos %lld)", i_pos); + fat_msg_ratelimit(sb, KERN_ERR, + "unable to read inode block for updating (i_pos %lld)", + i_pos); return -EIO; } spin_lock(&sbi->inode_hash_lock); diff --git a/fs/file_table.c b/fs/file_table.c index ad17e05ebf95..b4baa0de4988 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -41,6 +41,141 @@ static struct kmem_cache *filp_cachep __read_mostly; static struct percpu_counter nr_files __cacheline_aligned_in_smp; +#ifdef CONFIG_FILE_TABLE_DEBUG +#include <linux/hashtable.h> +#include <mount.h> +static DEFINE_MUTEX(global_files_lock); +static DEFINE_HASHTABLE(global_files_hashtable, 10); + +struct global_filetable_lookup_key { + struct work_struct work; + uintptr_t value; +}; + +void global_filetable_print_warning_once(void) +{ + pr_err_once("\n**********************************************************\n"); + pr_err_once("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n"); + pr_err_once("** **\n"); + pr_err_once("** VFS FILE TABLE DEBUG is enabled . **\n"); + pr_err_once("** Allocating extra memory and slowing access to files **\n"); + pr_err_once("** **\n"); + pr_err_once("** This means that this is a DEBUG kernel and it is **\n"); + pr_err_once("** unsafe for production use. **\n"); + pr_err_once("** **\n"); + pr_err_once("** If you see this message and you are not debugging **\n"); + pr_err_once("** the kernel, report this immediately to your vendor! 
**\n"); + pr_err_once("** **\n"); + pr_err_once("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n"); + pr_err_once("**********************************************************\n"); +} + +void global_filetable_add(struct file *filp) +{ + struct mount *mnt; + + if (filp->f_path.dentry->d_iname == NULL || + strlen(filp->f_path.dentry->d_iname) == 0) + return; + + mnt = real_mount(filp->f_path.mnt); + + mutex_lock(&global_files_lock); + hash_add(global_files_hashtable, &filp->f_hash, (uintptr_t)mnt); + mutex_unlock(&global_files_lock); +} + +void global_filetable_del(struct file *filp) +{ + mutex_lock(&global_files_lock); + hash_del(&filp->f_hash); + mutex_unlock(&global_files_lock); +} + +static void global_print_file(struct file *filp, char *path_buffer, int *count) +{ + char *pathname; + + pathname = d_path(&filp->f_path, path_buffer, PAGE_SIZE); + if (IS_ERR(pathname)) + pr_err("VFS: File %d Address : %pa partial filename: %s ref_count=%ld\n", + ++(*count), &filp, filp->f_path.dentry->d_iname, + atomic_long_read(&filp->f_count)); + else + pr_err("VFS: File %d Address : %pa full filepath: %s ref_count=%ld\n", + ++(*count), &filp, pathname, + atomic_long_read(&filp->f_count)); +} + +static void global_filetable_print(uintptr_t lookup_mnt) +{ + struct hlist_node *tmp; + struct file *filp; + struct mount *mnt; + int index; + int count = 0; + char *path_buffer = (char *)__get_free_page(GFP_TEMPORARY); + + mutex_lock(&global_files_lock); + pr_err("\n**********************************************************\n"); + pr_err("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n"); + + pr_err("\n"); + pr_err("VFS: The following files hold a reference to the mount\n"); + pr_err("\n"); + hash_for_each_possible_safe(global_files_hashtable, filp, tmp, f_hash, + lookup_mnt) { + mnt = real_mount(filp->f_path.mnt); + if ((uintptr_t)mnt == lookup_mnt) + global_print_file(filp, path_buffer, &count); + } + pr_err("\n"); + pr_err("VFS: Found total of %d open files\n", count); + pr_err("\n"); + + count = 0; + pr_err("\n"); + pr_err("VFS: The following files need to cleaned up\n"); + pr_err("\n"); + hash_for_each_safe(global_files_hashtable, index, tmp, filp, f_hash) { + if (atomic_long_read(&filp->f_count) == 0) + global_print_file(filp, path_buffer, &count); + } + + pr_err("\n"); + pr_err("VFS: Found total of %d files awaiting clean-up\n", count); + pr_err("\n"); + pr_err("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n"); + pr_err("\n**********************************************************\n"); + + mutex_unlock(&global_files_lock); + free_page((unsigned long)path_buffer); +} + +static void global_filetable_print_work_fn(struct work_struct *work) +{ + struct global_filetable_lookup_key *key; + uintptr_t lookup_mnt; + + key = container_of(work, struct global_filetable_lookup_key, work); + lookup_mnt = key->value; + kfree(key); + global_filetable_print(lookup_mnt); +} + +void global_filetable_delayed_print(struct mount *mnt) +{ + struct global_filetable_lookup_key *key; + + key = kzalloc(sizeof(*key), GFP_KERNEL); + if (key == NULL) + return; + key->value = (uintptr_t)mnt; + INIT_WORK(&key->work, global_filetable_print_work_fn); + schedule_work(&key->work); +} +#endif /* CONFIG_FILE_TABLE_DEBUG */ + static void file_free_rcu(struct rcu_head *head) { struct file *f = container_of(head, struct file, f_u.fu_rcuhead); @@ -219,6 +354,7 @@ static void __fput(struct file *file) put_write_access(inode); __mnt_drop_write(mnt); } + global_filetable_del(file); file->f_path.dentry = NULL; 
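
Two details of the debug file table above are worth noting: open files are hashed by the numeric value of their struct mount pointer, and global_filetable_delayed_print() forwards that value to a workqueue as an opaque uintptr_t, so the dump runs after the caller has returned and never dereferences a mount that may already be gone by then. A minimal sketch of that fire-and-forget shape, with dump_for_key() standing in for the real bucket walk:

    #include <linux/kernel.h>
    #include <linux/slab.h>
    #include <linux/workqueue.h>

    struct dump_req {
        struct work_struct work;
        uintptr_t key;    /* compared against hash keys, never dereferenced */
    };

    static void dump_for_key(uintptr_t key)
    {
        pr_err("VFS debug: open files for mount key %#lx ...\n",
               (unsigned long)key);
        /* real version: hash_for_each_possible() under the table lock */
    }

    static void dump_work_fn(struct work_struct *work)
    {
        struct dump_req *req = container_of(work, struct dump_req, work);
        uintptr_t key = req->key;

        kfree(req);       /* nothing may touch 'work' after this point */
        dump_for_key(key);
    }

    void dump_later(const void *object)
    {
        struct dump_req *req = kzalloc(sizeof(*req), GFP_KERNEL);

        if (!req)
            return;       /* best effort: it is only a debug aid */
        req->key = (uintptr_t)object;
        INIT_WORK(&req->work, dump_work_fn);
        schedule_work(&req->work);
    }
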
file->f_path.mnt = NULL; file->f_inode = NULL; @@ -314,6 +450,7 @@ void __init files_init(void) filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); percpu_counter_init(&nr_files, 0, GFP_KERNEL); + global_filetable_print_warning_once(); } /* diff --git a/fs/fuse/Makefile b/fs/fuse/Makefile index e95eeb445e58..3805040bee46 100644 --- a/fs/fuse/Makefile +++ b/fs/fuse/Makefile @@ -5,4 +5,4 @@ obj-$(CONFIG_FUSE_FS) += fuse.o obj-$(CONFIG_CUSE) += cuse.o -fuse-objs := dev.o dir.o file.o inode.o control.o +fuse-objs := dev.o dir.o file.o inode.o control.o passthrough.o diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 755a56f9fb58..7909b72b1f88 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -7,6 +7,7 @@ */ #include "fuse_i.h" +#include "fuse_passthrough.h" #include <linux/init.h> #include <linux/module.h> @@ -596,9 +597,14 @@ ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args) args->out.numargs * sizeof(struct fuse_arg)); fuse_request_send(fc, req); ret = req->out.h.error; - if (!ret && args->out.argvar) { - BUG_ON(args->out.numargs != 1); - ret = req->out.args[0].size; + if (!ret) { + if (args->out.argvar) { + BUG_ON(args->out.numargs != 1); + ret = req->out.args[0].size; + } + + if (req->passthrough_filp != NULL) + args->out.passthrough_filp = req->passthrough_filp; } fuse_put_request(fc, req); @@ -1986,6 +1992,7 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud, } fuse_copy_finish(cs); + fuse_setup_passthrough(fc, req); spin_lock(&fpq->lock); clear_bit(FR_LOCKED, &req->flags); if (!fpq->connected) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 3bc6e1990e64..7a5ff8d5afbd 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -480,6 +480,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, args.out.args[0].value = &outentry; args.out.args[1].size = sizeof(outopen); args.out.args[1].value = &outopen; + args.out.passthrough_filp = NULL; err = fuse_simple_request(fc, &args); if (err) goto out_free_ff; @@ -492,6 +493,8 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, ff->fh = outopen.fh; ff->nodeid = outentry.nodeid; ff->open_flags = outopen.open_flags; + if (args.out.passthrough_filp != NULL) + ff->passthrough_filp = args.out.passthrough_filp; inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation, &outentry.attr, entry_attr_timeout(&outentry), 0); if (!inode) { diff --git a/fs/fuse/file.c b/fs/fuse/file.c index f7d025d1684c..9fc6c2597dcc 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -7,6 +7,7 @@ */ #include "fuse_i.h" +#include "fuse_passthrough.h" #include <linux/pagemap.h> #include <linux/slab.h> @@ -22,8 +23,10 @@ static const struct file_operations fuse_direct_io_file_operations; static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file, - int opcode, struct fuse_open_out *outargp) + int opcode, struct fuse_open_out *outargp, + struct file **passthrough_filpp) { + int ret_val; struct fuse_open_in inarg; FUSE_ARGS(args); @@ -39,8 +42,14 @@ static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file, args.out.numargs = 1; args.out.args[0].size = sizeof(*outargp); args.out.args[0].value = outargp; + args.out.passthrough_filp = NULL; - return fuse_simple_request(fc, &args); + ret_val = fuse_simple_request(fc, &args); + + if (args.out.passthrough_filp != NULL) + *passthrough_filpp = args.out.passthrough_filp; + + return ret_val; } struct fuse_file *fuse_file_alloc(struct fuse_conn *fc) @@ -51,6 +60,10 @@ struct fuse_file 
*fuse_file_alloc(struct fuse_conn *fc) if (unlikely(!ff)) return NULL; + ff->passthrough_filp = NULL; + ff->passthrough_enabled = 0; + if (fc->passthrough) + ff->passthrough_enabled = 1; ff->fc = fc; ff->reserved_req = fuse_request_alloc(0); if (unlikely(!ff->reserved_req)) { @@ -119,6 +132,7 @@ int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, bool isdir) { struct fuse_file *ff; + struct file *passthrough_filp = NULL; int opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN; ff = fuse_file_alloc(fc); @@ -131,10 +145,12 @@ int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, struct fuse_open_out outarg; int err; - err = fuse_send_open(fc, nodeid, file, opcode, &outarg); + err = fuse_send_open(fc, nodeid, file, opcode, &outarg, + &(passthrough_filp)); if (!err) { ff->fh = outarg.fh; ff->open_flags = outarg.open_flags; + ff->passthrough_filp = passthrough_filp; } else if (err != -ENOSYS || isdir) { fuse_file_free(ff); @@ -260,6 +276,8 @@ void fuse_release_common(struct file *file, int opcode) if (unlikely(!ff)) return; + fuse_passthrough_release(ff); + req = ff->reserved_req; fuse_prepare_release(ff, file->f_flags, opcode); @@ -891,6 +909,43 @@ static int fuse_readpages_fill(void *_data, struct page *page) return -EIO; } +#ifdef CONFIG_CMA + if (is_cma_pageblock(page)) { + struct page *oldpage = page, *newpage; + int err; + + /* make sure that old page is not free in-between the calls */ + page_cache_get(oldpage); + + newpage = alloc_page(GFP_HIGHUSER); + if (!newpage) { + page_cache_release(oldpage); + return -ENOMEM; + } + + err = replace_page_cache_page(oldpage, newpage, GFP_KERNEL); + if (err) { + __free_page(newpage); + page_cache_release(oldpage); + return err; + } + + /* + * Decrement the count on new page to make page cache the only + * owner of it + */ + lock_page(newpage); + put_page(newpage); + + lru_cache_add_file(newpage); + + /* finally release the old page and swap pointers */ + unlock_page(oldpage); + page_cache_release(oldpage); + page = newpage; + } +#endif + page_cache_get(page); req->pages[req->num_pages] = page; req->page_descs[req->num_pages].length = PAGE_SIZE; @@ -936,8 +991,10 @@ out: static ssize_t fuse_file_read_iter(struct kiocb *iocb, struct iov_iter *to) { + ssize_t ret_val; struct inode *inode = iocb->ki_filp->f_mapping->host; struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_file *ff = iocb->ki_filp->private_data; /* * In auto invalidate mode, always update attributes on read. 
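
The FUSE read and write paths below short-circuit to the daemon-supplied backing file whenever ff->passthrough_enabled and ff->passthrough_filp are both set. The core move, spelled out in passthrough.c later in this diff, is: pin the backing file, swap it into iocb->ki_filp so the lower filesystem sees its own file, invoke its ->read_iter or ->write_iter, restore the FUSE file, and mirror times (and, on writes, i_size under fc->lock) back onto the FUSE inode. A condensed sketch of the read side, assuming a struct file *backing stashed at open time:

    #include <linux/file.h>
    #include <linux/fs.h>
    #include <linux/fs_stack.h>

    static ssize_t delegate_read_iter(struct kiocb *iocb, struct iov_iter *to,
                                      struct file *backing)
    {
        struct file *upper = iocb->ki_filp;
        ssize_t ret;

        if (!backing->f_op->read_iter)
            return -EIO;

        get_file(backing);           /* pin it across the lower call */
        iocb->ki_filp = backing;     /* lower fs must see its own file */
        ret = backing->f_op->read_iter(iocb, to);
        iocb->ki_filp = upper;       /* restore before returning upward */

        if (ret >= 0)                /* reflect the lower atime upward */
            fsstack_copy_attr_atime(file_inode(upper), file_inode(backing));

        fput(backing);
        return ret;
    }
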
@@ -952,7 +1009,12 @@ static ssize_t fuse_file_read_iter(struct kiocb *iocb, struct iov_iter *to) return err; } - return generic_file_read_iter(iocb, to); + if (ff && ff->passthrough_enabled && ff->passthrough_filp) + ret_val = fuse_passthrough_read_iter(iocb, to); + else + ret_val = generic_file_read_iter(iocb, to); + + return ret_val; } static void fuse_write_fill(struct fuse_req *req, struct fuse_file *ff, @@ -1184,6 +1246,7 @@ static ssize_t fuse_perform_write(struct file *file, static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; + struct fuse_file *ff = file->private_data; struct address_space *mapping = file->f_mapping; ssize_t written = 0; ssize_t written_buffered = 0; @@ -1217,8 +1280,14 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from) if (err) goto out; + if (ff && ff->passthrough_enabled && ff->passthrough_filp) { + written = fuse_passthrough_write_iter(iocb, from); + goto out; + } + if (iocb->ki_flags & IOCB_DIRECT) { loff_t pos = iocb->ki_pos; + written = generic_file_direct_write(iocb, from, pos); if (written < 0 || !iov_iter_count(from)) goto out; @@ -2090,6 +2159,9 @@ static const struct vm_operations_struct fuse_file_vm_ops = { static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma) { + struct fuse_file *ff = file->private_data; + + ff->passthrough_enabled = 0; if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) fuse_link_write_file(file); @@ -2100,6 +2172,9 @@ static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma) static int fuse_direct_mmap(struct file *file, struct vm_area_struct *vma) { + struct fuse_file *ff = file->private_data; + + ff->passthrough_enabled = 0; /* Can't provide the coherency needed for MAP_SHARED */ if (vma->vm_flags & VM_MAYSHARE) return -ENODEV; diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index cc5b053c4c04..411dcdec3db9 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -158,6 +158,10 @@ struct fuse_file { /** Has flock been performed on this file? */ bool flock:1; + + /* the read write file */ + struct file *passthrough_filp; + bool passthrough_enabled; }; /** One input argument of a request */ @@ -237,6 +241,7 @@ struct fuse_args { unsigned argvar:1; unsigned numargs; struct fuse_arg args[2]; + struct file *passthrough_filp; } out; }; @@ -387,6 +392,9 @@ struct fuse_req { /** Request is stolen from fuse_file->reserved_req */ struct file *stolen_file; + + /** fuse passthrough file */ + struct file *passthrough_filp; }; struct fuse_iqueue { @@ -544,6 +552,9 @@ struct fuse_conn { /** write-back cache policy (default is write-through) */ unsigned writeback_cache:1; + /** passthrough IO. */ + unsigned passthrough:1; + /* * The following bitfields are only for optimization purposes * and hence races in setting them will not cause malfunction diff --git a/fs/fuse/fuse_passthrough.h b/fs/fuse/fuse_passthrough.h new file mode 100644 index 000000000000..62f12c12ffec --- /dev/null +++ b/fs/fuse/fuse_passthrough.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2015-2016, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef _FS_FUSE_PASSTHROUGH_H +#define _FS_FUSE_PASSTHROUGH_H + +#include "fuse_i.h" + +#include <linux/fuse.h> +#include <linux/file.h> + +void fuse_setup_passthrough(struct fuse_conn *fc, struct fuse_req *req); + +ssize_t fuse_passthrough_read_iter(struct kiocb *iocb, struct iov_iter *to); + +ssize_t fuse_passthrough_write_iter(struct kiocb *iocb, struct iov_iter *from); + +void fuse_passthrough_release(struct fuse_file *ff); + +#endif /* _FS_FUSE_PASSTHROUGH_H */ diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 4b2eb65be0d4..ca9c492a1885 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -857,6 +857,7 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) fc->conn_error = 1; else { unsigned long ra_pages; + struct super_block *sb = fc->sb; process_init_limits(fc, arg); @@ -895,6 +896,13 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) fc->async_dio = 1; if (arg->flags & FUSE_WRITEBACK_CACHE) fc->writeback_cache = 1; + if (arg->flags & FUSE_PASSTHROUGH) { + fc->passthrough = 1; + /* Prevent further stacking */ + sb->s_stack_depth = FILESYSTEM_MAX_STACK_DEPTH; + pr_info("FUSE: Pass through is enabled [%s : %d]!\n", + current->comm, current->pid); + } if (arg->time_gran && arg->time_gran <= 1000000000) fc->sb->s_time_gran = arg->time_gran; } else { diff --git a/fs/fuse/passthrough.c b/fs/fuse/passthrough.c new file mode 100644 index 000000000000..785af63acabd --- /dev/null +++ b/fs/fuse/passthrough.c @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2015-2016, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include "fuse_passthrough.h" + +#include <linux/aio.h> +#include <linux/fs_stack.h> + +void fuse_setup_passthrough(struct fuse_conn *fc, struct fuse_req *req) +{ + int daemon_fd, fs_stack_depth; + unsigned open_out_index; + struct file *passthrough_filp; + struct inode *passthrough_inode; + struct super_block *passthrough_sb; + struct fuse_open_out *open_out; + + req->passthrough_filp = NULL; + + if (!(fc->passthrough)) + return; + + if ((req->in.h.opcode != FUSE_OPEN) && + (req->in.h.opcode != FUSE_CREATE)) + return; + + open_out_index = req->in.numargs - 1; + + BUG_ON(open_out_index != 0 && open_out_index != 1); + BUG_ON(req->out.args[open_out_index].size != sizeof(*open_out)); + + open_out = req->out.args[open_out_index].value; + + daemon_fd = (int)open_out->passthrough_fd; + if (daemon_fd < 0) + return; + + passthrough_filp = fget_raw(daemon_fd); + if (!passthrough_filp) + return; + + passthrough_inode = file_inode(passthrough_filp); + passthrough_sb = passthrough_inode->i_sb; + fs_stack_depth = passthrough_sb->s_stack_depth + 1; + + /* If we reached the stacking limit go through regular io */ + if (fs_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) { + /* Release the passthrough file. 
*/ + fput(passthrough_filp); + pr_err("FUSE: maximum fs stacking depth exceeded, cannot use passthrough for this file\n"); + return; + } + req->passthrough_filp = passthrough_filp; +} + +static ssize_t fuse_passthrough_read_write_iter(struct kiocb *iocb, + struct iov_iter *iter, int do_write) +{ + ssize_t ret_val; + struct fuse_file *ff; + struct file *fuse_file, *passthrough_filp; + struct inode *fuse_inode, *passthrough_inode; + struct fuse_conn *fc; + + ff = iocb->ki_filp->private_data; + fuse_file = iocb->ki_filp; + passthrough_filp = ff->passthrough_filp; + fc = ff->fc; + + /* lock passthrough file to prevent it from being released */ + get_file(passthrough_filp); + iocb->ki_filp = passthrough_filp; + fuse_inode = fuse_file->f_path.dentry->d_inode; + passthrough_inode = file_inode(passthrough_filp); + + if (do_write) { + if (!passthrough_filp->f_op->write_iter) + return -EIO; + ret_val = passthrough_filp->f_op->write_iter(iocb, iter); + + if (ret_val >= 0 || ret_val == -EIOCBQUEUED) { + spin_lock(&fc->lock); + fsstack_copy_inode_size(fuse_inode, passthrough_inode); + spin_unlock(&fc->lock); + fsstack_copy_attr_times(fuse_inode, passthrough_inode); + } + } else { + if (!passthrough_filp->f_op->read_iter) + return -EIO; + ret_val = passthrough_filp->f_op->read_iter(iocb, iter); + if (ret_val >= 0 || ret_val == -EIOCBQUEUED) + fsstack_copy_attr_atime(fuse_inode, passthrough_inode); + } + + iocb->ki_filp = fuse_file; + + /* unlock passthrough file */ + fput(passthrough_filp); + + return ret_val; +} + +ssize_t fuse_passthrough_read_iter(struct kiocb *iocb, struct iov_iter *to) +{ + return fuse_passthrough_read_write_iter(iocb, to, 0); +} + +ssize_t fuse_passthrough_write_iter(struct kiocb *iocb, struct iov_iter *from) +{ + return fuse_passthrough_read_write_iter(iocb, from, 1); +} + +void fuse_passthrough_release(struct fuse_file *ff) +{ + if (!(ff->passthrough_filp)) + return; + + /* Release the passthrough file. */ + fput(ff->passthrough_filp); + ff->passthrough_filp = NULL; +} diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c index ff0ac96a8e7b..db4c867369b5 100644 --- a/fs/gfs2/acl.c +++ b/fs/gfs2/acl.c @@ -78,8 +78,11 @@ int gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type) if (type == ACL_TYPE_ACCESS) { umode_t mode = inode->i_mode; - + struct posix_acl *old_acl = acl; error = posix_acl_update_mode(inode, &inode->i_mode, &acl); + + if (!acl) + posix_acl_release(old_acl); if (error) return error; if (mode != inode->i_mode) diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 1caee0534587..582ef53f2104 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -249,22 +249,6 @@ static int gfs2_write_jdata_pagevec(struct address_space *mapping, for(i = 0; i < nr_pages; i++) { struct page *page = pvec->pages[i]; - /* - * At this point, the page may be truncated or - * invalidated (changing page->mapping to NULL), or - * even swizzled back from swapper_space to tmpfs file - * mapping. However, page->index will not change - * because we have a reference on the page. - */ - if (page->index > end) { - /* - * can't be range_cyclic (1st pass) because - * end == -1 in that case. 
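
The gfs2 writeback hunks here drop the open-coded pagevec_lookup_tag() call with its min(end - index, PAGEVEC_SIZE-1) window, along with the per-page "page->index > end" bailout deleted just above, in favour of pagevec_lookup_range_tag() below, which bounds the lookup at end internally and advances the index itself. The canonical shape of the converted loop, sketched with the two-argument pagevec_init() of this kernel vintage:

    #include <linux/pagemap.h>
    #include <linux/pagevec.h>
    #include <linux/sched.h>

    static void write_range_tagged(struct address_space *mapping,
                                   pgoff_t index, pgoff_t end, int tag)
    {
        struct pagevec pvec;
        unsigned int nr_pages, i;

        pagevec_init(&pvec, 0);
        /* No pages past 'end' are ever returned, so the loop body needs
         * no end check of its own. */
        while ((nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index,
                                                    end, tag))) {
            for (i = 0; i < nr_pages; i++) {
                struct page *page = pvec.pages[i];

                lock_page(page);
                /* ... revalidate page->mapping, skip truncated pages,
                 *     then start writeback on this one ... */
                unlock_page(page);
            }
            pagevec_release(&pvec);
            cond_resched();
        }
    }
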
- */ - ret = 1; - break; - } - *done_index = page->index; lock_page(page); @@ -382,8 +366,8 @@ retry: tag_pages_for_writeback(mapping, index, end); done_index = index; while (!done && (index <= end)) { - nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag, - min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); + nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end, + tag); if (nr_pages == 0) break; diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 8ed2b1a71637..d66790fd702f 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -1226,7 +1226,7 @@ static int set_gfs2_super(struct super_block *s, void *data) * We set the bdi here to the queue backing, file systems can * overwrite this in ->fill_super() */ - s->s_bdi = &bdev_get_queue(s->s_bdev)->backing_dev_info; + s->s_bdi = bdev_get_queue(s->s_bdev)->backing_dev_info; return 0; } diff --git a/fs/inode.c b/fs/inode.c index 3da76119d937..28c8c265f2f6 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -633,6 +633,7 @@ again: dispose_list(&dispose); } +EXPORT_SYMBOL_GPL(evict_inodes); /** * invalidate_inodes - attempt to free all inodes on a superblock @@ -2052,3 +2053,27 @@ void inode_nohighmem(struct inode *inode) mapping_set_gfp_mask(inode->i_mapping, GFP_USER); } EXPORT_SYMBOL(inode_nohighmem); + +/* + * Generic function to check FS_IOC_SETFLAGS values and reject any invalid + * configurations. + * + * Note: the caller should be holding i_mutex, or else be sure that they have + * exclusive access to the inode structure. + */ +int vfs_ioc_setflags_prepare(struct inode *inode, unsigned int oldflags, + unsigned int flags) +{ + /* + * The IMMUTABLE and APPEND_ONLY flags can only be changed by + * the relevant capability. + * + * This test looks nicer. Thanks to Pauline Middelink + */ + if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL) && + !capable(CAP_LINUX_IMMUTABLE)) + return -EPERM; + + return 0; +} +EXPORT_SYMBOL(vfs_ioc_setflags_prepare); diff --git a/fs/internal.h b/fs/internal.h index 6387b35a1c0d..0ec681176ae9 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -123,7 +123,6 @@ extern void inode_add_lru(struct inode *inode); extern void inode_io_list_del(struct inode *inode); extern long get_nr_dirty_inodes(void); -extern void evict_inodes(struct super_block *); extern int invalidate_inodes(struct super_block *, bool); /* @@ -153,3 +152,29 @@ extern void mnt_pin_kill(struct mount *m); * fs/nsfs.c */ extern struct dentry_operations ns_dentry_operations; + +#ifdef CONFIG_FILE_TABLE_DEBUG +void global_filetable_print_warning_once(void); +void global_filetable_add(struct file *filp); +void global_filetable_del(struct file *filp); +void global_filetable_delayed_print(struct mount *mnt); + +#else /* i.e NOT CONFIG_FILE_TABLE_DEBUG */ + +static inline void global_filetable_print_warning_once(void) +{ +} + +static inline void global_filetable_add(struct file *filp) +{ +} + +static inline void global_filetable_del(struct file *filp) +{ +} + +static inline void global_filetable_delayed_print(struct mount *mnt) +{ +} + +#endif /* CONFIG_FILE_TABLE_DEBUG */ diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c index 7a3368929245..6f467642d530 100644 --- a/fs/jffs2/dir.c +++ b/fs/jffs2/dir.c @@ -207,7 +207,8 @@ static int jffs2_create(struct inode *dir_i, struct dentry *dentry, __func__, inode->i_ino, inode->i_mode, inode->i_nlink, f->inocache->pino_nlink, inode->i_mapping->nrpages); - d_instantiate_new(dentry, inode); + unlock_new_inode(inode); + d_instantiate(dentry, inode); return 0; fail: @@ -427,7 +428,8 @@ static int 
jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char mutex_unlock(&dir_f->sem); jffs2_complete_reservation(c); - d_instantiate_new(dentry, inode); + unlock_new_inode(inode); + d_instantiate(dentry, inode); return 0; fail: @@ -571,7 +573,8 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, umode_t mode mutex_unlock(&dir_f->sem); jffs2_complete_reservation(c); - d_instantiate_new(dentry, inode); + unlock_new_inode(inode); + d_instantiate(dentry, inode); return 0; fail: @@ -746,7 +749,8 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, umode_t mode mutex_unlock(&dir_f->sem); jffs2_complete_reservation(c); - d_instantiate_new(dentry, inode); + unlock_new_inode(inode); + d_instantiate(dentry, inode); return 0; fail: diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index f217ae750adb..9d7551f5c32a 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -178,7 +178,8 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, umode_t mode, unlock_new_inode(ip); iput(ip); } else { - d_instantiate_new(dentry, ip); + unlock_new_inode(ip); + d_instantiate(dentry, ip); } out2: @@ -312,7 +313,8 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, umode_t mode) unlock_new_inode(ip); iput(ip); } else { - d_instantiate_new(dentry, ip); + unlock_new_inode(ip); + d_instantiate(dentry, ip); } out2: @@ -1056,7 +1058,8 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry, unlock_new_inode(ip); iput(ip); } else { - d_instantiate_new(dentry, ip); + unlock_new_inode(ip); + d_instantiate(dentry, ip); } out2: @@ -1440,7 +1443,8 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry, unlock_new_inode(ip); iput(ip); } else { - d_instantiate_new(dentry, ip); + unlock_new_inode(ip); + d_instantiate(dentry, ip); } out1: diff --git a/fs/mbcache.c b/fs/mbcache.c index 187477ded6b3..de509271d031 100644 --- a/fs/mbcache.c +++ b/fs/mbcache.c @@ -222,8 +222,19 @@ __mb_cache_entry_release(struct mb_cache_entry *ce) * then reacquire the lock in the proper order. */ spin_lock(&mb_cache_spinlock); - if (list_empty(&ce->e_lru_list)) - list_add_tail(&ce->e_lru_list, &mb_cache_lru_list); + /* + * Evaluate the conditions under global lock mb_cache_spinlock, + * to check if mb_cache_entry_get() is running now + * and has already deleted the entry from mb_cache_lru_list + * and incremented ce->e_refcnt to prevent further additions + * to mb_cache_lru_list. 
+ */ + if (!(ce->e_used || ce->e_queued || + atomic_read(&ce->e_refcnt))) { + if (list_empty(&ce->e_lru_list)) + list_add_tail(&ce->e_lru_list, + &mb_cache_lru_list); + } spin_unlock(&mb_cache_spinlock); } __spin_unlock_mb_cache_entry(ce); @@ -262,7 +273,6 @@ mb_cache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) list_del_init(&ce->e_lru_list); if (ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt)) continue; - spin_unlock(&mb_cache_spinlock); /* Prevent any find or get operation on the entry */ hlist_bl_lock(ce->e_block_hash_p); hlist_bl_lock(ce->e_index_hash_p); @@ -271,10 +281,10 @@ mb_cache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) !list_empty(&ce->e_lru_list)) { hlist_bl_unlock(ce->e_index_hash_p); hlist_bl_unlock(ce->e_block_hash_p); - spin_lock(&mb_cache_spinlock); continue; } __mb_cache_entry_unhash_unlock(ce); + spin_unlock(&mb_cache_spinlock); list_add_tail(&ce->e_lru_list, &free_list); spin_lock(&mb_cache_spinlock); } @@ -516,7 +526,6 @@ mb_cache_entry_alloc(struct mb_cache *cache, gfp_t gfp_flags) if (ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt)) continue; - spin_unlock(&mb_cache_spinlock); /* * Prevent any find or get operation on the * entry. @@ -530,13 +539,13 @@ mb_cache_entry_alloc(struct mb_cache *cache, gfp_t gfp_flags) hlist_bl_unlock(ce->e_index_hash_p); hlist_bl_unlock(ce->e_block_hash_p); l = &mb_cache_lru_list; - spin_lock(&mb_cache_spinlock); continue; } mb_assert(list_empty(&ce->e_lru_list)); mb_assert(!(ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt))); __mb_cache_entry_unhash_unlock(ce); + spin_unlock(&mb_cache_spinlock); goto found; } } @@ -670,6 +679,7 @@ mb_cache_entry_get(struct mb_cache *cache, struct block_device *bdev, cache->c_bucket_bits); block_hash_p = &cache->c_block_hash[bucket]; /* First serialize access to the block corresponding hash chain. */ + spin_lock(&mb_cache_spinlock); hlist_bl_lock(block_hash_p); hlist_bl_for_each_entry(ce, l, block_hash_p, e_block_list) { mb_assert(ce->e_block_hash_p == block_hash_p); @@ -678,9 +688,11 @@ mb_cache_entry_get(struct mb_cache *cache, struct block_device *bdev, * Prevent a free from removing the entry. */ atomic_inc(&ce->e_refcnt); + if (!list_empty(&ce->e_lru_list)) + list_del_init(&ce->e_lru_list); hlist_bl_unlock(block_hash_p); + spin_unlock(&mb_cache_spinlock); __spin_lock_mb_cache_entry(ce); - atomic_dec(&ce->e_refcnt); if (ce->e_used > 0) { DEFINE_WAIT(wait); while (ce->e_used > 0) { @@ -695,13 +707,9 @@ mb_cache_entry_get(struct mb_cache *cache, struct block_device *bdev, finish_wait(&mb_cache_queue, &wait); } ce->e_used += 1 + MB_CACHE_WRITER; + atomic_dec(&ce->e_refcnt); __spin_unlock_mb_cache_entry(ce); - if (!list_empty(&ce->e_lru_list)) { - spin_lock(&mb_cache_spinlock); - list_del_init(&ce->e_lru_list); - spin_unlock(&mb_cache_spinlock); - } if (!__mb_cache_entry_is_block_hashed(ce)) { __mb_cache_entry_release(ce); return NULL; @@ -710,6 +718,7 @@ mb_cache_entry_get(struct mb_cache *cache, struct block_device *bdev, } } hlist_bl_unlock(block_hash_p); + spin_unlock(&mb_cache_spinlock); return NULL; } diff --git a/fs/namei.c b/fs/namei.c index 3a8f1327149f..fe1612ac009d 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -40,6 +40,9 @@ #include "internal.h" #include "mount.h" +#define CREATE_TRACE_POINTS +#include <trace/events/namei.h> + /* [Feb-1997 T. Schoebel-Theuer] * Fundamental changes in the pathname lookup mechanisms (namei) * were necessary because of omirr. 
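/*
 * CREATE_TRACE_POINTS above implies include/trace/events/namei.h
 * declares the inodepath event used by success_walk_trace() further
 * down; that header is not part of this hunk, so this is a plausible
 * shape only, not the actual declaration:
 */
TRACE_EVENT(inodepath,
	TP_PROTO(struct inode *inode, char *path),
	TP_ARGS(inode, path),
	TP_STRUCT__entry(
		__field(unsigned long, ino)
		__string(path, path)
	),
	TP_fast_assign(
		__entry->ino = inode->i_ino;
		__assign_str(path, path);
	),
	TP_printk("ino=%lu path=%s", __entry->ino, __get_str(path))
);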
The reason is that omirr needs @@ -784,6 +787,81 @@ static inline int d_revalidate(struct dentry *dentry, unsigned int flags) return dentry->d_op->d_revalidate(dentry, flags); } +#define INIT_PATH_SIZE 64 + +static void success_walk_trace(struct nameidata *nd) +{ + struct path *pt = &nd->path; + struct inode *i = nd->inode; + char buf[INIT_PATH_SIZE], *try_buf; + int cur_path_size; + char *p; + + /* When eBPF/ tracepoint is disabled, keep overhead low. */ + if (!trace_inodepath_enabled()) + return; + + /* First try stack allocated buffer. */ + try_buf = buf; + cur_path_size = INIT_PATH_SIZE; + + while (cur_path_size <= PATH_MAX) { + /* Free previous heap allocation if we are now trying + * a second or later heap allocation. + */ + if (try_buf != buf) + kfree(try_buf); + + /* All but the first alloc are on the heap. */ + if (cur_path_size != INIT_PATH_SIZE) { + try_buf = kmalloc(cur_path_size, GFP_KERNEL); + if (!try_buf) { + try_buf = buf; + sprintf(try_buf, "error:buf_alloc_failed"); + break; + } + } + + p = d_path(pt, try_buf, cur_path_size); + + if (!IS_ERR(p)) { + char *end = mangle_path(try_buf, p, "\n"); + + if (end) { + try_buf[end - try_buf] = 0; + break; + } else { + /* On mangle errors, double path size + * till PATH_MAX. + */ + cur_path_size = cur_path_size << 1; + continue; + } + } + + if (PTR_ERR(p) == -ENAMETOOLONG) { + /* If d_path complains that name is too long, + * then double path size till PATH_MAX. + */ + cur_path_size = cur_path_size << 1; + continue; + } + + sprintf(try_buf, "error:d_path_failed_%lu", + -1 * PTR_ERR(p)); + break; + } + + if (cur_path_size > PATH_MAX) + sprintf(try_buf, "error:d_path_name_too_long"); + + trace_inodepath(i, try_buf); + + if (try_buf != buf) + kfree(try_buf); + return; +} + /** * complete_walk - successful completion of path walk * @nd: pointer nameidata @@ -806,15 +884,21 @@ static int complete_walk(struct nameidata *nd) return -ECHILD; } - if (likely(!(nd->flags & LOOKUP_JUMPED))) + if (likely(!(nd->flags & LOOKUP_JUMPED))) { + success_walk_trace(nd); return 0; + } - if (likely(!(dentry->d_flags & DCACHE_OP_WEAK_REVALIDATE))) + if (likely(!(dentry->d_flags & DCACHE_OP_WEAK_REVALIDATE))) { + success_walk_trace(nd); return 0; + } status = dentry->d_op->d_weak_revalidate(dentry, nd->flags); - if (status > 0) + if (status > 0) { + success_walk_trace(nd); return 0; + } if (!status) status = -ESTALE; @@ -2734,8 +2818,14 @@ int vfs_create2(struct vfsmount *mnt, struct inode *dir, struct dentry *dentry, if (error) return error; error = dir->i_op->create(dir, dentry, mode, want_excl); + if (error) + return error; + error = security_inode_post_create(dir, dentry, mode); + if (error) + return error; if (!error) fsnotify_create(dir, dentry); + return error; } EXPORT_SYMBOL(vfs_create2); @@ -3428,6 +3518,8 @@ out2: error = -ESTALE; } file = ERR_PTR(error); + } else { + global_filetable_add(file); } return file; } @@ -3595,8 +3687,16 @@ int vfs_mknod2(struct vfsmount *mnt, struct inode *dir, struct dentry *dentry, u return error; error = dir->i_op->mknod(dir, dentry, mode, dev); + if (error) + return error; + + error = security_inode_post_create(dir, dentry, mode); + if (error) + return error; + if (!error) fsnotify_create(dir, dentry); + return error; } EXPORT_SYMBOL(vfs_mknod2); diff --git a/fs/namespace.c b/fs/namespace.c index e7f6b482bf7e..58c6f27b141d 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1625,6 +1625,8 @@ static int do_umount(struct mount *mnt, int flags) out: unlock_mount_hash(); namespace_unlock(); + if (retval == -EBUSY) + 
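/*
 * The global_filetable_*() hooks wired up in this patch are declared in
 * fs/internal.h earlier; the fs/file_table.c side is not shown here. A
 * minimal sketch of what a CONFIG_FILE_TABLE_DEBUG implementation could
 * look like (the list head, lock, and f_global_list member are
 * assumptions for illustration, not taken from this patch):
 */
static LIST_HEAD(global_filetable);
static DEFINE_SPINLOCK(global_filetable_lock);

void global_filetable_add(struct file *filp)
{
	spin_lock(&global_filetable_lock);
	list_add_tail(&filp->f_global_list, &global_filetable);
	spin_unlock(&global_filetable_lock);
}

void global_filetable_del(struct file *filp)
{
	spin_lock(&global_filetable_lock);
	list_del_init(&filp->f_global_list);
	spin_unlock(&global_filetable_lock);
}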
global_filetable_delayed_print(mnt); return retval; } diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index 3a3821b00486..9deca59be7e5 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -2147,8 +2147,8 @@ static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *btree, pagevec_init(&pvec, 0); - while (pagevec_lookup_tag(&pvec, btcache, &index, PAGECACHE_TAG_DIRTY, - PAGEVEC_SIZE)) { + while (pagevec_lookup_tag(&pvec, btcache, &index, + PAGECACHE_TAG_DIRTY)) { for (i = 0; i < pagevec_count(&pvec); i++) { bh = head = page_buffers(pvec.pages[i]); do { diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c index cd7f5b0abe84..c9a1a491aa91 100644 --- a/fs/nilfs2/namei.c +++ b/fs/nilfs2/namei.c @@ -50,7 +50,8 @@ static inline int nilfs_add_nondir(struct dentry *dentry, struct inode *inode) { int err = nilfs_add_link(dentry, inode); if (!err) { - d_instantiate_new(dentry, inode); + d_instantiate(dentry, inode); + unlock_new_inode(inode); return 0; } inode_dec_link_count(inode); @@ -245,7 +246,8 @@ static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) goto out_fail; nilfs_mark_inode_dirty(inode); - d_instantiate_new(dentry, inode); + d_instantiate(dentry, inode); + unlock_new_inode(inode); out: if (!err) err = nilfs_transaction_commit(dir->i_sb); diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c index 45d650addd56..447999563737 100644 --- a/fs/nilfs2/page.c +++ b/fs/nilfs2/page.c @@ -262,8 +262,7 @@ int nilfs_copy_dirty_pages(struct address_space *dmap, pagevec_init(&pvec, 0); repeat: - if (!pagevec_lookup_tag(&pvec, smap, &index, PAGECACHE_TAG_DIRTY, - PAGEVEC_SIZE)) + if (!pagevec_lookup_tag(&pvec, smap, &index, PAGECACHE_TAG_DIRTY)) return 0; for (i = 0; i < pagevec_count(&pvec); i++) { @@ -382,8 +381,8 @@ void nilfs_clear_dirty_pages(struct address_space *mapping, bool silent) pagevec_init(&pvec, 0); - while (pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY, - PAGEVEC_SIZE)) { + while (pagevec_lookup_tag(&pvec, mapping, &index, + PAGECACHE_TAG_DIRTY)) { for (i = 0; i < pagevec_count(&pvec); i++) { struct page *page = pvec.pages[i]; diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index d58c0c62b2ae..b174508d8766 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -705,18 +705,14 @@ static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode, pagevec_init(&pvec, 0); repeat: if (unlikely(index > last) || - !pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY, - min_t(pgoff_t, last - index, - PAGEVEC_SIZE - 1) + 1)) + !pagevec_lookup_range_tag(&pvec, mapping, &index, last, + PAGECACHE_TAG_DIRTY)) return ndirties; for (i = 0; i < pagevec_count(&pvec); i++) { struct buffer_head *bh, *head; struct page *page = pvec.pages[i]; - if (unlikely(page->index > last)) - break; - lock_page(page); if (!page_has_buffers(page)) create_empty_buffers(page, i_blocksize(inode), 0); @@ -753,8 +749,8 @@ static void nilfs_lookup_dirty_node_buffers(struct inode *inode, pagevec_init(&pvec, 0); - while (pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY, - PAGEVEC_SIZE)) { + while (pagevec_lookup_tag(&pvec, mapping, &index, + PAGECACHE_TAG_DIRTY)) { for (i = 0; i < pagevec_count(&pvec); i++) { bh = head = page_buffers(pvec.pages[i]); do { diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 354013ea22ec..67c6c650b21e 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -1079,7 +1079,7 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent) sb->s_time_gran = 1; sb->s_max_links = NILFS_LINK_MAX; - sb->s_bdi = 
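/*
 * Calling convention the btrfs/nilfs2 conversions in this patch rely
 * on: pagevec_lookup_range_tag() bounds the scan at @end internally,
 * and pagevec_lookup_tag() drops the explicit nr_pages argument, so
 * callers lose both the min(end - index, PAGEVEC_SIZE - 1) + 1
 * computation and the page->index > end recheck. Typical caller shape
 * after conversion; handle_dirty_page() is a stand-in name:
 */
static void scan_dirty_range(struct address_space *mapping,
			     pgoff_t index, pgoff_t end)
{
	struct pagevec pvec;
	int i;

	pagevec_init(&pvec, 0);
	while (pagevec_lookup_range_tag(&pvec, mapping, &index, end,
					PAGECACHE_TAG_DIRTY)) {
		for (i = 0; i < pagevec_count(&pvec); i++)
			handle_dirty_page(pvec.pages[i]); /* index <= end guaranteed */
		pagevec_release(&pvec);
		cond_resched();
	}
}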
&bdev_get_queue(sb->s_bdev)->backing_dev_info; + sb->s_bdi = bdev_get_queue(sb->s_bdev)->backing_dev_info; err = load_nilfs(nilfs, sb); if (err) diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 3972ac87a8cb..3189a32d8fa3 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -357,7 +357,7 @@ int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, } out_unlock: unlock_rename(workdir, upperdir); - revert_creds(old_cred); + ovl_revert_creds(old_cred); if (link) free_page((unsigned long) link); diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index eedacae889b9..953c88dd6519 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -407,7 +407,7 @@ static int ovl_create_or_link(struct dentry *dentry, int mode, dev_t rdev, if (!ovl_dentry_is_opaque(dentry)) { err = ovl_create_upper(dentry, inode, &stat, link, hardlink); } else { - const struct cred *old_cred; + const struct cred *old_cred, *hold_cred = NULL; struct cred *override_cred; old_cred = ovl_override_creds(dentry->d_sb); @@ -415,15 +415,22 @@ static int ovl_create_or_link(struct dentry *dentry, int mode, dev_t rdev, err = -ENOMEM; override_cred = prepare_creds(); if (override_cred) { - override_cred->fsuid = old_cred->fsuid; - override_cred->fsgid = old_cred->fsgid; - put_cred(override_creds(override_cred)); + const struct cred *our_cred; + + our_cred = old_cred; + if (!our_cred) + our_cred = current_cred(); + override_cred->fsuid = our_cred->fsuid; + override_cred->fsgid = our_cred->fsgid; + hold_cred = override_creds(override_cred); put_cred(override_cred); err = ovl_create_over_whiteout(dentry, inode, &stat, link, hardlink); } - revert_creds(old_cred); + ovl_revert_creds(old_cred ?: hold_cred); + if (old_cred && hold_cred) + put_cred(hold_cred); } if (!err) @@ -657,7 +664,7 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir) err = ovl_remove_and_whiteout(dentry, is_dir); - revert_creds(old_cred); + ovl_revert_creds(old_cred); } out_drop_write: ovl_drop_write(dentry); @@ -896,8 +903,7 @@ out_dput_old: out_unlock: unlock_rename(new_upperdir, old_upperdir); out_revert_creds: - if (old_opaque || new_opaque) - revert_creds(old_cred); + ovl_revert_creds(old_cred); out_drop_write: ovl_drop_write(old); out: diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 27a42975d7cd..0723b8f97651 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -151,6 +151,7 @@ bool ovl_dentry_is_opaque(struct dentry *dentry); void ovl_dentry_set_opaque(struct dentry *dentry, bool opaque); bool ovl_is_whiteout(struct dentry *dentry); const struct cred *ovl_override_creds(struct super_block *sb); +void ovl_revert_creds(const struct cred *oldcred); void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry); struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags); diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index da999e73c97a..9ff8f8192bf1 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -223,7 +223,7 @@ static int ovl_check_whiteouts(struct dentry *dir, struct ovl_readdir_data *rdd) } mutex_unlock(&dir->d_inode->i_mutex); } - revert_creds(old_cred); + ovl_revert_creds(old_cred); return err; } diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index fa20c95bd456..48f7c1ab94cd 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -31,6 +31,7 @@ struct ovl_config { char *lowerdir; char *upperdir; char *workdir; + bool override_creds; }; /* private information held for 
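/*
 * Pattern used by the overlayfs call sites converted above: with
 * override_creds=off, ovl_override_creds() (added later in this patch)
 * returns NULL, and ovl_revert_creds() treats NULL as "nothing was
 * overridden", so every call site keeps the same shape. Sketch, with
 * lower_fs_op() standing in for any operation on the lower filesystem:
 */
static int ovl_do_lower_op(struct dentry *dentry)
{
	const struct cred *old_cred;
	int err;

	old_cred = ovl_override_creds(dentry->d_sb); /* NULL when not overriding */
	err = lower_fs_op(dentry);		     /* placeholder operation */
	ovl_revert_creds(old_cred);		     /* no-op for NULL */
	return err;
}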
overlayfs's superblock */ @@ -252,9 +253,17 @@ const struct cred *ovl_override_creds(struct super_block *sb) { struct ovl_fs *ofs = sb->s_fs_info; + if (!ofs->config.override_creds) + return NULL; return override_creds(ofs->creator_cred); } +void ovl_revert_creds(const struct cred *old_cred) +{ + if (old_cred) + revert_creds(old_cred); +} + static bool ovl_is_opaquedir(struct dentry *dentry) { int res; @@ -626,6 +635,11 @@ static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf) return err; } +static bool __read_mostly ovl_override_creds_def = true; +module_param_named(override_creds, ovl_override_creds_def, bool, 0644); +MODULE_PARM_DESC(ovl_override_creds_def, + "Use mounter's credentials for accesses"); + /** * ovl_show_options * @@ -642,6 +656,9 @@ static int ovl_show_options(struct seq_file *m, struct dentry *dentry) seq_show_option(m, "upperdir", ufs->config.upperdir); seq_show_option(m, "workdir", ufs->config.workdir); } + if (ufs->config.override_creds != ovl_override_creds_def) + seq_show_option(m, "override_creds", + ufs->config.override_creds ? "on" : "off"); return 0; } @@ -666,6 +683,8 @@ enum { OPT_LOWERDIR, OPT_UPPERDIR, OPT_WORKDIR, + OPT_OVERRIDE_CREDS_ON, + OPT_OVERRIDE_CREDS_OFF, OPT_ERR, }; @@ -673,6 +692,8 @@ static const match_table_t ovl_tokens = { {OPT_LOWERDIR, "lowerdir=%s"}, {OPT_UPPERDIR, "upperdir=%s"}, {OPT_WORKDIR, "workdir=%s"}, + {OPT_OVERRIDE_CREDS_ON, "override_creds=on"}, + {OPT_OVERRIDE_CREDS_OFF, "override_creds=off"}, {OPT_ERR, NULL} }; @@ -703,6 +724,7 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config) { char *p; + config->override_creds = ovl_override_creds_def; while ((p = ovl_next_opt(&opt)) != NULL) { int token; substring_t args[MAX_OPT_ARGS]; @@ -733,6 +755,14 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config) return -ENOMEM; break; + case OPT_OVERRIDE_CREDS_ON: + config->override_creds = true; + break; + + case OPT_OVERRIDE_CREDS_OFF: + config->override_creds = false; + break; + default: pr_err("overlayfs: unrecognized mount option \"%s\" or missing value\n", p); return -EINVAL; diff --git a/fs/proc/array.c b/fs/proc/array.c index 6238f45eed02..c4478abd1bef 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -169,51 +169,45 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns, task_unlock(p); rcu_read_unlock(); - seq_printf(m, - "State:\t%s\n" - "Tgid:\t%d\n" - "Ngid:\t%d\n" - "Pid:\t%d\n" - "PPid:\t%d\n" - "TracerPid:\t%d\n" - "Uid:\t%d\t%d\t%d\t%d\n" - "Gid:\t%d\t%d\t%d\t%d\n" - "FDSize:\t%d\nGroups:\t", - get_task_state(p), - tgid, ngid, pid_nr_ns(pid, ns), ppid, tpid, - from_kuid_munged(user_ns, cred->uid), - from_kuid_munged(user_ns, cred->euid), - from_kuid_munged(user_ns, cred->suid), - from_kuid_munged(user_ns, cred->fsuid), - from_kgid_munged(user_ns, cred->gid), - from_kgid_munged(user_ns, cred->egid), - from_kgid_munged(user_ns, cred->sgid), - from_kgid_munged(user_ns, cred->fsgid), - max_fds); - + seq_printf(m, "State:\t%s", get_task_state(p)); + + seq_put_decimal_ull(m, "\nTgid:\t", tgid); + seq_put_decimal_ull(m, "\nPid:\t", pid_nr_ns(pid, ns)); + seq_put_decimal_ull(m, "\nPPid:\t", ppid); + seq_put_decimal_ull(m, "\nTracerPid:\t", tpid); + seq_put_decimal_ull(m, "\nUid:\t", from_kuid_munged(user_ns, cred->uid)); + seq_put_decimal_ull(m, "\t", from_kuid_munged(user_ns, cred->euid)); + seq_put_decimal_ull(m, "\t", from_kuid_munged(user_ns, cred->suid)); + seq_put_decimal_ull(m, "\t", from_kuid_munged(user_ns, cred->fsuid)); + seq_put_decimal_ull(m, "\nGid:\t", 
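/*
 * The /proc conversions in the hunks below assume the updated seq_file
 * helpers: seq_put_decimal_ull()/_ll() now take a string prefix rather
 * than the old single separator character, so a label and its value are
 * emitted in one call with no printf format parsing. Equivalent output,
 * old versus new:
 */
seq_printf(m, "Threads:\t%d\n", num_threads);		/* old: parses format */

seq_put_decimal_ull(m, "Threads:\t", num_threads);	/* new: no parsing */
seq_putc(m, '\n');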
from_kgid_munged(user_ns, cred->gid)); + seq_put_decimal_ull(m, "\t", from_kgid_munged(user_ns, cred->egid)); + seq_put_decimal_ull(m, "\t", from_kgid_munged(user_ns, cred->sgid)); + seq_put_decimal_ull(m, "\t", from_kgid_munged(user_ns, cred->fsgid)); + seq_put_decimal_ull(m, "\nNgid:\t", ngid); + seq_put_decimal_ull(m, "\nFDSize:\t", max_fds); + + seq_puts(m, "\nGroups:\t"); group_info = cred->group_info; for (g = 0; g < group_info->ngroups; g++) - seq_printf(m, "%d ", - from_kgid_munged(user_ns, GROUP_AT(group_info, g))); + seq_put_decimal_ull(m, g ? " " : "", + from_kgid_munged(user_ns, GROUP_AT(group_info, g))); put_cred(cred); + /* Trailing space shouldn't have been added in the first place. */ + seq_putc(m, ' '); #ifdef CONFIG_PID_NS seq_puts(m, "\nNStgid:"); for (g = ns->level; g <= pid->level; g++) - seq_printf(m, "\t%d", - task_tgid_nr_ns(p, pid->numbers[g].ns)); + seq_put_decimal_ull(m, "\t", task_tgid_nr_ns(p, pid->numbers[g].ns)); seq_puts(m, "\nNSpid:"); for (g = ns->level; g <= pid->level; g++) - seq_printf(m, "\t%d", - task_pid_nr_ns(p, pid->numbers[g].ns)); + seq_put_decimal_ull(m, "\t", task_pid_nr_ns(p, pid->numbers[g].ns)); seq_puts(m, "\nNSpgid:"); for (g = ns->level; g <= pid->level; g++) - seq_printf(m, "\t%d", - task_pgrp_nr_ns(p, pid->numbers[g].ns)); + seq_put_decimal_ull(m, "\t", task_pgrp_nr_ns(p, pid->numbers[g].ns)); seq_puts(m, "\nNSsid:"); for (g = ns->level; g <= pid->level; g++) - seq_printf(m, "\t%d", - task_session_nr_ns(p, pid->numbers[g].ns)); + seq_put_decimal_ull(m, "\t", task_session_nr_ns(p, pid->numbers[g].ns)); #endif seq_putc(m, '\n'); } @@ -282,11 +276,12 @@ static inline void task_sig(struct seq_file *m, struct task_struct *p) unlock_task_sighand(p, &flags); } - seq_printf(m, "Threads:\t%d\n", num_threads); - seq_printf(m, "SigQ:\t%lu/%lu\n", qsize, qlim); + seq_put_decimal_ull(m, "Threads:\t", num_threads); + seq_put_decimal_ull(m, "\nSigQ:\t", qsize); + seq_put_decimal_ull(m, "/", qlim); /* render them all */ - render_sigset_t(m, "SigPnd:\t", &pending); + render_sigset_t(m, "\nSigPnd:\t", &pending); render_sigset_t(m, "ShdPnd:\t", &shpending); render_sigset_t(m, "SigBlk:\t", &blocked); render_sigset_t(m, "SigIgn:\t", &ignored); @@ -331,7 +326,8 @@ static inline void task_cap(struct seq_file *m, struct task_struct *p) static inline void task_seccomp(struct seq_file *m, struct task_struct *p) { #ifdef CONFIG_SECCOMP - seq_printf(m, "Seccomp:\t%d\n", p->seccomp.mode); + seq_put_decimal_ull(m, "Seccomp:\t", p->seccomp.mode); + seq_putc(m, '\n'); #endif seq_printf(m, "Speculation_Store_Bypass:\t"); switch (arch_prctl_spec_ctrl_get(p, PR_SPEC_STORE_BYPASS)) { @@ -363,10 +359,9 @@ static inline void task_seccomp(struct seq_file *m, struct task_struct *p) static inline void task_context_switch_counts(struct seq_file *m, struct task_struct *p) { - seq_printf(m, "voluntary_ctxt_switches:\t%lu\n" - "nonvoluntary_ctxt_switches:\t%lu\n", - p->nvcsw, - p->nivcsw); + seq_put_decimal_ull(m, "voluntary_ctxt_switches:\t", p->nvcsw); + seq_put_decimal_ull(m, "\nnonvoluntary_ctxt_switches:\t", p->nivcsw); + seq_putc(m, '\n'); } static void task_cpus_allowed(struct seq_file *m, struct task_struct *task) @@ -510,41 +505,41 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, start_time = nsec_to_clock_t(task->real_start_time); seq_printf(m, "%d (%s) %c", pid_nr_ns(pid, ns), tcomm, state); - seq_put_decimal_ll(m, ' ', ppid); - seq_put_decimal_ll(m, ' ', pgid); - seq_put_decimal_ll(m, ' ', sid); - seq_put_decimal_ll(m, ' ', tty_nr); - 
seq_put_decimal_ll(m, ' ', tty_pgrp); - seq_put_decimal_ull(m, ' ', task->flags); - seq_put_decimal_ull(m, ' ', min_flt); - seq_put_decimal_ull(m, ' ', cmin_flt); - seq_put_decimal_ull(m, ' ', maj_flt); - seq_put_decimal_ull(m, ' ', cmaj_flt); - seq_put_decimal_ull(m, ' ', cputime_to_clock_t(utime)); - seq_put_decimal_ull(m, ' ', cputime_to_clock_t(stime)); - seq_put_decimal_ll(m, ' ', cputime_to_clock_t(cutime)); - seq_put_decimal_ll(m, ' ', cputime_to_clock_t(cstime)); - seq_put_decimal_ll(m, ' ', priority); - seq_put_decimal_ll(m, ' ', nice); - seq_put_decimal_ll(m, ' ', num_threads); - seq_put_decimal_ull(m, ' ', 0); - seq_put_decimal_ull(m, ' ', start_time); - seq_put_decimal_ull(m, ' ', vsize); - seq_put_decimal_ull(m, ' ', mm ? get_mm_rss(mm) : 0); - seq_put_decimal_ull(m, ' ', rsslim); - seq_put_decimal_ull(m, ' ', mm ? (permitted ? mm->start_code : 1) : 0); - seq_put_decimal_ull(m, ' ', mm ? (permitted ? mm->end_code : 1) : 0); - seq_put_decimal_ull(m, ' ', (permitted && mm) ? mm->start_stack : 0); - seq_put_decimal_ull(m, ' ', esp); - seq_put_decimal_ull(m, ' ', eip); + seq_put_decimal_ll(m, " ", ppid); + seq_put_decimal_ll(m, " ", pgid); + seq_put_decimal_ll(m, " ", sid); + seq_put_decimal_ll(m, " ", tty_nr); + seq_put_decimal_ll(m, " ", tty_pgrp); + seq_put_decimal_ull(m, " ", task->flags); + seq_put_decimal_ull(m, " ", min_flt); + seq_put_decimal_ull(m, " ", cmin_flt); + seq_put_decimal_ull(m, " ", maj_flt); + seq_put_decimal_ull(m, " ", cmaj_flt); + seq_put_decimal_ull(m, " ", cputime_to_clock_t(utime)); + seq_put_decimal_ull(m, " ", cputime_to_clock_t(stime)); + seq_put_decimal_ll(m, " ", cputime_to_clock_t(cutime)); + seq_put_decimal_ll(m, " ", cputime_to_clock_t(cstime)); + seq_put_decimal_ll(m, " ", priority); + seq_put_decimal_ll(m, " ", nice); + seq_put_decimal_ll(m, " ", num_threads); + seq_put_decimal_ull(m, " ", 0); + seq_put_decimal_ull(m, " ", start_time); + seq_put_decimal_ull(m, " ", vsize); + seq_put_decimal_ull(m, " ", mm ? get_mm_rss(mm) : 0); + seq_put_decimal_ull(m, " ", rsslim); + seq_put_decimal_ull(m, " ", mm ? (permitted ? mm->start_code : 1) : 0); + seq_put_decimal_ull(m, " ", mm ? (permitted ? mm->end_code : 1) : 0); + seq_put_decimal_ull(m, " ", (permitted && mm) ? mm->start_stack : 0); + seq_put_decimal_ull(m, " ", esp); + seq_put_decimal_ull(m, " ", eip); /* The signal information here is obsolete. * It must be decimal for Linux 2.0 compatibility. * Use /proc/#/status for real-time signals. 
*/ - seq_put_decimal_ull(m, ' ', task->pending.signal.sig[0] & 0x7fffffffUL); - seq_put_decimal_ull(m, ' ', task->blocked.sig[0] & 0x7fffffffUL); - seq_put_decimal_ull(m, ' ', sigign.sig[0] & 0x7fffffffUL); - seq_put_decimal_ull(m, ' ', sigcatch.sig[0] & 0x7fffffffUL); + seq_put_decimal_ull(m, " ", task->pending.signal.sig[0] & 0x7fffffffUL); + seq_put_decimal_ull(m, " ", task->blocked.sig[0] & 0x7fffffffUL); + seq_put_decimal_ull(m, " ", sigign.sig[0] & 0x7fffffffUL); + seq_put_decimal_ull(m, " ", sigcatch.sig[0] & 0x7fffffffUL); /* * We used to output the absolute kernel address, but that's an @@ -558,31 +553,31 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, else seq_puts(m, " 0"); - seq_put_decimal_ull(m, ' ', 0); - seq_put_decimal_ull(m, ' ', 0); - seq_put_decimal_ll(m, ' ', task->exit_signal); - seq_put_decimal_ll(m, ' ', task_cpu(task)); - seq_put_decimal_ull(m, ' ', task->rt_priority); - seq_put_decimal_ull(m, ' ', task->policy); - seq_put_decimal_ull(m, ' ', delayacct_blkio_ticks(task)); - seq_put_decimal_ull(m, ' ', cputime_to_clock_t(gtime)); - seq_put_decimal_ll(m, ' ', cputime_to_clock_t(cgtime)); + seq_put_decimal_ull(m, " ", 0); + seq_put_decimal_ull(m, " ", 0); + seq_put_decimal_ll(m, " ", task->exit_signal); + seq_put_decimal_ll(m, " ", task_cpu(task)); + seq_put_decimal_ull(m, " ", task->rt_priority); + seq_put_decimal_ull(m, " ", task->policy); + seq_put_decimal_ull(m, " ", delayacct_blkio_ticks(task)); + seq_put_decimal_ull(m, " ", cputime_to_clock_t(gtime)); + seq_put_decimal_ll(m, " ", cputime_to_clock_t(cgtime)); if (mm && permitted) { - seq_put_decimal_ull(m, ' ', mm->start_data); - seq_put_decimal_ull(m, ' ', mm->end_data); - seq_put_decimal_ull(m, ' ', mm->start_brk); - seq_put_decimal_ull(m, ' ', mm->arg_start); - seq_put_decimal_ull(m, ' ', mm->arg_end); - seq_put_decimal_ull(m, ' ', mm->env_start); - seq_put_decimal_ull(m, ' ', mm->env_end); + seq_put_decimal_ull(m, " ", mm->start_data); + seq_put_decimal_ull(m, " ", mm->end_data); + seq_put_decimal_ull(m, " ", mm->start_brk); + seq_put_decimal_ull(m, " ", mm->arg_start); + seq_put_decimal_ull(m, " ", mm->arg_end); + seq_put_decimal_ull(m, " ", mm->env_start); + seq_put_decimal_ull(m, " ", mm->env_end); } else - seq_printf(m, " 0 0 0 0 0 0 0"); + seq_puts(m, " 0 0 0 0 0 0 0"); if (permitted) - seq_put_decimal_ll(m, ' ', task->exit_code); + seq_put_decimal_ll(m, " ", task->exit_code); else - seq_put_decimal_ll(m, ' ', 0); + seq_puts(m, " 0"); seq_putc(m, '\n'); if (mm) @@ -618,13 +613,13 @@ int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns, * seq_printf(m, "%lu %lu %lu %lu 0 %lu 0\n", * size, resident, shared, text, data); */ - seq_put_decimal_ull(m, 0, size); - seq_put_decimal_ull(m, ' ', resident); - seq_put_decimal_ull(m, ' ', shared); - seq_put_decimal_ull(m, ' ', text); - seq_put_decimal_ull(m, ' ', 0); - seq_put_decimal_ull(m, ' ', data); - seq_put_decimal_ull(m, ' ', 0); + seq_put_decimal_ull(m, "", size); + seq_put_decimal_ull(m, " ", resident); + seq_put_decimal_ull(m, " ", shared); + seq_put_decimal_ull(m, " ", text); + seq_put_decimal_ull(m, " ", 0); + seq_put_decimal_ull(m, " ", data); + seq_put_decimal_ull(m, " ", 0); seq_putc(m, '\n'); return 0; diff --git a/fs/proc/base.c b/fs/proc/base.c index 91d58005d9b9..8da1fc940a86 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1047,15 +1047,20 @@ static ssize_t oom_adj_read(struct file *file, char __user *buf, size_t count, int oom_adj = OOM_ADJUST_MIN; size_t len; unsigned long flags; + int mult = 1; if (!task) 
return -ESRCH; if (lock_task_sighand(task, &flags)) { - if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MAX) + if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MAX) { oom_adj = OOM_ADJUST_MAX; - else - oom_adj = (task->signal->oom_score_adj * -OOM_DISABLE) / - OOM_SCORE_ADJ_MAX; + } else { + if (task->signal->oom_score_adj < 0) + mult = -1; + oom_adj = roundup(mult * task->signal->oom_score_adj * + -OOM_DISABLE, OOM_SCORE_ADJ_MAX) / + OOM_SCORE_ADJ_MAX * mult; + } unlock_task_sighand(task, &flags); } put_task_struct(task); @@ -1439,6 +1444,204 @@ static const struct file_operations proc_pid_sched_operations = { #endif +/* + * Print out various scheduling related per-task fields: + */ + +#ifdef CONFIG_SMP + +static int sched_wake_up_idle_show(struct seq_file *m, void *v) +{ + struct inode *inode = m->private; + struct task_struct *p; + + p = get_proc_task(inode); + if (!p) + return -ESRCH; + + seq_printf(m, "%d\n", sched_get_wake_up_idle(p)); + + put_task_struct(p); + + return 0; +} + +static ssize_t +sched_wake_up_idle_write(struct file *file, const char __user *buf, + size_t count, loff_t *offset) +{ + struct inode *inode = file_inode(file); + struct task_struct *p; + char buffer[PROC_NUMBUF]; + int wake_up_idle, err; + + memset(buffer, 0, sizeof(buffer)); + if (count > sizeof(buffer) - 1) + count = sizeof(buffer) - 1; + if (copy_from_user(buffer, buf, count)) { + err = -EFAULT; + goto out; + } + + err = kstrtoint(strstrip(buffer), 0, &wake_up_idle); + if (err) + goto out; + + p = get_proc_task(inode); + if (!p) + return -ESRCH; + + err = sched_set_wake_up_idle(p, wake_up_idle); + + put_task_struct(p); + +out: + return err < 0 ? err : count; +} + +static int sched_wake_up_idle_open(struct inode *inode, struct file *filp) +{ + return single_open(filp, sched_wake_up_idle_show, inode); +} + +static const struct file_operations proc_pid_sched_wake_up_idle_operations = { + .open = sched_wake_up_idle_open, + .read = seq_read, + .write = sched_wake_up_idle_write, + .llseek = seq_lseek, + .release = single_release, +}; + +#endif /* CONFIG_SMP */ + +#ifdef CONFIG_SCHED_HMP + +static int sched_init_task_load_show(struct seq_file *m, void *v) +{ + struct inode *inode = m->private; + struct task_struct *p; + + p = get_proc_task(inode); + if (!p) + return -ESRCH; + + seq_printf(m, "%d\n", sched_get_init_task_load(p)); + + put_task_struct(p); + + return 0; +} + +static ssize_t +sched_init_task_load_write(struct file *file, const char __user *buf, + size_t count, loff_t *offset) +{ + struct inode *inode = file_inode(file); + struct task_struct *p; + char buffer[PROC_NUMBUF]; + int init_task_load, err; + + memset(buffer, 0, sizeof(buffer)); + if (count > sizeof(buffer) - 1) + count = sizeof(buffer) - 1; + if (copy_from_user(buffer, buf, count)) { + err = -EFAULT; + goto out; + } + + err = kstrtoint(strstrip(buffer), 0, &init_task_load); + if (err) + goto out; + + p = get_proc_task(inode); + if (!p) + return -ESRCH; + + err = sched_set_init_task_load(p, init_task_load); + + put_task_struct(p); + +out: + return err < 0 ? 
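/*
 * Userspace view of the per-task scheduler attributes added here: plain
 * decimal text through /proc/<pid>/sched_wake_up_idle (the HMP
 * sched_init_task_load and sched_group_id files below follow the same
 * pattern). Sketch only; the accepted value range is enforced on the
 * scheduler side, which is not part of this hunk:
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

static int set_wake_up_idle(int pid, int val)
{
	char path[64], buf[16];
	int fd, len, ret;

	snprintf(path, sizeof(path), "/proc/%d/sched_wake_up_idle", pid);
	fd = open(path, O_WRONLY);
	if (fd < 0)
		return -1;
	len = snprintf(buf, sizeof(buf), "%d", val);
	ret = write(fd, buf, len) == (ssize_t)len ? 0 : -1;
	close(fd);
	return ret;
}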
err : count; +} + +static int sched_init_task_load_open(struct inode *inode, struct file *filp) +{ + return single_open(filp, sched_init_task_load_show, inode); +} + +static const struct file_operations proc_pid_sched_init_task_load_operations = { + .open = sched_init_task_load_open, + .read = seq_read, + .write = sched_init_task_load_write, + .llseek = seq_lseek, + .release = single_release, +}; + +static int sched_group_id_show(struct seq_file *m, void *v) +{ + struct inode *inode = m->private; + struct task_struct *p; + + p = get_proc_task(inode); + if (!p) + return -ESRCH; + + seq_printf(m, "%d\n", sched_get_group_id(p)); + + put_task_struct(p); + + return 0; +} + +static ssize_t +sched_group_id_write(struct file *file, const char __user *buf, + size_t count, loff_t *offset) +{ + struct inode *inode = file_inode(file); + struct task_struct *p; + char buffer[PROC_NUMBUF]; + int group_id, err; + + memset(buffer, 0, sizeof(buffer)); + if (count > sizeof(buffer) - 1) + count = sizeof(buffer) - 1; + if (copy_from_user(buffer, buf, count)) { + err = -EFAULT; + goto out; + } + + err = kstrtoint(strstrip(buffer), 0, &group_id); + if (err) + goto out; + + p = get_proc_task(inode); + if (!p) + return -ESRCH; + + err = sched_set_group_id(p, group_id); + + put_task_struct(p); + +out: + return err < 0 ? err : count; +} + +static int sched_group_id_open(struct inode *inode, struct file *filp) +{ + return single_open(filp, sched_group_id_show, inode); +} + +static const struct file_operations proc_pid_sched_group_id_operations = { + .open = sched_group_id_open, + .read = seq_read, + .write = sched_group_id_write, + .llseek = seq_lseek, + .release = single_release, +}; + +#endif /* CONFIG_SCHED_HMP */ + #ifdef CONFIG_SCHED_AUTOGROUP /* * Print out autogroup related information: @@ -2892,6 +3095,13 @@ static const struct pid_entry tgid_base_stuff[] = { ONE("status", S_IRUGO, proc_pid_status), ONE("personality", S_IRUSR, proc_pid_personality), ONE("limits", S_IRUGO, proc_pid_limits), +#ifdef CONFIG_SMP + REG("sched_wake_up_idle", S_IRUGO|S_IWUSR, proc_pid_sched_wake_up_idle_operations), +#endif +#ifdef CONFIG_SCHED_HMP + REG("sched_init_task_load", S_IRUGO|S_IWUSR, proc_pid_sched_init_task_load_operations), + REG("sched_group_id", S_IRUGO|S_IWUGO, proc_pid_sched_group_id_operations), +#endif #ifdef CONFIG_SCHED_DEBUG REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), #endif @@ -2916,9 +3126,13 @@ static const struct pid_entry tgid_base_stuff[] = { REG("mounts", S_IRUGO, proc_mounts_operations), REG("mountinfo", S_IRUGO, proc_mountinfo_operations), REG("mountstats", S_IRUSR, proc_mountstats_operations), +#ifdef CONFIG_PROCESS_RECLAIM + REG("reclaim", S_IWUSR, proc_reclaim_operations), +#endif #ifdef CONFIG_PROC_PAGE_MONITOR REG("clear_refs", S_IWUSR, proc_clear_refs_operations), REG("smaps", S_IRUGO, proc_pid_smaps_operations), + REG("smaps_rollup", S_IRUGO, proc_pid_smaps_rollup_operations), REG("pagemap", S_IRUSR, proc_pagemap_operations), #endif #ifdef CONFIG_SECURITY @@ -3313,6 +3527,7 @@ static const struct pid_entry tid_base_stuff[] = { #ifdef CONFIG_PROC_PAGE_MONITOR REG("clear_refs", S_IWUSR, proc_clear_refs_operations), REG("smaps", S_IRUGO, proc_tid_smaps_operations), + REG("smaps_rollup", S_IRUGO, proc_pid_smaps_rollup_operations), REG("pagemap", S_IRUSR, proc_pagemap_operations), #endif #ifdef CONFIG_SECURITY diff --git a/fs/proc/internal.h b/fs/proc/internal.h index fa4dd182bfaa..2bfff313bf94 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -209,6 +209,7 @@ struct 
pde_opener { extern const struct inode_operations proc_link_inode_operations; extern const struct inode_operations proc_pid_link_inode_operations; +extern const struct file_operations proc_reclaim_operations; extern void proc_init_inodecache(void); extern struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *); @@ -284,10 +285,12 @@ extern int proc_remount(struct super_block *, int *, char *); /* * task_[no]mmu.c */ +struct mem_size_stats; struct proc_maps_private { struct inode *inode; struct task_struct *task; struct mm_struct *mm; + struct mem_size_stats *rollup; #ifdef CONFIG_MMU struct vm_area_struct *tail_vma; #endif @@ -303,6 +306,7 @@ extern const struct file_operations proc_tid_maps_operations; extern const struct file_operations proc_pid_numa_maps_operations; extern const struct file_operations proc_tid_numa_maps_operations; extern const struct file_operations proc_pid_smaps_operations; +extern const struct file_operations proc_pid_smaps_rollup_operations; extern const struct file_operations proc_tid_smaps_operations; extern const struct file_operations proc_clear_refs_operations; extern const struct file_operations proc_pagemap_operations; diff --git a/fs/proc/stat.c b/fs/proc/stat.c index 510413eb25b8..0fd3f81fe51f 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c @@ -117,17 +117,16 @@ static int show_stat(struct seq_file *p, void *v) } sum += arch_irq_stat(); - seq_puts(p, "cpu "); - seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(user)); - seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(nice)); - seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(system)); - seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(idle)); - seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(iowait)); - seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(irq)); - seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(softirq)); - seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(steal)); - seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(guest)); - seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(guest_nice)); + seq_put_decimal_ull(p, "cpu ", cputime64_to_clock_t(user)); + seq_put_decimal_ull(p, " ", cputime64_to_clock_t(nice)); + seq_put_decimal_ull(p, " ", cputime64_to_clock_t(system)); + seq_put_decimal_ull(p, " ", cputime64_to_clock_t(idle)); + seq_put_decimal_ull(p, " ", cputime64_to_clock_t(iowait)); + seq_put_decimal_ull(p, " ", cputime64_to_clock_t(irq)); + seq_put_decimal_ull(p, " ", cputime64_to_clock_t(softirq)); + seq_put_decimal_ull(p, " ", cputime64_to_clock_t(steal)); + seq_put_decimal_ull(p, " ", cputime64_to_clock_t(guest)); + seq_put_decimal_ull(p, " ", cputime64_to_clock_t(guest_nice)); seq_putc(p, '\n'); for_each_online_cpu(i) { @@ -143,23 +142,23 @@ static int show_stat(struct seq_file *p, void *v) guest = kcpustat_cpu(i).cpustat[CPUTIME_GUEST]; guest_nice = kcpustat_cpu(i).cpustat[CPUTIME_GUEST_NICE]; seq_printf(p, "cpu%d", i); - seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(user)); - seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(nice)); - seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(system)); - seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(idle)); - seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(iowait)); - seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(irq)); - seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(softirq)); - seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(steal)); - seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(guest)); - seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(guest_nice)); + seq_put_decimal_ull(p, " ", 
cputime64_to_clock_t(user)); + seq_put_decimal_ull(p, " ", cputime64_to_clock_t(nice)); + seq_put_decimal_ull(p, " ", cputime64_to_clock_t(system)); + seq_put_decimal_ull(p, " ", cputime64_to_clock_t(idle)); + seq_put_decimal_ull(p, " ", cputime64_to_clock_t(iowait)); + seq_put_decimal_ull(p, " ", cputime64_to_clock_t(irq)); + seq_put_decimal_ull(p, " ", cputime64_to_clock_t(softirq)); + seq_put_decimal_ull(p, " ", cputime64_to_clock_t(steal)); + seq_put_decimal_ull(p, " ", cputime64_to_clock_t(guest)); + seq_put_decimal_ull(p, " ", cputime64_to_clock_t(guest_nice)); seq_putc(p, '\n'); } - seq_printf(p, "intr %llu", (unsigned long long)sum); + seq_put_decimal_ull(p, "intr ", (unsigned long long)sum); /* sum again ? it could be updated? */ for_each_irq_nr(j) - seq_put_decimal_ull(p, ' ', kstat_irqs_usr(j)); + seq_put_decimal_ull(p, " ", kstat_irqs_usr(j)); seq_printf(p, "\nctxt %llu\n" @@ -173,10 +172,10 @@ static int show_stat(struct seq_file *p, void *v) nr_running(), nr_iowait()); - seq_printf(p, "softirq %llu", (unsigned long long)sum_softirq); + seq_put_decimal_ull(p, "softirq ", (unsigned long long)sum_softirq); for (i = 0; i < NR_SOFTIRQS; i++) - seq_put_decimal_ull(p, ' ', per_softirq_sums[i]); + seq_put_decimal_ull(p, " ", per_softirq_sums[i]); seq_putc(p, '\n'); return 0; diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index fc8dca4a7b19..c3faa39c0ce1 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -14,6 +14,8 @@ #include <linux/swapops.h> #include <linux/mmu_notifier.h> #include <linux/page_idle.h> +#include <linux/mm_inline.h> +#include <linux/ctype.h> #include <asm/elf.h> #include <asm/uaccess.h> @@ -288,6 +290,7 @@ static int proc_map_release(struct inode *inode, struct file *file) if (priv->mm) mmdrop(priv->mm); + kfree(priv->rollup); return seq_release_private(inode, file); } @@ -314,6 +317,23 @@ static int is_stack(struct proc_maps_private *priv, vma->vm_end >= vma->vm_mm->start_stack; } +static void show_vma_header_prefix(struct seq_file *m, + unsigned long start, unsigned long end, + vm_flags_t flags, unsigned long long pgoff, + dev_t dev, unsigned long ino) +{ + seq_setwidth(m, 25 + sizeof(void *) * 6 - 1); + seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu ", + start, + end, + flags & VM_READ ? 'r' : '-', + flags & VM_WRITE ? 'w' : '-', + flags & VM_EXEC ? 'x' : '-', + flags & VM_MAYSHARE ? 's' : 'p', + pgoff, + MAJOR(dev), MINOR(dev), ino); +} + static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) { @@ -337,17 +357,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) /* We don't show the stack guard page in /proc/maps */ start = vma->vm_start; end = vma->vm_end; - - seq_setwidth(m, 25 + sizeof(void *) * 6 - 1); - seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu ", - start, - end, - flags & VM_READ ? 'r' : '-', - flags & VM_WRITE ? 'w' : '-', - flags & VM_EXEC ? 'x' : '-', - flags & VM_MAYSHARE ? 
's' : 'p', - pgoff, - MAJOR(dev), MINOR(dev), ino); + show_vma_header_prefix(m, start, end, flags, pgoff, dev, ino); /* * Print the dentry name for named mappings, and a @@ -382,7 +392,6 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) name = "[stack]"; goto done; } - if (vma_get_anon_name(vma)) { seq_pad(m, ' '); seq_print_vma_name(m, vma); @@ -473,6 +482,7 @@ const struct file_operations proc_tid_maps_operations = { #ifdef CONFIG_PROC_PAGE_MONITOR struct mem_size_stats { + bool first; unsigned long resident; unsigned long shared_clean; unsigned long shared_dirty; @@ -484,7 +494,9 @@ struct mem_size_stats { unsigned long swap; unsigned long shared_hugetlb; unsigned long private_hugetlb; + unsigned long first_vma_start; u64 pss; + u64 pss_locked; u64 swap_pss; }; @@ -694,69 +706,105 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask, static int show_smap(struct seq_file *m, void *v, int is_pid) { + struct proc_maps_private *priv = m->private; struct vm_area_struct *vma = v; - struct mem_size_stats mss; + struct mem_size_stats mss_stack; + struct mem_size_stats *mss; struct mm_walk smaps_walk = { .pmd_entry = smaps_pte_range, #ifdef CONFIG_HUGETLB_PAGE .hugetlb_entry = smaps_hugetlb_range, #endif .mm = vma->vm_mm, - .private = &mss, }; + int ret = 0; + bool rollup_mode; + bool last_vma; + + if (priv->rollup) { + rollup_mode = true; + mss = priv->rollup; + if (mss->first) { + mss->first_vma_start = vma->vm_start; + mss->first = false; + } + last_vma = !m_next_vma(priv, vma); + } else { + rollup_mode = false; + memset(&mss_stack, 0, sizeof(mss_stack)); + mss = &mss_stack; + } + + smaps_walk.private = mss; - memset(&mss, 0, sizeof mss); /* mmap_sem is held in m_start */ walk_page_vma(vma, &smaps_walk); + if (vma->vm_flags & VM_LOCKED) + mss->pss_locked += mss->pss; + + if (!rollup_mode) { + show_map_vma(m, vma, is_pid); + } else if (last_vma) { + show_vma_header_prefix( + m, mss->first_vma_start, vma->vm_end, 0, 0, 0, 0); + seq_pad(m, ' '); + seq_puts(m, "[rollup]\n"); + } else { + ret = SEQ_SKIP; + } - show_map_vma(m, vma, is_pid); - - if (vma_get_anon_name(vma)) { + if (!rollup_mode && vma_get_anon_name(vma)) { seq_puts(m, "Name: "); seq_print_vma_name(m, vma); seq_putc(m, '\n'); } - seq_printf(m, - "Size: %8lu kB\n" - "Rss: %8lu kB\n" - "Pss: %8lu kB\n" - "Shared_Clean: %8lu kB\n" - "Shared_Dirty: %8lu kB\n" - "Private_Clean: %8lu kB\n" - "Private_Dirty: %8lu kB\n" - "Referenced: %8lu kB\n" - "Anonymous: %8lu kB\n" - "AnonHugePages: %8lu kB\n" - "Shared_Hugetlb: %8lu kB\n" - "Private_Hugetlb: %7lu kB\n" - "Swap: %8lu kB\n" - "SwapPss: %8lu kB\n" - "KernelPageSize: %8lu kB\n" - "MMUPageSize: %8lu kB\n" - "Locked: %8lu kB\n", - (vma->vm_end - vma->vm_start) >> 10, - mss.resident >> 10, - (unsigned long)(mss.pss >> (10 + PSS_SHIFT)), - mss.shared_clean >> 10, - mss.shared_dirty >> 10, - mss.private_clean >> 10, - mss.private_dirty >> 10, - mss.referenced >> 10, - mss.anonymous >> 10, - mss.anonymous_thp >> 10, - mss.shared_hugetlb >> 10, - mss.private_hugetlb >> 10, - mss.swap >> 10, - (unsigned long)(mss.swap_pss >> (10 + PSS_SHIFT)), - vma_kernel_pagesize(vma) >> 10, - vma_mmu_pagesize(vma) >> 10, - (vma->vm_flags & VM_LOCKED) ? 
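/*
 * Net effect of the rollup mode introduced here: every VMA is folded
 * into one accumulated mem_size_stats, per-VMA lines are suppressed via
 * SEQ_SKIP, and a single "[rollup]" record spanning first_vma_start to
 * the last vm_end is emitted, so a memory monitor reads one short file
 * instead of parsing thousands of smaps entries. Illustrative userspace
 * consumer (not part of this patch):
 */
#include <stdio.h>

static long read_pss_kb(int pid)
{
	char path[64], line[128];
	long pss = -1;
	FILE *f;

	snprintf(path, sizeof(path), "/proc/%d/smaps_rollup", pid);
	f = fopen(path, "r");
	if (!f)
		return -1;
	while (fgets(line, sizeof(line), f))
		if (sscanf(line, "Pss: %ld kB", &pss) == 1)
			break;
	fclose(f);
	return pss;
}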
- (unsigned long)(mss.pss >> (10 + PSS_SHIFT)) : 0); - - show_smap_vma_flags(m, vma); + if (!rollup_mode) + seq_printf(m, + "Size: %8lu kB\n" + "KernelPageSize: %8lu kB\n" + "MMUPageSize: %8lu kB\n", + (vma->vm_end - vma->vm_start) >> 10, + vma_kernel_pagesize(vma) >> 10, + vma_mmu_pagesize(vma) >> 10); + + + if (!rollup_mode || last_vma) + seq_printf(m, + "Rss: %8lu kB\n" + "Pss: %8lu kB\n" + "Shared_Clean: %8lu kB\n" + "Shared_Dirty: %8lu kB\n" + "Private_Clean: %8lu kB\n" + "Private_Dirty: %8lu kB\n" + "Referenced: %8lu kB\n" + "Anonymous: %8lu kB\n" + "AnonHugePages: %8lu kB\n" + "Shared_Hugetlb: %8lu kB\n" + "Private_Hugetlb: %7lu kB\n" + "Swap: %8lu kB\n" + "SwapPss: %8lu kB\n" + "Locked: %8lu kB\n", + mss->resident >> 10, + (unsigned long)(mss->pss >> (10 + PSS_SHIFT)), + mss->shared_clean >> 10, + mss->shared_dirty >> 10, + mss->private_clean >> 10, + mss->private_dirty >> 10, + mss->referenced >> 10, + mss->anonymous >> 10, + mss->anonymous_thp >> 10, + mss->shared_hugetlb >> 10, + mss->private_hugetlb >> 10, + mss->swap >> 10, + (unsigned long)(mss->swap_pss >> (10 + PSS_SHIFT)), + (unsigned long)(mss->pss >> (10 + PSS_SHIFT))); + + if (!rollup_mode) { + show_smap_vma_flags(m, vma); + } m_cache_vma(m, vma); - return 0; + return ret; } static int show_pid_smap(struct seq_file *m, void *v) @@ -788,6 +836,25 @@ static int pid_smaps_open(struct inode *inode, struct file *file) return do_maps_open(inode, file, &proc_pid_smaps_op); } +static int pid_smaps_rollup_open(struct inode *inode, struct file *file) +{ + struct seq_file *seq; + struct proc_maps_private *priv; + int ret = do_maps_open(inode, file, &proc_pid_smaps_op); + + if (ret < 0) + return ret; + seq = file->private_data; + priv = seq->private; + priv->rollup = kzalloc(sizeof(*priv->rollup), GFP_KERNEL); + if (!priv->rollup) { + proc_map_release(inode, file); + return -ENOMEM; + } + priv->rollup->first = true; + return 0; +} + static int tid_smaps_open(struct inode *inode, struct file *file) { return do_maps_open(inode, file, &proc_tid_smaps_op); @@ -800,6 +867,13 @@ const struct file_operations proc_pid_smaps_operations = { .release = proc_map_release, }; +const struct file_operations proc_pid_smaps_rollup_operations = { + .open = pid_smaps_rollup_open, + .read = seq_read, + .llseek = seq_lseek, + .release = proc_map_release, +}; + const struct file_operations proc_tid_smaps_operations = { .open = tid_smaps_open, .read = seq_read, @@ -1433,6 +1507,241 @@ const struct file_operations proc_pagemap_operations = { }; #endif /* CONFIG_PROC_PAGE_MONITOR */ +#ifdef CONFIG_PROCESS_RECLAIM +static int reclaim_pte_range(pmd_t *pmd, unsigned long addr, + unsigned long end, struct mm_walk *walk) +{ + struct reclaim_param *rp = walk->private; + struct vm_area_struct *vma = rp->vma; + pte_t *pte, ptent; + spinlock_t *ptl; + struct page *page; + LIST_HEAD(page_list); + int isolated; + int reclaimed; + + split_huge_page_pmd(vma, addr, pmd); + if (pmd_trans_unstable(pmd) || !rp->nr_to_reclaim) + return 0; +cont: + isolated = 0; + pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); + for (; addr != end; pte++, addr += PAGE_SIZE) { + ptent = *pte; + if (!pte_present(ptent)) + continue; + + page = vm_normal_page(vma, addr, ptent); + if (!page) + continue; + + if (isolate_lru_page(page)) + continue; + + list_add(&page->lru, &page_list); + inc_zone_page_state(page, NR_ISOLATED_ANON + + page_is_file_cache(page)); + isolated++; + rp->nr_scanned++; + if ((isolated >= SWAP_CLUSTER_MAX) || !rp->nr_to_reclaim) + break; + } + pte_unmap_unlock(pte - 
1, ptl); + reclaimed = reclaim_pages_from_list(&page_list, vma); + rp->nr_reclaimed += reclaimed; + rp->nr_to_reclaim -= reclaimed; + if (rp->nr_to_reclaim < 0) + rp->nr_to_reclaim = 0; + + if (rp->nr_to_reclaim && (addr != end)) + goto cont; + + cond_resched(); + return 0; +} + +enum reclaim_type { + RECLAIM_FILE, + RECLAIM_ANON, + RECLAIM_ALL, + RECLAIM_RANGE, +}; + +struct reclaim_param reclaim_task_anon(struct task_struct *task, + int nr_to_reclaim) +{ + struct mm_struct *mm; + struct vm_area_struct *vma; + struct mm_walk reclaim_walk = {}; + struct reclaim_param rp; + + rp.nr_reclaimed = 0; + rp.nr_scanned = 0; + get_task_struct(task); + mm = get_task_mm(task); + if (!mm) + goto out; + + reclaim_walk.mm = mm; + reclaim_walk.pmd_entry = reclaim_pte_range; + + rp.nr_to_reclaim = nr_to_reclaim; + reclaim_walk.private = &rp; + + down_read(&mm->mmap_sem); + for (vma = mm->mmap; vma; vma = vma->vm_next) { + if (is_vm_hugetlb_page(vma)) + continue; + + if (vma->vm_file) + continue; + + if (vma->vm_flags & VM_LOCKED) + continue; + + if (!rp.nr_to_reclaim) + break; + + rp.vma = vma; + walk_page_range(vma->vm_start, vma->vm_end, + &reclaim_walk); + } + + flush_tlb_mm(mm); + up_read(&mm->mmap_sem); + mmput(mm); +out: + put_task_struct(task); + return rp; +} + +static ssize_t reclaim_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct task_struct *task; + char buffer[200]; + struct mm_struct *mm; + struct vm_area_struct *vma; + enum reclaim_type type; + char *type_buf; + struct mm_walk reclaim_walk = {}; + unsigned long start = 0; + unsigned long end = 0; + struct reclaim_param rp; + + memset(buffer, 0, sizeof(buffer)); + if (count > sizeof(buffer) - 1) + count = sizeof(buffer) - 1; + + if (copy_from_user(buffer, buf, count)) + return -EFAULT; + + type_buf = strstrip(buffer); + if (!strcmp(type_buf, "file")) + type = RECLAIM_FILE; + else if (!strcmp(type_buf, "anon")) + type = RECLAIM_ANON; + else if (!strcmp(type_buf, "all")) + type = RECLAIM_ALL; + else if (isdigit(*type_buf)) + type = RECLAIM_RANGE; + else + goto out_err; + + if (type == RECLAIM_RANGE) { + char *token; + unsigned long long len, len_in, tmp; + token = strsep(&type_buf, " "); + if (!token) + goto out_err; + tmp = memparse(token, &token); + if (tmp & ~PAGE_MASK || tmp > ULONG_MAX) + goto out_err; + start = tmp; + + token = strsep(&type_buf, " "); + if (!token) + goto out_err; + len_in = memparse(token, &token); + len = (len_in + ~PAGE_MASK) & PAGE_MASK; + if (len > ULONG_MAX) + goto out_err; + /* + * Check to see whether len was rounded up from small -ve + * to zero. 
+ */ + if (len_in && !len) + goto out_err; + + end = start + len; + if (end < start) + goto out_err; + } + + task = get_proc_task(file->f_path.dentry->d_inode); + if (!task) + return -ESRCH; + + mm = get_task_mm(task); + if (!mm) + goto out; + + reclaim_walk.mm = mm; + reclaim_walk.pmd_entry = reclaim_pte_range; + + rp.nr_to_reclaim = ~0; + rp.nr_reclaimed = 0; + reclaim_walk.private = &rp; + + down_read(&mm->mmap_sem); + if (type == RECLAIM_RANGE) { + vma = find_vma(mm, start); + while (vma) { + if (vma->vm_start > end) + break; + if (is_vm_hugetlb_page(vma)) + continue; + + rp.vma = vma; + walk_page_range(max(vma->vm_start, start), + min(vma->vm_end, end), + &reclaim_walk); + vma = vma->vm_next; + } + } else { + for (vma = mm->mmap; vma; vma = vma->vm_next) { + if (is_vm_hugetlb_page(vma)) + continue; + + if (type == RECLAIM_ANON && vma->vm_file) + continue; + + if (type == RECLAIM_FILE && !vma->vm_file) + continue; + + rp.vma = vma; + walk_page_range(vma->vm_start, vma->vm_end, + &reclaim_walk); + } + } + + flush_tlb_mm(mm); + up_read(&mm->mmap_sem); + mmput(mm); +out: + put_task_struct(task); + return count; + +out_err: + return -EINVAL; +} + +const struct file_operations proc_reclaim_operations = { + .write = reclaim_write, + .llseek = noop_llseek, +}; +#endif + #ifdef CONFIG_NUMA struct numa_maps { diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index ac6c78fe19cf..7a380208b006 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c @@ -36,7 +36,7 @@ #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/uaccess.h> -#include <linux/syslog.h> +#include <linux/vmalloc.h> #include "internal.h" @@ -121,18 +121,6 @@ static const struct seq_operations pstore_ftrace_seq_ops = { .show = pstore_ftrace_seq_show, }; -static int pstore_check_syslog_permissions(struct pstore_private *ps) -{ - switch (ps->type) { - case PSTORE_TYPE_DMESG: - case PSTORE_TYPE_CONSOLE: - return check_syslog_permissions(SYSLOG_ACTION_READ_ALL, - SYSLOG_FROM_READER); - default: - return 0; - } -} - static ssize_t pstore_file_read(struct file *file, char __user *userbuf, size_t count, loff_t *ppos) { @@ -151,10 +139,6 @@ static int pstore_file_open(struct inode *inode, struct file *file) int err; const struct seq_operations *sops = NULL; - err = pstore_check_syslog_permissions(ps); - if (err) - return err; - if (ps->type == PSTORE_TYPE_FTRACE) sops = &pstore_ftrace_seq_ops; @@ -191,11 +175,6 @@ static const struct file_operations pstore_file_operations = { static int pstore_unlink(struct inode *dir, struct dentry *dentry) { struct pstore_private *p = d_inode(dentry)->i_private; - int err; - - err = pstore_check_syslog_permissions(p); - if (err) - return err; if (p->psi->erase) p->psi->erase(p->type, p->id, p->count, @@ -216,7 +195,7 @@ static void pstore_evict_inode(struct inode *inode) spin_lock_irqsave(&allpstore_lock, flags); list_del(&p->list); spin_unlock_irqrestore(&allpstore_lock, flags); - kfree(p); + vfree(p); } } @@ -328,7 +307,7 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, int count, goto fail; inode->i_mode = S_IFREG | 0444; inode->i_fop = &pstore_file_operations; - private = kmalloc(sizeof *private + size, GFP_KERNEL); + private = vmalloc(sizeof *private + size); if (!private) goto fail_alloc; private->type = type; @@ -402,7 +381,7 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, int count, fail_lockedalloc: mutex_unlock(&d_inode(root)->i_mutex); - kfree(private); + vfree(private); fail_alloc: iput(inode); @@ -429,7 +408,7 @@ static int 
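/*
 * Userspace view of the CONFIG_PROCESS_RECLAIM interface above: write
 * "file", "anon", "all", or "<addr> <len>" (memparse-style sizes) into
 * /proc/<pid>/reclaim; the file is write-only and root-only (S_IWUSR)
 * per the /proc registration earlier in this patch. Sketch:
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

static int reclaim_anon(int pid)
{
	char path[64];
	int fd, ret;

	snprintf(path, sizeof(path), "/proc/%d/reclaim", pid);
	fd = open(path, O_WRONLY);
	if (fd < 0)
		return -1;
	ret = write(fd, "anon", 4) == 4 ? 0 : -1;
	close(fd);
	return ret;
}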
pstore_fill_super(struct super_block *sb, void *data, int silent) inode = pstore_get_inode(sb); if (inode) { - inode->i_mode = S_IFDIR | 0755; + inode->i_mode = S_IFDIR | 0750; inode->i_op = &pstore_dir_inode_operations; inode->i_fop = &simple_dir_operations; inc_nlink(inode); diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index 6fbfa8189451..21bf055bdebf 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -36,6 +36,7 @@ #include <linux/hardirq.h> #include <linux/jiffies.h> #include <linux/workqueue.h> +#include <linux/vmalloc.h> #include "internal.h" @@ -580,7 +581,7 @@ void pstore_get_records(int quiet) big_oops_buf_sz); if (unzipped_len > 0) { - kfree(buf); + vfree(buf); buf = big_oops_buf; size = unzipped_len; compressed = false; @@ -594,7 +595,7 @@ void pstore_get_records(int quiet) compressed, (size_t)size, time, psi); if (unzipped_len < 0) { /* Free buffer other than big oops */ - kfree(buf); + vfree(buf); buf = NULL; } else unzipped_len = -1; diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 283b04218997..2a004480fc4f 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -233,7 +233,7 @@ static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type, /* ECC correction notice */ ecc_notice_size = persistent_ram_ecc_string(prz, NULL, 0); - *buf = kmalloc(size + ecc_notice_size + 1, GFP_KERNEL); + *buf = vmalloc(size + ecc_notice_size + 1); if (*buf == NULL) return -ENOMEM; diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c index 11e558efd61e..cdf82f4b5698 100644 --- a/fs/pstore/ram_core.c +++ b/fs/pstore/ram_core.c @@ -291,7 +291,7 @@ void persistent_ram_save_old(struct persistent_ram_zone *prz) if (!prz->old_log) { persistent_ram_ecc_old(prz); - prz->old_log = kmalloc(size, GFP_KERNEL); + prz->old_log = vmalloc(size); } if (!prz->old_log) { pr_err("failed to allocate buffer\n"); @@ -377,7 +377,7 @@ void *persistent_ram_old(struct persistent_ram_zone *prz) void persistent_ram_free_old(struct persistent_ram_zone *prz) { - kfree(prz->old_log); + vfree(prz->old_log); prz->old_log = NULL; prz->old_log_size = 0; } diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c index eb611bdd4725..3ebc70167e41 100644 --- a/fs/reiserfs/namei.c +++ b/fs/reiserfs/namei.c @@ -687,7 +687,8 @@ static int reiserfs_create(struct inode *dir, struct dentry *dentry, umode_t mod reiserfs_update_inode_transaction(inode); reiserfs_update_inode_transaction(dir); - d_instantiate_new(dentry, inode); + unlock_new_inode(inode); + d_instantiate(dentry, inode); retval = journal_end(&th); out_failed: @@ -770,7 +771,8 @@ static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode goto out_failed; } - d_instantiate_new(dentry, inode); + unlock_new_inode(inode); + d_instantiate(dentry, inode); retval = journal_end(&th); out_failed: @@ -869,7 +871,8 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode /* the above add_entry did not update dir's stat data */ reiserfs_update_sd(&th, dir); - d_instantiate_new(dentry, inode); + unlock_new_inode(inode); + d_instantiate(dentry, inode); retval = journal_end(&th); out_failed: reiserfs_write_unlock(dir->i_sb); @@ -1183,7 +1186,8 @@ static int reiserfs_symlink(struct inode *parent_dir, goto out_failed; } - d_instantiate_new(dentry, inode); + unlock_new_inode(inode); + d_instantiate(dentry, inode); retval = journal_end(&th); out_failed: reiserfs_write_unlock(parent_dir->i_sb); diff --git a/fs/sdcardfs/dentry.c b/fs/sdcardfs/dentry.c index 3795d2c26915..eb279de02ffb 100644 --- 
a/fs/sdcardfs/dentry.c +++ b/fs/sdcardfs/dentry.c @@ -123,6 +123,12 @@ out: return err; } +/* 1 = delete, 0 = cache */ +static int sdcardfs_d_delete(const struct dentry *d) +{ + return SDCARDFS_SB(d->d_sb)->options.nocache ? 1 : 0; +} + static void sdcardfs_d_release(struct dentry *dentry) { if (!dentry || !dentry->d_fsdata) @@ -182,6 +188,7 @@ static void sdcardfs_canonical_path(const struct path *path, const struct dentry_operations sdcardfs_ci_dops = { .d_revalidate = sdcardfs_d_revalidate, + .d_delete = sdcardfs_d_delete, .d_release = sdcardfs_d_release, .d_hash = sdcardfs_hash_ci, .d_compare = sdcardfs_cmp_ci, diff --git a/fs/sdcardfs/derived_perm.c b/fs/sdcardfs/derived_perm.c index 85126ec6533c..1f142fed5a5e 100644 --- a/fs/sdcardfs/derived_perm.c +++ b/fs/sdcardfs/derived_perm.c @@ -62,6 +62,7 @@ void get_derived_permission_new(struct dentry *parent, struct dentry *dentry, int err; struct qstr q_Android = QSTR_LITERAL("Android"); struct qstr q_data = QSTR_LITERAL("data"); + struct qstr q_sandbox = QSTR_LITERAL("sandbox"); struct qstr q_obb = QSTR_LITERAL("obb"); struct qstr q_media = QSTR_LITERAL("media"); struct qstr q_cache = QSTR_LITERAL("cache"); @@ -110,6 +111,9 @@ void get_derived_permission_new(struct dentry *parent, struct dentry *dentry, if (qstr_case_eq(name, &q_data)) { /* App-specific directories inside; let anyone traverse */ info->data->perm = PERM_ANDROID_DATA; + } else if (qstr_case_eq(name, &q_sandbox)) { + /* App-specific directories inside; let anyone traverse */ + info->data->perm = PERM_ANDROID_DATA; } else if (qstr_case_eq(name, &q_obb)) { /* App-specific directories inside; let anyone traverse */ info->data->perm = PERM_ANDROID_OBB; @@ -356,7 +360,8 @@ int need_graft_path(struct dentry *dentry) struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); struct qstr obb = QSTR_LITERAL("obb"); - if (parent_info->data->perm == PERM_ANDROID && + if (!sbi->options.unshared_obb && + parent_info->data->perm == PERM_ANDROID && qstr_case_eq(&dentry->d_name, &obb)) { /* /Android/obb is the base obbpath of DERIVED_UNIFIED */ diff --git a/fs/sdcardfs/inode.c b/fs/sdcardfs/inode.c index 6c0039284ae0..ab0952f13510 100644 --- a/fs/sdcardfs/inode.c +++ b/fs/sdcardfs/inode.c @@ -525,7 +525,7 @@ static const char *sdcardfs_follow_link(struct dentry *dentry, void **cookie) static int sdcardfs_permission_wrn(struct inode *inode, int mask) { - WARN_RATELIMIT(1, "sdcardfs does not support permission. Use permission2.\n"); + pr_debug("sdcardfs does not support permission. 
Use permission2.\n"); return -EINVAL; } diff --git a/fs/sdcardfs/main.c b/fs/sdcardfs/main.c index 30e0c431a1ea..6e44903b3d2f 100644 --- a/fs/sdcardfs/main.c +++ b/fs/sdcardfs/main.c @@ -34,6 +34,8 @@ enum { Opt_reserved_mb, Opt_gid_derivation, Opt_default_normal, + Opt_nocache, + Opt_unshared_obb, Opt_err, }; @@ -47,7 +49,9 @@ static const match_table_t sdcardfs_tokens = { {Opt_multiuser, "multiuser"}, {Opt_gid_derivation, "derive_gid"}, {Opt_default_normal, "default_normal"}, + {Opt_unshared_obb, "unshared_obb"}, {Opt_reserved_mb, "reserved_mb=%u"}, + {Opt_nocache, "nocache"}, {Opt_err, NULL} }; @@ -71,6 +75,7 @@ static int parse_options(struct super_block *sb, char *options, int silent, /* by default, gid derivation is off */ opts->gid_derivation = false; opts->default_normal = false; + opts->nocache = false; *debug = 0; @@ -128,6 +133,12 @@ static int parse_options(struct super_block *sb, char *options, int silent, case Opt_default_normal: opts->default_normal = true; break; + case Opt_nocache: + opts->nocache = true; + break; + case Opt_unshared_obb: + opts->unshared_obb = true; + break; /* unknown option */ default: if (!silent) @@ -181,13 +192,16 @@ int parse_options_remount(struct super_block *sb, char *options, int silent, return 0; vfsopts->mask = option; break; + case Opt_unshared_obb: case Opt_default_normal: case Opt_multiuser: case Opt_userid: case Opt_fsuid: case Opt_fsgid: case Opt_reserved_mb: - pr_warn("Option \"%s\" can't be changed during remount\n", p); + case Opt_gid_derivation: + if (!silent) + pr_warn("Option \"%s\" can't be changed during remount\n", p); break; /* unknown option */ default: @@ -264,7 +278,7 @@ static int sdcardfs_read_super(struct vfsmount *mnt, struct super_block *sb, pr_info("sdcardfs: dev_name -> %s\n", dev_name); pr_info("sdcardfs: options -> %s\n", (char *)raw_data); - pr_info("sdcardfs: mnt -> %p\n", mnt); + pr_info("sdcardfs: mnt -> %pK\n", mnt); /* parse lower path */ err = kern_path(dev_name, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, @@ -295,6 +309,13 @@ static int sdcardfs_read_super(struct vfsmount *mnt, struct super_block *sb, atomic_inc(&lower_sb->s_active); sdcardfs_set_lower_super(sb, lower_sb); + sb->s_stack_depth = lower_sb->s_stack_depth + 1; + if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) { + pr_err("sdcardfs: maximum fs stacking depth exceeded\n"); + err = -EINVAL; + goto out_sput; + } + /* inherit maxbytes from lower file system */ sb->s_maxbytes = lower_sb->s_maxbytes; diff --git a/fs/sdcardfs/sdcardfs.h b/fs/sdcardfs/sdcardfs.h index 99227a07a8d6..2bee162921ce 100644 --- a/fs/sdcardfs/sdcardfs.h +++ b/fs/sdcardfs/sdcardfs.h @@ -197,7 +197,9 @@ struct sdcardfs_mount_options { bool multiuser; bool gid_derivation; bool default_normal; + bool unshared_obb; unsigned int reserved_mb; + bool nocache; }; struct sdcardfs_vfsmount_options { diff --git a/fs/sdcardfs/super.c b/fs/sdcardfs/super.c index cffcdb11cb8a..45cfa73b285e 100644 --- a/fs/sdcardfs/super.c +++ b/fs/sdcardfs/super.c @@ -144,7 +144,7 @@ static int sdcardfs_remount_fs2(struct vfsmount *mnt, struct super_block *sb, pr_err("sdcardfs: remount flags 0x%x unsupported\n", *flags); err = -EINVAL; } - pr_info("Remount options were %s for vfsmnt %p.\n", options, mnt); + pr_info("Remount options were %s for vfsmnt %pK.\n", options, mnt); err = parse_options_remount(sb, options, *flags & ~MS_SILENT, mnt->data); @@ -311,6 +311,10 @@ static int sdcardfs_show_options(struct vfsmount *mnt, struct seq_file *m, seq_puts(m, ",default_normal"); if (opts->reserved_mb != 0) seq_printf(m, 
",reserved=%uMB", opts->reserved_mb); + if (opts->nocache) + seq_printf(m, ",nocache"); + if (opts->unshared_obb) + seq_printf(m, ",unshared_obb"); return 0; }; diff --git a/fs/sdfat/Kconfig b/fs/sdfat/Kconfig new file mode 100644 index 000000000000..08b12f7f768b --- /dev/null +++ b/fs/sdfat/Kconfig @@ -0,0 +1,126 @@ +config SDFAT_FS + tristate "sdFAT filesystem support" + select NLS + select NLS_UTF8 + select NLS_CODEPAGE_437 + select NLS_ISO8859_1 + help + If you want to use the sdFAT file system, then you must say Y or M + here to inlucde sdFAT support. + sdFAT is unified FAT-based file system which supports not only fat12/ + 16/32 with vfat but also exfat. sdFAT supports winnt short-name rule. + (winnt: emulate the Windows NT rule for display/create.) + + To compile this as a module, choose M here: the module will be called + sdfat_core and sdfat_fs. + +config SDFAT_USE_FOR_EXFAT + bool "Register sdFAT as exFAT" + default y + depends on SDFAT_FS && !EXFAT_FS + help + If you want to register sdFAT as available for exFAT, say Y. + +config SDFAT_USE_FOR_VFAT + bool "Register sdFAT as VFAT" + default y + depends on SDFAT_FS && !VFAT_FS + help + If you want to register sdFAT as available for VFAT, say Y. + +config SDFAT_DELAYED_META_DIRTY + bool "Enable delayed metadata dirty" + default y + depends on SDFAT_FS + help + If you enable this feature, metadata(FAT/Directory entry) is updated + by flush thread. + +config SDFAT_SUPPORT_DIR_SYNC + bool "Enable supporting dir sync" + default n + depends on SDFAT_FS + help + If you enable this feature, the modification for directory operation + is written to a storage at once. + +config SDFAT_DEFAULT_CODEPAGE + int "Default codepage for sdFAT" + default 437 + depends on SDFAT_FS + help + This option should be set to the codepage of your sdFAT filesystems. + +config SDFAT_DEFAULT_IOCHARSET + string "Default iocharset for sdFAT" + default "utf8" + depends on SDFAT_FS + help + Set this to the default input/output character set you'd + like sdFAT to use. It should probably match the character set + that most of your sdFAT filesystems use, and can be overridden + with the "iocharset" mount option for sdFAT filesystems. + +config SDFAT_CHECK_RO_ATTR + bool "Check read-only attribute" + default n + depends on SDFAT_FS + +config SDFAT_ALIGNED_MPAGE_WRITE + bool "Enable supporting aligned mpage_write" + default y + depends on SDFAT_FS + +config SDFAT_VIRTUAL_XATTR + bool "Virtual xattr support for sdFAT" + default y + depends on SDFAT_FS + help + To support virtual xattr. + +config SDFAT_VIRTUAL_XATTR_SELINUX_LABEL + string "Default string for SELinux label" + default "u:object_r:sdcard_external:s0" + depends on SDFAT_FS && SDFAT_VIRTUAL_XATTR + help + Set this to the default string for SELinux label. 
+ +config SDFAT_SUPPORT_STLOG + bool "Enable storage log" + default y + depends on SDFAT_FS && PROC_STLOG + +config SDFAT_DEBUG + bool "enable debug features" + depends on SDFAT_FS + default y + +config SDFAT_DBG_IOCTL + bool "enable debug-ioctl features" + depends on SDFAT_FS && SDFAT_DEBUG + default n + +config SDFAT_DBG_MSG + bool "enable debug messages" + depends on SDFAT_FS && SDFAT_DEBUG + default y + +config SDFAT_DBG_BUGON + bool "enable strict BUG_ON() for debugging" + depends on SDFAT_FS && SDFAT_DEBUG + default n + +config SDFAT_DBG_WARNON + bool "enable strict WARN_ON() for debugging" + depends on SDFAT_FS && SDFAT_DEBUG + default n + +config SDFAT_STATISTICS + bool "enable statistics for bigdata" + depends on SDFAT_FS + default y + +config SDFAT_UEVENT + bool "enable uevent" + depends on SDFAT_FS + default y diff --git a/fs/sdfat/LICENSE b/fs/sdfat/LICENSE new file mode 100644 index 000000000000..d159169d1050 --- /dev/null +++ b/fs/sdfat/LICENSE @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. 
We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. 
If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. 
+However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. 
Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. diff --git a/fs/sdfat/Makefile b/fs/sdfat/Makefile new file mode 100644 index 000000000000..a5cd0858c6ca --- /dev/null +++ b/fs/sdfat/Makefile @@ -0,0 +1,24 @@ +# +# Makefile for the linux FAT12/16/32(VFAT)/64(exFAT) filesystem driver. +# + +obj-$(CONFIG_SDFAT_FS) += sdfat_fs.o + +sdfat_fs-objs := sdfat.o core.o core_fat.o core_exfat.o api.o blkdev.o \ + fatent.o amap_smart.o cache.o dfr.o nls.o misc.o \ + mpage.o extent.o + +sdfat_fs-$(CONFIG_SDFAT_VIRTUAL_XATTR) += xattr.o +sdfat_fs-$(CONFIG_SDFAT_STATISTICS) += statistics.o + + +all: + make -C /lib/modules/$(shell uname -r)/build M=$(PWD) modules + +clean: + make -C /lib/modules/$(shell uname -r)/build M=$(PWD) clean + +cscope: + rm -rf cscope.files cscope.out + find $(PWD) \( -name '*.c' -o -name '*.cpp' -o -name '*.cc' -o -name '*.h' -o -name '*.s' -o -name '*.S' \) -print > cscope.files + cscope diff --git a/fs/sdfat/README.md b/fs/sdfat/README.md new file mode 100644 index 000000000000..f291e8f1ae00 --- /dev/null +++ b/fs/sdfat/README.md @@ -0,0 +1,19 @@ +sdFAT FS support for Linux Kernel 4.4 +===================================== + +sdFAT is a unified FAT-based file system which supports not only fat12/16/32 with +vfat but also exfat. sdFAT supports the winnt short-name rule.
+ +Suggested Kernel config: + + CONFIG_SDFAT_FS=y + CONFIG_SDFAT_DELAYED_META_DIRTY=y + CONFIG_SDFAT_SUPPORT_DIR_SYNC=y + CONFIG_SDFAT_DEFAULT_CODEPAGE=437 + CONFIG_SDFAT_DEFAULT_IOCHARSET="utf8" + CONFIG_SDFAT_ALIGNED_MPAGE_WRITE=y + CONFIG_SDFAT_VIRTUAL_XATTR=y + CONFIG_SDFAT_VIRTUAL_XATTR_SELINUX_LABEL="u:object_r:vfat:s0" + CONFIG_SDFAT_DEBUG=y + CONFIG_SDFAT_DBG_MSG=y + CONFIG_SDFAT_STATISTICS=y diff --git a/fs/sdfat/amap_smart.c b/fs/sdfat/amap_smart.c new file mode 100644 index 000000000000..b556f868d76e --- /dev/null +++ b/fs/sdfat/amap_smart.c @@ -0,0 +1,1314 @@ +/* + * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +/************************************************************************/ +/* */ +/* PROJECT : exFAT & FAT12/16/32 File System */ +/* FILE : amap_smart.c */ +/* PURPOSE : FAT32 Smart allocation code for sdFAT */ +/* */ +/*----------------------------------------------------------------------*/ +/* NOTES */ +/* */ +/* */ +/************************************************************************/ + +#include <linux/slab.h> +#include <linux/vmalloc.h> + +#include "sdfat.h" +#include "core.h" +#include "amap_smart.h" + +/* AU list related functions */ +static inline void amap_list_del(struct list_head *entry) +{ + __list_del(entry->prev, entry->next); + + /* Will be used to check if the entry is a single entry(selected) */ + entry->prev = NULL; + entry->next = NULL; +} + +static inline int amap_insert_to_list(AU_INFO_T *au, struct slist_head *shead) +{ + struct slist_head *entry = &au->shead; + + ASSERT(!entry->head); + + entry->next = shead->next; + entry->head = shead; + + shead->next = entry; + + return 0; +} + +static inline int amap_remove_from_list(AU_INFO_T *au, struct slist_head *shead) +{ + struct slist_head *entry = &au->shead; + struct slist_head *iter; + + BUG_ON(entry->head != shead); + + iter = shead; + + while (iter->next) { + if (iter->next == entry) { + // iter->next = iter->next->next + iter->next = entry->next; + + entry->next = NULL; + entry->head = NULL; + return 0; + } + iter = iter->next; + } + + BUG_ON("Not reachable"); +} + +/* Full-linear search => Find AU with max. number of fclu */ +static inline AU_INFO_T *amap_find_hot_au_largest(struct slist_head *shead) +{ + struct slist_head *iter; + uint16_t max_fclu = 0; + AU_INFO_T *entry, *ret = NULL; + + ASSERT(shead->head == shead); /* Singly-list condition */ + ASSERT(shead->next != shead); + + iter = shead->next; + + while (iter) { + entry = list_entry(iter, AU_INFO_T, shead); + + if (entry->free_clusters > max_fclu) { + max_fclu = entry->free_clusters; + ret = entry; + } + + iter = iter->next; + } + + return ret; +} + +/* Find partially used AU with max. number of fclu.
+ * If there is no partial AU available, pick a clean one + */ +static inline AU_INFO_T *amap_find_hot_au_partial(AMAP_T *amap) +{ + struct slist_head *iter; + uint16_t max_fclu = 0; + AU_INFO_T *entry, *ret = NULL; + + iter = &amap->slist_hot; + ASSERT(iter->head == iter); /* Singly-list condition */ + ASSERT(iter->next != iter); + + iter = iter->next; + + while (iter) { + entry = list_entry(iter, AU_INFO_T, shead); + + if (entry->free_clusters > max_fclu) { + if (entry->free_clusters < amap->clusters_per_au) { + max_fclu = entry->free_clusters; + ret = entry; + } else { + if (!ret) + ret = entry; + } + } + + iter = iter->next; + } + + return ret; +} + + + + +/* + * Size-base AU management functions + */ + +/* + * Add au into cold AU MAP + * au: an isolated (not in a list) AU data structure + */ +int amap_add_cold_au(AMAP_T *amap, AU_INFO_T *au) +{ + FCLU_NODE_T *fclu_node = NULL; + + /* Check if a single entry */ + BUG_ON(au->head.prev); + + /* Ignore if the au is full */ + if (!au->free_clusters) + return 0; + + /* Find entry */ + fclu_node = NODE(au->free_clusters, amap); + + /* Insert to the list */ + list_add_tail(&(au->head), &(fclu_node->head)); + + /* Update fclu_hint (Increase) */ + if (au->free_clusters > amap->fclu_hint) + amap->fclu_hint = au->free_clusters; + + return 0; +} + +/* + * Remove an AU from AU MAP + */ +int amap_remove_cold_au(AMAP_T *amap, AU_INFO_T *au) +{ + struct list_head *prev = au->head.prev; + + /* Single entries are not managed in lists */ + if (!prev) { + BUG_ON(au->free_clusters > 0); + return 0; + } + + /* remove from list */ + amap_list_del(&(au->head)); + + return 0; +} + + +/* "Find" best fit AU + * returns NULL if there is no AU w/ enough free space. + * + * This function doesn't change AU status. + * The caller should call amap_remove_cold_au() if needed. + */ +AU_INFO_T *amap_find_cold_au_bestfit(AMAP_T *amap, uint16_t free_clusters) +{ + AU_INFO_T *au = NULL; + FCLU_NODE_T *fclu_iter; + + if (free_clusters <= 0 || free_clusters > amap->clusters_per_au) { + EMSG("AMAP: amap_find_cold_au_bestfit / unexpected arg. (%d)\n", + free_clusters); + return NULL; + } + + fclu_iter = NODE(free_clusters, amap); + + if (amap->fclu_hint < free_clusters) { + /* There is no AUs with enough free_clusters */ + return NULL; + } + + /* Naive Hash management (++) */ + do { + if (!list_empty(&fclu_iter->head)) { + struct list_head *first = fclu_iter->head.next; + + au = list_entry(first, AU_INFO_T, head); + + break; + } + + fclu_iter++; + } while (fclu_iter < (amap->fclu_nodes + amap->clusters_per_au)); + + + // BUG_ON(au->free_clusters < 0); + BUG_ON(au && (au->free_clusters > amap->clusters_per_au)); + + return au; +} + + +/* "Pop" best fit AU + * + * returns NULL if there is no AU w/ enough free space. + * The returned AU will not be in the list anymore. + */ +AU_INFO_T *amap_pop_cold_au_bestfit(AMAP_T *amap, uint16_t free_clusters) +{ + /* Naive implementation */ + AU_INFO_T *au; + + au = amap_find_cold_au_bestfit(amap, free_clusters); + if (au) + amap_remove_cold_au(amap, au); + + return au; +} + + + +/* Pop the AU with the largest free space + * + * search from 'start_fclu' to 0 + * (target freecluster : -1 for each step) + * start_fclu = 0 means to search from the max. 
value + */ +AU_INFO_T *amap_pop_cold_au_largest(AMAP_T *amap, uint16_t start_fclu) +{ + AU_INFO_T *au = NULL; + FCLU_NODE_T *fclu_iter; + + if (!start_fclu) + start_fclu = amap->clusters_per_au; + if (start_fclu > amap->clusters_per_au) + start_fclu = amap->clusters_per_au; + + /* Use hint (search start point) */ + if (amap->fclu_hint < start_fclu) + fclu_iter = NODE(amap->fclu_hint, amap); + else + fclu_iter = NODE(start_fclu, amap); + + /* Naive Hash management */ + do { + if (!list_empty(&fclu_iter->head)) { + struct list_head *first = fclu_iter->head.next; + + au = list_entry(first, AU_INFO_T, head); + // BUG_ON((au < amap->entries) || ((amap->entries + amap->n_au) <= au)); + + amap_list_del(first); + + // (Hint) Possible maximum value of free clusters (among cold) + /* if it wasn't the whole search, don't update fclu_hint */ + if (start_fclu == amap->clusters_per_au) + amap->fclu_hint = au->free_clusters; + + break; + } + + fclu_iter--; + } while (amap->fclu_nodes <= fclu_iter); + + return au; +} + + + +/* + * =============================================== + * Allocation Map related functions + * =============================================== + */ + +/* Create AMAP related data structure (mount time) */ +int amap_create(struct super_block *sb, u32 pack_ratio, u32 sect_per_au, u32 hidden_sect) +{ + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + AMAP_T *amap; + int total_used_clusters; + int n_au_table = 0; + int i, i_clu, i_au; + int i_au_root = -1, i_au_hot_from = INT_MAX; + u32 misaligned_sect = hidden_sect; + u64 tmp; + + BUG_ON(!fsi->bd_opened); + + if (fsi->amap) + return -EEXIST; + + /* Check conditions */ + if (fsi->vol_type != FAT32) { + sdfat_msg(sb, KERN_ERR, "smart allocation is only available " + "with fat32-fs"); + return -ENOTSUPP; + } + + if (fsi->num_sectors < AMAP_MIN_SUPPORT_SECTORS) { + sdfat_msg(sb, KERN_ERR, "smart allocation is only available " + "with sectors above %d", AMAP_MIN_SUPPORT_SECTORS); + return -ENOTSUPP; + } + + /* AU size must be a multiple of clu_size */ + if ((sect_per_au <= 0) || (sect_per_au & (fsi->sect_per_clus - 1))) { + sdfat_msg(sb, KERN_ERR, + "invalid AU size (sect_per_au : %u, " + "sect_per_clus : %u) " + "please re-format for performance.", + sect_per_au, fsi->sect_per_clus); + return -EINVAL; + } + + /* the start sector of this partition must be a multiple of clu_size */ + if (misaligned_sect & (fsi->sect_per_clus - 1)) { + sdfat_msg(sb, KERN_ERR, + "misaligned part (start sect : %u, " + "sect_per_clus : %u) " + "please re-format for performance.", + misaligned_sect, fsi->sect_per_clus); + return -EINVAL; + } + + /* data start sector must be a multiple of clu_size */ + if (fsi->data_start_sector & (fsi->sect_per_clus - 1)) { + sdfat_msg(sb, KERN_ERR, + "misaligned data area (start sect : %llu, " + "sect_per_clus : %u) " + "please re-format for performance.", + fsi->data_start_sector, fsi->sect_per_clus); + return -EINVAL; + } + + misaligned_sect &= (sect_per_au - 1); + + /* Allocate data structrues */ + amap = kzalloc(sizeof(AMAP_T), GFP_NOIO); + if (!amap) + return -ENOMEM; + + amap->sb = sb; + + tmp = fsi->num_sectors + misaligned_sect + sect_per_au - 1; + do_div(tmp, sect_per_au); + amap->n_au = tmp; + amap->n_clean_au = 0; + amap->n_full_au = 0; + + /* Reflect block-partition align first, + * then partition-data_start align + */ + amap->clu_align_bias = (misaligned_sect / fsi->sect_per_clus); + amap->clu_align_bias += (fsi->data_start_sector >> fsi->sect_per_clus_bits) - CLUS_BASE; + amap->clusters_per_au = sect_per_au / 
fsi->sect_per_clus; + + /* That is, + * the size of cluster is at least 4KB if the size of AU is 4MB + */ + if (amap->clusters_per_au > MAX_CLU_PER_AU) { + sdfat_log_msg(sb, KERN_INFO, + "too many clusters per AU (clus/au:%d > %d).", + amap->clusters_per_au, + MAX_CLU_PER_AU); + } + + /* is it needed? why here? */ + // set_sb_dirty(sb); + + spin_lock_init(&amap->amap_lock); + + amap->option.packing_ratio = pack_ratio; + amap->option.au_size = sect_per_au; + amap->option.au_align_factor = hidden_sect; + + + /* Allocate AU info table */ + n_au_table = (amap->n_au + N_AU_PER_TABLE - 1) / N_AU_PER_TABLE; + amap->au_table = kmalloc(sizeof(AU_INFO_T *) * n_au_table, GFP_NOIO); + if (!amap->au_table) { + sdfat_msg(sb, KERN_ERR, + "failed to alloc amap->au_table\n"); + kfree(amap); + return -ENOMEM; + } + + for (i = 0; i < n_au_table; i++) + amap->au_table[i] = (AU_INFO_T *)get_zeroed_page(GFP_NOIO); + + /* Allocate buckets indexed by # of free clusters */ + amap->fclu_order = get_order(sizeof(FCLU_NODE_T) * amap->clusters_per_au); + + // XXX: amap->clusters_per_au limitation is 512 (w/ 8 byte list_head) + sdfat_log_msg(sb, KERN_INFO, "page orders for AU nodes : %d " + "(clus_per_au : %d, node_size : %lu)", + amap->fclu_order, + amap->clusters_per_au, + (unsigned long)sizeof(FCLU_NODE_T)); + + if (!amap->fclu_order) + amap->fclu_nodes = (FCLU_NODE_T *)get_zeroed_page(GFP_NOIO); + else + amap->fclu_nodes = vzalloc(PAGE_SIZE << amap->fclu_order); + + amap->fclu_hint = amap->clusters_per_au; + + /* Hot AU list, ignored AU list */ + amap->slist_hot.next = NULL; + amap->slist_hot.head = &amap->slist_hot; + amap->total_fclu_hot = 0; + + amap->slist_ignored.next = NULL; + amap->slist_ignored.head = &amap->slist_ignored; + + /* Strategy related vars. */ + amap->cur_cold.au = NULL; + amap->cur_hot.au = NULL; + amap->n_need_packing = 0; + + + /* Build AMAP info */ + total_used_clusters = 0; // Count # of used clusters + + i_au_root = i_AU_of_CLU(amap, fsi->root_dir); + i_au_hot_from = amap->n_au - (SMART_ALLOC_N_HOT_AU - 1); + + for (i = 0; i < amap->clusters_per_au; i++) + INIT_LIST_HEAD(&amap->fclu_nodes[i].head); + + /* + * Thanks to kzalloc() + * amap->entries[i_au].free_clusters = 0; + * amap->entries[i_au].head.prev = NULL; + * amap->entries[i_au].head.next = NULL; + */ + + /* Parse FAT table */ + for (i_clu = CLUS_BASE; i_clu < fsi->num_clusters; i_clu++) { + u32 clu_data; + AU_INFO_T *au; + + if (fat_ent_get(sb, i_clu, &clu_data)) { + sdfat_msg(sb, KERN_ERR, + "failed to read fat entry(%u)\n", i_clu); + goto free_and_eio; + } + + if (IS_CLUS_FREE(clu_data)) { + au = GET_AU(amap, i_AU_of_CLU(amap, i_clu)); + au->free_clusters++; + } else + total_used_clusters++; + } + + /* Build AU list */ + for (i_au = 0; i_au < amap->n_au; i_au++) { + AU_INFO_T *au = GET_AU(amap, i_au); + + au->idx = i_au; + BUG_ON(au->free_clusters > amap->clusters_per_au); + + if (au->free_clusters == amap->clusters_per_au) + amap->n_clean_au++; + else if (au->free_clusters == 0) + amap->n_full_au++; + + /* If hot, insert to the hot list */ + if (i_au >= i_au_hot_from) { + amap_add_hot_au(amap, au); + amap->total_fclu_hot += au->free_clusters; + } else if (i_au != i_au_root || SMART_ALLOC_N_HOT_AU == 0) { + /* Otherwise, insert to the free cluster hash */ + amap_add_cold_au(amap, au); + } + } + + /* Hot list -> (root) -> (last) -> (last - 1) -> ... 
*/ + if (i_au_root >= 0 && SMART_ALLOC_N_HOT_AU > 0) { + amap_add_hot_au(amap, GET_AU(amap, i_au_root)); + amap->total_fclu_hot += GET_AU(amap, i_au_root)->free_clusters; + } + + fsi->amap = amap; + fsi->used_clusters = total_used_clusters; + + sdfat_msg(sb, KERN_INFO, + "AMAP: Smart allocation enabled (opt : %u / %u / %u)", + amap->option.au_size, amap->option.au_align_factor, + amap->option.packing_ratio); + + /* Debug purpose - check */ + //{ + //u32 used_clusters; + //fat_count_used_clusters(sb, &used_clusters) + //ASSERT(used_clusters == total_used_clusters); + //} + + return 0; + + +free_and_eio: + if (amap) { + if (amap->au_table) { + for (i = 0; i < n_au_table; i++) + free_page((unsigned long)amap->au_table[i]); + kfree(amap->au_table); + } + if (amap->fclu_nodes) { + if (!amap->fclu_order) + free_page((unsigned long)amap->fclu_nodes); + else + vfree(amap->fclu_nodes); + } + kfree(amap); + } + return -EIO; +} + + +/* Free AMAP related structure */ +void amap_destroy(struct super_block *sb) +{ + AMAP_T *amap = SDFAT_SB(sb)->fsi.amap; + int n_au_table; + + if (!amap) + return; + + DMSG("%s\n", __func__); + + n_au_table = (amap->n_au + N_AU_PER_TABLE - 1) / N_AU_PER_TABLE; + + if (amap->au_table) { + int i; + + for (i = 0; i < n_au_table; i++) + free_page((unsigned long)amap->au_table[i]); + + kfree(amap->au_table); + } + if (!amap->fclu_order) + free_page((unsigned long)amap->fclu_nodes); + else + vfree(amap->fclu_nodes); + kfree(amap); + SDFAT_SB(sb)->fsi.amap = NULL; +} + + +/* + * Check status of FS + * and change destination if needed to disable AU-aligned alloc. + * (from ALLOC_COLD_ALIGNED to ALLOC_COLD_SEQ) + */ +static inline int amap_update_dest(AMAP_T *amap, int ori_dest) +{ + FS_INFO_T *fsi = &(SDFAT_SB(amap->sb)->fsi); + int n_partial_au, n_partial_freeclus; + + if (ori_dest != ALLOC_COLD_ALIGNED) + return ori_dest; + + /* # of partial AUs and # of clusters in those AUs */ + n_partial_au = amap->n_au - amap->n_clean_au - amap->n_full_au; + n_partial_freeclus = fsi->num_clusters - fsi->used_clusters - + amap->clusters_per_au * amap->n_clean_au; + + /* Status of AUs : Full / Partial / Clean + * If there are many partial (and badly fragmented) AUs, + * the throughput will decrease extremely. + * + * The following code treats those worst cases. + */ + + /* XXX: AMAP heuristics */ + if ((amap->n_clean_au * 50 <= amap->n_au) && + (n_partial_freeclus*2) < (n_partial_au*amap->clusters_per_au)) { + /* If clean AUs are fewer than 2% of n_au (80 AUs per 16GB) + * and fragment ratio is more than 2 (AVG free_clusters=half AU) + * + * disable clean-first allocation + * enable VFAT-like sequential allocation + */ + return ALLOC_COLD_SEQ; + } + + return ori_dest; +} + + +#define PACKING_SOFTLIMIT (amap->option.packing_ratio) +#define PACKING_HARDLIMIT (amap->option.packing_ratio * 4) +/* + * Pick a packing AU if needed. + * Otherwise just return NULL + * + * This function includes some heuristics. + */ +static inline AU_INFO_T *amap_get_packing_au(AMAP_T *amap, int dest, int num_to_wb, int *clu_to_skip) +{ + AU_INFO_T *au = NULL; + + if (dest == ALLOC_COLD_PACKING) { + /* ALLOC_COLD_PACKING: + * Packing-first mode for defrag.
+ * Optimized to save clean AU + * + * 1) best-fit AU + * 2) Smallest AU (w/ minimum free clusters) + */ + if (num_to_wb >= amap->clusters_per_au) + num_to_wb = num_to_wb % amap->clusters_per_au; + + /* If this is commented out: exact-AU-size requests get a clean AU; the rest start from the smallest */ + if (num_to_wb == 0) + num_to_wb = 1; // Don't use clean AUs + + au = amap_find_cold_au_bestfit(amap, num_to_wb); + if (au && au->free_clusters == amap->clusters_per_au && num_to_wb > 1) { + /* if au is clean then get a new partial one */ + au = amap_find_cold_au_bestfit(amap, 1); + } + + if (au) { + amap->n_need_packing = 0; + amap_remove_cold_au(amap, au); + return au; + } + } + + + /* Heuristic packing: + * This will improve QoS greatly. + * + * Count # of AU_ALIGNED allocation. + * If the number exceeds the specific threshold, + * allocate on a partial AU or generate random I/O. + */ + if ((PACKING_SOFTLIMIT > 0) && + (amap->n_need_packing >= PACKING_SOFTLIMIT) && + (num_to_wb < (int)amap->clusters_per_au)) { + /* Best-fit packing: + * If num_to_wb (expected number to be allocated) is smaller + * than AU_SIZE, find a best-fit AU. + */ + + /* Back margin (heuristics) */ + if (num_to_wb < amap->clusters_per_au / 4) + num_to_wb = amap->clusters_per_au / 4; + + au = amap_find_cold_au_bestfit(amap, num_to_wb); + if (au != NULL) { + amap_remove_cold_au(amap, au); + + MMSG("AMAP: packing (cnt: %d) / softlimit, " + "best-fit (num_to_wb: %d))\n", + amap->n_need_packing, num_to_wb); + + if (au->free_clusters > num_to_wb) { // Best-fit search: this branch always hits + *clu_to_skip = au->free_clusters - num_to_wb; + /* otherwise don't skip */ + } + amap->n_need_packing = 0; + return au; + } + } + + if ((PACKING_HARDLIMIT) && amap->n_need_packing >= PACKING_HARDLIMIT) { + /* Compulsory SLC flushing: + * If there was no chance to do best-fit packing + * and the # of AU-aligned allocation exceeds HARD threshold, + * then pick a clean AU and generate a compulsory random I/O. + */ + au = amap_pop_cold_au_largest(amap, amap->clusters_per_au); + if (au) { + MMSG("AMAP: packing (cnt: %d) / hard-limit, largest)\n", + amap->n_need_packing); + + if (au->free_clusters >= 96) { + *clu_to_skip = au->free_clusters / 2; + MMSG("AMAP: cluster idx re-position\n"); + } + amap->n_need_packing = 0; + return au; + } + } + + /* Update # of clean AU allocation */ + amap->n_need_packing++; + return NULL; +} + + +/* Pick a target AU: + * This function should be called + * only if there are one or more free clusters in the bdev. + */ +TARGET_AU_T *amap_get_target_au(AMAP_T *amap, int dest, int num_to_wb) +{ + int loop_count = 0; + +retry: + if (++loop_count >= 3) { + /* No space available (or AMAP consistency error) + * This could happen because of the ignored AUs but not likely + * (because the defrag daemon will not work if there is not enough space) + */ + BUG_ON(amap->slist_ignored.next == NULL); + return NULL; + } + + /* Hot clusters (DIR) */ + if (dest == ALLOC_HOT) { + + /* Working hot AU exist?
*/ + if (amap->cur_hot.au == NULL || amap->cur_hot.au->free_clusters == 0) { + AU_INFO_T *au; + + if (amap->total_fclu_hot == 0) { + /* No more hot AU available */ + dest = ALLOC_COLD; + + goto retry; + } + + au = amap_find_hot_au_partial(amap); + + BUG_ON(au == NULL); + BUG_ON(au->free_clusters <= 0); + + amap->cur_hot.au = au; + amap->cur_hot.idx = 0; + amap->cur_hot.clu_to_skip = 0; + } + + /* Now allocate on a hot AU */ + return &amap->cur_hot; + } + + /* Cold allocation: + * If amap->cur_cold.au has one or more free cluster(s), + * then just return amap->cur_cold + */ + if ((!amap->cur_cold.au) + || (amap->cur_cold.idx == amap->clusters_per_au) + || (amap->cur_cold.au->free_clusters == 0)) { + + AU_INFO_T *au = NULL; + const AU_INFO_T *old_au = amap->cur_cold.au; + int n_clu_to_skip = 0; + + if (old_au) { + ASSERT(!IS_AU_WORKING(old_au, amap)); + /* must be NOT WORKING AU. + * (only for information gathering) + */ + } + + /* Next target AU is needed: + * There are 3 possible ALLOC options for cold AU + * + * ALLOC_COLD_ALIGNED: Clean AU first, but heuristic packing is ON + * ALLOC_COLD_PACKING: Packing AU first (usually for defrag) + * ALLOC_COLD_SEQ : Sequential AU allocation (VFAT-like) + */ + + /* Experimental: Modify allocation destination if needed (ALIGNED => SEQ) */ + // dest = amap_update_dest(amap, dest); + + if ((dest == ALLOC_COLD_SEQ) && old_au) { + int i_au = old_au->idx + 1; + + while (i_au != old_au->idx) { + au = GET_AU(amap, i_au); + + if ((au->free_clusters > 0) && + !IS_AU_HOT(au, amap) && + !IS_AU_IGNORED(au, amap)) { + MMSG("AMAP: new cold AU(%d) with %d " + "clusters (seq)\n", + au->idx, au->free_clusters); + + amap_remove_cold_au(amap, au); + goto ret_new_cold; + } + i_au++; + if (i_au >= amap->n_au) + i_au = 0; + } + + // no cold AUs are available => Hot allocation + dest = ALLOC_HOT; + goto retry; + } + + + /* + * Check if packing is needed + * (ALLOC_COLD_PACKING is treated by this function) + */ + au = amap_get_packing_au(amap, dest, num_to_wb, &n_clu_to_skip); + if (au) { + MMSG("AMAP: new cold AU(%d) with %d clusters " + "(packing)\n", au->idx, au->free_clusters); + goto ret_new_cold; + } + + /* ALLOC_COLD_ALIGNED */ + /* Check if the adjacent AU is clean */ + if (old_au && ((old_au->idx + 1) < amap->n_au)) { + au = GET_AU(amap, old_au->idx + 1); + if ((au->free_clusters == amap->clusters_per_au) && + !IS_AU_HOT(au, amap) && + !IS_AU_IGNORED(au, amap)) { + MMSG("AMAP: new cold AU(%d) with %d clusters " + "(adjacent)\n", au->idx, au->free_clusters); + amap_remove_cold_au(amap, au); + goto ret_new_cold; + } + } + + /* Clean or largest AU */ + au = amap_pop_cold_au_largest(amap, 0); + if (!au) { + //ASSERT(amap->total_fclu_hot == (fsi->num_clusters - fsi->used_clusters - 2)); + dest = ALLOC_HOT; + goto retry; + } + + MMSG("AMAP: New cold AU (%d) with %d clusters\n", + au->idx, au->free_clusters); + +ret_new_cold: + SET_AU_WORKING(au); + + amap->cur_cold.au = au; + amap->cur_cold.idx = 0; + amap->cur_cold.clu_to_skip = n_clu_to_skip; + } + + return &amap->cur_cold; +} + +/* Put and update target AU */ +void amap_put_target_au(AMAP_T *amap, TARGET_AU_T *cur, unsigned int num_allocated) +{ + /* Update AMAP info vars. */ + if (num_allocated > 0 && + (cur->au->free_clusters + num_allocated) == amap->clusters_per_au) { + /* if the target AU was a clean AU before this allocation ...
*/ + amap->n_clean_au--; + } + if (num_allocated > 0 && + cur->au->free_clusters == 0) + amap->n_full_au++; + + if (IS_AU_HOT(cur->au, amap)) { + /* Hot AU */ + MMSG("AMAP: hot allocation at AU %d\n", cur->au->idx); + amap->total_fclu_hot -= num_allocated; + + /* Intra-AU round-robin */ + if (cur->idx >= amap->clusters_per_au) + cur->idx = 0; + + /* No more space available */ + if (cur->au->free_clusters == 0) + cur->au = NULL; + + } else { + /* non-hot AU */ + ASSERT(IS_AU_WORKING(cur->au, amap)); + + if (cur->idx >= amap->clusters_per_au || cur->au->free_clusters == 0) { + /* It should be inserted back to AU MAP */ + cur->au->shead.head = NULL; // SET_AU_NOT_WORKING + amap_add_cold_au(amap, cur->au); + + // cur->au = NULL; // This value will be used for the next AU selection + cur->idx = amap->clusters_per_au; // AU closing + } + } + +} + + +/* Reposition target->idx for packing (Heuristics): + * Skip (num_to_skip) free clusters in (cur->au) + */ +static inline int amap_skip_cluster(struct super_block *sb, TARGET_AU_T *cur, int num_to_skip) +{ + AMAP_T *amap = SDFAT_SB(sb)->fsi.amap; + u32 clu, read_clu; + MMSG_VAR(int num_to_skip_orig = num_to_skip); + + if (num_to_skip >= cur->au->free_clusters) { + EMSG("AMAP(%s): skip mis-use. amap_566\n", __func__); + return -EIO; + } + + clu = CLU_of_i_AU(amap, cur->au->idx, cur->idx); + while (num_to_skip > 0) { + if (clu >= CLUS_BASE) { + /* Cf. + * If AMAP's integrity is okay, + * we don't need to check if (clu < fsi->num_clusters) + */ + + if (fat_ent_get(sb, clu, &read_clu)) + return -EIO; + + if (IS_CLUS_FREE(read_clu)) + num_to_skip--; + } + + // Move clu->idx + clu++; + (cur->idx)++; + + if (cur->idx >= amap->clusters_per_au) { + /* End of AU (Not supposed) */ + EMSG("AMAP: Skip - End of AU?! (amap_596)\n"); + cur->idx = 0; + return -EIO; + } + } + + MMSG("AMAP: Skip_clusters (%d skipped => %d, among %d free clus)\n", + num_to_skip_orig, cur->idx, cur->au->free_clusters); + + return 0; +} + + +/* AMAP-based allocation function for FAT32 */ +s32 amap_fat_alloc_cluster(struct super_block *sb, u32 num_alloc, CHAIN_T *p_chain, s32 dest) +{ + AMAP_T *amap = SDFAT_SB(sb)->fsi.amap; + TARGET_AU_T *cur = NULL; + AU_INFO_T *target_au = NULL; /* Allocation target AU */ + s32 ret = -ENOSPC; + u32 last_clu = CLUS_EOF, read_clu; + u32 new_clu, total_cnt; + u32 num_allocated = 0, num_allocated_each = 0; + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + + BUG_ON(!amap); + BUG_ON(IS_CLUS_EOF(fsi->used_clusters)); + + total_cnt = fsi->num_clusters - CLUS_BASE; + + if (unlikely(total_cnt < fsi->used_clusters)) { + sdfat_fs_error_ratelimit(sb, + "AMAP(%s): invalid used clusters(t:%u,u:%u)\n", + __func__, total_cnt, fsi->used_clusters); + return -EIO; + } + + if (num_alloc > total_cnt - fsi->used_clusters) + return -ENOSPC; + + p_chain->dir = CLUS_EOF; + + set_sb_dirty(sb); + + // spin_lock(&amap->amap_lock); + +retry_alloc: + /* Allocation strategy implemented */ + cur = amap_get_target_au(amap, dest, fsi->reserved_clusters); + if (unlikely(!cur)) { + // There is no available AU (only ignored-AU are left) + sdfat_msg(sb, KERN_ERR, "AMAP Allocator: no available AU."); + goto error; + } + + /* If there are clusters to skip */ + if (cur->clu_to_skip > 0) { + if (amap_skip_cluster(sb, &amap->cur_cold, cur->clu_to_skip)) { + ret = -EIO; + goto error; + } + cur->clu_to_skip = 0; + } + + target_au = cur->au; + + /* + * cur->au : target AU info pointer + * cur->idx : the intra-cluster idx in the AU to start from + */ + BUG_ON(!cur->au); + BUG_ON(!cur->au->free_clusters); +
BUG_ON(cur->idx >= amap->clusters_per_au); + + num_allocated_each = 0; + new_clu = CLU_of_i_AU(amap, target_au->idx, cur->idx); + + do { + /* Allocate at the target AU */ + if ((new_clu >= CLUS_BASE) && (new_clu < fsi->num_clusters)) { + if (fat_ent_get(sb, new_clu, &read_clu)) { + // spin_unlock(&amap->amap_lock); + ret = -EIO; + goto error; + } + + if (IS_CLUS_FREE(read_clu)) { + BUG_ON(GET_AU(amap, i_AU_of_CLU(amap, new_clu)) != target_au); + + /* Free cluster found */ + if (fat_ent_set(sb, new_clu, CLUS_EOF)) { + ret = -EIO; + goto error; + } + + num_allocated_each++; + + if (IS_CLUS_EOF(p_chain->dir)) { + p_chain->dir = new_clu; + } else { + if (fat_ent_set(sb, last_clu, new_clu)) { + ret = -EIO; + goto error; + } + } + last_clu = new_clu; + + /* Update au info */ + target_au->free_clusters--; + } + + } + + new_clu++; + (cur->idx)++; + + /* End of the AU */ + if ((cur->idx >= amap->clusters_per_au) || !(target_au->free_clusters)) + break; + } while (num_allocated_each < num_alloc); + + /* Update strategy info */ + amap_put_target_au(amap, cur, num_allocated_each); + + + num_allocated += num_allocated_each; + fsi->used_clusters += num_allocated_each; + num_alloc -= num_allocated_each; + + + if (num_alloc > 0) + goto retry_alloc; + + // spin_unlock(&amap->amap_lock); + return 0; +error: + if (num_allocated) + fsi->fs_func->free_cluster(sb, p_chain, 0); + return ret; +} + + +/* Free cluster for FAT32 (not implemented yet) */ +s32 amap_free_cluster(struct super_block *sb, CHAIN_T *p_chain, s32 do_relse) +{ + return -ENOTSUPP; +} + + +/* + * This is called by fat_free_cluster() + * to update AMAP info. + */ +s32 amap_release_cluster(struct super_block *sb, u32 clu) +{ + AMAP_T *amap = SDFAT_SB(sb)->fsi.amap; + AU_INFO_T *au; + int i_au; + + // spin_lock(&amap->amap_lock); + + /* Update AU info */ + i_au = i_AU_of_CLU(amap, clu); + BUG_ON(i_au >= amap->n_au); + au = GET_AU(amap, i_au); + if (au->free_clusters >= amap->clusters_per_au) { + sdfat_fs_error(sb, "%s, au->free_clusters(%hd) is " + "greater than or equal to amap->clusters_per_au(%hd)", + __func__, au->free_clusters, amap->clusters_per_au); + return -EIO; + } + + if (IS_AU_HOT(au, amap)) { + MMSG("AMAP: Hot cluster freed\n"); + au->free_clusters++; + amap->total_fclu_hot++; + } else if (!IS_AU_WORKING(au, amap) && !IS_AU_IGNORED(au, amap)) { + /* Ordinary AU - update AU tree */ + // Can be optimized by implementing amap_update_au + amap_remove_cold_au(amap, au); + au->free_clusters++; + amap_add_cold_au(amap, au); + } else + au->free_clusters++; + + + /* Update AMAP info */ + if (au->free_clusters == amap->clusters_per_au) + amap->n_clean_au++; + if (au->free_clusters == 1) + amap->n_full_au--; + + // spin_unlock(&amap->amap_lock); + return 0; +} + + +/* + * Check if the cluster is in a working AU + * The caller should hold sb lock. + * This func. should be used only if smart allocation is on + */ +s32 amap_check_working(struct super_block *sb, u32 clu) +{ + AMAP_T *amap = SDFAT_SB(sb)->fsi.amap; + AU_INFO_T *au; + + BUG_ON(!amap); + au = GET_AU(amap, i_AU_of_CLU(amap, clu)); + return IS_AU_WORKING(au, amap); +} + + +/* + * Return the # of free clusters in that AU + */ +s32 amap_get_freeclus(struct super_block *sb, u32 clu) +{ + AMAP_T *amap = SDFAT_SB(sb)->fsi.amap; + AU_INFO_T *au; + + BUG_ON(!amap); + au = GET_AU(amap, i_AU_of_CLU(amap, clu)); + return (s32)au->free_clusters; +} + + +/* + * Add the AU containing 'clu' to the ignored AU list. + * The AU will not be used by the allocator. 
+ * + * XXX: Ignored counter needed + */ +s32 amap_mark_ignore(struct super_block *sb, u32 clu) +{ + AMAP_T *amap = SDFAT_SB(sb)->fsi.amap; + AU_INFO_T *au; + + BUG_ON(!amap); + au = GET_AU(amap, i_AU_of_CLU(amap, clu)); + + if (IS_AU_HOT(au, amap)) { + /* Doesn't work with hot AUs */ + return -EPERM; + } else if (IS_AU_WORKING(au, amap)) { + return -EBUSY; + } + + //BUG_ON(IS_AU_IGNORED(au, amap) && (GET_IGN_CNT(au) == 0)); + if (IS_AU_IGNORED(au, amap)) + return 0; + + amap_remove_cold_au(amap, au); + amap_insert_to_list(au, &amap->slist_ignored); + + BUG_ON(!IS_AU_IGNORED(au, amap)); + + //INC_IGN_CNT(au); + MMSG("AMAP: Mark ignored AU (%d)\n", au->idx); + return 0; +} + + +/* + * This function could be used only on IGNORED AUs. + * The caller should care whether it's ignored or not before using this func. + */ +s32 amap_unmark_ignore(struct super_block *sb, u32 clu) +{ + AMAP_T *amap = SDFAT_SB(sb)->fsi.amap; + AU_INFO_T *au; + + BUG_ON(!amap); + + au = GET_AU(amap, i_AU_of_CLU(amap, clu)); + + BUG_ON(!IS_AU_IGNORED(au, amap)); + // BUG_ON(GET_IGN_CNT(au) == 0); + + amap_remove_from_list(au, &amap->slist_ignored); + amap_add_cold_au(amap, au); + + BUG_ON(IS_AU_IGNORED(au, amap)); + + //DEC_IGN_CNT(au); + + MMSG("AMAP: Unmark ignored AU (%d)\n", au->idx); + + return 0; +} + +/* + * Unmark all ignored AU + * This will return # of unmarked AUs + */ +s32 amap_unmark_ignore_all(struct super_block *sb) +{ + AMAP_T *amap = SDFAT_SB(sb)->fsi.amap; + struct slist_head *entry; + AU_INFO_T *au; + int n = 0; + + BUG_ON(!amap); + entry = amap->slist_ignored.next; + while (entry) { + au = list_entry(entry, AU_INFO_T, shead); + + BUG_ON(au != GET_AU(amap, au->idx)); + BUG_ON(!IS_AU_IGNORED(au, amap)); + + //CLEAR_IGN_CNT(au); + amap_remove_from_list(au, &amap->slist_ignored); + amap_add_cold_au(amap, au); + + MMSG("AMAP: Unmark ignored AU (%d)\n", au->idx); + n++; + + entry = amap->slist_ignored.next; + } + + BUG_ON(amap->slist_ignored.next != NULL); + MMSG("AMAP: unmark_ignore_all, total %d AUs\n", n); + + return n; +} + +/** + * @fn amap_get_au_stat + * @brief report AU status depending on mode + * @return positive on success, 0 otherwise + * @param sb super block + * @param mode TOTAL, CLEAN and FULL + */ +u32 amap_get_au_stat(struct super_block *sb, s32 mode) +{ + AMAP_T *amap = SDFAT_SB(sb)->fsi.amap; + + if (!amap) + return 0; + + if (mode == VOL_AU_STAT_TOTAL) + return amap->n_au; + else if (mode == VOL_AU_STAT_CLEAN) + return amap->n_clean_au; + else if (mode == VOL_AU_STAT_FULL) + return amap->n_full_au; + + return 0; +} + diff --git a/fs/sdfat/amap_smart.h b/fs/sdfat/amap_smart.h new file mode 100644 index 000000000000..caee6f6b3681 --- /dev/null +++ b/fs/sdfat/amap_smart.h @@ -0,0 +1,137 @@ +/* + * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */ + +#ifndef _SDFAT_AMAP_H +#define _SDFAT_AMAP_H + +#include <linux/fs.h> +#include <linux/list.h> +#include <linux/rbtree.h> + +/* AMAP Configuration Variable */ +#define SMART_ALLOC_N_HOT_AU (5) + +/* Allocating Destination (for smart allocator): + * moved to sdfat.h + */ +/* + * #define ALLOC_COLD_ALIGNED (1) + * #define ALLOC_COLD_PACKING (2) + * #define ALLOC_COLD_SEQ (4) + */ + +/* Minimum sectors for support AMAP create */ +#define AMAP_MIN_SUPPORT_SECTORS (1048576) + +#define amap_add_hot_au(amap, au) amap_insert_to_list(au, &amap->slist_hot) + +/* singly linked list */ +struct slist_head { + struct slist_head *next; + struct slist_head *head; +}; + +/* AU entry type */ +typedef struct __AU_INFO_T { + uint16_t idx; /* the index of the AU (0, 1, 2, ... ) */ + uint16_t free_clusters; /* # of available cluster */ + union { + struct list_head head; + struct slist_head shead;/* singly linked list head for hot list */ + }; +} AU_INFO_T; + + +/* Allocation Target AU */ +typedef struct __TARGET_AU_T { + AU_INFO_T *au; /* Working AU */ + uint16_t idx; /* Intra-AU cluster index */ + uint16_t clu_to_skip; /* Clusters to skip */ +} TARGET_AU_T; + + +/* AMAP free-clusters-based node */ +typedef struct { + struct list_head head; /* the list of AUs */ +} FCLU_NODE_T; + + +/* AMAP options */ +typedef struct { + unsigned int packing_ratio; /* Tunable packing ratio */ + unsigned int au_size; /* AU size in sectors */ + unsigned int au_align_factor; /* Hidden sectors % au_size */ +} AMAP_OPT_T; + +typedef struct __AMAP_T { + spinlock_t amap_lock; /* obsolete */ + struct super_block *sb; + + int n_au; + int n_clean_au, n_full_au; + int clu_align_bias; + uint16_t clusters_per_au; + AU_INFO_T **au_table; /* An array of AU_INFO entries */ + AMAP_OPT_T option; + + /* Size-based AU management pool (cold) */ + FCLU_NODE_T *fclu_nodes; /* An array of listheads */ + int fclu_order; /* Page order that fclu_nodes needs */ + int fclu_hint; /* maximum # of free clusters in an AU */ + + /* Hot AU list */ + unsigned int total_fclu_hot; /* Free clusters in hot list */ + struct slist_head slist_hot; /* Hot AU list */ + + /* Ignored AU list */ + struct slist_head slist_ignored; + + /* Allocator variables (keep 2 AUs at maximum) */ + TARGET_AU_T cur_cold; + TARGET_AU_T cur_hot; + int n_need_packing; +} AMAP_T; + + +/* AU table */ +#define N_AU_PER_TABLE (int)(PAGE_SIZE / sizeof(AU_INFO_T)) +#define GET_AU(amap, i_AU) (amap->au_table[(i_AU) / N_AU_PER_TABLE] + ((i_AU) % N_AU_PER_TABLE)) +//#define MAX_CLU_PER_AU (int)(PAGE_SIZE / sizeof(FCLU_NODE_T)) +#define MAX_CLU_PER_AU (1024) + +/* Cold AU bucket <-> # of freeclusters */ +#define NODE_CLEAN(amap) (&amap->fclu_nodes[amap->clusters_per_au - 1]) +#define NODE(fclu, amap) (&amap->fclu_nodes[fclu - 1]) +#define FREE_CLUSTERS(node, amap) ((int)(node - amap->fclu_nodes) + 1) + +/* AU status */ +#define MAGIC_WORKING ((struct slist_head *)0xFFFF5091) +#define IS_AU_HOT(au, amap) (au->shead.head == &amap->slist_hot) +#define IS_AU_IGNORED(au, amap) (au->shead.head == &amap->slist_ignored) +#define IS_AU_WORKING(au, amap) (au->shead.head == MAGIC_WORKING) +#define SET_AU_WORKING(au) (au->shead.head = MAGIC_WORKING) + +/* AU <-> cluster */ +#define i_AU_of_CLU(amap, clu) ((amap->clu_align_bias + clu) / amap->clusters_per_au) +#define CLU_of_i_AU(amap, i_au, idx) \ + ((uint32_t)(i_au) * (uint32_t)amap->clusters_per_au + (idx) - amap->clu_align_bias) + +/* + * NOTE : AMAP internal functions are moved to core.h + */ + +#endif /* _SDFAT_AMAP_H */ diff --git a/fs/sdfat/api.c 
b/fs/sdfat/api.c
new file mode 100644
index 000000000000..45b0c4106bda
--- /dev/null
+++ b/fs/sdfat/api.c
@@ -0,0 +1,636 @@
+/*
+ * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/************************************************************************/
+/*                                                                      */
+/*  PROJECT : exFAT & FAT12/16/32 File System                           */
+/*  FILE    : sdfat_api.c                                               */
+/*  PURPOSE : sdFAT volume lock layer                                   */
+/*                                                                      */
+/************************************************************************/
+
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/mutex.h>
+
+#include "version.h"
+#include "config.h"
+
+#include "sdfat.h"
+#include "core.h"
+
+/*----------------------------------------------------------------------*/
+/*  Internal structures                                                 */
+/*----------------------------------------------------------------------*/
+
+/*----------------------------------------------------------------------*/
+/*  Constant & Macro Definitions                                        */
+/*----------------------------------------------------------------------*/
+static DEFINE_MUTEX(_lock_core);
+
+/*----------------------------------------------------------------------*/
+/*  Global Variable Definitions                                         */
+/*----------------------------------------------------------------------*/
+
+/*----------------------------------------------------------------------*/
+/*  Local Variable Definitions                                          */
+/*----------------------------------------------------------------------*/
+
+/*----------------------------------------------------------------------*/
+/*  Local Function Declarations                                         */
+/*----------------------------------------------------------------------*/
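+
+/*
+ * Editorial note (inferred from the code below, not part of the
+ * original sources): _lock_core serializes mount/umount across all
+ * sdFAT volumes, while SDFAT_SB(sb)->s_vlock protects one mounted
+ * volume. fsapi_umount() acquires _lock_core before s_vlock, so that
+ * order is the implied lock hierarchy for any new caller.
+ */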
+
+/*======================================================================*/
+/*  Global Function Definitions                                         */
+/*  - All functions for global use have the same return value format,   */
+/*    that is, 0 on success and a negative error number on various      */
+/*    error conditions.                                                 */
+/*======================================================================*/
+
+/*----------------------------------------------------------------------*/
+/*  sdFAT Filesystem Init & Exit Functions                              */
+/*----------------------------------------------------------------------*/
+
+s32 fsapi_init(void)
+{
+	return fscore_init();
+}
+
+s32 fsapi_shutdown(void)
+{
+	return fscore_shutdown();
+}
+
+/*----------------------------------------------------------------------*/
+/*  Volume Management Functions                                         */
+/*----------------------------------------------------------------------*/
+
+/* mount the file system volume */
+s32 fsapi_mount(struct super_block *sb)
+{
+	s32 err;
+
+	/* acquire the core lock for file system critical section */
+	mutex_lock(&_lock_core);
+
+	err = meta_cache_init(sb);
+	if (err)
+		goto out;
+
+	err = fscore_mount(sb);
+out:
+	if (err)
+		meta_cache_shutdown(sb);
+
+	/* release the core lock for file system critical section */
+	mutex_unlock(&_lock_core);
+
+	return err;
+}
+EXPORT_SYMBOL(fsapi_mount);
+
+/* unmount the file system volume */
+s32 fsapi_umount(struct super_block *sb)
+{
+	s32 err;
+
+	/* acquire the core lock for file system critical section */
+	mutex_lock(&_lock_core);
+
+	mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+	err = fscore_umount(sb);
+	meta_cache_shutdown(sb);
+	mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+
+	/* release the core lock for file system critical section */
+	mutex_unlock(&_lock_core);
+
+	return err;
+}
+EXPORT_SYMBOL(fsapi_umount);
+
+/* get the information of a file system volume */
+s32 fsapi_statfs(struct super_block *sb, VOL_INFO_T *info)
+{
+	FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+	/* check the validity of pointer parameters */
+	ASSERT(info);
+
+	if (fsi->used_clusters == (u32) ~0) {
+		s32 err;
+
+		mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+		err = fscore_statfs(sb, info);
+		mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+		return err;
+	}
+
+	info->FatType = fsi->vol_type;
+	info->ClusterSize = fsi->cluster_size;
+	info->NumClusters = fsi->num_clusters - 2; /* clu 0 & 1 */
+	info->UsedClusters = fsi->used_clusters + fsi->reserved_clusters;
+	info->FreeClusters = info->NumClusters - info->UsedClusters;
+
+	return 0;
+}
+EXPORT_SYMBOL(fsapi_statfs);
+
+/* synchronize a file system volume */
+s32 fsapi_sync_fs(struct super_block *sb, s32 do_sync)
+{
+	s32 err;
+
+	mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+	err = fscore_sync_fs(sb, do_sync);
+	mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+	return err;
+}
+EXPORT_SYMBOL(fsapi_sync_fs);
+
+s32 fsapi_set_vol_flags(struct super_block *sb, u16 new_flag, s32 always_sync)
+{
+	s32 err;
+
+	mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+	err = fscore_set_vol_flags(sb, new_flag, always_sync);
+	mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+	return err;
+}
+EXPORT_SYMBOL(fsapi_set_vol_flags);
+
+/*----------------------------------------------------------------------*/
+/*  File Operation Functions                                            */
+/*----------------------------------------------------------------------*/
+
+/* lookup */
+s32 fsapi_lookup(struct inode *inode, u8 *path, FILE_ID_T *fid)
+{
+	s32 err;
+	struct super_block *sb = inode->i_sb;
+
+	/* check the validity of pointer parameters */
+	ASSERT(fid && path);
+
+	if (unlikely(!strlen(path)))
+		return -EINVAL;
+
+	mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+	err = fscore_lookup(inode, path, fid);
+	mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+	return err;
+}
+EXPORT_SYMBOL(fsapi_lookup);
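+
+/*
+ * Editorial sketch (illustration only, not from the original sources):
+ * every fsapi_* wrapper below follows the same pattern -- take the
+ * per-volume lock, call the matching fscore_* worker, drop the lock:
+ *
+ *	s32 fsapi_xxx(struct super_block *sb, ...)
+ *	{
+ *		s32 err;
+ *
+ *		mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+ *		err = fscore_xxx(sb, ...);
+ *		mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+ *		return err;
+ *	}
+ *
+ * fscore_xxx is a placeholder name; only fsapi_mount()/fsapi_umount()
+ * above additionally hold the global _lock_core.
+ */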
+
+/* create a file */
+s32 fsapi_create(struct inode *inode, u8 *path, u8 mode, FILE_ID_T *fid)
+{
+	s32 err;
+	struct super_block *sb = inode->i_sb;
+
+	/* check the validity of pointer parameters */
+	ASSERT(fid && path);
+
+	if (unlikely(!strlen(path)))
+		return -EINVAL;
+
+	mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+	err = fscore_create(inode, path, mode, fid);
+	mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+	return err;
+}
+EXPORT_SYMBOL(fsapi_create);
+
+/* read the target string of a symlink */
+s32 fsapi_read_link(struct inode *inode, FILE_ID_T *fid, void *buffer, u64 count, u64 *rcount)
+{
+	s32 err;
+	struct super_block *sb = inode->i_sb;
+
+	/* check the validity of pointer parameters */
+	ASSERT(fid && buffer);
+
+	mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+	err = fscore_read_link(inode, fid, buffer, count, rcount);
+	mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+	return err;
+}
+EXPORT_SYMBOL(fsapi_read_link);
+
+/* write the target string of a symlink */
+s32 fsapi_write_link(struct inode *inode, FILE_ID_T *fid, void *buffer, u64 count, u64 *wcount)
+{
+	s32 err;
+	struct super_block *sb = inode->i_sb;
+
+	/* check the validity of pointer parameters */
+	ASSERT(fid && buffer);
+
+	mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+	err = fscore_write_link(inode, fid, buffer, count, wcount);
+	mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+	return err;
+}
+EXPORT_SYMBOL(fsapi_write_link);
+
+/* resize the file length */
+s32 fsapi_truncate(struct inode *inode, u64 old_size, u64 new_size)
+{
+	s32 err;
+	struct super_block *sb = inode->i_sb;
+
+	mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+	TMSG("%s entered (inode %p size %llu)\n", __func__, inode, new_size);
+	err = fscore_truncate(inode, old_size, new_size);
+	TMSG("%s exited (%d)\n", __func__, err);
+	mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+	return err;
+}
+EXPORT_SYMBOL(fsapi_truncate);
+
+/* rename or move an old file to a new file */
+s32 fsapi_rename(struct inode *old_parent_inode, FILE_ID_T *fid,
+		struct inode *new_parent_inode, struct dentry *new_dentry)
+{
+	s32 err;
+	struct super_block *sb = old_parent_inode->i_sb;
+
+	/* check the validity of pointer parameters */
+	ASSERT(fid);
+
+	mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+	err = fscore_rename(old_parent_inode, fid, new_parent_inode, new_dentry);
+	mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+	return err;
+}
+EXPORT_SYMBOL(fsapi_rename);
+
+/* remove a file */
+s32 fsapi_remove(struct inode *inode, FILE_ID_T *fid)
+{
+	s32 err;
+	struct super_block *sb = inode->i_sb;
+
+	/* check the validity of pointer parameters */
+	ASSERT(fid);
+
+	mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+	err = fscore_remove(inode, fid);
+	mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+	return err;
+}
+EXPORT_SYMBOL(fsapi_remove);
+
+/* get the information of a given file */
+s32 fsapi_read_inode(struct inode *inode, DIR_ENTRY_T *info)
+{
+	s32 err;
+	struct super_block *sb = inode->i_sb;
+
+	mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+	TMSG("%s entered (inode %p info %p)\n", __func__, inode, info);
+	err = fscore_read_inode(inode, info);
+	TMSG("%s exited (err:%d)\n", __func__, err);
+	mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+	return err;
+}
+EXPORT_SYMBOL(fsapi_read_inode);
+
+/* set the information of a given file */
+s32 fsapi_write_inode(struct inode *inode, DIR_ENTRY_T *info, int sync)
+{
+	s32 err;
+	struct super_block *sb = inode->i_sb;
+
+	mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+	TMSG("%s entered (inode %p info %p sync:%d)\n",
+		__func__, inode, info, sync);
+	err = fscore_write_inode(inode, info, sync);
+	TMSG("%s exited (err:%d)\n", __func__, err);
+	mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+	return err;
+}
+EXPORT_SYMBOL(fsapi_write_inode);
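+
+/*
+ * Editorial sketch (hypothetical caller, not from the original
+ * sources): a get_block-style path would translate a file-relative
+ * cluster offset with fsapi_map_clus() defined below, roughly:
+ *
+ *	u32 clu;
+ *	s32 err;
+ *
+ *	err = fsapi_map_clus(inode, clu_offset, &clu, dest);
+ *	if (!err)
+ *		... turn clu into a starting sector and map it ...
+ *
+ * where dest appears to be the allocation-destination hint used by
+ * the smart allocator (see the ALLOC_COLD_* values referenced in
+ * amap_smart.h); that reading is an assumption, not documented here.
+ */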
+
+/* return the cluster number for the given cluster offset */
+s32 fsapi_map_clus(struct inode *inode, u32 clu_offset, u32 *clu, int dest)
+{
+	s32 err;
+	struct super_block *sb = inode->i_sb;
+
+	/* check the validity of pointer parameters */
+	ASSERT(clu);
+
+	mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+	TMSG("%s entered (inode:%p clus:%08x dest:%d)\n",
+		__func__, inode, *clu, dest);
+	err = fscore_map_clus(inode, clu_offset, clu, dest);
+	TMSG("%s exited (clu:%08x err:%d)\n", __func__, *clu, err);
+	mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+	return err;
+}
+EXPORT_SYMBOL(fsapi_map_clus);
+
+/* reserve a cluster */
+s32 fsapi_reserve_clus(struct inode *inode)
+{
+	s32 err;
+	struct super_block *sb = inode->i_sb;
+
+	mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+	TMSG("%s entered (inode:%p)\n", __func__, inode);
+	err = fscore_reserve_clus(inode);
+	TMSG("%s exited (err:%d)\n", __func__, err);
+	mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+	return err;
+}
+EXPORT_SYMBOL(fsapi_reserve_clus);
+
+/*----------------------------------------------------------------------*/
+/*  Directory Operation Functions                                       */
+/*----------------------------------------------------------------------*/
+
+/* create (make) a directory */
+s32 fsapi_mkdir(struct inode *inode, u8 *path, FILE_ID_T *fid)
+{
+	s32 err;
+	struct super_block *sb = inode->i_sb;
+
+	/* check the validity of pointer parameters */
+	ASSERT(fid && path);
+
+	if (unlikely(!strlen(path)))
+		return -EINVAL;
+
+	mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+	err = fscore_mkdir(inode, path, fid);
+	mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+	return err;
+}
+EXPORT_SYMBOL(fsapi_mkdir);
+
+/* read a directory entry from the opened directory */
+s32 fsapi_readdir(struct inode *inode, DIR_ENTRY_T *dir_entry)
+{
+	s32 err;
+	struct super_block *sb = inode->i_sb;
+
+	/* check the validity of pointer parameters */
+	ASSERT(dir_entry);
+
+	mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+	err = fscore_readdir(inode, dir_entry);
+	mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+	return err;
+}
+EXPORT_SYMBOL(fsapi_readdir);
+
+/* remove a directory */
+s32 fsapi_rmdir(struct inode *inode, FILE_ID_T *fid)
+{
+	s32 err;
+	struct super_block *sb = inode->i_sb;
+
+	/* check the validity of pointer parameters */
+	ASSERT(fid);
+
+	mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+	err = fscore_rmdir(inode, fid);
+	mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+	return err;
+}
+EXPORT_SYMBOL(fsapi_rmdir);
+
+/* unlink a file,
+ * that is, remove an entry from a directory,
BUT don't truncate + */ +s32 fsapi_unlink(struct inode *inode, FILE_ID_T *fid) +{ + s32 err; + struct super_block *sb = inode->i_sb; + + /* check the validity of pointer parameters */ + ASSERT(fid); + mutex_lock(&(SDFAT_SB(sb)->s_vlock)); + err = fscore_unlink(inode, fid); + mutex_unlock(&(SDFAT_SB(sb)->s_vlock)); + return err; +} +EXPORT_SYMBOL(fsapi_unlink); + +/* reflect the internal dirty flags to VFS bh dirty flags */ +s32 fsapi_cache_flush(struct super_block *sb, int do_sync) +{ + mutex_lock(&(SDFAT_SB(sb)->s_vlock)); + fcache_flush(sb, do_sync); + dcache_flush(sb, do_sync); + mutex_unlock(&(SDFAT_SB(sb)->s_vlock)); + return 0; +} +EXPORT_SYMBOL(fsapi_cache_flush); + +/* release FAT & buf cache */ +s32 fsapi_cache_release(struct super_block *sb) +{ +#ifdef CONFIG_SDFAT_DEBUG + mutex_lock(&(SDFAT_SB(sb)->s_vlock)); + + fcache_release_all(sb); + dcache_release_all(sb); + + mutex_unlock(&(SDFAT_SB(sb)->s_vlock)); +#endif /* CONFIG_SDFAT_DEBUG */ + return 0; +} +EXPORT_SYMBOL(fsapi_cache_release); + +u32 fsapi_get_au_stat(struct super_block *sb, s32 mode) +{ + /* volume lock is not required */ + return fscore_get_au_stat(sb, mode); +} +EXPORT_SYMBOL(fsapi_get_au_stat); + +/* clear extent cache */ +void fsapi_invalidate_extent(struct inode *inode) +{ + /* Volume lock is not required, + * because it is only called by evict_inode. + * If any other function can call it, + * you should check whether volume lock is needed or not. + */ + extent_cache_inval_inode(inode); +} +EXPORT_SYMBOL(fsapi_invalidate_extent); + +/* check device is ejected */ +s32 fsapi_check_bdi_valid(struct super_block *sb) +{ + return fscore_check_bdi_valid(sb); +} +EXPORT_SYMBOL(fsapi_check_bdi_valid); + + + +#ifdef CONFIG_SDFAT_DFR +/*----------------------------------------------------------------------*/ +/* Defragmentation related */ +/*----------------------------------------------------------------------*/ +s32 fsapi_dfr_get_info(struct super_block *sb, void *arg) +{ + /* volume lock is not required */ + return defrag_get_info(sb, (struct defrag_info_arg *)arg); +} +EXPORT_SYMBOL(fsapi_dfr_get_info); + +s32 fsapi_dfr_scan_dir(struct super_block *sb, void *args) +{ + s32 err; + + /* check the validity of pointer parameters */ + ASSERT(args); + + mutex_lock(&(SDFAT_SB(sb)->s_vlock)); + err = defrag_scan_dir(sb, (struct defrag_trav_arg *)args); + mutex_unlock(&(SDFAT_SB(sb)->s_vlock)); + return err; +} +EXPORT_SYMBOL(fsapi_dfr_scan_dir); + +s32 fsapi_dfr_validate_clus(struct inode *inode, void *chunk, int skip_prev) +{ + s32 err; + struct super_block *sb = inode->i_sb; + + mutex_lock(&(SDFAT_SB(sb)->s_vlock)); + err = defrag_validate_cluster(inode, + (struct defrag_chunk_info *)chunk, skip_prev); + mutex_unlock(&(SDFAT_SB(sb)->s_vlock)); + return err; +} +EXPORT_SYMBOL(fsapi_dfr_validate_clus); + +s32 fsapi_dfr_reserve_clus(struct super_block *sb, s32 nr_clus) +{ + s32 err; + + mutex_lock(&(SDFAT_SB(sb)->s_vlock)); + err = defrag_reserve_clusters(sb, nr_clus); + mutex_unlock(&(SDFAT_SB(sb)->s_vlock)); + return err; +} +EXPORT_SYMBOL(fsapi_dfr_reserve_clus); + +s32 fsapi_dfr_mark_ignore(struct super_block *sb, unsigned int clus) +{ + /* volume lock is not required */ + return defrag_mark_ignore(sb, clus); +} +EXPORT_SYMBOL(fsapi_dfr_mark_ignore); + +void fsapi_dfr_unmark_ignore_all(struct super_block *sb) +{ + /* volume lock is not required */ + defrag_unmark_ignore_all(sb); +} +EXPORT_SYMBOL(fsapi_dfr_unmark_ignore_all); + +s32 fsapi_dfr_map_clus(struct inode *inode, u32 clu_offset, u32 *clu) +{ + s32 err; + struct 
super_block *sb = inode->i_sb; + + /* check the validity of pointer parameters */ + ASSERT(clu); + + mutex_lock(&(SDFAT_SB(sb)->s_vlock)); + err = defrag_map_cluster(inode, clu_offset, clu); + mutex_unlock(&(SDFAT_SB(sb)->s_vlock)); + + return err; +} +EXPORT_SYMBOL(fsapi_dfr_map_clus); + +void fsapi_dfr_writepage_endio(struct page *page) +{ + /* volume lock is not required */ + defrag_writepage_end_io(page); +} +EXPORT_SYMBOL(fsapi_dfr_writepage_endio); + +void fsapi_dfr_update_fat_prev(struct super_block *sb, int force) +{ + mutex_lock(&(SDFAT_SB(sb)->s_vlock)); + defrag_update_fat_prev(sb, force); + mutex_unlock(&(SDFAT_SB(sb)->s_vlock)); +} +EXPORT_SYMBOL(fsapi_dfr_update_fat_prev); + +void fsapi_dfr_update_fat_next(struct super_block *sb) +{ + mutex_lock(&(SDFAT_SB(sb)->s_vlock)); + defrag_update_fat_next(sb); + mutex_unlock(&(SDFAT_SB(sb)->s_vlock)); +} +EXPORT_SYMBOL(fsapi_dfr_update_fat_next); + +void fsapi_dfr_check_discard(struct super_block *sb) +{ + mutex_lock(&(SDFAT_SB(sb)->s_vlock)); + defrag_check_discard(sb); + mutex_unlock(&(SDFAT_SB(sb)->s_vlock)); +} +EXPORT_SYMBOL(fsapi_dfr_check_discard); + +void fsapi_dfr_free_clus(struct super_block *sb, u32 clus) +{ + mutex_lock(&(SDFAT_SB(sb)->s_vlock)); + defrag_free_cluster(sb, clus); + mutex_unlock(&(SDFAT_SB(sb)->s_vlock)); +} +EXPORT_SYMBOL(fsapi_dfr_free_clus); + +s32 fsapi_dfr_check_dfr_required(struct super_block *sb, int *totalau, int *cleanau, int *fullau) +{ + /* volume lock is not required */ + return defrag_check_defrag_required(sb, totalau, cleanau, fullau); +} +EXPORT_SYMBOL(fsapi_dfr_check_dfr_required); + +s32 fsapi_dfr_check_dfr_on(struct inode *inode, loff_t start, loff_t end, s32 cancel, const char *caller) +{ + /* volume lock is not required */ + return defrag_check_defrag_on(inode, start, end, cancel, caller); +} +EXPORT_SYMBOL(fsapi_dfr_check_dfr_on); + + + +#ifdef CONFIG_SDFAT_DFR_DEBUG +void fsapi_dfr_spo_test(struct super_block *sb, int flag, const char *caller) +{ + /* volume lock is not required */ + defrag_spo_test(sb, flag, caller); +} +EXPORT_SYMBOL(fsapi_dfr_spo_test); +#endif + + +#endif /* CONFIG_SDFAT_DFR */ + +/* end of sdfat_api.c */ diff --git a/fs/sdfat/api.h b/fs/sdfat/api.h new file mode 100644 index 000000000000..344297ab58ae --- /dev/null +++ b/fs/sdfat/api.h @@ -0,0 +1,409 @@ +/* + * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. 
+ */
+
+#ifndef _SDFAT_API_H
+#define _SDFAT_API_H
+
+#include "config.h"
+#include "sdfat_fs.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+
+/*----------------------------------------------------------------------*/
+/*  Configure Constant & Macro Definitions                              */
+/*----------------------------------------------------------------------*/
+/* cache size (in number of sectors) */
+/* (must be a power of 2) */
+#define FAT_CACHE_SIZE          128
+#define FAT_CACHE_HASH_SIZE     64
+#define BUF_CACHE_SIZE          256
+#define BUF_CACHE_HASH_SIZE     64
+
+/* Read-ahead related */
+/* These config values must be powers of 2 */
+#define FCACHE_MAX_RA_SIZE      (PAGE_SIZE)
+#define DCACHE_MAX_RA_SIZE      (128*1024)
+
+/*----------------------------------------------------------------------*/
+/*  Constant & Macro Definitions                                        */
+/*----------------------------------------------------------------------*/
+/* type values */
+#define TYPE_UNUSED             0x0000
+#define TYPE_DELETED            0x0001
+#define TYPE_INVALID            0x0002
+#define TYPE_CRITICAL_PRI       0x0100
+#define TYPE_BITMAP             0x0101
+#define TYPE_UPCASE             0x0102
+#define TYPE_VOLUME             0x0103
+#define TYPE_DIR                0x0104
+#define TYPE_FILE               0x011F
+#define TYPE_SYMLINK            0x015F
+#define TYPE_CRITICAL_SEC       0x0200
+#define TYPE_STREAM             0x0201
+#define TYPE_EXTEND             0x0202
+#define TYPE_ACL                0x0203
+#define TYPE_BENIGN_PRI         0x0400
+#define TYPE_GUID               0x0401
+#define TYPE_PADDING            0x0402
+#define TYPE_ACLTAB             0x0403
+#define TYPE_BENIGN_SEC         0x0800
+#define TYPE_ALL                0x0FFF
+
+/* eio values */
+#define SDFAT_EIO_NONE          (0x00000000)
+#define SDFAT_EIO_READ          (0x00000001)
+#define SDFAT_EIO_WRITE         (0x00000002)
+#define SDFAT_EIO_BDI           (0x00000004)
+
+/* modes for volume allocation unit status */
+#define VOL_AU_STAT_TOTAL       (0)
+#define VOL_AU_STAT_CLEAN       (1)
+#define VOL_AU_STAT_FULL        (2)
+
+/*----------------------------------------------------------------------*/
+/*  NLS Type Definitions                                                */
+/*----------------------------------------------------------------------*/
+
+/* DOS name structure */
+typedef struct {
+	u8 name[DOS_NAME_LENGTH];
+	u8 name_case;
+} DOS_NAME_T;
+
+/* unicode name structure */
+typedef struct {
+	u16 name[MAX_NAME_LENGTH+3]; /* +3 for null and for converting */
+	u16 name_hash;
+	u8 name_len;
+} UNI_NAME_T;
+
+/*----------------------------------------------------------------------*/
+/*  Type Definitions                                                    */
+/*----------------------------------------------------------------------*/
+/* should be merged into DATE_TIME_T */
+typedef union {
+	struct {
+		u8 off : 7;
+		u8 valid : 1;
+	};
+	u8 value;
+} TIMEZONE_T;
+
+typedef struct {
+	u16 sec;	/* 0 ~ 59 */
+	u16 min;	/* 0 ~ 59 */
+	u16 hour;	/* 0 ~ 23 */
+	u16 day;	/* 1 ~ 31 */
+	u16 mon;	/* 1 ~ 12 */
+	u16 year;	/* 0 ~ 127 (since 1980) */
+	TIMEZONE_T tz;
+} TIMESTAMP_T;
+
+typedef struct {
+	u16 Year;
+	u16 Month;
+	u16 Day;
+	u16 Hour;
+	u16 Minute;
+	u16 Second;
+	u16 MilliSecond;
+	TIMEZONE_T Timezone;
+} DATE_TIME_T;
+
+typedef struct {
+	u64 Offset;	// start sector number of the partition
+	u64 Size;	// in sectors
+} PART_INFO_T;
+
+typedef struct {
+	u32 SecSize;	// sector size in bytes
+	u64 DevSize;	// block device size in sectors
+} DEV_INFO_T;
+
+typedef struct {
+	u32 FatType;
+	u32 ClusterSize;
+	u32 NumClusters;
+	u32 FreeClusters;
+	u32 UsedClusters;
+} VOL_INFO_T;
+
+/* directory structure */
+typedef struct {
+	u32 dir;
+	u32 size;
+	u8 flags;
+} CHAIN_T;
+
+/* hint structure */
+typedef struct {
+	u32 clu;
+	union {
+		u32 off;	// cluster offset
+		s32 eidx;	// entry index
+	};
+} HINT_T;
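+
+/*
+ * Editorial note: CHAIN_T describes a cluster chain (start cluster,
+ * length in clusters, allocation flags), and HINT_T caches the last
+ * position a lookup reached -- either a cluster offset or a directory
+ * entry index -- so the next sequential access can resume there
+ * instead of re-walking the chain from start_clu (see the hint_bmap /
+ * hint_stat fields of FILE_ID_T below).
+ */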
+
+typedef struct {
+	spinlock_t cache_lru_lock;
+	struct list_head cache_lru;
+	s32 nr_caches;
+	u32 cache_valid_id;	// for avoiding the race between alloc and free
+} EXTENT_T;
+
+/* first empty entry hint information */
+typedef struct {
+	s32 eidx;	// entry index of a directory
+	s32 count;	// count of continuous empty entries
+	CHAIN_T cur;	// the cluster where the first empty slot exists
+} HINT_FEMP_T;
+
+/* file id structure */
+typedef struct {
+	CHAIN_T dir;
+	s32 entry;
+	u32 type;
+	u32 attr;
+	u32 start_clu;
+	u64 size;
+	u8 flags;
+	u8 reserved[3];	// padding
+	u32 version;	// copy of the low 32 bits of i_version, to check the validity of hint_stat
+	s64 rwoffset;	// file offset or dentry index for readdir
+	EXTENT_T extent;	// extent cache for a file
+	HINT_T hint_bmap;	// hint for cluster last accessed
+	HINT_T hint_stat;	// hint for entry index we try to lookup next time
+	HINT_FEMP_T hint_femp;	// hint for first empty entry
+} FILE_ID_T;
+
+typedef struct {
+	s8 *lfn;
+	s8 *sfn;
+	s32 lfnbuf_len;	//usually MAX_UNINAME_BUF_SIZE
+	s32 sfnbuf_len;	//usually MAX_DOSNAME_BUF_SIZE, used only for vfat, not for exfat
+} DENTRY_NAMEBUF_T;
+
+typedef struct {
+	u32 Attr;
+	u64 Size;
+	u32 NumSubdirs;
+	DATE_TIME_T CreateTimestamp;
+	DATE_TIME_T ModifyTimestamp;
+	DATE_TIME_T AccessTimestamp;
+	DENTRY_NAMEBUF_T NameBuf;
+} DIR_ENTRY_T;
+
+/* cache information */
+typedef struct __cache_entry {
+	struct __cache_entry *next;
+	struct __cache_entry *prev;
+	struct {
+		struct __cache_entry *next;
+		struct __cache_entry *prev;
+	} hash;
+	u64 sec;
+	u32 flag;
+	struct buffer_head *bh;
+} cache_ent_t;
+
+/*----------------------------------------------------------------------*/
+/*  Type Definitions : Wrapper & In-Core                                */
+/*----------------------------------------------------------------------*/
+typedef struct __FATENT_OPS_T {
+	s32 (*ent_get)(struct super_block *sb, u32 loc, u32 *content);
+	s32 (*ent_set)(struct super_block *sb, u32 loc, u32 content);
+} FATENT_OPS_T;
+
+typedef struct {
+	s32 (*alloc_cluster)(struct super_block *, u32, CHAIN_T *, s32);
+	s32 (*free_cluster)(struct super_block *, CHAIN_T *, s32);
+	s32 (*count_used_clusters)(struct super_block *, u32 *);
+	s32 (*init_dir_entry)(struct super_block *, CHAIN_T *, s32, u32, u32, u64);
+	s32 (*init_ext_entry)(struct super_block *, CHAIN_T *, s32, s32, UNI_NAME_T *, DOS_NAME_T *);
+	s32 (*find_dir_entry)(struct super_block *, FILE_ID_T *, CHAIN_T *, UNI_NAME_T *, s32, DOS_NAME_T *, u32);
+	s32 (*delete_dir_entry)(struct super_block *, CHAIN_T *, s32, s32, s32);
+	void (*get_uniname_from_ext_entry)(struct super_block *, CHAIN_T *, s32, u16 *);
+	s32 (*count_ext_entries)(struct super_block *, CHAIN_T *, s32, DENTRY_T *);
+	s32 (*calc_num_entries)(UNI_NAME_T *);
+	s32 (*check_max_dentries)(FILE_ID_T *);
+	u32 (*get_entry_type)(DENTRY_T *);
+	void (*set_entry_type)(DENTRY_T *, u32);
+	u32 (*get_entry_attr)(DENTRY_T *);
+	void (*set_entry_attr)(DENTRY_T *, u32);
+	u8 (*get_entry_flag)(DENTRY_T *);
+	void (*set_entry_flag)(DENTRY_T *, u8);
+	u32 (*get_entry_clu0)(DENTRY_T *);
+	void (*set_entry_clu0)(DENTRY_T *, u32);
+	u64 (*get_entry_size)(DENTRY_T *);
+	void (*set_entry_size)(DENTRY_T *, u64);
+	void (*get_entry_time)(DENTRY_T *, TIMESTAMP_T *, u8);
+	void (*set_entry_time)(DENTRY_T *, TIMESTAMP_T *, u8);
+	u32 (*get_au_stat)(struct super_block *, s32);
+} FS_FUNC_T;
+
+typedef struct __FS_INFO_T {
+	s32 bd_opened;	// opened or not
+	u32 vol_type;	// volume FAT type
+	u32 vol_id;	// volume serial number
+	u64 num_sectors;	// num of sectors in volume
+	u32
num_clusters; // num of clusters in volume + u32 cluster_size; // cluster size in bytes + u32 cluster_size_bits; + u32 sect_per_clus; // cluster size in sectors + u32 sect_per_clus_bits; + u64 FAT1_start_sector; // FAT1 start sector + u64 FAT2_start_sector; // FAT2 start sector + u64 root_start_sector; // root dir start sector + u64 data_start_sector; // data area start sector + u32 num_FAT_sectors; // num of FAT sectors + u32 root_dir; // root dir cluster + u32 dentries_in_root; // num of dentries in root dir + u32 dentries_per_clu; // num of dentries per cluster + u32 vol_flag; // volume dirty flag + struct buffer_head *pbr_bh; // buffer_head of PBR sector + + u32 map_clu; // allocation bitmap start cluster + u32 map_sectors; // num of allocation bitmap sectors + struct buffer_head **vol_amap; // allocation bitmap + + u16 **vol_utbl; // upcase table + + u32 clu_srch_ptr; // cluster search pointer + u32 used_clusters; // number of used clusters + + u32 prev_eio; // block device operation error flag + + FS_FUNC_T *fs_func; + FATENT_OPS_T *fatent_ops; + + s32 reserved_clusters; // # of reserved clusters (DA) + void *amap; // AU Allocation Map + + /* fat cache */ + struct { + cache_ent_t pool[FAT_CACHE_SIZE]; + cache_ent_t lru_list; + cache_ent_t hash_list[FAT_CACHE_HASH_SIZE]; + } fcache; + + /* meta cache */ + struct { + cache_ent_t pool[BUF_CACHE_SIZE]; + cache_ent_t lru_list; + cache_ent_t keep_list; // CACHEs in this list will not be kicked by normal lru operations + cache_ent_t hash_list[BUF_CACHE_HASH_SIZE]; + } dcache; +} FS_INFO_T; + +/*======================================================================*/ +/* */ +/* API FUNCTION DECLARATIONS */ +/* (CHANGE THIS PART IF REQUIRED) */ +/* */ +/*======================================================================*/ + +/*----------------------------------------------------------------------*/ +/* External Function Declarations */ +/*----------------------------------------------------------------------*/ + +/* file system initialization & shutdown functions */ +s32 fsapi_init(void); +s32 fsapi_shutdown(void); + +/* volume management functions */ +s32 fsapi_mount(struct super_block *sb); +s32 fsapi_umount(struct super_block *sb); +s32 fsapi_statfs(struct super_block *sb, VOL_INFO_T *info); +s32 fsapi_sync_fs(struct super_block *sb, s32 do_sync); +s32 fsapi_set_vol_flags(struct super_block *sb, u16 new_flag, s32 always_sync); + +/* file management functions */ +s32 fsapi_lookup(struct inode *inode, u8 *path, FILE_ID_T *fid); +s32 fsapi_create(struct inode *inode, u8 *path, u8 mode, FILE_ID_T *fid); +s32 fsapi_read_link(struct inode *inode, FILE_ID_T *fid, void *buffer, u64 count, u64 *rcount); +s32 fsapi_write_link(struct inode *inode, FILE_ID_T *fid, void *buffer, u64 count, u64 *wcount); +s32 fsapi_remove(struct inode *inode, FILE_ID_T *fid); /* unlink and truncate */ +s32 fsapi_truncate(struct inode *inode, u64 old_size, u64 new_size); +s32 fsapi_rename(struct inode *old_parent_inode, FILE_ID_T *fid, + struct inode *new_parent_inode, struct dentry *new_dentry); +s32 fsapi_unlink(struct inode *inode, FILE_ID_T *fid); +s32 fsapi_read_inode(struct inode *inode, DIR_ENTRY_T *info); +s32 fsapi_write_inode(struct inode *inode, DIR_ENTRY_T *info, int sync); +s32 fsapi_map_clus(struct inode *inode, u32 clu_offset, u32 *clu, int dest); +s32 fsapi_reserve_clus(struct inode *inode); + +/* directory management functions */ +s32 fsapi_mkdir(struct inode *inode, u8 *path, FILE_ID_T *fid); +s32 fsapi_readdir(struct inode *inode, DIR_ENTRY_T 
*dir_entry); +s32 fsapi_rmdir(struct inode *inode, FILE_ID_T *fid); + +/* FAT & buf cache functions */ +s32 fsapi_cache_flush(struct super_block *sb, int do_sync); +s32 fsapi_cache_release(struct super_block *sb); + +/* extra info functions */ +u32 fsapi_get_au_stat(struct super_block *sb, s32 mode); + +/* extent cache functions */ +void fsapi_invalidate_extent(struct inode *inode); + +/* bdev management */ +s32 fsapi_check_bdi_valid(struct super_block *sb); + +#ifdef CONFIG_SDFAT_DFR +/*----------------------------------------------------------------------*/ +/* Defragmentation related */ +/*----------------------------------------------------------------------*/ + +s32 fsapi_dfr_get_info(struct super_block *sb, void *arg); + +s32 fsapi_dfr_scan_dir(struct super_block *sb, void *args); + +s32 fsapi_dfr_validate_clus(struct inode *inode, void *chunk, int skip_prev); +s32 fsapi_dfr_reserve_clus(struct super_block *sb, s32 nr_clus); +s32 fsapi_dfr_mark_ignore(struct super_block *sb, unsigned int clus); +void fsapi_dfr_unmark_ignore_all(struct super_block *sb); + +s32 fsapi_dfr_map_clus(struct inode *inode, u32 clu_offset, u32 *clu); +void fsapi_dfr_writepage_endio(struct page *page); + +void fsapi_dfr_update_fat_prev(struct super_block *sb, int force); +void fsapi_dfr_update_fat_next(struct super_block *sb); +void fsapi_dfr_check_discard(struct super_block *sb); +void fsapi_dfr_free_clus(struct super_block *sb, u32 clus); + +s32 fsapi_dfr_check_dfr_required(struct super_block *sb, int *totalau, int *cleanau, int *fullau); +s32 fsapi_dfr_check_dfr_on(struct inode *inode, loff_t start, loff_t end, s32 cancel, const char *caller); + + +#ifdef CONFIG_SDFAT_DFR_DEBUG +void fsapi_dfr_spo_test(struct super_block *sb, int flag, const char *caller); +#endif /* CONFIG_SDFAT_DFR_DEBUG */ + +#endif /* CONFIG_SDFAT_DFR */ + + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* _SDFAT_API_H */ + +/* end of api.h */ diff --git a/fs/sdfat/blkdev.c b/fs/sdfat/blkdev.c new file mode 100644 index 000000000000..264c670df0f0 --- /dev/null +++ b/fs/sdfat/blkdev.c @@ -0,0 +1,416 @@ +/* + * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. 
+ */
+
+/************************************************************************/
+/*                                                                      */
+/*  PROJECT : exFAT & FAT12/16/32 File System                           */
+/*  FILE    : blkdev.c                                                  */
+/*  PURPOSE : sdFAT Block Device Driver Glue Layer                      */
+/*                                                                      */
+/*----------------------------------------------------------------------*/
+/*  NOTES                                                               */
+/*                                                                      */
+/************************************************************************/
+
+#include <linux/blkdev.h>
+#include <linux/log2.h>
+#include <linux/backing-dev.h>
+
+#include "sdfat.h"
+
+/*----------------------------------------------------------------------*/
+/*  Constant & Macro Definitions                                        */
+/*----------------------------------------------------------------------*/
+
+/*----------------------------------------------------------------------*/
+/*  Global Variable Definitions                                         */
+/*----------------------------------------------------------------------*/
+
+/*----------------------------------------------------------------------*/
+/*  Local Variable Definitions                                          */
+/*----------------------------------------------------------------------*/
+
+/*----------------------------------------------------------------------*/
+/*  FUNCTIONS WHICH HAVE KERNEL VERSION DEPENDENCY                      */
+/************************************************************************/
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 0, 0)
+	/* EMPTY */
+#else /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0) */
+static struct backing_dev_info *inode_to_bdi(struct inode *bd_inode)
+{
+	return bd_inode->i_mapping->backing_dev_info;
+}
+#endif
+
+/*======================================================================*/
+/*  Function Definitions                                                */
+/*======================================================================*/
+s32 bdev_open_dev(struct super_block *sb)
+{
+	FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+	if (fsi->bd_opened)
+		return 0;
+
+	fsi->bd_opened = true;
+	return 0;
+}
+
+s32 bdev_close_dev(struct super_block *sb)
+{
+	FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+	fsi->bd_opened = false;
+	return 0;
+}
+
+static inline s32 block_device_ejected(struct super_block *sb)
+{
+	struct inode *bd_inode = sb->s_bdev->bd_inode;
+	struct backing_dev_info *bdi = inode_to_bdi(bd_inode);
+
+	return (bdi->dev == NULL);
+}
+
+s32 bdev_check_bdi_valid(struct super_block *sb)
+{
+	FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+	if (block_device_ejected(sb)) {
+		if (!(fsi->prev_eio & SDFAT_EIO_BDI)) {
+			fsi->prev_eio |= SDFAT_EIO_BDI;
+			sdfat_log_msg(sb, KERN_ERR, "%s: block device is "
+				"eliminated.(bdi:%p)", __func__, sb->s_bdi);
+			sdfat_debug_warn_on(1);
+		}
+		return -ENXIO;
+	}
+
+	return 0;
+}
+
+
+/* Make a readahead request */
+s32 bdev_readahead(struct super_block *sb, u64 secno, u64 num_secs)
+{
+	FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+	u32 sects_per_page = (PAGE_SIZE >> sb->s_blocksize_bits);
+	struct blk_plug plug;
+	u64 i;
+
+	if (!fsi->bd_opened)
+		return -EIO;
+
+	blk_start_plug(&plug);
+	for (i = 0; i < num_secs; i++) {
+		if (i && !(i & (sects_per_page - 1)))
+			blk_flush_plug(current);
+		sb_breadahead(sb, (sector_t)(secno + i));
+	}
+	blk_finish_plug(&plug);
+
+	return 0;
+}
+
+s32 bdev_mread(struct super_block *sb, u64 secno, struct buffer_head **bh, u64 num_secs, s32 read)
+{
+	FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+	u8 blksize_bits = sb->s_blocksize_bits;
+#ifdef CONFIG_SDFAT_DBG_IOCTL
+	struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+	long flags = sbi->debug_flags;
+
+	if (flags & SDFAT_DEBUGFLAGS_ERROR_RW)
+		return -EIO;
+#endif /* CONFIG_SDFAT_DBG_IOCTL */
+
+	if (!fsi->bd_opened)
+		return -EIO;
+
+	brelse(*bh);
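+
+	/*
+	 * Editorial note: with read != 0 the sector is fetched
+	 * synchronously via __bread(); with read == 0 only a
+	 * buffer_head is set up via __getblk(), which is enough when
+	 * the caller will overwrite the whole block anyway. The third
+	 * argument is a size in bytes (num_secs << blksize_bits).
+	 */
+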
if (read) + *bh = __bread(sb->s_bdev, (sector_t)secno, num_secs << blksize_bits); + else + *bh = __getblk(sb->s_bdev, (sector_t)secno, num_secs << blksize_bits); + + /* read successfully */ + if (*bh) + return 0; + + /* + * patch 1.2.4 : reset ONCE warning message per volume. + */ + if (!(fsi->prev_eio & SDFAT_EIO_READ)) { + fsi->prev_eio |= SDFAT_EIO_READ; + sdfat_log_msg(sb, KERN_ERR, "%s: No bh. I/O error.", __func__); + sdfat_debug_warn_on(1); + } + + return -EIO; +} + +s32 bdev_mwrite(struct super_block *sb, u64 secno, struct buffer_head *bh, u64 num_secs, s32 sync) +{ + u64 count; + struct buffer_head *bh2; + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); +#ifdef CONFIG_SDFAT_DBG_IOCTL + struct sdfat_sb_info *sbi = SDFAT_SB(sb); + long flags = sbi->debug_flags; + + if (flags & SDFAT_DEBUGFLAGS_ERROR_RW) + return -EIO; +#endif /* CONFIG_SDFAT_DBG_IOCTL */ + + if (!fsi->bd_opened) + return -EIO; + + if (secno == bh->b_blocknr) { + set_buffer_uptodate(bh); + mark_buffer_dirty(bh); + if (sync && (sync_dirty_buffer(bh) != 0)) + return -EIO; + } else { + count = num_secs << sb->s_blocksize_bits; + + bh2 = __getblk(sb->s_bdev, (sector_t)secno, count); + + if (!bh2) + goto no_bh; + + lock_buffer(bh2); + memcpy(bh2->b_data, bh->b_data, count); + set_buffer_uptodate(bh2); + mark_buffer_dirty(bh2); + unlock_buffer(bh2); + if (sync && (sync_dirty_buffer(bh2) != 0)) { + __brelse(bh2); + goto no_bh; + } + __brelse(bh2); + } + return 0; +no_bh: + /* + * patch 1.2.4 : reset ONCE warning message per volume. + */ + if (!(fsi->prev_eio & SDFAT_EIO_WRITE)) { + fsi->prev_eio |= SDFAT_EIO_WRITE; + sdfat_log_msg(sb, KERN_ERR, "%s: No bh. I/O error.", __func__); + sdfat_debug_warn_on(1); + } + + return -EIO; +} + +s32 bdev_sync_all(struct super_block *sb) +{ + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); +#ifdef CONFIG_SDFAT_DBG_IOCTL + struct sdfat_sb_info *sbi = SDFAT_SB(sb); + long flags = sbi->debug_flags; + + if (flags & SDFAT_DEBUGFLAGS_ERROR_RW) + return -EIO; +#endif /* CONFIG_SDFAT_DBG_IOCTL */ + + if (!fsi->bd_opened) + return -EIO; + + return sync_blockdev(sb->s_bdev); +} + +/* + * Sector Read/Write Functions + */ +s32 read_sect(struct super_block *sb, u64 sec, struct buffer_head **bh, s32 read) +{ + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + + BUG_ON(!bh); + if ((sec >= fsi->num_sectors) && (fsi->num_sectors > 0)) { + sdfat_fs_error_ratelimit(sb, + "%s: out of range (sect:%llu)", __func__, sec); + return -EIO; + } + + if (bdev_mread(sb, sec, bh, 1, read)) { + sdfat_fs_error_ratelimit(sb, + "%s: I/O error (sect:%llu)", __func__, sec); + return -EIO; + } + + return 0; +} + +s32 write_sect(struct super_block *sb, u64 sec, struct buffer_head *bh, s32 sync) +{ + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + + BUG_ON(!bh); + if ((sec >= fsi->num_sectors) && (fsi->num_sectors > 0)) { + sdfat_fs_error_ratelimit(sb, + "%s: out of range (sect:%llu)", __func__, sec); + return -EIO; + } + + if (bdev_mwrite(sb, sec, bh, 1, sync)) { + sdfat_fs_error_ratelimit(sb, "%s: I/O error (sect:%llu)", + __func__, sec); + return -EIO; + } + + return 0; +} + +s32 read_msect(struct super_block *sb, u64 sec, struct buffer_head **bh, u64 num_secs, s32 read) +{ + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + + BUG_ON(!bh); + if (((sec+num_secs) > fsi->num_sectors) && (fsi->num_sectors > 0)) { + sdfat_fs_error_ratelimit(sb, "%s: out of range(sect:%llu len:%llu)", + __func__, sec, num_secs); + return -EIO; + } + + if (bdev_mread(sb, sec, bh, num_secs, read)) { + sdfat_fs_error_ratelimit(sb, "%s: I/O error (sect:%llu len:%llu)", + __func__, sec, 
num_secs); + return -EIO; + } + + return 0; +} + +s32 write_msect(struct super_block *sb, u64 sec, struct buffer_head *bh, u64 num_secs, s32 sync) +{ + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + + BUG_ON(!bh); + if (((sec+num_secs) > fsi->num_sectors) && (fsi->num_sectors > 0)) { + sdfat_fs_error_ratelimit(sb, "%s: out of range(sect:%llu len:%llu)", + __func__, sec, num_secs); + return -EIO; + } + + + if (bdev_mwrite(sb, sec, bh, num_secs, sync)) { + sdfat_fs_error_ratelimit(sb, "%s: I/O error (sect:%llu len:%llu)", + __func__, sec, num_secs); + return -EIO; + } + + return 0; +} + +static inline void __blkdev_write_bhs(struct buffer_head **bhs, s32 nr_bhs) +{ + s32 i; + + for (i = 0; i < nr_bhs; i++) + write_dirty_buffer(bhs[i], WRITE); +} + +static inline s32 __blkdev_sync_bhs(struct buffer_head **bhs, s32 nr_bhs) +{ + s32 i, err = 0; + + for (i = 0; i < nr_bhs; i++) { + wait_on_buffer(bhs[i]); + if (!err && !buffer_uptodate(bhs[i])) + err = -EIO; + } + return err; +} + +static inline s32 __buffer_zeroed(struct super_block *sb, u64 blknr, u64 num_secs) +{ + struct buffer_head *bhs[MAX_BUF_PER_PAGE]; + s32 nr_bhs = MAX_BUF_PER_PAGE; + u64 last_blknr = blknr + num_secs; + s32 err, i, n; + struct blk_plug plug; + + /* Zeroing the unused blocks on this cluster */ + n = 0; + blk_start_plug(&plug); + while (blknr < last_blknr) { + bhs[n] = sb_getblk(sb, (sector_t)blknr); + if (!bhs[n]) { + err = -ENOMEM; + blk_finish_plug(&plug); + goto error; + } + memset(bhs[n]->b_data, 0, sb->s_blocksize); + set_buffer_uptodate(bhs[n]); + mark_buffer_dirty(bhs[n]); + + n++; + blknr++; + + if (blknr == last_blknr) + break; + + if (n == nr_bhs) { + __blkdev_write_bhs(bhs, n); + + for (i = 0; i < n; i++) + brelse(bhs[i]); + n = 0; + } + } + __blkdev_write_bhs(bhs, n); + blk_finish_plug(&plug); + + err = __blkdev_sync_bhs(bhs, n); + if (err) + goto error; + + for (i = 0; i < n; i++) + brelse(bhs[i]); + + return 0; + +error: + EMSG("%s: failed zeroed sect %llu\n", __func__, blknr); + for (i = 0; i < n; i++) + bforget(bhs[i]); + + return err; +} + +s32 write_msect_zero(struct super_block *sb, u64 sec, u64 num_secs) +{ + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + + if (((sec+num_secs) > fsi->num_sectors) && (fsi->num_sectors > 0)) { + sdfat_fs_error_ratelimit(sb, "%s: out of range(sect:%llu len:%llu)", + __func__, sec, num_secs); + return -EIO; + } + + /* Just return -EAGAIN if it is failed */ + if (__buffer_zeroed(sb, sec, num_secs)) + return -EAGAIN; + + return 0; +} /* end of write_msect_zero */ + +/* end of blkdev.c */ diff --git a/fs/sdfat/cache.c b/fs/sdfat/cache.c new file mode 100644 index 000000000000..8318898646be --- /dev/null +++ b/fs/sdfat/cache.c @@ -0,0 +1,846 @@ +/* + * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +/************************************************************************/ +/* */ +/* PROJECT : exFAT & FAT12/16/32 File System */ +/* FILE : cache.c */ +/* PURPOSE : sdFAT Cache Manager */ +/* (FAT Cache & Buffer Cache) */ +/* */ +/*----------------------------------------------------------------------*/ +/* NOTES */ +/* */ +/* */ +/************************************************************************/ + +#include <linux/swap.h> /* for mark_page_accessed() */ +#include <asm/unaligned.h> + +#include "sdfat.h" +#include "core.h" + +#define DEBUG_HASH_LIST +#define DEBUG_HASH_PREV (0xAAAA5555) +#define DEBUG_HASH_NEXT (0x5555AAAA) + +/*----------------------------------------------------------------------*/ +/* Global Variable Definitions */ +/*----------------------------------------------------------------------*/ +/* All buffer structures are protected w/ fsi->v_sem */ + +/*----------------------------------------------------------------------*/ +/* Local Variable Definitions */ +/*----------------------------------------------------------------------*/ +#define LOCKBIT (0x01) +#define DIRTYBIT (0x02) +#define KEEPBIT (0x04) + +/*----------------------------------------------------------------------*/ +/* Cache handling function declarations */ +/*----------------------------------------------------------------------*/ +static cache_ent_t *__fcache_find(struct super_block *sb, u64 sec); +static cache_ent_t *__fcache_get(struct super_block *sb); +static void __fcache_insert_hash(struct super_block *sb, cache_ent_t *bp); +static void __fcache_remove_hash(cache_ent_t *bp); + +static cache_ent_t *__dcache_find(struct super_block *sb, u64 sec); +static cache_ent_t *__dcache_get(struct super_block *sb); +static void __dcache_insert_hash(struct super_block *sb, cache_ent_t *bp); +static void __dcache_remove_hash(cache_ent_t *bp); + +/*----------------------------------------------------------------------*/ +/* Static functions */ +/*----------------------------------------------------------------------*/ +static void push_to_mru(cache_ent_t *bp, cache_ent_t *list) +{ + bp->next = list->next; + bp->prev = list; + list->next->prev = bp; + list->next = bp; +} + +static void push_to_lru(cache_ent_t *bp, cache_ent_t *list) +{ + bp->prev = list->prev; + bp->next = list; + list->prev->next = bp; + list->prev = bp; +} + +static void move_to_mru(cache_ent_t *bp, cache_ent_t *list) +{ + bp->prev->next = bp->next; + bp->next->prev = bp->prev; + push_to_mru(bp, list); +} + +static void move_to_lru(cache_ent_t *bp, cache_ent_t *list) +{ + bp->prev->next = bp->next; + bp->next->prev = bp->prev; + push_to_lru(bp, list); +} + +static inline s32 __check_hash_valid(cache_ent_t *bp) +{ +#ifdef DEBUG_HASH_LIST + if ((bp->hash.next == (cache_ent_t *)DEBUG_HASH_NEXT) || + (bp->hash.prev == (cache_ent_t *)DEBUG_HASH_PREV)) { + return -EINVAL; + } +#endif + if ((bp->hash.next == bp) || (bp->hash.prev == bp)) + return -EINVAL; + + return 0; +} + +static inline void __remove_from_hash(cache_ent_t *bp) +{ + (bp->hash.prev)->hash.next = bp->hash.next; + (bp->hash.next)->hash.prev = bp->hash.prev; + bp->hash.next = bp; + bp->hash.prev = bp; +#ifdef DEBUG_HASH_LIST + bp->hash.next = (cache_ent_t *)DEBUG_HASH_NEXT; + bp->hash.prev = (cache_ent_t *)DEBUG_HASH_PREV; +#endif +} + +/* Do FAT mirroring (don't sync) + * sec: sector No. in FAT1 + * bh: bh of sec. 
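+ * (Editorial example, numbers illustrative only: with
+ * FAT1_start_sector = 32 and FAT2_start_sector = 8224, a dirty FAT1
+ * sector 40 is mirrored to sector 40 - 32 + 8224 = 8232, matching the
+ * sec2 computation below.)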
+ */ +static inline s32 __fat_copy(struct super_block *sb, u64 sec, struct buffer_head *bh, int sync) +{ +#ifdef CONFIG_SDFAT_FAT_MIRRORING + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + u64 sec2; + + if (fsi->FAT2_start_sector != fsi->FAT1_start_sector) { + sec2 = sec - fsi->FAT1_start_sector + fsi->FAT2_start_sector; + BUG_ON(sec2 != (sec + (u64)fsi->num_FAT_sectors)); + + MMSG("BD: fat mirroring (%llu in FAT1, %llu in FAT2)\n", sec, sec2); + if (write_sect(sb, sec2, bh, sync)) + return -EIO; + } +#else + /* DO NOTHING */ +#endif + return 0; +} /* end of __fat_copy */ + +/* + * returns 1, if bp is flushed + * returns 0, if bp is not dirty + * returns -1, if error occurs + */ +static s32 __fcache_ent_flush(struct super_block *sb, cache_ent_t *bp, u32 sync) +{ + if (!(bp->flag & DIRTYBIT)) + return 0; +#ifdef CONFIG_SDFAT_DELAYED_META_DIRTY + // Make buffer dirty (XXX: Naive impl.) + if (write_sect(sb, bp->sec, bp->bh, 0)) + return -EIO; + + if (__fat_copy(sb, bp->sec, bp->bh, 0)) + return -EIO; +#endif + bp->flag &= ~(DIRTYBIT); + + if (sync) + sync_dirty_buffer(bp->bh); + + return 1; +} + +static s32 __fcache_ent_discard(struct super_block *sb, cache_ent_t *bp) +{ + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + + __fcache_remove_hash(bp); + bp->sec = ~0; + bp->flag = 0; + + if (bp->bh) { + __brelse(bp->bh); + bp->bh = NULL; + } + move_to_lru(bp, &fsi->fcache.lru_list); + return 0; +} + +u8 *fcache_getblk(struct super_block *sb, u64 sec) +{ + cache_ent_t *bp; + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + u32 page_ra_count = FCACHE_MAX_RA_SIZE >> sb->s_blocksize_bits; + + bp = __fcache_find(sb, sec); + if (bp) { + if (bdev_check_bdi_valid(sb)) { + __fcache_ent_flush(sb, bp, 0); + __fcache_ent_discard(sb, bp); + return NULL; + } + move_to_mru(bp, &fsi->fcache.lru_list); + return bp->bh->b_data; + } + + bp = __fcache_get(sb); + if (!__check_hash_valid(bp)) + __fcache_remove_hash(bp); + + bp->sec = sec; + bp->flag = 0; + __fcache_insert_hash(sb, bp); + + /* Naive FAT read-ahead (increase I/O unit to page_ra_count) */ + if ((sec & (page_ra_count - 1)) == 0) + bdev_readahead(sb, sec, (u64)page_ra_count); + + /* + * patch 1.2.4 : buffer_head null pointer exception problem. + * + * When read_sect is failed, fcache should be moved to + * EMPTY hash_list and the first of lru_list. 
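+ * (Editorial note: __fcache_ent_discard() implements exactly that --
+ * it unhashes the entry, resets its fields, drops the bh and parks
+ * the entry back on the LRU list -- so a failed read is never served
+ * from the cache.)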
+ */
+	if (read_sect(sb, sec, &(bp->bh), 1)) {
+		__fcache_ent_discard(sb, bp);
+		return NULL;
+	}
+
+	return bp->bh->b_data;
+}
+
+static inline int __mark_delayed_dirty(struct super_block *sb, cache_ent_t *bp)
+{
+#ifdef CONFIG_SDFAT_DELAYED_META_DIRTY
+	FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+	if (fsi->vol_type == EXFAT)
+		return -ENOTSUPP;
+
+	bp->flag |= DIRTYBIT;
+	return 0;
+#else
+	return -ENOTSUPP;
+#endif
+}
+
+
+
+s32 fcache_modify(struct super_block *sb, u64 sec)
+{
+	cache_ent_t *bp;
+
+	bp = __fcache_find(sb, sec);
+	if (!bp) {
+		sdfat_fs_error(sb, "Can't find fcache (sec 0x%016llx)", sec);
+		return -EIO;
+	}
+
+	if (!__mark_delayed_dirty(sb, bp))
+		return 0;
+
+	if (write_sect(sb, sec, bp->bh, 0))
+		return -EIO;
+
+	if (__fat_copy(sb, sec, bp->bh, 0))
+		return -EIO;
+
+	return 0;
+}
+
+/*======================================================================*/
+/*  Cache Initialization Functions                                      */
+/*======================================================================*/
+s32 meta_cache_init(struct super_block *sb)
+{
+	FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+	s32 i;
+
+	/* LRU list */
+	fsi->fcache.lru_list.next = &fsi->fcache.lru_list;
+	fsi->fcache.lru_list.prev = fsi->fcache.lru_list.next;
+
+	for (i = 0; i < FAT_CACHE_SIZE; i++) {
+		fsi->fcache.pool[i].sec = ~0;
+		fsi->fcache.pool[i].flag = 0;
+		fsi->fcache.pool[i].bh = NULL;
+		fsi->fcache.pool[i].prev = NULL;
+		fsi->fcache.pool[i].next = NULL;
+		push_to_mru(&(fsi->fcache.pool[i]), &fsi->fcache.lru_list);
+	}
+
+	fsi->dcache.lru_list.next = &fsi->dcache.lru_list;
+	fsi->dcache.lru_list.prev = fsi->dcache.lru_list.next;
+	fsi->dcache.keep_list.next = &fsi->dcache.keep_list;
+	fsi->dcache.keep_list.prev = fsi->dcache.keep_list.next;
+
+	// Initially, all the BUF_CACHEs are in the LRU list
+	for (i = 0; i < BUF_CACHE_SIZE; i++) {
+		fsi->dcache.pool[i].sec = ~0;
+		fsi->dcache.pool[i].flag = 0;
+		fsi->dcache.pool[i].bh = NULL;
+		fsi->dcache.pool[i].prev = NULL;
+		fsi->dcache.pool[i].next = NULL;
+		push_to_mru(&(fsi->dcache.pool[i]), &fsi->dcache.lru_list);
+	}
+
+	/* HASH list */
+	for (i = 0; i < FAT_CACHE_HASH_SIZE; i++) {
+		fsi->fcache.hash_list[i].sec = ~0;
+		fsi->fcache.hash_list[i].hash.next = &(fsi->fcache.hash_list[i]);
+		fsi->fcache.hash_list[i].hash.prev = fsi->fcache.hash_list[i].hash.next;
+	}
+
+	for (i = 0; i < FAT_CACHE_SIZE; i++)
+		__fcache_insert_hash(sb, &(fsi->fcache.pool[i]));
+
+	for (i = 0; i < BUF_CACHE_HASH_SIZE; i++) {
+		fsi->dcache.hash_list[i].sec = ~0;
+		fsi->dcache.hash_list[i].hash.next = &(fsi->dcache.hash_list[i]);
+		fsi->dcache.hash_list[i].hash.prev = fsi->dcache.hash_list[i].hash.next;
+	}
+
+	for (i = 0; i < BUF_CACHE_SIZE; i++)
+		__dcache_insert_hash(sb, &(fsi->dcache.pool[i]));
+
+	return 0;
+}
+
+s32 meta_cache_shutdown(struct super_block *sb)
+{
+	return 0;
+}
+
+/*======================================================================*/
+/*  FAT Read/Write Functions                                            */
+/*======================================================================*/
+s32 fcache_release_all(struct super_block *sb)
+{
+	s32 ret = 0;
+	cache_ent_t *bp;
+	FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+	s32 dirtycnt = 0;
+
+	bp = fsi->fcache.lru_list.next;
+	while (bp != &fsi->fcache.lru_list) {
+		s32 ret_tmp = __fcache_ent_flush(sb, bp, 0);
+
+		if (ret_tmp < 0)
+			ret = ret_tmp;
+		else
+			dirtycnt += ret_tmp;
+
+		bp->sec = ~0;
+		bp->flag = 0;
+
+		if (bp->bh) {
+			__brelse(bp->bh);
+			bp->bh = NULL;
+		}
+		bp = bp->next;
+	}
+
+	DMSG("BD:Release / dirty fat cache: %d (err:%d)\n", dirtycnt, ret);
+	return ret;
+}
+
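+/*
+ * Editorial sketch (grounded in api.c from this same patch):
+ * fsapi_cache_flush() drives both flush routines under the volume
+ * lock,
+ *
+ *	mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+ *	fcache_flush(sb, do_sync);
+ *	dcache_flush(sb, do_sync);
+ *	mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+ *
+ * so the LRU walk below is not expected to race with another flusher
+ * on the same volume.
+ */
+
+/*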
internal DIRTYBIT marked => bh dirty */ +s32 fcache_flush(struct super_block *sb, u32 sync) +{ + s32 ret = 0; + cache_ent_t *bp; + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + s32 dirtycnt = 0; + + bp = fsi->fcache.lru_list.next; + while (bp != &fsi->fcache.lru_list) { + ret = __fcache_ent_flush(sb, bp, sync); + if (ret < 0) + break; + + dirtycnt += ret; + bp = bp->next; + } + + MMSG("BD: flush / dirty fat cache: %d (err:%d)\n", dirtycnt, ret); + return ret; +} + +static cache_ent_t *__fcache_find(struct super_block *sb, u64 sec) +{ + s32 off; + cache_ent_t *bp, *hp; + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + + off = (sec + (sec >> fsi->sect_per_clus_bits)) & (FAT_CACHE_HASH_SIZE - 1); + hp = &(fsi->fcache.hash_list[off]); + for (bp = hp->hash.next; bp != hp; bp = bp->hash.next) { + if (bp->sec == sec) { + /* + * patch 1.2.4 : for debugging + */ + WARN(!bp->bh, "[SDFAT] fcache has no bh. " + "It will make system panic.\n"); + + touch_buffer(bp->bh); + return bp; + } + } + return NULL; +} + +static cache_ent_t *__fcache_get(struct super_block *sb) +{ + cache_ent_t *bp; + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + + bp = fsi->fcache.lru_list.prev; +#ifdef CONFIG_SDFAT_DELAYED_META_DIRTY + while (bp->flag & DIRTYBIT) { + cache_ent_t *bp_prev = bp->prev; + + bp = bp_prev; + if (bp == &fsi->fcache.lru_list) { + DMSG("BD: fat cache flooding\n"); + fcache_flush(sb, 0); // flush all dirty FAT caches + bp = fsi->fcache.lru_list.prev; + } + } +#endif +// if (bp->flag & DIRTYBIT) +// sync_dirty_buffer(bp->bh); + + move_to_mru(bp, &fsi->fcache.lru_list); + return bp; +} + +static void __fcache_insert_hash(struct super_block *sb, cache_ent_t *bp) +{ + s32 off; + cache_ent_t *hp; + FS_INFO_T *fsi; + + fsi = &(SDFAT_SB(sb)->fsi); + off = (bp->sec + (bp->sec >> fsi->sect_per_clus_bits)) & (FAT_CACHE_HASH_SIZE-1); + + hp = &(fsi->fcache.hash_list[off]); + bp->hash.next = hp->hash.next; + bp->hash.prev = hp; + hp->hash.next->hash.prev = bp; + hp->hash.next = bp; +} + + +static void __fcache_remove_hash(cache_ent_t *bp) +{ +#ifdef DEBUG_HASH_LIST + if ((bp->hash.next == (cache_ent_t *)DEBUG_HASH_NEXT) || + (bp->hash.prev == (cache_ent_t *)DEBUG_HASH_PREV)) { + EMSG("%s: FATAL: tried to remove already-removed-cache-entry" + "(bp:%p)\n", __func__, bp); + return; + } +#endif + WARN_ON(bp->flag & DIRTYBIT); + __remove_from_hash(bp); +} + +/*======================================================================*/ +/* Buffer Read/Write Functions */ +/*======================================================================*/ +/* Read-ahead a cluster */ +s32 dcache_readahead(struct super_block *sb, u64 sec) +{ + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + struct buffer_head *bh; + u32 max_ra_count = DCACHE_MAX_RA_SIZE >> sb->s_blocksize_bits; + u32 page_ra_count = PAGE_SIZE >> sb->s_blocksize_bits; + u32 adj_ra_count = max(fsi->sect_per_clus, page_ra_count); + u32 ra_count = min(adj_ra_count, max_ra_count); + + /* Read-ahead is not required */ + if (fsi->sect_per_clus == 1) + return 0; + + if (sec < fsi->data_start_sector) { + EMSG("BD: %s: requested sector is invalid(sect:%llu, root:%llu)\n", + __func__, sec, fsi->data_start_sector); + return -EIO; + } + + /* Not sector aligned with ra_count, resize ra_count to page size */ + if ((sec - fsi->data_start_sector) & (ra_count - 1)) + ra_count = page_ra_count; + + bh = sb_find_get_block(sb, sec); + if (!bh || !buffer_uptodate(bh)) + bdev_readahead(sb, sec, (u64)ra_count); + + brelse(bh); + + return 0; +} + +/* + * returns 1, if bp is flushed + * returns 0, if bp is not dirty + * 
returns -1, if error occurs + */ +static s32 __dcache_ent_flush(struct super_block *sb, cache_ent_t *bp, u32 sync) +{ + if (!(bp->flag & DIRTYBIT)) + return 0; +#ifdef CONFIG_SDFAT_DELAYED_META_DIRTY + // Make buffer dirty (XXX: Naive impl.) + if (write_sect(sb, bp->sec, bp->bh, 0)) + return -EIO; +#endif + bp->flag &= ~(DIRTYBIT); + + if (sync) + sync_dirty_buffer(bp->bh); + + return 1; +} + +static s32 __dcache_ent_discard(struct super_block *sb, cache_ent_t *bp) +{ + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + + MMSG("%s : bp[%p] (sec:%016llx flag:%08x bh:%p) list(prev:%p next:%p) " + "hash(prev:%p next:%p)\n", __func__, + bp, bp->sec, bp->flag, bp->bh, bp->prev, bp->next, + bp->hash.prev, bp->hash.next); + + __dcache_remove_hash(bp); + bp->sec = ~0; + bp->flag = 0; + + if (bp->bh) { + __brelse(bp->bh); + bp->bh = NULL; + } + + move_to_lru(bp, &fsi->dcache.lru_list); + return 0; +} + +u8 *dcache_getblk(struct super_block *sb, u64 sec) +{ + cache_ent_t *bp; + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + + bp = __dcache_find(sb, sec); + if (bp) { + if (bdev_check_bdi_valid(sb)) { + MMSG("%s: found cache(%p, sect:%llu). But invalid BDI\n" + , __func__, bp, sec); + __dcache_ent_flush(sb, bp, 0); + __dcache_ent_discard(sb, bp); + return NULL; + } + + if (!(bp->flag & KEEPBIT)) // already in keep list + move_to_mru(bp, &fsi->dcache.lru_list); + + return bp->bh->b_data; + } + + bp = __dcache_get(sb); + + if (!__check_hash_valid(bp)) + __dcache_remove_hash(bp); + + bp->sec = sec; + bp->flag = 0; + __dcache_insert_hash(sb, bp); + + if (read_sect(sb, sec, &(bp->bh), 1)) { + __dcache_ent_discard(sb, bp); + return NULL; + } + + return bp->bh->b_data; + +} + +s32 dcache_modify(struct super_block *sb, u64 sec) +{ + s32 ret = -EIO; + cache_ent_t *bp; + + set_sb_dirty(sb); + + bp = __dcache_find(sb, sec); + if (unlikely(!bp)) { + sdfat_fs_error(sb, "Can`t find dcache (sec 0x%016llx)", sec); + return -EIO; + } +#ifdef CONFIG_SDFAT_DELAYED_META_DIRTY + if (SDFAT_SB(sb)->fsi.vol_type != EXFAT) { + bp->flag |= DIRTYBIT; + return 0; + } +#endif + ret = write_sect(sb, sec, bp->bh, 0); + + if (ret) { + DMSG("%s : failed to modify buffer(err:%d, sec:%llu, bp:0x%p)\n", + __func__, ret, sec, bp); + } + + return ret; +} + +s32 dcache_lock(struct super_block *sb, u64 sec) +{ + cache_ent_t *bp; + + bp = __dcache_find(sb, sec); + if (likely(bp)) { + bp->flag |= LOCKBIT; + return 0; + } + + EMSG("%s : failed to lock buffer(sec:%llu, bp:0x%p)\n", __func__, sec, bp); + return -EIO; +} + +s32 dcache_unlock(struct super_block *sb, u64 sec) +{ + cache_ent_t *bp; + + bp = __dcache_find(sb, sec); + if (likely(bp)) { + bp->flag &= ~(LOCKBIT); + return 0; + } + + EMSG("%s : failed to unlock buffer (sec:%llu, bp:0x%p)\n", __func__, sec, bp); + return -EIO; +} + +s32 dcache_release(struct super_block *sb, u64 sec) +{ + cache_ent_t *bp; + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + + bp = __dcache_find(sb, sec); + if (unlikely(!bp)) + return -ENOENT; + +#ifdef CONFIG_SDFAT_DELAYED_META_DIRTY + if (bp->flag & DIRTYBIT) { + if (write_sect(sb, bp->sec, bp->bh, 0)) + return -EIO; + } +#endif + bp->sec = ~0; + bp->flag = 0; + + if (bp->bh) { + __brelse(bp->bh); + bp->bh = NULL; + } + + move_to_lru(bp, &fsi->dcache.lru_list); + return 0; +} + +s32 dcache_release_all(struct super_block *sb) +{ + s32 ret = 0; + cache_ent_t *bp; + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); +#ifdef CONFIG_SDFAT_DELAYED_META_DIRTY + s32 dirtycnt = 0; +#endif + + /* Connect list elements: + * LRU list : (A - B - ... - bp_front) + (bp_first + ... 
+ bp_last) + */ + while (fsi->dcache.keep_list.prev != &fsi->dcache.keep_list) { + cache_ent_t *bp_keep = fsi->dcache.keep_list.prev; + // bp_keep->flag &= ~(KEEPBIT); // Will be 0-ed later + move_to_mru(bp_keep, &fsi->dcache.lru_list); + } + + bp = fsi->dcache.lru_list.next; + while (bp != &fsi->dcache.lru_list) { +#ifdef CONFIG_SDFAT_DELAYED_META_DIRTY + if (bp->flag & DIRTYBIT) { + dirtycnt++; + if (write_sect(sb, bp->sec, bp->bh, 0)) + ret = -EIO; + } +#endif + bp->sec = ~0; + bp->flag = 0; + + if (bp->bh) { + __brelse(bp->bh); + bp->bh = NULL; + } + bp = bp->next; + } + +#ifdef CONFIG_SDFAT_DELAYED_META_DIRTY + DMSG("BD:Release / dirty buf cache: %d (err:%d)", dirtycnt, ret); +#endif + return ret; +} + + +s32 dcache_flush(struct super_block *sb, u32 sync) +{ + s32 ret = 0; + cache_ent_t *bp; + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + s32 dirtycnt = 0; + s32 keepcnt = 0; + + /* Connect list elements: + * LRU list : (A - B - ... - bp_front) + (bp_first + ... + bp_last) + */ + while (fsi->dcache.keep_list.prev != &fsi->dcache.keep_list) { + cache_ent_t *bp_keep = fsi->dcache.keep_list.prev; + + bp_keep->flag &= ~(KEEPBIT); // Will be 0-ed later + move_to_mru(bp_keep, &fsi->dcache.lru_list); + keepcnt++; + } + + bp = fsi->dcache.lru_list.next; + while (bp != &fsi->dcache.lru_list) { + if (bp->flag & DIRTYBIT) { +#ifdef CONFIG_SDFAT_DELAYED_META_DIRTY + // Make buffer dirty (XXX: Naive impl.) + if (write_sect(sb, bp->sec, bp->bh, 0)) { + ret = -EIO; + break; + } + +#endif + bp->flag &= ~(DIRTYBIT); + dirtycnt++; + + if (sync != 0) + sync_dirty_buffer(bp->bh); + } + bp = bp->next; + } + + MMSG("BD: flush / dirty dentry cache: %d (%d from keeplist, err:%d)\n", + dirtycnt, keepcnt, ret); + return ret; +} + +static cache_ent_t *__dcache_find(struct super_block *sb, u64 sec) +{ + s32 off; + cache_ent_t *bp, *hp; + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + + off = (sec + (sec >> fsi->sect_per_clus_bits)) & (BUF_CACHE_HASH_SIZE - 1); + + hp = &(fsi->dcache.hash_list[off]); + for (bp = hp->hash.next; bp != hp; bp = bp->hash.next) { + if (bp->sec == sec) { + touch_buffer(bp->bh); + return bp; + } + } + return NULL; +} + +static cache_ent_t *__dcache_get(struct super_block *sb) +{ + cache_ent_t *bp; + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + + bp = fsi->dcache.lru_list.prev; +#ifdef CONFIG_SDFAT_DELAYED_META_DIRTY + while (bp->flag & (DIRTYBIT | LOCKBIT)) { + cache_ent_t *bp_prev = bp->prev; // hold prev + + if (bp->flag & DIRTYBIT) { + MMSG("BD: Buf cache => Keep list\n"); + bp->flag |= KEEPBIT; + move_to_mru(bp, &fsi->dcache.keep_list); + } + bp = bp_prev; + + /* If all dcaches are dirty */ + if (bp == &fsi->dcache.lru_list) { + DMSG("BD: buf cache flooding\n"); + dcache_flush(sb, 0); + bp = fsi->dcache.lru_list.prev; + } + } +#else + while (bp->flag & LOCKBIT) + bp = bp->prev; +#endif +// if (bp->flag & DIRTYBIT) +// sync_dirty_buffer(bp->bh); + + move_to_mru(bp, &fsi->dcache.lru_list); + return bp; +} + +static void __dcache_insert_hash(struct super_block *sb, cache_ent_t *bp) +{ + s32 off; + cache_ent_t *hp; + FS_INFO_T *fsi; + + fsi = &(SDFAT_SB(sb)->fsi); + off = (bp->sec + (bp->sec >> fsi->sect_per_clus_bits)) & (BUF_CACHE_HASH_SIZE-1); + + hp = &(fsi->dcache.hash_list[off]); + bp->hash.next = hp->hash.next; + bp->hash.prev = hp; + hp->hash.next->hash.prev = bp; + hp->hash.next = bp; +} + +static void __dcache_remove_hash(cache_ent_t *bp) +{ +#ifdef DEBUG_HASH_LIST + if ((bp->hash.next == (cache_ent_t *)DEBUG_HASH_NEXT) || + (bp->hash.prev == (cache_ent_t *)DEBUG_HASH_PREV)) { + EMSG("%s: 
FATAL: tried to remove already-removed-cache-entry" + "(bp:%p)\n", __func__, bp); + return; + } +#endif + WARN_ON(bp->flag & DIRTYBIT); + __remove_from_hash(bp); +} + + +/* end of cache.c */ diff --git a/fs/sdfat/config.h b/fs/sdfat/config.h new file mode 100644 index 000000000000..6e2a4e80932c --- /dev/null +++ b/fs/sdfat/config.h @@ -0,0 +1,146 @@ +/* + * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef _SDFAT_CONFIG_H +#define _SDFAT_CONFIG_H +/*======================================================================*/ +/* */ +/* FFS CONFIGURATIONS */ +/* (CHANGE THIS PART IF REQUIRED) */ +/* */ +/*======================================================================*/ + +/*----------------------------------------------------------------------*/ +/* Feature Config */ +/*----------------------------------------------------------------------*/ + +/*----------------------------------------------------------------------*/ +/* Debug/Experimental Config */ +/*----------------------------------------------------------------------*/ +//#define CONFIG_SDFAT_TRACE_IO +//#define CONFIG_SDFAT_TRACE_LOCK /* Trace elapsed time in lock_super(sb) */ + +/*----------------------------------------------------------------------*/ +/* Defragmentation Config */ +/*----------------------------------------------------------------------*/ +//#define CONFIG_SDFAT_DFR +//#define CONFIG_SDFAT_DFR_PACKING +//#define CONFIG_SDFAT_DFR_DEBUG + +/*----------------------------------------------------------------------*/ +/* Config for Kernel equal or newer than 3.7 */ +/*----------------------------------------------------------------------*/ +#ifndef CONFIG_SDFAT_WRITE_SB_INTERVAL_CSECS +#define CONFIG_SDFAT_WRITE_SB_INTERVAL_CSECS (dirty_writeback_interval) +#endif + +/*----------------------------------------------------------------------*/ +/* Default Kconfig */ +/*----------------------------------------------------------------------*/ +/* default mount options */ +#ifndef CONFIG_SDFAT_DEFAULT_CODEPAGE /* if Kconfig lacked codepage */ +#define CONFIG_SDFAT_DEFAULT_CODEPAGE 437 +#endif + +#ifndef CONFIG_SDFAT_DEFAULT_IOCHARSET /* if Kconfig lacked iocharset */ +#define CONFIG_SDFAT_DEFAULT_IOCHARSET "utf8" +#endif + +#ifndef CONFIG_SDFAT_FAT32_SHORTNAME_SEQ /* Shortname ~1, ... 
~9 have higher + * priority (WIN32/VFAT-like) + */ +//#define CONFIG_SDFAT_FAT32_SHORTNAME_SEQ +#endif + +#ifndef CONFIG_SDFAT_ALIGNED_MPAGE_WRITE +//#define CONFIG_SDFAT_ALIGNED_MPAGE_WRITE +#endif + +#ifndef CONFIG_SDFAT_FAT_MIRRORING /* if Kconfig lacked fat-mirroring option */ +#define CONFIG_SDFAT_FAT_MIRRORING /* Write FAT 1, FAT 2 simultaneously */ +#endif + +#ifndef CONFIG_SDFAT_DELAYED_META_DIRTY +//#define CONFIG_SDFAT_DELAYED_META_DIRTY /* delayed DIR/FAT dirty support */ +#endif + +#ifndef CONFIG_SDFAT_SUPPORT_DIR_SYNC +//#define CONFIG_SDFAT_SUPPORT_DIR_SYNC /* support DIR_SYNC */ +#endif + +#ifndef CONFIG_SDFAT_CHECK_RO_ATTR +//#define CONFIG_SDFAT_CHECK_RO_ATTR +#endif + +#ifndef CONFIG_SDFAT_RESTRICT_EXT_ONLY_SFN +#define CONFIG_SDFAT_RESTRICT_EXT_ONLY_SFN +#endif + +#ifndef CONFIG_SDFAT_ALLOW_LOOKUP_LOSSY_SFN +//#define CONFIG_SDFAT_ALLOW_LOOKUP_LOSSY_SFN +#endif + +#ifndef CONFIG_SDFAT_DBG_SHOW_PID +//#define CONFIG_SDFAT_DBG_SHOW_PID +#endif + +#ifndef CONFIG_SDFAT_VIRTUAL_XATTR +//#define CONFIG_SDFAT_VIRTUAL_XATTR +#endif + +#ifndef CONFIG_SDFAT_SUPPORT_STLOG +//#define CONFIG_SDFAT_SUPPORT_STLOG +#endif + +#ifndef CONFIG_SDFAT_DEBUG +//{ +//#define CONFIG_SDFAT_DEBUG + +#ifndef CONFIG_SDFAT_DBG_IOCTL +//#define CONFIG_SDFAT_DBG_IOCTL +#endif + +#ifndef CONFIG_SDFAT_DBG_MSG +//#define CONFIG_SDFAT_DBG_MSG +#endif + +#ifndef CONFIG_SDFAT_DBG_CAREFUL +//#define CONFIG_SDFAT_DBG_CAREFUL +#endif + +#ifndef CONFIG_SDFAT_DBG_BUGON +//#define CONFIG_SDFAT_DBG_BUGON +#endif + +#ifndef CONFIG_SDFAT_DBG_WARNON +//#define CONFIG_SDFAT_DBG_WARNON +#endif +//} +#endif /* CONFIG_SDFAT_DEBUG */ + + +#ifndef CONFIG_SDFAT_TRACE_SB_LOCK +//#define CONFIG_SDFAT_TRACE_SB_LOCK +#endif + +#ifndef CONFIG_SDFAT_TRACE_ELAPSED_TIME +//#define CONFIG_SDFAT_TRACE_ELAPSED_TIME +#endif + +#endif /* _SDFAT_CONFIG_H */ + +/* end of config.h */ diff --git a/fs/sdfat/core.c b/fs/sdfat/core.c new file mode 100644 index 000000000000..3a5af0b83d59 --- /dev/null +++ b/fs/sdfat/core.c @@ -0,0 +1,3694 @@ +/* + * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +/************************************************************************/ +/* */ +/* PROJECT : exFAT & FAT12/16/32 File System */ +/* FILE : core.c */ +/* PURPOSE : FAT & exFAT common core code for sdFAT */ +/* */ +/*----------------------------------------------------------------------*/ +/* NOTES */ +/* */ +/* */ +/************************************************************************/ + +#include <linux/version.h> +#include <linux/blkdev.h> +#include <linux/workqueue.h> +#include <linux/writeback.h> +#include <linux/kernel.h> +#include <linux/log2.h> +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 16, 0) +#include <linux/iversion.h> +#endif + +#include "sdfat.h" +#include "core.h" +#include <asm/byteorder.h> +#include <asm/unaligned.h> + + +/************************************************************************* + * FUNCTIONS WHICH HAS KERNEL VERSION DEPENDENCY + *************************************************************************/ +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 16, 0) +static inline u64 inode_peek_iversion(struct inode *inode) +{ + return inode->i_version; +} +#endif + + +/*----------------------------------------------------------------------*/ +/* Constant & Macro Definitions */ +/*----------------------------------------------------------------------*/ +static inline void __set_sb_dirty(struct super_block *sb) +{ +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 7, 0) + sb->s_dirt = 1; +#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0) */ + struct sdfat_sb_info *sbi = SDFAT_SB(sb); + + sbi->s_dirt = 1; + /* Insert work */ + spin_lock(&sbi->work_lock); + if (!sbi->write_super_queued) { + unsigned long delay; + + delay = msecs_to_jiffies(CONFIG_SDFAT_WRITE_SB_INTERVAL_CSECS * 10); + queue_delayed_work(system_long_wq, &sbi->write_super_work, delay); + sbi->write_super_queued = 1; + } + spin_unlock(&sbi->work_lock); +#endif +} + +void set_sb_dirty(struct super_block *sb) +{ + __set_sb_dirty(sb); + // XXX: to be removed later, prints too much output + //TMSG("%s finished.\n", __func__); +} + +/*----------------------------------------------------------------------*/ +/* Global Variable Definitions */ +/*----------------------------------------------------------------------*/ + +/*----------------------------------------------------------------------*/ +/* Local Variable Definitions */ +/*----------------------------------------------------------------------*/ + +static s8 *reserved_names[] = { + "AUX ", "CON ", "NUL ", "PRN ", + "COM1 ", "COM2 ", "COM3 ", "COM4 ", + "COM5 ", "COM6 ", "COM7 ", "COM8 ", "COM9 ", + "LPT1 ", "LPT2 ", "LPT3 ", "LPT4 ", + "LPT5 ", "LPT6 ", "LPT7 ", "LPT8 ", "LPT9 ", + NULL +}; + +/*======================================================================*/ +/* Local Function Definitions */ +/*======================================================================*/ + +/* + * File System Management Functions + */ + +static s32 check_type_size(void) +{ + /* critical check for system requirement on size of DENTRY_T structure */ + if (sizeof(DENTRY_T) != DENTRY_SIZE) + return -EINVAL; + + if (sizeof(DOS_DENTRY_T) != DENTRY_SIZE) + return -EINVAL; + + if (sizeof(EXT_DENTRY_T) != DENTRY_SIZE) + return -EINVAL; + + if (sizeof(FILE_DENTRY_T) != DENTRY_SIZE) + return -EINVAL; + + if (sizeof(STRM_DENTRY_T) != DENTRY_SIZE) + return -EINVAL; + + if (sizeof(NAME_DENTRY_T) != DENTRY_SIZE) + return -EINVAL; + + if (sizeof(BMAP_DENTRY_T) != DENTRY_SIZE) + return -EINVAL; + + if (sizeof(CASE_DENTRY_T) != DENTRY_SIZE) + return -EINVAL; + + if 
(sizeof(VOLM_DENTRY_T) != DENTRY_SIZE) + return -EINVAL; + + return 0; +} + +static s32 __fs_set_vol_flags(struct super_block *sb, u16 new_flag, s32 always_sync) +{ + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + s32 err; + s32 sync = 0; + + /* flags are not changed */ + if (fsi->vol_flag == new_flag) + return 0; + + fsi->vol_flag = new_flag; + + /* skip updating volume dirty flag, + * if this volume has been mounted with read-only + */ + if (sb->s_flags & MS_RDONLY) + return 0; + + if (!fsi->pbr_bh) { + err = read_sect(sb, 0, &(fsi->pbr_bh), 1); + if (err) { + EMSG("%s : failed to read boot sector\n", __func__); + return err; + } + } + + if (fsi->vol_type == EXFAT) { + pbr64_t *bpb = (pbr64_t *)fsi->pbr_bh->b_data; + bpb->bsx.vol_flags = cpu_to_le16(new_flag); + } else if (fsi->vol_type == FAT32) { + pbr32_t *bpb = (pbr32_t *)fsi->pbr_bh->b_data; + bpb->bsx.state = new_flag & VOL_DIRTY ? FAT_VOL_DIRTY : 0x00; + } else { /* FAT16/12 */ + pbr16_t *bpb = (pbr16_t *) fsi->pbr_bh->b_data; + bpb->bpb.state = new_flag & VOL_DIRTY ? FAT_VOL_DIRTY : 0x00; + } + + if (always_sync) + sync = 1; + else if ((new_flag == VOL_DIRTY) && (!buffer_dirty(fsi->pbr_bh))) + sync = 1; + else + sync = 0; + + err = write_sect(sb, 0, fsi->pbr_bh, sync); + if (err) + EMSG("%s : failed to modify volume flag\n", __func__); + + return err; +} + +static s32 fs_set_vol_flags(struct super_block *sb, u16 new_flag) +{ + return __fs_set_vol_flags(sb, new_flag, 0); +} + +s32 fscore_set_vol_flags(struct super_block *sb, u16 new_flag, s32 always_sync) +{ + return __fs_set_vol_flags(sb, new_flag, always_sync); +} + +static inline s32 __fs_meta_sync(struct super_block *sb, s32 do_sync) +{ +#ifdef CONFIG_SDFAT_DELAYED_META_DIRTY + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + + if (fsi->vol_type != EXFAT) { + MMSG("meta flush in fs_sync(sync=%d)\n", do_sync); + fcache_flush(sb, 0); + dcache_flush(sb, 0); + } +#else + /* DO NOTHING */ +#endif + return 0; +} + +static s32 fs_sync(struct super_block *sb, s32 do_sync) +{ + s32 err; + + if (!do_sync) + return 0; + + err = __fs_meta_sync(sb, do_sync); + + if (!err) + err = bdev_sync_all(sb); + + if (err) + EMSG("%s : failed to sync. 
(err:%d)\n", __func__, err); + + return err; +} + +/* + * Cluster Management Functions + */ + +static s32 __clear_cluster(struct inode *inode, u32 clu) +{ + u64 s, n; + struct super_block *sb = inode->i_sb; + u32 sect_size = (u32)sb->s_blocksize; + s32 ret = 0; + struct buffer_head *tmp_bh = NULL; + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + + if (IS_CLUS_FREE(clu)) { /* FAT16 root_dir */ + s = fsi->root_start_sector; + n = fsi->data_start_sector; + } else { + s = CLUS_TO_SECT(fsi, clu); + n = s + fsi->sect_per_clus; + } + + if (IS_DIRSYNC(inode)) { + ret = write_msect_zero(sb, s, (u64)fsi->sect_per_clus); + if (ret != -EAGAIN) + return ret; + } + + /* Trying buffered zero writes + * if it doesn't have DIRSYNC or write_msect_zero() returned -EAGAIN + */ + for ( ; s < n; s++) { +#if 0 + dcache_release(sb, s); +#endif + ret = read_sect(sb, s, &tmp_bh, 0); + if (ret) + goto out; + + memset((u8 *)tmp_bh->b_data, 0x0, sect_size); + ret = write_sect(sb, s, tmp_bh, 0); + if (ret) + goto out; + } +out: + brelse(tmp_bh); + return ret; +} /* end of __clear_cluster */ + +static s32 __find_last_cluster(struct super_block *sb, CHAIN_T *p_chain, u32 *ret_clu) +{ + u32 clu, next; + u32 count = 0; + + next = p_chain->dir; + if (p_chain->flags == 0x03) { + *ret_clu = next + p_chain->size - 1; + return 0; + } + + do { + count++; + clu = next; + if (fat_ent_get_safe(sb, clu, &next)) + return -EIO; + } while (!IS_CLUS_EOF(next)); + + if (p_chain->size != count) { + sdfat_fs_error(sb, "bogus directory size " + "(clus : ondisk(%d) != counted(%d))", + p_chain->size, count); + sdfat_debug_bug_on(1); + return -EIO; + } + + *ret_clu = clu; + return 0; +} + + +static s32 __count_num_clusters(struct super_block *sb, CHAIN_T *p_chain, u32 *ret_count) +{ + u32 i, count; + u32 clu; + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + + if (!p_chain->dir || IS_CLUS_EOF(p_chain->dir)) { + *ret_count = 0; + return 0; + } + + if (p_chain->flags == 0x03) { + *ret_count = p_chain->size; + return 0; + } + + clu = p_chain->dir; + count = 0; + for (i = CLUS_BASE; i < fsi->num_clusters; i++) { + count++; + if (fat_ent_get_safe(sb, clu, &clu)) + return -EIO; + if (IS_CLUS_EOF(clu)) + break; + } + + *ret_count = count; + return 0; +} + +/* + * Upcase table Management Functions + */ +static void free_upcase_table(struct super_block *sb) +{ + u32 i; + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + u16 **upcase_table; + + upcase_table = fsi->vol_utbl; + for (i = 0 ; i < UTBL_COL_COUNT ; i++) { + /* kfree(NULL) is safe */ + kfree(upcase_table[i]); + upcase_table[i] = NULL; + } + + /* kfree(NULL) is safe */ + kfree(fsi->vol_utbl); + fsi->vol_utbl = NULL; +} + +static s32 __load_upcase_table(struct super_block *sb, u64 sector, u64 num_sectors, u32 utbl_checksum) +{ + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + struct buffer_head *tmp_bh = NULL; + u32 sect_size = (u32)sb->s_blocksize; + s32 ret = -EIO; + u32 i, j; + + u8 skip = false; + u32 index = 0; + u32 checksum = 0; + u16 **upcase_table = kzalloc((UTBL_COL_COUNT * sizeof(u16 *)), GFP_KERNEL); + + if (!upcase_table) + return -ENOMEM; + /* thanks for kzalloc + * memset(upcase_table, 0, UTBL_COL_COUNT * sizeof(u16 *)); + */ + + fsi->vol_utbl = upcase_table; + num_sectors += sector; + + while (sector < num_sectors) { + ret = read_sect(sb, sector, &tmp_bh, 1); + if (ret) { + EMSG("%s: failed to read sector(0x%llx)\n", + __func__, sector); + goto error; + } + sector++; + + for (i = 0; i < sect_size && index <= 0xFFFF; i += 2) { + /* FIXME : is __le16 ok? 
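(the get_unaligned_le16() used below handles both byte order and alignment, either way) 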
*/ + //u16 uni = le16_to_cpu(((__le16*)(tmp_bh->b_data))[i]); + u16 uni = get_unaligned_le16((u8 *)tmp_bh->b_data+i); + + checksum = ((checksum & 1) ? 0x80000000 : 0) + + (checksum >> 1) + *(((u8 *)tmp_bh->b_data)+i); + checksum = ((checksum & 1) ? 0x80000000 : 0) + + (checksum >> 1) + *(((u8 *)tmp_bh->b_data)+(i+1)); + + if (skip) { + MMSG("skip from 0x%X to 0x%X(amount of 0x%X)\n", + index, index+uni, uni); + index += uni; + skip = false; + } else if (uni == index) { + index++; + } else if (uni == 0xFFFF) { + skip = true; + } else { /* uni != index , uni != 0xFFFF */ + u16 col_index = get_col_index(index); + + if (!upcase_table[col_index]) { + upcase_table[col_index] = + kmalloc((UTBL_ROW_COUNT * sizeof(u16)), GFP_KERNEL); + if (!upcase_table[col_index]) { + EMSG("failed to allocate memory" + " for column 0x%X\n", + col_index); + ret = -ENOMEM; + goto error; + } + + for (j = 0; j < UTBL_ROW_COUNT; j++) + upcase_table[col_index][j] = (col_index << LOW_INDEX_BIT) | j; + } + + upcase_table[col_index][get_row_index(index)] = uni; + index++; + } + } + } + if (index >= 0xFFFF && utbl_checksum == checksum) { + DMSG("%s: load upcase table successfully" + "(idx:0x%08x, utbl_chksum:0x%08x)\n", + __func__, index, utbl_checksum); + if (tmp_bh) + brelse(tmp_bh); + return 0; + } + + EMSG("%s: failed to load upcase table" + "(idx:0x%08x, chksum:0x%08x, utbl_chksum:0x%08x)\n", + __func__, index, checksum, utbl_checksum); + + ret = -EINVAL; +error: + if (tmp_bh) + brelse(tmp_bh); + free_upcase_table(sb); + return ret; +} + +static s32 __load_default_upcase_table(struct super_block *sb) +{ + s32 i, ret = -EIO; + u32 j; + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + + u8 skip = false; + u32 index = 0; + u16 uni = 0; + u16 **upcase_table; + + upcase_table = kmalloc((UTBL_COL_COUNT * sizeof(u16 *)), GFP_KERNEL); + if (!upcase_table) + return -ENOMEM; + + fsi->vol_utbl = upcase_table; + memset(upcase_table, 0, UTBL_COL_COUNT * sizeof(u16 *)); + + for (i = 0; index <= 0xFFFF && i < SDFAT_NUM_UPCASE*2; i += 2) { + /* FIXME : is __le16 ok? 
*/ + //uni = le16_to_cpu(((__le16*)uni_def_upcase)[i>>1]); + uni = get_unaligned_le16((u8 *)uni_def_upcase+i); + if (skip) { + MMSG("skip from 0x%x ", index); + index += uni; + MMSG("to 0x%x (amount of 0x%x)\n", index, uni); + skip = false; + } else if (uni == index) { + index++; + } else if (uni == 0xFFFF) { + skip = true; + } else { /* uni != index , uni != 0xFFFF */ + u16 col_index = get_col_index(index); + + if (!upcase_table[col_index]) { + upcase_table[col_index] = kmalloc((UTBL_ROW_COUNT * sizeof(u16)), GFP_KERNEL); + if (!upcase_table[col_index]) { + EMSG("failed to allocate memory for " + "new column 0x%x\n", col_index); + ret = -ENOMEM; + goto error; + } + + for (j = 0; j < UTBL_ROW_COUNT; j++) + upcase_table[col_index][j] = (col_index << LOW_INDEX_BIT) | j; + } + + upcase_table[col_index][get_row_index(index)] = uni; + index++; + } + } + + if (index >= 0xFFFF) + return 0; + +error: + /* FATAL error: default upcase table has error */ + free_upcase_table(sb); + return ret; +} + +static s32 load_upcase_table(struct super_block *sb) +{ + s32 i, ret; + u32 tbl_clu, type; + u64 sector, tbl_size, num_sectors; + u8 blksize_bits = sb->s_blocksize_bits; + CHAIN_T clu; + CASE_DENTRY_T *ep; + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + + clu.dir = fsi->root_dir; + clu.flags = 0x01; + + if (fsi->vol_type != EXFAT) + goto load_default; + + while (!IS_CLUS_EOF(clu.dir)) { + for (i = 0; i < fsi->dentries_per_clu; i++) { + ep = (CASE_DENTRY_T *) get_dentry_in_dir(sb, &clu, i, NULL); + if (!ep) + return -EIO; + + type = fsi->fs_func->get_entry_type((DENTRY_T *) ep); + + if (type == TYPE_UNUSED) + break; + if (type != TYPE_UPCASE) + continue; + + tbl_clu = le32_to_cpu(ep->start_clu); + tbl_size = le64_to_cpu(ep->size); + + sector = CLUS_TO_SECT(fsi, tbl_clu); + num_sectors = ((tbl_size - 1) >> blksize_bits) + 1; + ret = __load_upcase_table(sb, sector, num_sectors, + le32_to_cpu(ep->checksum)); + + if (ret && (ret != -EIO)) + goto load_default; + + /* load successfully */ + return ret; + } + + if (get_next_clus_safe(sb, &(clu.dir))) + return -EIO; + } + +load_default: + sdfat_log_msg(sb, KERN_INFO, "trying to load default upcase table"); + /* load default upcase table */ + return __load_default_upcase_table(sb); +} /* end of load_upcase_table */ + + +/* + * Directory Entry Management Functions + */ +s32 walk_fat_chain(struct super_block *sb, CHAIN_T *p_dir, u32 byte_offset, u32 *clu) +{ + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + u32 clu_offset; + u32 cur_clu; + + clu_offset = byte_offset >> fsi->cluster_size_bits; + cur_clu = p_dir->dir; + + if (p_dir->flags == 0x03) { + cur_clu += clu_offset; + } else { + while (clu_offset > 0) { + if (get_next_clus_safe(sb, &cur_clu)) + return -EIO; + if (IS_CLUS_EOF(cur_clu)) { + sdfat_fs_error(sb, "invalid dentry access " + "beyond EOF (clu : %u, eidx : %d)", + p_dir->dir, + byte_offset >> DENTRY_SIZE_BITS); + return -EIO; + } + clu_offset--; + } + } + + if (clu) + *clu = cur_clu; + return 0; +} + +static s32 find_location(struct super_block *sb, CHAIN_T *p_dir, s32 entry, u64 *sector, s32 *offset) +{ + s32 ret; + u32 off, clu = 0; + u32 blksize_mask = (u32)(sb->s_blocksize-1); + u8 blksize_bits = sb->s_blocksize_bits; + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + + off = entry << DENTRY_SIZE_BITS; + + /* FAT16 root_dir */ + if (IS_CLUS_FREE(p_dir->dir)) { + *offset = off & blksize_mask; + *sector = off >> blksize_bits; + *sector += fsi->root_start_sector; + return 0; + } + + ret = walk_fat_chain(sb, p_dir, off, &clu); + if (ret) + return ret; + + /* byte offset in 
cluster */ + off &= (fsi->cluster_size - 1); + + /* byte offset in sector */ + *offset = off & blksize_mask; + + /* sector offset in cluster */ + *sector = off >> blksize_bits; + *sector += CLUS_TO_SECT(fsi, clu); + return 0; +} /* end of find_location */ + +DENTRY_T *get_dentry_in_dir(struct super_block *sb, CHAIN_T *p_dir, s32 entry, u64 *sector) +{ + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + u32 dentries_per_page = PAGE_SIZE >> DENTRY_SIZE_BITS; + s32 off; + u64 sec; + u8 *buf; + + if (p_dir->dir == DIR_DELETED) { + EMSG("%s : abnormal access to deleted dentry\n", __func__); + BUG_ON(!fsi->prev_eio); + return NULL; + } + + if (find_location(sb, p_dir, entry, &sec, &off)) + return NULL; + + /* DIRECTORY READAHEAD : + * Try to read ahead per a page except root directory of fat12/16 + */ + if ((!IS_CLUS_FREE(p_dir->dir)) && + !(entry & (dentries_per_page - 1))) + dcache_readahead(sb, sec); + + buf = dcache_getblk(sb, sec); + if (!buf) + return NULL; + + if (sector) + *sector = sec; + return (DENTRY_T *)(buf + off); +} /* end of get_dentry_in_dir */ + +/* used only in search empty_slot() */ +#define CNT_UNUSED_NOHIT (-1) +#define CNT_UNUSED_HIT (-2) +/* search EMPTY CONTINUOUS "num_entries" entries */ +static s32 search_empty_slot(struct super_block *sb, HINT_FEMP_T *hint_femp, CHAIN_T *p_dir, s32 num_entries) +{ + s32 i, dentry, num_empty = 0; + s32 dentries_per_clu; + u32 type; + CHAIN_T clu; + DENTRY_T *ep; + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + + if (IS_CLUS_FREE(p_dir->dir)) /* FAT16 root_dir */ + dentries_per_clu = fsi->dentries_in_root; + else + dentries_per_clu = fsi->dentries_per_clu; + + ASSERT(-1 <= hint_femp->eidx); + + if (hint_femp->eidx != -1) { + clu.dir = hint_femp->cur.dir; + clu.size = hint_femp->cur.size; + clu.flags = hint_femp->cur.flags; + + dentry = hint_femp->eidx; + + if (num_entries <= hint_femp->count) { + MMSG("%s: empty slot(HIT) - found " + "(clu : 0x%08x eidx : %d)\n", + __func__, hint_femp->cur.dir, hint_femp->eidx); + hint_femp->eidx = -1; + + if (fsi->vol_type == EXFAT) + return dentry; + + return dentry + (num_entries - 1); + } + MMSG("%s: empty slot(HIT) - search from " + "(clu : 0x%08x eidx : %d)\n", + __func__, hint_femp->cur.dir, hint_femp->eidx); + } else { + MMSG("%s: empty slot(MISS) - search from " + "(clu:0x%08x eidx : 0)\n", + __func__, p_dir->dir); + + clu.dir = p_dir->dir; + clu.size = p_dir->size; + clu.flags = p_dir->flags; + + dentry = 0; + } + + while (!IS_CLUS_EOF(clu.dir)) { + /* FAT16 root_dir */ + if (IS_CLUS_FREE(p_dir->dir)) + i = dentry % dentries_per_clu; + else + i = dentry & (dentries_per_clu-1); + + for ( ; i < dentries_per_clu; i++, dentry++) { + ep = get_dentry_in_dir(sb, &clu, i, NULL); + if (!ep) + return -EIO; + + type = fsi->fs_func->get_entry_type(ep); + + if ((type == TYPE_UNUSED) || (type == TYPE_DELETED)) { + num_empty++; + if (hint_femp->eidx == -1) { + hint_femp->eidx = dentry; + hint_femp->count = CNT_UNUSED_NOHIT; + + hint_femp->cur.dir = clu.dir; + hint_femp->cur.size = clu.size; + hint_femp->cur.flags = clu.flags; + } + + if ((type == TYPE_UNUSED) && + (hint_femp->count != CNT_UNUSED_HIT)) { + hint_femp->count = CNT_UNUSED_HIT; + } + } else { + if ((hint_femp->eidx != -1) && + (hint_femp->count == CNT_UNUSED_HIT)) { + /* unused empty group means + * an empty group which includes + * unused dentry + */ + sdfat_fs_error(sb, + "found bogus dentry(%d) " + "beyond unused empty group(%d) " + "(start_clu : %u, cur_clu : %u)", + dentry, hint_femp->eidx, p_dir->dir, + clu.dir); + return -EIO; + } + + num_empty = 0; + 
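/* a used entry breaks the current run of free slots; the cached empty-slot hint is stale now, so drop it */ +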
hint_femp->eidx = -1; + } + + if (num_empty >= num_entries) { + /* found and invalidate hint_femp */ + hint_femp->eidx = -1; + + if (fsi->vol_type == EXFAT) + return (dentry - (num_entries-1)); + + return dentry; + } + } + + if (IS_CLUS_FREE(p_dir->dir)) + break; /* FAT16 root_dir */ + + if (clu.flags == 0x03) { + if ((--clu.size) > 0) + clu.dir++; + else + clu.dir = CLUS_EOF; + } else { + if (get_next_clus_safe(sb, &(clu.dir))) + return -EIO; + } + } + + return -ENOSPC; +} /* end of search_empty_slot */ + +/* find an empty directory entry. + * if there isn't any empty slot, expand the cluster chain. + */ +static s32 find_empty_entry(struct inode *inode, CHAIN_T *p_dir, s32 num_entries) +{ + s32 dentry; + u32 ret, last_clu; + u64 sector; + u64 size = 0; + CHAIN_T clu; + DENTRY_T *ep = NULL; + struct super_block *sb = inode->i_sb; + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + FILE_ID_T *fid = &(SDFAT_I(inode)->fid); + HINT_FEMP_T hint_femp; + + hint_femp.eidx = -1; + + ASSERT(-1 <= fid->hint_femp.eidx); + + if (fid->hint_femp.eidx != -1) { + memcpy(&hint_femp, &fid->hint_femp, sizeof(HINT_FEMP_T)); + fid->hint_femp.eidx = -1; + } + + /* FAT16 root_dir */ + if (IS_CLUS_FREE(p_dir->dir)) + return search_empty_slot(sb, &hint_femp, p_dir, num_entries); + + while ((dentry = search_empty_slot(sb, &hint_femp, p_dir, num_entries)) < 0) { + if (dentry == -EIO) + break; + + if (fsi->fs_func->check_max_dentries(fid)) + return -ENOSPC; + + /* we trust p_dir->size regardless of FAT type */ + if (__find_last_cluster(sb, p_dir, &last_clu)) + return -EIO; + + /* + * Allocate new cluster to this directory + */ + clu.dir = last_clu + 1; + clu.size = 0; /* UNUSED */ + clu.flags = p_dir->flags; + + /* (0) check if there are reserved clusters + * (see the comment in create_dir) + */ + if (!IS_CLUS_EOF(fsi->used_clusters) && + ((fsi->used_clusters + fsi->reserved_clusters) >= (fsi->num_clusters - 2))) + return -ENOSPC; + + /* (1) allocate a cluster */ + ret = fsi->fs_func->alloc_cluster(sb, 1, &clu, ALLOC_HOT); + if (ret) + return ret; + + if (__clear_cluster(inode, clu.dir)) + return -EIO; + + /* (2) append to the FAT chain */ + if (clu.flags != p_dir->flags) { + /* no-fat-chain bit is disabled, + * so fat-chain should be synced with alloc-bmp + */ + chain_cont_cluster(sb, p_dir->dir, p_dir->size); + p_dir->flags = 0x01; + hint_femp.cur.flags = 0x01; + } + + if (clu.flags == 0x01) + if (fat_ent_set(sb, last_clu, clu.dir)) + return -EIO; + + if (hint_femp.eidx == -1) { + /* the special case where the new dentry + * must be allocated at the start of the new cluster + */ + hint_femp.eidx = (s32)(p_dir->size << + (fsi->cluster_size_bits - DENTRY_SIZE_BITS)); + hint_femp.count = fsi->dentries_per_clu; + + hint_femp.cur.dir = clu.dir; + hint_femp.cur.size = 0; + hint_femp.cur.flags = clu.flags; + } + hint_femp.cur.size++; + p_dir->size++; + size = (p_dir->size << fsi->cluster_size_bits); + + /* (3) update the directory entry */ + if ((fsi->vol_type == EXFAT) && (p_dir->dir != fsi->root_dir)) { + ep = get_dentry_in_dir(sb, + &(fid->dir), fid->entry+1, &sector); + if (!ep) + return -EIO; + fsi->fs_func->set_entry_size(ep, size); + fsi->fs_func->set_entry_flag(ep, p_dir->flags); + if (dcache_modify(sb, sector)) + return -EIO; + + if (update_dir_chksum(sb, &(fid->dir), fid->entry)) + return -EIO; + } + + /* the directory inode should be updated here */ + i_size_write(inode, (loff_t)size); + SDFAT_I(inode)->i_size_ondisk += fsi->cluster_size; + SDFAT_I(inode)->i_size_aligned += fsi->cluster_size; + SDFAT_I(inode)->fid.size = size; + 
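/* keep the in-core FILE_ID_T of the directory in sync with its new size and chain flags */ +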
SDFAT_I(inode)->fid.flags = p_dir->flags; + inode->i_blocks += 1 << (fsi->cluster_size_bits - sb->s_blocksize_bits); + } + + return dentry; +} /* end of find_empty_entry */ + +#define SDFAT_MIN_SUBDIR (2) +static const char *dot_name[SDFAT_MIN_SUBDIR] = { DOS_CUR_DIR_NAME, DOS_PAR_DIR_NAME }; + +static s32 __count_dos_name_entries(struct super_block *sb, CHAIN_T *p_dir, u32 type, u32 *dotcnt) +{ + s32 i, count = 0, check_dot = 0; + s32 dentries_per_clu; + u32 entry_type; + CHAIN_T clu; + DENTRY_T *ep; + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + + if (IS_CLUS_FREE(p_dir->dir)) /* FAT16 root_dir */ + dentries_per_clu = fsi->dentries_in_root; + else + dentries_per_clu = fsi->dentries_per_clu; + + clu.dir = p_dir->dir; + clu.size = p_dir->size; + clu.flags = p_dir->flags; + + if (dotcnt) { + *dotcnt = 0; + if (fsi->vol_type != EXFAT) + check_dot = 1; + } + + while (!IS_CLUS_EOF(clu.dir)) { + for (i = 0; i < dentries_per_clu; i++) { + ep = get_dentry_in_dir(sb, &clu, i, NULL); + if (!ep) + return -EIO; + + entry_type = fsi->fs_func->get_entry_type(ep); + + if (entry_type == TYPE_UNUSED) + return count; + if (!(type & TYPE_CRITICAL_PRI) && !(type & TYPE_BENIGN_PRI)) + continue; + + if ((type != TYPE_ALL) && (type != entry_type)) + continue; + + count++; + if (check_dot && (i < SDFAT_MIN_SUBDIR)) { + BUG_ON(fsi->vol_type == EXFAT); + /* 11 is DOS_NAME_LENGTH */ + if (!strncmp(ep->dummy, dot_name[i], 11)) + (*dotcnt)++; + } + } + + /* FAT16 root_dir */ + if (IS_CLUS_FREE(p_dir->dir)) + break; + + if (clu.flags == 0x03) { + if ((--clu.size) > 0) + clu.dir++; + else + clu.dir = CLUS_EOF; + } else { + if (get_next_clus_safe(sb, &(clu.dir))) + return -EIO; + } + + check_dot = 0; + } + + return count; +} + +s32 check_dir_empty(struct super_block *sb, CHAIN_T *p_dir) +{ + s32 i, count = 0; + s32 dentries_per_clu; + u32 type; + CHAIN_T clu; + DENTRY_T *ep; + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + + if (IS_CLUS_FREE(p_dir->dir)) /* FAT16 root_dir */ + dentries_per_clu = fsi->dentries_in_root; + else + dentries_per_clu = fsi->dentries_per_clu; + + clu.dir = p_dir->dir; + clu.size = p_dir->size; + clu.flags = p_dir->flags; + + while (!IS_CLUS_EOF(clu.dir)) { + for (i = 0; i < dentries_per_clu; i++) { + ep = get_dentry_in_dir(sb, &clu, i, NULL); + if (!ep) + return -EIO; + + type = fsi->fs_func->get_entry_type(ep); + + if (type == TYPE_UNUSED) + return 0; + + if ((type != TYPE_FILE) && (type != TYPE_DIR)) + continue; + + /* FAT16 root_dir */ + if (IS_CLUS_FREE(p_dir->dir)) + return -ENOTEMPTY; + + if (fsi->vol_type == EXFAT) + return -ENOTEMPTY; + + if ((p_dir->dir == fsi->root_dir) || (++count > 2)) + return -ENOTEMPTY; + } + + /* FAT16 root_dir */ + if (IS_CLUS_FREE(p_dir->dir)) + return -ENOTEMPTY; + + if (clu.flags == 0x03) { + if ((--clu.size) > 0) + clu.dir++; + else + clu.dir = CLUS_EOF; + } else { + if (get_next_clus_safe(sb, &(clu.dir))) + return -EIO; + } + } + + return 0; +} + +/* + * Name Conversion Functions + */ +#ifdef CONFIG_SDFAT_ALLOW_LOOKUP_LOSSY_SFN + /* over name length only */ +#define NEED_INVALIDATE_SFN(x) ((x) & NLS_NAME_OVERLEN) +#else + /* all lossy case */ +#define NEED_INVALIDATE_SFN(x) (x) +#endif + +/* NOTE : + * We should keep shortname code compatible with v1.0.15 or lower + * So, we try to check ext-only-name at create-mode only. + * + * i.e. '.mtp' -> + * v1.0.15 : ' MTP' with name_case, 0x10 + * v1.1.0 : 'MT????~?' with name_case, 0x00 and longname. 
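+ * i.e. the old scheme stored an ext-only name as a space-padded SFN, while the new one generates a mangled base name plus LFN entries.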
+ */ +static inline void preprocess_ext_only_sfn(s32 lookup, u16 first_char, DOS_NAME_T *p_dosname, s32 *lossy) +{ +#ifdef CONFIG_SDFAT_RESTRICT_EXT_ONLY_SFN + int i; + /* check ext-only-name at create-mode */ + if (*lossy || lookup || (first_char != (u16)'.')) + return; + + p_dosname->name_case = 0xFF; + + /* move ext-name to base-name */ + for (i = 0; i < 3; i++) { + p_dosname->name[i] = p_dosname->name[8+i]; + if (p_dosname->name[i] == ' ') + p_dosname->name[i] = '_'; + } + + /* fill the remaining space with '_' */ + for (i = 3; i < 8; i++) + p_dosname->name[i] = '_'; + + /* eliminate ext-name */ + for (i = 8; i < 11; i++) + p_dosname->name[i] = ' '; + + *lossy = NLS_NAME_LOSSY; +#endif /* CONFIG_SDFAT_RESTRICT_EXT_ONLY_SFN */ +} + +/* input : dir, uni_name + * output : num_of_entry, dos_name(format : aaaaaa~1.bbb) + */ +static s32 get_num_entries_and_dos_name(struct super_block *sb, CHAIN_T *p_dir, + UNI_NAME_T *p_uniname, s32 *entries, + DOS_NAME_T *p_dosname, s32 lookup) +{ + s32 ret, num_entries, lossy = NLS_NAME_NO_LOSSY; + s8 **r; + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + + /* Init null char. */ + p_dosname->name[0] = '\0'; + + num_entries = fsi->fs_func->calc_num_entries(p_uniname); + if (num_entries == 0) + return -EINVAL; + + if (fsi->vol_type == EXFAT) + goto out; + + nls_uni16s_to_sfn(sb, p_uniname, p_dosname, &lossy); + + preprocess_ext_only_sfn(lookup, p_uniname->name[0], p_dosname, &lossy); + + if (!lossy) { + for (r = reserved_names; *r; r++) { + if (!strncmp((void *) p_dosname->name, *r, 8)) + return -EINVAL; + } + + if (p_dosname->name_case != 0xFF) + num_entries = 1; + } else if (!lookup) { + /* create a new dos name */ + ret = fat_generate_dos_name_new(sb, p_dir, p_dosname, + num_entries); + if (ret) + return ret; + + } else if (NEED_INVALIDATE_SFN(lossy)) { + /* FIXME : We should check num_entries */ + p_dosname->name[0] = '\0'; + } + + if (num_entries > 1) + p_dosname->name_case = 0x0; +out: + *entries = num_entries; + return 0; +} /* end of get_num_entries_and_dos_name */ + +void get_uniname_from_dos_entry(struct super_block *sb, DOS_DENTRY_T *ep, UNI_NAME_T *p_uniname, u8 mode) +{ + DOS_NAME_T dos_name; + + if (mode == 0x0) + dos_name.name_case = 0x0; + else + dos_name.name_case = ep->lcase; + + memcpy(dos_name.name, ep->name, DOS_NAME_LENGTH); + nls_sfn_to_uni16s(sb, &dos_name, p_uniname); +} /* end of get_uniname_from_dos_entry */ + +/* returns the length of a struct qstr, ignoring trailing dots */ +static inline unsigned int __striptail_len(unsigned int len, const char *name) +{ + while (len && name[len - 1] == '.') + len--; + return len; +} + +/* + * Name Resolution Functions : + * Zero if it was successful; otherwise nonzero. + */ +static s32 __resolve_path(struct inode *inode, const u8 *path, CHAIN_T *p_dir, UNI_NAME_T *p_uniname, int lookup) +{ + s32 namelen; + s32 lossy = NLS_NAME_NO_LOSSY; + struct super_block *sb = inode->i_sb; + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + FILE_ID_T *fid = &(SDFAT_I(inode)->fid); + + /* DOT and DOTDOT are handled by VFS layer */ + + /* strip all trailing spaces */ + /* DO NOTHING : Is needed? */ + + /* strip all trailing periods */ + namelen = __striptail_len(strlen(path), path); + if (!namelen) + return -ENOENT; + + /* the limitation of linux? */ + if (strlen(path) > (MAX_NAME_LENGTH * MAX_CHARSET_SIZE)) + return -ENAMETOOLONG; + + /* + * strip all leading spaces : + * "MS windows 7" supports leading spaces. + * So we should skip this preprocessing for compatibility. 
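+ * e.g. a name created by Windows with leading spaces must stay reachable by the very same byte string here.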
+ */ + + /* file name conversion : + * If lookup case, we allow bad-name for compatibility. + */ + namelen = nls_vfsname_to_uni16s(sb, path, namelen, p_uniname, &lossy); + if (namelen < 0) + return namelen; /* return error value */ + + if ((lossy && !lookup) || !namelen) + return -EINVAL; + + sdfat_debug_bug_on(fid->size != i_size_read(inode)); +// fid->size = i_size_read(inode); + + p_dir->dir = fid->start_clu; + p_dir->size = (u32)(fid->size >> fsi->cluster_size_bits); + p_dir->flags = fid->flags; + + return 0; +} + +static inline s32 resolve_path(struct inode *inode, const u8 *path, CHAIN_T *dir, UNI_NAME_T *uni) +{ + return __resolve_path(inode, path, dir, uni, 0); +} + +static inline s32 resolve_path_for_lookup(struct inode *inode, const u8 *path, CHAIN_T *dir, UNI_NAME_T *uni) +{ + return __resolve_path(inode, path, dir, uni, 1); +} + +static s32 create_dir(struct inode *inode, CHAIN_T *p_dir, UNI_NAME_T *p_uniname, FILE_ID_T *fid) +{ + s32 dentry, num_entries; + u64 ret; + u64 size; + CHAIN_T clu; + DOS_NAME_T dos_name, dot_name; + struct super_block *sb = inode->i_sb; + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + + ret = get_num_entries_and_dos_name(sb, p_dir, p_uniname, &num_entries, &dos_name, 0); + if (ret) + return ret; + + /* find_empty_entry must be called before alloc_cluster */ + dentry = find_empty_entry(inode, p_dir, num_entries); + if (dentry < 0) + return dentry; /* -EIO or -ENOSPC */ + + clu.dir = CLUS_EOF; + clu.size = 0; + clu.flags = (fsi->vol_type == EXFAT) ? 0x03 : 0x01; + + /* (0) Check if there are reserved clusters up to max. */ + if ((fsi->used_clusters + fsi->reserved_clusters) >= (fsi->num_clusters - CLUS_BASE)) + return -ENOSPC; + + /* (1) allocate a cluster */ + ret = fsi->fs_func->alloc_cluster(sb, 1, &clu, ALLOC_HOT); + if (ret) + return ret; + + ret = __clear_cluster(inode, clu.dir); + if (ret) + return ret; + + size = fsi->cluster_size; + if (fsi->vol_type != EXFAT) { + /* initialize the . and .. entry + * Information for . points to itself + * Information for .. points to parent dir + */ + + dot_name.name_case = 0x0; + memcpy(dot_name.name, DOS_CUR_DIR_NAME, DOS_NAME_LENGTH); + + ret = fsi->fs_func->init_dir_entry(sb, &clu, 0, TYPE_DIR, clu.dir, 0); + if (ret) + return ret; + + ret = fsi->fs_func->init_ext_entry(sb, &clu, 0, 1, NULL, &dot_name); + if (ret) + return ret; + + memcpy(dot_name.name, DOS_PAR_DIR_NAME, DOS_NAME_LENGTH); + + if (p_dir->dir == fsi->root_dir) + ret = fsi->fs_func->init_dir_entry(sb, &clu, 1, TYPE_DIR, CLUS_FREE, 0); + else + ret = fsi->fs_func->init_dir_entry(sb, &clu, 1, TYPE_DIR, p_dir->dir, 0); + + if (ret) + return ret; + + ret = fsi->fs_func->init_ext_entry(sb, &clu, 1, 1, NULL, &dot_name); + if (ret) + return ret; + } + + /* (2) update the directory entry */ + /* make sub-dir entry in parent directory */ + ret = fsi->fs_func->init_dir_entry(sb, p_dir, dentry, TYPE_DIR, clu.dir, size); + if (ret) + return ret; + + ret = fsi->fs_func->init_ext_entry(sb, p_dir, dentry, num_entries, p_uniname, &dos_name); + if (ret) + return ret; + + fid->dir.dir = p_dir->dir; + fid->dir.size = p_dir->size; + fid->dir.flags = p_dir->flags; + fid->entry = dentry; + + fid->attr = ATTR_SUBDIR; + fid->flags = (fsi->vol_type == EXFAT) ? 0x03 : 0x01; + fid->size = size; + fid->start_clu = clu.dir; + + fid->type = TYPE_DIR; + fid->rwoffset = 0; + fid->hint_bmap.off = CLUS_EOF; + + /* hint_stat will be used if this is directory. 
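(eidx/clu record the last enumeration position, and version is compared against the inode's iversion to drop stale hints) 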
*/ + fid->version = 0; + fid->hint_stat.eidx = 0; + fid->hint_stat.clu = fid->start_clu; + fid->hint_femp.eidx = -1; + + return 0; +} /* end of create_dir */ + +static s32 create_file(struct inode *inode, CHAIN_T *p_dir, UNI_NAME_T *p_uniname, u8 mode, FILE_ID_T *fid) +{ + s32 ret, dentry, num_entries; + DOS_NAME_T dos_name; + struct super_block *sb = inode->i_sb; + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + + ret = get_num_entries_and_dos_name(sb, p_dir, p_uniname, &num_entries, &dos_name, 0); + if (ret) + return ret; + + /* find_empty_entry must be called before alloc_cluster() */ + dentry = find_empty_entry(inode, p_dir, num_entries); + if (dentry < 0) + return dentry; /* -EIO or -ENOSPC */ + + /* (1) update the directory entry */ + /* fill the dos name directory entry information of the created file. + * the first cluster is not determined yet. (0) + */ + ret = fsi->fs_func->init_dir_entry(sb, p_dir, dentry, TYPE_FILE | mode, CLUS_FREE, 0); + if (ret) + return ret; + + ret = fsi->fs_func->init_ext_entry(sb, p_dir, dentry, num_entries, p_uniname, &dos_name); + if (ret) + return ret; + + fid->dir.dir = p_dir->dir; + fid->dir.size = p_dir->size; + fid->dir.flags = p_dir->flags; + fid->entry = dentry; + + fid->attr = ATTR_ARCHIVE | mode; + fid->flags = (fsi->vol_type == EXFAT) ? 0x03 : 0x01; + fid->size = 0; + fid->start_clu = CLUS_EOF; + + fid->type = TYPE_FILE; + fid->rwoffset = 0; + fid->hint_bmap.off = CLUS_EOF; + + /* hint_stat will be used if this is a directory. */ + fid->version = 0; + fid->hint_stat.eidx = 0; + fid->hint_stat.clu = fid->start_clu; + fid->hint_femp.eidx = -1; + + return 0; +} /* end of create_file */ + +static s32 remove_file(struct inode *inode, CHAIN_T *p_dir, s32 entry) +{ + s32 num_entries; + u64 sector; + DENTRY_T *ep; + struct super_block *sb = inode->i_sb; + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + + ep = get_dentry_in_dir(sb, p_dir, entry, &sector); + if (!ep) + return -EIO; + + dcache_lock(sb, sector); + + /* dcache_lock() before calling count_ext_entries() */ + num_entries = fsi->fs_func->count_ext_entries(sb, p_dir, entry, ep); + if (num_entries < 0) { + dcache_unlock(sb, sector); + return -EIO; + } + num_entries++; + + dcache_unlock(sb, sector); + + /* (1) update the directory entry */ + return fsi->fs_func->delete_dir_entry(sb, p_dir, entry, 0, num_entries); +} /* end of remove_file */ + +static s32 rename_file(struct inode *inode, CHAIN_T *p_dir, s32 oldentry, UNI_NAME_T *p_uniname, FILE_ID_T *fid) +{ + s32 ret, newentry = -1, num_old_entries, num_new_entries; + u64 sector_old, sector_new; + DOS_NAME_T dos_name; + DENTRY_T *epold, *epnew; + struct super_block *sb = inode->i_sb; + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + + epold = get_dentry_in_dir(sb, p_dir, oldentry, &sector_old); + if (!epold) + return -EIO; + + dcache_lock(sb, sector_old); + + /* dcache_lock() before calling count_ext_entries() */ + num_old_entries = fsi->fs_func->count_ext_entries(sb, p_dir, oldentry, epold); + if (num_old_entries < 0) { + dcache_unlock(sb, sector_old); + return -EIO; + } + num_old_entries++; + + ret = get_num_entries_and_dos_name(sb, p_dir, p_uniname, &num_new_entries, &dos_name, 0); + if (ret) { + dcache_unlock(sb, sector_old); + return ret; + } + + if (num_old_entries < num_new_entries) { + newentry = find_empty_entry(inode, p_dir, num_new_entries); + if (newentry < 0) { + dcache_unlock(sb, sector_old); + return newentry; /* -EIO or -ENOSPC */ + } + + epnew = get_dentry_in_dir(sb, p_dir, newentry, &sector_new); + if (!epnew) { + dcache_unlock(sb, sector_old); + return -EIO; + } + + 
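/* clone the primary dentry into its new slot; the extended (stream/LFN) entries are rebuilt below */ +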
memcpy((void *) epnew, (void *) epold, DENTRY_SIZE); + if (fsi->fs_func->get_entry_type(epnew) == TYPE_FILE) { + fsi->fs_func->set_entry_attr(epnew, fsi->fs_func->get_entry_attr(epnew) | ATTR_ARCHIVE); + fid->attr |= ATTR_ARCHIVE; + } + dcache_modify(sb, sector_new); + dcache_unlock(sb, sector_old); + + if (fsi->vol_type == EXFAT) { + epold = get_dentry_in_dir(sb, p_dir, oldentry+1, &sector_old); + dcache_lock(sb, sector_old); + epnew = get_dentry_in_dir(sb, p_dir, newentry+1, &sector_new); + + if (!epold || !epnew) { + dcache_unlock(sb, sector_old); + return -EIO; + } + + memcpy((void *) epnew, (void *) epold, DENTRY_SIZE); + dcache_modify(sb, sector_new); + dcache_unlock(sb, sector_old); + } + + ret = fsi->fs_func->init_ext_entry(sb, p_dir, newentry, num_new_entries, p_uniname, &dos_name); + if (ret) + return ret; + + fsi->fs_func->delete_dir_entry(sb, p_dir, oldentry, 0, num_old_entries); + fid->entry = newentry; + } else { + if (fsi->fs_func->get_entry_type(epold) == TYPE_FILE) { + fsi->fs_func->set_entry_attr(epold, fsi->fs_func->get_entry_attr(epold) | ATTR_ARCHIVE); + fid->attr |= ATTR_ARCHIVE; + } + dcache_modify(sb, sector_old); + dcache_unlock(sb, sector_old); + + ret = fsi->fs_func->init_ext_entry(sb, p_dir, oldentry, num_new_entries, p_uniname, &dos_name); + if (ret) + return ret; + + fsi->fs_func->delete_dir_entry(sb, p_dir, oldentry, num_new_entries, num_old_entries); + } + + return 0; +} /* end of rename_file */ + +static s32 move_file(struct inode *inode, CHAIN_T *p_olddir, s32 oldentry, + CHAIN_T *p_newdir, UNI_NAME_T *p_uniname, FILE_ID_T *fid) +{ + s32 ret, newentry, num_new_entries, num_old_entries; + u64 sector_mov, sector_new; + CHAIN_T clu; + DOS_NAME_T dos_name; + DENTRY_T *epmov, *epnew; + struct super_block *sb = inode->i_sb; + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + + epmov = get_dentry_in_dir(sb, p_olddir, oldentry, &sector_mov); + if (!epmov) + return -EIO; + + /* check if the source and target directories are the same */ + if (fsi->fs_func->get_entry_type(epmov) == TYPE_DIR && + fsi->fs_func->get_entry_clu0(epmov) == p_newdir->dir) + return -EINVAL; + + dcache_lock(sb, sector_mov); + + /* dcache_lock() before calling count_ext_entries() */ + num_old_entries = fsi->fs_func->count_ext_entries(sb, p_olddir, oldentry, epmov); + if (num_old_entries < 0) { + dcache_unlock(sb, sector_mov); + return -EIO; + } + num_old_entries++; + + ret = get_num_entries_and_dos_name(sb, p_newdir, p_uniname, &num_new_entries, &dos_name, 0); + if (ret) { + dcache_unlock(sb, sector_mov); + return ret; + } + + newentry = find_empty_entry(inode, p_newdir, num_new_entries); + if (newentry < 0) { + dcache_unlock(sb, sector_mov); + return newentry; /* -EIO or -ENOSPC */ + } + + epnew = get_dentry_in_dir(sb, p_newdir, newentry, &sector_new); + if (!epnew) { + dcache_unlock(sb, sector_mov); + return -EIO; + } + + memcpy((void *) epnew, (void *) epmov, DENTRY_SIZE); + if (fsi->fs_func->get_entry_type(epnew) == TYPE_FILE) { + fsi->fs_func->set_entry_attr(epnew, fsi->fs_func->get_entry_attr(epnew) | ATTR_ARCHIVE); + fid->attr |= ATTR_ARCHIVE; + } + dcache_modify(sb, sector_new); + dcache_unlock(sb, sector_mov); + + if (fsi->vol_type == EXFAT) { + epmov = get_dentry_in_dir(sb, p_olddir, oldentry+1, &sector_mov); + dcache_lock(sb, sector_mov); + epnew = get_dentry_in_dir(sb, p_newdir, newentry+1, &sector_new); + if (!epmov || !epnew) { + dcache_unlock(sb, sector_mov); + return -EIO; + } + + memcpy((void *) epnew, (void *) epmov, DENTRY_SIZE); + dcache_modify(sb, sector_new); + dcache_unlock(sb, sector_mov); + } else if 
(fsi->fs_func->get_entry_type(epnew) == TYPE_DIR) { + /* change ".." pointer to new parent dir */ + clu.dir = fsi->fs_func->get_entry_clu0(epnew); + clu.flags = 0x01; + + epnew = get_dentry_in_dir(sb, &clu, 1, &sector_new); + if (!epnew) + return -EIO; + + if (p_newdir->dir == fsi->root_dir) + fsi->fs_func->set_entry_clu0(epnew, CLUS_FREE); + else + fsi->fs_func->set_entry_clu0(epnew, p_newdir->dir); + dcache_modify(sb, sector_new); + } + + ret = fsi->fs_func->init_ext_entry(sb, p_newdir, newentry, num_new_entries, p_uniname, &dos_name); + if (ret) + return ret; + + fsi->fs_func->delete_dir_entry(sb, p_olddir, oldentry, 0, num_old_entries); + + fid->dir.dir = p_newdir->dir; + fid->dir.size = p_newdir->size; + fid->dir.flags = p_newdir->flags; + + fid->entry = newentry; + + return 0; +} /* end of move_file */ + + +/*======================================================================*/ +/* Global Function Definitions */ +/*======================================================================*/ +/* roll back to the initial state of the file system */ +s32 fscore_init(void) +{ + s32 ret; + + ret = check_type_size(); + if (ret) + return ret; + + return extent_cache_init(); +} + +/* free all memory-allocated global buffers */ +s32 fscore_shutdown(void) +{ + extent_cache_shutdown(); + return 0; +} + +/* check whether the device is ejected */ +s32 fscore_check_bdi_valid(struct super_block *sb) +{ + return bdev_check_bdi_valid(sb); +} + +static bool is_exfat(pbr_t *pbr) +{ + int i = 53; + + do { + if (pbr->bpb.f64.res_zero[i-1]) + break; + } while (--i); + return i ? false : true; +} + +static bool is_fat32(pbr_t *pbr) +{ + if (le16_to_cpu(pbr->bpb.f16.num_fat_sectors)) + return false; + return true; +} + +inline pbr_t *read_pbr_with_logical_sector(struct super_block *sb, struct buffer_head **prev_bh) +{ + pbr_t *p_pbr = (pbr_t *) (*prev_bh)->b_data; + u16 logical_sect = 0; + + if (is_exfat(p_pbr)) + logical_sect = 1 << p_pbr->bsx.f64.sect_size_bits; + else + logical_sect = get_unaligned_le16(&p_pbr->bpb.f16.sect_size); + + /* is x a power of 2? 
+ * (x) != 0 && (((x) & ((x) - 1)) == 0) + */ + if (!is_power_of_2(logical_sect) + || (logical_sect < 512) + || (logical_sect > 4096)) { + sdfat_log_msg(sb, KERN_ERR, "bogus logical sector size %u", + logical_sect); + return NULL; + } + + if (logical_sect < sb->s_blocksize) { + sdfat_log_msg(sb, KERN_ERR, + "logical sector size too small for device" + " (logical sector size = %u)", logical_sect); + return NULL; + } + + if (logical_sect > sb->s_blocksize) { + struct buffer_head *bh = NULL; + + __brelse(*prev_bh); + *prev_bh = NULL; + + if (!sb_set_blocksize(sb, logical_sect)) { + sdfat_log_msg(sb, KERN_ERR, + "unable to set blocksize %u", logical_sect); + return NULL; + } + bh = sb_bread(sb, 0); + if (!bh) { + sdfat_log_msg(sb, KERN_ERR, + "unable to read boot sector " + "(logical sector size = %lu)", sb->s_blocksize); + return NULL; + } + + *prev_bh = bh; + p_pbr = (pbr_t *) bh->b_data; + } + + sdfat_log_msg(sb, KERN_INFO, + "set logical sector size : %lu", sb->s_blocksize); + + return p_pbr; +} + +/* mount the file system volume */ +s32 fscore_mount(struct super_block *sb) +{ + s32 ret; + pbr_t *p_pbr; + struct buffer_head *tmp_bh = NULL; + struct gendisk *disk = sb->s_bdev->bd_disk; + struct hd_struct *part = sb->s_bdev->bd_part; + struct sdfat_mount_options *opts = &(SDFAT_SB(sb)->options); + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + + /* initialize previous I/O error */ + fsi->prev_eio = 0; + + /* open the block device */ + if (bdev_open_dev(sb)) + return -EIO; + + /* set block size to read super block */ + sb_min_blocksize(sb, 512); + + /* read boot sector */ + ret = read_sect(sb, 0, &tmp_bh, 1); + if (ret) { + sdfat_log_msg(sb, KERN_ERR, "unable to read boot sector"); + ret = -EIO; + goto bd_close; + } + + /* PRB is read */ + p_pbr = (pbr_t *) tmp_bh->b_data; + + /* check the validity of PBR */ + if (le16_to_cpu((p_pbr->signature)) != PBR_SIGNATURE) { + sdfat_log_msg(sb, KERN_ERR, "invalid boot record signature"); + brelse(tmp_bh); + ret = -EINVAL; + goto bd_close; + } + + /* check logical sector size */ + p_pbr = read_pbr_with_logical_sector(sb, &tmp_bh); + if (!p_pbr) { + brelse(tmp_bh); + ret = -EIO; + goto bd_close; + } + + /* fill fs_struct */ + if (is_exfat(p_pbr)) { + if (opts->fs_type && opts->fs_type != FS_TYPE_EXFAT) { + sdfat_log_msg(sb, KERN_ERR, + "not specified filesystem type " + "(media:exfat, opts:%s)", + FS_TYPE_STR[opts->fs_type]); + ret = -EINVAL; + goto free_bh; + } + /* set maximum file size for exFAT */ + sb->s_maxbytes = 0x7fffffffffffffffLL; + opts->improved_allocation = 0; + opts->defrag = 0; + ret = mount_exfat(sb, p_pbr); + } else if (is_fat32(p_pbr)) { + if (opts->fs_type && opts->fs_type != FS_TYPE_VFAT) { + sdfat_log_msg(sb, KERN_ERR, + "not specified filesystem type " + "(media:vfat, opts:%s)", + FS_TYPE_STR[opts->fs_type]); + ret = -EINVAL; + goto free_bh; + } + /* set maximum file size for FAT */ + sb->s_maxbytes = 0xffffffff; + ret = mount_fat32(sb, p_pbr); + } else { + if (opts->fs_type && opts->fs_type != FS_TYPE_VFAT) { + sdfat_log_msg(sb, KERN_ERR, + "not specified filesystem type " + "(media:vfat, opts:%s)", + FS_TYPE_STR[opts->fs_type]); + ret = -EINVAL; + goto free_bh; + } + /* set maximum file size for FAT */ + sb->s_maxbytes = 0xffffffff; + opts->improved_allocation = 0; + opts->defrag = 0; + ret = mount_fat16(sb, p_pbr); + } +free_bh: + brelse(tmp_bh); + if (ret) { + sdfat_log_msg(sb, KERN_ERR, "failed to mount fs-core"); + goto bd_close; + } + + /* warn misaligned data data start sector must be a multiple of clu_size */ + sdfat_log_msg(sb, 
KERN_INFO, + "detected volume info : %s " + "(bps : %lu, spc : %u, data start : %llu, %s)", + sdfat_get_vol_type_str(fsi->vol_type), + sb->s_blocksize, fsi->sect_per_clus, fsi->data_start_sector, + (fsi->data_start_sector & (fsi->sect_per_clus - 1)) ? + "misaligned" : "aligned"); + + sdfat_log_msg(sb, KERN_INFO, + "detected volume size : %llu KB (disk : %llu KB, " + "part : %llu KB)", + (fsi->num_sectors * (sb->s_blocksize >> SECTOR_SIZE_BITS)) >> 1, + disk ? (u64)((disk->part0.nr_sects) >> 1) : 0, + part ? (u64)((part->nr_sects) >> 1) : 0); + + ret = load_upcase_table(sb); + if (ret) { + sdfat_log_msg(sb, KERN_ERR, "failed to load upcase table"); + goto bd_close; + } + + if (fsi->vol_type != EXFAT) + goto update_used_clus; + + /* allocate-bitmap is only for exFAT */ + ret = load_alloc_bmp(sb); + if (ret) { + sdfat_log_msg(sb, KERN_ERR, "failed to load alloc-bitmap"); + goto free_upcase; + } + +update_used_clus: + if (fsi->used_clusters == (u32) ~0) { + ret = fsi->fs_func->count_used_clusters(sb, &fsi->used_clusters); + if (ret) { + sdfat_log_msg(sb, KERN_ERR, "failed to scan clusters"); + goto free_alloc_bmp; + } + } + + return 0; +free_alloc_bmp: + if (fsi->vol_type == EXFAT) + free_alloc_bmp(sb); +free_upcase: + free_upcase_table(sb); +bd_close: + bdev_close_dev(sb); + return ret; +} /* end of fscore_mount */ + +/* umount the file system volume */ +s32 fscore_umount(struct super_block *sb) +{ + s32 ret = 0; + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + + if (fs_sync(sb, 0)) + ret = -EIO; + + if (fs_set_vol_flags(sb, VOL_CLEAN)) + ret = -EIO; + + free_upcase_table(sb); + + if (fsi->vol_type == EXFAT) + free_alloc_bmp(sb); + + if (fcache_release_all(sb)) + ret = -EIO; + + if (dcache_release_all(sb)) + ret = -EIO; + + amap_destroy(sb); + + if (fsi->prev_eio) + ret = -EIO; + /* close the block device */ + bdev_close_dev(sb); + return ret; +} + +/* get the information of a file system volume */ +s32 fscore_statfs(struct super_block *sb, VOL_INFO_T *info) +{ + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + + if (fsi->used_clusters == (u32) ~0) { + if (fsi->fs_func->count_used_clusters(sb, &fsi->used_clusters)) + return -EIO; + } + + info->FatType = fsi->vol_type; + info->ClusterSize = fsi->cluster_size; + info->NumClusters = fsi->num_clusters - 2; /* clu 0 & 1 */ + info->UsedClusters = fsi->used_clusters + fsi->reserved_clusters; + info->FreeClusters = info->NumClusters - info->UsedClusters; + + return 0; +} + +/* synchronize all file system volumes */ +s32 fscore_sync_fs(struct super_block *sb, s32 do_sync) +{ + /* synchronize the file system */ + if (fs_sync(sb, do_sync)) + return -EIO; + + if (fs_set_vol_flags(sb, VOL_CLEAN)) + return -EIO; + + return 0; +} + +/* stat allocation unit of a file system volume */ +u32 fscore_get_au_stat(struct super_block *sb, s32 mode) +{ + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + + if (fsi->fs_func->get_au_stat) + return fsi->fs_func->get_au_stat(sb, mode); + + /* No error, just returns 0 */ + return 0; +} + + +/*----------------------------------------------------------------------*/ +/* File Operation Functions */ +/*----------------------------------------------------------------------*/ +/* lookup a file */ +s32 fscore_lookup(struct inode *inode, u8 *path, FILE_ID_T *fid) +{ + s32 ret, dentry, num_entries; + CHAIN_T dir; + UNI_NAME_T uni_name; + DOS_NAME_T dos_name; + DENTRY_T *ep, *ep2; + ENTRY_SET_CACHE_T *es = NULL; + struct super_block *sb = inode->i_sb; + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + FILE_ID_T *dir_fid = &(SDFAT_I(inode)->fid); + + TMSG("%s 
entered\n", __func__); + + /* check the validity of directory name in the given pathname */ + ret = resolve_path_for_lookup(inode, path, &dir, &uni_name); + if (ret) + return ret; + + ret = get_num_entries_and_dos_name(sb, &dir, &uni_name, &num_entries, &dos_name, 1); + if (ret) + return ret; + + /* check the validation of hint_stat and initialize it if required */ + if (dir_fid->version != (u32)inode_peek_iversion(inode)) { + dir_fid->hint_stat.clu = dir.dir; + dir_fid->hint_stat.eidx = 0; + dir_fid->version = (u32)inode_peek_iversion(inode); + dir_fid->hint_femp.eidx = -1; + } + + /* search the file name for directories */ + dentry = fsi->fs_func->find_dir_entry(sb, dir_fid, &dir, &uni_name, + num_entries, &dos_name, TYPE_ALL); + + if ((dentry < 0) && (dentry != -EEXIST)) + return dentry; /* -error value */ + + fid->dir.dir = dir.dir; + fid->dir.size = dir.size; + fid->dir.flags = dir.flags; + fid->entry = dentry; + + /* root directory itself */ + if (unlikely(dentry == -EEXIST)) { + fid->type = TYPE_DIR; + fid->rwoffset = 0; + fid->hint_bmap.off = CLUS_EOF; + + fid->attr = ATTR_SUBDIR; + fid->flags = 0x01; + fid->size = 0; + fid->start_clu = fsi->root_dir; + } else { + if (fsi->vol_type == EXFAT) { + es = get_dentry_set_in_dir(sb, &dir, dentry, ES_2_ENTRIES, &ep); + if (!es) + return -EIO; + ep2 = ep+1; + } else { + ep = get_dentry_in_dir(sb, &dir, dentry, NULL); + if (!ep) + return -EIO; + ep2 = ep; + } + + fid->type = fsi->fs_func->get_entry_type(ep); + fid->rwoffset = 0; + fid->hint_bmap.off = CLUS_EOF; + fid->attr = fsi->fs_func->get_entry_attr(ep); + + fid->size = fsi->fs_func->get_entry_size(ep2); + if ((fid->type == TYPE_FILE) && (fid->size == 0)) { + fid->flags = (fsi->vol_type == EXFAT) ? 0x03 : 0x01; + fid->start_clu = CLUS_EOF; + } else { + fid->flags = fsi->fs_func->get_entry_flag(ep2); + fid->start_clu = fsi->fs_func->get_entry_clu0(ep2); + } + + if ((fid->type == TYPE_DIR) && (fsi->vol_type != EXFAT)) { + u32 num_clu = 0; + CHAIN_T tmp_dir; + + tmp_dir.dir = fid->start_clu; + tmp_dir.flags = fid->flags; + tmp_dir.size = 0; /* UNUSED */ + + if (__count_num_clusters(sb, &tmp_dir, &num_clu)) + return -EIO; + fid->size = (u64)num_clu << fsi->cluster_size_bits; + } + + /* FOR GRACEFUL ERROR HANDLING */ + if (IS_CLUS_FREE(fid->start_clu)) { + sdfat_fs_error(sb, + "non-zero size file starts with zero cluster " + "(size : %llu, p_dir : %u, entry : 0x%08x)", + fid->size, fid->dir.dir, fid->entry); + sdfat_debug_bug_on(1); + return -EIO; + } + + if (fsi->vol_type == EXFAT) + release_dentry_set(es); + } + + /* hint_stat will be used if this is directory. 
*/ + fid->version = 0; + fid->hint_stat.eidx = 0; + fid->hint_stat.clu = fid->start_clu; + fid->hint_femp.eidx = -1; + + TMSG("%s exited successfully\n", __func__); + return 0; +} /* end of fscore_lookup */ + +/* create a file */ +s32 fscore_create(struct inode *inode, u8 *path, u8 mode, FILE_ID_T *fid) +{ + s32 ret/*, dentry*/; + CHAIN_T dir; + UNI_NAME_T uni_name; + struct super_block *sb = inode->i_sb; + + /* check the validity of directory name in the given pathname */ + ret = resolve_path(inode, path, &dir, &uni_name); + if (ret) + return ret; + + fs_set_vol_flags(sb, VOL_DIRTY); + + /* create a new file */ + ret = create_file(inode, &dir, &uni_name, mode, fid); + + fs_sync(sb, 0); + fs_set_vol_flags(sb, VOL_CLEAN); + + return ret; +} + +/* read data from an opened file */ +s32 fscore_read_link(struct inode *inode, FILE_ID_T *fid, void *buffer, u64 count, u64 *rcount) +{ + s32 ret = 0; + s32 offset, sec_offset; + u32 clu_offset; + u32 clu; + u64 logsector, oneblkread, read_bytes; + struct buffer_head *tmp_bh = NULL; + struct super_block *sb = inode->i_sb; + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + + /* check if the given file ID is opened */ + if (fid->type != TYPE_FILE) + return -EPERM; + + if (fid->rwoffset > fid->size) + fid->rwoffset = fid->size; + + if (count > (fid->size - fid->rwoffset)) + count = fid->size - fid->rwoffset; + + if (count == 0) { + if (rcount) + *rcount = 0; + return 0; + } + + read_bytes = 0; + + while (count > 0) { + clu_offset = fid->rwoffset >> fsi->cluster_size_bits; + clu = fid->start_clu; + + if (fid->flags == 0x03) { + clu += clu_offset; + } else { + /* hint information */ + if ((clu_offset > 0) && + ((fid->hint_bmap.off != CLUS_EOF) && (fid->hint_bmap.off > 0)) && + (clu_offset >= fid->hint_bmap.off)) { + clu_offset -= fid->hint_bmap.off; + clu = fid->hint_bmap.clu; + } + + while (clu_offset > 0) { + ret = get_next_clus_safe(sb, &clu); + if (ret) + goto err_out; + + clu_offset--; + } + } + + /* hint information */ + fid->hint_bmap.off = fid->rwoffset >> fsi->cluster_size_bits; + fid->hint_bmap.clu = clu; + + offset = (s32)(fid->rwoffset & (fsi->cluster_size - 1)); /* byte offset in cluster */ + sec_offset = offset >> sb->s_blocksize_bits; /* sector offset in cluster */ + offset &= (sb->s_blocksize - 1); /* byte offset in sector */ + + logsector = CLUS_TO_SECT(fsi, clu) + sec_offset; + + oneblkread = (u64)(sb->s_blocksize - offset); + if (oneblkread > count) + oneblkread = count; + + if ((offset == 0) && (oneblkread == sb->s_blocksize)) { + ret = read_sect(sb, logsector, &tmp_bh, 1); + if (ret) + goto err_out; + memcpy(((s8 *) buffer)+read_bytes, ((s8 *) tmp_bh->b_data), (s32) oneblkread); + } else { + ret = read_sect(sb, logsector, &tmp_bh, 1); + if (ret) + goto err_out; + memcpy(((s8 *) buffer)+read_bytes, ((s8 *) tmp_bh->b_data)+offset, (s32) oneblkread); + } + count -= oneblkread; + read_bytes += oneblkread; + fid->rwoffset += oneblkread; + } + +err_out: + brelse(tmp_bh); + + /* set the size of read bytes */ + if (rcount != NULL) + *rcount = read_bytes; + + return ret; +} /* end of fscore_read_link */ + +/* write data into an opened file */ +s32 fscore_write_link(struct inode *inode, FILE_ID_T *fid, void *buffer, u64 count, u64 *wcount) +{ + s32 ret = 0; + s32 modified = false, offset, sec_offset; + u32 clu_offset, num_clusters, num_alloc; + u32 clu, last_clu; + u64 logsector, sector, oneblkwrite, write_bytes; + CHAIN_T new_clu; + TIMESTAMP_T tm; + DENTRY_T *ep, *ep2; + ENTRY_SET_CACHE_T *es = NULL; + struct buffer_head *tmp_bh = NULL; + struct 
super_block *sb = inode->i_sb; + u32 blksize = (u32)sb->s_blocksize; + u32 blksize_mask = (u32)(sb->s_blocksize-1); + u8 blksize_bits = sb->s_blocksize_bits; + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + + /* check if the given file ID is opened */ + if (fid->type != TYPE_FILE) + return -EPERM; + + if (fid->rwoffset > fid->size) + fid->rwoffset = fid->size; + + if (count == 0) { + if (wcount) + *wcount = 0; + return 0; + } + + fs_set_vol_flags(sb, VOL_DIRTY); + + if (fid->size == 0) + num_clusters = 0; + else + num_clusters = ((fid->size-1) >> fsi->cluster_size_bits) + 1; + + write_bytes = 0; + + while (count > 0) { + clu_offset = (fid->rwoffset >> fsi->cluster_size_bits); + clu = last_clu = fid->start_clu; + + if (fid->flags == 0x03) { + if ((clu_offset > 0) && (!IS_CLUS_EOF(clu))) { + last_clu += clu_offset - 1; + + if (clu_offset == num_clusters) + clu = CLUS_EOF; + else + clu += clu_offset; + } + } else { + /* hint information */ + if ((clu_offset > 0) && + ((fid->hint_bmap.off != CLUS_EOF) && (fid->hint_bmap.off > 0)) && + (clu_offset >= fid->hint_bmap.off)) { + clu_offset -= fid->hint_bmap.off; + clu = fid->hint_bmap.clu; + } + + while ((clu_offset > 0) && (!IS_CLUS_EOF(clu))) { + last_clu = clu; + ret = get_next_clus_safe(sb, &clu); + if (ret) + goto err_out; + + clu_offset--; + } + } + + if (IS_CLUS_EOF(clu)) { + num_alloc = ((count-1) >> fsi->cluster_size_bits) + 1; + new_clu.dir = IS_CLUS_EOF(last_clu) ? CLUS_EOF : last_clu+1; + new_clu.size = 0; + new_clu.flags = fid->flags; + + /* (1) allocate a chain of clusters */ + ret = fsi->fs_func->alloc_cluster(sb, num_alloc, &new_clu, ALLOC_COLD); + if (ret) + goto err_out; + + /* (2) append to the FAT chain */ + if (IS_CLUS_EOF(last_clu)) { + if (new_clu.flags == 0x01) + fid->flags = 0x01; + fid->start_clu = new_clu.dir; + modified = true; + } else { + if (new_clu.flags != fid->flags) { + /* no-fat-chain bit is disabled, + * so fat-chain should be synced with + * alloc-bmp + */ + chain_cont_cluster(sb, fid->start_clu, num_clusters); + fid->flags = 0x01; + modified = true; + } + if (new_clu.flags == 0x01) { + ret = fat_ent_set(sb, last_clu, new_clu.dir); + if (ret) + goto err_out; + } + } + + num_clusters += num_alloc; + clu = new_clu.dir; + } + + /* hint information */ + fid->hint_bmap.off = fid->rwoffset >> fsi->cluster_size_bits; + fid->hint_bmap.clu = clu; + + /* byte offset in cluster */ + offset = (s32)(fid->rwoffset & (fsi->cluster_size-1)); + /* sector offset in cluster */ + sec_offset = offset >> blksize_bits; + /* byte offset in sector */ + offset &= blksize_mask; + logsector = CLUS_TO_SECT(fsi, clu) + sec_offset; + + oneblkwrite = (u64)(blksize - offset); + if (oneblkwrite > count) + oneblkwrite = count; + + if ((offset == 0) && (oneblkwrite == blksize)) { + ret = read_sect(sb, logsector, &tmp_bh, 0); + if (ret) + goto err_out; + + memcpy(((s8 *)tmp_bh->b_data), + ((s8 *)buffer)+write_bytes, + (s32)oneblkwrite); + + ret = write_sect(sb, logsector, tmp_bh, 0); + if (ret) { + brelse(tmp_bh); + goto err_out; + } + } else { + if ((offset > 0) || ((fid->rwoffset+oneblkwrite) < fid->size)) { + ret = read_sect(sb, logsector, &tmp_bh, 1); + if (ret) + goto err_out; + } else { + ret = read_sect(sb, logsector, &tmp_bh, 0); + if (ret) + goto err_out; + } + + memcpy(((s8 *) tmp_bh->b_data)+offset, ((s8 *) buffer)+write_bytes, (s32) oneblkwrite); + ret = write_sect(sb, logsector, tmp_bh, 0); + if (ret) { + brelse(tmp_bh); + goto err_out; + } + } + + count -= oneblkwrite; + write_bytes += oneblkwrite; + fid->rwoffset += oneblkwrite; + + 
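/*
+ * Each pass of this loop handles at most one block. Worked example
+ * (illustrative numbers, assuming 4 KiB blocks and larger clusters):
+ * at rwoffset 4000 the code computes offset = 4000 and
+ * oneblkwrite = 4096 - 4000 = 96, so the 96 tail bytes of the block
+ * are read-modify-written; the next pass starts block-aligned
+ * (offset = 0) and whole blocks are written without the prior read.
+ */
+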
fid->attr |= ATTR_ARCHIVE; + + if (fid->size < fid->rwoffset) { + fid->size = fid->rwoffset; + modified = true; + } + } + + brelse(tmp_bh); + + /* (3) update the directory entry */ + /* get_entry_(set_)in_dir should check the DIR_DELETED flag. */ + if (fsi->vol_type == EXFAT) { + es = get_dentry_set_in_dir(sb, &(fid->dir), fid->entry, ES_ALL_ENTRIES, &ep); + if (!es) { + ret = -EIO; + goto err_out; + } + ep2 = ep+1; + } else { + ep = get_dentry_in_dir(sb, &(fid->dir), fid->entry, &sector); + if (!ep) { + ret = -EIO; + goto err_out; + } + ep2 = ep; + } + + fsi->fs_func->set_entry_time(ep, tm_now(SDFAT_SB(sb), &tm), TM_MODIFY); + fsi->fs_func->set_entry_attr(ep, fid->attr); + + if (modified) { + if (fsi->fs_func->get_entry_flag(ep2) != fid->flags) + fsi->fs_func->set_entry_flag(ep2, fid->flags); + + if (fsi->fs_func->get_entry_size(ep2) != fid->size) + fsi->fs_func->set_entry_size(ep2, fid->size); + + if (fsi->fs_func->get_entry_clu0(ep2) != fid->start_clu) + fsi->fs_func->set_entry_clu0(ep2, fid->start_clu); + } + + if (fsi->vol_type == EXFAT) { + if (update_dir_chksum_with_entry_set(sb, es)) { + ret = -EIO; + goto err_out; + } + release_dentry_set(es); + } else { + if (dcache_modify(sb, sector)) { + ret = -EIO; + goto err_out; + } + } + + fs_sync(sb, 0); + fs_set_vol_flags(sb, VOL_CLEAN); + +err_out: + /* set the size of written bytes */ + if (wcount) + *wcount = write_bytes; + + return ret; +} /* end of fscore_write_link */ + +/* resize the file length */ +s32 fscore_truncate(struct inode *inode, u64 old_size, u64 new_size) +{ + u32 num_clusters_new, num_clusters_da, num_clusters_phys; + u32 last_clu = CLUS_FREE; + u64 sector; + CHAIN_T clu; + TIMESTAMP_T tm; + DENTRY_T *ep, *ep2; + struct super_block *sb = inode->i_sb; + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + FILE_ID_T *fid = &(SDFAT_I(inode)->fid); + ENTRY_SET_CACHE_T *es = NULL; + s32 evict = (fid->dir.dir == DIR_DELETED) ? 1 : 0; + + /* check if the given file ID is opened */ + if ((fid->type != TYPE_FILE) && (fid->type != TYPE_DIR)) + return -EPERM; + + /* TO CHECK inode type and size */ + MMSG("%s: inode(%p) type(%s) size:%lld->%lld\n", __func__, inode, + (fid->type == TYPE_FILE) ? "file" : "dir", old_size, new_size); + + /* XXX : This is for debugging. */ + + /* It can happen when a write has failed */ +#if 0 + if (fid->size != old_size) { + DMSG("%s: inode(%p) size-mismatch(old:%lld != fid:%lld)\n", + __func__, inode, old_size, fid->size); + WARN_ON(1); + } +#endif + /* + * There is no lock to protect fid->size. + * So, we should get old_size and use it. + */ + if (old_size <= new_size) + return 0; + + fs_set_vol_flags(sb, VOL_DIRTY); + + /* Reserved count update */ + #define num_clusters(v) ((v) ? (u32)(((v) - 1) >> fsi->cluster_size_bits) + 1 : 0) + num_clusters_da = num_clusters(SDFAT_I(inode)->i_size_aligned); + num_clusters_new = num_clusters(i_size_read(inode)); + num_clusters_phys = num_clusters(SDFAT_I(inode)->i_size_ondisk); + + /* num_clusters(i_size_old) should be equal to num_clusters_da */ + BUG_ON((num_clusters(old_size)) != (num_clusters(SDFAT_I(inode)->i_size_aligned))); + + /* for debugging (FIXME: is okay on no-da case?) */ + BUG_ON(num_clusters_da < num_clusters_phys); + + if ((num_clusters_da != num_clusters_phys) && + (num_clusters_new < num_clusters_da)) { + /* Decrement reserved clusters + * n_reserved = num_clusters_da - max(new,phys) + */ + int n_reserved = (num_clusters_new > num_clusters_phys) ? 
+ (num_clusters_da - num_clusters_new) : + (num_clusters_da - num_clusters_phys); + + fsi->reserved_clusters -= n_reserved; + BUG_ON(fsi->reserved_clusters < 0); + } + + clu.dir = fid->start_clu; + /* In no-da case, num_clusters_phys is equal to below value + * clu.size = (u32)((old_size-1) >> fsi->cluster_size_bits) + 1; + */ + clu.size = num_clusters_phys; + clu.flags = fid->flags; + + /* For bigdata */ + sdfat_statistics_set_trunc(clu.flags, &clu); + + if (new_size > 0) { + /* Truncate FAT chain num_clusters after the first cluster + * num_clusters = min(new, phys); + */ + u32 num_clusters = (num_clusters_new < num_clusters_phys) ? + num_clusters_new : num_clusters_phys; + + /* Follow FAT chain + * (defensive coding - works fine even with a corrupted FAT table) + */ + if (clu.flags == 0x03) { + clu.dir += num_clusters; + clu.size -= num_clusters; +#if 0 + /* extent_get_clus can't know last_cluster + * when it finds the target cluster in the cache. + */ + } else if (fid->type == TYPE_FILE) { + u32 fclus = 0; + s32 err = extent_get_clus(inode, num_clusters, + &fclus, &(clu.dir), &last_clu, 0); + if (err) + return -EIO; + ASSERT(fclus == num_clusters); + + if ((num_clusters > 1) && (last_clu == fid->start_clu)) { + u32 fclus_tmp = 0; + u32 temp = 0; + + err = extent_get_clus(inode, num_clusters - 1, + &fclus_tmp, &last_clu, &temp, 0); + if (err) + return -EIO; + ASSERT(fclus_tmp == (num_clusters - 1)); + } + + num_clusters -= fclus; + clu.size -= fclus; +#endif + } else { + while (num_clusters > 0) { + last_clu = clu.dir; + if (get_next_clus_safe(sb, &(clu.dir))) + return -EIO; + + num_clusters--; + clu.size--; + } + } + + /* Optimization available: */ +#if 0 + if (num_clusters_new < num_clusters) { + < loop > + } else { + // num_clusters_new >= num_clusters_phys + // FAT truncation is not necessary + + clu.dir = CLUS_EOF; + clu.size = 0; + } +#endif + } else if (new_size == 0) { + fid->flags = (fsi->vol_type == EXFAT) ? 
0x03 : 0x01; + fid->start_clu = CLUS_EOF; + } + fid->size = new_size; + + if (fid->type == TYPE_FILE) + fid->attr |= ATTR_ARCHIVE; + + /* + * clu.dir: free from + * clu.size: # of clusters to free (exFAT, 0x03 only), no fat_free if 0 + * clu.flags: fid->flags (exFAT only) + */ + + /* (1) update the directory entry */ + if (!evict) { + + if (fsi->vol_type == EXFAT) { + es = get_dentry_set_in_dir(sb, &(fid->dir), fid->entry, ES_ALL_ENTRIES, &ep); + if (!es) + return -EIO; + ep2 = ep+1; + } else { + ep = get_dentry_in_dir(sb, &(fid->dir), fid->entry, &sector); + if (!ep) + return -EIO; + ep2 = ep; + } + + fsi->fs_func->set_entry_time(ep, tm_now(SDFAT_SB(sb), &tm), TM_MODIFY); + fsi->fs_func->set_entry_attr(ep, fid->attr); + + /* + * if (fsi->vol_type != EXFAT) + * dcache_modify(sb, sector); + */ + + /* File size should be zero if there is no cluster allocated */ + if (IS_CLUS_EOF(fid->start_clu)) + fsi->fs_func->set_entry_size(ep2, 0); + else + fsi->fs_func->set_entry_size(ep2, new_size); + + if (new_size == 0) { + /* A directory can never be truncated to zero size */ + BUG_ON(fid->type != TYPE_FILE); + + fsi->fs_func->set_entry_flag(ep2, 0x01); + fsi->fs_func->set_entry_clu0(ep2, CLUS_FREE); + } + + if (fsi->vol_type == EXFAT) { + if (update_dir_chksum_with_entry_set(sb, es)) + return -EIO; + release_dentry_set(es); + } else { + if (dcache_modify(sb, sector)) + return -EIO; + } + + } /* end of if(fid->dir.dir != DIR_DELETED) */ + + /* (2) cut off from the FAT chain */ + if ((fid->flags == 0x01) && + (!IS_CLUS_FREE(last_clu)) && (!IS_CLUS_EOF(last_clu))) { + if (fat_ent_set(sb, last_clu, CLUS_EOF)) + return -EIO; + } + + /* (3) invalidate cache and free the clusters */ + /* clear extent cache */ + extent_cache_inval_inode(inode); + + /* hint information */ + fid->hint_bmap.off = CLUS_EOF; + fid->hint_bmap.clu = CLUS_EOF; + if (fid->rwoffset > fid->size) + fid->rwoffset = fid->size; + + /* hint_stat will be used if this is a directory. */ + fid->hint_stat.eidx = 0; + fid->hint_stat.clu = fid->start_clu; + fid->hint_femp.eidx = -1; + + /* free the clusters */ + if (fsi->fs_func->free_cluster(sb, &clu, evict)) + return -EIO; + + fs_sync(sb, 0); + fs_set_vol_flags(sb, VOL_CLEAN); + + return 0; +} /* end of fscore_truncate */ + +static void update_parent_info(FILE_ID_T *fid, struct inode *parent_inode) +{ + FS_INFO_T *fsi = &(SDFAT_SB(parent_inode->i_sb)->fsi); + FILE_ID_T *parent_fid = &(SDFAT_I(parent_inode)->fid); + + /* + * handle the problem that FILE_ID_T caches stale parent info : + * + * because of a flag-mismatch in the cached fid->dir, + * cluster-chain traversal can go astray. 
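+ *
+ * For example (illustrative) : if the parent directory was converted
+ * from a contiguous run (flags 0x03, no FAT chain) to a FAT-chained
+ * layout (flags 0x01) after fid->dir was cached, walking fid->dir
+ * with the stale 0x03 flag computes cluster numbers by plain
+ * addition instead of following the FAT and lands on unrelated
+ * clusters. Hence the re-sync from the parent inode below whenever
+ * flags, size or start cluster disagree.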
+ */ + if (unlikely((parent_fid->flags != fid->dir.flags) + || (parent_fid->size != (fid->dir.size<<fsi->cluster_size_bits)) + || (parent_fid->start_clu != fid->dir.dir))) { + + fid->dir.dir = parent_fid->start_clu; + fid->dir.flags = parent_fid->flags; + fid->dir.size = ((parent_fid->size + (fsi->cluster_size-1)) + >> fsi->cluster_size_bits); + } +} + +/* rename or move an old file to a new one */ +s32 fscore_rename(struct inode *old_parent_inode, FILE_ID_T *fid, + struct inode *new_parent_inode, struct dentry *new_dentry) +{ + s32 ret; + s32 dentry; + CHAIN_T olddir, newdir; + CHAIN_T *p_dir = NULL; + UNI_NAME_T uni_name; + DENTRY_T *ep; + struct super_block *sb = old_parent_inode->i_sb; + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + u8 *new_path = (u8 *) new_dentry->d_name.name; + struct inode *new_inode = new_dentry->d_inode; + int num_entries; + FILE_ID_T *new_fid = NULL; + u32 new_entry_type = TYPE_UNUSED; + s32 new_entry = 0; + + /* check the validity of pointer parameters */ + if ((new_path == NULL) || (strlen(new_path) == 0)) + return -EINVAL; + + if (fid->dir.dir == DIR_DELETED) { + EMSG("%s : abnormal access to deleted source dentry\n", __func__); + return -ENOENT; + } + + /* patch 1.2.4 : fix the problem that FILE_ID_T caches wrong parent info. */ + update_parent_info(fid, old_parent_inode); + + olddir.dir = fid->dir.dir; + olddir.size = fid->dir.size; + olddir.flags = fid->dir.flags; + + dentry = fid->entry; + + /* check if the old file is "." or ".." */ + if (fsi->vol_type != EXFAT) { + if ((olddir.dir != fsi->root_dir) && (dentry < 2)) + return -EPERM; + } + + ep = get_dentry_in_dir(sb, &olddir, dentry, NULL); + if (!ep) + return -EIO; + +#ifdef CONFIG_SDFAT_CHECK_RO_ATTR + if (fsi->fs_func->get_entry_attr(ep) & ATTR_READONLY) + return -EPERM; +#endif + + /* check whether the new inode is an existing, empty directory */ + if (new_inode) { + ret = -EIO; + new_fid = &SDFAT_I(new_inode)->fid; + + if (new_fid->dir.dir == DIR_DELETED) { + EMSG("%s : abnormal access to deleted target dentry\n", __func__); + goto out; + } + + /* patch 1.2.4 : + * fix the problem that FILE_ID_T caches wrong parent info. + * + * FIXME : is needed? 
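+ *
+ * The same stale-parent hazard handled by update_parent_info()
+ * applies to the target dentry as well, so re-syncing here is
+ * cheap and harmless even if it later proves redundant.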
+ */ + update_parent_info(new_fid, new_parent_inode); + + p_dir = &(new_fid->dir); + new_entry = new_fid->entry; + ep = get_dentry_in_dir(sb, p_dir, new_entry, NULL); + if (!ep) + goto out; + + new_entry_type = fsi->fs_func->get_entry_type(ep); + + /* if new_inode exists, update fid */ + new_fid->size = i_size_read(new_inode); + + if (new_entry_type == TYPE_DIR) { + CHAIN_T new_clu; + + new_clu.dir = new_fid->start_clu; + new_clu.size = ((new_fid->size-1) >> fsi->cluster_size_bits) + 1; + new_clu.flags = new_fid->flags; + + ret = check_dir_empty(sb, &new_clu); + if (ret) + return ret; + } + } + + /* check the validity of directory name in the given new pathname */ + ret = resolve_path(new_parent_inode, new_path, &newdir, &uni_name); + if (ret) + return ret; + + fs_set_vol_flags(sb, VOL_DIRTY); + + if (olddir.dir == newdir.dir) + ret = rename_file(new_parent_inode, &olddir, dentry, &uni_name, fid); + else + ret = move_file(new_parent_inode, &olddir, dentry, &newdir, &uni_name, fid); + + if ((!ret) && new_inode) { + /* delete entries of new_dir */ + ep = get_dentry_in_dir(sb, p_dir, new_entry, NULL); + if (!ep) { + ret = -EIO; + goto del_out; + } + + num_entries = fsi->fs_func->count_ext_entries(sb, p_dir, new_entry, ep); + if (num_entries < 0) { + ret = -EIO; + goto del_out; + } + + + if (fsi->fs_func->delete_dir_entry(sb, p_dir, new_entry, 0, num_entries+1)) { + ret = -EIO; + goto del_out; + } + + /* Free the clusters if new_inode is a dir(as if fscore_rmdir) */ + if (new_entry_type == TYPE_DIR) { + /* new_fid, new_clu_to_free */ + CHAIN_T new_clu_to_free; + + new_clu_to_free.dir = new_fid->start_clu; + new_clu_to_free.size = ((new_fid->size-1) >> fsi->cluster_size_bits) + 1; + new_clu_to_free.flags = new_fid->flags; + + if (fsi->fs_func->free_cluster(sb, &new_clu_to_free, 1)) { + /* just set I/O error only */ + ret = -EIO; + } + + new_fid->size = 0; + new_fid->start_clu = CLUS_EOF; + new_fid->flags = (fsi->vol_type == EXFAT) ? 
0x03 : 0x01; + } +del_out: + /* Update new_inode fid + * Prevent syncing removed new_inode + * (new_fid was already initialized in the "if (new_inode)" block above) + */ + new_fid->dir.dir = DIR_DELETED; + } +out: + fs_sync(sb, 0); + fs_set_vol_flags(sb, VOL_CLEAN); + + return ret; +} /* end of fscore_rename */ + +/* remove a file */ +s32 fscore_remove(struct inode *inode, FILE_ID_T *fid) +{ + s32 ret; + s32 dentry; + CHAIN_T dir, clu_to_free; + DENTRY_T *ep; + struct super_block *sb = inode->i_sb; + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + + dir.dir = fid->dir.dir; + dir.size = fid->dir.size; + dir.flags = fid->dir.flags; + + dentry = fid->entry; + + if (fid->dir.dir == DIR_DELETED) { + EMSG("%s : abnormal access to deleted dentry\n", __func__); + return -ENOENT; + } + + ep = get_dentry_in_dir(sb, &dir, dentry, NULL); + if (!ep) + return -EIO; + + +#ifdef CONFIG_SDFAT_CHECK_RO_ATTR + if (fsi->fs_func->get_entry_attr(ep) & ATTR_READONLY) + return -EPERM; +#endif + + fs_set_vol_flags(sb, VOL_DIRTY); + + /* (1) update the directory entry */ + ret = remove_file(inode, &dir, dentry); + if (ret) + goto out; + + clu_to_free.dir = fid->start_clu; + clu_to_free.size = ((fid->size-1) >> fsi->cluster_size_bits) + 1; + clu_to_free.flags = fid->flags; + + /* (2) invalidate extent cache and free the clusters + */ + /* clear extent cache */ + extent_cache_inval_inode(inode); + ret = fsi->fs_func->free_cluster(sb, &clu_to_free, 0); + /* WARN : DO NOT RETURN ERROR IN HERE */ + + /* (3) update FILE_ID_T */ + fid->size = 0; + fid->start_clu = CLUS_EOF; + fid->flags = (fsi->vol_type == EXFAT) ? 0x03 : 0x01; + fid->dir.dir = DIR_DELETED; + + fs_sync(sb, 0); + fs_set_vol_flags(sb, VOL_CLEAN); +out: + return ret; +} /* end of fscore_remove */ + + +/* + * Get the information of a given file + * REMARK : This function does not need any file name on linux + * + * info.Size means the value saved on disk. + * But the root directory doesn't have a real dentry, + * so its size is returned as a calculated value, as an exception. + */ +s32 fscore_read_inode(struct inode *inode, DIR_ENTRY_T *info) +{ + u64 sector; + s32 count; + CHAIN_T dir; + TIMESTAMP_T tm; + DENTRY_T *ep, *ep2; + struct super_block *sb = inode->i_sb; + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + FILE_ID_T *fid = &(SDFAT_I(inode)->fid); + ENTRY_SET_CACHE_T *es = NULL; + u8 is_dir = (fid->type == TYPE_DIR) ? 
1 : 0; + + TMSG("%s entered\n", __func__); + + extent_cache_init_inode(inode); + + /* if root directory */ + if (is_dir && (fid->dir.dir == fsi->root_dir) && (fid->entry == -1)) { + info->Attr = ATTR_SUBDIR; + memset((s8 *) &info->CreateTimestamp, 0, sizeof(DATE_TIME_T)); + memset((s8 *) &info->ModifyTimestamp, 0, sizeof(DATE_TIME_T)); + memset((s8 *) &info->AccessTimestamp, 0, sizeof(DATE_TIME_T)); + //strcpy(info->NameBuf.sfn, "."); + //strcpy(info->NameBuf.lfn, "."); + + dir.dir = fsi->root_dir; + dir.flags = 0x01; + dir.size = 0; /* UNUSED */ + + /* FAT16 root_dir */ + if (IS_CLUS_FREE(fsi->root_dir)) { + info->Size = fsi->dentries_in_root << DENTRY_SIZE_BITS; + } else { + u32 num_clu; + + if (__count_num_clusters(sb, &dir, &num_clu)) + return -EIO; + info->Size = (u64)num_clu << fsi->cluster_size_bits; + } + + count = __count_dos_name_entries(sb, &dir, TYPE_DIR, NULL); + if (count < 0) + return -EIO; + info->NumSubdirs = count; + + return 0; + } + + /* get the directory entry of given file or directory */ + if (fsi->vol_type == EXFAT) { + /* es should be released */ + es = get_dentry_set_in_dir(sb, &(fid->dir), fid->entry, ES_2_ENTRIES, &ep); + if (!es) + return -EIO; + ep2 = ep+1; + } else { + ep = get_dentry_in_dir(sb, &(fid->dir), fid->entry, &sector); + if (!ep) + return -EIO; + ep2 = ep; + /* dcache should be unlocked */ + dcache_lock(sb, sector); + } + + /* set FILE_INFO structure using the acquired DENTRY_T */ + info->Attr = fsi->fs_func->get_entry_attr(ep); + + fsi->fs_func->get_entry_time(ep, &tm, TM_CREATE); + info->CreateTimestamp.Year = tm.year; + info->CreateTimestamp.Month = tm.mon; + info->CreateTimestamp.Day = tm.day; + info->CreateTimestamp.Hour = tm.hour; + info->CreateTimestamp.Minute = tm.min; + info->CreateTimestamp.Second = tm.sec; + info->CreateTimestamp.MilliSecond = 0; + info->CreateTimestamp.Timezone.value = tm.tz.value; + + fsi->fs_func->get_entry_time(ep, &tm, TM_MODIFY); + info->ModifyTimestamp.Year = tm.year; + info->ModifyTimestamp.Month = tm.mon; + info->ModifyTimestamp.Day = tm.day; + info->ModifyTimestamp.Hour = tm.hour; + info->ModifyTimestamp.Minute = tm.min; + info->ModifyTimestamp.Second = tm.sec; + info->ModifyTimestamp.MilliSecond = 0; + info->ModifyTimestamp.Timezone.value = tm.tz.value; + + memset((s8 *) &info->AccessTimestamp, 0, sizeof(DATE_TIME_T)); + + info->NumSubdirs = 0; + info->Size = fsi->fs_func->get_entry_size(ep2); + + if (fsi->vol_type == EXFAT) + release_dentry_set(es); + else + dcache_unlock(sb, sector); + + if (is_dir) { + u32 dotcnt = 0; + + dir.dir = fid->start_clu; + dir.flags = fid->flags; + dir.size = fid->size >> fsi->cluster_size_bits; + /* + * NOTE : + * If "dir.flags" has 0x01, "dir.size" is meaningless. + */ +#if 0 + if (info->Size == 0) { + s32 num_clu; + + if (__count_num_clusters(sb, &dir, &num_clu)) + return -EIO; + info->Size = (u64)num_clu << fsi->cluster_size_bits; + } +#endif + count = __count_dos_name_entries(sb, &dir, TYPE_DIR, &dotcnt); + if (count < 0) + return -EIO; + + if (fsi->vol_type == EXFAT) { + count += SDFAT_MIN_SUBDIR; + } else { + /* + * if the directory has been corrupted, + * we have to adjust the subdir count. 
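+ *
+ * Every FAT subdirectory should carry the two dot entries "."
+ * and "..". If corruption left only one of them, dotcnt comes
+ * back below SDFAT_MIN_SUBDIR and the difference is added back
+ * so that NumSubdirs still accounts for both dot entries.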
+ */ + BUG_ON(dotcnt > SDFAT_MIN_SUBDIR); + if (dotcnt < SDFAT_MIN_SUBDIR) { + EMSG("%s: contents of the directory has been " + "corrupted (parent clus : %08x, idx : %d)", + __func__, fid->dir.dir, fid->entry); + } + count += (SDFAT_MIN_SUBDIR - dotcnt); + } + info->NumSubdirs = count; + } + + TMSG("%s exited successfully\n", __func__); + return 0; +} /* end of fscore_read_inode */ + +/* set the information of a given file + * REMARK : This function does not need any file name on linux + */ +s32 fscore_write_inode(struct inode *inode, DIR_ENTRY_T *info, s32 sync) +{ + s32 ret = -EIO; + u64 sector; + TIMESTAMP_T tm; + DENTRY_T *ep, *ep2; + ENTRY_SET_CACHE_T *es = NULL; + struct super_block *sb = inode->i_sb; + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + FILE_ID_T *fid = &(SDFAT_I(inode)->fid); + u8 is_dir = (fid->type == TYPE_DIR) ? 1 : 0; + + + /* SKIP WRITING INODE : + * if the inode is already unlinked, + * there is no need for updating it + */ + if (fid->dir.dir == DIR_DELETED) + return 0; + + if (is_dir && (fid->dir.dir == fsi->root_dir) && (fid->entry == -1)) + return 0; + + fs_set_vol_flags(sb, VOL_DIRTY); + + /* get the directory entry of given file or directory */ + if (fsi->vol_type == EXFAT) { + es = get_dentry_set_in_dir(sb, &(fid->dir), fid->entry, ES_ALL_ENTRIES, &ep); + if (!es) + return -EIO; + ep2 = ep+1; + } else { + /* for other than exfat */ + ep = get_dentry_in_dir(sb, &(fid->dir), fid->entry, &sector); + if (!ep) + return -EIO; + ep2 = ep; + } + + + fsi->fs_func->set_entry_attr(ep, info->Attr); + + /* set FILE_INFO structure using the acquired DENTRY_T */ + tm.tz = info->CreateTimestamp.Timezone; + tm.sec = info->CreateTimestamp.Second; + tm.min = info->CreateTimestamp.Minute; + tm.hour = info->CreateTimestamp.Hour; + tm.day = info->CreateTimestamp.Day; + tm.mon = info->CreateTimestamp.Month; + tm.year = info->CreateTimestamp.Year; + fsi->fs_func->set_entry_time(ep, &tm, TM_CREATE); + + tm.tz = info->ModifyTimestamp.Timezone; + tm.sec = info->ModifyTimestamp.Second; + tm.min = info->ModifyTimestamp.Minute; + tm.hour = info->ModifyTimestamp.Hour; + tm.day = info->ModifyTimestamp.Day; + tm.mon = info->ModifyTimestamp.Month; + tm.year = info->ModifyTimestamp.Year; + fsi->fs_func->set_entry_time(ep, &tm, TM_MODIFY); + + if (is_dir && fsi->vol_type != EXFAT) { + /* overwrite dir size if FAT32 and dir size != 0 */ + if (fsi->fs_func->get_entry_size(ep2)) + fsi->fs_func->set_entry_size(ep2, 0); + } else { + /* File size should be zero if there is no cluster allocated */ + u64 on_disk_size = info->Size; + + if (IS_CLUS_EOF(fid->start_clu)) + on_disk_size = 0; + + fsi->fs_func->set_entry_size(ep2, on_disk_size); + } + + if (fsi->vol_type == EXFAT) { + ret = update_dir_chksum_with_entry_set(sb, es); + release_dentry_set(es); + } else { + ret = dcache_modify(sb, sector); + } + + fs_sync(sb, sync); + /* The line below is commented out to prevent frequent super block updates */ + //fs_set_vol_flags(sb, VOL_CLEAN); + + return ret; +} /* end of fscore_write_inode */ + + +/* + * Input: inode, (logical) clu_offset, target allocation area + * Output: errcode, cluster number + * *clu = (~0), if it's unable to allocate a new cluster + */ +s32 fscore_map_clus(struct inode *inode, u32 clu_offset, u32 *clu, int dest) +{ + s32 ret, modified = false; + u32 last_clu; + u64 sector; + CHAIN_T new_clu; + DENTRY_T *ep; + ENTRY_SET_CACHE_T *es = NULL; + struct super_block *sb = inode->i_sb; + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + FILE_ID_T *fid = &(SDFAT_I(inode)->fid); + u32 local_clu_offset = clu_offset; + s32 
reserved_clusters = fsi->reserved_clusters; + u32 num_to_be_allocated = 0, num_clusters = 0; + + fid->rwoffset = (s64)(clu_offset) << fsi->cluster_size_bits; + + if (SDFAT_I(inode)->i_size_ondisk > 0) + num_clusters = (u32)((SDFAT_I(inode)->i_size_ondisk-1) >> fsi->cluster_size_bits) + 1; + + if (clu_offset >= num_clusters) + num_to_be_allocated = clu_offset - num_clusters + 1; + + if ((dest == ALLOC_NOWHERE) && (num_to_be_allocated > 0)) { + *clu = CLUS_EOF; + return 0; + } + + /* check that the requested cluster count is always 1 */ + //ASSERT(num_to_be_allocated == 1); + + sdfat_debug_check_clusters(inode); + + *clu = last_clu = fid->start_clu; + + /* XXX: Defensive code needed. + * what if i_size_ondisk != # of allocated clusters + */ + if (fid->flags == 0x03) { + if ((clu_offset > 0) && (!IS_CLUS_EOF(*clu))) { + last_clu += clu_offset - 1; + + if (clu_offset == num_clusters) + *clu = CLUS_EOF; + else + *clu += clu_offset; + } + } else if (fid->type == TYPE_FILE) { + u32 fclus = 0; + s32 err = extent_get_clus(inode, clu_offset, + &fclus, clu, &last_clu, 1); + if (err) + return -EIO; + + clu_offset -= fclus; + } else { + /* hint information */ + if ((clu_offset > 0) && + ((fid->hint_bmap.off != CLUS_EOF) && (fid->hint_bmap.off > 0)) && + (clu_offset >= fid->hint_bmap.off)) { + clu_offset -= fid->hint_bmap.off; + /* hint_bmap.clu should be valid */ + ASSERT(fid->hint_bmap.clu >= 2); + *clu = fid->hint_bmap.clu; + } + + while ((clu_offset > 0) && (!IS_CLUS_EOF(*clu))) { + last_clu = *clu; + if (get_next_clus_safe(sb, clu)) + return -EIO; + clu_offset--; + } + } + + if (IS_CLUS_EOF(*clu)) { + fs_set_vol_flags(sb, VOL_DIRTY); + + new_clu.dir = (IS_CLUS_EOF(last_clu)) ? CLUS_EOF : last_clu + 1; + new_clu.size = 0; + new_clu.flags = fid->flags; + + /* (1) allocate a cluster */ + if (num_to_be_allocated < 1) { + /* Broken FAT (i_size > allocated FAT) */ + EMSG("%s: invalid fat chain : inode(%p) " + "num_to_be_allocated(%d) " + "i_size_ondisk(%lld) fid->flags(%02x) " + "fid->start(%08x) fid->hint_off(%u) " + "fid->hint_clu(%u) fid->rwoffset(%llu) " + "modified_clu_off(%d) last_clu(%08x) " + "new_clu(%08x)", __func__, inode, + num_to_be_allocated, + (SDFAT_I(inode)->i_size_ondisk), + fid->flags, fid->start_clu, + fid->hint_bmap.off, fid->hint_bmap.clu, + fid->rwoffset, clu_offset, + last_clu, new_clu.dir); + sdfat_fs_error(sb, "broken FAT chain."); + return -EIO; + } + + ret = fsi->fs_func->alloc_cluster(sb, num_to_be_allocated, &new_clu, ALLOC_COLD); + if (ret) + return ret; + + if (IS_CLUS_EOF(new_clu.dir) || IS_CLUS_FREE(new_clu.dir)) { + sdfat_fs_error(sb, "bogus cluster new allocated" + "(last_clu : %u, new_clu : %u)", + last_clu, new_clu.dir); + ASSERT(0); + return -EIO; + } + + /* Reserved cluster dec. 
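With delayed allocation (SDFAT_ALLOC_DELAY) the clusters materialized here were already counted in reserved_clusters at write time, so the reservation is released as they become real: e.g. with 8 clusters reserved and 3 allocated now, 5 stay reserved for data not yet mapped.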
*/ + // XXX: Inode DA flag needed + if (SDFAT_SB(sb)->options.improved_allocation & SDFAT_ALLOC_DELAY) { + BUG_ON(reserved_clusters < num_to_be_allocated); + reserved_clusters -= num_to_be_allocated; + + } + + /* (2) append to the FAT chain */ + if (IS_CLUS_EOF(last_clu)) { + if (new_clu.flags == 0x01) + fid->flags = 0x01; + fid->start_clu = new_clu.dir; + modified = true; + } else { + if (new_clu.flags != fid->flags) { + /* no-fat-chain bit is disabled, + * so fat-chain should be synced with alloc-bmp + */ + chain_cont_cluster(sb, fid->start_clu, num_clusters); + fid->flags = 0x01; + modified = true; + } + if (new_clu.flags == 0x01) + if (fat_ent_set(sb, last_clu, new_clu.dir)) + return -EIO; + } + + num_clusters += num_to_be_allocated; + *clu = new_clu.dir; + + if (fid->dir.dir != DIR_DELETED) { + + if (fsi->vol_type == EXFAT) { + es = get_dentry_set_in_dir(sb, &(fid->dir), fid->entry, ES_ALL_ENTRIES, &ep); + if (!es) + return -EIO; + /* get stream entry */ + ep++; + } + + /* (3) update directory entry */ + if (modified) { + if (fsi->vol_type != EXFAT) { + ep = get_dentry_in_dir(sb, &(fid->dir), fid->entry, &sector); + if (!ep) + return -EIO; + } + + if (fsi->fs_func->get_entry_flag(ep) != fid->flags) + fsi->fs_func->set_entry_flag(ep, fid->flags); + + if (fsi->fs_func->get_entry_clu0(ep) != fid->start_clu) + fsi->fs_func->set_entry_clu0(ep, fid->start_clu); + + fsi->fs_func->set_entry_size(ep, fid->size); + + if (fsi->vol_type != EXFAT) { + if (dcache_modify(sb, sector)) + return -EIO; + } + } + + if (fsi->vol_type == EXFAT) { + if (update_dir_chksum_with_entry_set(sb, es)) + return -EIO; + release_dentry_set(es); + } + + } /* end of if != DIR_DELETED */ + + + /* add number of new blocks to inode (non-DA only) */ + if (!(SDFAT_SB(sb)->options.improved_allocation & SDFAT_ALLOC_DELAY)) { + inode->i_blocks += num_to_be_allocated << (fsi->cluster_size_bits - sb->s_blocksize_bits); + } else { + // In the DA case, i_blocks must already have been increased. + BUG_ON(clu_offset >= (inode->i_blocks >> (fsi->cluster_size_bits - sb->s_blocksize_bits))); + } +#if 0 + fs_sync(sb, 0); + fs_set_vol_flags(sb, VOL_CLEAN); +#endif + /* (4) Move *clu pointer along FAT chains (hole care) + * because the caller of this function expects *clu to be the last cluster. + * This only works when num_to_be_allocated >= 2, + * *clu = (the first cluster of the allocated chain) => (the last cluster of ...) 
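+ *
+ * Example (illustrative) : after allocating 3 clusters starting at
+ * cluster 100, the contiguous case (flags 0x03) ends with
+ * *clu = 100 + 3 - 1 = 102 by plain arithmetic, while the chained
+ * case follows the FAT twice, e.g. 100 -> 57 -> 203 (clusters need
+ * not be adjacent), so both paths end at the last new cluster.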
+ */ + if (fid->flags == 0x03) { + *clu += num_to_be_allocated - 1; + } else { + while (num_to_be_allocated > 1) { + if (get_next_clus_safe(sb, clu)) + return -EIO; + num_to_be_allocated--; + } + } + + } + + /* update reserved_clusters */ + fsi->reserved_clusters = reserved_clusters; + + /* hint information */ + fid->hint_bmap.off = local_clu_offset; + fid->hint_bmap.clu = *clu; + + return 0; +} /* end of fscore_map_clus */ + +/* allocate reserved cluster */ +s32 fscore_reserve_clus(struct inode *inode) +{ + struct super_block *sb = inode->i_sb; + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + + if ((fsi->used_clusters + fsi->reserved_clusters) >= (fsi->num_clusters - 2)) + return -ENOSPC; + + if (bdev_check_bdi_valid(sb)) + return -EIO; + + fsi->reserved_clusters++; + + /* inode->i_blocks update */ + inode->i_blocks += 1 << (fsi->cluster_size_bits - sb->s_blocksize_bits); + + sdfat_debug_check_clusters(inode); + + return 0; +} + +/* remove an entry, BUT don't truncate */ +s32 fscore_unlink(struct inode *inode, FILE_ID_T *fid) +{ + s32 dentry; + CHAIN_T dir; + DENTRY_T *ep; + struct super_block *sb = inode->i_sb; + + dir.dir = fid->dir.dir; + dir.size = fid->dir.size; + dir.flags = fid->dir.flags; + + dentry = fid->entry; + + if (fid->dir.dir == DIR_DELETED) { + EMSG("%s : abnormal access to deleted dentry\n", __func__); + return -ENOENT; + } + + ep = get_dentry_in_dir(sb, &dir, dentry, NULL); + if (!ep) + return -EIO; + +#ifdef CONFIG_SDFAT_CHECK_RO_ATTR + if (SDFAT_SB(sb)->fsi.fs_func->get_entry_attr(ep) & ATTR_READONLY) + return -EPERM; +#endif + + fs_set_vol_flags(sb, VOL_DIRTY); + + /* (1) update the directory entry */ + if (remove_file(inode, &dir, dentry)) + return -EIO; + + /* This doesn't modify fid */ + fid->dir.dir = DIR_DELETED; + + fs_sync(sb, 0); + fs_set_vol_flags(sb, VOL_CLEAN); + + return 0; +} + +/*----------------------------------------------------------------------*/ +/* Directory Operation Functions */ +/*----------------------------------------------------------------------*/ + +/* create a directory */ +s32 fscore_mkdir(struct inode *inode, u8 *path, FILE_ID_T *fid) +{ + s32 ret/*, dentry*/; + CHAIN_T dir; + UNI_NAME_T uni_name; + struct super_block *sb = inode->i_sb; + + TMSG("%s entered\n", __func__); + + /* check the validity of directory name in the given old pathname */ + ret = resolve_path(inode, path, &dir, &uni_name); + if (ret) + goto out; + + fs_set_vol_flags(sb, VOL_DIRTY); + + ret = create_dir(inode, &dir, &uni_name, fid); + + fs_sync(sb, 0); + fs_set_vol_flags(sb, VOL_CLEAN); +out: + TMSG("%s exited with err(%d)\n", __func__, ret); + return ret; +} + +/* read a directory entry from the opened directory */ +s32 fscore_readdir(struct inode *inode, DIR_ENTRY_T *dir_entry) +{ + s32 i; + s32 dentries_per_clu, dentries_per_clu_bits = 0; + u32 type, clu_offset; + u64 sector; + CHAIN_T dir, clu; + UNI_NAME_T uni_name; + TIMESTAMP_T tm; + DENTRY_T *ep; + struct super_block *sb = inode->i_sb; + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + FILE_ID_T *fid = &(SDFAT_I(inode)->fid); + u32 dentry = (u32)(fid->rwoffset & 0xFFFFFFFF); /* u32 is enough for directory */ + + /* check if the given file ID is opened */ + if (fid->type != TYPE_DIR) + return -EPERM; + + if (fid->entry == -1) { + dir.dir = fsi->root_dir; + dir.size = 0; /* just initialize, but will not use */ + dir.flags = 0x01; + } else { + dir.dir = fid->start_clu; + dir.size = fid->size >> fsi->cluster_size_bits; + dir.flags = fid->flags; + sdfat_debug_bug_on(dentry >= (dir.size * fsi->dentries_per_clu)); + } + + if 
(IS_CLUS_FREE(dir.dir)) { /* FAT16 root_dir */ + dentries_per_clu = fsi->dentries_in_root; + + /* Prevent readdir over directory size */ + if (dentry >= dentries_per_clu) { + clu.dir = CLUS_EOF; + } else { + clu.dir = dir.dir; + clu.size = dir.size; + clu.flags = dir.flags; + } + } else { + dentries_per_clu = fsi->dentries_per_clu; + dentries_per_clu_bits = ilog2(dentries_per_clu); + + clu_offset = dentry >> dentries_per_clu_bits; + clu.dir = dir.dir; + clu.size = dir.size; + clu.flags = dir.flags; + + if (clu.flags == 0x03) { + clu.dir += clu_offset; + clu.size -= clu_offset; + } else { + /* hint information */ + if ((clu_offset > 0) && + ((fid->hint_bmap.off != CLUS_EOF) && (fid->hint_bmap.off > 0)) && + (clu_offset >= fid->hint_bmap.off)) { + clu_offset -= fid->hint_bmap.off; + clu.dir = fid->hint_bmap.clu; + } + + while (clu_offset > 0) { + if (get_next_clus_safe(sb, &(clu.dir))) + return -EIO; + + clu_offset--; + } + } + } + + while (!IS_CLUS_EOF(clu.dir)) { + if (IS_CLUS_FREE(dir.dir)) /* FAT16 root_dir */ + i = dentry % dentries_per_clu; + else + i = dentry & (dentries_per_clu-1); + + for ( ; i < dentries_per_clu; i++, dentry++) { + ep = get_dentry_in_dir(sb, &clu, i, &sector); + if (!ep) + return -EIO; + + type = fsi->fs_func->get_entry_type(ep); + + if (type == TYPE_UNUSED) + break; + + if ((type != TYPE_FILE) && (type != TYPE_DIR)) + continue; + + dcache_lock(sb, sector); + dir_entry->Attr = fsi->fs_func->get_entry_attr(ep); + + fsi->fs_func->get_entry_time(ep, &tm, TM_CREATE); + dir_entry->CreateTimestamp.Year = tm.year; + dir_entry->CreateTimestamp.Month = tm.mon; + dir_entry->CreateTimestamp.Day = tm.day; + dir_entry->CreateTimestamp.Hour = tm.hour; + dir_entry->CreateTimestamp.Minute = tm.min; + dir_entry->CreateTimestamp.Second = tm.sec; + dir_entry->CreateTimestamp.MilliSecond = 0; + + fsi->fs_func->get_entry_time(ep, &tm, TM_MODIFY); + dir_entry->ModifyTimestamp.Year = tm.year; + dir_entry->ModifyTimestamp.Month = tm.mon; + dir_entry->ModifyTimestamp.Day = tm.day; + dir_entry->ModifyTimestamp.Hour = tm.hour; + dir_entry->ModifyTimestamp.Minute = tm.min; + dir_entry->ModifyTimestamp.Second = tm.sec; + dir_entry->ModifyTimestamp.MilliSecond = 0; + + memset((s8 *) &dir_entry->AccessTimestamp, 0, sizeof(DATE_TIME_T)); + + *(uni_name.name) = 0x0; + fsi->fs_func->get_uniname_from_ext_entry(sb, &dir, dentry, uni_name.name); + if (*(uni_name.name) == 0x0) + get_uniname_from_dos_entry(sb, (DOS_DENTRY_T *) ep, &uni_name, 0x1); + nls_uni16s_to_vfsname(sb, &uni_name, + dir_entry->NameBuf.lfn, + dir_entry->NameBuf.lfnbuf_len); + dcache_unlock(sb, sector); + + if (fsi->vol_type == EXFAT) { + ep = get_dentry_in_dir(sb, &clu, i+1, NULL); + if (!ep) + return -EIO; + } else { + get_uniname_from_dos_entry(sb, (DOS_DENTRY_T *) ep, &uni_name, 0x0); + nls_uni16s_to_vfsname(sb, &uni_name, + dir_entry->NameBuf.sfn, + dir_entry->NameBuf.sfnbuf_len); + } + + dir_entry->Size = fsi->fs_func->get_entry_size(ep); + + /* + * Update hint information : + * fat16 root directory does not need it. 
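+ *
+ * The hint records which cluster holds the entry just emitted so
+ * the next fscore_readdir() call can resume there instead of
+ * re-walking the chain: e.g. with 128 dentries per cluster, entry
+ * 300 yields hint_bmap.off = 300 >> 7 = 2 plus the matching
+ * cluster number. The FAT16 root directory is a flat fixed-size
+ * area without a cluster chain, so it keeps no hint.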
+ */ + if (!IS_CLUS_FREE(dir.dir)) { + fid->hint_bmap.off = dentry >> dentries_per_clu_bits; + fid->hint_bmap.clu = clu.dir; + } + + fid->rwoffset = (s64) ++dentry; + + return 0; + } + + /* fat16 root directory */ + if (IS_CLUS_FREE(dir.dir)) + break; + + if (clu.flags == 0x03) { + if ((--clu.size) > 0) + clu.dir++; + else + clu.dir = CLUS_EOF; + } else { + if (get_next_clus_safe(sb, &(clu.dir))) + return -EIO; + } + } + + dir_entry->NameBuf.lfn[0] = '\0'; + + fid->rwoffset = (s64)dentry; + + return 0; +} /* end of fscore_readdir */ + +/* remove a directory */ +s32 fscore_rmdir(struct inode *inode, FILE_ID_T *fid) +{ + s32 ret; + s32 dentry; + DENTRY_T *ep; + CHAIN_T dir, clu_to_free; + struct super_block *sb = inode->i_sb; + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + + dir.dir = fid->dir.dir; + dir.size = fid->dir.size; + dir.flags = fid->dir.flags; + + dentry = fid->entry; + + if (fid->dir.dir == DIR_DELETED) { + EMSG("%s : abnormal access to deleted dentry\n", __func__); + return -ENOENT; + } + + /* check if the file is "." or ".." */ + if (fsi->vol_type != EXFAT) { + if ((dir.dir != fsi->root_dir) && (dentry < 2)) + return -EPERM; + } + + ep = get_dentry_in_dir(sb, &dir, dentry, NULL); + if (!ep) + return -EIO; + +#ifdef CONFIG_SDFAT_CHECK_RO_ATTR + if (SDFAT_SB(sb)->fsi.fs_func->get_entry_attr(ep) & ATTR_READONLY) + return -EPERM; +#endif + + clu_to_free.dir = fid->start_clu; + clu_to_free.size = ((fid->size-1) >> fsi->cluster_size_bits) + 1; + clu_to_free.flags = fid->flags; + + ret = check_dir_empty(sb, &clu_to_free); + if (ret) { + if (ret == -EIO) + EMSG("%s : failed to check_dir_empty : err(%d)\n", + __func__, ret); + return ret; + } + + fs_set_vol_flags(sb, VOL_DIRTY); + + /* (1) update the directory entry */ + ret = remove_file(inode, &dir, dentry); + if (ret) { + EMSG("%s : failed to remove_file : err(%d)\n", __func__, ret); + return ret; + } + + fid->dir.dir = DIR_DELETED; + + fs_sync(sb, 0); + fs_set_vol_flags(sb, VOL_CLEAN); + + return ret; +} /* end of fscore_rmdir */ + +/* end of core.c */ diff --git a/fs/sdfat/core.h b/fs/sdfat/core.h new file mode 100644 index 000000000000..1f8ed5a28ef3 --- /dev/null +++ b/fs/sdfat/core.h @@ -0,0 +1,221 @@ +/* + * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef _SDFAT_CORE_H +#define _SDFAT_CORE_H + +#include <asm/byteorder.h> + +#include "config.h" +#include "api.h" +#include "upcase.h" + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/*----------------------------------------------------------------------*/ +/* Constant & Macro Definitions */ +/*----------------------------------------------------------------------*/ +#define get_next_clus(sb, pclu) fat_ent_get(sb, *(pclu), pclu) +#define get_next_clus_safe(sb, pclu) fat_ent_get_safe(sb, *(pclu), pclu) + +/* file status */ +/* this prevents + * fscore_write_inode, fscore_map_clus, ... 
called on unlinked inodes + * from corrupting on-disk dentry data. + * + * The fid->dir value of an unlinked inode will be DIR_DELETED, + * and those functions must check that fid->dir is valid before + * calling get_dentry_in_dir() + */ +#define DIR_DELETED 0xFFFF0321 + +/*----------------------------------------------------------------------*/ +/* Type Definitions */ +/*----------------------------------------------------------------------*/ +#define ES_2_ENTRIES 2 +#define ES_3_ENTRIES 3 +#define ES_ALL_ENTRIES 0 + +typedef struct { + u64 sector; // sector number that contains file_entry + u32 offset; // byte offset in the sector + s32 alloc_flag; // flag in stream entry. 01 for cluster chain, 03 for contig. clusters. + u32 num_entries; + void *__buf; // __buf should be the last member +} ENTRY_SET_CACHE_T; + + + +/*----------------------------------------------------------------------*/ +/* External Function Declarations */ +/*----------------------------------------------------------------------*/ + +/* file system initialization & shutdown functions */ +s32 fscore_init(void); +s32 fscore_shutdown(void); + +/* bdev management */ +s32 fscore_check_bdi_valid(struct super_block *sb); + +/* chain management */ +s32 chain_cont_cluster(struct super_block *sb, u32 chain, u32 len); + +/* volume management functions */ +s32 fscore_mount(struct super_block *sb); +s32 fscore_umount(struct super_block *sb); +s32 fscore_statfs(struct super_block *sb, VOL_INFO_T *info); +s32 fscore_sync_fs(struct super_block *sb, s32 do_sync); +s32 fscore_set_vol_flags(struct super_block *sb, u16 new_flag, s32 always_sync); +u32 fscore_get_au_stat(struct super_block *sb, s32 mode); + +/* file management functions */ +s32 fscore_lookup(struct inode *inode, u8 *path, FILE_ID_T *fid); +s32 fscore_create(struct inode *inode, u8 *path, u8 mode, FILE_ID_T *fid); +s32 fscore_read_link(struct inode *inode, FILE_ID_T *fid, void *buffer, u64 count, u64 *rcount); +s32 fscore_write_link(struct inode *inode, FILE_ID_T *fid, void *buffer, u64 count, u64 *wcount); +s32 fscore_truncate(struct inode *inode, u64 old_size, u64 new_size); +s32 fscore_rename(struct inode *old_parent_inode, FILE_ID_T *fid, + struct inode *new_parent_inode, struct dentry *new_dentry); +s32 fscore_remove(struct inode *inode, FILE_ID_T *fid); +s32 fscore_read_inode(struct inode *inode, DIR_ENTRY_T *info); +s32 fscore_write_inode(struct inode *inode, DIR_ENTRY_T *info, int sync); +s32 fscore_map_clus(struct inode *inode, u32 clu_offset, u32 *clu, int dest); +s32 fscore_reserve_clus(struct inode *inode); +s32 fscore_unlink(struct inode *inode, FILE_ID_T *fid); + +/* directory management functions */ +s32 fscore_mkdir(struct inode *inode, u8 *path, FILE_ID_T *fid); +s32 fscore_readdir(struct inode *inode, DIR_ENTRY_T *dir_ent); +s32 fscore_rmdir(struct inode *inode, FILE_ID_T *fid); + + +/*----------------------------------------------------------------------*/ +/* External Function Declarations (NOT TO UPPER LAYER) */ +/*----------------------------------------------------------------------*/ + +/* core.c : core code for common */ +/* dir entry management functions */ +DENTRY_T *get_dentry_in_dir(struct super_block *sb, CHAIN_T *p_dir, s32 entry, u64 *sector); + +/* name conversion functions */ +void get_uniname_from_dos_entry(struct super_block *sb, DOS_DENTRY_T *ep, UNI_NAME_T *p_uniname, u8 mode); + +/* file operation functions */ +s32 walk_fat_chain(struct super_block *sb, CHAIN_T *p_dir, u32 byte_offset, u32 *clu); + +/* sdfat/cache.c */ +s32 
meta_cache_init(struct super_block *sb); +s32 meta_cache_shutdown(struct super_block *sb); +u8 *fcache_getblk(struct super_block *sb, u64 sec); +s32 fcache_modify(struct super_block *sb, u64 sec); +s32 fcache_release_all(struct super_block *sb); +s32 fcache_flush(struct super_block *sb, u32 sync); + +u8 *dcache_getblk(struct super_block *sb, u64 sec); +s32 dcache_modify(struct super_block *sb, u64 sec); +s32 dcache_lock(struct super_block *sb, u64 sec); +s32 dcache_unlock(struct super_block *sb, u64 sec); +s32 dcache_release(struct super_block *sb, u64 sec); +s32 dcache_release_all(struct super_block *sb); +s32 dcache_flush(struct super_block *sb, u32 sync); +s32 dcache_readahead(struct super_block *sb, u64 sec); + + +/* fatent.c */ +s32 fat_ent_ops_init(struct super_block *sb); +s32 fat_ent_get(struct super_block *sb, u32 loc, u32 *content); +s32 fat_ent_set(struct super_block *sb, u32 loc, u32 content); +s32 fat_ent_get_safe(struct super_block *sb, u32 loc, u32 *content); + +/* core_fat.c : core code for fat */ +s32 fat_generate_dos_name_new(struct super_block *sb, CHAIN_T *p_dir, DOS_NAME_T *p_dosname, s32 n_entries); +s32 mount_fat16(struct super_block *sb, pbr_t *p_pbr); +s32 mount_fat32(struct super_block *sb, pbr_t *p_pbr); + +/* core_exfat.c : core code for exfat */ + +s32 load_alloc_bmp(struct super_block *sb); +void free_alloc_bmp(struct super_block *sb); +ENTRY_SET_CACHE_T *get_dentry_set_in_dir(struct super_block *sb, + CHAIN_T *p_dir, s32 entry, u32 type, DENTRY_T **file_ep); +void release_dentry_set(ENTRY_SET_CACHE_T *es); +s32 update_dir_chksum(struct super_block *sb, CHAIN_T *p_dir, s32 entry); +s32 update_dir_chksum_with_entry_set(struct super_block *sb, ENTRY_SET_CACHE_T *es); +bool is_dir_empty(struct super_block *sb, CHAIN_T *p_dir); +s32 mount_exfat(struct super_block *sb, pbr_t *p_pbr); + +/* amap_smart.c : creation on mount / destroy on umount */ +int amap_create(struct super_block *sb, u32 pack_ratio, u32 sect_per_au, u32 hidden_sect); +void amap_destroy(struct super_block *sb); + +/* amap_smart.c : (de)allocation functions */ +s32 amap_fat_alloc_cluster(struct super_block *sb, u32 num_alloc, CHAIN_T *p_chain, s32 dest); +s32 amap_free_cluster(struct super_block *sb, CHAIN_T *p_chain, s32 do_relse);/* Not implemented */ +s32 amap_release_cluster(struct super_block *sb, u32 clu); /* Only update AMAP */ + +/* amap_smart.c : misc (for defrag) */ +s32 amap_mark_ignore(struct super_block *sb, u32 clu); +s32 amap_unmark_ignore(struct super_block *sb, u32 clu); +s32 amap_unmark_ignore_all(struct super_block *sb); +s32 amap_check_working(struct super_block *sb, u32 clu); +s32 amap_get_freeclus(struct super_block *sb, u32 clu); + +/* amap_smart.c : stat AU */ +u32 amap_get_au_stat(struct super_block *sb, s32 mode); + + +/* blkdev.c */ +s32 bdev_open_dev(struct super_block *sb); +s32 bdev_close_dev(struct super_block *sb); +s32 bdev_check_bdi_valid(struct super_block *sb); +s32 bdev_readahead(struct super_block *sb, u64 secno, u64 num_secs); +s32 bdev_mread(struct super_block *sb, u64 secno, struct buffer_head **bh, u64 num_secs, s32 read); +s32 bdev_mwrite(struct super_block *sb, u64 secno, struct buffer_head *bh, u64 num_secs, s32 sync); +s32 bdev_sync_all(struct super_block *sb); + +/* blkdev.c : sector read/write functions */ +s32 read_sect(struct super_block *sb, u64 sec, struct buffer_head **bh, s32 read); +s32 write_sect(struct super_block *sb, u64 sec, struct buffer_head *bh, s32 sync); +s32 read_msect(struct super_block *sb, u64 sec, struct buffer_head **bh, s64 
num_secs, s32 read); +s32 write_msect(struct super_block *sb, u64 sec, struct buffer_head *bh, s64 num_secs, s32 sync); +s32 write_msect_zero(struct super_block *sb, u64 sec, u64 num_secs); + +/* misc.c */ +u8 calc_chksum_1byte(void *data, s32 len, u8 chksum); +u16 calc_chksum_2byte(void *data, s32 len, u16 chksum, s32 type); + +/* extent.c */ +s32 extent_cache_init(void); +void extent_cache_shutdown(void); +void extent_cache_init_inode(struct inode *inode); +void extent_cache_inval_inode(struct inode *inode); +s32 extent_get_clus(struct inode *inode, u32 cluster, u32 *fclus, + u32 *dclus, u32 *last_dclus, s32 allow_eof); +/*----------------------------------------------------------------------*/ +/* Wrapper Function */ +/*----------------------------------------------------------------------*/ +void set_sb_dirty(struct super_block *sb); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* _SDFAT_CORE_H */ + +/* end of core.h */ diff --git a/fs/sdfat/core_exfat.c b/fs/sdfat/core_exfat.c new file mode 100644 index 000000000000..9e4b994d0e90 --- /dev/null +++ b/fs/sdfat/core_exfat.c @@ -0,0 +1,1560 @@ +/* + * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +/************************************************************************/ +/* */ +/* PROJECT : exFAT & FAT12/16/32 File System */ +/* FILE : core_exfat.c */ +/* PURPOSE : exFAT-fs core code for sdFAT */ +/* */ +/*----------------------------------------------------------------------*/ +/* NOTES */ +/* */ +/* */ +/************************************************************************/ + +#include <linux/version.h> +#include <linux/blkdev.h> +#include <linux/workqueue.h> +#include <linux/kernel.h> +#include <linux/log2.h> + +#include "sdfat.h" +#include "core.h" +#include <asm/byteorder.h> +#include <asm/unaligned.h> + +/*----------------------------------------------------------------------*/ +/* Constant & Macro Definitions */ +/*----------------------------------------------------------------------*/ + +/*----------------------------------------------------------------------*/ +/* Global Variable Definitions */ +/*----------------------------------------------------------------------*/ + +/*----------------------------------------------------------------------*/ +/* Local Variable Definitions */ +/*----------------------------------------------------------------------*/ +static u8 free_bit[] = { + 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2,/* 0 ~ 19*/ + 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3,/* 20 ~ 39*/ + 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2,/* 40 ~ 59*/ + 0, 1, 0, 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4,/* 60 ~ 79*/ + 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2,/* 80 ~ 99*/ + 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3,/*100 ~ 119*/ + 0, 1, 0, 2, 0, 1, 0, 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2,/*120 ~ 139*/ + 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5,/*140 ~ 159*/ + 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2,/*160 ~ 179*/ + 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 6, 0, 1, 0, 2, 0, 1, 0, 3,/*180 ~ 199*/ + 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2,/*200 ~ 219*/ + 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4,/*220 ~ 239*/ + 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 /*240 ~ 254*/ +}; + +static u8 used_bit[] = { + 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3,/* 0 ~ 19*/ + 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4,/* 20 ~ 39*/ + 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5,/* 40 ~ 59*/ + 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,/* 60 ~ 79*/ + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4,/* 80 ~ 99*/ + 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6,/*100 ~ 119*/ + 4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4,/*120 ~ 139*/ + 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,/*140 ~ 159*/ + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5,/*160 ~ 179*/ + 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 2, 3, 3, 4, 3, 4, 4, 5,/*180 ~ 199*/ + 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6,/*200 ~ 219*/ + 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,/*220 ~ 239*/ + 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 /*240 ~ 255*/ +}; + + +/*======================================================================*/ +/* Local Function Definitions */ +/*======================================================================*/ +/* + * Directory Entry Management Functions + */ +static u32 exfat_get_entry_type(DENTRY_T *p_entry) +{ + FILE_DENTRY_T *ep = (FILE_DENTRY_T *) p_entry; + + if (ep->type == EXFAT_UNUSED) + 
return TYPE_UNUSED; + if (ep->type < 0x80) + return TYPE_DELETED; + if (ep->type == 0x80) + return TYPE_INVALID; + if (ep->type < 0xA0) { + if (ep->type == 0x81) + return TYPE_BITMAP; + if (ep->type == 0x82) + return TYPE_UPCASE; + if (ep->type == 0x83) + return TYPE_VOLUME; + if (ep->type == 0x85) { + if (le16_to_cpu(ep->attr) & ATTR_SUBDIR) + return TYPE_DIR; + return TYPE_FILE; + } + return TYPE_CRITICAL_PRI; + } + if (ep->type < 0xC0) { + if (ep->type == 0xA0) + return TYPE_GUID; + if (ep->type == 0xA1) + return TYPE_PADDING; + if (ep->type == 0xA2) + return TYPE_ACLTAB; + return TYPE_BENIGN_PRI; + } + if (ep->type < 0xE0) { + if (ep->type == 0xC0) + return TYPE_STREAM; + if (ep->type == 0xC1) + return TYPE_EXTEND; + if (ep->type == 0xC2) + return TYPE_ACL; + return TYPE_CRITICAL_SEC; + } + return TYPE_BENIGN_SEC; +} /* end of exfat_get_entry_type */ + +static void exfat_set_entry_type(DENTRY_T *p_entry, u32 type) +{ + FILE_DENTRY_T *ep = (FILE_DENTRY_T *) p_entry; + + if (type == TYPE_UNUSED) { + ep->type = 0x0; + } else if (type == TYPE_DELETED) { + ep->type &= ~0x80; + } else if (type == TYPE_STREAM) { + ep->type = 0xC0; + } else if (type == TYPE_EXTEND) { + ep->type = 0xC1; + } else if (type == TYPE_BITMAP) { + ep->type = 0x81; + } else if (type == TYPE_UPCASE) { + ep->type = 0x82; + } else if (type == TYPE_VOLUME) { + ep->type = 0x83; + } else if (type == TYPE_DIR) { + ep->type = 0x85; + ep->attr = cpu_to_le16(ATTR_SUBDIR); + } else if (type == TYPE_FILE) { + ep->type = 0x85; + ep->attr = cpu_to_le16(ATTR_ARCHIVE); + } else if (type == TYPE_SYMLINK) { + ep->type = 0x85; + ep->attr = cpu_to_le16(ATTR_ARCHIVE | ATTR_SYMLINK); + } +} /* end of exfat_set_entry_type */ + +static u32 exfat_get_entry_attr(DENTRY_T *p_entry) +{ + FILE_DENTRY_T *ep = (FILE_DENTRY_T *)p_entry; + + return (u32)le16_to_cpu(ep->attr); +} /* end of exfat_get_entry_attr */ + +static void exfat_set_entry_attr(DENTRY_T *p_entry, u32 attr) +{ + FILE_DENTRY_T *ep = (FILE_DENTRY_T *)p_entry; + + ep->attr = cpu_to_le16((u16) attr); +} /* end of exfat_set_entry_attr */ + +static u8 exfat_get_entry_flag(DENTRY_T *p_entry) +{ + STRM_DENTRY_T *ep = (STRM_DENTRY_T *)p_entry; + + return ep->flags; +} /* end of exfat_get_entry_flag */ + +static void exfat_set_entry_flag(DENTRY_T *p_entry, u8 flags) +{ + STRM_DENTRY_T *ep = (STRM_DENTRY_T *)p_entry; + + ep->flags = flags; +} /* end of exfat_set_entry_flag */ + +static u32 exfat_get_entry_clu0(DENTRY_T *p_entry) +{ + STRM_DENTRY_T *ep = (STRM_DENTRY_T *)p_entry; + + return (u32)le32_to_cpu(ep->start_clu); +} /* end of exfat_get_entry_clu0 */ + +static void exfat_set_entry_clu0(DENTRY_T *p_entry, u32 start_clu) +{ + STRM_DENTRY_T *ep = (STRM_DENTRY_T *)p_entry; + + ep->start_clu = cpu_to_le32(start_clu); +} /* end of exfat_set_entry_clu0 */ + +static u64 exfat_get_entry_size(DENTRY_T *p_entry) +{ + STRM_DENTRY_T *ep = (STRM_DENTRY_T *)p_entry; + + return le64_to_cpu(ep->valid_size); +} /* end of exfat_get_entry_size */ + +static void exfat_set_entry_size(DENTRY_T *p_entry, u64 size) +{ + STRM_DENTRY_T *ep = (STRM_DENTRY_T *)p_entry; + + ep->valid_size = cpu_to_le64(size); + ep->size = cpu_to_le64(size); +} /* end of exfat_set_entry_size */ + +static void exfat_get_entry_time(DENTRY_T *p_entry, TIMESTAMP_T *tp, u8 mode) +{ + u16 t = 0x00, d = 0x21, tz = 0x00; + FILE_DENTRY_T *ep = (FILE_DENTRY_T *)p_entry; + + switch (mode) { + case TM_CREATE: + t = le16_to_cpu(ep->create_time); + d = le16_to_cpu(ep->create_date); + tz = ep->create_tz; + break; + case TM_MODIFY: + t = 
le16_to_cpu(ep->modify_time); + d = le16_to_cpu(ep->modify_date); + tz = ep->modify_tz; + break; + case TM_ACCESS: + t = le16_to_cpu(ep->access_time); + d = le16_to_cpu(ep->access_date); + tz = ep->access_tz; + break; + } + + tp->tz.value = tz; + tp->sec = (t & 0x001F) << 1; + tp->min = (t >> 5) & 0x003F; + tp->hour = (t >> 11); + tp->day = (d & 0x001F); + tp->mon = (d >> 5) & 0x000F; + tp->year = (d >> 9); +} /* end of exfat_get_entry_time */ + +static void exfat_set_entry_time(DENTRY_T *p_entry, TIMESTAMP_T *tp, u8 mode) +{ + u16 t, d; + FILE_DENTRY_T *ep = (FILE_DENTRY_T *)p_entry; + + t = (tp->hour << 11) | (tp->min << 5) | (tp->sec >> 1); + d = (tp->year << 9) | (tp->mon << 5) | tp->day; + + switch (mode) { + case TM_CREATE: + ep->create_time = cpu_to_le16(t); + ep->create_date = cpu_to_le16(d); + ep->create_tz = tp->tz.value; + break; + case TM_MODIFY: + ep->modify_time = cpu_to_le16(t); + ep->modify_date = cpu_to_le16(d); + ep->modify_tz = tp->tz.value; + break; + case TM_ACCESS: + ep->access_time = cpu_to_le16(t); + ep->access_date = cpu_to_le16(d); + ep->access_tz = tp->tz.value; + break; + } +} /* end of exfat_set_entry_time */ + + +static void __init_file_entry(struct super_block *sb, FILE_DENTRY_T *ep, u32 type) +{ + TIMESTAMP_T tm, *tp; + + exfat_set_entry_type((DENTRY_T *) ep, type); + + tp = tm_now(SDFAT_SB(sb), &tm); + exfat_set_entry_time((DENTRY_T *) ep, tp, TM_CREATE); + exfat_set_entry_time((DENTRY_T *) ep, tp, TM_MODIFY); + exfat_set_entry_time((DENTRY_T *) ep, tp, TM_ACCESS); + ep->create_time_ms = 0; + ep->modify_time_ms = 0; +} /* end of __init_file_entry */ + +static void __init_strm_entry(STRM_DENTRY_T *ep, u8 flags, u32 start_clu, u64 size) +{ + exfat_set_entry_type((DENTRY_T *) ep, TYPE_STREAM); + ep->flags = flags; + ep->start_clu = cpu_to_le32(start_clu); + ep->valid_size = cpu_to_le64(size); + ep->size = cpu_to_le64(size); +} /* end of __init_strm_entry */ + +static void __init_name_entry(NAME_DENTRY_T *ep, u16 *uniname) +{ + s32 i; + + exfat_set_entry_type((DENTRY_T *) ep, TYPE_EXTEND); + ep->flags = 0x0; + + for (i = 0; i < 15; i++) { + ep->unicode_0_14[i] = cpu_to_le16(*uniname); + if (*uniname == 0x0) + break; + uniname++; + } +} /* end of __init_name_entry */ + +static s32 exfat_init_dir_entry(struct super_block *sb, CHAIN_T *p_dir, s32 entry, u32 type, u32 start_clu, u64 size) +{ + u64 sector; + u8 flags; + FILE_DENTRY_T *file_ep; + STRM_DENTRY_T *strm_ep; + + flags = (type == TYPE_FILE) ? 
0x01 : 0x03; + + /* we cannot use get_dentry_set_in_dir here because file ep is not initialized yet */ + file_ep = (FILE_DENTRY_T *)get_dentry_in_dir(sb, p_dir, entry, &sector); + if (!file_ep) + return -EIO; + + strm_ep = (STRM_DENTRY_T *)get_dentry_in_dir(sb, p_dir, entry+1, &sector); + if (!strm_ep) + return -EIO; + + __init_file_entry(sb, file_ep, type); + if (dcache_modify(sb, sector)) + return -EIO; + + __init_strm_entry(strm_ep, flags, start_clu, size); + if (dcache_modify(sb, sector)) + return -EIO; + + return 0; +} /* end of exfat_init_dir_entry */ + +s32 update_dir_chksum(struct super_block *sb, CHAIN_T *p_dir, s32 entry) +{ + s32 ret = -EIO; + s32 i, num_entries; + u64 sector; + u16 chksum; + FILE_DENTRY_T *file_ep; + DENTRY_T *ep; + + file_ep = (FILE_DENTRY_T *)get_dentry_in_dir(sb, p_dir, entry, &sector); + if (!file_ep) + return -EIO; + + dcache_lock(sb, sector); + + num_entries = (s32) file_ep->num_ext + 1; + chksum = calc_chksum_2byte((void *) file_ep, DENTRY_SIZE, 0, CS_DIR_ENTRY); + + for (i = 1; i < num_entries; i++) { + ep = get_dentry_in_dir(sb, p_dir, entry+i, NULL); + if (!ep) + goto out_unlock; + + chksum = calc_chksum_2byte((void *) ep, DENTRY_SIZE, chksum, CS_DEFAULT); + } + + file_ep->checksum = cpu_to_le16(chksum); + ret = dcache_modify(sb, sector); +out_unlock: + dcache_unlock(sb, sector); + return ret; + +} /* end of update_dir_chksum */ + + +static s32 exfat_init_ext_entry(struct super_block *sb, CHAIN_T *p_dir, s32 entry, s32 num_entries, + UNI_NAME_T *p_uniname, DOS_NAME_T *p_dosname) +{ + s32 i; + u64 sector; + u16 *uniname = p_uniname->name; + FILE_DENTRY_T *file_ep; + STRM_DENTRY_T *strm_ep; + NAME_DENTRY_T *name_ep; + + file_ep = (FILE_DENTRY_T *)get_dentry_in_dir(sb, p_dir, entry, &sector); + if (!file_ep) + return -EIO; + + file_ep->num_ext = (u8)(num_entries - 1); + dcache_modify(sb, sector); + + strm_ep = (STRM_DENTRY_T *)get_dentry_in_dir(sb, p_dir, entry+1, &sector); + if (!strm_ep) + return -EIO; + + strm_ep->name_len = p_uniname->name_len; + strm_ep->name_hash = cpu_to_le16(p_uniname->name_hash); + dcache_modify(sb, sector); + + for (i = 2; i < num_entries; i++) { + name_ep = (NAME_DENTRY_T *)get_dentry_in_dir(sb, p_dir, entry+i, &sector); + if (!name_ep) + return -EIO; + + __init_name_entry(name_ep, uniname); + dcache_modify(sb, sector); + uniname += 15; + } + + update_dir_chksum(sb, p_dir, entry); + + return 0; +} /* end of exfat_init_ext_entry */ + + +static s32 exfat_delete_dir_entry(struct super_block *sb, CHAIN_T *p_dir, s32 entry, s32 order, s32 num_entries) +{ + s32 i; + u64 sector; + DENTRY_T *ep; + + for (i = order; i < num_entries; i++) { + ep = get_dentry_in_dir(sb, p_dir, entry+i, &sector); + if (!ep) + return -EIO; + + exfat_set_entry_type(ep, TYPE_DELETED); + if (dcache_modify(sb, sector)) + return -EIO; + } + + return 0; +} + +static s32 __write_partial_entries_in_entry_set(struct super_block *sb, + ENTRY_SET_CACHE_T *es, u64 sec, u32 off, u32 count) +{ + s32 num_entries; + u32 buf_off = (off - es->offset); + u32 remaining_byte_in_sector, copy_entries; + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + u32 clu; + u8 *buf, *esbuf = (u8 *)&(es->__buf); + + TMSG("%s entered\n", __func__); + MMSG("%s: es %p sec %llu off %u cnt %d\n", __func__, es, sec, off, count); + num_entries = count; + + while (num_entries) { + /* write on a per-sector basis */ + remaining_byte_in_sector = (1 << sb->s_blocksize_bits) - off; + copy_entries = min((s32)(remaining_byte_in_sector >> DENTRY_SIZE_BITS), num_entries); + buf = dcache_getblk(sb, sec); + if (!buf) + goto err_out;
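+ /* copy this sector's share of the cached entry set into the + * dcache block; the tail of the loop then advances sec to the + * next sector in the chain until all entries are written back */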
+ MMSG("es->buf %p buf_off %u\n", esbuf, buf_off); + MMSG("copying %d entries from %p to sector %llu\n", copy_entries, (esbuf + buf_off), sec); + memcpy(buf + off, esbuf + buf_off, copy_entries << DENTRY_SIZE_BITS); + dcache_modify(sb, sec); + num_entries -= copy_entries; + + if (num_entries) { + // get next sector + if (IS_LAST_SECT_IN_CLUS(fsi, sec)) { + clu = SECT_TO_CLUS(fsi, sec); + if (es->alloc_flag == 0x03) + clu++; + else if (get_next_clus_safe(sb, &clu)) + goto err_out; + sec = CLUS_TO_SECT(fsi, clu); + } else { + sec++; + } + off = 0; + buf_off += copy_entries << DENTRY_SIZE_BITS; + } + } + + TMSG("%s exited successfully\n", __func__); + return 0; +err_out: + TMSG("%s failed\n", __func__); + return -EIO; +} + +/* write back all entries in entry set */ +static s32 __write_whole_entry_set(struct super_block *sb, ENTRY_SET_CACHE_T *es) +{ + return __write_partial_entries_in_entry_set(sb, es, es->sector, es->offset, es->num_entries); +} + +s32 update_dir_chksum_with_entry_set(struct super_block *sb, ENTRY_SET_CACHE_T *es) +{ + DENTRY_T *ep; + u16 chksum = 0; + s32 chksum_type = CS_DIR_ENTRY, i; + + ep = (DENTRY_T *)&(es->__buf); + for (i = 0; i < es->num_entries; i++) { + MMSG("%s %p\n", __func__, ep); + chksum = calc_chksum_2byte((void *) ep, DENTRY_SIZE, chksum, chksum_type); + ep++; + chksum_type = CS_DEFAULT; + } + + ep = (DENTRY_T *)&(es->__buf); + ((FILE_DENTRY_T *)ep)->checksum = cpu_to_le16(chksum); + return __write_whole_entry_set(sb, es); +} + +/* returns a set of dentries for a file or dir. + * Note that this is a copy (dump) of the on-disk dentries, so the caller + * must write the set back (e.g. via update_dir_chksum_with_entry_set()) + * to apply any changes made to it to the real device. + * in: + * sb+p_dir+entry: indicates a file/dir + * type: specifies how many dentries should be included. + * out: + * file_ep: will point to the first dentry (= file dentry) on success + * return: + * pointer to the entry set on success, + * NULL on failure. + */ + +#define ES_MODE_STARTED 0 +#define ES_MODE_GET_FILE_ENTRY 1 +#define ES_MODE_GET_STRM_ENTRY 2 +#define ES_MODE_GET_NAME_ENTRY 3 +#define ES_MODE_GET_CRITICAL_SEC_ENTRY 4
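+/* + * The ES_MODE_* values above drive a small state machine used while the + * set is copied: a valid set must run FILE -> STREAM -> NAME... entries, + * optionally followed by critical/benign secondary entries; any other + * ordering aborts the copy. Typical usage of the result looks like: + * + * es = get_dentry_set_in_dir(sb, p_dir, entry, ES_ALL_ENTRIES, &ep); + * if (es) { + * ... modify the cached dentries through ep ... + * update_dir_chksum_with_entry_set(sb, es); + * release_dentry_set(es); + * } + */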
+ENTRY_SET_CACHE_T *get_dentry_set_in_dir(struct super_block *sb, + CHAIN_T *p_dir, s32 entry, u32 type, DENTRY_T **file_ep) +{ + s32 ret; + u32 off, byte_offset, clu = 0; + u32 entry_type; + u64 sec; + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + ENTRY_SET_CACHE_T *es = NULL; + DENTRY_T *ep, *pos; + u8 *buf; + u8 num_entries; + s32 mode = ES_MODE_STARTED; + + /* FIXME : is this safe in the error case? */ + if (p_dir->dir == DIR_DELETED) { + EMSG("%s : access to deleted dentry\n", __func__); + BUG_ON(!fsi->prev_eio); + return NULL; + } + + TMSG("%s entered\n", __func__); + MMSG("p_dir dir %u flags %x size %d\n", p_dir->dir, p_dir->flags, p_dir->size); + MMSG("entry %d type %d\n", entry, type); + + byte_offset = entry << DENTRY_SIZE_BITS; + ret = walk_fat_chain(sb, p_dir, byte_offset, &clu); + if (ret) + return NULL; + + /* byte offset in cluster */ + byte_offset &= fsi->cluster_size - 1; + + /* byte offset in sector */ + off = byte_offset & (u32)(sb->s_blocksize - 1); + + /* sector offset in cluster */ + sec = byte_offset >> (sb->s_blocksize_bits); + sec += CLUS_TO_SECT(fsi, clu); + + buf = dcache_getblk(sb, sec); + if (!buf) + goto err_out; + + ep = (DENTRY_T *)(buf + off); + entry_type = exfat_get_entry_type(ep); + + if ((entry_type != TYPE_FILE) + && (entry_type != TYPE_DIR)) + goto err_out; + + if (type == ES_ALL_ENTRIES) + num_entries = ((FILE_DENTRY_T *)ep)->num_ext+1; + else + num_entries = type; + + MMSG("trying to malloc %lx bytes for %d entries\n", + (unsigned long)(offsetof(ENTRY_SET_CACHE_T, __buf) + (num_entries) * sizeof(DENTRY_T)), num_entries); + es = kmalloc((offsetof(ENTRY_SET_CACHE_T, __buf) + (num_entries) * sizeof(DENTRY_T)), GFP_KERNEL); + if (!es) { + EMSG("%s: failed to alloc entryset\n", __func__); + goto err_out; + } + + es->num_entries = num_entries; + es->sector = sec; + es->offset = off; + es->alloc_flag = p_dir->flags; + + pos = (DENTRY_T *) &(es->__buf); + + while (num_entries) { + // Instead of copying the whole sector, validate every entry + // while it is copied; this provides minimal stability and + // consistency checking. + entry_type = exfat_get_entry_type(ep); + + if ((entry_type == TYPE_UNUSED) || (entry_type == TYPE_DELETED)) + goto err_out; + + switch (mode) { + case ES_MODE_STARTED: + if ((entry_type == TYPE_FILE) || (entry_type == TYPE_DIR)) + mode = ES_MODE_GET_FILE_ENTRY; + else + goto err_out; + break; + case ES_MODE_GET_FILE_ENTRY: + if (entry_type == TYPE_STREAM) + mode = ES_MODE_GET_STRM_ENTRY; + else + goto err_out; + break; + case ES_MODE_GET_STRM_ENTRY: + if (entry_type == TYPE_EXTEND) + mode = ES_MODE_GET_NAME_ENTRY; + else + goto err_out; + break; + case ES_MODE_GET_NAME_ENTRY: + if (entry_type == TYPE_EXTEND) + break; + else if (entry_type == TYPE_STREAM) + goto err_out; + else if (entry_type & TYPE_CRITICAL_SEC) + mode = ES_MODE_GET_CRITICAL_SEC_ENTRY; + else + goto err_out; + break; + case ES_MODE_GET_CRITICAL_SEC_ENTRY: + if ((entry_type == TYPE_EXTEND) || (entry_type == TYPE_STREAM)) + goto err_out; + else if ((entry_type & TYPE_CRITICAL_SEC) != TYPE_CRITICAL_SEC) + goto err_out; + break; + } + + /* copy dentry */ + memcpy(pos, ep, sizeof(DENTRY_T)); + + if (--num_entries == 0) + break; + + if (((off + DENTRY_SIZE) & (u32)(sb->s_blocksize - 1)) < + (off & (u32)(sb->s_blocksize - 1))) { + // get the next sector + if (IS_LAST_SECT_IN_CLUS(fsi, sec)) { + if (es->alloc_flag == 0x03) + clu++; + else if (get_next_clus_safe(sb, &clu)) + goto err_out; + sec = CLUS_TO_SECT(fsi, clu); + } else { + sec++; + } + buf = dcache_getblk(sb, sec); + if (!buf) + goto err_out; + off = 0; + ep = (DENTRY_T *)(buf); + } else { + ep++; + off += DENTRY_SIZE; + } + pos++; + } + + if (file_ep) + *file_ep = (DENTRY_T *)&(es->__buf); + + MMSG("es sec %llu offset %u flags %d, num_entries %u buf ptr %p\n", + es->sector, es->offset, es->alloc_flag, es->num_entries, &(es->__buf)); + TMSG("%s exited %p\n", __func__, es); + return es; +err_out: + TMSG("%s exited (return NULL) (es %p)\n",
__func__, es); + + /* kfree(NULL) is safe */ + kfree(es); + es = NULL; + return NULL; +} + +void release_dentry_set(ENTRY_SET_CACHE_T *es) +{ + TMSG("%s %p\n", __func__, es); + + /* kfree(NULL) is safe */ + kfree(es); + es = NULL; +} + +static s32 __extract_uni_name_from_name_entry(NAME_DENTRY_T *ep, u16 *uniname, s32 order) +{ + s32 i, len = 0; + + for (i = 0; i < 15; i++) { + /* FIXME : unaligned? */ + *uniname = le16_to_cpu(ep->unicode_0_14[i]); + if (*uniname == 0x0) + return len; + uniname++; + len++; + } + + *uniname = 0x0; + return len; + +} /* end of __extract_uni_name_from_name_entry */ + +#define DIRENT_STEP_FILE (0) +#define DIRENT_STEP_STRM (1) +#define DIRENT_STEP_NAME (2) +#define DIRENT_STEP_SECD (3) + +/* return values of exfat_find_dir_entry() + * >= 0 : returns the dir entry position of the name in dir + * -EEXIST : (root dir, ".") it is the root dir itself + * -ENOENT : entry with the name does not exist + * -EIO : I/O error + */ +static s32 exfat_find_dir_entry(struct super_block *sb, FILE_ID_T *fid, + CHAIN_T *p_dir, UNI_NAME_T *p_uniname, s32 num_entries, DOS_NAME_T *unused, u32 type) +{ + s32 i, rewind = 0, dentry = 0, end_eidx = 0, num_ext = 0, len; + s32 order, step, name_len; + s32 dentries_per_clu, num_empty = 0; + u32 entry_type; + u16 entry_uniname[16], *uniname = NULL, unichar; + CHAIN_T clu; + DENTRY_T *ep; + HINT_T *hint_stat = &fid->hint_stat; + HINT_FEMP_T candi_empty; + FILE_DENTRY_T *file_ep; + STRM_DENTRY_T *strm_ep; + NAME_DENTRY_T *name_ep; + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + + /* + * REMARK: + * DOT and DOTDOT are handled by VFS layer + */ + + if (IS_CLUS_FREE(p_dir->dir)) + return -EIO; + + dentries_per_clu = fsi->dentries_per_clu; + + clu.dir = p_dir->dir; + clu.size = p_dir->size; + clu.flags = p_dir->flags; + + if (hint_stat->eidx) { + clu.dir = hint_stat->clu; + dentry = hint_stat->eidx; + end_eidx = dentry; + } + + candi_empty.eidx = -1; +rewind: + order = 0; + step = DIRENT_STEP_FILE; + while (!IS_CLUS_EOF(clu.dir)) { + i = dentry & (dentries_per_clu - 1); + for (; i < dentries_per_clu; i++, dentry++) { + if (rewind && (dentry == end_eidx)) + goto not_found; + + ep = get_dentry_in_dir(sb, &clu, i, NULL); + if (!ep) + return -EIO; + + entry_type = exfat_get_entry_type(ep); + + if ((entry_type == TYPE_UNUSED) || (entry_type == TYPE_DELETED)) { + step = DIRENT_STEP_FILE; + + num_empty++; + if (candi_empty.eidx == -1) { + if (num_empty == 1) { + candi_empty.cur.dir = clu.dir; + candi_empty.cur.size = clu.size; + candi_empty.cur.flags = clu.flags; + } + + if (num_empty >= num_entries) { + candi_empty.eidx = dentry - (num_empty - 1); + ASSERT(0 <= candi_empty.eidx); + candi_empty.count = num_empty; + + if ((fid->hint_femp.eidx == -1) || + (candi_empty.eidx <= fid->hint_femp.eidx)) { + memcpy(&fid->hint_femp, + &candi_empty, + sizeof(HINT_FEMP_T)); + } + } + } + + if (entry_type == TYPE_UNUSED) + goto not_found; + continue; + } + + num_empty = 0; + candi_empty.eidx = -1; + + if ((entry_type == TYPE_FILE) || (entry_type == TYPE_DIR)) { + step = DIRENT_STEP_FILE; + if ((type == TYPE_ALL) || (type == entry_type)) { + file_ep = (FILE_DENTRY_T *) ep; + num_ext = file_ep->num_ext; + step = DIRENT_STEP_STRM; + } + continue; + } + + if (entry_type == TYPE_STREAM) { + if (step != DIRENT_STEP_STRM) { + step = DIRENT_STEP_FILE; + continue; + } + step = DIRENT_STEP_FILE; + strm_ep = (STRM_DENTRY_T *) ep; + if ((p_uniname->name_hash == le16_to_cpu(strm_ep->name_hash)) && + (p_uniname->name_len == strm_ep->name_len)) { + step = DIRENT_STEP_NAME; + order = 1;
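+ /* hash and length match: the NAME entries that follow are + * compared against p_uniname, 15 UTF-16 units per entry */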
+ name_len = 0; + } + continue; + } + + if (entry_type == TYPE_EXTEND) { + if (step != DIRENT_STEP_NAME) { + step = DIRENT_STEP_FILE; + continue; + } + name_ep = (NAME_DENTRY_T *) ep; + + if ((++order) == 2) + uniname = p_uniname->name; + else + uniname += 15; + + len = __extract_uni_name_from_name_entry(name_ep, entry_uniname, order); + name_len += len; + + unichar = *(uniname+len); + *(uniname+len) = 0x0; + + if (nls_cmp_uniname(sb, uniname, entry_uniname)) { + step = DIRENT_STEP_FILE; + } else if (name_len == p_uniname->name_len) { + if (order == num_ext) { + //fid->hint_femp.eidx = -1; + goto found; + } + step = DIRENT_STEP_SECD; + } + + *(uniname+len) = unichar; + continue; + } + + if (entry_type & (TYPE_CRITICAL_SEC | TYPE_BENIGN_SEC)) { + if (step == DIRENT_STEP_SECD) { + if (++order == num_ext) + goto found; + continue; + } + } + step = DIRENT_STEP_FILE; + } + + if (clu.flags == 0x03) { + if ((--clu.size) > 0) + clu.dir++; + else + clu.dir = CLUS_EOF; + } else { + if (get_next_clus_safe(sb, &clu.dir)) + return -EIO; + } + } + +not_found: + /* we did not start at index 0, so search for the target + * from index 0 up to the index we started at. + */ + if (!rewind && end_eidx) { + rewind = 1; + dentry = 0; + clu.dir = p_dir->dir; + /* reset empty hint */ + num_empty = 0; + candi_empty.eidx = -1; + goto rewind; + } + + /* initialize hint_stat */ + hint_stat->clu = p_dir->dir; + hint_stat->eidx = 0; + return -ENOENT; + +found: + /* the next dentry we'll find is outside of this cluster */ + if (!((dentry + 1) & (dentries_per_clu-1))) { + int ret = 0; + + if (clu.flags == 0x03) { + if ((--clu.size) > 0) + clu.dir++; + else + clu.dir = CLUS_EOF; + } else { + ret = get_next_clus_safe(sb, &clu.dir); + } + + if (ret || IS_CLUS_EOF(clu.dir)) { + /* just reinitialize hint_stat */ + hint_stat->clu = p_dir->dir; + hint_stat->eidx = 0; + return (dentry - num_ext); + } + } + + hint_stat->clu = clu.dir; + hint_stat->eidx = dentry + 1; + return (dentry - num_ext); +} /* end of exfat_find_dir_entry */ + +/* returns -EIO on error */ +static s32 exfat_count_ext_entries(struct super_block *sb, CHAIN_T *p_dir, s32 entry, DENTRY_T *p_entry) +{ + s32 i, count = 0; + u32 type; + FILE_DENTRY_T *file_ep = (FILE_DENTRY_T *) p_entry; + DENTRY_T *ext_ep; + + for (i = 0, entry++; i < file_ep->num_ext; i++, entry++) { + ext_ep = get_dentry_in_dir(sb, p_dir, entry, NULL); + if (!ext_ep) + return -EIO; + + type = exfat_get_entry_type(ext_ep); + if ((type == TYPE_EXTEND) || (type == TYPE_STREAM)) + count++; + else + return count; + } + + return count; +} /* end of exfat_count_ext_entries */ + + +/* + * Name Conversion Functions + */ +static void exfat_get_uniname_from_ext_entry(struct super_block *sb, CHAIN_T *p_dir, s32 entry, u16 *uniname) +{ + s32 i; + DENTRY_T *ep; + ENTRY_SET_CACHE_T *es; + + es = get_dentry_set_in_dir(sb, p_dir, entry, ES_ALL_ENTRIES, &ep); + if (!es) + return; + + if (es->num_entries < 3) + goto out; + + ep += 2; + + /* + * First entry : file entry + * Second entry : stream-extension entry + * Third entry : first file-name entry + * So, the index of the first file-name dentry starts at 2.
+ */ + for (i = 2; i < es->num_entries; i++, ep++) { + /* end of name entry */ + if (exfat_get_entry_type(ep) != TYPE_EXTEND) + goto out; + + __extract_uni_name_from_name_entry((NAME_DENTRY_T *)ep, uniname, i); + uniname += 15; + } + +out: + release_dentry_set(es); +} /* end of exfat_get_uniname_from_ext_entry */ + +static s32 exfat_calc_num_entries(UNI_NAME_T *p_uniname) +{ + s32 len; + + len = p_uniname->name_len; + if (len == 0) + return 0; + + /* 1 file entry + 1 stream entry + name entries */ + return((len-1) / 15 + 3); + +} /* end of exfat_calc_num_entries */ + +static s32 exfat_check_max_dentries(FILE_ID_T *fid) +{ + if ((fid->size >> DENTRY_SIZE_BITS) >= MAX_EXFAT_DENTRIES) { + /* the exFAT spec allows a dir to grow up to 8388608 dentries (256MB) */ + return -ENOSPC; + } + return 0; +} /* end of exfat_check_max_dentries */ + +/* + * Allocation Bitmap Management Functions + */ +s32 load_alloc_bmp(struct super_block *sb) +{ + s32 ret; + u32 i, j, map_size, type, need_map_size; + u64 sector; + CHAIN_T clu; + BMAP_DENTRY_T *ep; + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + + clu.dir = fsi->root_dir; + clu.flags = 0x01; + + while (!IS_CLUS_EOF(clu.dir)) { + for (i = 0; i < fsi->dentries_per_clu; i++) { + ep = (BMAP_DENTRY_T *) get_dentry_in_dir(sb, &clu, i, NULL); + if (!ep) + return -EIO; + + type = exfat_get_entry_type((DENTRY_T *) ep); + + if (type == TYPE_UNUSED) + break; + if (type != TYPE_BITMAP) + continue; + + if (ep->flags == 0x0) { + fsi->map_clu = le32_to_cpu(ep->start_clu); + map_size = (u32) le64_to_cpu(ep->size); + + need_map_size = (((fsi->num_clusters - CLUS_BASE) - 1) >> 3) + 1; + if (need_map_size != map_size) { + sdfat_log_msg(sb, KERN_ERR, + "bogus allocation bitmap size(need : %u, cur : %u)", + need_map_size, map_size); + /* tolerated only when the on-disk bitmap is larger than needed */ + if (need_map_size > map_size) + return -EIO; + } + fsi->map_sectors = ((need_map_size - 1) >> (sb->s_blocksize_bits)) + 1; + fsi->vol_amap = + kmalloc((sizeof(struct buffer_head *) * fsi->map_sectors), GFP_KERNEL); + if (!fsi->vol_amap) + return -ENOMEM; + + sector = CLUS_TO_SECT(fsi, fsi->map_clu); + + for (j = 0; j < fsi->map_sectors; j++) { + fsi->vol_amap[j] = NULL; + ret = read_sect(sb, sector+j, &(fsi->vol_amap[j]), 1); + if (ret) { + /* release all buffers and free vol_amap */ + i = 0; + while (i < j) + brelse(fsi->vol_amap[i++]); + + /* kfree(NULL) is safe */ + kfree(fsi->vol_amap); + fsi->vol_amap = NULL; + return ret; + } + } + + fsi->pbr_bh = NULL; + return 0; + } + } + + if (get_next_clus_safe(sb, &clu.dir)) + return -EIO; + } + + return -EINVAL; +} /* end of load_alloc_bmp */ + +void free_alloc_bmp(struct super_block *sb) +{ + s32 i; + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + + brelse(fsi->pbr_bh); + + for (i = 0; i < fsi->map_sectors; i++) + __brelse(fsi->vol_amap[i]); + + /* kfree(NULL) is safe */ + kfree(fsi->vol_amap); + fsi->vol_amap = NULL; +} + +/* WARN : + * If the value of "clu" is 0, it means cluster 2, which is + * the first cluster of the cluster heap.
+ */ +static s32 set_alloc_bitmap(struct super_block *sb, u32 clu) +{ + s32 i, b; + u64 sector; + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + + i = clu >> (sb->s_blocksize_bits + 3); + b = clu & (u32)((sb->s_blocksize << 3) - 1); + + sector = CLUS_TO_SECT(fsi, fsi->map_clu) + i; + bitmap_set((unsigned long *)(fsi->vol_amap[i]->b_data), b, 1); + + return write_sect(sb, sector, fsi->vol_amap[i], 0); +} /* end of set_alloc_bitmap */ + +/* WARN : + * If the value of "clu" is 0, it means cluster 2 which is + * the first cluster of cluster heap. + */ +static s32 clr_alloc_bitmap(struct super_block *sb, u32 clu) +{ + s32 ret; + s32 i, b; + u64 sector; + struct sdfat_sb_info *sbi = SDFAT_SB(sb); + struct sdfat_mount_options *opts = &sbi->options; + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + + i = clu >> (sb->s_blocksize_bits + 3); + b = clu & (u32)((sb->s_blocksize << 3) - 1); + + sector = CLUS_TO_SECT(fsi, fsi->map_clu) + i; + + bitmap_clear((unsigned long *)(fsi->vol_amap[i]->b_data), b, 1); + + ret = write_sect(sb, sector, fsi->vol_amap[i], 0); + + if (opts->discard) { + s32 ret_discard; + + TMSG("discard cluster(%08x)\n", clu+2); + ret_discard = sb_issue_discard(sb, CLUS_TO_SECT(fsi, clu+2), + (1 << fsi->sect_per_clus_bits), GFP_NOFS, 0); + + if (ret_discard == -EOPNOTSUPP) { + sdfat_msg(sb, KERN_ERR, + "discard not supported by device, disabling"); + opts->discard = 0; + } + } + + return ret; +} /* end of clr_alloc_bitmap */ + +/* WARN : + * If the value of "clu" is 0, it means cluster 2 which is + * the first cluster of cluster heap. + */ +static u32 test_alloc_bitmap(struct super_block *sb, u32 clu) +{ + u32 i, map_i, map_b; + u32 clu_base, clu_free; + u8 k, clu_mask; + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + + clu_base = (clu & ~(0x7)) + 2; + clu_mask = (1 << (clu - clu_base + 2)) - 1; + + map_i = clu >> (sb->s_blocksize_bits + 3); + map_b = (clu >> 3) & (u32)(sb->s_blocksize - 1); + + for (i = 2; i < fsi->num_clusters; i += 8) { + k = *(((u8 *) fsi->vol_amap[map_i]->b_data) + map_b); + if (clu_mask > 0) { + k |= clu_mask; + clu_mask = 0; + } + if (k < 0xFF) { + clu_free = clu_base + free_bit[k]; + if (clu_free < fsi->num_clusters) + return clu_free; + } + clu_base += 8; + + if (((++map_b) >= (u32)sb->s_blocksize) || + (clu_base >= fsi->num_clusters)) { + if ((++map_i) >= fsi->map_sectors) { + clu_base = 2; + map_i = 0; + } + map_b = 0; + } + } + + return CLUS_EOF; +} /* end of test_alloc_bitmap */ + +void sync_alloc_bmp(struct super_block *sb) +{ + s32 i; + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + + if (fsi->vol_amap == NULL) + return; + + for (i = 0; i < fsi->map_sectors; i++) + sync_dirty_buffer(fsi->vol_amap[i]); +} + +static s32 exfat_chain_cont_cluster(struct super_block *sb, u32 chain, u32 len) +{ + if (!len) + return 0; + + while (len > 1) { + if (fat_ent_set(sb, chain, chain+1)) + return -EIO; + chain++; + len--; + } + + if (fat_ent_set(sb, chain, CLUS_EOF)) + return -EIO; + return 0; +} + +s32 chain_cont_cluster(struct super_block *sb, u32 chain, u32 len) +{ + return exfat_chain_cont_cluster(sb, chain, len); +} + + +static s32 exfat_free_cluster(struct super_block *sb, CHAIN_T *p_chain, s32 do_relse) +{ + s32 ret = -EIO; + u32 num_clusters = 0; + u32 clu; + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + s32 i; + u64 sector; + + /* invalid cluster number */ + if (IS_CLUS_FREE(p_chain->dir) || IS_CLUS_EOF(p_chain->dir)) + return 0; + + /* no cluster to truncate */ + if (p_chain->size == 0) { + DMSG("%s: cluster(%u) truncation is not required.", + __func__, p_chain->dir); + return 0; + } + + 
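+ /* Two on-disk chain layouts are handled below: flags == 0x03 means + * a contiguous run (no FAT chain; clusters are simply consecutive), + * while flags == 0x01 means the chain must be walked through the FAT. */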
/* check cluster validity */ + if ((p_chain->dir < 2) || (p_chain->dir >= fsi->num_clusters)) { + EMSG("%s: invalid start cluster (%u)\n", __func__, p_chain->dir); + sdfat_debug_bug_on(1); + return -EIO; + } + + set_sb_dirty(sb); + clu = p_chain->dir; + + if (p_chain->flags == 0x03) { + do { + if (do_relse) { + sector = CLUS_TO_SECT(fsi, clu); + for (i = 0; i < fsi->sect_per_clus; i++) { + if (dcache_release(sb, sector+i) == -EIO) + goto out; + } + } + + if (clr_alloc_bitmap(sb, clu-2)) + goto out; + clu++; + + num_clusters++; + } while (num_clusters < p_chain->size); + } else { + do { + if (do_relse) { + sector = CLUS_TO_SECT(fsi, clu); + for (i = 0; i < fsi->sect_per_clus; i++) { + if (dcache_release(sb, sector+i) == -EIO) + goto out; + } + } + + if (clr_alloc_bitmap(sb, (clu - CLUS_BASE))) + goto out; + + if (get_next_clus_safe(sb, &clu)) + goto out; + + num_clusters++; + } while (!IS_CLUS_EOF(clu)); + } + + /* success */ + ret = 0; +out: + + fsi->used_clusters -= num_clusters; + return ret; +} /* end of exfat_free_cluster */ + +static s32 exfat_alloc_cluster(struct super_block *sb, u32 num_alloc, CHAIN_T *p_chain, s32 dest) +{ + s32 ret = -ENOSPC; + u32 num_clusters = 0, total_cnt; + u32 hint_clu, new_clu, last_clu = CLUS_EOF; + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + + total_cnt = fsi->num_clusters - CLUS_BASE; + + if (unlikely(total_cnt < fsi->used_clusters)) { + sdfat_fs_error_ratelimit(sb, + "%s: invalid used clusters(t:%u,u:%u)\n", + __func__, total_cnt, fsi->used_clusters); + return -EIO; + } + + if (num_alloc > total_cnt - fsi->used_clusters) + return -ENOSPC; + + hint_clu = p_chain->dir; + /* find new cluster */ + if (IS_CLUS_EOF(hint_clu)) { + if (fsi->clu_srch_ptr < CLUS_BASE) { + EMSG("%s: fsi->clu_srch_ptr is invalid (%u)\n", + __func__, fsi->clu_srch_ptr); + ASSERT(0); + fsi->clu_srch_ptr = CLUS_BASE; + } + + hint_clu = test_alloc_bitmap(sb, fsi->clu_srch_ptr - CLUS_BASE); + if (IS_CLUS_EOF(hint_clu)) + return -ENOSPC; + } + + /* check cluster validity */ + if ((hint_clu < CLUS_BASE) || (hint_clu >= fsi->num_clusters)) { + EMSG("%s: hint_cluster is invalid (%u)\n", __func__, hint_clu); + ASSERT(0); + hint_clu = CLUS_BASE; + if (p_chain->flags == 0x03) { + if (exfat_chain_cont_cluster(sb, p_chain->dir, num_clusters)) + return -EIO; + p_chain->flags = 0x01; + } + } + + set_sb_dirty(sb); + + p_chain->dir = CLUS_EOF; + + while ((new_clu = test_alloc_bitmap(sb, hint_clu - CLUS_BASE)) != CLUS_EOF) { + if ((new_clu != hint_clu) && (p_chain->flags == 0x03)) { + if (exfat_chain_cont_cluster(sb, p_chain->dir, num_clusters)) { + ret = -EIO; + goto error; + } + p_chain->flags = 0x01; + } + + /* update allocation bitmap */ + if (set_alloc_bitmap(sb, new_clu - CLUS_BASE)) { + ret = -EIO; + goto error; + } + + num_clusters++; + + /* update FAT table */ + if (p_chain->flags == 0x01) { + if (fat_ent_set(sb, new_clu, CLUS_EOF)) { + ret = -EIO; + goto error; + } + } + + if (IS_CLUS_EOF(p_chain->dir)) { + p_chain->dir = new_clu; + } else if (p_chain->flags == 0x01) { + if (fat_ent_set(sb, last_clu, new_clu)) { + ret = -EIO; + goto error; + } + } + last_clu = new_clu; + + if ((--num_alloc) == 0) { + fsi->clu_srch_ptr = hint_clu; + fsi->used_clusters += num_clusters; + + p_chain->size += num_clusters; + return 0; + } + + hint_clu = new_clu + 1; + if (hint_clu >= fsi->num_clusters) { + hint_clu = CLUS_BASE; + + if (p_chain->flags == 0x03) { + if (exfat_chain_cont_cluster(sb, p_chain->dir, num_clusters)) { + ret = -EIO; + goto error; + } + p_chain->flags = 0x01; + } + } + }
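+ /* error path: roll back every cluster allocated so far + * (do_relse == 0 since no dcache blocks were touched for them) */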
+error: + if (num_clusters) + exfat_free_cluster(sb, p_chain, 0); + return ret; +} /* end of exfat_alloc_cluster */ + +static s32 exfat_count_used_clusters(struct super_block *sb, u32 *ret_count) +{ + u32 count = 0; + u32 i, map_i, map_b; + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + u32 total_clus = fsi->num_clusters - 2; + + map_i = map_b = 0; + + for (i = 0; i < total_clus; i += 8) { + u8 k = *(((u8 *) fsi->vol_amap[map_i]->b_data) + map_b); + + count += used_bit[k]; + if ((++map_b) >= (u32)sb->s_blocksize) { + map_i++; + map_b = 0; + } + } + + /* FIXME : an abnormal bitmap count should be handled more gracefully */ + if (total_clus < count) + count = total_clus; + + *ret_count = count; + return 0; +} /* end of exfat_count_used_clusters */ + + +/* + * File Operation Functions + */ +static FS_FUNC_T exfat_fs_func = { + .alloc_cluster = exfat_alloc_cluster, + .free_cluster = exfat_free_cluster, + .count_used_clusters = exfat_count_used_clusters, + + .init_dir_entry = exfat_init_dir_entry, + .init_ext_entry = exfat_init_ext_entry, + .find_dir_entry = exfat_find_dir_entry, + .delete_dir_entry = exfat_delete_dir_entry, + .get_uniname_from_ext_entry = exfat_get_uniname_from_ext_entry, + .count_ext_entries = exfat_count_ext_entries, + .calc_num_entries = exfat_calc_num_entries, + .check_max_dentries = exfat_check_max_dentries, + + .get_entry_type = exfat_get_entry_type, + .set_entry_type = exfat_set_entry_type, + .get_entry_attr = exfat_get_entry_attr, + .set_entry_attr = exfat_set_entry_attr, + .get_entry_flag = exfat_get_entry_flag, + .set_entry_flag = exfat_set_entry_flag, + .get_entry_clu0 = exfat_get_entry_clu0, + .set_entry_clu0 = exfat_set_entry_clu0, + .get_entry_size = exfat_get_entry_size, + .set_entry_size = exfat_set_entry_size, + .get_entry_time = exfat_get_entry_time, + .set_entry_time = exfat_set_entry_time, +}; + +s32 mount_exfat(struct super_block *sb, pbr_t *p_pbr) +{ + pbr64_t *p_bpb = (pbr64_t *)p_pbr; + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + + if (!p_bpb->bsx.num_fats) { + sdfat_msg(sb, KERN_ERR, "bogus number of FAT structures"); + return -EINVAL; + } + + fsi->sect_per_clus = 1 << p_bpb->bsx.sect_per_clus_bits; + fsi->sect_per_clus_bits = p_bpb->bsx.sect_per_clus_bits; + fsi->cluster_size_bits = fsi->sect_per_clus_bits + sb->s_blocksize_bits; + fsi->cluster_size = 1 << fsi->cluster_size_bits; + + fsi->num_FAT_sectors = le32_to_cpu(p_bpb->bsx.fat_length); + + fsi->FAT1_start_sector = le32_to_cpu(p_bpb->bsx.fat_offset); + if (p_bpb->bsx.num_fats == 1) + fsi->FAT2_start_sector = fsi->FAT1_start_sector; + else + fsi->FAT2_start_sector = fsi->FAT1_start_sector + fsi->num_FAT_sectors; + + fsi->root_start_sector = le32_to_cpu(p_bpb->bsx.clu_offset); + fsi->data_start_sector = fsi->root_start_sector; + + fsi->num_sectors = le64_to_cpu(p_bpb->bsx.vol_length); + fsi->num_clusters = le32_to_cpu(p_bpb->bsx.clu_count) + 2; + /* because the cluster index starts at 2 */ + + fsi->vol_type = EXFAT; + fsi->vol_id = le32_to_cpu(p_bpb->bsx.vol_serial); + + fsi->root_dir = le32_to_cpu(p_bpb->bsx.root_cluster); + fsi->dentries_in_root = 0; + fsi->dentries_per_clu = 1 << (fsi->cluster_size_bits - DENTRY_SIZE_BITS); + + fsi->vol_flag = (u32) le16_to_cpu(p_bpb->bsx.vol_flags); + fsi->clu_srch_ptr = CLUS_BASE; + fsi->used_clusters = (u32) ~0; + + fsi->fs_func = &exfat_fs_func; + fat_ent_ops_init(sb); + + if (p_bpb->bsx.vol_flags & VOL_DIRTY) { + fsi->vol_flag |= VOL_DIRTY; + sdfat_log_msg(sb, KERN_WARNING, "Volume was not properly " + "unmounted. Some data may be corrupt. "
" + "Please run fsck."); + } + + return 0; +} /* end of mount_exfat */ + +/* end of core_exfat.c */ diff --git a/fs/sdfat/core_fat.c b/fs/sdfat/core_fat.c new file mode 100644 index 000000000000..5e0a196ae42b --- /dev/null +++ b/fs/sdfat/core_fat.c @@ -0,0 +1,1465 @@ +/* + * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +/************************************************************************/ +/* */ +/* PROJECT : exFAT & FAT12/16/32 File System */ +/* FILE : core_fat.c */ +/* PURPOSE : FAT-fs core code for sdFAT */ +/* */ +/*----------------------------------------------------------------------*/ +/* NOTES */ +/* */ +/* */ +/************************************************************************/ + +#include <linux/version.h> +#include <linux/blkdev.h> +#include <linux/workqueue.h> +#include <linux/kernel.h> +#include <linux/log2.h> + +#include "sdfat.h" +#include "core.h" +#include <asm/byteorder.h> +#include <asm/unaligned.h> + +/*----------------------------------------------------------------------*/ +/* Constant & Macro Definitions */ +/*----------------------------------------------------------------------*/ +#define MAX_LFN_ORDER (20) + +/* + * MAX_EST_AU_SECT should be changed according to 32/64bits. + * On 32bit, 4KB page supports 512 clusters per AU. + * But, on 64bit, 4KB page can handle a half of total list_head of 32bit's. + * Bcause the size of list_head structure on 64bit increases twofold over 32bit. 
+#if (BITS_PER_LONG == 64) +//#define MAX_EST_AU_SECT (16384) /* up to 8MB */ +#define MAX_EST_AU_SECT (32768) /* up to 16MB, uses more pages for list_head */ +#else +#define MAX_EST_AU_SECT (32768) /* up to 16MB */ +#endif + +/*======================================================================*/ +/* Local Function Declarations */ +/*======================================================================*/ +static s32 __extract_uni_name_from_ext_entry(EXT_DENTRY_T *, u16 *, s32); + +/*----------------------------------------------------------------------*/ +/* Global Variable Definitions */ +/*----------------------------------------------------------------------*/ + +/*----------------------------------------------------------------------*/ +/* Local Variable Definitions */ +/*----------------------------------------------------------------------*/ + +/*======================================================================*/ +/* Local Function Definitions */ +/*======================================================================*/ +static u32 __calc_default_au_size(struct super_block *sb) +{ + struct block_device *bdev = sb->s_bdev; + struct gendisk *disk; + struct request_queue *queue; + struct queue_limits *limit; + unsigned int est_au_sect = MAX_EST_AU_SECT; + unsigned int est_au_size = 0; + unsigned int queue_au_size = 0; + sector_t total_sect = 0; + + /* we assume that the sector size is 512 bytes */ + + disk = bdev->bd_disk; + if (!disk) + goto out; + + queue = disk->queue; + if (!queue) + goto out; + + limit = &queue->limits; + queue_au_size = limit->discard_granularity; + + /* estimate function(x) = + * (total_sect / 2) * 512 / 1024 + * => ((total_sect >> 1) >> 1) + * => (total_sect >> 2) + * => estimated bytes size + * + * ex1) <= 8GB -> 4MB + * ex2) 16GB -> 8MB + * ex3) >= 32GB -> 16MB + */ + total_sect = disk->part0.nr_sects; + est_au_size = total_sect >> 2; + + /* au_size assumes that bytes per sector is 512 */ + est_au_sect = est_au_size >> 9; + + MMSG("DBG1: total_sect(%llu) est_au_size(%u) est_au_sect(%u)\n", + (u64)total_sect, est_au_size, est_au_sect); + + if (est_au_sect <= 8192) { + /* 4MB */ + est_au_sect = 8192; + } else if (est_au_sect <= 16384) { + /* 8MB */ + est_au_sect = 16384; + } else { + /* 8MB or 16MB */ + est_au_sect = MAX_EST_AU_SECT; + } + + MMSG("DBG2: total_sect(%llu) est_au_size(%u) est_au_sect(%u)\n", + (u64)total_sect, est_au_size, est_au_sect); + + if (est_au_size < queue_au_size && + queue_au_size <= (MAX_EST_AU_SECT << 9)) { + DMSG("use queue_au_size(%u) instead of est_au_size(%u)\n", + queue_au_size, est_au_size); + est_au_sect = queue_au_size >> 9; + } + +out: + if (sb->s_blocksize != 512) { + ASSERT(sb->s_blocksize_bits > 9); + sdfat_log_msg(sb, KERN_INFO, + "adjusting est_au_size by logical block size(%lu)", + sb->s_blocksize); + est_au_sect >>= (sb->s_blocksize_bits - 9); + } + + sdfat_log_msg(sb, KERN_INFO, "set default AU sectors : %u " + "(queue_au_size : %u KB, disk_size : %llu MB)", + est_au_sect, queue_au_size >> 10, (u64)(total_sect >> 11)); + return est_au_sect; +} + + +/* + * Cluster Management Functions + */ +static s32 fat_free_cluster(struct super_block *sb, CHAIN_T *p_chain, s32 do_relse) +{ + s32 ret = -EIO; + s32 num_clusters = 0; + u32 clu, prev; + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + s32 i; + u64 sector; + + /* invalid cluster number */ + if (IS_CLUS_FREE(p_chain->dir) || IS_CLUS_EOF(p_chain->dir)) + return 0; + + /* no cluster to truncate */ + if (!p_chain->size) { + DMSG("%s: cluster(%u) truncation is not required.",
+ __func__, p_chain->dir); + return 0; + } + + /* check cluster validity */ + if ((p_chain->dir < 2) || (p_chain->dir >= fsi->num_clusters)) { + EMSG("%s: invalid start cluster (%u)\n", __func__, p_chain->dir); + sdfat_debug_bug_on(1); + return -EIO; + } + + + set_sb_dirty(sb); + clu = p_chain->dir; + + do { + if (do_relse) { + sector = CLUS_TO_SECT(fsi, clu); + for (i = 0; i < fsi->sect_per_clus; i++) { + if (dcache_release(sb, sector+i) == -EIO) + goto out; + } + } + + prev = clu; + if (get_next_clus_safe(sb, &clu)) { + /* print a more helpful log */ + if (IS_CLUS_BAD(clu)) { + sdfat_log_msg(sb, KERN_ERR, "%s : " + "deleting bad cluster (clu[%u]->BAD)", + __func__, prev); + } else if (IS_CLUS_FREE(clu)) { + sdfat_log_msg(sb, KERN_ERR, "%s : " + "deleting free cluster (clu[%u]->FREE)", + __func__, prev); + } + goto out; + } + + /* Free FAT chain */ + if (fat_ent_set(sb, prev, CLUS_FREE)) + goto out; + + /* Update AMAP if needed */ + if (fsi->amap) { + if (amap_release_cluster(sb, prev)) + return -EIO; + } + + num_clusters++; + + } while (!IS_CLUS_EOF(clu)); + + /* success */ + ret = 0; +out: + fsi->used_clusters -= num_clusters; + return ret; +} /* end of fat_free_cluster */ + +static s32 fat_alloc_cluster(struct super_block *sb, u32 num_alloc, CHAIN_T *p_chain, s32 dest) +{ + s32 ret = -ENOSPC; + u32 i, num_clusters = 0, total_cnt; + u32 new_clu, last_clu = CLUS_EOF, read_clu; + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + + total_cnt = fsi->num_clusters - CLUS_BASE; + + if (unlikely(total_cnt < fsi->used_clusters)) { + sdfat_fs_error_ratelimit(sb, + "%s : invalid used clusters(t:%u,u:%u)\n", + __func__, total_cnt, fsi->used_clusters); + return -EIO; + } + + if (num_alloc > total_cnt - fsi->used_clusters) + return -ENOSPC; + + new_clu = p_chain->dir; + if (IS_CLUS_EOF(new_clu)) + new_clu = fsi->clu_srch_ptr; + else if (new_clu >= fsi->num_clusters) + new_clu = CLUS_BASE; + + set_sb_dirty(sb); + + p_chain->dir = CLUS_EOF; + + for (i = CLUS_BASE; i < fsi->num_clusters; i++) { + if (fat_ent_get(sb, new_clu, &read_clu)) { + ret = -EIO; + goto error; + } + + if (IS_CLUS_FREE(read_clu)) { + if (fat_ent_set(sb, new_clu, CLUS_EOF)) { + ret = -EIO; + goto error; + } + num_clusters++; + + if (IS_CLUS_EOF(p_chain->dir)) { + p_chain->dir = new_clu; + } else { + if (fat_ent_set(sb, last_clu, new_clu)) { + ret = -EIO; + goto error; + } + } + + last_clu = new_clu; + + if ((--num_alloc) == 0) { + fsi->clu_srch_ptr = new_clu; + fsi->used_clusters += num_clusters; + + return 0; + } + } + if ((++new_clu) >= fsi->num_clusters) + new_clu = CLUS_BASE; + } +error: + if (num_clusters) + fat_free_cluster(sb, p_chain, 0); + return ret; +} /* end of fat_alloc_cluster */ + +static s32 fat_count_used_clusters(struct super_block *sb, u32 *ret_count) +{ + s32 i; + u32 clu, count = 0; + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + + for (i = CLUS_BASE; i < fsi->num_clusters; i++) { + if (fat_ent_get(sb, i, &clu)) + return -EIO; + + if (!IS_CLUS_FREE(clu)) + count++; + } + + *ret_count = count; + return 0; +} /* end of fat_count_used_clusters */ + + +/* + * Directory Entry Management Functions + */ +static u32 fat_get_entry_type(DENTRY_T *p_entry) +{ + DOS_DENTRY_T *ep = (DOS_DENTRY_T *)p_entry; + + /* first byte of the 32-byte directory entry */ + if (*(ep->name) == MSDOS_UNUSED) + return TYPE_UNUSED; + + /* a leading 0xE5 (valid in Kanji names) is stored as 0x05 */ + else if (*(ep->name) == MSDOS_DELETED) + return TYPE_DELETED; + + /* 11th byte of the 32-byte directory entry */ + else if ((ep->attr & ATTR_EXTEND_MASK) == ATTR_EXTEND) + return TYPE_EXTEND; + + else
if (!(ep->attr & (ATTR_SUBDIR | ATTR_VOLUME))) + return TYPE_FILE; + + else if ((ep->attr & (ATTR_SUBDIR | ATTR_VOLUME)) == ATTR_SUBDIR) + return TYPE_DIR; + + else if ((ep->attr & (ATTR_SUBDIR | ATTR_VOLUME)) == ATTR_VOLUME) + return TYPE_VOLUME; + + return TYPE_INVALID; +} /* end of fat_get_entry_type */ + +static void fat_set_entry_type(DENTRY_T *p_entry, u32 type) +{ + DOS_DENTRY_T *ep = (DOS_DENTRY_T *)p_entry; + + if (type == TYPE_UNUSED) + *(ep->name) = MSDOS_UNUSED; /* 0x0 */ + + else if (type == TYPE_DELETED) + *(ep->name) = MSDOS_DELETED; /* 0xE5 */ + + else if (type == TYPE_EXTEND) + ep->attr = ATTR_EXTEND; + + else if (type == TYPE_DIR) + ep->attr = ATTR_SUBDIR; + + else if (type == TYPE_FILE) + ep->attr = ATTR_ARCHIVE; + + else if (type == TYPE_SYMLINK) + ep->attr = ATTR_ARCHIVE | ATTR_SYMLINK; +} /* end of fat_set_entry_type */ + +static u32 fat_get_entry_attr(DENTRY_T *p_entry) +{ + DOS_DENTRY_T *ep = (DOS_DENTRY_T *)p_entry; + + return (u32)ep->attr; +} /* end of fat_get_entry_attr */ + +static void fat_set_entry_attr(DENTRY_T *p_entry, u32 attr) +{ + DOS_DENTRY_T *ep = (DOS_DENTRY_T *)p_entry; + + ep->attr = (u8)attr; +} /* end of fat_set_entry_attr */ + +static u8 fat_get_entry_flag(DENTRY_T *p_entry) +{ + return 0x01; +} /* end of fat_get_entry_flag */ + +static void fat_set_entry_flag(DENTRY_T *p_entry, u8 flags) +{ +} /* end of fat_set_entry_flag */ + +static u32 fat_get_entry_clu0(DENTRY_T *p_entry) +{ + DOS_DENTRY_T *ep = (DOS_DENTRY_T *)p_entry; + /* FIXME : is ok? */ + return(((u32)(le16_to_cpu(ep->start_clu_hi)) << 16) | le16_to_cpu(ep->start_clu_lo)); +} /* end of fat_get_entry_clu0 */ + +static void fat_set_entry_clu0(DENTRY_T *p_entry, u32 start_clu) +{ + DOS_DENTRY_T *ep = (DOS_DENTRY_T *)p_entry; + + ep->start_clu_lo = cpu_to_le16(CLUSTER_16(start_clu)); + ep->start_clu_hi = cpu_to_le16(CLUSTER_16(start_clu >> 16)); +} /* end of fat_set_entry_clu0 */ + +static u64 fat_get_entry_size(DENTRY_T *p_entry) +{ + DOS_DENTRY_T *ep = (DOS_DENTRY_T *)p_entry; + + return (u64)le32_to_cpu(ep->size); +} /* end of fat_get_entry_size */ + +static void fat_set_entry_size(DENTRY_T *p_entry, u64 size) +{ + DOS_DENTRY_T *ep = (DOS_DENTRY_T *)p_entry; + + ep->size = cpu_to_le32((u32)size); +} /* end of fat_set_entry_size */ + +static void fat_get_entry_time(DENTRY_T *p_entry, TIMESTAMP_T *tp, u8 mode) +{ + u16 t = 0x00, d = 0x21; + DOS_DENTRY_T *ep = (DOS_DENTRY_T *) p_entry; + + switch (mode) { + case TM_CREATE: + t = le16_to_cpu(ep->create_time); + d = le16_to_cpu(ep->create_date); + break; + case TM_MODIFY: + t = le16_to_cpu(ep->modify_time); + d = le16_to_cpu(ep->modify_date); + break; + } + + tp->tz.value = 0x00; + tp->sec = (t & 0x001F) << 1; + tp->min = (t >> 5) & 0x003F; + tp->hour = (t >> 11); + tp->day = (d & 0x001F); + tp->mon = (d >> 5) & 0x000F; + tp->year = (d >> 9); +} /* end of fat_get_entry_time */ + +static void fat_set_entry_time(DENTRY_T *p_entry, TIMESTAMP_T *tp, u8 mode) +{ + u16 t, d; + DOS_DENTRY_T *ep = (DOS_DENTRY_T *) p_entry; + + t = (tp->hour << 11) | (tp->min << 5) | (tp->sec >> 1); + d = (tp->year << 9) | (tp->mon << 5) | tp->day; + + switch (mode) { + case TM_CREATE: + ep->create_time = cpu_to_le16(t); + ep->create_date = cpu_to_le16(d); + break; + case TM_MODIFY: + ep->modify_time = cpu_to_le16(t); + ep->modify_date = cpu_to_le16(d); + break; + } +} /* end of fat_set_entry_time */ + +static void __init_dos_entry(struct super_block *sb, DOS_DENTRY_T *ep, u32 type, u32 start_clu) +{ + TIMESTAMP_T tm, *tp; + + fat_set_entry_type((DENTRY_T *) ep, 
type); + ep->start_clu_lo = cpu_to_le16(CLUSTER_16(start_clu)); + ep->start_clu_hi = cpu_to_le16(CLUSTER_16(start_clu >> 16)); + ep->size = 0; + + tp = tm_now(SDFAT_SB(sb), &tm); + fat_set_entry_time((DENTRY_T *) ep, tp, TM_CREATE); + fat_set_entry_time((DENTRY_T *) ep, tp, TM_MODIFY); + ep->access_date = 0; + ep->create_time_ms = 0; +} /* end of __init_dos_entry */ + +static void __init_ext_entry(EXT_DENTRY_T *ep, s32 order, u8 chksum, u16 *uniname) +{ + s32 i; + u8 end = false; + + fat_set_entry_type((DENTRY_T *) ep, TYPE_EXTEND); + ep->order = (u8) order; + ep->sysid = 0; + ep->checksum = chksum; + ep->start_clu = 0; + + /* unaligned name */ + for (i = 0; i < 5; i++) { + if (!end) { + put_unaligned_le16(*uniname, &(ep->unicode_0_4[i<<1])); + if (*uniname == 0x0) + end = true; + else + uniname++; + } else { + put_unaligned_le16(0xFFFF, &(ep->unicode_0_4[i<<1])); + } + } + + /* aligned name */ + for (i = 0; i < 6; i++) { + if (!end) { + ep->unicode_5_10[i] = cpu_to_le16(*uniname); + if (*uniname == 0x0) + end = true; + else + uniname++; + } else { + ep->unicode_5_10[i] = cpu_to_le16(0xFFFF); + } + } + + /* aligned name */ + for (i = 0; i < 2; i++) { + if (!end) { + ep->unicode_11_12[i] = cpu_to_le16(*uniname); + if (*uniname == 0x0) + end = true; + else + uniname++; + } else { + ep->unicode_11_12[i] = cpu_to_le16(0xFFFF); + } + } +} /* end of __init_ext_entry */ + +static s32 fat_init_dir_entry(struct super_block *sb, CHAIN_T *p_dir, s32 entry, u32 type, + u32 start_clu, u64 size) +{ + u64 sector; + DOS_DENTRY_T *dos_ep; + + dos_ep = (DOS_DENTRY_T *) get_dentry_in_dir(sb, p_dir, entry, &sector); + if (!dos_ep) + return -EIO; + + __init_dos_entry(sb, dos_ep, type, start_clu); + dcache_modify(sb, sector); + + return 0; +} /* end of fat_init_dir_entry */ + +static s32 fat_init_ext_entry(struct super_block *sb, CHAIN_T *p_dir, s32 entry, s32 num_entries, + UNI_NAME_T *p_uniname, DOS_NAME_T *p_dosname) +{ + s32 i; + u64 sector; + u8 chksum; + u16 *uniname = p_uniname->name; + DOS_DENTRY_T *dos_ep; + EXT_DENTRY_T *ext_ep; + + dos_ep = (DOS_DENTRY_T *) get_dentry_in_dir(sb, p_dir, entry, &sector); + if (!dos_ep) + return -EIO; + + dos_ep->lcase = p_dosname->name_case; + memcpy(dos_ep->name, p_dosname->name, DOS_NAME_LENGTH); + if (dcache_modify(sb, sector)) + return -EIO; + + if ((--num_entries) > 0) { + chksum = calc_chksum_1byte((void *) dos_ep->name, DOS_NAME_LENGTH, 0); + + for (i = 1; i < num_entries; i++) { + ext_ep = (EXT_DENTRY_T *) get_dentry_in_dir(sb, p_dir, entry-i, &sector); + if (!ext_ep) + return -EIO; + + __init_ext_entry(ext_ep, i, chksum, uniname); + if (dcache_modify(sb, sector)) + return -EIO; + uniname += 13; + } + + ext_ep = (EXT_DENTRY_T *) get_dentry_in_dir(sb, p_dir, entry-i, &sector); + if (!ext_ep) + return -EIO; + + __init_ext_entry(ext_ep, i+MSDOS_LAST_LFN, chksum, uniname); + if (dcache_modify(sb, sector)) + return -EIO; + } + + return 0; +} /* end of fat_init_ext_entry */ + +static s32 fat_delete_dir_entry(struct super_block *sb, CHAIN_T *p_dir, s32 entry, s32 order, s32 num_entries) +{ + s32 i; + u64 sector; + DENTRY_T *ep; + + for (i = num_entries-1; i >= order; i--) { + ep = get_dentry_in_dir(sb, p_dir, entry-i, &sector); + if (!ep) + return -EIO; + + fat_set_entry_type(ep, TYPE_DELETED); + if (dcache_modify(sb, sector)) + return -EIO; + } + + return 0; +} + +/* return values of fat_find_dir_entry() + * >= 0 : returns the dir entry position of the name in dir + * -EEXIST : (root dir, ".") it is the root dir itself + * -ENOENT : entry with the name does not exist + * -EIO : I/O error + */
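+/* On FAT12/16 the root directory lives in a reserved region rather than + * in the cluster heap; it is represented by cluster 0, which is why the + * IS_CLUS_FREE() checks below treat cluster 0 as the root directory. */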
+static inline s32 __get_dentries_per_clu(FS_INFO_T *fsi, s32 clu) +{ + if (IS_CLUS_FREE(clu)) /* FAT16 root_dir */ + return fsi->dentries_in_root; + + return fsi->dentries_per_clu; +} + +static s32 fat_find_dir_entry(struct super_block *sb, FILE_ID_T *fid, + CHAIN_T *p_dir, UNI_NAME_T *p_uniname, s32 num_entries, DOS_NAME_T *p_dosname, u32 type) +{ + s32 i, rewind = 0, dentry = 0, end_eidx = 0; + s32 chksum = 0, lfn_ord = 0, lfn_len = 0; + s32 dentries_per_clu, num_empty = 0; + u32 entry_type; + u16 entry_uniname[14], *uniname = NULL; + CHAIN_T clu; + DENTRY_T *ep; + HINT_T *hint_stat = &fid->hint_stat; + HINT_FEMP_T candi_empty; + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + + /* + * REMARK: + * DOT and DOTDOT are handled by VFS layer + */ + + dentries_per_clu = __get_dentries_per_clu(fsi, p_dir->dir); + clu.dir = p_dir->dir; + clu.flags = p_dir->flags; + + if (hint_stat->eidx) { + clu.dir = hint_stat->clu; + dentry = hint_stat->eidx; + end_eidx = dentry; + } + + candi_empty.eidx = -1; + + MMSG("lookup dir= %s\n", p_dosname->name); +rewind: + while (!IS_CLUS_EOF(clu.dir)) { + i = dentry % dentries_per_clu; + for (; i < dentries_per_clu; i++, dentry++) { + if (rewind && (dentry == end_eidx)) + goto not_found; + + ep = get_dentry_in_dir(sb, &clu, i, NULL); + if (!ep) + return -EIO; + + entry_type = fat_get_entry_type(ep); + + /* + * Most directory entries have a long name, + * so we check the extend (LFN) directory entry first. + */ + if (entry_type == TYPE_EXTEND) { + EXT_DENTRY_T *ext_ep = (EXT_DENTRY_T *)ep; + u32 cur_ord = (u32)ext_ep->order; + u32 cur_chksum = (s32)ext_ep->checksum; + s32 len = 13; + u16 unichar; + + num_empty = 0; + candi_empty.eidx = -1; + + /* check whether this starts a new lfn or not */ + if (cur_ord & MSDOS_LAST_LFN) { + cur_ord &= ~(MSDOS_LAST_LFN); + chksum = cur_chksum; + len = (13 * (cur_ord-1)); + uniname = (p_uniname->name + len); + lfn_ord = cur_ord + 1; + lfn_len = 0; + + /* check minimum name length */ + if (cur_ord && + (len > p_uniname->name_len)) { + /* MISMATCHED NAME LENGTH */ + lfn_len = -1; + } + len = 0; + } + + /* invalid lfn order */ + if (!cur_ord || (cur_ord > MAX_LFN_ORDER) || + ((cur_ord + 1) != lfn_ord)) + goto reset_dentry_set; + + /* check checksum of directory entry set */ + if (cur_chksum != chksum) + goto reset_dentry_set; + + /* update order for next dentry */ + lfn_ord = cur_ord; + + /* check whether the lfn is mismatched or not */ + if (lfn_len == -1) { + /* MISMATCHED LFN DENTRY SET */ + continue; + } + + if (!uniname) { + sdfat_fs_error(sb, + "%s : abnormal dentry " + "(start_clu[%u], " + "idx[%u])", __func__, + p_dir->dir, dentry); + sdfat_debug_bug_on(1); + return -EIO; + } + + /* update position of name buffer */ + uniname -= len; + + /* get utf16 characters saved on this entry */ + len = __extract_uni_name_from_ext_entry(ext_ep, entry_uniname, lfn_ord); + + /* temporarily replace the last char with null */ + unichar = *(uniname+len); + *(uniname+len) = (u16)0x0; + + /* compare uniname against this ext_dentry chunk */ + if (nls_cmp_uniname(sb, uniname, entry_uniname)) { + /* DO HANDLE WRONG NAME */ + lfn_len = -1; + } else { + /* add matched chars length */ + lfn_len += len; + } + + /* restore previous character */ + *(uniname+len) = unichar; + + /* jump to check next dentry */ + continue; + + } else if ((entry_type == TYPE_FILE) || (entry_type == TYPE_DIR)) { + DOS_DENTRY_T *dos_ep = (DOS_DENTRY_T *)ep; + u32 cur_chksum = (s32)calc_chksum_1byte( + (void *) dos_ep->name, + DOS_NAME_LENGTH, 0); + + num_empty = 0; + candi_empty.eidx = -1;
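+ /* a short (8.3) entry terminates any preceding LFN run; match + * below either via the accumulated LFN or via the SFN itself */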
%c%c%c%c%c%c%c%c%c%c%c\n", + dos_ep->name[0], dos_ep->name[1], + dos_ep->name[2], dos_ep->name[3], + dos_ep->name[4], dos_ep->name[5], + dos_ep->name[6], dos_ep->name[7], + dos_ep->name[8], dos_ep->name[9], + dos_ep->name[10]); + + /* + * if there is no valid long filename, + * we should check short filename. + */ + if (!lfn_len || (cur_chksum != chksum)) { + /* check shortname */ + if ((p_dosname->name[0] != '\0') && + !nls_cmp_sfn(sb, + p_dosname->name, + dos_ep->name)) { + goto found; + } + /* check name length */ + } else if ((lfn_len > 0) && + ((s32)p_uniname->name_len == + lfn_len)) { + goto found; + } + + /* DO HANDLE MISMATCHED SFN, FALL THROUGH */ + } else if ((entry_type == TYPE_UNUSED) || (entry_type == TYPE_DELETED)) { + num_empty++; + if (candi_empty.eidx == -1) { + if (num_empty == 1) { + candi_empty.cur.dir = clu.dir; + candi_empty.cur.size = clu.size; + candi_empty.cur.flags = clu.flags; + } + + if (num_empty >= num_entries) { + candi_empty.eidx = dentry - (num_empty - 1); + ASSERT(0 <= candi_empty.eidx); + candi_empty.count = num_empty; + + if ((fid->hint_femp.eidx == -1) || + (candi_empty.eidx <= fid->hint_femp.eidx)) { + memcpy(&fid->hint_femp, + &candi_empty, + sizeof(HINT_FEMP_T)); + } + } + } + + if (entry_type == TYPE_UNUSED) + goto not_found; + /* FALL THROUGH */ + } +reset_dentry_set: + /* TYPE_DELETED, TYPE_VOLUME OR MISMATCHED SFN */ + lfn_ord = 0; + lfn_len = 0; + chksum = 0; + } + + if (IS_CLUS_FREE(p_dir->dir)) + break; /* FAT16 root_dir */ + + if (get_next_clus_safe(sb, &clu.dir)) + return -EIO; + } + +not_found: + /* we did not start at index 0, so we should try to find the target + * from index 0 up to the index where we started. + */ + if (!rewind && end_eidx) { + rewind = 1; + dentry = 0; + clu.dir = p_dir->dir; + /* reset dentry set */ + lfn_ord = 0; + lfn_len = 0; + chksum = 0; + /* reset empty hint */ + num_empty = 0; + candi_empty.eidx = -1; + goto rewind; + } + + /* initialize hint_stat */ + hint_stat->clu = p_dir->dir; + hint_stat->eidx = 0; + return -ENOENT; + +found: + /* next dentry we'll find is out of this cluster */ + if (!((dentry + 1) % dentries_per_clu)) { + int ret = 0; + /* FAT16 root_dir */ + if (IS_CLUS_FREE(p_dir->dir)) + clu.dir = CLUS_EOF; + else + ret = get_next_clus_safe(sb, &clu.dir); + + if (ret || IS_CLUS_EOF(clu.dir)) { + /* just initialize hint_stat */ + hint_stat->clu = p_dir->dir; + hint_stat->eidx = 0; + return dentry; + } + } + + hint_stat->clu = clu.dir; + hint_stat->eidx = dentry + 1; + return dentry; +} /* end of fat_find_dir_entry */ + +/* returns -EIO on error */ +static s32 fat_count_ext_entries(struct super_block *sb, CHAIN_T *p_dir, s32 entry, DENTRY_T *p_entry) +{ + s32 count = 0; + u8 chksum; + DOS_DENTRY_T *dos_ep = (DOS_DENTRY_T *) p_entry; + EXT_DENTRY_T *ext_ep; + + chksum = calc_chksum_1byte((void *) dos_ep->name, DOS_NAME_LENGTH, 0); + + for (entry--; entry >= 0; entry--) { + ext_ep = (EXT_DENTRY_T *)get_dentry_in_dir(sb, p_dir, entry, NULL); + if (!ext_ep) + return -EIO; + + if ((fat_get_entry_type((DENTRY_T *)ext_ep) == TYPE_EXTEND) && + (ext_ep->checksum == chksum)) { + count++; + if (ext_ep->order > MSDOS_LAST_LFN) + return count; + } else { + return count; + } + } + + return count; +} + + +/* + * Name Conversion Functions + */ +static s32 __extract_uni_name_from_ext_entry(EXT_DENTRY_T *ep, u16 *uniname, s32 order) +{ + s32 i, len = 0; + + for (i = 0; i < 5; i++) { + *uniname = get_unaligned_le16(&(ep->unicode_0_4[i<<1])); + if (*uniname == 0x0) + return len; + uniname++; + len++; + } + + if (order < 20) { + for 
(i = 0; i < 6; i++) { + /* FIXME : unaligned? */ + *uniname = le16_to_cpu(ep->unicode_5_10[i]); + if (*uniname == 0x0) + return len; + uniname++; + len++; + } + } else { + for (i = 0; i < 4; i++) { + /* FIXME : unaligned? */ + *uniname = le16_to_cpu(ep->unicode_5_10[i]); + if (*uniname == 0x0) + return len; + uniname++; + len++; + } + *uniname = 0x0; /* uniname[MAX_NAME_LENGTH] */ + return len; + } + + for (i = 0; i < 2; i++) { + /* FIXME : unaligned? */ + *uniname = le16_to_cpu(ep->unicode_11_12[i]); + if (*uniname == 0x0) + return len; + uniname++; + len++; + } + + *uniname = 0x0; + return len; + +} /* end of __extract_uni_name_from_ext_entry */ + +static void fat_get_uniname_from_ext_entry(struct super_block *sb, CHAIN_T *p_dir, s32 entry, u16 *uniname) +{ + u32 i; + u16 *name = uniname; + u32 chksum; + + DOS_DENTRY_T *dos_ep = + (DOS_DENTRY_T *)get_dentry_in_dir(sb, p_dir, entry, NULL); + + if (unlikely(!dos_ep)) + goto invalid_lfn; + + chksum = (u32)calc_chksum_1byte( + (void *) dos_ep->name, + DOS_NAME_LENGTH, 0); + + for (entry--, i = 1; entry >= 0; entry--, i++) { + EXT_DENTRY_T *ep; + + ep = (EXT_DENTRY_T *)get_dentry_in_dir(sb, p_dir, entry, NULL); + if (!ep) + goto invalid_lfn; + + if (fat_get_entry_type((DENTRY_T *) ep) != TYPE_EXTEND) + goto invalid_lfn; + + if (chksum != (u32)ep->checksum) + goto invalid_lfn; + + if (i != (u32)(ep->order & ~(MSDOS_LAST_LFN))) + goto invalid_lfn; + + __extract_uni_name_from_ext_entry(ep, name, (s32)i); + if (ep->order & MSDOS_LAST_LFN) + return; + + name += 13; + } +invalid_lfn: + *uniname = (u16)0x0; +} /* end of fat_get_uniname_from_ext_entry */ + +/* Find if the shortname exists + * and check if there are free entries + */ +static s32 __fat_find_shortname_entry(struct super_block *sb, CHAIN_T *p_dir, + u8 *p_dosname, s32 *offset, __attribute__((unused))int n_entry_needed) +{ + u32 type; + s32 i, dentry = 0; + s32 dentries_per_clu; + DENTRY_T *ep = NULL; + DOS_DENTRY_T *dos_ep = NULL; + CHAIN_T clu = *p_dir; + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + + if (offset) + *offset = -1; + + if (IS_CLUS_FREE(clu.dir)) /* FAT16 root_dir */ + dentries_per_clu = fsi->dentries_in_root; + else + dentries_per_clu = fsi->dentries_per_clu; + + while (!IS_CLUS_EOF(clu.dir)) { + for (i = 0; i < dentries_per_clu; i++, dentry++) { + ep = get_dentry_in_dir(sb, &clu, i, NULL); + if (!ep) + return -EIO; + + type = fat_get_entry_type(ep); + + if ((type == TYPE_FILE) || (type == TYPE_DIR)) { + dos_ep = (DOS_DENTRY_T *)ep; + if (!nls_cmp_sfn(sb, p_dosname, dos_ep->name)) { + if (offset) + *offset = dentry; + return 0; + } + } + } + + /* fat12/16 root dir */ + if (IS_CLUS_FREE(clu.dir)) + break; + + if (get_next_clus_safe(sb, &clu.dir)) + return -EIO; + } + return -ENOENT; +} + +#ifdef CONFIG_SDFAT_FAT32_SHORTNAME_SEQ +static void __fat_attach_count_to_dos_name(u8 *dosname, s32 count) +{ + s32 i, j, length; + s8 str_count[6]; + + snprintf(str_count, sizeof(str_count), "~%d", count); + length = strlen(str_count); + + i = j = 0; + while (j <= (8 - length)) { + i = j; + if (dosname[j] == ' ') + break; + if (dosname[j] & 0x80) + j += 2; + else + j++; + } + + for (j = 0; j < length; i++, j++) + dosname[i] = (u8) str_count[j]; + + if (i == 7) + dosname[7] = ' '; + +} /* end of __fat_attach_count_to_dos_name */ +#endif + +s32 fat_generate_dos_name_new(struct super_block *sb, CHAIN_T *p_dir, DOS_NAME_T *p_dosname, s32 n_entry_needed) +{ + s32 i; + s32 baselen, err; + u8 work[DOS_NAME_LENGTH], buf[5]; + u8 tail; + + baselen = 8; + memset(work, ' ', DOS_NAME_LENGTH); + 
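	/*
	 * (Editor's note, illustrative only) Tracing this function for a long
	 * name such as "namei_exfat.c": the 8.3 base copied into work[] just
	 * below is "NAMEI_EX"; trimmed to 6 chars it yields the candidates
	 * NAMEI_~1 .. NAMEI_~9, and only when all nine already exist does the
	 * code fall back to the jiffies-based form of 2 base chars + 4 hex
	 * digits + "~N", e.g. "NA1F3C~2".
	 */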
memcpy(work, p_dosname->name, DOS_NAME_LENGTH); + + while (baselen && (work[--baselen] == ' ')) { + /* DO NOTHING, JUST FOR CHECK_PATCH */ + } + + if (baselen > 6) + baselen = 6; + + BUG_ON(baselen < 0); + +#ifdef CONFIG_SDFAT_FAT32_SHORTNAME_SEQ + /* example) namei_exfat.c -> NAMEI_~1 - NAMEI_~9 */ + work[baselen] = '~'; + for (i = 1; i < 10; i++) { + // '0' + i = 1 ~ 9 ASCII + work[baselen + 1] = '0' + i; + err = __fat_find_shortname_entry(sb, p_dir, work, NULL, n_entry_needed); + if (err == -ENOENT) { + /* void return */ + __fat_attach_count_to_dos_name(p_dosname->name, i); + return 0; + } + + /* any other error */ + if (err) + return err; + } +#endif + + i = jiffies; + tail = (jiffies >> 16) & 0x7; + + if (baselen > 2) + baselen = 2; + + BUG_ON(baselen < 0); + + work[baselen + 4] = '~'; + // 1 ~ 8 ASCII + work[baselen + 5] = '1' + tail; + while (1) { + snprintf(buf, sizeof(buf), "%04X", i & 0xffff); + memcpy(&work[baselen], buf, 4); + err = __fat_find_shortname_entry(sb, p_dir, work, NULL, n_entry_needed); + if (err == -ENOENT) { + memcpy(p_dosname->name, work, DOS_NAME_LENGTH); + break; + } + + /* any other error */ + if (err) + return err; + + i -= 11; + } + return 0; +} /* end of fat_generate_dos_name_new */ + +static s32 fat_calc_num_entries(UNI_NAME_T *p_uniname) +{ + s32 len; + + len = p_uniname->name_len; + if (len == 0) + return 0; + + /* 1 dos name entry + extended entries */ + return ((len-1) / 13 + 2); + +} /* end of fat_calc_num_entries */ + +static s32 fat_check_max_dentries(FILE_ID_T *fid) +{ + if ((fid->size >> DENTRY_SIZE_BITS) >= MAX_FAT_DENTRIES) { + /* FAT spec allows a dir to grow up to 65536 dentries */ + return -ENOSPC; + } + return 0; +} /* end of fat_check_max_dentries */ + + +/* + * File Operation Functions + */ +static FS_FUNC_T fat_fs_func = { + .alloc_cluster = fat_alloc_cluster, + .free_cluster = fat_free_cluster, + .count_used_clusters = fat_count_used_clusters, + + .init_dir_entry = fat_init_dir_entry, + .init_ext_entry = fat_init_ext_entry, + .find_dir_entry = fat_find_dir_entry, + .delete_dir_entry = fat_delete_dir_entry, + .get_uniname_from_ext_entry = fat_get_uniname_from_ext_entry, + .count_ext_entries = fat_count_ext_entries, + .calc_num_entries = fat_calc_num_entries, + .check_max_dentries = fat_check_max_dentries, + + .get_entry_type = fat_get_entry_type, + .set_entry_type = fat_set_entry_type, + .get_entry_attr = fat_get_entry_attr, + .set_entry_attr = fat_set_entry_attr, + .get_entry_flag = fat_get_entry_flag, + .set_entry_flag = fat_set_entry_flag, + .get_entry_clu0 = fat_get_entry_clu0, + .set_entry_clu0 = fat_set_entry_clu0, + .get_entry_size = fat_get_entry_size, + .set_entry_size = fat_set_entry_size, + .get_entry_time = fat_get_entry_time, + .set_entry_time = fat_set_entry_time, +}; + +static FS_FUNC_T amap_fat_fs_func = { + .alloc_cluster = amap_fat_alloc_cluster, + .free_cluster = fat_free_cluster, + .count_used_clusters = fat_count_used_clusters, + + .init_dir_entry = fat_init_dir_entry, + .init_ext_entry = fat_init_ext_entry, + .find_dir_entry = fat_find_dir_entry, + .delete_dir_entry = fat_delete_dir_entry, + .get_uniname_from_ext_entry = fat_get_uniname_from_ext_entry, + .count_ext_entries = fat_count_ext_entries, + .calc_num_entries = fat_calc_num_entries, + .check_max_dentries = fat_check_max_dentries, + + .get_entry_type = fat_get_entry_type, + .set_entry_type = fat_set_entry_type, + .get_entry_attr = fat_get_entry_attr, + .set_entry_attr = fat_set_entry_attr, + .get_entry_flag = fat_get_entry_flag, + .set_entry_flag = fat_set_entry_flag, + 
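	/*
	 * (Editor's note) Apart from .alloc_cluster above and the extra
	 * .get_au_stat hook at the end of this table, amap_fat_fs_func is
	 * identical to fat_fs_func: only cluster allocation is AU-aware in
	 * smart-allocation mode.
	 */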
.get_entry_clu0 = fat_get_entry_clu0, + .set_entry_clu0 = fat_set_entry_clu0, + .get_entry_size = fat_get_entry_size, + .set_entry_size = fat_set_entry_size, + .get_entry_time = fat_get_entry_time, + .set_entry_time = fat_set_entry_time, + + .get_au_stat = amap_get_au_stat, +}; + +s32 mount_fat16(struct super_block *sb, pbr_t *p_pbr) +{ + s32 num_root_sectors; + bpb16_t *p_bpb = &(p_pbr->bpb.f16); + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + + if (!p_bpb->num_fats) { + sdfat_msg(sb, KERN_ERR, "bogus number of FAT structures"); + return -EINVAL; + } + + num_root_sectors = get_unaligned_le16(p_bpb->num_root_entries) << DENTRY_SIZE_BITS; + num_root_sectors = ((num_root_sectors-1) >> sb->s_blocksize_bits) + 1; + + fsi->sect_per_clus = p_bpb->sect_per_clus; + fsi->sect_per_clus_bits = ilog2(p_bpb->sect_per_clus); + fsi->cluster_size_bits = fsi->sect_per_clus_bits + sb->s_blocksize_bits; + fsi->cluster_size = 1 << fsi->cluster_size_bits; + + fsi->num_FAT_sectors = le16_to_cpu(p_bpb->num_fat_sectors); + + fsi->FAT1_start_sector = le16_to_cpu(p_bpb->num_reserved); + if (p_bpb->num_fats == 1) + fsi->FAT2_start_sector = fsi->FAT1_start_sector; + else + fsi->FAT2_start_sector = fsi->FAT1_start_sector + fsi->num_FAT_sectors; + + fsi->root_start_sector = fsi->FAT2_start_sector + fsi->num_FAT_sectors; + fsi->data_start_sector = fsi->root_start_sector + num_root_sectors; + + fsi->num_sectors = get_unaligned_le16(p_bpb->num_sectors); + if (!fsi->num_sectors) + fsi->num_sectors = le32_to_cpu(p_bpb->num_huge_sectors); + + if (!fsi->num_sectors) { + sdfat_msg(sb, KERN_ERR, "bogus total sector count"); + return -EINVAL; + } + + fsi->num_clusters = (u32)((fsi->num_sectors - fsi->data_start_sector) >> fsi->sect_per_clus_bits) + CLUS_BASE; + /* because the cluster index starts with 2 */ + + fsi->vol_type = FAT16; + if (fsi->num_clusters < FAT12_THRESHOLD) + fsi->vol_type = FAT12; + + fsi->vol_id = get_unaligned_le32(p_bpb->vol_serial); + + fsi->root_dir = 0; + fsi->dentries_in_root = get_unaligned_le16(p_bpb->num_root_entries); + if (!fsi->dentries_in_root) { + sdfat_msg(sb, KERN_ERR, "bogus max dentry count " + "for the root directory"); + return -EINVAL; + } + + fsi->dentries_per_clu = 1 << (fsi->cluster_size_bits - DENTRY_SIZE_BITS); + + fsi->vol_flag = VOL_CLEAN; + fsi->clu_srch_ptr = 2; + fsi->used_clusters = (u32) ~0; + + fsi->fs_func = &fat_fs_func; + fat_ent_ops_init(sb); + + if (p_bpb->state & FAT_VOL_DIRTY) { + fsi->vol_flag |= VOL_DIRTY; + sdfat_log_msg(sb, KERN_WARNING, "Volume was not properly " + "unmounted. Some data may be corrupt. " + "Please run fsck."); + } + + return 0; +} /* end of mount_fat16 */ + +static sector_t __calc_hidden_sect(struct super_block *sb) +{ + struct block_device *bdev = sb->s_bdev; + sector_t hidden = 0; + + if (!bdev) + goto out; + + hidden = bdev->bd_part->start_sect; + /* a disk device, not a partition */ + if (!hidden) { + if (bdev != bdev->bd_contains) + sdfat_log_msg(sb, KERN_WARNING, + "hidden(0), but disk has a partition table"); + goto out; + } + + if (sb->s_blocksize_bits != 9) { + ASSERT(sb->s_blocksize_bits > 9); + hidden >>= (sb->s_blocksize_bits - 9); + } + +out: + sdfat_log_msg(sb, KERN_INFO, "start_sect of part(%d) : %lld", + bdev ? 
bdev->bd_part->partno : -1, (s64)hidden); + return hidden; + +} + +s32 mount_fat32(struct super_block *sb, pbr_t *p_pbr) +{ + pbr32_t *p_bpb = (pbr32_t *)p_pbr; + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + + if (!p_bpb->bpb.num_fats) { + sdfat_msg(sb, KERN_ERR, "bogus number of FAT structures"); + return -EINVAL; + } + + fsi->sect_per_clus = p_bpb->bpb.sect_per_clus; + fsi->sect_per_clus_bits = ilog2(p_bpb->bpb.sect_per_clus); + fsi->cluster_size_bits = fsi->sect_per_clus_bits + sb->s_blocksize_bits; + fsi->cluster_size = 1 << fsi->cluster_size_bits; + + fsi->num_FAT_sectors = le32_to_cpu(p_bpb->bpb.num_fat32_sectors); + + fsi->FAT1_start_sector = le16_to_cpu(p_bpb->bpb.num_reserved); + if (p_bpb->bpb.num_fats == 1) + fsi->FAT2_start_sector = fsi->FAT1_start_sector; + else + fsi->FAT2_start_sector = fsi->FAT1_start_sector + fsi->num_FAT_sectors; + + fsi->root_start_sector = fsi->FAT2_start_sector + fsi->num_FAT_sectors; + fsi->data_start_sector = fsi->root_start_sector; + + /* SPEC violation for compatibility */ + fsi->num_sectors = get_unaligned_le16(p_bpb->bpb.num_sectors); + if (!fsi->num_sectors) + fsi->num_sectors = le32_to_cpu(p_bpb->bpb.num_huge_sectors); + + /* 2nd check */ + if (!fsi->num_sectors) { + sdfat_msg(sb, KERN_ERR, "bogus total sector count"); + return -EINVAL; + } + + fsi->num_clusters = (u32)((fsi->num_sectors - fsi->data_start_sector) >> fsi->sect_per_clus_bits) + CLUS_BASE; + /* because the cluster index starts with 2 */ + + fsi->vol_type = FAT32; + fsi->vol_id = get_unaligned_le32(p_bpb->bsx.vol_serial); + + fsi->root_dir = le32_to_cpu(p_bpb->bpb.root_cluster); + fsi->dentries_in_root = 0; + fsi->dentries_per_clu = 1 << (fsi->cluster_size_bits - DENTRY_SIZE_BITS); + + fsi->vol_flag = VOL_CLEAN; + fsi->clu_srch_ptr = 2; + fsi->used_clusters = (u32) ~0; + + fsi->fs_func = &fat_fs_func; + + /* Delayed / smart allocation related init */ + fsi->reserved_clusters = 0; + + /* Should be initialized before calling amap_create() */ + fat_ent_ops_init(sb); + + /* AU Map Creation */ + if (SDFAT_SB(sb)->options.improved_allocation & SDFAT_ALLOC_SMART) { + u32 hidden_sectors = le32_to_cpu(p_bpb->bpb.num_hid_sectors); + u32 calc_hid_sect = 0; + int ret; + + + /* calculate hidden sector size */ + calc_hid_sect = __calc_hidden_sect(sb); + if (calc_hid_sect != hidden_sectors) { + sdfat_log_msg(sb, KERN_WARNING, "abnormal hidden " + "sectors : bpb(%u) != ondisk(%u)", + hidden_sectors, calc_hid_sect); + if (SDFAT_SB(sb)->options.adj_hidsect) { + sdfat_log_msg(sb, KERN_INFO, + "adjusting hidden sectors : " + "bpb(%u) -> ondisk(%u)", + hidden_sectors, calc_hid_sect); + hidden_sectors = calc_hid_sect; + } + } + + SDFAT_SB(sb)->options.amap_opt.misaligned_sect = hidden_sectors; + + /* calculate AU size if it's not set */ + if (!SDFAT_SB(sb)->options.amap_opt.sect_per_au) { + SDFAT_SB(sb)->options.amap_opt.sect_per_au = + __calc_default_au_size(sb); + } + + ret = amap_create(sb, + SDFAT_SB(sb)->options.amap_opt.pack_ratio, + SDFAT_SB(sb)->options.amap_opt.sect_per_au, + SDFAT_SB(sb)->options.amap_opt.misaligned_sect); + if (ret) { + sdfat_log_msg(sb, KERN_WARNING, "failed to create AMAP." + " disabling smart allocation. 
(err:%d)", ret); + SDFAT_SB(sb)->options.improved_allocation &= ~(SDFAT_ALLOC_SMART); + } else { + fsi->fs_func = &amap_fat_fs_func; + } + } + + /* Check dependency of mount options */ + if (SDFAT_SB(sb)->options.improved_allocation != + (SDFAT_ALLOC_DELAY | SDFAT_ALLOC_SMART)) { + sdfat_log_msg(sb, KERN_INFO, "disabling defragmentation because" + " smart, delay options are disabled"); + SDFAT_SB(sb)->options.defrag = 0; + } + + if (p_bpb->bsx.state & FAT_VOL_DIRTY) { + fsi->vol_flag |= VOL_DIRTY; + sdfat_log_msg(sb, KERN_WARNING, "Volume was not properly " + "unmounted. Some data may be corrupt. " + "Please run fsck."); + } + + return 0; +} /* end of mount_fat32 */ + +/* end of core_fat.c */ diff --git a/fs/sdfat/dfr.c b/fs/sdfat/dfr.c new file mode 100644 index 000000000000..abb4710b4340 --- /dev/null +++ b/fs/sdfat/dfr.c @@ -0,0 +1,1372 @@ +/* + * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +/************************************************************************/ +/* */ +/* @PROJECT : exFAT & FAT12/16/32 File System */ +/* @FILE : dfr.c */ +/* @PURPOSE : Defragmentation support for SDFAT32 */ +/* */ +/*----------------------------------------------------------------------*/ +/* NOTES */ +/* */ +/* */ +/************************************************************************/ + +#include <linux/version.h> +#include <linux/list.h> +#include <linux/blkdev.h> + +#include "sdfat.h" +#include "core.h" +#include "amap_smart.h" + +#ifdef CONFIG_SDFAT_DFR +/** + * @fn defrag_get_info + * @brief get HW params for defrag daemon + * @return 0 on success, -errno otherwise + * @param sb super block + * @param arg defrag info arguments + * @remark protected by super_block + */ +int +defrag_get_info( + IN struct super_block *sb, + OUT struct defrag_info_arg *arg) +{ + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + AMAP_T *amap = SDFAT_SB(sb)->fsi.amap; + + if (!arg) + return -EINVAL; + + arg->sec_sz = sb->s_blocksize; + arg->clus_sz = fsi->cluster_size; + arg->total_sec = fsi->num_sectors; + arg->fat_offset_sec = fsi->FAT1_start_sector; + arg->fat_sz_sec = fsi->num_FAT_sectors; + arg->n_fat = (fsi->FAT1_start_sector == fsi->FAT2_start_sector) ? 
1 : 2; + + arg->sec_per_au = amap->option.au_size; + arg->hidden_sectors = amap->option.au_align_factor % amap->option.au_size; + + return 0; +} + + +static int +__defrag_scan_dir( + IN struct super_block *sb, + IN DOS_DENTRY_T *dos_ep, + IN loff_t i_pos, + OUT struct defrag_trav_arg *arg) +{ + FS_INFO_T *fsi = NULL; + UNI_NAME_T uniname; + unsigned int type = 0, start_clus = 0; + int err = -EPERM; + + /* Check params */ + ERR_HANDLE2((!sb || !dos_ep || !i_pos || !arg), err, -EINVAL); + fsi = &(SDFAT_SB(sb)->fsi); + + /* Get given entry's type */ + type = fsi->fs_func->get_entry_type((DENTRY_T *) dos_ep); + + /* Check dos_ep */ + if (!strncmp(dos_ep->name, DOS_CUR_DIR_NAME, DOS_NAME_LENGTH)) { + ; + } else if (!strncmp(dos_ep->name, DOS_PAR_DIR_NAME, DOS_NAME_LENGTH)) { + ; + } else if ((type == TYPE_DIR) || (type == TYPE_FILE)) { + + /* Set start_clus */ + SET32_HI(start_clus, le16_to_cpu(dos_ep->start_clu_hi)); + SET32_LO(start_clus, le16_to_cpu(dos_ep->start_clu_lo)); + arg->start_clus = start_clus; + + /* Set type & i_pos */ + if (type == TYPE_DIR) + arg->type = DFR_TRAV_TYPE_DIR; + else + arg->type = DFR_TRAV_TYPE_FILE; + + arg->i_pos = i_pos; + + /* Set name */ + memset(&uniname, 0, sizeof(UNI_NAME_T)); + get_uniname_from_dos_entry(sb, dos_ep, &uniname, 0x1); + /* FIXME : + * we should think that whether the size of arg->name + * is enough or not + */ + nls_uni16s_to_vfsname(sb, &uniname, + arg->name, sizeof(arg->name)); + + err = 0; + /* End case */ + } else if (type == TYPE_UNUSED) { + err = -ENOENT; + } else { + ; + } + +error: + return err; +} + + +/** + * @fn defrag_scan_dir + * @brief scan given directory + * @return 0 on success, -errno otherwise + * @param sb super block + * @param args traverse args + * @remark protected by inode_lock, super_block and volume lock + */ +int +defrag_scan_dir( + IN struct super_block *sb, + INOUT struct defrag_trav_arg *args) +{ + struct sdfat_sb_info *sbi = NULL; + FS_INFO_T *fsi = NULL; + struct defrag_trav_header *header = NULL; + DOS_DENTRY_T *dos_ep; + CHAIN_T chain; + int dot_found = 0, args_idx = DFR_TRAV_HEADER_IDX + 1, clus = 0, index = 0; + int err = 0, j = 0; + + /* Check params */ + ERR_HANDLE2((!sb || !args), err, -EINVAL); + sbi = SDFAT_SB(sb); + fsi = &(sbi->fsi); + header = (struct defrag_trav_header *) args; + + /* Exceptional case for ROOT */ + if (header->i_pos == DFR_TRAV_ROOT_IPOS) { + header->start_clus = fsi->root_dir; + dfr_debug("IOC_DFR_TRAV for ROOT: start_clus %08x", header->start_clus); + dot_found = 1; + } + + chain.dir = header->start_clus; + chain.size = 0; + chain.flags = 0; + + /* Check if this is a directory */ + if (!dot_found) { + FAT32_CHECK_CLUSTER(fsi, chain.dir, err); + ERR_HANDLE(err); + dos_ep = (DOS_DENTRY_T *) get_dentry_in_dir(sb, &chain, 0, NULL); + ERR_HANDLE2(!dos_ep, err, -EIO); + + if (strncmp(dos_ep->name, DOS_CUR_DIR_NAME, DOS_NAME_LENGTH)) { + err = -EINVAL; + dfr_err("Scan: Not a directory, err %d", err); + goto error; + } + } + + /* For more-scan case */ + if ((header->stat == DFR_TRAV_STAT_MORE) && + (header->start_clus == sbi->dfr_hint_clus) && + (sbi->dfr_hint_idx > 0)) { + + index = sbi->dfr_hint_idx; + for (j = 0; j < (sbi->dfr_hint_idx / fsi->dentries_per_clu); j++) { + /* Follow FAT-chain */ + FAT32_CHECK_CLUSTER(fsi, chain.dir, err); + ERR_HANDLE(err); + err = fat_ent_get(sb, chain.dir, &(chain.dir)); + ERR_HANDLE(err); + + if (!IS_CLUS_EOF(chain.dir)) { + clus++; + index -= fsi->dentries_per_clu; + } else { + /** + * This directory was modified. Stop scanning. 
+ */ + err = -EINVAL; + dfr_err("Scan: SCAN_MORE failed, err %d", err); + goto error; + } + } + + /* For first-scan case */ + } else { + clus = 0; + index = 0; + } + +scan_fat_chain: + /* Scan given directory and get info of children */ + for ( ; index < fsi->dentries_per_clu; index++) { + DOS_DENTRY_T *dos_ep = NULL; + loff_t i_pos = 0; + + /* Get dos_ep */ + FAT32_CHECK_CLUSTER(fsi, chain.dir, err); + ERR_HANDLE(err); + dos_ep = (DOS_DENTRY_T *) get_dentry_in_dir(sb, &chain, index, NULL); + ERR_HANDLE2(!dos_ep, err, -EIO); + + /* Make i_pos for this entry */ + SET64_HI(i_pos, header->start_clus); + SET64_LO(i_pos, clus * fsi->dentries_per_clu + index); + + err = __defrag_scan_dir(sb, dos_ep, i_pos, &args[args_idx]); + if (!err) { + /* More-scan case */ + if (++args_idx >= (PAGE_SIZE / sizeof(struct defrag_trav_arg))) { + sbi->dfr_hint_clus = header->start_clus; + sbi->dfr_hint_idx = clus * fsi->dentries_per_clu + index + 1; + + header->stat = DFR_TRAV_STAT_MORE; + header->nr_entries = args_idx; + goto error; + } + /* Error case */ + } else if (err == -EINVAL) { + sbi->dfr_hint_clus = sbi->dfr_hint_idx = 0; + dfr_err("Scan: err %d", err); + goto error; + /* End case */ + } else if (err == -ENOENT) { + sbi->dfr_hint_clus = sbi->dfr_hint_idx = 0; + err = 0; + goto done; + } else { + /* DO NOTHING */ + } + err = 0; + } + + /* Follow FAT-chain */ + FAT32_CHECK_CLUSTER(fsi, chain.dir, err); + ERR_HANDLE(err); + err = fat_ent_get(sb, chain.dir, &(chain.dir)); + ERR_HANDLE(err); + + if (!IS_CLUS_EOF(chain.dir)) { + index = 0; + clus++; + goto scan_fat_chain; + } + +done: + /* Update header */ + header->stat = DFR_TRAV_STAT_DONE; + header->nr_entries = args_idx; + +error: + return err; +} + + +static int +__defrag_validate_cluster_prev( + IN struct super_block *sb, + IN struct defrag_chunk_info *chunk) +{ + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + CHAIN_T dir; + DENTRY_T *ep = NULL; + unsigned int entry = 0, clus = 0; + int err = 0; + + if (chunk->prev_clus == 0) { + /* For the first cluster of a file */ + dir.dir = GET64_HI(chunk->i_pos); + dir.flags = 0x1; // Assume non-continuous + + entry = GET64_LO(chunk->i_pos); + + FAT32_CHECK_CLUSTER(fsi, dir.dir, err); + ERR_HANDLE(err); + ep = get_dentry_in_dir(sb, &dir, entry, NULL); + if (!ep) { + err = -EPERM; + goto error; + } + + /* should call fat_get_entry_clu0(ep) */ + clus = fsi->fs_func->get_entry_clu0(ep); + if (clus != chunk->d_clus) { + err = -ENXIO; + goto error; + } + } else { + /* Normal case */ + FAT32_CHECK_CLUSTER(fsi, chunk->prev_clus, err); + ERR_HANDLE(err); + err = fat_ent_get(sb, chunk->prev_clus, &clus); + if (err) + goto error; + if (chunk->d_clus != clus) + err = -ENXIO; + } + +error: + return err; +} + + +static int +__defrag_validate_cluster_next( + IN struct super_block *sb, + IN struct defrag_chunk_info *chunk) +{ + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + unsigned int clus = 0; + int err = 0; + + /* Check next_clus */ + FAT32_CHECK_CLUSTER(fsi, (chunk->d_clus + chunk->nr_clus - 1), err); + ERR_HANDLE(err); + err = fat_ent_get(sb, (chunk->d_clus + chunk->nr_clus - 1), &clus); + if (err) + goto error; + if (chunk->next_clus != (clus & FAT32_EOF)) + err = -ENXIO; + +error: + return err; +} + + +/** + * @fn __defrag_check_au + * @brief check if this AU is in use + * @return 0 if idle, 1 if busy + * @param sb super block + * @param clus physical cluster num + * @param limit # of used clusters from daemon + */ +static int +__defrag_check_au( + struct super_block *sb, + u32 clus, + u32 limit) +{ + unsigned int nr_free = 
amap_get_freeclus(sb, clus); + +#if defined(CONFIG_SDFAT_DFR_DEBUG) && defined(CONFIG_SDFAT_DBG_MSG) + if (nr_free < limit) { + AMAP_T *amap = SDFAT_SB(sb)->fsi.amap; + AU_INFO_T *au = GET_AU(amap, i_AU_of_CLU(amap, clus)); + + dfr_debug("AU[%d] nr_free %d, limit %d", au->idx, nr_free, limit); + } +#endif + return ((nr_free < limit) ? 1 : 0); +} + + +/** + * @fn defrag_validate_cluster + * @brief validate cluster info of given chunk + * @return 0 on success, -errno otherwise + * @param inode inode of given chunk + * @param chunk given chunk + * @param skip_prev flag to skip checking previous cluster info + * @remark protected by super_block and volume lock + */ +int +defrag_validate_cluster( + IN struct inode *inode, + IN struct defrag_chunk_info *chunk, + IN int skip_prev) +{ + struct super_block *sb = inode->i_sb; + FILE_ID_T *fid = &(SDFAT_I(inode)->fid); + unsigned int clus = 0; + int err = 0, i = 0; + + /* If this inode has been unlinked, skip it */ + if (fid->dir.dir == DIR_DELETED) + return -ENOENT; + + /* Skip working-AU */ + err = amap_check_working(sb, chunk->d_clus); + if (err) + return -EBUSY; + + /* Check # of free clusters in the AU this chunk belongs to */ + err = __defrag_check_au(inode->i_sb, chunk->d_clus, CLUS_PER_AU(sb) - chunk->au_clus); + if (err) + return -EINVAL; + + /* Check chunk's clusters */ + for (i = 0; i < chunk->nr_clus; i++) { + err = fsapi_map_clus(inode, chunk->f_clus + i, &clus, ALLOC_NOWHERE); + if (err || (chunk->d_clus + i != clus)) { + if (!err) + err = -ENXIO; + goto error; + } + } + + /* Check next_clus */ + err = __defrag_validate_cluster_next(sb, chunk); + ERR_HANDLE(err); + + if (!skip_prev) { + /* Check prev_clus */ + err = __defrag_validate_cluster_prev(sb, chunk); + ERR_HANDLE(err); + } + +error: + return err; +} + + +/** + * @fn defrag_reserve_clusters + * @brief reserve clusters for defrag + * @return 0 on success, -errno otherwise + * @param sb super block + * @param nr_clus # of clusters to reserve + * @remark protected by super_block and volume lock + */ +int +defrag_reserve_clusters( + INOUT struct super_block *sb, + IN int nr_clus) +{ + struct sdfat_sb_info *sbi = SDFAT_SB(sb); + FS_INFO_T *fsi = &(sbi->fsi); + + if (!(sbi->options.improved_allocation & SDFAT_ALLOC_DELAY)) + /* Nothing to do */ + return 0; + + /* Check error case */ + if (fsi->used_clusters + fsi->reserved_clusters + nr_clus >= fsi->num_clusters - 2) { + return -ENOSPC; + } else if (fsi->reserved_clusters + nr_clus < 0) { + dfr_err("Reserve count: reserved_clusters %d, nr_clus %d", + fsi->reserved_clusters, nr_clus); + BUG_ON(fsi->reserved_clusters + nr_clus < 0); + } + + sbi->dfr_reserved_clus += nr_clus; + fsi->reserved_clusters += nr_clus; + + return 0; +} + + +/** + * @fn defrag_mark_ignore + * @brief mark corresponding AU to be ignored + * @return 0 on success, -errno otherwise + * @param sb super block + * @param clus given cluster num + * @remark protected by super_block + */ +int +defrag_mark_ignore( + INOUT struct super_block *sb, + IN unsigned int clus) +{ + int err = 0; + + if (SDFAT_SB(sb)->options.improved_allocation & SDFAT_ALLOC_SMART) + err = amap_mark_ignore(sb, clus); + + if (err) + dfr_debug("err %d", err); + return err; +} + + +/** + * @fn defrag_unmark_ignore_all + * @brief unmark all ignored AUs + * @return void + * @param sb super block + * @remark protected by super_block + */ +void +defrag_unmark_ignore_all(struct super_block *sb) +{ + if (SDFAT_SB(sb)->options.improved_allocation & SDFAT_ALLOC_SMART) + amap_unmark_ignore_all(sb); +} + + +/** + * @fn defrag_map_cluster 
+ * @brief get_block function for defrag dests + * @return 0 on success, -errno otherwise + * @param inode inode + * @param clu_offset logical cluster offset + * @param clu mapped cluster (physical) + * @remark protected by super_block and volume lock + */ +int +defrag_map_cluster( + struct inode *inode, + unsigned int clu_offset, + unsigned int *clu) +{ + struct super_block *sb = inode->i_sb; + struct sdfat_sb_info *sbi = SDFAT_SB(sb); + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); +#ifdef CONFIG_SDFAT_DFR_PACKING + AMAP_T *amap = SDFAT_SB(sb)->fsi.amap; +#endif + FILE_ID_T *fid = &(SDFAT_I(inode)->fid); + struct defrag_info *ino_dfr = &(SDFAT_I(inode)->dfr_info); + struct defrag_chunk_info *chunk = NULL; + CHAIN_T new_clu; + int i = 0, nr_new = 0, err = 0; + + /* Get corresponding chunk */ + for (i = 0; i < ino_dfr->nr_chunks; i++) { + chunk = &(ino_dfr->chunks[i]); + + if ((chunk->f_clus <= clu_offset) && (clu_offset < chunk->f_clus + chunk->nr_clus)) { + /* For already allocated new_clus */ + if (sbi->dfr_new_clus[chunk->new_idx + clu_offset - chunk->f_clus]) { + *clu = sbi->dfr_new_clus[chunk->new_idx + clu_offset - chunk->f_clus]; + return 0; + } + break; + } + } + BUG_ON(!chunk); + + fscore_set_vol_flags(sb, VOL_DIRTY, 0); + + new_clu.dir = CLUS_EOF; + new_clu.size = 0; + new_clu.flags = fid->flags; + + /* Allocate new cluster */ +#ifdef CONFIG_SDFAT_DFR_PACKING + if (amap->n_clean_au * DFR_FULL_RATIO <= amap->n_au * DFR_DEFAULT_PACKING_RATIO) + err = fsi->fs_func->alloc_cluster(sb, 1, &new_clu, ALLOC_COLD_PACKING); + else + err = fsi->fs_func->alloc_cluster(sb, 1, &new_clu, ALLOC_COLD_ALIGNED); +#else + err = fsi->fs_func->alloc_cluster(sb, 1, &new_clu, ALLOC_COLD_ALIGNED); +#endif + + if (err) { + dfr_err("Map: 1 %d", 0); + return err; + } + + /* Decrease reserved cluster count */ + defrag_reserve_clusters(sb, -1); + + /* Add new_clus info in ino_dfr */ + sbi->dfr_new_clus[chunk->new_idx + clu_offset - chunk->f_clus] = new_clu.dir; + + /* Make FAT-chain for new_clus */ + for (i = 0; i < chunk->nr_clus; i++) { +#if 0 + if (sbi->dfr_new_clus[chunk->new_idx + i]) + nr_new++; + else + break; +#else + if (!sbi->dfr_new_clus[chunk->new_idx + i]) + break; + nr_new++; +#endif + } + if (nr_new == chunk->nr_clus) { + for (i = 0; i < chunk->nr_clus - 1; i++) { + FAT32_CHECK_CLUSTER(fsi, sbi->dfr_new_clus[chunk->new_idx + i], err); + BUG_ON(err); + if (fat_ent_set(sb, + sbi->dfr_new_clus[chunk->new_idx + i], + sbi->dfr_new_clus[chunk->new_idx + i + 1])) + return -EIO; + } + } + + *clu = new_clu.dir; + return 0; +} + + +/** + * @fn defrag_writepage_end_io + * @brief check WB status of requested page + * @return void + * @param page page + */ +void +defrag_writepage_end_io( + INOUT struct page *page) +{ + struct super_block *sb = page->mapping->host->i_sb; + struct sdfat_sb_info *sbi = SDFAT_SB(sb); + struct defrag_info *ino_dfr = &(SDFAT_I(page->mapping->host)->dfr_info); + unsigned int clus_start = 0, clus_end = 0; + int i = 0; + + /* Check if this inode is on defrag */ + if (atomic_read(&ino_dfr->stat) != DFR_INO_STAT_REQ) + return; + + clus_start = page->index / PAGES_PER_CLUS(sb); + clus_end = clus_start + 1; + + /* Check each chunk in given inode */ + for (i = 0; i < ino_dfr->nr_chunks; i++) { + struct defrag_chunk_info *chunk = &(ino_dfr->chunks[i]); + unsigned int chunk_start = 0, chunk_end = 0; + + chunk_start = chunk->f_clus; + chunk_end = chunk->f_clus + chunk->nr_clus; + + if ((clus_start >= chunk_start) && (clus_end <= chunk_end)) { + int off = clus_start - chunk_start; + + 
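			/*
			 * (Editor's note, illustrative) With 4 KiB pages and
			 * 32 KiB clusters, PAGES_PER_CLUS(sb) == 8, so a page
			 * at index 19 gives clus_start = 2, and the clear_bit()
			 * below clears bit 19 & 7 == 3 in the WB bitmap word
			 * sbi->dfr_page_wb[chunk->new_idx + off].
			 */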
clear_bit((page->index & (PAGES_PER_CLUS(sb) - 1)), + (volatile unsigned long *)&(sbi->dfr_page_wb[chunk->new_idx + off])); + } + } +} + + +/** + * @fn __defrag_check_wb + * @brief check if WB for given chunk completed + * @return 0 on success, -errno otherwise + * @param sbi super block info + * @param chunk given chunk + */ +static int +__defrag_check_wb( + IN struct sdfat_sb_info *sbi, + IN struct defrag_chunk_info *chunk) +{ + int err = 0, wb_i = 0, i = 0, nr_new = 0; + + if (!sbi || !chunk) + return -EINVAL; + + /* Check WB complete status first */ + for (wb_i = 0; wb_i < chunk->nr_clus; wb_i++) { + if (atomic_read((atomic_t *)&(sbi->dfr_page_wb[chunk->new_idx + wb_i]))) { + err = -EBUSY; + break; + } + } + + /** + * Check NEW_CLUS status. + * writepage_end_io cannot check whole WB complete status, + * so we need to check NEW_CLUS status. + */ + for (i = 0; i < chunk->nr_clus; i++) + if (sbi->dfr_new_clus[chunk->new_idx + i]) + nr_new++; + + if (nr_new == chunk->nr_clus) { + err = 0; + if ((wb_i != chunk->nr_clus) && (wb_i != chunk->nr_clus - 1)) + dfr_debug("submit_fullpage_bio() called on a page (nr_clus %d, wb_i %d)", + chunk->nr_clus, wb_i); + + BUG_ON(nr_new > chunk->nr_clus); + } else { + dfr_debug("nr_new %d, nr_clus %d", nr_new, chunk->nr_clus); + err = -EBUSY; + } + + /* Update chunk's state */ + if (!err) + chunk->stat |= DFR_CHUNK_STAT_WB; + + return err; +} + + +static void +__defrag_check_fat_old( + IN struct super_block *sb, + IN struct inode *inode, + IN struct defrag_chunk_info *chunk) +{ + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + unsigned int clus = 0; + int err = 0, idx = 0, max_idx = 0; + + /* Get start_clus */ + clus = SDFAT_I(inode)->fid.start_clu; + + /* Follow FAT-chain */ + #define num_clusters(val) ((val) ? (s32)((val - 1) >> fsi->cluster_size_bits) + 1 : 0) + max_idx = num_clusters(SDFAT_I(inode)->i_size_ondisk); + for (idx = 0; idx < max_idx; idx++) { + + FAT32_CHECK_CLUSTER(fsi, clus, err); + ERR_HANDLE(err); + err = fat_ent_get(sb, clus, &clus); + ERR_HANDLE(err); + + if ((idx < max_idx - 1) && (IS_CLUS_EOF(clus) || IS_CLUS_FREE(clus))) { + dfr_err("FAT: inode %p, max_idx %d, idx %d, clus %08x, " + "f_clus %d, nr_clus %d", inode, max_idx, + idx, clus, chunk->f_clus, chunk->nr_clus); + BUG_ON(idx < max_idx - 1); + goto error; + } + } + +error: + return; +} + + +static void +__defrag_check_fat_new( + IN struct super_block *sb, + IN struct inode *inode, + IN struct defrag_chunk_info *chunk) +{ + struct sdfat_sb_info *sbi = SDFAT_SB(sb); + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + unsigned int clus = 0; + int i = 0, err = 0; + + /* Check start of FAT-chain */ + if (chunk->prev_clus) { + FAT32_CHECK_CLUSTER(fsi, chunk->prev_clus, err); + BUG_ON(err); + err = fat_ent_get(sb, chunk->prev_clus, &clus); + BUG_ON(err); + } else { + clus = SDFAT_I(inode)->fid.start_clu; + } + if (sbi->dfr_new_clus[chunk->new_idx] != clus) { + dfr_err("FAT: inode %p, start_clus %08x, read_clus %08x", + inode, sbi->dfr_new_clus[chunk->new_idx], clus); + err = -EIO; + goto error; + } + + /* Check inside of FAT-chain */ + if (chunk->nr_clus > 1) { + for (i = 0; i < chunk->nr_clus - 1; i++) { + FAT32_CHECK_CLUSTER(fsi, sbi->dfr_new_clus[chunk->new_idx + i], err); + BUG_ON(err); + err = fat_ent_get(sb, sbi->dfr_new_clus[chunk->new_idx + i], &clus); + BUG_ON(err); + if (sbi->dfr_new_clus[chunk->new_idx + i + 1] != clus) { + dfr_err("FAT: inode %p, new_clus %08x, read_clus %08x", + inode, sbi->dfr_new_clus[chunk->new_idx], clus); + err = -EIO; + goto error; + } + } + clus = 0; + } + + /* Check 
end of FAT-chain */ + FAT32_CHECK_CLUSTER(fsi, sbi->dfr_new_clus[chunk->new_idx + chunk->nr_clus - 1], err); + BUG_ON(err); + err = fat_ent_get(sb, sbi->dfr_new_clus[chunk->new_idx + chunk->nr_clus - 1], &clus); + BUG_ON(err); + if ((chunk->next_clus & 0x0FFFFFFF) != (clus & 0x0FFFFFFF)) { + dfr_err("FAT: inode %p, next_clus %08x, read_clus %08x", inode, chunk->next_clus, clus); + err = -EIO; + } + +error: + BUG_ON(err); +} + + +/** + * @fn __defrag_update_dirent + * @brief update DIR entry for defrag req + * @return void + * @param sb super block + * @param chunk given chunk + */ +static void +__defrag_update_dirent( + struct super_block *sb, + struct defrag_chunk_info *chunk) +{ + struct sdfat_sb_info *sbi = SDFAT_SB(sb); + FS_INFO_T *fsi = &SDFAT_SB(sb)->fsi; + CHAIN_T dir; + DOS_DENTRY_T *dos_ep; + unsigned int entry = 0; + unsigned long long sector = 0; + unsigned short hi = 0, lo = 0; + int err = 0; + + dir.dir = GET64_HI(chunk->i_pos); + dir.flags = 0x1; // Assume non-continuous + + entry = GET64_LO(chunk->i_pos); + + FAT32_CHECK_CLUSTER(fsi, dir.dir, err); + BUG_ON(err); + dos_ep = (DOS_DENTRY_T *) get_dentry_in_dir(sb, &dir, entry, &sector); + + hi = GET32_HI(sbi->dfr_new_clus[chunk->new_idx]); + lo = GET32_LO(sbi->dfr_new_clus[chunk->new_idx]); + + dos_ep->start_clu_hi = cpu_to_le16(hi); + dos_ep->start_clu_lo = cpu_to_le16(lo); + + dcache_modify(sb, sector); +} + + +/** + * @fn defrag_update_fat_prev + * @brief update FAT chain for defrag requests + * @return void + * @param sb super block + * @param force flag to force FAT update + * @remark protected by super_block and volume lock + */ +void +defrag_update_fat_prev( + struct super_block *sb, + int force) +{ + struct sdfat_sb_info *sbi = SDFAT_SB(sb); + FS_INFO_T *fsi = &(sbi->fsi); + struct defrag_info *sb_dfr = &sbi->dfr_info, *ino_dfr = NULL; + int skip = 0, done = 0; + + /* Check if FS_ERROR occurred */ + if (sb->s_flags & MS_RDONLY) { + dfr_err("RDONLY partition (err %d)", -EPERM); + goto out; + } + + list_for_each_entry(ino_dfr, &sb_dfr->entry, entry) { + struct inode *inode = &(container_of(ino_dfr, struct sdfat_inode_info, dfr_info)->vfs_inode); + struct sdfat_inode_info *ino_info = SDFAT_I(inode); + struct defrag_chunk_info *chunk_prev = NULL; + int i = 0, j = 0; + + mutex_lock(&ino_dfr->lock); + BUG_ON(atomic_read(&ino_dfr->stat) != DFR_INO_STAT_REQ); + for (i = 0; i < ino_dfr->nr_chunks; i++) { + struct defrag_chunk_info *chunk = NULL; + int err = 0; + + chunk = &(ino_dfr->chunks[i]); + BUG_ON(!chunk); + + /* Do nothing for already passed chunk */ + if (chunk->stat == DFR_CHUNK_STAT_PASS) { + done++; + continue; + } + + /* Handle error case */ + if (chunk->stat == DFR_CHUNK_STAT_ERR) { + err = -EINVAL; + goto error; + } + + /* Double-check clusters */ + if (chunk_prev && + (chunk->f_clus == chunk_prev->f_clus + chunk_prev->nr_clus) && + (chunk_prev->stat == DFR_CHUNK_STAT_PASS)) { + + err = defrag_validate_cluster(inode, chunk, 1); + + /* Handle continuous chunks in a file */ + if (!err) { + chunk->prev_clus = + sbi->dfr_new_clus[chunk_prev->new_idx + chunk_prev->nr_clus - 1]; + dfr_debug("prev->f_clus %d, prev->nr_clus %d, chunk->f_clus %d", + chunk_prev->f_clus, chunk_prev->nr_clus, chunk->f_clus); + } + } else { + err = defrag_validate_cluster(inode, chunk, 0); + } + + if (err) { + dfr_err("Cluster validation: inode %p, chunk->f_clus %d, err %d", + inode, chunk->f_clus, err); + goto error; + } + + /** + * Skip update_fat_prev if WB or update_fat_next not completed. + * Go to error case if FORCE set. 
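	 * (Editor's note, presumed ordering: DFR_CHUNK_STAT_PREP implies that
	 * defrag_update_fat_next() has already linked each new chain's tail to
	 * next_clus, so this function only ever switches the head link of a
	 * fully built chain; an interruption in between leaves the old chain
	 * intact.)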
 */ + if (__defrag_check_wb(sbi, chunk) || (chunk->stat != DFR_CHUNK_STAT_PREP)) { + if (force) { + err = -EPERM; + dfr_err("Skip case: inode %p, stat %x, f_clus %d, err %d", + inode, chunk->stat, chunk->f_clus, err); + goto error; + } + skip++; + continue; + } + +#ifdef CONFIG_SDFAT_DFR_DEBUG + /* SPO test */ + defrag_spo_test(sb, DFR_SPO_RANDOM, __func__); +#endif + + /* Update chunk's previous cluster */ + if (chunk->prev_clus == 0) { + /* For the first cluster of a file */ + /* Update ino_info->fid.start_clu */ + ino_info->fid.start_clu = sbi->dfr_new_clus[chunk->new_idx]; + __defrag_update_dirent(sb, chunk); + } else { + FAT32_CHECK_CLUSTER(fsi, chunk->prev_clus, err); + BUG_ON(err); + if (fat_ent_set(sb, + chunk->prev_clus, + sbi->dfr_new_clus[chunk->new_idx])) { + err = -EIO; + goto error; + } + } + + /* Clear extent cache */ + extent_cache_inval_inode(inode); + + /* Update FID info */ + ino_info->fid.hint_bmap.off = CLUS_EOF; + ino_info->fid.hint_bmap.clu = 0; + + /* Clear old FAT-chain */ + for (j = 0; j < chunk->nr_clus; j++) + defrag_free_cluster(sb, chunk->d_clus + j); + + /* Mark this chunk PASS */ + chunk->stat = DFR_CHUNK_STAT_PASS; + __defrag_check_fat_new(sb, inode, chunk); + + done++; + +error: + if (err) { + /** + * chunk->new_idx != 0 means this chunk needs to be cleaned up + */ + if (chunk->new_idx) { + /* Free already allocated clusters */ + for (j = 0; j < chunk->nr_clus; j++) { + if (sbi->dfr_new_clus[chunk->new_idx + j]) { + defrag_free_cluster(sb, sbi->dfr_new_clus[chunk->new_idx + j]); + sbi->dfr_new_clus[chunk->new_idx + j] = 0; + } + } + + __defrag_check_fat_old(sb, inode, chunk); + } + + /** + * chunk->new_idx == 0 means this chunk already cleaned up + */ + chunk->new_idx = 0; + chunk->stat = DFR_CHUNK_STAT_ERR; + } + + chunk_prev = chunk; + } + BUG_ON(!mutex_is_locked(&ino_dfr->lock)); + mutex_unlock(&ino_dfr->lock); + } + +out: + if (skip) { + dfr_debug("%s skipped (nr_reqs %d, done %d, skip %d)", + __func__, sb_dfr->nr_chunks - 1, done, skip); + } else { + /* Make dfr_reserved_clus zero */ + if (sbi->dfr_reserved_clus > 0) { + if (fsi->reserved_clusters < sbi->dfr_reserved_clus) { + dfr_err("Reserved count: reserved_clus %d, dfr_reserved_clus %d", + fsi->reserved_clusters, sbi->dfr_reserved_clus); + BUG_ON(fsi->reserved_clusters < sbi->dfr_reserved_clus); + } + + defrag_reserve_clusters(sb, 0 - sbi->dfr_reserved_clus); + } + + dfr_debug("%s done (nr_reqs %d, done %d)", __func__, sb_dfr->nr_chunks - 1, done); + } +} + + +/** + * @fn defrag_update_fat_next + * @brief update FAT chain for defrag requests + * @return void + * @param sb super block + * @remark protected by super_block and volume lock + */ +void +defrag_update_fat_next( + struct super_block *sb) +{ + struct sdfat_sb_info *sbi = SDFAT_SB(sb); + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + struct defrag_info *sb_dfr = &sbi->dfr_info, *ino_dfr = NULL; + struct defrag_chunk_info *chunk = NULL; + int done = 0, i = 0, j = 0, err = 0; + + /* Check if FS_ERROR occurred */ + if (sb->s_flags & MS_RDONLY) { + dfr_err("RDONLY partition (err %d)", -EROFS); + goto out; + } + + list_for_each_entry(ino_dfr, &sb_dfr->entry, entry) { + + for (i = 0; i < ino_dfr->nr_chunks; i++) { + int skip = 0; + + chunk = &(ino_dfr->chunks[i]); + + /* Do nothing if error occurred or update_fat_next already passed */ + if (chunk->stat == DFR_CHUNK_STAT_ERR) + continue; + if (chunk->stat & DFR_CHUNK_STAT_FAT) { + done++; + continue; + } + + /* Skip this chunk if get_block has not passed for this chunk */ + for (j = 0; j < chunk->nr_clus; 
j++) { + if (sbi->dfr_new_clus[chunk->new_idx + j] == 0) { + skip = 1; + break; + } + } + if (skip) + continue; + + /* Update chunk's next cluster */ + FAT32_CHECK_CLUSTER(fsi, + sbi->dfr_new_clus[chunk->new_idx + chunk->nr_clus - 1], err); + BUG_ON(err); + if (fat_ent_set(sb, + sbi->dfr_new_clus[chunk->new_idx + chunk->nr_clus - 1], + chunk->next_clus)) + goto out; + +#ifdef CONFIG_SDFAT_DFR_DEBUG + /* SPO test */ + defrag_spo_test(sb, DFR_SPO_RANDOM, __func__); +#endif + + /* Update chunk's state */ + chunk->stat |= DFR_CHUNK_STAT_FAT; + done++; + } + } + +out: + dfr_debug("%s done (nr_reqs %d, done %d)", __func__, sb_dfr->nr_chunks - 1, done); +} + + +/** + * @fn defrag_check_discard + * @brief check if we can send discard for this AU, if so, send discard + * @return void + * @param sb super block + * @remark protected by super_block and volume lock + */ +void +defrag_check_discard( + IN struct super_block *sb) +{ + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + AMAP_T *amap = SDFAT_SB(sb)->fsi.amap; + AU_INFO_T *au = NULL; + struct defrag_info *sb_dfr = &(SDFAT_SB(sb)->dfr_info); + unsigned int tmp[DFR_MAX_AU_MOVED]; + int i = 0, j = 0; + + BUG_ON(!amap); + + if (!(SDFAT_SB(sb)->options.discard) || + !(SDFAT_SB(sb)->options.improved_allocation & SDFAT_ALLOC_SMART)) + return; + + memset(tmp, 0, sizeof(int) * DFR_MAX_AU_MOVED); + + for (i = REQ_HEADER_IDX + 1; i < sb_dfr->nr_chunks; i++) { + struct defrag_chunk_info *chunk = &(sb_dfr->chunks[i]); + int skip = 0; + + au = GET_AU(amap, i_AU_of_CLU(amap, chunk->d_clus)); + + /* Send DISCARD for free AU */ + if ((IS_AU_IGNORED(au, amap)) && + (amap_get_freeclus(sb, chunk->d_clus) == CLUS_PER_AU(sb))) { + sector_t blk = 0, nr_blks = 0; + unsigned int au_align_factor = amap->option.au_align_factor % amap->option.au_size; + + BUG_ON(au->idx == 0); + + /* Avoid multiple DISCARD */ + for (j = 0; j < DFR_MAX_AU_MOVED; j++) { + if (tmp[j] == au->idx) { + skip = 1; + break; + } + } + if (skip == 1) + continue; + + /* Send DISCARD cmd */ + blk = (sector_t) (((au->idx * CLUS_PER_AU(sb)) << fsi->sect_per_clus_bits) + - au_align_factor); + nr_blks = ((sector_t)CLUS_PER_AU(sb)) << fsi->sect_per_clus_bits; + + dfr_debug("Send DISCARD for AU[%d] (blk %08zx)", au->idx, blk); + sb_issue_discard(sb, blk, nr_blks, GFP_NOFS, 0); + + /* Save previous AU's index */ + for (j = 0; j < DFR_MAX_AU_MOVED; j++) { + if (!tmp[j]) { + tmp[j] = au->idx; + break; + } + } + } + } +} + + +/** + * @fn defrag_free_cluster + * @brief free unnecessary cluster + * @return 0 on success, -errno otherwise + * @param sb super block + * @param clus physical cluster num + * @remark protected by super_block and volume lock + */ +int +defrag_free_cluster( + struct super_block *sb, + unsigned int clus) +{ + FS_INFO_T *fsi = &SDFAT_SB(sb)->fsi; + unsigned int val = 0; + s32 err = 0; + + FAT32_CHECK_CLUSTER(fsi, clus, err); + BUG_ON(err); + if (fat_ent_get(sb, clus, &val)) + return -EIO; + if (val) { + if (fat_ent_set(sb, clus, 0)) + return -EIO; + } else { + dfr_err("Free: Already freed, clus %08x, val %08x", clus, val); + BUG_ON(!val); + } + + set_sb_dirty(sb); + fsi->used_clusters--; + if (fsi->amap) + amap_release_cluster(sb, clus); + + return 0; +} + + +/** + * @fn defrag_check_defrag_required + * @brief check if defrag required + * @return 1 if required, 0 otherwise + * @param sb super block + * @param totalau # of total AUs + * @param cleanau # of clean AUs + * @param fullau # of full AUs + * @remark protected by super_block + */ +int +defrag_check_defrag_required( + IN struct super_block *sb, + OUT int *totalau, + 
OUT int *cleanau, + OUT int *fullau) +{ + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + AMAP_T *amap = NULL; + int clean_ratio = 0, frag_ratio = 0; + int ret = 0; + + if (!sb || !(SDFAT_SB(sb)->options.defrag)) + return 0; + + /* Check DFR_DEFAULT_STOP_RATIO first */ + fsi = &(SDFAT_SB(sb)->fsi); + if (fsi->used_clusters == (unsigned int)(~0)) { + if (fsi->fs_func->count_used_clusters(sb, &fsi->used_clusters)) + return -EIO; + } + if (fsi->used_clusters * DFR_FULL_RATIO >= fsi->num_clusters * DFR_DEFAULT_STOP_RATIO) { + dfr_debug("used_clusters %d, num_clusters %d", fsi->used_clusters, fsi->num_clusters); + return 0; + } + + /* Check clean/frag ratio */ + amap = SDFAT_SB(sb)->fsi.amap; + BUG_ON(!amap); + + clean_ratio = (amap->n_clean_au * 100) / amap->n_au; + if (amap->n_full_au) + frag_ratio = ((amap->n_au - amap->n_clean_au) * 100) / amap->n_full_au; + else + frag_ratio = ((amap->n_au - amap->n_clean_au) * 100) / + (fsi->used_clusters / CLUS_PER_AU(sb) + 1); + + /* + * Wake-up defrag_daemon: + * when # of clean AUs is too small, or frag_ratio exceeds the limit + */ + if ((clean_ratio < DFR_DEFAULT_WAKEUP_RATIO) || + ((clean_ratio < DFR_DEFAULT_CLEAN_RATIO) && (frag_ratio >= DFR_DEFAULT_FRAG_RATIO))) { + + if (totalau) + *totalau = amap->n_au; + if (cleanau) + *cleanau = amap->n_clean_au; + if (fullau) + *fullau = amap->n_full_au; + ret = 1; + } + + return ret; +} + + +/** + * @fn defrag_check_defrag_on + * @brief check defrag status on inode + * @return 1 if defrag is on, 0 otherwise + * @param inode inode + * @param start logical start addr + * @param end logical end addr + * @param cancel flag to cancel defrag + * @param caller caller info + */ +int +defrag_check_defrag_on( + INOUT struct inode *inode, + IN loff_t start, + IN loff_t end, + IN int cancel, + IN const char *caller) +{ + struct super_block *sb = inode->i_sb; + struct sdfat_sb_info *sbi = SDFAT_SB(sb); + FS_INFO_T *fsi = &(sbi->fsi); + struct defrag_info *ino_dfr = &(SDFAT_I(inode)->dfr_info); + unsigned int clus_start = 0, clus_end = 0; + int ret = 0, i = 0; + + if (!inode || (start == end)) + return 0; + + mutex_lock(&ino_dfr->lock); + /* Check if this inode is on defrag */ + if (atomic_read(&ino_dfr->stat) == DFR_INO_STAT_REQ) { + + clus_start = start >> (fsi->cluster_size_bits); + clus_end = (end >> (fsi->cluster_size_bits)) + + ((end & (fsi->cluster_size - 1)) ? 
1 : 0); + + if (!ino_dfr->chunks) + goto error; + + /* Check each chunk in given inode */ + for (i = 0; i < ino_dfr->nr_chunks; i++) { + struct defrag_chunk_info *chunk = &(ino_dfr->chunks[i]); + unsigned int chunk_start = 0, chunk_end = 0; + + /* Skip this chunk when error occurred or it already passed defrag process */ + if ((chunk->stat == DFR_CHUNK_STAT_ERR) || (chunk->stat == DFR_CHUNK_STAT_PASS)) + continue; + + chunk_start = chunk->f_clus; + chunk_end = chunk->f_clus + chunk->nr_clus; + + if (((clus_start >= chunk_start) && (clus_start < chunk_end)) || + ((clus_end > chunk_start) && (clus_end <= chunk_end)) || + ((clus_start < chunk_start) && (clus_end > chunk_end))) { + ret = 1; + if (cancel) { + chunk->stat = DFR_CHUNK_STAT_ERR; + dfr_debug("Defrag canceled: inode %p, start %08x, end %08x, caller %s", + inode, clus_start, clus_end, caller); + } + } + } + } + +error: + BUG_ON(!mutex_is_locked(&ino_dfr->lock)); + mutex_unlock(&ino_dfr->lock); + return ret; +} + + +#ifdef CONFIG_SDFAT_DFR_DEBUG +/** + * @fn defrag_spo_test + * @brief test SPO while defrag running + * @return void + * @param sb super block + * @param flag SPO debug flag + * @param caller caller info + */ +void +defrag_spo_test( + struct super_block *sb, + int flag, + const char *caller) +{ + struct sdfat_sb_info *sbi = SDFAT_SB(sb); + + if (!sb || !(SDFAT_SB(sb)->options.defrag)) + return; + + if (flag == sbi->dfr_spo_flag) { + dfr_err("Defrag SPO test (flag %d, caller %s)", flag, caller); + panic("Defrag SPO test"); + } +} +#endif /* CONFIG_SDFAT_DFR_DEBUG */ + + +#endif /* CONFIG_SDFAT_DFR */ diff --git a/fs/sdfat/dfr.h b/fs/sdfat/dfr.h new file mode 100644 index 000000000000..da98605020d3 --- /dev/null +++ b/fs/sdfat/dfr.h @@ -0,0 +1,261 @@ +/* + * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef _SDFAT_DEFRAG_H +#define _SDFAT_DEFRAG_H + +#ifdef CONFIG_SDFAT_DFR + +/* Tuning parameters */ +#define DFR_MIN_TIMEOUT (1 * HZ) // Minimum timeout for forced-sync +#define DFR_DEFAULT_TIMEOUT (10 * HZ) // Default timeout for forced-sync + +#define DFR_DEFAULT_CLEAN_RATIO (50) // Wake-up daemon when clean AU ratio under 50% +#define DFR_DEFAULT_WAKEUP_RATIO (10) // Wake-up daemon when clean AU ratio under 10%, regardless of frag_ratio + +#define DFR_DEFAULT_FRAG_RATIO (130) // Wake-up daemon when frag_ratio over 130% + +#define DFR_DEFAULT_PACKING_RATIO (10) // Call allocator with PACKING flag, when clean AU ratio under 10% + +#define DFR_DEFAULT_STOP_RATIO (98) // Stop defrag_daemon when disk used ratio over 98% +#define DFR_FULL_RATIO (100) + +#define DFR_MAX_AU_MOVED (16) // Maximum # of AUs for a request + + +/* Debugging support*/ +#define dfr_err(fmt, args...) pr_err("DFR: " fmt "\n", args) + +#ifdef CONFIG_SDFAT_DFR_DEBUG +#define dfr_debug(fmt, args...) pr_debug("DFR: " fmt "\n", args) +#else +#define dfr_debug(fmt, args...) 
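/*
 * (Editor's note) When CONFIG_SDFAT_DFR_DEBUG is disabled, dfr_debug()
 * expands to nothing and its arguments are never evaluated, so callers must
 * not rely on side effects inside dfr_debug() argument lists.
 */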
+#endif + + +/* Error handling */ +#define ERR_HANDLE(err) { \ + if (err) { \ + dfr_debug("err %d", err); \ + goto error; \ + } \ +} + +#define ERR_HANDLE2(cond, err, val) { \ + if (cond) { \ + err = val; \ + dfr_debug("err %d", err); \ + goto error; \ + } \ +} + + +/* Arguments IN-OUT */ +#define IN +#define OUT +#define INOUT + + +/* Macros */ +#define GET64_HI(var64) ((unsigned int)((var64) >> 32)) +#define GET64_LO(var64) ((unsigned int)(((var64) << 32) >> 32)) +#define SET64_HI(dst64, var32) { (dst64) = ((loff_t)(var32) << 32) | ((dst64) & 0x00000000ffffffffLL); } +#define SET64_LO(dst64, var32) { (dst64) = ((dst64) & 0xffffffff00000000LL) | ((var32) & 0x00000000ffffffffLL); } + +#define GET32_HI(var32) ((unsigned short)((var32) >> 16)) +#define GET32_LO(var32) ((unsigned short)(((var32) << 16) >> 16)) +#define SET32_HI(dst32, var16) { (dst32) = ((unsigned int)(var16) << 16) | ((dst32) & 0x0000ffff); } +#define SET32_LO(dst32, var16) { (dst32) = ((dst32) & 0xffff0000) | ((unsigned int)(var16) & 0x0000ffff); } + + +/* FAT32 related */ +#define FAT32_EOF (0x0fffffff) +#define FAT32_RESERVED (0x0ffffff7) +#define FAT32_UNUSED_CLUS (2) + +#define CLUS_PER_AU(sb) ( \ + (SDFAT_SB(sb)->options.amap_opt.sect_per_au) >> (SDFAT_SB(sb)->fsi.sect_per_clus_bits) \ +) +#define PAGES_PER_AU(sb) ( \ + ((SDFAT_SB(sb)->options.amap_opt.sect_per_au) << ((sb)->s_blocksize_bits)) \ + >> PAGE_SHIFT \ +) +#define PAGES_PER_CLUS(sb) ((SDFAT_SB(sb)->fsi.cluster_size) >> PAGE_SHIFT) + +#define FAT32_CHECK_CLUSTER(fsi, clus, err) \ + { \ + if (((clus) < FAT32_UNUSED_CLUS) || \ + ((clus) > (fsi)->num_clusters) || \ + ((clus) >= FAT32_RESERVED)) { \ + dfr_err("clus %08x, fsi->num_clusters %08x", (clus), (fsi)->num_clusters); \ + err = -EINVAL; \ + } else { \ + err = 0; \ + } \ + } + + +/* IOCTL_DFR_INFO */ +struct defrag_info_arg { + /* PBS info */ + unsigned int sec_sz; + unsigned int clus_sz; + unsigned long long total_sec; + unsigned long long fat_offset_sec; + unsigned int fat_sz_sec; + unsigned int n_fat; + unsigned int hidden_sectors; + + /* AU info */ + unsigned int sec_per_au; +}; + + +/* IOC_DFR_TRAV */ +#define DFR_TRAV_HEADER_IDX (0) + +#define DFR_TRAV_TYPE_HEADER (0x0000000F) +#define DFR_TRAV_TYPE_DIR (1) +#define DFR_TRAV_TYPE_FILE (2) +#define DFR_TRAV_TYPE_TEST (DFR_TRAV_TYPE_HEADER | 0x10000000) + +#define DFR_TRAV_ROOT_IPOS (0xFFFFFFFFFFFFFFFFLL) + +struct defrag_trav_arg { + int type; + unsigned int start_clus; + loff_t i_pos; + char name[MAX_DOSNAME_BUF_SIZE]; + char dummy1; + int dummy2; +}; + +#define DFR_TRAV_STAT_DONE (0x1) +#define DFR_TRAV_STAT_MORE (0x2) +#define DFR_TRAV_STAT_ERR (0xFF) + +struct defrag_trav_header { + int type; + unsigned int start_clus; + loff_t i_pos; + char name[MAX_DOSNAME_BUF_SIZE]; + char stat; + unsigned int nr_entries; +}; + + +/* IOC_DFR_REQ */ +#define REQ_HEADER_IDX (0) + +#define DFR_CHUNK_STAT_ERR (0xFFFFFFFF) +#define DFR_CHUNK_STAT_REQ (0x1) +#define DFR_CHUNK_STAT_WB (0x2) +#define DFR_CHUNK_STAT_FAT (0x4) +#define DFR_CHUNK_STAT_PREP (DFR_CHUNK_STAT_REQ | DFR_CHUNK_STAT_WB | DFR_CHUNK_STAT_FAT) +#define DFR_CHUNK_STAT_PASS (0x0000000F) + +struct defrag_chunk_header { + int mode; + unsigned int nr_chunks; + loff_t dummy1; + int dummy2[4]; + union { + int *dummy3; + int dummy4; + }; + int dummy5; +}; + +struct defrag_chunk_info { + int stat; + /* File related */ + unsigned int f_clus; + loff_t i_pos; + /* Cluster related */ + unsigned int d_clus; + unsigned int nr_clus; + unsigned int prev_clus; + unsigned int next_clus; + union { + void *dummy; + /* 
req status */ + unsigned int new_idx; + }; + /* AU related */ + unsigned int au_clus; +}; + + +/* Global info */ +#define DFR_MODE_BACKGROUND (0x1) +#define DFR_MODE_FOREGROUND (0x2) +#define DFR_MODE_ONESHOT (0x4) +#define DFR_MODE_BATCHED (0x8) +#define DFR_MODE_TEST (DFR_MODE_BACKGROUND | 0x10000000) + +#define DFR_SB_STAT_IDLE (0) +#define DFR_SB_STAT_REQ (1) +#define DFR_SB_STAT_VALID (2) + +#define DFR_INO_STAT_IDLE (0) +#define DFR_INO_STAT_REQ (1) +struct defrag_info { + struct mutex lock; + atomic_t stat; + struct defrag_chunk_info *chunks; + unsigned int nr_chunks; + struct list_head entry; +}; + + +/* SPO test flags */ +#define DFR_SPO_NONE (0) +#define DFR_SPO_NORMAL (1) +#define DFR_SPO_DISCARD (2) +#define DFR_SPO_FAT_NEXT (3) +#define DFR_SPO_RANDOM (4) + + +/* Extern functions */ +int defrag_get_info(struct super_block *sb, struct defrag_info_arg *arg); + +int defrag_scan_dir(struct super_block *sb, struct defrag_trav_arg *arg); + +int defrag_validate_cluster(struct inode *inode, struct defrag_chunk_info *chunk, int skip_prev); +int defrag_reserve_clusters(struct super_block *sb, int nr_clus); +int defrag_mark_ignore(struct super_block *sb, unsigned int clus); +void defrag_unmark_ignore_all(struct super_block *sb); + +int defrag_map_cluster(struct inode *inode, unsigned int clu_offset, unsigned int *clu); +void defrag_writepage_end_io(struct page *page); + +void defrag_update_fat_prev(struct super_block *sb, int force); +void defrag_update_fat_next(struct super_block *sb); +void defrag_check_discard(struct super_block *sb); +int defrag_free_cluster(struct super_block *sb, unsigned int clus); + +int defrag_check_defrag_required(struct super_block *sb, int *totalau, int *cleanau, int *fullau); +int defrag_check_defrag_on(struct inode *inode, loff_t start, loff_t end, int cancel, const char *caller); + +#ifdef CONFIG_SDFAT_DFR_DEBUG +void defrag_spo_test(struct super_block *sb, int flag, const char *caller); +#endif + +#endif /* CONFIG_SDFAT_DFR */ + +#endif /* _SDFAT_DEFRAG_H */ + diff --git a/fs/sdfat/extent.c b/fs/sdfat/extent.c new file mode 100644 index 000000000000..59e096530dda --- /dev/null +++ b/fs/sdfat/extent.c @@ -0,0 +1,351 @@ +/* + * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +/* + * linux/fs/fat/cache.c + * + * Written 1992,1993 by Werner Almesberger + * + * Mar 1999. AV. Changed cache, so that it uses the starting cluster instead + * of inode number. + * May 1999. AV. Fixed the bogosity with FAT32 (read "FAT28"). Fscking lusers. + */ + +/************************************************************************/ +/* */ +/* PROJECT : exFAT & FAT12/16/32 File System */ +/* FILE : extent.c */ +/* PURPOSE : Improve the performance of traversing fat chain. 
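As the PURPOSE banner above says, the extent cache exists so that mapping a file cluster to its on-disk cluster does not require walking the FAT chain from the start of the file on every lookup. The core idea in a self-contained userspace sketch; struct and field names are illustrative, not the driver's:

#include <stdio.h>

/* one cached run: file cluster 'fclus' maps to disk cluster 'dclus',
 * and the following 'nr_contig' clusters are contiguous on disk */
struct extent_run {
        unsigned int fclus;
        unsigned int dclus;
        unsigned int nr_contig;
};

/* O(1) resolve through the cached run; returns 0 on a miss, in which
 * case the caller falls back to walking the FAT chain */
static int run_lookup(const struct extent_run *run, unsigned int fclus,
                      unsigned int *dclus)
{
        if (fclus < run->fclus || fclus > run->fclus + run->nr_contig)
                return 0;
        *dclus = run->dclus + (fclus - run->fclus);
        return 1;
}

int main(void)
{
        struct extent_run run = { .fclus = 8, .dclus = 1000, .nr_contig = 4 };
        unsigned int d;

        if (run_lookup(&run, 10, &d))
                printf("file cluster 10 -> disk cluster %u\n", d); /* 1002 */
        return 0;
}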
*/ +/* */ +/*----------------------------------------------------------------------*/ +/* NOTES */ +/* */ +/* */ +/************************************************************************/ + +#include <linux/slab.h> +#include "sdfat.h" +#include "core.h" + +#define EXTENT_CACHE_VALID 0 +/* this must be > 0. */ +#define EXTENT_MAX_CACHE 16 + +struct extent_cache { + struct list_head cache_list; + u32 nr_contig; /* number of contiguous clusters */ + u32 fcluster; /* cluster number in the file. */ + u32 dcluster; /* cluster number on disk. */ +}; + +struct extent_cache_id { + u32 id; + u32 nr_contig; + u32 fcluster; + u32 dcluster; +}; + +static struct kmem_cache *extent_cache_cachep; + +static void init_once(void *c) +{ + struct extent_cache *cache = (struct extent_cache *)c; + + INIT_LIST_HEAD(&cache->cache_list); +} + +s32 extent_cache_init(void) +{ + extent_cache_cachep = kmem_cache_create("sdfat_extent_cache", + sizeof(struct extent_cache), + 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, + init_once); + if (!extent_cache_cachep) + return -ENOMEM; + return 0; +} + +void extent_cache_shutdown(void) +{ + if (!extent_cache_cachep) + return; + kmem_cache_destroy(extent_cache_cachep); +} + +void extent_cache_init_inode(struct inode *inode) +{ + EXTENT_T *extent = &(SDFAT_I(inode)->fid.extent); + + spin_lock_init(&extent->cache_lru_lock); + extent->nr_caches = 0; + extent->cache_valid_id = EXTENT_CACHE_VALID + 1; + INIT_LIST_HEAD(&extent->cache_lru); +} + +static inline struct extent_cache *extent_cache_alloc(void) +{ + return kmem_cache_alloc(extent_cache_cachep, GFP_NOFS); +} + +static inline void extent_cache_free(struct extent_cache *cache) +{ + BUG_ON(!list_empty(&cache->cache_list)); + kmem_cache_free(extent_cache_cachep, cache); +} + +static inline void extent_cache_update_lru(struct inode *inode, + struct extent_cache *cache) +{ + EXTENT_T *extent = &(SDFAT_I(inode)->fid.extent); + + if (extent->cache_lru.next != &cache->cache_list) + list_move(&cache->cache_list, &extent->cache_lru); +} + +static u32 extent_cache_lookup(struct inode *inode, u32 fclus, + struct extent_cache_id *cid, + u32 *cached_fclus, u32 *cached_dclus) +{ + EXTENT_T *extent = &(SDFAT_I(inode)->fid.extent); + + static struct extent_cache nohit = { .fcluster = 0, }; + + struct extent_cache *hit = &nohit, *p; + u32 offset = CLUS_EOF; + + spin_lock(&extent->cache_lru_lock); + list_for_each_entry(p, &extent->cache_lru, cache_list) { + /* Find the cache of "fclus" or nearest cache. */ + if (p->fcluster <= fclus && hit->fcluster < p->fcluster) { + hit = p; + if ((hit->fcluster + hit->nr_contig) < fclus) { + offset = hit->nr_contig; + } else { + offset = fclus - hit->fcluster; + break; + } + } + } + if (hit != &nohit) { + extent_cache_update_lru(inode, hit); + + cid->id = extent->cache_valid_id; + cid->nr_contig = hit->nr_contig; + cid->fcluster = hit->fcluster; + cid->dcluster = hit->dcluster; + *cached_fclus = cid->fcluster + offset; + *cached_dclus = cid->dcluster + offset; + } + spin_unlock(&extent->cache_lru_lock); + + return offset; +} + +static struct extent_cache *extent_cache_merge(struct inode *inode, + struct extent_cache_id *new) +{ + EXTENT_T *extent = &(SDFAT_I(inode)->fid.extent); + + struct extent_cache *p; + + list_for_each_entry(p, &extent->cache_lru, cache_list) { + /* Find the same part as "new" in cluster-chain. 
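extent_cache_lookup() above does not need an exact hit: among cached runs starting at or before the requested cluster it keeps the one that starts latest, and when the request lies beyond that run's contiguous span the chain walk resumes from the run's end rather than from the start of the file. A hedged sketch of that selection policy, with a plain array standing in for the LRU list:

#include <stdio.h>

struct run { unsigned int fclus, dclus, nr_contig; };

/* index of the latest-starting run at or before 'target', or -1 */
static int nearest_run(const struct run *r, int n, unsigned int target)
{
        int best = -1;
        int i;

        for (i = 0; i < n; i++)
                if (r[i].fclus <= target &&
                    (best < 0 || r[i].fclus > r[best].fclus))
                        best = i;
        return best;
}

int main(void)
{
        struct run cache[] = { { 0, 500, 2 }, { 16, 900, 8 }, { 40, 1200, 4 } };
        int i = nearest_run(cache, 3, 30);
        unsigned int off;

        if (i < 0)
                return 0;       /* no usable run: walk from the chain head */
        /* clamp: past the contiguous span, resume at the run's end */
        off = (30 > cache[i].fclus + cache[i].nr_contig)
                ? cache[i].nr_contig : 30 - cache[i].fclus;
        printf("resume at file cluster %u, disk cluster %u\n",
               cache[i].fclus + off, cache[i].dclus + off);
        return 0;
}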
*/ + if (p->fcluster == new->fcluster) { + ASSERT(p->dcluster == new->dcluster); + if (new->nr_contig > p->nr_contig) + p->nr_contig = new->nr_contig; + return p; + } + } + return NULL; +} + +static void extent_cache_add(struct inode *inode, struct extent_cache_id *new) +{ + EXTENT_T *extent = &(SDFAT_I(inode)->fid.extent); + + struct extent_cache *cache, *tmp; + + if (new->fcluster == -1) /* dummy cache */ + return; + + spin_lock(&extent->cache_lru_lock); + if (new->id != EXTENT_CACHE_VALID && + new->id != extent->cache_valid_id) + goto out; /* this cache was invalidated */ + + cache = extent_cache_merge(inode, new); + if (cache == NULL) { + if (extent->nr_caches < EXTENT_MAX_CACHE) { + extent->nr_caches++; + spin_unlock(&extent->cache_lru_lock); + + tmp = extent_cache_alloc(); + if (!tmp) { + spin_lock(&extent->cache_lru_lock); + extent->nr_caches--; + spin_unlock(&extent->cache_lru_lock); + return; + } + + spin_lock(&extent->cache_lru_lock); + cache = extent_cache_merge(inode, new); + if (cache != NULL) { + extent->nr_caches--; + extent_cache_free(tmp); + goto out_update_lru; + } + cache = tmp; + } else { + struct list_head *p = extent->cache_lru.prev; + cache = list_entry(p, struct extent_cache, cache_list); + } + cache->fcluster = new->fcluster; + cache->dcluster = new->dcluster; + cache->nr_contig = new->nr_contig; + } +out_update_lru: + extent_cache_update_lru(inode, cache); +out: + spin_unlock(&extent->cache_lru_lock); +} + +/* + * Cache invalidation occurs rarely, thus the LRU chain is not updated. It + * fixes itself after a while. + */ +static void __extent_cache_inval_inode(struct inode *inode) +{ + EXTENT_T *extent = &(SDFAT_I(inode)->fid.extent); + struct extent_cache *cache; + + while (!list_empty(&extent->cache_lru)) { + cache = list_entry(extent->cache_lru.next, + struct extent_cache, cache_list); + list_del_init(&cache->cache_list); + extent->nr_caches--; + extent_cache_free(cache); + } + /* Update. The copy of caches before this id is discarded. 
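The cache_valid_id checked by extent_cache_add() above is a generation counter: extent_cache_lookup() snapshots the current id into the extent_cache_id, invalidation bumps it, and a later add whose snapshot no longer matches is silently dropped. That is what makes it safe for the add path to release the spinlock while allocating. A minimal single-threaded sketch of the pattern, names illustrative:

#include <stdio.h>

struct gen_cache {
        unsigned int generation;        /* bumped on every invalidation */
        unsigned int value;             /* cached payload */
        int valid;
};

static void invalidate(struct gen_cache *c)
{
        c->valid = 0;
        c->generation++;                /* stale snapshots stop matching */
}

/* 'snap' is the generation observed when the value was computed */
static void insert(struct gen_cache *c, unsigned int snap, unsigned int v)
{
        if (snap != c->generation)
                return;                 /* raced with invalidation: drop */
        c->value = v;
        c->valid = 1;
}

int main(void)
{
        struct gen_cache c = { .generation = 1 };
        unsigned int snap = c.generation;       /* snapshot, as in lookup */

        invalidate(&c);                         /* e.g. concurrent truncate */
        insert(&c, snap, 42);                   /* silently discarded */
        printf("valid=%d\n", c.valid);          /* prints valid=0 */
        return 0;
}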
*/ + extent->cache_valid_id++; + if (extent->cache_valid_id == EXTENT_CACHE_VALID) + extent->cache_valid_id++; +} + +void extent_cache_inval_inode(struct inode *inode) +{ + EXTENT_T *extent = &(SDFAT_I(inode)->fid.extent); + + spin_lock(&extent->cache_lru_lock); + __extent_cache_inval_inode(inode); + spin_unlock(&extent->cache_lru_lock); +} + +static inline s32 cache_contiguous(struct extent_cache_id *cid, u32 dclus) +{ + cid->nr_contig++; + return ((cid->dcluster + cid->nr_contig) == dclus); +} + +static inline void cache_init(struct extent_cache_id *cid, u32 fclus, u32 dclus) +{ + cid->id = EXTENT_CACHE_VALID; + cid->fcluster = fclus; + cid->dcluster = dclus; + cid->nr_contig = 0; +} + +s32 extent_get_clus(struct inode *inode, u32 cluster, u32 *fclus, + u32 *dclus, u32 *last_dclus, s32 allow_eof) +{ + struct super_block *sb = inode->i_sb; + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + u32 limit = fsi->num_clusters; + FILE_ID_T *fid = &(SDFAT_I(inode)->fid); + struct extent_cache_id cid; + u32 content; + + /* FOR GRACEFUL ERROR HANDLING */ + if (IS_CLUS_FREE(fid->start_clu)) { + sdfat_fs_error(sb, "invalid access to " + "extent cache (entry 0x%08x)", fid->start_clu); + ASSERT(0); + return -EIO; + } + + *fclus = 0; + *dclus = fid->start_clu; + *last_dclus = *dclus; + + /* + * Don`t use extent_cache if zero offset or non-cluster allocation + */ + if ((cluster == 0) || IS_CLUS_EOF(*dclus)) + return 0; + + cache_init(&cid, CLUS_EOF, CLUS_EOF); + + if (extent_cache_lookup(inode, cluster, &cid, fclus, dclus) == CLUS_EOF) { + /* + * dummy, always not contiguous + * This is reinitialized by cache_init(), later. + */ + ASSERT((cid.id == EXTENT_CACHE_VALID) + && (cid.fcluster == CLUS_EOF) + && (cid.dcluster == CLUS_EOF) + && (cid.nr_contig == 0)); + } + + if (*fclus == cluster) + return 0; + + while (*fclus < cluster) { + /* prevent the infinite loop of cluster chain */ + if (*fclus > limit) { + sdfat_fs_error(sb, + "%s: detected the cluster chain loop" + " (i_pos %u)", __func__, + (*fclus)); + return -EIO; + } + + if (fat_ent_get_safe(sb, *dclus, &content)) + return -EIO; + + *last_dclus = *dclus; + *dclus = content; + (*fclus)++; + + if (IS_CLUS_EOF(content)) { + if (!allow_eof) { + sdfat_fs_error(sb, + "%s: invalid cluster chain (i_pos %u," + "last_clus 0x%08x is EOF)", + __func__, *fclus, (*last_dclus)); + return -EIO; + } + + break; + } + + if (!cache_contiguous(&cid, *dclus)) + cache_init(&cid, *fclus, *dclus); + } + + extent_cache_add(inode, &cid); + return 0; +} diff --git a/fs/sdfat/fatent.c b/fs/sdfat/fatent.c new file mode 100644 index 000000000000..fca32a50d336 --- /dev/null +++ b/fs/sdfat/fatent.c @@ -0,0 +1,420 @@ +/* + * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. 
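extent_get_clus() above bounds its FAT walk with the volume's cluster count: no intact chain can have more links than there are clusters, so exceeding that limit proves a cycle, and the walk fails with -EIO instead of spinning forever. The same guard as a runnable toy, with the FAT as a plain array:

#include <stdio.h>

#define EOF_MARK 0xFFFFFFFFu

/* returns chain length, or -1 if the chain must contain a cycle */
static int chain_length(const unsigned int *fat, unsigned int nclus,
                        unsigned int start)
{
        unsigned int cur = start, steps = 0;

        while (cur != EOF_MARK) {
                if (++steps > nclus)
                        return -1;      /* longer than the volume: loop */
                cur = fat[cur];
        }
        return (int)steps;
}

int main(void)
{
        /* cluster 2 -> 3 -> 4 -> 2: a deliberately corrupt cycle */
        unsigned int fat[] = { 0, 0, 3, 4, 2 };

        printf("%d\n", chain_length(fat, 5, 2));        /* prints -1 */
        return 0;
}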
+ */ + +/************************************************************************/ +/* */ +/* PROJECT : exFAT & FAT12/16/32 File System */ +/* FILE : fatent.c */ +/* PURPOSE : sdFAT FAT entry manager */ +/* */ +/*----------------------------------------------------------------------*/ +/* NOTES */ +/* */ +/* */ +/************************************************************************/ + +#include <asm/unaligned.h> + +#include "sdfat.h" +#include "core.h" + +/*----------------------------------------------------------------------*/ +/* Global Variable Definitions */ +/*----------------------------------------------------------------------*/ +/* All buffer structures are protected w/ fsi->v_sem */ + +/*----------------------------------------------------------------------*/ +/* Static functions */ +/*----------------------------------------------------------------------*/ + +/*======================================================================*/ +/* FAT Read/Write Functions */ +/*======================================================================*/ +/* in : sb, loc + * out: content + * returns 0 on success, -1 on error + */ +static s32 exfat_ent_get(struct super_block *sb, u32 loc, u32 *content) +{ + u32 off, _content; + u64 sec; + u8 *fat_sector; + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + + /* fsi->vol_type == EXFAT */ + sec = fsi->FAT1_start_sector + (loc >> (sb->s_blocksize_bits-2)); + off = (loc << 2) & (u32)(sb->s_blocksize - 1); + + fat_sector = fcache_getblk(sb, sec); + if (!fat_sector) + return -EIO; + + _content = le32_to_cpu(*(__le32 *)(&fat_sector[off])); + + /* remap reserved clusters to simplify code */ + if (_content >= CLUSTER_32(0xFFFFFFF8)) + _content = CLUS_EOF; + + *content = CLUSTER_32(_content); + return 0; +} + +static s32 exfat_ent_set(struct super_block *sb, u32 loc, u32 content) +{ + u32 off; + u64 sec; + u8 *fat_sector; + __le32 *fat_entry; + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + + sec = fsi->FAT1_start_sector + (loc >> (sb->s_blocksize_bits-2)); + off = (loc << 2) & (u32)(sb->s_blocksize - 1); + + fat_sector = fcache_getblk(sb, sec); + if (!fat_sector) + return -EIO; + + fat_entry = (__le32 *)&(fat_sector[off]); + *fat_entry = cpu_to_le32(content); + + return fcache_modify(sb, sec); +} + +#define FATENT_FAT32_VALID_MASK (0x0FFFFFFFU) +#define FATENT_FAT32_IGNORE_MASK (0xF0000000U) +static s32 fat32_ent_get(struct super_block *sb, u32 loc, u32 *content) +{ + u32 off, _content; + u64 sec; + u8 *fat_sector; + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + + sec = fsi->FAT1_start_sector + (loc >> (sb->s_blocksize_bits-2)); + off = (loc << 2) & (u32)(sb->s_blocksize - 1); + + fat_sector = fcache_getblk(sb, sec); + if (!fat_sector) + return -EIO; + + _content = le32_to_cpu(*(__le32 *)(&fat_sector[off])); + _content &= FATENT_FAT32_VALID_MASK; + + /* remap reserved clusters to simplify code */ + if (_content == CLUSTER_32(0x0FFFFFF7U)) + _content = CLUS_BAD; + else if (_content >= CLUSTER_32(0x0FFFFFF8U)) + _content = CLUS_EOF; + + *content = CLUSTER_32(_content); + return 0; +} + +static s32 fat32_ent_set(struct super_block *sb, u32 loc, u32 content) +{ + u32 off; + u64 sec; + u8 *fat_sector; + __le32 *fat_entry; + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + + content &= FATENT_FAT32_VALID_MASK; + + sec = fsi->FAT1_start_sector + (loc >> (sb->s_blocksize_bits-2)); + off = (loc << 2) & (u32)(sb->s_blocksize - 1); + + fat_sector = fcache_getblk(sb, sec); + if (!fat_sector) + return -EIO; + + fat_entry = (__le32 *)&(fat_sector[off]); + content |= (le32_to_cpu(*fat_entry) & 
FATENT_FAT32_IGNORE_MASK); + *fat_entry = cpu_to_le32(content); + + return fcache_modify(sb, sec); +} + +#define FATENT_FAT16_VALID_MASK (0x0000FFFFU) +static s32 fat16_ent_get(struct super_block *sb, u32 loc, u32 *content) +{ + u32 off, _content; + u64 sec; + u8 *fat_sector; + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + + sec = fsi->FAT1_start_sector + (loc >> (sb->s_blocksize_bits-1)); + off = (loc << 1) & (u32)(sb->s_blocksize - 1); + + fat_sector = fcache_getblk(sb, sec); + if (!fat_sector) + return -EIO; + + _content = (u32)le16_to_cpu(*(__le16 *)(&fat_sector[off])); + _content &= FATENT_FAT16_VALID_MASK; + + /* remap reserved clusters to simplify code */ + if (_content == CLUSTER_16(0xFFF7U)) + _content = CLUS_BAD; + else if (_content >= CLUSTER_16(0xFFF8U)) + _content = CLUS_EOF; + + *content = CLUSTER_32(_content); + return 0; +} + +static s32 fat16_ent_set(struct super_block *sb, u32 loc, u32 content) +{ + u32 off; + u64 sec; + u8 *fat_sector; + __le16 *fat_entry; + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + + content &= FATENT_FAT16_VALID_MASK; + + sec = fsi->FAT1_start_sector + (loc >> (sb->s_blocksize_bits-1)); + off = (loc << 1) & (u32)(sb->s_blocksize - 1); + + fat_sector = fcache_getblk(sb, sec); + if (!fat_sector) + return -EIO; + + fat_entry = (__le16 *)&(fat_sector[off]); + *fat_entry = cpu_to_le16(content); + + return fcache_modify(sb, sec); +} + +#define FATENT_FAT12_VALID_MASK (0x00000FFFU) +static s32 fat12_ent_get(struct super_block *sb, u32 loc, u32 *content) +{ + u32 off, _content; + u64 sec; + u8 *fat_sector; + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + + sec = fsi->FAT1_start_sector + ((loc + (loc >> 1)) >> sb->s_blocksize_bits); + off = (loc + (loc >> 1)) & (u32)(sb->s_blocksize - 1); + + fat_sector = fcache_getblk(sb, sec); + if (!fat_sector) + return -EIO; + + if (off == (u32)(sb->s_blocksize - 1)) { + _content = (u32) fat_sector[off]; + + fat_sector = fcache_getblk(sb, ++sec); + if (!fat_sector) + return -EIO; + + _content |= (u32) fat_sector[0] << 8; + } else { + _content = get_unaligned_le16(&fat_sector[off]); + } + + if (loc & 1) + _content >>= 4; + + _content &= FATENT_FAT12_VALID_MASK; + + /* remap reserved clusters to simplify code */ + if (_content == CLUSTER_16(0x0FF7U)) + _content = CLUS_BAD; + else if (_content >= CLUSTER_16(0x0FF8U)) + _content = CLUS_EOF; + + *content = CLUSTER_32(_content); + return 0; +} + +static s32 fat12_ent_set(struct super_block *sb, u32 loc, u32 content) +{ + u32 off; + u64 sec; + u8 *fat_sector, *fat_entry; + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + + content &= FATENT_FAT12_VALID_MASK; + + sec = fsi->FAT1_start_sector + ((loc + (loc >> 1)) >> sb->s_blocksize_bits); + off = (loc + (loc >> 1)) & (u32)(sb->s_blocksize - 1); + + fat_sector = fcache_getblk(sb, sec); + if (!fat_sector) + return -EIO; + + if (loc & 1) { /* odd */ + + content <<= 4; + + if (off == (u32)(sb->s_blocksize-1)) { + fat_sector[off] = (u8)(content | (fat_sector[off] & 0x0F)); + if (fcache_modify(sb, sec)) + return -EIO; + + fat_sector = fcache_getblk(sb, ++sec); + if (!fat_sector) + return -EIO; + + fat_sector[0] = (u8)(content >> 8); + } else { + fat_entry = &(fat_sector[off]); + content |= 0x000F & get_unaligned_le16(fat_entry); + put_unaligned_le16(content, fat_entry); + } + } else { /* even */ + fat_sector[off] = (u8)(content); + + if (off == (u32)(sb->s_blocksize-1)) { + fat_sector[off] = (u8)(content); + if (fcache_modify(sb, sec)) + return -EIO; + + fat_sector = fcache_getblk(sb, ++sec); + if (!fat_sector) + return -EIO; + + fat_sector[0] = 
(u8)((fat_sector[0] & 0xF0) | (content >> 8)); + } else { + fat_entry = &(fat_sector[off]); + content |= 0xF000 & get_unaligned_le16(fat_entry); + put_unaligned_le16(content, fat_entry); + } + } + return fcache_modify(sb, sec); +} + + +static FATENT_OPS_T fat12_ent_ops = { + fat12_ent_get, + fat12_ent_set +}; + +static FATENT_OPS_T fat16_ent_ops = { + fat16_ent_get, + fat16_ent_set +}; + +static FATENT_OPS_T fat32_ent_ops = { + fat32_ent_get, + fat32_ent_set +}; + +static FATENT_OPS_T exfat_ent_ops = { + exfat_ent_get, + exfat_ent_set +}; + +s32 fat_ent_ops_init(struct super_block *sb) +{ + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + + switch (fsi->vol_type) { + case EXFAT: + fsi->fatent_ops = &exfat_ent_ops; + break; + case FAT32: + fsi->fatent_ops = &fat32_ent_ops; + break; + case FAT16: + fsi->fatent_ops = &fat16_ent_ops; + break; + case FAT12: + fsi->fatent_ops = &fat12_ent_ops; + break; + default: + fsi->fatent_ops = NULL; + EMSG("Unknown volume type : %d", (int)fsi->vol_type); + return -ENOTSUPP; + } + + return 0; +} + +static inline bool is_reserved_clus(u32 clus) +{ + if (IS_CLUS_FREE(clus)) + return true; + if (IS_CLUS_EOF(clus)) + return true; + if (IS_CLUS_BAD(clus)) + return true; + return false; +} + +static inline bool is_valid_clus(FS_INFO_T *fsi, u32 clus) +{ + if (clus < CLUS_BASE || fsi->num_clusters <= clus) + return false; + return true; +} + +s32 fat_ent_get(struct super_block *sb, u32 loc, u32 *content) +{ + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + s32 err; + + if (!is_valid_clus(fsi, loc)) { + sdfat_fs_error(sb, "invalid access to FAT (entry 0x%08x)", loc); + return -EIO; + } + + err = fsi->fatent_ops->ent_get(sb, loc, content); + if (err) { + sdfat_fs_error(sb, "failed to access to FAT " + "(entry 0x%08x, err:%d)", loc, err); + return err; + } + + if (!is_reserved_clus(*content) && !is_valid_clus(fsi, *content)) { + sdfat_fs_error(sb, "invalid access to FAT (entry 0x%08x) " + "bogus content (0x%08x)", loc, *content); + return -EIO; + } + + return 0; +} + +s32 fat_ent_set(struct super_block *sb, u32 loc, u32 content) +{ + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + + return fsi->fatent_ops->ent_set(sb, loc, content); +} + +s32 fat_ent_get_safe(struct super_block *sb, u32 loc, u32 *content) +{ + s32 err = fat_ent_get(sb, loc, content); + + if (err) + return err; + + if (IS_CLUS_FREE(*content)) { + sdfat_fs_error(sb, "invalid access to FAT free cluster " + "(entry 0x%08x)", loc); + return -EIO; + } + + if (IS_CLUS_BAD(*content)) { + sdfat_fs_error(sb, "invalid access to FAT bad cluster " + "(entry 0x%08x)", loc); + return -EIO; + } + + return 0; +} + +/* end of fatent.c */ diff --git a/fs/sdfat/misc.c b/fs/sdfat/misc.c new file mode 100644 index 000000000000..a006e898816f --- /dev/null +++ b/fs/sdfat/misc.c @@ -0,0 +1,464 @@ +/* + * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. 
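fat12_ent_get() and fat12_ent_set() above handle 12-bit entries packed 1.5 bytes apart: entry N starts at byte N + N/2, an even entry occupies the low 12 bits of the little-endian 16-bit word at that offset and an odd entry the high 12 bits, which is also why a single entry can straddle a sector boundary. The packing in isolation, as a runnable userspace sketch with no sector handling:

#include <stdio.h>
#include <string.h>

static unsigned int fat12_get(const unsigned char *fat, unsigned int loc)
{
        unsigned int off = loc + (loc >> 1);
        unsigned int v = fat[off] | (fat[off + 1] << 8);

        return (loc & 1) ? (v >> 4) : (v & 0x0FFF);
}

static void fat12_set(unsigned char *fat, unsigned int loc, unsigned int val)
{
        unsigned int off = loc + (loc >> 1);
        unsigned int v = fat[off] | (fat[off + 1] << 8);

        if (loc & 1)            /* odd: keep the neighbour's low nibble */
                v = (v & 0x000F) | ((val & 0x0FFF) << 4);
        else                    /* even: keep the neighbour's high nibble */
                v = (v & 0xF000) | (val & 0x0FFF);
        fat[off] = v & 0xFF;
        fat[off + 1] = v >> 8;
}

int main(void)
{
        unsigned char fat[8];

        memset(fat, 0, sizeof(fat));
        fat12_set(fat, 2, 0x123);
        fat12_set(fat, 3, 0xABC);
        printf("%03X %03X\n", fat12_get(fat, 2), fat12_get(fat, 3));
        /* prints: 123 ABC */
        return 0;
}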
+ */ + +/* + * linux/fs/fat/misc.c + * + * Written 1992,1993 by Werner Almesberger + * 22/11/2000 - Fixed fat_date_unix2dos for dates earlier than 01/01/1980 + * and date_dos2unix for date==0 by Igor Zhbanov(bsg@uniyar.ac.ru) + */ + +/************************************************************************/ +/* */ +/* PROJECT : exFAT & FAT12/16/32 File System */ +/* FILE : misc.c */ +/* PURPOSE : Helper function for checksum and handing sdFAT error */ +/* */ +/*----------------------------------------------------------------------*/ +/* NOTES */ +/* */ +/* */ +/************************************************************************/ + +#include <linux/module.h> +#include <linux/fs.h> +#include <linux/buffer_head.h> +#include <linux/time.h> +#include "sdfat.h" +#include "version.h" + +#ifdef CONFIG_SDFAT_SUPPORT_STLOG +#ifdef CONFIG_PROC_FSLOG +#include <linux/fslog.h> +#else +#include <linux/stlog.h> +#endif +#else +#define ST_LOG(fmt, ...) +#endif + +/************************************************************************* + * FUNCTIONS WHICH HAS KERNEL VERSION DEPENDENCY + *************************************************************************/ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 18, 0) +#define CURRENT_TIME_SEC timespec64_trunc(current_kernel_time64(), NSEC_PER_SEC) +#elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 12, 0) +#define CURRENT_TIME_SEC timespec_trunc(current_kernel_time(), NSEC_PER_SEC) +#else /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 12, 0) */ + /* EMPTY */ +#endif + + +#ifdef CONFIG_SDFAT_UEVENT +static struct kobject sdfat_uevent_kobj; + +int sdfat_uevent_init(struct kset *sdfat_kset) +{ + int err; + struct kobj_type *ktype = get_ktype(&sdfat_kset->kobj); + + sdfat_uevent_kobj.kset = sdfat_kset; + err = kobject_init_and_add(&sdfat_uevent_kobj, ktype, NULL, "uevent"); + if (err) + pr_err("[SDFAT] Unable to create sdfat uevent kobj\n"); + + return err; +} + +void sdfat_uevent_uninit(void) +{ + kobject_del(&sdfat_uevent_kobj); + memset(&sdfat_uevent_kobj, 0, sizeof(struct kobject)); +} + +void sdfat_uevent_ro_remount(struct super_block *sb) +{ + struct block_device *bdev = sb->s_bdev; + dev_t bd_dev = bdev ? bdev->bd_dev : 0; + + char major[16], minor[16]; + char *envp[] = { major, minor, NULL }; + + snprintf(major, sizeof(major), "MAJOR=%d", MAJOR(bd_dev)); + snprintf(minor, sizeof(minor), "MINOR=%d", MINOR(bd_dev)); + + kobject_uevent_env(&sdfat_uevent_kobj, KOBJ_CHANGE, envp); + + ST_LOG("[SDFAT](%s[%d:%d]): Uevent triggered\n", + sb->s_id, MAJOR(bd_dev), MINOR(bd_dev)); +} +#endif + +/* + * sdfat_fs_error reports a file system problem that might indicate fa data + * corruption/inconsistency. Depending on 'errors' mount option the + * panic() is called, or error message is printed FAT and nothing is done, + * or filesystem is remounted read-only (default behavior). + * In case the file system is remounted read-only, it can be made writable + * again by remounting it. + */ +void __sdfat_fs_error(struct super_block *sb, int report, const char *fmt, ...) +{ + struct sdfat_mount_options *opts = &SDFAT_SB(sb)->options; + va_list args; + struct va_format vaf; + struct block_device *bdev = sb->s_bdev; + dev_t bd_dev = bdev ? 
bdev->bd_dev : 0; + + if (report) { + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + pr_err("[SDFAT](%s[%d:%d]):ERR: %pV\n", + sb->s_id, MAJOR(bd_dev), MINOR(bd_dev), &vaf); +#ifdef CONFIG_SDFAT_SUPPORT_STLOG + if (opts->errors == SDFAT_ERRORS_RO && !(sb->s_flags & MS_RDONLY)) { + ST_LOG("[SDFAT](%s[%d:%d]):ERR: %pV\n", + sb->s_id, MAJOR(bd_dev), MINOR(bd_dev), &vaf); + } +#endif + va_end(args); + } + + if (opts->errors == SDFAT_ERRORS_PANIC) { + panic("[SDFAT](%s[%d:%d]): fs panic from previous error\n", + sb->s_id, MAJOR(bd_dev), MINOR(bd_dev)); + } else if (opts->errors == SDFAT_ERRORS_RO && !(sb->s_flags & MS_RDONLY)) { + sb->s_flags |= MS_RDONLY; + sdfat_statistics_set_mnt_ro(); + pr_err("[SDFAT](%s[%d:%d]): Filesystem has been set " + "read-only\n", sb->s_id, MAJOR(bd_dev), MINOR(bd_dev)); +#ifdef CONFIG_SDFAT_SUPPORT_STLOG + ST_LOG("[SDFAT](%s[%d:%d]): Filesystem has been set read-only\n", + sb->s_id, MAJOR(bd_dev), MINOR(bd_dev)); +#endif + sdfat_uevent_ro_remount(sb); + } +} +EXPORT_SYMBOL(__sdfat_fs_error); + +/** + * __sdfat_msg() - print preformated SDFAT specific messages. + * All logs except what uses sdfat_fs_error() should be written by __sdfat_msg() + * If 'st' is set, the log is propagated to ST_LOG. + */ +void __sdfat_msg(struct super_block *sb, const char *level, int st, const char *fmt, ...) +{ + struct va_format vaf; + va_list args; + struct block_device *bdev = sb->s_bdev; + dev_t bd_dev = bdev ? bdev->bd_dev : 0; + + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + /* level means KERN_ pacility level */ + printk("%s[SDFAT](%s[%d:%d]): %pV\n", level, + sb->s_id, MAJOR(bd_dev), MINOR(bd_dev), &vaf); +#ifdef CONFIG_SDFAT_SUPPORT_STLOG + if (st) { + ST_LOG("[SDFAT](%s[%d:%d]): %pV\n", + sb->s_id, MAJOR(bd_dev), MINOR(bd_dev), &vaf); + } +#endif + va_end(args); +} +EXPORT_SYMBOL(__sdfat_msg); + +void sdfat_log_version(void) +{ + pr_info("[SDFAT] Filesystem version %s\n", SDFAT_VERSION); +#ifdef CONFIG_SDFAT_SUPPORT_STLOG + ST_LOG("[SDFAT] Filesystem version %s\n", SDFAT_VERSION); +#endif +} +EXPORT_SYMBOL(sdfat_log_version); + +/* <linux/time.h> externs sys_tz + * extern struct timezone sys_tz; + */ +#define UNIX_SECS_1980 315532800L + +#if BITS_PER_LONG == 64 +#define UNIX_SECS_2108 4354819200L +#endif + +/* days between 1970/01/01 and 1980/01/01 (2 leap days) */ +#define DAYS_DELTA_DECADE (365 * 10 + 2) +/* 120 (2100 - 1980) isn't leap year */ +#define NO_LEAP_YEAR_2100 (120) +#define IS_LEAP_YEAR(y) (!((y) & 0x3) && (y) != NO_LEAP_YEAR_2100) + +#define SECS_PER_MIN (60) +#define SECS_PER_HOUR (60 * SECS_PER_MIN) +#define SECS_PER_DAY (24 * SECS_PER_HOUR) + +#define MAKE_LEAP_YEAR(leap_year, year) \ + do { \ + /* 2100 isn't leap year */ \ + if (unlikely(year > NO_LEAP_YEAR_2100)) \ + leap_year = ((year + 3) / 4) - 1; \ + else \ + leap_year = ((year + 3) / 4); \ + } while (0) + +/* Linear day numbers of the respective 1sts in non-leap years. */ +static time_t accum_days_in_year[] = { + /* Month : N 01 02 03 04 05 06 07 08 09 10 11 12 */ + 0, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 0, 0, 0, +}; + +#define TIMEZONE_SEC(x) ((x) * 15 * SECS_PER_MIN) +/* Convert a FAT time/date pair to a UNIX date (seconds since 1 1 70). 
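sdfat_time_fat2unix() just below does that conversion with nothing but day counting: the year is an offset from 1980, DAYS_DELTA_DECADE bridges 1970 to 1980, and MAKE_LEAP_YEAR special-cases 2100 (offset 120), which is not a leap year. The arithmetic extracted into a runnable sketch, timezone handling omitted:

#include <stdio.h>

static const int accum[13] = { 0, 0, 31, 59, 90, 120, 151, 181,
                               212, 243, 273, 304, 334 };

/* year counts from 1980, as in the FAT on-disk format */
static long long fat2unix(int year, int mon, int day, int hh, int mm, int ss)
{
        /* leap days before this year; 2100 (offset 120) is skipped */
        long long ld = (year > 120) ? (year + 3) / 4 - 1 : (year + 3) / 4;
        long long days = year * 365LL + ld + accum[mon] + (day - 1)
                         + 3652;        /* 1970-01-01 .. 1980-01-01 */

        if (!(year & 3) && year != 120 && mon > 2)
                days++;                 /* inside a leap year, past February */
        return ((days * 24 + hh) * 60 + mm) * 60 + ss;
}

int main(void)
{
        /* 2017-01-01 00:00:00 UTC: year offset 37 */
        printf("%lld\n", fat2unix(37, 1, 1, 0, 0, 0)); /* 1483228800 */
        return 0;
}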
*/ +void sdfat_time_fat2unix(struct sdfat_sb_info *sbi, sdfat_timespec_t *ts, + DATE_TIME_T *tp) +{ + time_t year = tp->Year; + time_t ld; /* leap day */ + + MAKE_LEAP_YEAR(ld, year); + + if (IS_LEAP_YEAR(year) && (tp->Month) > 2) + ld++; + + ts->tv_sec = tp->Second + tp->Minute * SECS_PER_MIN + + tp->Hour * SECS_PER_HOUR + + (year * 365 + ld + accum_days_in_year[tp->Month] + + (tp->Day - 1) + DAYS_DELTA_DECADE) * SECS_PER_DAY; + + ts->tv_nsec = 0; + + /* Treat as local time */ + if (!sbi->options.tz_utc && !tp->Timezone.valid) { + ts->tv_sec += sys_tz.tz_minuteswest * SECS_PER_MIN; + return; + } + + /* Treat as UTC time */ + if (!tp->Timezone.valid) + return; + + /* Treat as UTC time, but need to adjust timezone to UTC0 */ + if (tp->Timezone.off <= 0x3F) + ts->tv_sec -= TIMEZONE_SEC(tp->Timezone.off); + else /* 0x40 <= (tp->Timezone & 0x7F) <=0x7F */ + ts->tv_sec += TIMEZONE_SEC(0x80 - tp->Timezone.off); +} + +#define TIMEZONE_CUR_OFFSET() ((sys_tz.tz_minuteswest / (-15)) & 0x7F) +/* Convert linear UNIX date to a FAT time/date pair. */ +void sdfat_time_unix2fat(struct sdfat_sb_info *sbi, sdfat_timespec_t *ts, + DATE_TIME_T *tp) +{ + bool tz_valid = (sbi->fsi.vol_type == EXFAT) ? true : false; + time_t second = ts->tv_sec; + time_t day, month, year; + time_t ld; /* leap day */ + + tp->Timezone.value = 0x00; + + /* Treats as local time with proper time */ + if (tz_valid || !sbi->options.tz_utc) { + second -= sys_tz.tz_minuteswest * SECS_PER_MIN; + if (tz_valid) { + tp->Timezone.valid = 1; + tp->Timezone.off = TIMEZONE_CUR_OFFSET(); + } + } + + /* Jan 1 GMT 00:00:00 1980. But what about another time zone? */ + if (second < UNIX_SECS_1980) { + tp->Second = 0; + tp->Minute = 0; + tp->Hour = 0; + tp->Day = 1; + tp->Month = 1; + tp->Year = 0; + return; + } +#if (BITS_PER_LONG == 64) + if (second >= UNIX_SECS_2108) { + tp->Second = 59; + tp->Minute = 59; + tp->Hour = 23; + tp->Day = 31; + tp->Month = 12; + tp->Year = 127; + return; + } +#endif + + day = second / SECS_PER_DAY - DAYS_DELTA_DECADE; + year = day / 365; + + MAKE_LEAP_YEAR(ld, year); + if (year * 365 + ld > day) + year--; + + MAKE_LEAP_YEAR(ld, year); + day -= year * 365 + ld; + + if (IS_LEAP_YEAR(year) && day == accum_days_in_year[3]) { + month = 2; + } else { + if (IS_LEAP_YEAR(year) && day > accum_days_in_year[3]) + day--; + for (month = 1; month < 12; month++) { + if (accum_days_in_year[month + 1] > day) + break; + } + } + day -= accum_days_in_year[month]; + + tp->Second = second % SECS_PER_MIN; + tp->Minute = (second / SECS_PER_MIN) % 60; + tp->Hour = (second / SECS_PER_HOUR) % 24; + tp->Day = day + 1; + tp->Month = month; + tp->Year = year; +} + +TIMESTAMP_T *tm_now(struct sdfat_sb_info *sbi, TIMESTAMP_T *tp) +{ + sdfat_timespec_t ts = CURRENT_TIME_SEC; + DATE_TIME_T dt; + + sdfat_time_unix2fat(sbi, &ts, &dt); + + tp->year = dt.Year; + tp->mon = dt.Month; + tp->day = dt.Day; + tp->hour = dt.Hour; + tp->min = dt.Minute; + tp->sec = dt.Second; + tp->tz.value = dt.Timezone.value; + + return tp; +} + +u8 calc_chksum_1byte(void *data, s32 len, u8 chksum) +{ + s32 i; + u8 *c = (u8 *) data; + + for (i = 0; i < len; i++, c++) + chksum = (((chksum & 1) << 7) | ((chksum & 0xFE) >> 1)) + *c; + + return chksum; +} + +u16 calc_chksum_2byte(void *data, s32 len, u16 chksum, s32 type) +{ + s32 i; + u8 *c = (u8 *) data; + + for (i = 0; i < len; i++, c++) { + if (((i == 2) || (i == 3)) && (type == CS_DIR_ENTRY)) + continue; + chksum = (((chksum & 1) << 15) | ((chksum & 0xFFFE) >> 1)) + (u16) *c; + } + return chksum; +} + +#ifdef 
CONFIG_SDFAT_TRACE_ELAPSED_TIME +struct timeval __t1, __t2; +u32 sdfat_time_current_usec(struct timeval *tv) +{ + do_gettimeofday(tv); + return (u32)(tv->tv_sec*1000000 + tv->tv_usec); +} +#endif /* CONFIG_SDFAT_TRACE_ELAPSED_TIME */ + +#ifdef CONFIG_SDFAT_DBG_CAREFUL +/* Check the consistency of i_size_ondisk (FAT32, or flags 0x01 only) */ +void sdfat_debug_check_clusters(struct inode *inode) +{ + unsigned int num_clusters; + volatile uint32_t tmp_fat_chain[50]; + volatile int tmp_i = 0; + volatile unsigned int num_clusters_org, tmp_i = 0; + CHAIN_T clu; + FILE_ID_T *fid = &(SDFAT_I(inode)->fid); + FS_INFO_T *fsi = &(SDFAT_SB(inode->i_sb)->fsi); + + if (SDFAT_I(inode)->i_size_ondisk == 0) + num_clusters = 0; + else + num_clusters = ((SDFAT_I(inode)->i_size_ondisk-1) >> fsi->cluster_size_bits) + 1; + + clu.dir = fid->start_clu; + clu.size = num_clusters; + clu.flags = fid->flags; + + num_clusters_org = num_clusters; + + if (clu.flags == 0x03) + return; + + while (num_clusters > 0) { + /* FAT chain logging */ + tmp_fat_chain[tmp_i] = clu.dir; + tmp_i++; + if (tmp_i >= 50) + tmp_i = 0; + + BUG_ON(IS_CLUS_EOF(clu.dir) || IS_CLUS_FREE(clu.dir)); + + if (get_next_clus_safe(inode->i_sb, &(clu.dir))) + EMSG("%s: failed to access to FAT\n"); + + num_clusters--; + } + + BUG_ON(!IS_CLUS_EOF(clu.dir)); +} + +#endif /* CONFIG_SDFAT_DBG_CAREFUL */ + +#ifdef CONFIG_SDFAT_DBG_MSG +void __sdfat_dmsg(int level, const char *fmt, ...) +{ +#ifdef CONFIG_SDFAT_DBG_SHOW_PID + struct va_format vaf; + va_list args; + + /* should check type */ + if (level > SDFAT_MSG_LEVEL) + return; + + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + /* fmt already includes KERN_ pacility level */ + printk("[%u] %pV", current->pid, &vaf); + va_end(args); +#else + va_list args; + + /* should check type */ + if (level > SDFAT_MSG_LEVEL) + return; + + va_start(args, fmt); + /* fmt already includes KERN_ pacility level */ + vprintk(fmt, args); + va_end(args); +#endif +} +#endif + diff --git a/fs/sdfat/mpage.c b/fs/sdfat/mpage.c new file mode 100644 index 000000000000..f550fbb2204a --- /dev/null +++ b/fs/sdfat/mpage.c @@ -0,0 +1,635 @@ +/* + * fs/mpage.c + * + * Copyright (C) 2002, Linus Torvalds. + * + * Contains functions related to preparing and submitting BIOs which contain + * multiple pagecache pages. + * + * 15May2002 Andrew Morton + * Initial version + * 27Jun2002 axboe@suse.de + * use bio_add_page() to build bio's just the right size + */ + +/* + * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. 
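calc_chksum_2byte() above is a rotate-right-by-one, add-a-byte checksum; with type CS_DIR_ENTRY it skips bytes 2 and 3 of the buffer, the position of the checksum field itself in the first entry of an exFAT directory-entry set. A standalone sketch of the same loop; the sample entry bytes are illustrative only:

#include <stdio.h>

static unsigned short entry_set_chksum(const unsigned char *p, int len,
                                       int skip_chksum_field)
{
        unsigned short sum = 0;
        int i;

        for (i = 0; i < len; i++) {
                if (skip_chksum_field && (i == 2 || i == 3))
                        continue;       /* the checksum field itself */
                sum = ((sum & 1) << 15) | (sum >> 1);   /* rotate right */
                sum = (unsigned short)(sum + p[i]);
        }
        return sum;
}

int main(void)
{
        unsigned char dentry_set[32] = { 0x85, 0x02 }; /* type, sec. count */

        printf("0x%04X\n", entry_set_chksum(dentry_set, 32, 1));
        return 0;
}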
+ */ + +/************************************************************************/ +/* */ +/* PROJECT : exFAT & FAT12/16/32 File System */ +/* FILE : core.c */ +/* PURPOSE : sdFAT glue layer for supporting VFS */ +/* */ +/*----------------------------------------------------------------------*/ +/* NOTES */ +/* */ +/* */ +/************************************************************************/ + +#include <linux/version.h> +#include <linux/module.h> +#include <linux/time.h> +#include <linux/buffer_head.h> +#include <linux/exportfs.h> +#include <linux/mount.h> +#include <linux/vfs.h> +#include <linux/parser.h> +#include <linux/uio.h> +#include <linux/writeback.h> +#include <linux/log2.h> +#include <linux/hash.h> +#include <linux/backing-dev.h> +#include <linux/sched.h> +#include <linux/fs_struct.h> +#include <linux/namei.h> +#include <linux/bio.h> +#include <linux/blkdev.h> +#include <linux/swap.h> /* for mark_page_accessed() */ +#include <asm/current.h> +#include <asm/unaligned.h> +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 10, 0) +#include <linux/aio.h> +#endif + +#include "sdfat.h" + +#ifdef CONFIG_SDFAT_ALIGNED_MPAGE_WRITE + +/************************************************************************* + * INNER FUNCTIONS FOR FUNCTIONS WHICH HAS KERNEL VERSION DEPENDENCY + *************************************************************************/ +static void __mpage_write_end_io(struct bio *bio, int err); + +/************************************************************************* + * FUNCTIONS WHICH HAS KERNEL VERSION DEPENDENCY + *************************************************************************/ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 14, 0) + /* EMPTY */ +#else /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 14, 0) */ +static inline void bio_set_dev(struct bio *bio, struct block_device *bdev) +{ + bio->bi_bdev = bdev; +} +#endif + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) +static inline void __sdfat_clean_bdev_aliases(struct block_device *bdev, sector_t block) +{ + clean_bdev_aliases(bdev, block, 1); +} +#else /* LINUX_VERSION_CODE < KERNEL_VERSION(4,10,0) */ +static inline void __sdfat_clean_bdev_aliases(struct block_device *bdev, sector_t block) +{ + unmap_underlying_metadata(bdev, block); +} +#endif + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0) +static inline void __sdfat_submit_bio_write2(int flags, struct bio *bio) +{ + bio_set_op_attrs(bio, REQ_OP_WRITE, flags); + submit_bio(bio); +} +#else /* LINUX_VERSION_CODE < KERNEL_VERSION(4,8,0) */ +static inline void __sdfat_submit_bio_write2(int flags, struct bio *bio) +{ + submit_bio(WRITE | flags, bio); +} +#endif + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 1, 0) +static inline int bio_get_nr_vecs(struct block_device *bdev) +{ + return BIO_MAX_PAGES; +} +#else /* LINUX_VERSION_CODE < KERNEL_VERSION(4,1,0) */ + /* EMPTY */ +#endif + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 14, 0) +static inline sector_t __sdfat_bio_sector(struct bio *bio) +{ + return bio->bi_iter.bi_sector; +} + +static inline void __sdfat_set_bio_sector(struct bio *bio, sector_t sector) +{ + bio->bi_iter.bi_sector = sector; +} + +static inline unsigned int __sdfat_bio_size(struct bio *bio) +{ + return bio->bi_iter.bi_size; +} + +static inline void __sdfat_set_bio_size(struct bio *bio, unsigned int size) +{ + bio->bi_iter.bi_size = size; +} +#else /* LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0) */ +static inline sector_t __sdfat_bio_sector(struct bio *bio) +{ + return bio->bi_sector; +} + +static inline void 
__sdfat_set_bio_sector(struct bio *bio, sector_t sector) +{ + bio->bi_sector = sector; +} + +static inline unsigned int __sdfat_bio_size(struct bio *bio) +{ + return bio->bi_size; +} + +static inline void __sdfat_set_bio_size(struct bio *bio, unsigned int size) +{ + bio->bi_size = size; +} +#endif + +/************************************************************************* + * MORE FUNCTIONS WHICH HAS KERNEL VERSION DEPENDENCY + *************************************************************************/ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 13, 0) +static void mpage_write_end_io(struct bio *bio) +{ + __mpage_write_end_io(bio, blk_status_to_errno(bio->bi_status)); +} +#elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 3, 0) +static void mpage_write_end_io(struct bio *bio) +{ + __mpage_write_end_io(bio, bio->bi_error); +} +#else /* LINUX_VERSION_CODE < KERNEL_VERSION(4,3,0) */ +static void mpage_write_end_io(struct bio *bio, int err) +{ + if (test_bit(BIO_UPTODATE, &bio->bi_flags)) + err = 0; + __mpage_write_end_io(bio, err); +} +#endif + +/* __check_dfr_on() and __dfr_writepage_end_io() functions + * are copied from sdfat.c + * Each function should be same perfectly + */ +static inline int __check_dfr_on(struct inode *inode, loff_t start, loff_t end, const char *fname) +{ +#ifdef CONFIG_SDFAT_DFR + struct defrag_info *ino_dfr = &(SDFAT_I(inode)->dfr_info); + + if ((atomic_read(&ino_dfr->stat) == DFR_INO_STAT_REQ) && + fsapi_dfr_check_dfr_on(inode, start, end, 0, fname)) + return 1; +#endif + return 0; +} + +static inline int __dfr_writepage_end_io(struct page *page) +{ +#ifdef CONFIG_SDFAT_DFR + struct defrag_info *ino_dfr = &(SDFAT_I(page->mapping->host)->dfr_info); + + if (atomic_read(&ino_dfr->stat) == DFR_INO_STAT_REQ) + fsapi_dfr_writepage_endio(page); +#endif + return 0; +} + + +static inline unsigned int __calc_size_to_align(struct super_block *sb) +{ + struct block_device *bdev = sb->s_bdev; + struct gendisk *disk; + struct request_queue *queue; + struct queue_limits *limit; + unsigned int max_sectors; + unsigned int aligned = 0; + + disk = bdev->bd_disk; + if (!disk) + goto out; + + queue = disk->queue; + if (!queue) + goto out; + + limit = &queue->limits; + max_sectors = limit->max_sectors; + aligned = 1 << ilog2(max_sectors); + + if (aligned && (max_sectors & (aligned - 1))) + aligned = 0; +out: + return aligned; +} + +struct mpage_data { + struct bio *bio; + sector_t last_block_in_bio; + get_block_t *get_block; + unsigned int use_writepage; + unsigned int size_to_align; +}; + +/* + * I/O completion handler for multipage BIOs. + * + * The mpage code never puts partial pages into a BIO (except for end-of-file). + * If a page does not map to a contiguous run of blocks then it simply falls + * back to block_read_full_page(). + * + * Why is this? If a page's completion depends on a number of different BIOs + * which can complete in any order (or at the same time) then determining the + * status of that page is hard. See end_buffer_async_read() for the details. + * There is no point in duplicating all that complexity. 
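__calc_size_to_align() above turns the queue's max_sectors into an alignment unit for BIO submission, but only when it is an exact power of two; anything else disables the aligned-submit path. The same decision without the kernel's ilog2() helper, runnable in userspace:

#include <stdio.h>

static unsigned int size_to_align(unsigned int max_sectors)
{
        unsigned int aligned = max_sectors;

        if (!max_sectors)
                return 0;
        /* strip low set bits until only the highest remains (ilog2) */
        while (aligned & (aligned - 1))
                aligned &= aligned - 1;
        /* a non-power-of-two max_sectors cannot act as an alignment */
        return (max_sectors & (aligned - 1)) ? 0 : aligned;
}

int main(void)
{
        printf("%u %u\n", size_to_align(256), size_to_align(240));
        /* prints: 256 0 */
        return 0;
}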
+ */ +static void __mpage_write_end_io(struct bio *bio, int err) +{ + struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; + + ASSERT(bio_data_dir(bio) == WRITE); /* only write */ + + do { + struct page *page = bvec->bv_page; + + if (--bvec >= bio->bi_io_vec) + prefetchw(&bvec->bv_page->flags); + if (err) { + SetPageError(page); + if (page->mapping) + mapping_set_error(page->mapping, err); + } + + __dfr_writepage_end_io(page); + + end_page_writeback(page); + } while (bvec >= bio->bi_io_vec); + bio_put(bio); +} + +static struct bio *mpage_bio_submit_write(int flags, struct bio *bio) +{ + bio->bi_end_io = mpage_write_end_io; + __sdfat_submit_bio_write2(flags, bio); + return NULL; +} + +static struct bio * +mpage_alloc(struct block_device *bdev, + sector_t first_sector, int nr_vecs, + gfp_t gfp_flags) +{ + struct bio *bio; + + bio = bio_alloc(gfp_flags, nr_vecs); + + if (bio == NULL && (current->flags & PF_MEMALLOC)) { + while (!bio && (nr_vecs /= 2)) + bio = bio_alloc(gfp_flags, nr_vecs); + } + + if (bio) { + bio_set_dev(bio, bdev); + __sdfat_set_bio_sector(bio, first_sector); + } + return bio; +} + +static int sdfat_mpage_writepage(struct page *page, + struct writeback_control *wbc, void *data) +{ + struct mpage_data *mpd = data; + struct bio *bio = mpd->bio; + struct address_space *mapping = page->mapping; + struct inode *inode = page->mapping->host; + const unsigned int blkbits = inode->i_blkbits; + const unsigned int blocks_per_page = PAGE_SIZE >> blkbits; + sector_t last_block; + sector_t block_in_file; + sector_t blocks[MAX_BUF_PER_PAGE]; + unsigned int page_block; + unsigned int first_unmapped = blocks_per_page; + struct block_device *bdev = NULL; + int boundary = 0; + sector_t boundary_block = 0; + struct block_device *boundary_bdev = NULL; + int length; + struct buffer_head map_bh; + loff_t i_size = i_size_read(inode); + unsigned long end_index = i_size >> PAGE_SHIFT; + int ret = 0; + + if (page_has_buffers(page)) { + struct buffer_head *head = page_buffers(page); + struct buffer_head *bh = head; + + /* If they're all mapped and dirty, do it */ + page_block = 0; + do { + BUG_ON(buffer_locked(bh)); + if (!buffer_mapped(bh)) { + /* + * unmapped dirty buffers are created by + * __set_page_dirty_buffers -> mmapped data + */ + if (buffer_dirty(bh)) + goto confused; + if (first_unmapped == blocks_per_page) + first_unmapped = page_block; + continue; + } + + if (first_unmapped != blocks_per_page) + goto confused; /* hole -> non-hole */ + + if (!buffer_dirty(bh) || !buffer_uptodate(bh)) + goto confused; + + /* bh should be mapped if delay is set */ + if (buffer_delay(bh)) { + sector_t blk_in_file = + (sector_t)(page->index << (PAGE_SHIFT - blkbits)) + page_block; + + BUG_ON(bh->b_size != (1 << blkbits)); + if (page->index > end_index) { + MMSG("%s(inode:%p) " + "over end with delayed buffer" + "(page_idx:%u, end_idx:%u)\n", + __func__, inode, + (u32)page->index, + (u32)end_index); + goto confused; + } + + ret = mpd->get_block(inode, blk_in_file, bh, 1); + if (ret) { + MMSG("%s(inode:%p) " + "failed to getblk(ret:%d)\n", + __func__, inode, ret); + goto confused; + } + + BUG_ON(buffer_delay(bh)); + + if (buffer_new(bh)) { + clear_buffer_new(bh); + __sdfat_clean_bdev_aliases(bh->b_bdev, bh->b_blocknr); + } + } + + if (page_block) { + if (bh->b_blocknr != blocks[page_block-1] + 1) { + MMSG("%s(inode:%p) pblk(%d) " + "no_seq(prev:%lld, new:%lld)\n", + __func__, inode, page_block, + (u64)blocks[page_block-1], + (u64)bh->b_blocknr); + goto confused; + } + } + blocks[page_block++] = 
bh->b_blocknr; + boundary = buffer_boundary(bh); + if (boundary) { + boundary_block = bh->b_blocknr; + boundary_bdev = bh->b_bdev; + } + bdev = bh->b_bdev; + } while ((bh = bh->b_this_page) != head); + + if (first_unmapped) + goto page_is_mapped; + + /* + * Page has buffers, but they are all unmapped. The page was + * created by pagein or read over a hole which was handled by + * block_read_full_page(). If this address_space is also + * using mpage_readpages then this can rarely happen. + */ + goto confused; + } + + /* + * The page has no buffers: map it to disk + */ + BUG_ON(!PageUptodate(page)); + block_in_file = (sector_t)page->index << (PAGE_SHIFT - blkbits); + last_block = (i_size - 1) >> blkbits; + map_bh.b_page = page; + for (page_block = 0; page_block < blocks_per_page; ) { + + map_bh.b_state = 0; + map_bh.b_size = 1 << blkbits; + if (mpd->get_block(inode, block_in_file, &map_bh, 1)) + goto confused; + + if (buffer_new(&map_bh)) + __sdfat_clean_bdev_aliases(map_bh.b_bdev, map_bh.b_blocknr); + if (buffer_boundary(&map_bh)) { + boundary_block = map_bh.b_blocknr; + boundary_bdev = map_bh.b_bdev; + } + + if (page_block) { + if (map_bh.b_blocknr != blocks[page_block-1] + 1) + goto confused; + } + blocks[page_block++] = map_bh.b_blocknr; + boundary = buffer_boundary(&map_bh); + bdev = map_bh.b_bdev; + if (block_in_file == last_block) + break; + block_in_file++; + } + BUG_ON(page_block == 0); + + first_unmapped = page_block; + +page_is_mapped: + if (page->index >= end_index) { + /* + * The page straddles i_size. It must be zeroed out on each + * and every writepage invocation because it may be mmapped. + * "A file is mapped in multiples of the page size. For a file + * that is not a multiple of the page size, the remaining memory + * is zeroed when mapped, and writes to that region are not + * written out to the file." + */ + unsigned int offset = i_size & (PAGE_SIZE - 1); + + if (page->index > end_index || !offset) { + MMSG("%s(inode:%p) over end " + "(page_idx:%u, end_idx:%u off:%u)\n", + __func__, inode, (u32)page->index, + (u32)end_index, (u32)offset); + goto confused; + } + zero_user_segment(page, offset, PAGE_SIZE); + } + + /* + * This page will go to BIO. Do we need to send this BIO off first? + * + * REMARK : added ELSE_IF for ALIGNMENT_MPAGE_WRITE of SDFAT + */ + if (bio) { + if (mpd->last_block_in_bio != blocks[0] - 1) { + bio = mpage_bio_submit_write(0, bio); + } else if (mpd->size_to_align) { + unsigned int mask = mpd->size_to_align - 1; + sector_t max_end_block = + (__sdfat_bio_sector(bio) & ~(mask)) + mask; + + if ((__sdfat_bio_size(bio) != (1 << (mask + 1))) && + (mpd->last_block_in_bio == max_end_block)) { + MMSG("%s(inode:%p) alignment mpage_bio_submit" + "(start:%u, len:%u aligned:%u)\n", + __func__, inode, + (unsigned int)__sdfat_bio_sector(bio), + (unsigned int)(mpd->last_block_in_bio - + __sdfat_bio_sector(bio) + 1), + (unsigned int)mpd->size_to_align); + bio = mpage_bio_submit_write(REQ_NOMERGE, bio); + } + } + } + +alloc_new: + if (!bio) { + bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9), + bio_get_nr_vecs(bdev), GFP_NOFS|__GFP_HIGH); + if (!bio) + goto confused; + } + + /* + * Must try to add the page before marking the buffer clean or + * the confused fail path above (OOM) will be very confused when + * it finds all bh marked clean (i.e. 
it will not write anything) + */ + length = first_unmapped << blkbits; + if (bio_add_page(bio, page, length, 0) < length) { + bio = mpage_bio_submit_write(0, bio); + goto alloc_new; + } + + /* + * OK, we have our BIO, so we can now mark the buffers clean. Make + * sure to only clean buffers which we know we'll be writing. + */ + if (page_has_buffers(page)) { + struct buffer_head *head = page_buffers(page); + struct buffer_head *bh = head; + unsigned int buffer_counter = 0; + + do { + if (buffer_counter++ == first_unmapped) + break; + clear_buffer_dirty(bh); + bh = bh->b_this_page; + } while (bh != head); + + /* + * we cannot drop the bh if the page is not uptodate + * or a concurrent readpage would fail to serialize with the bh + * and it would read from disk before we reach the platter. + */ + if (buffer_heads_over_limit && PageUptodate(page)) + try_to_free_buffers(page); + } + + BUG_ON(PageWriteback(page)); + set_page_writeback(page); + + /* + * FIXME FOR DEFRAGMENTATION : CODE REVIEW IS REQUIRED + * + * Turn off MAPPED flag in victim's bh if defrag on. + * Another write_begin can starts after get_block for defrag victims + * called. + * In this case, write_begin calls get_block and get original block + * number and previous defrag will be canceled. + */ + if (unlikely(__check_dfr_on(inode, (loff_t)(page->index << PAGE_SHIFT), + (loff_t)((page->index + 1) << PAGE_SHIFT), __func__))) { + struct buffer_head *head = page_buffers(page); + struct buffer_head *bh = head; + + do { + clear_buffer_mapped(bh); + bh = bh->b_this_page; + } while (bh != head); + } + + unlock_page(page); + if (boundary || (first_unmapped != blocks_per_page)) { + bio = mpage_bio_submit_write(0, bio); + if (boundary_block) { + write_boundary_block(boundary_bdev, + boundary_block, 1 << blkbits); + } + } else { + mpd->last_block_in_bio = blocks[blocks_per_page - 1]; + } + + goto out; + +confused: + if (bio) + bio = mpage_bio_submit_write(0, bio); + + if (mpd->use_writepage) { + ret = mapping->a_ops->writepage(page, wbc); + } else { + ret = -EAGAIN; + goto out; + } + /* + * The caller has a ref on the inode, so *mapping is stable + */ + mapping_set_error(mapping, ret); +out: + mpd->bio = bio; + return ret; +} + +int sdfat_mpage_writepages(struct address_space *mapping, + struct writeback_control *wbc, get_block_t *get_block) +{ + struct blk_plug plug; + int ret; + struct mpage_data mpd = { + .bio = NULL, + .last_block_in_bio = 0, + .get_block = get_block, + .use_writepage = 1, + .size_to_align = __calc_size_to_align(mapping->host->i_sb), + }; + + BUG_ON(!get_block); + blk_start_plug(&plug); + ret = write_cache_pages(mapping, wbc, sdfat_mpage_writepage, &mpd); + if (mpd.bio) + mpage_bio_submit_write(0, mpd.bio); + blk_finish_plug(&plug); + return ret; +} + +#endif /* CONFIG_SDFAT_ALIGNED_MPAGE_WRITE */ + diff --git a/fs/sdfat/nls.c b/fs/sdfat/nls.c new file mode 100644 index 000000000000..b65634454c55 --- /dev/null +++ b/fs/sdfat/nls.c @@ -0,0 +1,478 @@ +/* + * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
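In sdfat_mpage_writepage() above, the page that straddles i_size is zeroed from the in-file offset to the end of the page on every writepage, since mmap stores can dirty bytes past EOF that must never reach the medium. That straddle handling reduced to a runnable sketch (the over-end and whole-page cases the driver also checks are omitted):

#include <stdio.h>
#include <string.h>

#define DEMO_PAGE_SIZE 4096

static void zero_tail(unsigned char *page, long long i_size,
                      long long page_index)
{
        long long end_index = i_size / DEMO_PAGE_SIZE;
        unsigned int offset = i_size & (DEMO_PAGE_SIZE - 1);

        /* only the page containing EOF, and only if EOF is mid-page */
        if (page_index == end_index && offset)
                memset(page + offset, 0, DEMO_PAGE_SIZE - offset);
}

int main(void)
{
        unsigned char page[DEMO_PAGE_SIZE];

        memset(page, 0xAA, sizeof(page));         /* stale mmap residue */
        zero_tail(page, DEMO_PAGE_SIZE + 100, 1); /* EOF at byte 100 of page 1 */
        printf("%02X %02X\n", page[99], page[100]); /* prints: AA 00 */
        return 0;
}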
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +/************************************************************************/ +/* */ +/* PROJECT : exFAT & FAT12/16/32 File System */ +/* FILE : nls.c */ +/* PURPOSE : sdFAT NLS Manager */ +/* */ +/*----------------------------------------------------------------------*/ +/* NOTES */ +/* */ +/* */ +/************************************************************************/ +#include <linux/string.h> +#include <linux/nls.h> + +#include "sdfat.h" +#include "core.h" + +/*----------------------------------------------------------------------*/ +/* Global Variable Definitions */ +/*----------------------------------------------------------------------*/ + +/*----------------------------------------------------------------------*/ +/* Local Variable Definitions */ +/*----------------------------------------------------------------------*/ + +static u16 bad_dos_chars[] = { + /* + , ; = [ ] */ + 0x002B, 0x002C, 0x003B, 0x003D, 0x005B, 0x005D, + 0xFF0B, 0xFF0C, 0xFF1B, 0xFF1D, 0xFF3B, 0xFF3D, + 0 +}; + +/* + * Allow full-width illegal characters : + * "MS windows 7" supports full-width-invalid-name-characters. + * So we should check half-width-invalid-name-characters(ASCII) only + * for compatibility. + * + * " * / : < > ? \ | + * + * patch 1.2.0 + */ +static u16 bad_uni_chars[] = { + 0x0022, 0x002A, 0x002F, 0x003A, + 0x003C, 0x003E, 0x003F, 0x005C, 0x007C, +#if 0 /* allow full-width characters */ + 0x201C, 0x201D, 0xFF0A, 0xFF0F, 0xFF1A, + 0xFF1C, 0xFF1E, 0xFF1F, 0xFF3C, 0xFF5C, +#endif + 0 +}; + +/*----------------------------------------------------------------------*/ +/* Local Function Declarations */ +/*----------------------------------------------------------------------*/ +static s32 convert_uni_to_ch(struct nls_table *nls, u16 uni, u8 *ch, s32 *lossy); +static s32 convert_ch_to_uni(struct nls_table *nls, u8 *ch, u16 *uni, s32 *lossy); + +static u16 nls_upper(struct super_block *sb, u16 a) +{ + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + + if (SDFAT_SB(sb)->options.casesensitive) + return a; + if ((fsi->vol_utbl)[get_col_index(a)] != NULL) + return (fsi->vol_utbl)[get_col_index(a)][get_row_index(a)]; + else + return a; +} +/*======================================================================*/ +/* Global Function Definitions */ +/*======================================================================*/ +u16 *nls_wstrchr(u16 *str, u16 wchar) +{ + while (*str) { + if (*(str++) == wchar) + return str; + } + + return 0; +} + +s32 nls_cmp_sfn(struct super_block *sb, u8 *a, u8 *b) +{ + return strncmp((void *)a, (void *)b, DOS_NAME_LENGTH); +} + +s32 nls_cmp_uniname(struct super_block *sb, u16 *a, u16 *b) +{ + s32 i; + + for (i = 0; i < MAX_NAME_LENGTH; i++, a++, b++) { + if (nls_upper(sb, *a) != nls_upper(sb, *b)) + return 1; + if (*a == 0x0) + return 0; + } + return 0; +} + +#define CASE_LOWER_BASE (0x08) /* base is lower case */ +#define CASE_LOWER_EXT (0x10) /* extension is lower case */ + +s32 nls_uni16s_to_sfn(struct super_block *sb, UNI_NAME_T *p_uniname, DOS_NAME_T *p_dosname, s32 *p_lossy) +{ + s32 i, j, len, lossy = NLS_NAME_NO_LOSSY; + u8 buf[MAX_CHARSET_SIZE]; + u8 lower = 0, upper = 0; + u8 *dosname = p_dosname->name; + u16 *uniname = p_uniname->name; + u16 *p, *last_period; + struct nls_table *nls = SDFAT_SB(sb)->nls_disk; + + /* DOSNAME is filled with space */ + for (i = 0; i < DOS_NAME_LENGTH; i++) + *(dosname+i) = ' '; + + /* DOT 
and DOTDOT are handled by VFS layer */ + + /* search for the last embedded period */ + last_period = NULL; + for (p = uniname; *p; p++) { + if (*p == (u16) '.') + last_period = p; + } + + i = 0; + while (i < DOS_NAME_LENGTH) { + if (i == 8) { + if (last_period == NULL) + break; + + if (uniname <= last_period) { + if (uniname < last_period) + lossy |= NLS_NAME_OVERLEN; + uniname = last_period + 1; + } + } + + if (*uniname == (u16) '\0') { + break; + } else if (*uniname == (u16) ' ') { + lossy |= NLS_NAME_LOSSY; + } else if (*uniname == (u16) '.') { + if (uniname < last_period) + lossy |= NLS_NAME_LOSSY; + else + i = 8; + } else if (nls_wstrchr(bad_dos_chars, *uniname)) { + lossy |= NLS_NAME_LOSSY; + *(dosname+i) = '_'; + i++; + } else { + len = convert_uni_to_ch(nls, *uniname, buf, &lossy); + + if (len > 1) { + if ((i >= 8) && ((i+len) > DOS_NAME_LENGTH)) + break; + + if ((i < 8) && ((i+len) > 8)) { + i = 8; + continue; + } + + lower = 0xFF; + + for (j = 0; j < len; j++, i++) + *(dosname+i) = *(buf+j); + } else { /* len == 1 */ + if ((*buf >= 'a') && (*buf <= 'z')) { + *(dosname+i) = *buf - ('a' - 'A'); + + lower |= (i < 8) ? + CASE_LOWER_BASE : + CASE_LOWER_EXT; + } else if ((*buf >= 'A') && (*buf <= 'Z')) { + *(dosname+i) = *buf; + + upper |= (i < 8) ? + CASE_LOWER_BASE : + CASE_LOWER_EXT; + } else { + *(dosname+i) = *buf; + } + i++; + } + } + + uniname++; + } + + if (*dosname == 0xE5) + *dosname = 0x05; + if (*uniname != 0x0) + lossy |= NLS_NAME_OVERLEN; + + if (upper & lower) + p_dosname->name_case = 0xFF; + else + p_dosname->name_case = lower; + + if (p_lossy) + *p_lossy = lossy; + return i; +} + +s32 nls_sfn_to_uni16s(struct super_block *sb, DOS_NAME_T *p_dosname, UNI_NAME_T *p_uniname) +{ + s32 i = 0, j, n = 0; + u8 buf[MAX_DOSNAME_BUF_SIZE]; + u8 *dosname = p_dosname->name; + u16 *uniname = p_uniname->name; + struct nls_table *nls = SDFAT_SB(sb)->nls_disk; + + if (*dosname == 0x05) { + *buf = 0xE5; + i++; + n++; + } + + for ( ; i < 8; i++, n++) { + if (*(dosname+i) == ' ') + break; + + if ((*(dosname+i) >= 'A') && (*(dosname+i) <= 'Z') && + (p_dosname->name_case & CASE_LOWER_BASE)) + *(buf+n) = *(dosname+i) + ('a' - 'A'); + else + *(buf+n) = *(dosname+i); + } + if (*(dosname+8) != ' ') { + *(buf+n) = '.'; + n++; + } + + for (i = 8; i < DOS_NAME_LENGTH; i++, n++) { + if (*(dosname+i) == ' ') + break; + + if ((*(dosname+i) >= 'A') && (*(dosname+i) <= 'Z') && + (p_dosname->name_case & CASE_LOWER_EXT)) + *(buf+n) = *(dosname+i) + ('a' - 'A'); + else + *(buf+n) = *(dosname+i); + } + *(buf+n) = '\0'; + + i = j = 0; + while (j < MAX_NAME_LENGTH) { + if (*(buf+i) == '\0') + break; + + i += convert_ch_to_uni(nls, (buf+i), uniname, NULL); + + uniname++; + j++; + } + + *uniname = (u16) '\0'; + return j; +} + +static s32 __nls_utf16s_to_vfsname(struct super_block *sb, UNI_NAME_T *p_uniname, u8 *p_cstring, s32 buflen) +{ + s32 len; + const u16 *uniname = p_uniname->name; + + /* always len >= 0 */ + len = utf16s_to_utf8s(uniname, MAX_NAME_LENGTH, UTF16_HOST_ENDIAN, + p_cstring, buflen); + p_cstring[len] = '\0'; + return len; +} + +static s32 __nls_vfsname_to_utf16s(struct super_block *sb, const u8 *p_cstring, + const s32 len, UNI_NAME_T *p_uniname, s32 *p_lossy) +{ + s32 i, unilen, lossy = NLS_NAME_NO_LOSSY; + u16 upname[MAX_NAME_LENGTH+1]; + u16 *uniname = p_uniname->name; + + BUG_ON(!len); + + unilen = utf8s_to_utf16s(p_cstring, len, UTF16_HOST_ENDIAN, + (wchar_t *)uniname, MAX_NAME_LENGTH+2); + if (unilen < 0) { + MMSG("%s: failed to vfsname_to_utf16(err:%d) " + "vfsnamelen:%d", __func__, 
unilen, len); + return unilen; + } + + if (unilen > MAX_NAME_LENGTH) { + MMSG("%s: failed to vfsname_to_utf16(estr:ENAMETOOLONG) " + "vfsnamelen:%d, unilen:%d>%d", + __func__, len, unilen, MAX_NAME_LENGTH); + return -ENAMETOOLONG; + } + + p_uniname->name_len = (u8)(unilen & 0xFF); + + for (i = 0; i < unilen; i++) { + if ((*uniname < 0x0020) || nls_wstrchr(bad_uni_chars, *uniname)) + lossy |= NLS_NAME_LOSSY; + + *(upname+i) = nls_upper(sb, *uniname); + uniname++; + } + + *uniname = (u16)'\0'; + p_uniname->name_len = unilen; + p_uniname->name_hash = calc_chksum_2byte((void *) upname, + unilen << 1, 0, CS_DEFAULT); + + if (p_lossy) + *p_lossy = lossy; + + return unilen; +} + +static s32 __nls_uni16s_to_vfsname(struct super_block *sb, UNI_NAME_T *p_uniname, u8 *p_cstring, s32 buflen) +{ + s32 i, j, len, out_len = 0; + u8 buf[MAX_CHARSET_SIZE]; + const u16 *uniname = p_uniname->name; + struct nls_table *nls = SDFAT_SB(sb)->nls_io; + + i = 0; + while ((i < MAX_NAME_LENGTH) && (out_len < (buflen-1))) { + if (*uniname == (u16)'\0') + break; + + len = convert_uni_to_ch(nls, *uniname, buf, NULL); + + if (out_len + len >= buflen) + len = (buflen - 1) - out_len; + + out_len += len; + + if (len > 1) { + for (j = 0; j < len; j++) + *p_cstring++ = (s8) *(buf+j); + } else { /* len == 1 */ + *p_cstring++ = (s8) *buf; + } + + uniname++; + i++; + } + + *p_cstring = '\0'; + return out_len; +} + +static s32 __nls_vfsname_to_uni16s(struct super_block *sb, const u8 *p_cstring, + const s32 len, UNI_NAME_T *p_uniname, s32 *p_lossy) +{ + s32 i, unilen, lossy = NLS_NAME_NO_LOSSY; + u16 upname[MAX_NAME_LENGTH+1]; + u16 *uniname = p_uniname->name; + struct nls_table *nls = SDFAT_SB(sb)->nls_io; + + BUG_ON(!len); + + i = unilen = 0; + while ((unilen < MAX_NAME_LENGTH) && (i < len)) { + i += convert_ch_to_uni(nls, (u8 *)(p_cstring+i), uniname, &lossy); + + if ((*uniname < 0x0020) || nls_wstrchr(bad_uni_chars, *uniname)) + lossy |= NLS_NAME_LOSSY; + + *(upname+unilen) = nls_upper(sb, *uniname); + + uniname++; + unilen++; + } + + if (*(p_cstring+i) != '\0') + lossy |= NLS_NAME_OVERLEN; + + *uniname = (u16)'\0'; + p_uniname->name_len = unilen; + p_uniname->name_hash = + calc_chksum_2byte((void *) upname, unilen<<1, 0, CS_DEFAULT); + + if (p_lossy) + *p_lossy = lossy; + + return unilen; +} + +s32 nls_uni16s_to_vfsname(struct super_block *sb, UNI_NAME_T *uniname, u8 *p_cstring, s32 buflen) +{ + if (SDFAT_SB(sb)->options.utf8) + return __nls_utf16s_to_vfsname(sb, uniname, p_cstring, buflen); + + return __nls_uni16s_to_vfsname(sb, uniname, p_cstring, buflen); +} + +s32 nls_vfsname_to_uni16s(struct super_block *sb, const u8 *p_cstring, const s32 len, UNI_NAME_T *uniname, s32 *p_lossy) +{ + if (SDFAT_SB(sb)->options.utf8) + return __nls_vfsname_to_utf16s(sb, p_cstring, len, uniname, p_lossy); + return __nls_vfsname_to_uni16s(sb, p_cstring, len, uniname, p_lossy); +} + +/*======================================================================*/ +/* Local Function Definitions */ +/*======================================================================*/ + +static s32 convert_ch_to_uni(struct nls_table *nls, u8 *ch, u16 *uni, s32 *lossy) +{ + int len; + + *uni = 0x0; + + if (ch[0] < 0x80) { + *uni = (u16) ch[0]; + return 1; + } + + len = nls->char2uni(ch, MAX_CHARSET_SIZE, uni); + if (len < 0) { + /* conversion failed */ + DMSG("%s: fail to use nls\n", __func__); + if (lossy != NULL) + *lossy |= NLS_NAME_LOSSY; + *uni = (u16) '_'; + if (!strcmp(nls->charset, "utf8")) + return 1; + return 2; + } + + return len; +} /* end of 
convert_ch_to_uni */ + +static s32 convert_uni_to_ch(struct nls_table *nls, u16 uni, u8 *ch, s32 *lossy) +{ + int len; + + ch[0] = 0x0; + + if (uni < 0x0080) { + ch[0] = (u8) uni; + return 1; + } + + len = nls->uni2char(uni, ch, MAX_CHARSET_SIZE); + if (len < 0) { + /* conversion failed */ + DMSG("%s: fail to use nls\n", __func__); + if (lossy != NULL) + *lossy |= NLS_NAME_LOSSY; + ch[0] = '_'; + return 1; + } + + return len; + +} /* end of convert_uni_to_ch */ + +/* end of nls.c */ diff --git a/fs/sdfat/sdfat.c b/fs/sdfat/sdfat.c new file mode 100644 index 000000000000..2d9955cfc993 --- /dev/null +++ b/fs/sdfat/sdfat.c @@ -0,0 +1,5255 @@ +/* + * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +/************************************************************************/ +/* */ +/* PROJECT : exFAT & FAT12/16/32 File System */ +/* FILE : core.c */ +/* PURPOSE : sdFAT glue layer for supporting VFS */ +/* */ +/*----------------------------------------------------------------------*/ +/* NOTES */ +/* */ +/* */ +/************************************************************************/ + +#include <linux/version.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/time.h> +#include <linux/slab.h> +#include <linux/seq_file.h> +#include <linux/pagemap.h> +#include <linux/mpage.h> +#include <linux/buffer_head.h> +#include <linux/exportfs.h> +#include <linux/mount.h> +#include <linux/vfs.h> +#include <linux/parser.h> +#include <linux/uio.h> +#include <linux/writeback.h> +#include <linux/log2.h> +#include <linux/hash.h> +#include <linux/backing-dev.h> +#include <linux/sched.h> +#include <linux/fs_struct.h> +#include <linux/namei.h> +#include <linux/bio.h> +#include <linux/blkdev.h> +#include <linux/swap.h> /* for mark_page_accessed() */ +#include <linux/vmalloc.h> +#include <asm/current.h> +#include <asm/unaligned.h> +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 16, 0) +#include <linux/iversion.h> +#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 10, 0) +#include <linux/aio.h> +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 0, 0) +#error SDFAT only supports linux kernel version 3.0 or higher +#endif + +#include "sdfat.h" +#include "version.h" + +/* skip iterating emit_dots when dir is empty */ +#define ITER_POS_FILLED_DOTS (2) + +/* type index declare at sdfat.h */ +const char *FS_TYPE_STR[] = { + "auto", + "exfat", + "vfat" +}; + +static struct kset *sdfat_kset; +static struct kmem_cache *sdfat_inode_cachep; + + +static int sdfat_default_codepage = CONFIG_SDFAT_DEFAULT_CODEPAGE; +static char sdfat_default_iocharset[] = CONFIG_SDFAT_DEFAULT_IOCHARSET; +static const char sdfat_iocharset_with_utf8[] = "iso8859-1"; + +#ifdef CONFIG_SDFAT_TRACE_SB_LOCK +static unsigned long __lock_jiffies; +#endif + +static void sdfat_truncate(struct inode *inode, loff_t old_size); +static int sdfat_get_block(struct inode *inode, sector_t 
iblock, + struct buffer_head *bh_result, int create); + +static struct inode *sdfat_iget(struct super_block *sb, loff_t i_pos); +static struct inode *sdfat_build_inode(struct super_block *sb, const FILE_ID_T *fid, loff_t i_pos); +static void sdfat_detach(struct inode *inode); +static void sdfat_attach(struct inode *inode, loff_t i_pos); +static inline unsigned long sdfat_hash(loff_t i_pos); +static int __sdfat_write_inode(struct inode *inode, int sync); +static int sdfat_sync_inode(struct inode *inode); +static int sdfat_write_inode(struct inode *inode, struct writeback_control *wbc); +static void sdfat_write_super(struct super_block *sb); +static void sdfat_write_failed(struct address_space *mapping, loff_t to); + +static void sdfat_init_namebuf(DENTRY_NAMEBUF_T *nb); +static int sdfat_alloc_namebuf(DENTRY_NAMEBUF_T *nb); +static void sdfat_free_namebuf(DENTRY_NAMEBUF_T *nb); + +/************************************************************************* + * INNER FUNCTIONS FOR FUNCTIONS WHICH HAS KERNEL VERSION DEPENDENCY + *************************************************************************/ +static int __sdfat_getattr(struct inode *inode, struct kstat *stat); +static void __sdfat_writepage_end_io(struct bio *bio, int err); +static inline void __lock_super(struct super_block *sb); +static inline void __unlock_super(struct super_block *sb); +static int __sdfat_create(struct inode *dir, struct dentry *dentry); +static int __sdfat_revalidate(struct dentry *dentry); +static int __sdfat_revalidate_ci(struct dentry *dentry, unsigned int flags); +static int __sdfat_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync); +static struct dentry *__sdfat_lookup(struct inode *dir, struct dentry *dentry); +static int __sdfat_mkdir(struct inode *dir, struct dentry *dentry); +static int __sdfat_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry); +static int __sdfat_show_options(struct seq_file *m, struct super_block *sb); +static inline ssize_t __sdfat_blkdev_direct_IO(int rw, struct kiocb *iocb, + struct inode *inode, void *iov_u, loff_t offset, + unsigned long nr_segs); +static inline ssize_t __sdfat_direct_IO(int rw, struct kiocb *iocb, + struct inode *inode, void *iov_u, loff_t offset, + loff_t count, unsigned long nr_segs); +static int __sdfat_d_hash(const struct dentry *dentry, struct qstr *qstr); +static int __sdfat_d_hashi(const struct dentry *dentry, struct qstr *qstr); +static int __sdfat_cmp(const struct dentry *dentry, unsigned int len, + const char *str, const struct qstr *name); +static int __sdfat_cmpi(const struct dentry *dentry, unsigned int len, + const char *str, const struct qstr *name); + +/************************************************************************* + * FUNCTIONS WHICH HAS KERNEL VERSION DEPENDENCY + *************************************************************************/ +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 16, 0) +static inline void inode_set_iversion(struct inode *inode, u64 val) +{ + inode->i_version = val; +} +static inline u64 inode_peek_iversion(struct inode *inode) +{ + return inode->i_version; +} +#endif + + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 14, 0) + /* EMPTY */ +#else /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 14, 0) */ +static inline void bio_set_dev(struct bio *bio, struct block_device *bdev) +{ + bio->bi_bdev = bdev; +} +#endif + + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0) +static int sdfat_getattr(const struct path *path, struct kstat *stat, + u32 
request_mask, unsigned int query_flags) +{ + struct inode *inode = d_backing_inode(path->dentry); + + return __sdfat_getattr(inode, stat); +} +#else /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0) */ +static int sdfat_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) +{ + struct inode *inode = dentry->d_inode; + + return __sdfat_getattr(inode, stat); +} +#endif + + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) +static inline void __sdfat_clean_bdev_aliases(struct block_device *bdev, sector_t block) +{ + clean_bdev_aliases(bdev, block, 1); +} +#else /* LINUX_VERSION_CODE < KERNEL_VERSION(4,10,0) */ +static inline void __sdfat_clean_bdev_aliases(struct block_device *bdev, sector_t block) +{ + unmap_underlying_metadata(bdev, block); +} +#endif + + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0) +static int sdfat_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry, + unsigned int flags) +{ + /* + * The VFS already checks for existence, so for local filesystems + * the RENAME_NOREPLACE implementation is equivalent to plain rename. + * Don't support any other flags. + */ + if (flags & ~RENAME_NOREPLACE) + return -EINVAL; + return __sdfat_rename(old_dir, old_dentry, new_dir, new_dentry); +} +#else /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0) */ +static int sdfat_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry) +{ + return __sdfat_rename(old_dir, old_dentry, new_dir, new_dentry); +} + +static int setattr_prepare(struct dentry *dentry, struct iattr *attr) +{ + struct inode *inode = dentry->d_inode; + + return inode_change_ok(inode, attr); +} +#endif + + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0) +static inline void __sdfat_submit_bio_write(struct bio *bio) +{ + bio_set_op_attrs(bio, REQ_OP_WRITE, 0); + submit_bio(bio); +} + +static inline unsigned int __sdfat_full_name_hash(const struct dentry *dentry, const char *name, unsigned int len) +{ + return full_name_hash(dentry, name, len); +} + +static inline unsigned long __sdfat_init_name_hash(const struct dentry *dentry) +{ + return init_name_hash(dentry); +} +#else /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0) */ +static inline void __sdfat_submit_bio_write(struct bio *bio) +{ + submit_bio(WRITE, bio); +} + +static inline unsigned int __sdfat_full_name_hash(const struct dentry *unused, const char *name, unsigned int len) +{ + return full_name_hash(name, len); +} + +static inline unsigned long __sdfat_init_name_hash(const struct dentry *unused) +{ + return init_name_hash(); +} +#endif + + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 21) + /* EMPTY */ +#else /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 21) */ +static inline void inode_lock(struct inode *inode) +{ + mutex_lock(&inode->i_mutex); +} + +static inline void inode_unlock(struct inode *inode) +{ + mutex_unlock(&inode->i_mutex); +} +#endif + + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0) +static inline int sdfat_remount_syncfs(struct super_block *sb) +{ + sync_filesystem(sb); + return 0; +} +#else /* LINUX_VERSION_CODE < KERNEL_VERSION(3, 16, 0) */ +static inline int sdfat_remount_syncfs(struct super_block *sb) +{ + /* + * We don't need to call sync_filesystem(sb) here + * because the VFS calls it for us. 
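+ * (On newer kernels the VFS stopped issuing that sync from the + * remount path itself, which is why the v3.16+ variant above has + * to call sync_filesystem() directly.)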
+ */ + return 0; +} +#endif + + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 14, 0) +static inline sector_t __sdfat_bio_sector(struct bio *bio) +{ + return bio->bi_iter.bi_sector; +} + +static inline void __sdfat_set_bio_iterate(struct bio *bio, sector_t sector, + unsigned int size, unsigned int idx, unsigned int done) +{ + struct bvec_iter *iter = &(bio->bi_iter); + + iter->bi_sector = sector; + iter->bi_size = size; + iter->bi_idx = idx; + iter->bi_bvec_done = done; +} + +static void __sdfat_truncate_pagecache(struct inode *inode, + loff_t to, loff_t newsize) +{ + truncate_pagecache(inode, newsize); +} + +static int sdfat_d_hash(const struct dentry *dentry, struct qstr *qstr) +{ + return __sdfat_d_hash(dentry, qstr); +} + +static int sdfat_d_hashi(const struct dentry *dentry, struct qstr *qstr) +{ + return __sdfat_d_hashi(dentry, qstr); +} + +//instead of sdfat_readdir +static int sdfat_iterate(struct file *filp, struct dir_context *ctx) +{ + struct inode *inode = filp->f_path.dentry->d_inode; + struct super_block *sb = inode->i_sb; + struct sdfat_sb_info *sbi = SDFAT_SB(sb); + FS_INFO_T *fsi = &(sbi->fsi); + DIR_ENTRY_T de; + DENTRY_NAMEBUF_T *nb = &(de.NameBuf); + unsigned long inum; + loff_t cpos; + int err = 0, fake_offset = 0; + + sdfat_init_namebuf(nb); + __lock_super(sb); + + cpos = ctx->pos; + if ((fsi->vol_type == EXFAT) || (inode->i_ino == SDFAT_ROOT_INO)) { + if (!dir_emit_dots(filp, ctx)) + goto out; + if (ctx->pos == ITER_POS_FILLED_DOTS) { + cpos = 0; + fake_offset = 1; + } + } + if (cpos & (DENTRY_SIZE - 1)) { + err = -ENOENT; + goto out; + } + + /* name buffer should be allocated before use */ + err = sdfat_alloc_namebuf(nb); + if (err) + goto out; +get_new: + SDFAT_I(inode)->fid.size = i_size_read(inode); + SDFAT_I(inode)->fid.rwoffset = cpos >> DENTRY_SIZE_BITS; + + if (cpos >= SDFAT_I(inode)->fid.size) + goto end_of_dir; + + err = fsapi_readdir(inode, &de); + if (err) { + // at least we tried to read a sector + // move cpos to next sector position (should be aligned) + if (err == -EIO) { + cpos += 1 << (sb->s_blocksize_bits); + cpos &= ~((u32)sb->s_blocksize-1); + } + + err = -EIO; + goto end_of_dir; + } + + cpos = SDFAT_I(inode)->fid.rwoffset << DENTRY_SIZE_BITS; + + if (!nb->lfn[0]) + goto end_of_dir; + + if (!memcmp(nb->sfn, DOS_CUR_DIR_NAME, DOS_NAME_LENGTH)) { + inum = inode->i_ino; + } else if (!memcmp(nb->sfn, DOS_PAR_DIR_NAME, DOS_NAME_LENGTH)) { + inum = parent_ino(filp->f_path.dentry); + } else { + loff_t i_pos = ((loff_t) SDFAT_I(inode)->fid.start_clu << 32) | + ((SDFAT_I(inode)->fid.rwoffset-1) & 0xffffffff); + struct inode *tmp = sdfat_iget(sb, i_pos); + + if (tmp) { + inum = tmp->i_ino; + iput(tmp); + } else { + inum = iunique(sb, SDFAT_ROOT_INO); + } + } + + /* Before calling dir_emit(), sb_lock should be released. + * Because page fault can occur in dir_emit() when the size of buffer given + * from user is larger than one page size + */ + __unlock_super(sb); + if (!dir_emit(ctx, nb->lfn, strlen(nb->lfn), inum, + (de.Attr & ATTR_SUBDIR) ? DT_DIR : DT_REG)) + goto out_unlocked; + __lock_super(sb); + + ctx->pos = cpos; + goto get_new; + +end_of_dir: + if (!cpos && fake_offset) + cpos = ITER_POS_FILLED_DOTS; + ctx->pos = cpos; +out: + __unlock_super(sb); +out_unlocked: + /* + * To improve performance, free namebuf after unlock sb_lock. 
* If namebuf is not allocated, this function does nothing. + */ + sdfat_free_namebuf(nb); + return err; +} +#else /* LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0) */ +static inline sector_t __sdfat_bio_sector(struct bio *bio) +{ + return bio->bi_sector; +} + +static inline void __sdfat_set_bio_iterate(struct bio *bio, sector_t sector, + unsigned int size, unsigned int idx, unsigned int done) +{ + bio->bi_sector = sector; + bio->bi_idx = idx; + bio->bi_size = size; //PAGE_SIZE; +} + +static void __sdfat_truncate_pagecache(struct inode *inode, + loff_t to, loff_t newsize) +{ + truncate_pagecache(inode, to, newsize); +} + +static int sdfat_d_hash(const struct dentry *dentry, + const struct inode *inode, struct qstr *qstr) +{ + return __sdfat_d_hash(dentry, qstr); +} + +static int sdfat_d_hashi(const struct dentry *dentry, + const struct inode *inode, struct qstr *qstr) +{ + return __sdfat_d_hashi(dentry, qstr); +} + +static int sdfat_readdir(struct file *filp, void *dirent, filldir_t filldir) +{ + struct inode *inode = filp->f_path.dentry->d_inode; + struct super_block *sb = inode->i_sb; + struct sdfat_sb_info *sbi = SDFAT_SB(sb); + FS_INFO_T *fsi = &(sbi->fsi); + DIR_ENTRY_T de; + DENTRY_NAMEBUF_T *nb = &(de.NameBuf); + unsigned long inum; + loff_t cpos; + int err = 0, fake_offset = 0; + + sdfat_init_namebuf(nb); + __lock_super(sb); + + cpos = filp->f_pos; + /* Fake . and .. for the root directory. */ + if ((fsi->vol_type == EXFAT) || (inode->i_ino == SDFAT_ROOT_INO)) { + while (cpos < ITER_POS_FILLED_DOTS) { + if (inode->i_ino == SDFAT_ROOT_INO) + inum = SDFAT_ROOT_INO; + else if (cpos == 0) + inum = inode->i_ino; + else /* (cpos == 1) */ + inum = parent_ino(filp->f_path.dentry); + + if (filldir(dirent, "..", cpos+1, cpos, inum, DT_DIR) < 0) + goto out; + cpos++; + filp->f_pos++; + } + if (cpos == ITER_POS_FILLED_DOTS) { + cpos = 0; + fake_offset = 1; + } + } + if (cpos & (DENTRY_SIZE - 1)) { + err = -ENOENT; + goto out; + } + + /* name buffer should be allocated before use */ + err = sdfat_alloc_namebuf(nb); + if (err) + goto out; +get_new: + SDFAT_I(inode)->fid.size = i_size_read(inode); + SDFAT_I(inode)->fid.rwoffset = cpos >> DENTRY_SIZE_BITS; + + if (cpos >= SDFAT_I(inode)->fid.size) + goto end_of_dir; + + err = fsapi_readdir(inode, &de); + if (err) { + // at least we tried to read a sector + // move cpos to next sector position (should be aligned) + if (err == -EIO) { + cpos += 1 << (sb->s_blocksize_bits); + cpos &= ~((u32)sb->s_blocksize-1); + } + + err = -EIO; + goto end_of_dir; + } + + cpos = SDFAT_I(inode)->fid.rwoffset << DENTRY_SIZE_BITS; + + if (!nb->lfn[0]) + goto end_of_dir; + + if (!memcmp(nb->sfn, DOS_CUR_DIR_NAME, DOS_NAME_LENGTH)) { + inum = inode->i_ino; + } else if (!memcmp(nb->sfn, DOS_PAR_DIR_NAME, DOS_NAME_LENGTH)) { + inum = parent_ino(filp->f_path.dentry); + } else { + loff_t i_pos = ((loff_t) SDFAT_I(inode)->fid.start_clu << 32) | + ((SDFAT_I(inode)->fid.rwoffset-1) & 0xffffffff); + struct inode *tmp = sdfat_iget(sb, i_pos); + + if (tmp) { + inum = tmp->i_ino; + iput(tmp); + } else { + inum = iunique(sb, SDFAT_ROOT_INO); + } + } + + /* Before calling filldir(), sb_lock should be released, + * because a page fault can occur in filldir() when the buffer + * given from user space is larger than one page. + */ + __unlock_super(sb); + if (filldir(dirent, nb->lfn, strlen(nb->lfn), cpos, inum, + (de.Attr & ATTR_SUBDIR) ? DT_DIR : DT_REG) < 0) + goto out_unlocked; + __lock_super(sb); + + filp->f_pos = cpos; + goto get_new; + +end_of_dir: + if (!cpos && fake_offset) + cpos = ITER_POS_FILLED_DOTS; + filp->f_pos = cpos; +out: + __unlock_super(sb); +out_unlocked: + /* + * To improve performance, free the namebuf after unlocking sb_lock. + * If namebuf is not allocated, this function does nothing. + */ + sdfat_free_namebuf(nb); + return err; +} +#endif + + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) + /* EMPTY */ +#else /* LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0) */ +static inline struct inode *file_inode(const struct file *f) +{ + return f->f_dentry->d_inode; +} +#endif + + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0) +static inline int __is_sb_dirty(struct super_block *sb) +{ + return SDFAT_SB(sb)->s_dirt; +} + +static inline void __set_sb_clean(struct super_block *sb) +{ + SDFAT_SB(sb)->s_dirt = 0; +} + +/* Workqueue wrapper for sdfat_write_super() */ +static void __write_super_delayed(struct work_struct *work) +{ + struct sdfat_sb_info *sbi; + struct super_block *sb; + + sbi = container_of(work, struct sdfat_sb_info, write_super_work.work); + sb = sbi->host_sb; + + /* XXX: Is this needed? */ + if (!sb || !down_read_trylock(&sb->s_umount)) { + DMSG("%s: skip delayed work(write_super).\n", __func__); + return; + } + + DMSG("%s: do delayed_work(write_super).\n", __func__); + + spin_lock(&sbi->work_lock); + sbi->write_super_queued = 0; + spin_unlock(&sbi->work_lock); + + sdfat_write_super(sb); + + up_read(&sb->s_umount); +} + +static void setup_sdfat_sync_super_wq(struct super_block *sb) +{ + struct sdfat_sb_info *sbi = SDFAT_SB(sb); + + mutex_init(&sbi->s_lock); + spin_lock_init(&sbi->work_lock); + INIT_DELAYED_WORK(&sbi->write_super_work, __write_super_delayed); + sbi->host_sb = sb; +} + +static inline bool __cancel_delayed_work_sync(struct sdfat_sb_info *sbi) +{ + return cancel_delayed_work_sync(&sbi->write_super_work); +} + +static inline void lock_super(struct super_block *sb) +{ + struct sdfat_sb_info *sbi = SDFAT_SB(sb); + + mutex_lock(&sbi->s_lock); +} + +static inline void unlock_super(struct super_block *sb) +{ + struct sdfat_sb_info *sbi = SDFAT_SB(sb); + + mutex_unlock(&sbi->s_lock); +} + +static int sdfat_revalidate(struct dentry *dentry, unsigned int flags) +{ + if (flags & LOOKUP_RCU) + return -ECHILD; + + return __sdfat_revalidate(dentry); +} + +static int sdfat_revalidate_ci(struct dentry *dentry, unsigned int flags) +{ + if (flags & LOOKUP_RCU) + return -ECHILD; + + return __sdfat_revalidate_ci(dentry, flags); +} + +static struct inode *sdfat_iget(struct super_block *sb, loff_t i_pos) +{ + struct sdfat_sb_info *sbi = SDFAT_SB(sb); + struct sdfat_inode_info *info; + struct hlist_head *head = sbi->inode_hashtable + sdfat_hash(i_pos); + struct inode *inode = NULL; + + spin_lock(&sbi->inode_hash_lock); + hlist_for_each_entry(info, head, i_hash_fat) { + BUG_ON(info->vfs_inode.i_sb != sb); + + if (i_pos != info->i_pos) + continue; + inode = igrab(&info->vfs_inode); + if (inode) + break; + } + spin_unlock(&sbi->inode_hash_lock); + return inode; +} +#else /* LINUX_VERSION_CODE < KERNEL_VERSION(3, 7, 0) */ +static inline int __is_sb_dirty(struct super_block *sb) +{ + return sb->s_dirt; +} + +static inline void __set_sb_clean(struct super_block *sb) +{ + sb->s_dirt = 0; +} + +static void setup_sdfat_sync_super_wq(struct super_block *sb) +{ + struct sdfat_sb_info *sbi = SDFAT_SB(sb); + + sbi->host_sb = sb; +} + +static inline bool __cancel_delayed_work_sync(struct sdfat_sb_info *sbi) +{ + /* DO 
NOTHING */ + return 0; +} + +static inline void clear_inode(struct inode *inode) +{ + end_writeback(inode); +} + +static int sdfat_revalidate(struct dentry *dentry, struct nameidata *nd) +{ + if (nd && nd->flags & LOOKUP_RCU) + return -ECHILD; + + return __sdfat_revalidate(dentry); +} + +static int sdfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd) +{ + if (nd && nd->flags & LOOKUP_RCU) + return -ECHILD; + + return __sdfat_revalidate_ci(dentry, nd ? nd->flags : 0); + +} + +static struct inode *sdfat_iget(struct super_block *sb, loff_t i_pos) +{ + struct sdfat_sb_info *sbi = SDFAT_SB(sb); + struct sdfat_inode_info *info; + struct hlist_node *node; + struct hlist_head *head = sbi->inode_hashtable + sdfat_hash(i_pos); + struct inode *inode = NULL; + + spin_lock(&sbi->inode_hash_lock); + hlist_for_each_entry(info, node, head, i_hash_fat) { + BUG_ON(info->vfs_inode.i_sb != sb); + + if (i_pos != info->i_pos) + continue; + inode = igrab(&info->vfs_inode); + if (inode) + break; + } + spin_unlock(&sbi->inode_hash_lock); + return inode; +} +#endif + + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 6, 0) +static struct dentry *sdfat_lookup(struct inode *dir, struct dentry *dentry, + unsigned int flags) +{ + return __sdfat_lookup(dir, dentry); +} +#else /* LINUX_VERSION_CODE < KERNEL_VERSION(3, 6, 0) */ +static struct dentry *sdfat_lookup(struct inode *dir, struct dentry *dentry, + struct nameidata *nd) +{ + return __sdfat_lookup(dir, dentry); +} +#endif + + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0) + /* NOTHING NOW */ +#else /* LINUX_VERSION_CODE < KERNEL_VERSION(3, 5, 0) */ +#define GLOBAL_ROOT_UID (0) +#define GLOBAL_ROOT_GID (0) + +static inline bool uid_eq(uid_t left, uid_t right) +{ + return left == right; +} + +static inline bool gid_eq(gid_t left, gid_t right) +{ + return left == right; +} + +static inline uid_t from_kuid_munged(struct user_namespace *to, uid_t kuid) +{ + return kuid; +} + +static inline gid_t from_kgid_munged(struct user_namespace *to, gid_t kgid) +{ + return kgid; +} + +static inline uid_t make_kuid(struct user_namespace *from, uid_t uid) +{ + return uid; +} + +static inline gid_t make_kgid(struct user_namespace *from, gid_t gid) +{ + return gid; +} +#endif + + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0) +static struct dentry *__d_make_root(struct inode *root_inode) +{ + return d_make_root(root_inode); +} + +static void __sdfat_do_truncate(struct inode *inode, loff_t old, loff_t new) +{ + down_write(&SDFAT_I(inode)->truncate_lock); + truncate_setsize(inode, new); + sdfat_truncate(inode, old); + up_write(&SDFAT_I(inode)->truncate_lock); +} + +static sector_t sdfat_aop_bmap(struct address_space *mapping, sector_t block) +{ + sector_t blocknr; + + /* sdfat_get_cluster() assumes the requested blocknr isn't truncated. 
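Holding truncate_lock shared while generic_block_bmap() runs keeps a concurrent truncate from remapping the block under us; ->bmap() is typically reached through the FIBMAP ioctl. 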
*/ + down_read(&SDFAT_I(mapping->host)->truncate_lock); + blocknr = generic_block_bmap(mapping, block, sdfat_get_block); + up_read(&SDFAT_I(mapping->host)->truncate_lock); + return blocknr; +} + +static int sdfat_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) +{ + return __sdfat_mkdir(dir, dentry); +} + +static int sdfat_show_options(struct seq_file *m, struct dentry *root) +{ + return __sdfat_show_options(m, root->d_sb); +} +#else /* LINUX_VERSION_CODE < KERNEL_VERSION(3, 4, 0) */ +static inline void set_nlink(struct inode *inode, unsigned int nlink) +{ + inode->i_nlink = nlink; +} + +static struct dentry *__d_make_root(struct inode *root_inode) +{ + return d_alloc_root(root_inode); +} + +static void __sdfat_do_truncate(struct inode *inode, loff_t old, loff_t new) +{ + truncate_setsize(inode, new); + sdfat_truncate(inode, old); +} + +static sector_t sdfat_aop_bmap(struct address_space *mapping, sector_t block) +{ + sector_t blocknr; + + /* sdfat_get_cluster() assumes the requested blocknr isn't truncated. */ + down_read(&mapping->host->i_alloc_sem); + blocknr = generic_block_bmap(mapping, block, sdfat_get_block); + up_read(&mapping->host->i_alloc_sem); + return blocknr; +} + +static int sdfat_mkdir(struct inode *dir, struct dentry *dentry, int mode) +{ + return __sdfat_mkdir(dir, dentry); +} + +static int sdfat_show_options(struct seq_file *m, struct vfsmount *mnt) +{ + return __sdfat_show_options(m, mnt->mnt_sb); +} +#endif + + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0) +#define __sdfat_generic_file_fsync(filp, start, end, datasync) \ + generic_file_fsync(filp, start, end, datasync) + +static int sdfat_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync) +{ + return __sdfat_file_fsync(filp, start, end, datasync); +} +#else /* LINUX_VERSION_CODE < KERNEL_VERSION(3, 1, 0) */ +#define __sdfat_generic_file_fsync(filp, start, end, datasync) \ + generic_file_fsync(filp, datasync) +static int sdfat_file_fsync(struct file *filp, int datasync) +{ + return __sdfat_file_fsync(filp, 0, 0, datasync); +} +#endif + +/************************************************************************* + * MORE FUNCTIONS WHICH HAS KERNEL VERSION DEPENDENCY + *************************************************************************/ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 18, 0) +#define CURRENT_TIME_SEC timespec64_trunc(current_kernel_time64(), NSEC_PER_SEC) +#elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 12, 0) +#define CURRENT_TIME_SEC timespec_trunc(current_kernel_time(), NSEC_PER_SEC) +#else /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 12, 0) */ + /* EMPTY */ +#endif + + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 13, 0) +static void sdfat_writepage_end_io(struct bio *bio) +{ + __sdfat_writepage_end_io(bio, blk_status_to_errno(bio->bi_status)); +} +#elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 3, 0) +static void sdfat_writepage_end_io(struct bio *bio) +{ + __sdfat_writepage_end_io(bio, bio->bi_error); +} +#else /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0) */ +static void sdfat_writepage_end_io(struct bio *bio, int err) +{ + if (test_bit(BIO_UPTODATE, &bio->bi_flags)) + err = 0; + __sdfat_writepage_end_io(bio, err); +} +#endif + + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0) +static int sdfat_cmp(const struct dentry *dentry, + unsigned int len, const char *str, const struct qstr *name) +{ + return __sdfat_cmp(dentry, len, str, name); +} + +static int sdfat_cmpi(const struct dentry *dentry, + unsigned int len, const char *str, const struct qstr *name) +{ + return 
__sdfat_cmpi(dentry, len, str, name); +} +#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 14, 0) +static int sdfat_cmp(const struct dentry *parent, const struct dentry *dentry, + unsigned int len, const char *str, const struct qstr *name) +{ + return __sdfat_cmp(dentry, len, str, name); +} + +static int sdfat_cmpi(const struct dentry *parent, const struct dentry *dentry, + unsigned int len, const char *str, const struct qstr *name) +{ + return __sdfat_cmpi(dentry, len, str, name); +} +#else +static int sdfat_cmp(const struct dentry *parent, const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name) +{ + return __sdfat_cmp(dentry, len, str, name); +} + +static int sdfat_cmpi(const struct dentry *parent, const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name) +{ + return __sdfat_cmpi(dentry, len, str, name); +} +#endif + + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0) +static ssize_t sdfat_direct_IO(struct kiocb *iocb, struct iov_iter *iter) +{ + struct file *file = iocb->ki_filp; + struct address_space *mapping = file->f_mapping; + struct inode *inode = mapping->host; + size_t count = iov_iter_count(iter); + int rw = iov_iter_rw(iter); + loff_t offset = iocb->ki_pos; + + return __sdfat_direct_IO(rw, iocb, inode, + (void *)iter, offset, count, 0 /* UNUSED */); +} +#elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 1, 0) +static ssize_t sdfat_direct_IO(struct kiocb *iocb, + struct iov_iter *iter, + loff_t offset) +{ + struct file *file = iocb->ki_filp; + struct address_space *mapping = file->f_mapping; + struct inode *inode = mapping->host; + size_t count = iov_iter_count(iter); + int rw = iov_iter_rw(iter); + + return __sdfat_direct_IO(rw, iocb, inode, + (void *)iter, offset, count, 0 /* UNUSED */); +} +#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0) +static ssize_t sdfat_direct_IO(int rw, struct kiocb *iocb, + struct iov_iter *iter, + loff_t offset) +{ + struct file *file = iocb->ki_filp; + struct address_space *mapping = file->f_mapping; + struct inode *inode = mapping->host; + size_t count = iov_iter_count(iter); + + return __sdfat_direct_IO(rw, iocb, inode, + (void *)iter, offset, count, 0 /* UNUSED */); +} +#else +static ssize_t sdfat_direct_IO(int rw, struct kiocb *iocb, + const struct iovec *iov, loff_t offset, unsigned long nr_segs) +{ + struct file *file = iocb->ki_filp; + struct address_space *mapping = file->f_mapping; + struct inode *inode = mapping->host; + size_t count = iov_length(iov, nr_segs); + + return __sdfat_direct_IO(rw, iocb, inode, + (void *)iov, offset, count, nr_segs); +} +#endif + + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0) +static inline ssize_t __sdfat_blkdev_direct_IO(int unused, struct kiocb *iocb, + struct inode *inode, void *iov_u, loff_t unused_1, + unsigned long nr_segs) +{ + struct iov_iter *iter = (struct iov_iter *)iov_u; + + return blockdev_direct_IO(iocb, inode, iter, sdfat_get_block); +} +#elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 1, 0) +static inline ssize_t __sdfat_blkdev_direct_IO(int unused, struct kiocb *iocb, + struct inode *inode, void *iov_u, loff_t offset, + unsigned long nr_segs) +{ + struct iov_iter *iter = (struct iov_iter *)iov_u; + + return blockdev_direct_IO(iocb, inode, iter, offset, sdfat_get_block); +} +#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0) +static inline ssize_t __sdfat_blkdev_direct_IO(int rw, struct kiocb *iocb, + struct 
inode *inode, void *iov_u, loff_t offset, + unsigned long nr_segs) +{ + struct iov_iter *iter = (struct iov_iter *)iov_u; + + return blockdev_direct_IO(rw, iocb, inode, iter, + offset, sdfat_get_block); +} +#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0) +static inline ssize_t __sdfat_blkdev_direct_IO(int rw, struct kiocb *iocb, + struct inode *inode, void *iov_u, loff_t offset, + unsigned long nr_segs) +{ + const struct iovec *iov = (const struct iovec *)iov_u; + + return blockdev_direct_IO(rw, iocb, inode, iov, + offset, nr_segs, sdfat_get_block); +} +#else +static inline ssize_t __sdfat_blkdev_direct_IO(int rw, struct kiocb *iocb, + struct inode *inode, void *iov_u, loff_t offset, + unsigned long nr_segs) +{ + const struct iovec *iov = (const struct iovec *)iov_u; + + return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, + offset, nr_segs, sdfat_get_block, NULL); +} +#endif + + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 5, 0) +static const char *sdfat_follow_link(struct dentry *dentry, struct inode *inode, struct delayed_call *done) +{ + struct sdfat_inode_info *ei = SDFAT_I(inode); + + return (char *)(ei->target); +} +#elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 2, 0) +static const char *sdfat_follow_link(struct dentry *dentry, void **cookie) +{ + struct sdfat_inode_info *ei = SDFAT_I(dentry->d_inode); + + return *cookie = (char *)(ei->target); +} +#else +static void *sdfat_follow_link(struct dentry *dentry, struct nameidata *nd) +{ + struct sdfat_inode_info *ei = SDFAT_I(dentry->d_inode); + + nd_set_link(nd, (char *)(ei->target)); + return NULL; +} +#endif + + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 6, 0) +static int sdfat_create(struct inode *dir, struct dentry *dentry, umode_t mode, + bool excl) +{ + return __sdfat_create(dir, dentry); +} +#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0) +static int sdfat_create(struct inode *dir, struct dentry *dentry, umode_t mode, + struct nameidata *nd) +{ + return __sdfat_create(dir, dentry); +} +#else +static int sdfat_create(struct inode *dir, struct dentry *dentry, int mode, + struct nameidata *nd) +{ + return __sdfat_create(dir, dentry); +} +#endif + + +/************************************************************************* + * WRAP FUNCTIONS FOR DEBUGGING + *************************************************************************/ +#ifdef CONFIG_SDFAT_TRACE_SB_LOCK +static inline void __lock_super(struct super_block *sb) +{ + lock_super(sb); + __lock_jiffies = jiffies; +} + +static inline void __unlock_super(struct super_block *sb) +{ + int time = ((jiffies - __lock_jiffies) * 1000 / HZ); + /* FIXME : error message should be modified */ + if (time > 10) + EMSG("lock_super in %s (%d ms)\n", __func__, time); + + unlock_super(sb); +} +#else /* CONFIG_SDFAT_TRACE_SB_LOCK */ +static inline void __lock_super(struct super_block *sb) +{ + lock_super(sb); +} + +static inline void __unlock_super(struct super_block *sb) +{ + unlock_super(sb); +} +#endif /* CONFIG_SDFAT_TRACE_SB_LOCK */ + +/************************************************************************* + * NORMAL FUNCTIONS + *************************************************************************/ +static inline loff_t sdfat_make_i_pos(FILE_ID_T *fid) +{ + return ((loff_t) fid->dir.dir << 32) | (fid->entry & 0xffffffff); +} + +/*======================================================================*/ +/* Directory Entry Name Buffer Operations */ +/*======================================================================*/ +static void 
sdfat_init_namebuf(DENTRY_NAMEBUF_T *nb) +{ + nb->lfn = NULL; + nb->sfn = NULL; + nb->lfnbuf_len = 0; + nb->sfnbuf_len = 0; +} + +static int sdfat_alloc_namebuf(DENTRY_NAMEBUF_T *nb) +{ + nb->lfn = __getname(); + if (!nb->lfn) + return -ENOMEM; + nb->sfn = nb->lfn + MAX_VFSNAME_BUF_SIZE; + nb->lfnbuf_len = MAX_VFSNAME_BUF_SIZE; + nb->sfnbuf_len = MAX_VFSNAME_BUF_SIZE; + return 0; +} + +static void sdfat_free_namebuf(DENTRY_NAMEBUF_T *nb) +{ + if (!nb->lfn) + return; + + __putname(nb->lfn); + sdfat_init_namebuf(nb); +} + +/*======================================================================*/ +/* Directory Entry Operations */ +/*======================================================================*/ +#define SDFAT_DSTATE_LOCKED (void *)(0xCAFE2016) +#define SDFAT_DSTATE_UNLOCKED (void *)(0x00000000) + +static inline void __lock_d_revalidate(struct dentry *dentry) +{ + spin_lock(&dentry->d_lock); + dentry->d_fsdata = SDFAT_DSTATE_LOCKED; + spin_unlock(&dentry->d_lock); +} + +static inline void __unlock_d_revalidate(struct dentry *dentry) +{ + spin_lock(&dentry->d_lock); + dentry->d_fsdata = SDFAT_DSTATE_UNLOCKED; + spin_unlock(&dentry->d_lock); +} + +/* __check_dstate_locked requires dentry->d_lock */ +static inline int __check_dstate_locked(struct dentry *dentry) +{ + if (dentry->d_fsdata == SDFAT_DSTATE_LOCKED) + return 1; + + return 0; +} + +/* + * If new entry was created in the parent, it could create the 8.3 + * alias (the shortname of logname). So, the parent may have the + * negative-dentry which matches the created 8.3 alias. + * + * If it happened, the negative dentry isn't actually negative + * anymore. So, drop it. + */ +static int __sdfat_revalidate_common(struct dentry *dentry) +{ + int ret = 1; + + spin_lock(&dentry->d_lock); + if ((!dentry->d_inode) && (!__check_dstate_locked(dentry) && + (dentry->d_time != + (unsigned long)inode_peek_iversion(dentry->d_parent->d_inode)))) { + ret = 0; + } + spin_unlock(&dentry->d_lock); + return ret; +} + +static int __sdfat_revalidate(struct dentry *dentry) +{ + /* This is not negative dentry. Always valid. */ + if (dentry->d_inode) + return 1; + return __sdfat_revalidate_common(dentry); +} + +static int __sdfat_revalidate_ci(struct dentry *dentry, unsigned int flags) +{ + /* + * This is not negative dentry. Always valid. + * + * Note, rename() to existing directory entry will have ->d_inode, + * and will use existing name which isn't specified name by user. + * + * We may be able to drop this positive dentry here. But dropping + * positive dentry isn't good idea. So it's unsupported like + * rename("filename", "FILENAME") for now. + */ + if (dentry->d_inode) + return 1; +#if 0 /* Blocked below code for lookup_one_len() called by stackable FS */ + /* + * This may be nfsd (or something), anyway, we can't see the + * intent of this. So, since this can be for creation, drop it. + */ + if (!flags) + return 0; +#endif + /* + * Drop the negative dentry, in order to make sure to use the + * case sensitive name which is specified by user if this is + * for creation. 
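+ * + * Hypothetical example: a failed lookup("foo") leaves a negative + * dentry that a later creat("FOO") would also match through the + * case-insensitive d_compare; reusing it would create the file + * under the stale name "foo", so drop it and force a fresh lookup.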
+ */ + if (flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET)) + return 0; + return __sdfat_revalidate_common(dentry); +} + + +/* returns the length of a struct qstr, ignoring trailing dots */ +static unsigned int __sdfat_striptail_len(unsigned int len, const char *name) +{ + while (len && name[len - 1] == '.') + len--; + return len; +} + +static unsigned int sdfat_striptail_len(const struct qstr *qstr) +{ + return __sdfat_striptail_len(qstr->len, qstr->name); +} + +/* + * Compute the hash for the sdfat name corresponding to the dentry. + * Note: if the name is invalid, we leave the hash code unchanged so + * that the existing dentry can be used. The sdfat fs routines will + * return ENOENT or EINVAL as appropriate. + */ +static int __sdfat_d_hash(const struct dentry *dentry, struct qstr *qstr) +{ + unsigned int len = sdfat_striptail_len(qstr); + + qstr->hash = __sdfat_full_name_hash(dentry, qstr->name, len); + return 0; +} + +/* + * Compute the hash for the sdfat name corresponding to the dentry. + * Note: if the name is invalid, we leave the hash code unchanged so + * that the existing dentry can be used. The sdfat fs routines will + * return ENOENT or EINVAL as appropriate. + */ +static int __sdfat_d_hashi(const struct dentry *dentry, struct qstr *qstr) +{ + struct nls_table *t = SDFAT_SB(dentry->d_sb)->nls_io; + const unsigned char *name; + unsigned int len; + unsigned long hash; + + name = qstr->name; + len = sdfat_striptail_len(qstr); + + hash = __sdfat_init_name_hash(dentry); + while (len--) + hash = partial_name_hash(nls_tolower(t, *name++), hash); + qstr->hash = end_name_hash(hash); + + return 0; +} + +/* + * Case sensitive compare of two sdfat names. + */ +static int __sdfat_cmp(const struct dentry *dentry, unsigned int len, + const char *str, const struct qstr *name) +{ + unsigned int alen, blen; + + /* A filename cannot end in '.' or we treat it like it has none */ + alen = sdfat_striptail_len(name); + blen = __sdfat_striptail_len(len, str); + if (alen == blen) { + if (strncmp(name->name, str, alen) == 0) + return 0; + } + return 1; +} + +/* + * Case insensitive compare of two sdfat names. + */ +static int __sdfat_cmpi(const struct dentry *dentry, unsigned int len, + const char *str, const struct qstr *name) +{ + struct nls_table *t = SDFAT_SB(dentry->d_sb)->nls_io; + unsigned int alen, blen; + + /* A filename cannot end in '.' 
or we treat it like it has none */ + alen = sdfat_striptail_len(name); + blen = __sdfat_striptail_len(len, str); + if (alen == blen) { + if (nls_strnicmp(t, name->name, str, alen) == 0) + return 0; + } + return 1; +} + +static const struct dentry_operations sdfat_dentry_ops = { + .d_revalidate = sdfat_revalidate, + .d_hash = sdfat_d_hash, + .d_compare = sdfat_cmp, +}; + +static const struct dentry_operations sdfat_ci_dentry_ops = { + .d_revalidate = sdfat_revalidate_ci, + .d_hash = sdfat_d_hashi, + .d_compare = sdfat_cmpi, +}; + +#ifdef CONFIG_SDFAT_DFR +/*----------------------------------------------------------------------*/ +/* Defragmentation related */ +/*----------------------------------------------------------------------*/ +/** + * @fn defrag_cleanup_reqs + * @brief clean up defrag info depending on the error flag + * @return void + * @param sb super block + * @param error error flag + */ +static void defrag_cleanup_reqs(INOUT struct super_block *sb, IN int error) +{ + struct sdfat_sb_info *sbi = SDFAT_SB(sb); + struct defrag_info *sb_dfr = &(sbi->dfr_info); + struct defrag_info *ino_dfr = NULL, *tmp = NULL; + /* sdfat patch 0.96 : sbi->dfr_info crash problem */ + __lock_super(sb); + + /* Clean-up ino_dfr */ + if (!error) { + list_for_each_entry_safe(ino_dfr, tmp, &sb_dfr->entry, entry) { + struct inode *inode = &(container_of(ino_dfr, struct sdfat_inode_info, dfr_info)->vfs_inode); + + mutex_lock(&ino_dfr->lock); + + atomic_set(&ino_dfr->stat, DFR_INO_STAT_IDLE); + + list_del(&ino_dfr->entry); + + ino_dfr->chunks = NULL; + ino_dfr->nr_chunks = 0; + INIT_LIST_HEAD(&ino_dfr->entry); + + BUG_ON(!mutex_is_locked(&ino_dfr->lock)); + mutex_unlock(&ino_dfr->lock); + + iput(inode); + } + } + + /* Clean-up sb_dfr */ + sb_dfr->chunks = NULL; + sb_dfr->nr_chunks = 0; + INIT_LIST_HEAD(&sb_dfr->entry); + + /* Clear dfr_new_clus page */ + memset(sbi->dfr_new_clus, 0, PAGE_SIZE); + sbi->dfr_new_idx = 1; + memset(sbi->dfr_page_wb, 0, PAGE_SIZE); + + sbi->dfr_hint_clus = sbi->dfr_hint_idx = sbi->dfr_reserved_clus = 0; + + __unlock_super(sb); +} + +/** + * @fn defrag_validate_pages + * @brief validate victim pages and mark them dirty + * @return 0 on success, -errno otherwise + * @param inode inode + * @param chunk given chunk + * @remark protected by inode_lock and super_lock + */ +static int +defrag_validate_pages( + IN struct inode *inode, + IN struct defrag_chunk_info *chunk) +{ + struct super_block *sb = inode->i_sb; + struct sdfat_sb_info *sbi = SDFAT_SB(sb); + struct page *page = NULL; + unsigned int i_size = 0, page_off = 0, page_nr = 0; + int buf_i = 0, i = 0, err = 0; + + i_size = i_size_read(inode); + page_off = chunk->f_clus * PAGES_PER_CLUS(sb); + page_nr = (i_size / PAGE_SIZE) + ((i_size % PAGE_SIZE) ? 
1 : 0); + if ((i_size <= 0) || (page_nr <= 0)) { + dfr_err("inode %p, i_size %d, page_nr %d", inode, i_size, page_nr); + return -EINVAL; + } + + /* Get victim pages + * and check its dirty/writeback/mapped state + */ + for (i = 0; + i < min((int)(page_nr - page_off), (int)(chunk->nr_clus * PAGES_PER_CLUS(sb))); + i++) { + page = find_get_page(inode->i_mapping, page_off + i); + if (page) + if (!trylock_page(page)) { + put_page(page); + page = NULL; + } + + if (!page) { + dfr_debug("get/lock_page() failed, index %d", i); + err = -EINVAL; + goto error; + } + + sbi->dfr_pagep[buf_i++] = page; + if (PageError(page) || !PageUptodate(page) || PageDirty(page) || + PageWriteback(page) || page_mapped(page)) { + dfr_debug("page %p, err %d, uptodate %d, " + "dirty %d, wb %d, mapped %d", + page, PageError(page), PageUptodate(page), + PageDirty(page), PageWriteback(page), + page_mapped(page)); + err = -EINVAL; + goto error; + } + + set_bit((page->index & (PAGES_PER_CLUS(sb) - 1)), + (volatile unsigned long *)&(sbi->dfr_page_wb[chunk->new_idx + i / PAGES_PER_CLUS(sb)])); + + page = NULL; + } + + /** + * All pages in the chunks are valid. + */ + i_size -= (chunk->f_clus * (sbi->fsi.cluster_size)); + BUG_ON(((i_size / PAGE_SIZE) + ((i_size % PAGE_SIZE) ? 1 : 0)) != (page_nr - page_off)); + + for (i = 0; i < buf_i; i++) { + struct buffer_head *bh = NULL, *head = NULL; + int bh_idx = 0; + + page = sbi->dfr_pagep[i]; + BUG_ON(!page); + + /* Mark dirty in page */ + set_page_dirty(page); + mark_page_accessed(page); + + /* Attach empty BHs */ + if (!page_has_buffers(page)) + create_empty_buffers(page, 1 << inode->i_blkbits, 0); + + /* Mark dirty in BHs */ + bh = head = page_buffers(page); + BUG_ON(!bh && !i_size); + do { + if ((bh_idx >= 1) && (bh_idx >= (i_size >> inode->i_blkbits))) { + clear_buffer_dirty(bh); + } else { + if (PageUptodate(page)) + if (!buffer_uptodate(bh)) + set_buffer_uptodate(bh); + + /* Set this bh as delay */ + set_buffer_new(bh); + set_buffer_delay(bh); + + mark_buffer_dirty(bh); + } + + bh_idx++; + bh = bh->b_this_page; + } while (bh != head); + + /* Mark this page accessed */ + mark_page_accessed(page); + + i_size -= PAGE_SIZE; + } + +error: + /* Unlock and put refs for pages */ + for (i = 0; i < buf_i; i++) { + BUG_ON(!sbi->dfr_pagep[i]); + unlock_page(sbi->dfr_pagep[i]); + put_page(sbi->dfr_pagep[i]); + } + memset(sbi->dfr_pagep, 0, sizeof(PAGE_SIZE)); + + return err; +} + + +/** + * @fn defrag_validate_reqs + * @brief validate defrag requests + * @return negative if all requests not valid, 0 otherwise + * @param sb super block + * @param chunks given chunks + */ +static int +defrag_validate_reqs( + IN struct super_block *sb, + INOUT struct defrag_chunk_info *chunks) +{ + struct sdfat_sb_info *sbi = SDFAT_SB(sb); + struct defrag_info *sb_dfr = &(sbi->dfr_info); + int i = 0, err = 0, err_cnt = 0; + + /* Validate all reqs */ + for (i = REQ_HEADER_IDX + 1; i < sb_dfr->nr_chunks; i++) { + struct defrag_chunk_info *chunk = NULL; + struct inode *inode = NULL; + struct defrag_info *ino_dfr = NULL; + + chunk = &chunks[i]; + + /* Check inode */ + __lock_super(sb); + inode = sdfat_iget(sb, chunk->i_pos); + if (!inode) { + dfr_debug("inode not found, i_pos %08llx", chunk->i_pos); + chunk->stat = DFR_CHUNK_STAT_ERR; + err_cnt++; + __unlock_super(sb); + continue; + } + __unlock_super(sb); + + dfr_debug("req[%d] inode %p, i_pos %08llx, f_clus %d, " + "d_clus %08x, nr %d, prev %08x, next %08x", + i, inode, chunk->i_pos, chunk->f_clus, chunk->d_clus, + chunk->nr_clus, chunk->prev_clus, 
chunk->next_clus); + /** + * Lock ordering: inode_lock -> lock_super + */ + inode_lock(inode); + __lock_super(sb); + + /* Check if enough buffers exist for chunk->new_idx */ + if ((sbi->dfr_new_idx + chunk->nr_clus) >= (PAGE_SIZE / sizeof(int))) { + dfr_err("dfr_new_idx %d, chunk->nr_clus %d", + sbi->dfr_new_idx, chunk->nr_clus); + err = -ENOSPC; + goto unlock; + } + + /* Reserve clusters for defrag with DA */ + err = fsapi_dfr_reserve_clus(sb, chunk->nr_clus); + if (err) + goto unlock; + + /* Check clusters */ + err = fsapi_dfr_validate_clus(inode, chunk, 0); + if (err) { + fsapi_dfr_reserve_clus(sb, 0 - chunk->nr_clus); + dfr_debug("Cluster validation: err %d", err); + goto unlock; + } + + /* Check pages */ + err = defrag_validate_pages(inode, chunk); + if (err) { + fsapi_dfr_reserve_clus(sb, 0 - chunk->nr_clus); + dfr_debug("Page validation: err %d", err); + goto unlock; + } + + /* Mark IGNORE flag to victim AU */ + if (sbi->options.improved_allocation & SDFAT_ALLOC_SMART) + fsapi_dfr_mark_ignore(sb, chunk->d_clus); + + ino_dfr = &(SDFAT_I(inode)->dfr_info); + mutex_lock(&ino_dfr->lock); + + /* Update chunk info */ + chunk->stat = DFR_CHUNK_STAT_REQ; + chunk->new_idx = sbi->dfr_new_idx; + + /* Update ino_dfr info */ + if (list_empty(&(ino_dfr->entry))) { + list_add_tail(&ino_dfr->entry, &sb_dfr->entry); + ino_dfr->chunks = chunk; + igrab(inode); + } + ino_dfr->nr_chunks++; + + atomic_set(&ino_dfr->stat, DFR_INO_STAT_REQ); + + BUG_ON(!mutex_is_locked(&ino_dfr->lock)); + mutex_unlock(&ino_dfr->lock); + + /* Reserved buffers for chunk->new_idx */ + sbi->dfr_new_idx += chunk->nr_clus; + +unlock: + if (err) { + chunk->stat = DFR_CHUNK_STAT_ERR; + err_cnt++; + } + iput(inode); + __unlock_super(sb); + inode_unlock(inode); + } + + /* Return error if all chunks are invalid */ + if (err_cnt == sb_dfr->nr_chunks - 1) { + dfr_debug("%s failed (err_cnt %d)", __func__, err_cnt); + return -ENXIO; + } + + return 0; +} + + +/** + * @fn defrag_check_fs_busy + * @brief check if this module busy + * @return 0 when idle, 1 otherwise + * @param sb super block + * @param reserved_clus # of reserved clusters + * @param queued_pages # of queued pages + */ +static int +defrag_check_fs_busy( + IN struct super_block *sb, + OUT int *reserved_clus, + OUT int *queued_pages) +{ + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + int err = 0; + + *reserved_clus = *queued_pages = 0; + + __lock_super(sb); + *reserved_clus = fsi->reserved_clusters; + *queued_pages = atomic_read(&SDFAT_SB(sb)->stat_n_pages_queued); + + if (*reserved_clus || *queued_pages) + err = 1; + __unlock_super(sb); + + return err; +} + + +/** + * @fn sdfat_ioctl_defrag_req + * @brief ioctl to send defrag requests + * @return 0 on success, -errno otherwise + * @param inode inode + * @param uarg given requests + */ +static int +sdfat_ioctl_defrag_req( + IN struct inode *inode, + INOUT unsigned int *uarg) +{ + struct super_block *sb = inode->i_sb; + struct sdfat_sb_info *sbi = SDFAT_SB(sb); + struct defrag_info *sb_dfr = &(sbi->dfr_info); + struct defrag_chunk_header head; + struct defrag_chunk_info *chunks = NULL; + unsigned int len = 0; + int err = 0; + unsigned long timeout = 0; + + /* Check overlapped defrag */ + if (atomic_cmpxchg(&sb_dfr->stat, DFR_SB_STAT_IDLE, DFR_SB_STAT_REQ)) { + dfr_debug("sb_dfr->stat %d", atomic_read(&sb_dfr->stat)); + return -EBUSY; + } + + /* Check if defrag required */ + __lock_super(sb); + if (!fsapi_dfr_check_dfr_required(sb, NULL, NULL, NULL)) { + dfr_debug("Not enough space left for defrag (err %d)", -ENOSPC); + 
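/* defrag not worthwhile now: undo the cmpxchg above and go back to IDLE */ +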
atomic_set(&sb_dfr->stat, DFR_SB_STAT_IDLE); + __unlock_super(sb); + return -ENOSPC; + } + __unlock_super(sb); + + /* Copy args */ + memset(&head, 0, sizeof(struct defrag_chunk_header)); + err = copy_from_user(&head, uarg, sizeof(struct defrag_chunk_info)); + ERR_HANDLE(err); + + /* If FS busy, cancel defrag */ + if (head.mode != DFR_MODE_TEST) { + int reserved_clus = 0, queued_pages = 0; + + err = defrag_check_fs_busy(sb, &reserved_clus, &queued_pages); + if (err) { + dfr_debug("FS busy, cancel defrag (reserved_clus %d, queued_pages %d)", + reserved_clus, queued_pages); + err = -EBUSY; + goto error; + } + } + + /* Total length is saved in the chunk header's nr_chunks field */ + len = head.nr_chunks; + ERR_HANDLE2(!len, err, -EINVAL); + + dfr_debug("IOC_DFR_REQ started (mode %d, nr_req %d)", head.mode, len - 1); + if (get_order(len * sizeof(struct defrag_chunk_info)) > MAX_ORDER) { + dfr_debug("len %d, sizeof(struct defrag_chunk_info) %lu, MAX_ORDER %d", + len, sizeof(struct defrag_chunk_info), MAX_ORDER); + err = -EINVAL; + goto error; + } + chunks = alloc_pages_exact(len * sizeof(struct defrag_chunk_info), + GFP_KERNEL | __GFP_ZERO); + ERR_HANDLE2(!chunks, err, -ENOMEM); + + err = copy_from_user(chunks, uarg, len * sizeof(struct defrag_chunk_info)); + ERR_HANDLE(err); + + /* Initialize sb_dfr */ + sb_dfr->chunks = chunks; + sb_dfr->nr_chunks = len; + + /* Validate reqs & mark defrag/dirty */ + err = defrag_validate_reqs(sb, sb_dfr->chunks); + ERR_HANDLE(err); + + atomic_set(&sb_dfr->stat, DFR_SB_STAT_VALID); + + /* Wait for defrag completion */ + if (head.mode == DFR_MODE_ONESHOT) + timeout = 0; + else if (head.mode & DFR_MODE_BACKGROUND) + timeout = DFR_DEFAULT_TIMEOUT; + else + timeout = DFR_MIN_TIMEOUT; + + dfr_debug("Wait for completion (timeout %ld)", timeout); + init_completion(&sbi->dfr_complete); + timeout = wait_for_completion_timeout(&sbi->dfr_complete, timeout); + + if (!timeout) { + /* Force fsapi_dfr_update_fat_next()/_prev() after the timeout. 
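The completion did not arrive in time, so sync dirty data here and push the pending FAT updates to disk from this context. 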
*/ + dfr_debug("Force sync(), mode %d, left-timeout %ld", head.mode, timeout); + + down_read(&sb->s_umount); + + sync_inodes_sb(sb); + + __lock_super(sb); + fsapi_dfr_update_fat_next(sb); + + fsapi_sync_fs(sb, 1); + +#ifdef CONFIG_SDFAT_DFR_DEBUG + /* SPO test */ + fsapi_dfr_spo_test(sb, DFR_SPO_FAT_NEXT, __func__); +#endif + + fsapi_dfr_update_fat_prev(sb, 1); + fsapi_sync_fs(sb, 1); + + __unlock_super(sb); + + up_read(&sb->s_umount); + } + +#ifdef CONFIG_SDFAT_DFR_DEBUG + /* SPO test */ + fsapi_dfr_spo_test(sb, DFR_SPO_NORMAL, __func__); +#endif + + __lock_super(sb); + /* Send DISCARD to cleaned AUs */ + fsapi_dfr_check_discard(sb); + +#ifdef CONFIG_SDFAT_DFR_DEBUG + /* SPO test */ + fsapi_dfr_spo_test(sb, DFR_SPO_DISCARD, __func__); +#endif + + /* Unmark IGNORE flag on all victim AUs */ + fsapi_dfr_unmark_ignore_all(sb); + __unlock_super(sb); + + err = copy_to_user(uarg, sb_dfr->chunks, sizeof(struct defrag_chunk_info) * len); + ERR_HANDLE(err); + +error: + /* Clean-up sb_dfr & ino_dfr */ + defrag_cleanup_reqs(sb, err); + + if (chunks) + free_pages_exact(chunks, len * sizeof(struct defrag_chunk_info)); + + /* Set sb_dfr's state as IDLE */ + atomic_set(&sb_dfr->stat, DFR_SB_STAT_IDLE); + + dfr_debug("IOC_DFR_REQ done (err %d)", err); + return err; +} + +/** + * @fn sdfat_ioctl_defrag_trav + * @brief ioctl to traverse given directory for defrag + * @return 0 on success, -errno otherwise + * @param inode inode + * @param uarg output buffer + */ +static int +sdfat_ioctl_defrag_trav( + IN struct inode *inode, + INOUT unsigned int *uarg) +{ + struct super_block *sb = inode->i_sb; + struct sdfat_sb_info *sbi = SDFAT_SB(sb); + struct defrag_info *sb_dfr = &(sbi->dfr_info); + struct defrag_trav_arg *args = (struct defrag_trav_arg *) sbi->dfr_pagep; + struct defrag_trav_header *header = (struct defrag_trav_header *) args; + int err = 0; + + /* Check overlapped defrag */ + if (atomic_cmpxchg(&sb_dfr->stat, DFR_SB_STAT_IDLE, DFR_SB_STAT_REQ)) { + dfr_debug("sb_dfr->stat %d", atomic_read(&sb_dfr->stat)); + return -EBUSY; + } + + /* Check if defrag required */ + __lock_super(sb); + if (!fsapi_dfr_check_dfr_required(sb, NULL, NULL, NULL)) { + dfr_debug("Not enough space left for defrag (err %d)", -ENOSPC); + atomic_set(&sb_dfr->stat, DFR_SB_STAT_IDLE); + __unlock_super(sb); + return -ENOSPC; + } + __unlock_super(sb); + + /* Copy args */ + err = copy_from_user(args, uarg, PAGE_SIZE); + ERR_HANDLE(err); + + /** + * Check args. + * ROOT directory has i_pos = 0 and start_clus = 0. 
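+ * Illustrative request (hypothetical values): scanning from the root + * would pass header->type == DFR_TRAV_TYPE_HEADER with + * header->i_pos == 0 and header->start_clus == 0.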
+ */ + if (!(header->type & DFR_TRAV_TYPE_HEADER)) { + err = -EINVAL; + dfr_debug("type %d, i_pos %08llx, start_clus %08x", + header->type, header->i_pos, header->start_clus); + goto error; + } + + /* If FS busy, cancel defrag */ + if (!(header->type & DFR_TRAV_TYPE_TEST)) { + unsigned int reserved_clus = 0, queued_pages = 0; + + err = defrag_check_fs_busy(sb, &reserved_clus, &queued_pages); + if (err) { + dfr_debug("FS busy, cancel defrag (reserved_clus %d, queued_pages %d)", + reserved_clus, queued_pages); + err = -EBUSY; + goto error; + } + } + + /* Scan given directory and gather info */ + inode_lock(inode); + __lock_super(sb); + err = fsapi_dfr_scan_dir(sb, (void *)args); + __unlock_super(sb); + inode_unlock(inode); + ERR_HANDLE(err); + + /* Copy the result to user */ + err = copy_to_user(uarg, args, PAGE_SIZE); + ERR_HANDLE(err); + +error: + memset(sbi->dfr_pagep, 0, PAGE_SIZE); + + atomic_set(&sb_dfr->stat, DFR_SB_STAT_IDLE); + return err; +} + +/** + * @fn sdfat_ioctl_defrag_info + * @brief ioctl to get HW param info + * @return 0 on success, -errno otherwise + * @param sb super block + * @param uarg output buffer + */ +static int +sdfat_ioctl_defrag_info( + IN struct super_block *sb, + OUT unsigned int *uarg) +{ + struct defrag_info_arg info_arg; + int err = 0; + + memset(&info_arg, 0, sizeof(struct defrag_info_arg)); + + __lock_super(sb); + err = fsapi_dfr_get_info(sb, &info_arg); + __unlock_super(sb); + ERR_HANDLE(err); + dfr_debug("IOC_DFR_INFO: sec_per_au %d, hidden_sectors %d", + info_arg.sec_per_au, info_arg.hidden_sectors); + + err = copy_to_user(uarg, &info_arg, sizeof(struct defrag_info_arg)); +error: + return err; +} + +#endif /* CONFIG_SDFAT_DFR */ + +static inline int __do_dfr_map_cluster(struct inode *inode, u32 clu_offset, unsigned int *clus_ptr) +{ +#ifdef CONFIG_SDFAT_DFR + return fsapi_dfr_map_clus(inode, clu_offset, clus_ptr); +#else + return 0; +#endif +} + +static inline int __check_dfr_on(struct inode *inode, loff_t start, loff_t end, const char *fname) +{ +#ifdef CONFIG_SDFAT_DFR + struct defrag_info *ino_dfr = &(SDFAT_I(inode)->dfr_info); + + if ((atomic_read(&ino_dfr->stat) == DFR_INO_STAT_REQ) && + fsapi_dfr_check_dfr_on(inode, start, end, 0, fname)) + return 1; +#endif + return 0; +} + +static inline int __cancel_dfr_work(struct inode *inode, loff_t start, loff_t end, const char *fname) +{ +#ifdef CONFIG_SDFAT_DFR + struct defrag_info *ino_dfr = &(SDFAT_I(inode)->dfr_info); + /* Cancel DEFRAG */ + if (atomic_read(&ino_dfr->stat) == DFR_INO_STAT_REQ) + fsapi_dfr_check_dfr_on(inode, start, end, 1, fname); +#endif + return 0; +} + +static inline int __dfr_writepage_end_io(struct page *page) +{ +#ifdef CONFIG_SDFAT_DFR + struct defrag_info *ino_dfr = &(SDFAT_I(page->mapping->host)->dfr_info); + + if (atomic_read(&ino_dfr->stat) == DFR_INO_STAT_REQ) + fsapi_dfr_writepage_endio(page); +#endif + return 0; +} + +static inline void __init_dfr_info(struct inode *inode) +{ +#ifdef CONFIG_SDFAT_DFR + memset(&(SDFAT_I(inode)->dfr_info), 0, sizeof(struct defrag_info)); + INIT_LIST_HEAD(&(SDFAT_I(inode)->dfr_info.entry)); + mutex_init(&(SDFAT_I(inode)->dfr_info.lock)); +#endif +} + +static inline int __alloc_dfr_mem_if_required(struct super_block *sb) +{ +#ifdef CONFIG_SDFAT_DFR + struct sdfat_sb_info *sbi = SDFAT_SB(sb); + + if (!sbi->options.defrag) + return 0; + + memset(&sbi->dfr_info, 0, sizeof(struct defrag_info)); + INIT_LIST_HEAD(&(sbi->dfr_info.entry)); + mutex_init(&(sbi->dfr_info.lock)); + + sbi->dfr_new_clus = kzalloc(PAGE_SIZE, GFP_KERNEL); + if 
(!sbi->dfr_new_clus) { + dfr_debug("error %d", -ENOMEM); + return -ENOMEM; + } + sbi->dfr_new_idx = 1; + + sbi->dfr_page_wb = kzalloc(PAGE_SIZE, GFP_KERNEL); + if (!sbi->dfr_page_wb) { + dfr_debug("error %d", -ENOMEM); + return -ENOMEM; + } + + sbi->dfr_pagep = alloc_pages_exact(sizeof(struct page *) * + PAGES_PER_AU(sb), GFP_KERNEL | __GFP_ZERO); + if (!sbi->dfr_pagep) { + dfr_debug("error %d", -ENOMEM); + return -ENOMEM; + } +#endif + return 0; +} + +static void __free_dfr_mem_if_required(struct super_block *sb) +{ +#ifdef CONFIG_SDFAT_DFR + struct sdfat_sb_info *sbi = SDFAT_SB(sb); + + if (sbi->dfr_pagep) { + free_pages_exact(sbi->dfr_pagep, sizeof(struct page *) * PAGES_PER_AU(sb)); + sbi->dfr_pagep = NULL; + } + + /* thanks for kfree */ + kfree(sbi->dfr_page_wb); + sbi->dfr_page_wb = NULL; + + kfree(sbi->dfr_new_clus); + sbi->dfr_new_clus = NULL; +#endif +} + + +static int sdfat_file_mmap(struct file *file, struct vm_area_struct *vm_struct) +{ + __cancel_dfr_work(file->f_mapping->host, + (loff_t)vm_struct->vm_start, + (loff_t)(vm_struct->vm_end - 1), + __func__); + + return generic_file_mmap(file, vm_struct); +} + +static int sdfat_ioctl_volume_id(struct inode *dir) +{ + struct sdfat_sb_info *sbi = SDFAT_SB(dir->i_sb); + FS_INFO_T *fsi = &(sbi->fsi); + + return fsi->vol_id; +} + +static int sdfat_dfr_ioctl(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg) +{ +#ifdef CONFIG_SDFAT_DFR + switch (cmd) { + case SDFAT_IOCTL_DFR_INFO: { + struct super_block *sb = inode->i_sb; + FS_INFO_T *fsi = &SDFAT_SB(sb)->fsi; + unsigned int __user *uarg = (unsigned int __user *) arg; + + __lock_super(sb); + /* Check FS type (FAT32 only) */ + if (fsi->vol_type != FAT32) { + dfr_err("Defrag not supported, vol_type %d", fsi->vol_type); + __unlock_super(sb); + return -EPERM; + + } + + /* Check if SB's defrag option enabled */ + if (!(SDFAT_SB(sb)->options.defrag)) { + dfr_err("Defrag not supported, sbi->options.defrag %d", SDFAT_SB(sb)->options.defrag); + __unlock_super(sb); + return -EPERM; + } + + /* Only IOCTL on mount-point allowed */ + if (filp->f_path.mnt->mnt_root != filp->f_path.dentry) { + dfr_err("IOC_DFR_INFO only allowed on ROOT, root %p, dentry %p", + filp->f_path.mnt->mnt_root, filp->f_path.dentry); + __unlock_super(sb); + return -EPERM; + } + __unlock_super(sb); + + return sdfat_ioctl_defrag_info(sb, uarg); + } + case SDFAT_IOCTL_DFR_TRAV: { + struct super_block *sb = inode->i_sb; + FS_INFO_T *fsi = &SDFAT_SB(sb)->fsi; + unsigned int __user *uarg = (unsigned int __user *) arg; + + __lock_super(sb); + /* Check FS type (FAT32 only) */ + if (fsi->vol_type != FAT32) { + dfr_err("Defrag not supported, vol_type %d", fsi->vol_type); + __unlock_super(sb); + return -EPERM; + + } + + /* Check if SB's defrag option enabled */ + if (!(SDFAT_SB(sb)->options.defrag)) { + dfr_err("Defrag not supported, sbi->options.defrag %d", SDFAT_SB(sb)->options.defrag); + __unlock_super(sb); + return -EPERM; + } + __unlock_super(sb); + + return sdfat_ioctl_defrag_trav(inode, uarg); + } + case SDFAT_IOCTL_DFR_REQ: { + struct super_block *sb = inode->i_sb; + FS_INFO_T *fsi = &SDFAT_SB(sb)->fsi; + unsigned int __user *uarg = (unsigned int __user *) arg; + + __lock_super(sb); + + /* Check if FS_ERROR occurred */ + if (sb->s_flags & MS_RDONLY) { + dfr_err("RDONLY partition (err %d)", -EPERM); + __unlock_super(sb); + return -EPERM; + } + + /* Check FS type (FAT32 only) */ + if (fsi->vol_type != FAT32) { + dfr_err("Defrag not supported, vol_type %d", fsi->vol_type); + __unlock_super(sb); + return 
-EINVAL;
+
+		}
+
+		/* Check if SB's defrag option enabled */
+		if (!(SDFAT_SB(sb)->options.defrag)) {
+			dfr_err("Defrag not supported, sbi->options.defrag %d", SDFAT_SB(sb)->options.defrag);
+			__unlock_super(sb);
+			return -EPERM;
+		}
+
+		/* Only IOCTL on mount-point allowed */
+		if (filp->f_path.mnt->mnt_root != filp->f_path.dentry) {
+			dfr_err("IOC_DFR_REQ only allowed on ROOT, root %p, dentry %p",
+				filp->f_path.mnt->mnt_root, filp->f_path.dentry);
+			__unlock_super(sb);
+			return -EINVAL;
+		}
+		__unlock_super(sb);
+
+		return sdfat_ioctl_defrag_req(inode, uarg);
+	}
+#ifdef CONFIG_SDFAT_DFR_DEBUG
+	case SDFAT_IOCTL_DFR_SPO_FLAG: {
+		struct sdfat_sb_info *sbi = SDFAT_SB(inode->i_sb);
+		int ret = 0;
+
+		ret = get_user(sbi->dfr_spo_flag, (int __user *)arg);
+		dfr_debug("dfr_spo_flag %d", sbi->dfr_spo_flag);
+
+		return ret;
+	}
+#endif	/* CONFIG_SDFAT_DFR_DEBUG */
+	}
+#endif /* CONFIG_SDFAT_DFR */
+
+	/* Inappropriate ioctl for device */
+	return -ENOTTY;
+}
+
+static int sdfat_dbg_ioctl(struct inode *inode, struct file *filp,
+		unsigned int cmd, unsigned long arg)
+{
+#ifdef CONFIG_SDFAT_DBG_IOCTL
+	struct super_block *sb = inode->i_sb;
+	struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+	unsigned int flags;
+
+	switch (cmd) {
+	case SDFAT_IOC_GET_DEBUGFLAGS:
+		flags = sbi->debug_flags;
+		return put_user(flags, (int __user *)arg);
+	case SDFAT_IOC_SET_DEBUGFLAGS:
+		flags = 0;
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+
+		if (get_user(flags, (int __user *) arg))
+			return -EFAULT;
+
+		__lock_super(sb);
+		sbi->debug_flags = flags;
+		__unlock_super(sb);
+		return 0;
+	case SDFAT_IOCTL_PANIC:
+		panic("ioctl panic for test");
+
+		/* CANNOT REACH HERE */
+		return 0;
+	}
+#endif /* CONFIG_SDFAT_DBG_IOCTL */
+	return -ENOTTY;
+}
+
+static long sdfat_generic_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+	struct inode *inode = file_inode(filp);
+	int err;
+
+	if (cmd == SDFAT_IOCTL_GET_VOLUME_ID)
+		return sdfat_ioctl_volume_id(inode);
+
+	err = sdfat_dfr_ioctl(inode, filp, cmd, arg);
+	if (err != -ENOTTY)
+		return err;
+
+	/* -ENOTTY if inappropriate ioctl for device */
+	return sdfat_dbg_ioctl(inode, filp, cmd, arg);
+}
+
+static int __sdfat_getattr(struct inode *inode, struct kstat *stat)
+{
+	TMSG("%s entered\n", __func__);
+
+	generic_fillattr(inode, stat);
+	stat->blksize = SDFAT_SB(inode->i_sb)->fsi.cluster_size;
+
+	TMSG("%s exited\n", __func__);
+	return 0;
+}
+
+static void __sdfat_writepage_end_io(struct bio *bio, int err)
+{
+	struct page *page = bio->bi_io_vec->bv_page;
+	struct super_block *sb = page->mapping->host->i_sb;
+
+	ASSERT(bio->bi_vcnt == 1); /* Single page endio */
+	ASSERT(bio_data_dir(bio)); /* Write */
+
+	if (err) {
+		SetPageError(page);
+		mapping_set_error(page->mapping, err);
+	}
+
+	__dfr_writepage_end_io(page);
+
+#ifdef CONFIG_SDFAT_TRACE_IO
+	{
+		//struct sdfat_sb_info *sbi = SDFAT_SB(bio->bi_bdev->bd_super);
+		struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+
+		sbi->stat_n_pages_written++;
+		if (page->mapping->host == sb->s_bdev->bd_inode)
+			sbi->stat_n_bdev_pages_written++;
+
+		/* 4 MB = 1024 pages => 0.4 sec (approx.)
+		 * 32 KB = 64 pages => 0.025 sec
+		 * Min. average latency b/w msgs. ~= 0.025 sec
+		 */
+		if ((sbi->stat_n_pages_written & 63) == 0) {
+			DMSG("STAT:%u, %u, %u, %u (Sector #: %u)\n",
+				sbi->stat_n_pages_added, sbi->stat_n_pages_written,
+				sbi->stat_n_bdev_pages_written,
+				sbi->stat_n_pages_confused,
+				(unsigned int)__sdfat_bio_sector(bio));
+		}
+	}
+#endif
+	end_page_writeback(page);
+	bio_put(bio);
+
+	// Update trace info.
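+	// (incremented in sdfat_writepage() right before the bio is submitted)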
+ atomic_dec(&SDFAT_SB(sb)->stat_n_pages_queued); +} + + +static int __support_write_inode_sync(struct super_block *sb) +{ +#ifdef CONFIG_SDFAT_SUPPORT_DIR_SYNC +#ifdef CONFIG_SDFAT_DELAYED_META_DIRTY + struct sdfat_sb_info *sbi = SDFAT_SB(sb); + + if (sbi->fsi.vol_type != EXFAT) + return 0; +#endif + return 1; +#endif + return 0; +} + + +static int __sdfat_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync) +{ + struct inode *inode = filp->f_mapping->host; + struct super_block *sb = inode->i_sb; + int res, err = 0; + + res = __sdfat_generic_file_fsync(filp, start, end, datasync); + + if (!__support_write_inode_sync(sb)) + err = fsapi_sync_fs(sb, 1); + + return res ? res : err; +} + + +static const struct file_operations sdfat_dir_operations = { + .llseek = generic_file_llseek, + .read = generic_read_dir, +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 14, 0) + .iterate = sdfat_iterate, +#else /* LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0) */ + .readdir = sdfat_readdir, +#endif + .fsync = sdfat_file_fsync, + .unlocked_ioctl = sdfat_generic_ioctl, +}; + +static int __sdfat_create(struct inode *dir, struct dentry *dentry) +{ + struct super_block *sb = dir->i_sb; + struct inode *inode; + sdfat_timespec_t ts; + FILE_ID_T fid; + loff_t i_pos; + int err; + + __lock_super(sb); + + TMSG("%s entered\n", __func__); + + ts = CURRENT_TIME_SEC; + + err = fsapi_create(dir, (u8 *) dentry->d_name.name, FM_REGULAR, &fid); + if (err) + goto out; + + __lock_d_revalidate(dentry); + + inode_inc_iversion(dir); + dir->i_ctime = dir->i_mtime = dir->i_atime = ts; + if (IS_DIRSYNC(dir)) + (void) sdfat_sync_inode(dir); + else + mark_inode_dirty(dir); + + i_pos = sdfat_make_i_pos(&fid); + inode = sdfat_build_inode(sb, &fid, i_pos); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + goto out; + } + inode_inc_iversion(inode); + inode->i_mtime = inode->i_atime = inode->i_ctime = ts; + /* timestamp is already written, so mark_inode_dirty() is unneeded. 
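+	 * (fsapi_create() stored these timestamps into the on-disk entry
+	 * when it was created, so dirtying the inode again would only
+	 * queue a redundant write-back)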
*/ + + d_instantiate(dentry, inode); +out: + __unlock_d_revalidate(dentry); + __unlock_super(sb); + TMSG("%s exited with err(%d)\n", __func__, err); + if (!err) + sdfat_statistics_set_create(fid.flags); + return err; +} + + +static int sdfat_find(struct inode *dir, struct qstr *qname, FILE_ID_T *fid) +{ + int err; + + if (qname->len == 0) + return -ENOENT; + + err = fsapi_lookup(dir, (u8 *) qname->name, fid); + if (err) + return -ENOENT; + + return 0; +} + +static int sdfat_d_anon_disconn(struct dentry *dentry) +{ + return IS_ROOT(dentry) && (dentry->d_flags & DCACHE_DISCONNECTED); +} + +static struct dentry *__sdfat_lookup(struct inode *dir, struct dentry *dentry) +{ + struct super_block *sb = dir->i_sb; + struct inode *inode; + struct dentry *alias; + int err; + FILE_ID_T fid; + loff_t i_pos; + u64 ret; + mode_t i_mode; + + __lock_super(sb); + TMSG("%s entered\n", __func__); + err = sdfat_find(dir, &dentry->d_name, &fid); + if (err) { + if (err == -ENOENT) { + inode = NULL; + goto out; + } + goto error; + } + + i_pos = sdfat_make_i_pos(&fid); + inode = sdfat_build_inode(sb, &fid, i_pos); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + goto error; + } + + i_mode = inode->i_mode; + if (S_ISLNK(i_mode) && !SDFAT_I(inode)->target) { + SDFAT_I(inode)->target = kmalloc((i_size_read(inode)+1), GFP_KERNEL); + if (!SDFAT_I(inode)->target) { + err = -ENOMEM; + goto error; + } + fsapi_read_link(dir, &fid, SDFAT_I(inode)->target, i_size_read(inode), &ret); + *(SDFAT_I(inode)->target + i_size_read(inode)) = '\0'; + } + + alias = d_find_alias(inode); + + /* + * Checking "alias->d_parent == dentry->d_parent" to make sure + * FS is not corrupted (especially double linked dir). + */ + if (alias && alias->d_parent == dentry->d_parent && + !sdfat_d_anon_disconn(alias)) { + + /* + * Unhashed alias is able to exist because of revalidate() + * called by lookup_fast. You can easily make this status + * by calling create and lookup concurrently + * In such case, we reuse an alias instead of new dentry + */ + if (d_unhashed(alias)) { +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0) + BUG_ON(alias->d_name.hash != dentry->d_name.hash && alias->d_name.len != dentry->d_name.len); +#else + BUG_ON(alias->d_name.hash_len != dentry->d_name.hash_len); +#endif + sdfat_msg(sb, KERN_INFO, "rehashed a dentry(%p) " + "in read lookup", alias); + d_drop(dentry); + d_rehash(alias); + } else if (!S_ISDIR(i_mode)) { + /* + * This inode has non anonymous-DCACHE_DISCONNECTED + * dentry. This means, the user did ->lookup() by an + * another name (longname vs 8.3 alias of it) in past. + * + * Switch to new one for reason of locality if possible. 
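+			 * d_move() transfers the name to the existing alias,
+			 * so later lookups by this name reuse the alias
+			 * instead of instantiating a second dentry.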
+ */ + d_move(alias, dentry); + } + iput(inode); + __unlock_super(sb); + TMSG("%s exited\n", __func__); + return alias; + } + dput(alias); +out: + /* initialize d_time even though it is positive dentry */ + dentry->d_time = (unsigned long)inode_peek_iversion(dir); + __unlock_super(sb); + + dentry = d_splice_alias(inode, dentry); + + TMSG("%s exited\n", __func__); + return dentry; +error: + __unlock_super(sb); + TMSG("%s exited with err(%d)\n", __func__, err); + return ERR_PTR(err); +} + + +static int sdfat_unlink(struct inode *dir, struct dentry *dentry) +{ + struct inode *inode = dentry->d_inode; + struct super_block *sb = dir->i_sb; + sdfat_timespec_t ts; + int err; + + __lock_super(sb); + + TMSG("%s entered\n", __func__); + + ts = CURRENT_TIME_SEC; + + SDFAT_I(inode)->fid.size = i_size_read(inode); + + __cancel_dfr_work(inode, 0, SDFAT_I(inode)->fid.size, __func__); + + err = fsapi_unlink(dir, &(SDFAT_I(inode)->fid)); + if (err) + goto out; + + __lock_d_revalidate(dentry); + + inode_inc_iversion(dir); + dir->i_mtime = dir->i_atime = ts; + if (IS_DIRSYNC(dir)) + (void) sdfat_sync_inode(dir); + else + mark_inode_dirty(dir); + + clear_nlink(inode); + inode->i_mtime = inode->i_atime = ts; + sdfat_detach(inode); + dentry->d_time = (unsigned long)inode_peek_iversion(dir); +out: + __unlock_d_revalidate(dentry); + __unlock_super(sb); + TMSG("%s exited with err(%d)\n", __func__, err); + return err; +} + +static int sdfat_symlink(struct inode *dir, struct dentry *dentry, const char *target) +{ + struct super_block *sb = dir->i_sb; + struct inode *inode; + sdfat_timespec_t ts; + FILE_ID_T fid; + loff_t i_pos; + int err; + u64 len = (u64) strlen(target); + u64 ret; + + /* symlink option check */ + if (!SDFAT_SB(sb)->options.symlink) + return -ENOTSUPP; + + __lock_super(sb); + + TMSG("%s entered\n", __func__); + + ts = CURRENT_TIME_SEC; + + err = fsapi_create(dir, (u8 *) dentry->d_name.name, FM_SYMLINK, &fid); + if (err) + goto out; + + err = fsapi_write_link(dir, &fid, (char *) target, len, &ret); + + if (err) { + fsapi_remove(dir, &fid); + goto out; + } + + __lock_d_revalidate(dentry); + + inode_inc_iversion(dir); + dir->i_ctime = dir->i_mtime = dir->i_atime = ts; + if (IS_DIRSYNC(dir)) + (void) sdfat_sync_inode(dir); + else + mark_inode_dirty(dir); + + i_pos = sdfat_make_i_pos(&fid); + inode = sdfat_build_inode(sb, &fid, i_pos); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + goto out; + } + inode_inc_iversion(inode); + inode->i_mtime = inode->i_atime = inode->i_ctime = ts; + /* timestamp is already written, so mark_inode_dirty() is unneeded. 
*/ + + SDFAT_I(inode)->target = kmalloc((len+1), GFP_KERNEL); + if (!SDFAT_I(inode)->target) { + err = -ENOMEM; + goto out; + } + memcpy(SDFAT_I(inode)->target, target, len+1); + + d_instantiate(dentry, inode); +out: + __unlock_d_revalidate(dentry); + __unlock_super(sb); + TMSG("%s exited with err(%d)\n", __func__, err); + return err; +} + + +static int __sdfat_mkdir(struct inode *dir, struct dentry *dentry) +{ + struct super_block *sb = dir->i_sb; + struct inode *inode; + sdfat_timespec_t ts; + FILE_ID_T fid; + loff_t i_pos; + int err; + + __lock_super(sb); + + TMSG("%s entered\n", __func__); + + ts = CURRENT_TIME_SEC; + + err = fsapi_mkdir(dir, (u8 *) dentry->d_name.name, &fid); + if (err) + goto out; + + __lock_d_revalidate(dentry); + + inode_inc_iversion(dir); + dir->i_ctime = dir->i_mtime = dir->i_atime = ts; + if (IS_DIRSYNC(dir)) + (void) sdfat_sync_inode(dir); + else + mark_inode_dirty(dir); + inc_nlink(dir); + + i_pos = sdfat_make_i_pos(&fid); + inode = sdfat_build_inode(sb, &fid, i_pos); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + goto out; + } + inode_inc_iversion(inode); + inode->i_mtime = inode->i_atime = inode->i_ctime = ts; + /* timestamp is already written, so mark_inode_dirty() is unneeded. */ + + d_instantiate(dentry, inode); + +out: + __unlock_d_revalidate(dentry); + __unlock_super(sb); + TMSG("%s exited with err(%d)\n", __func__, err); + if (!err) + sdfat_statistics_set_mkdir(fid.flags); + return err; +} + + +static int sdfat_rmdir(struct inode *dir, struct dentry *dentry) +{ + struct inode *inode = dentry->d_inode; + struct super_block *sb = dir->i_sb; + sdfat_timespec_t ts; + int err; + + __lock_super(sb); + + TMSG("%s entered\n", __func__); + + ts = CURRENT_TIME_SEC; + + SDFAT_I(inode)->fid.size = i_size_read(inode); + + err = fsapi_rmdir(dir, &(SDFAT_I(inode)->fid)); + if (err) + goto out; + + __lock_d_revalidate(dentry); + + inode_inc_iversion(dir); + dir->i_mtime = dir->i_atime = ts; + if (IS_DIRSYNC(dir)) + (void) sdfat_sync_inode(dir); + else + mark_inode_dirty(dir); + drop_nlink(dir); + + clear_nlink(inode); + inode->i_mtime = inode->i_atime = ts; + sdfat_detach(inode); + dentry->d_time = (unsigned long)inode_peek_iversion(dir); +out: + __unlock_d_revalidate(dentry); + __unlock_super(sb); + TMSG("%s exited with err(%d)\n", __func__, err); + return err; +} + +static int __sdfat_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry) +{ + struct inode *old_inode, *new_inode; + struct super_block *sb = old_dir->i_sb; + sdfat_timespec_t ts; + loff_t i_pos; + int err; + + __lock_super(sb); + + TMSG("%s entered\n", __func__); + + old_inode = old_dentry->d_inode; + new_inode = new_dentry->d_inode; + + ts = CURRENT_TIME_SEC; + + SDFAT_I(old_inode)->fid.size = i_size_read(old_inode); + + __cancel_dfr_work(old_inode, 0, 1, __func__); + + err = fsapi_rename(old_dir, &(SDFAT_I(old_inode)->fid), new_dir, new_dentry); + if (err) + goto out; + + __lock_d_revalidate(old_dentry); + __lock_d_revalidate(new_dentry); + + inode_inc_iversion(new_dir); + new_dir->i_ctime = new_dir->i_mtime = new_dir->i_atime = ts; + if (IS_DIRSYNC(new_dir)) + (void) sdfat_sync_inode(new_dir); + else + mark_inode_dirty(new_dir); + + i_pos = sdfat_make_i_pos(&(SDFAT_I(old_inode)->fid)); + sdfat_detach(old_inode); + sdfat_attach(old_inode, i_pos); + if (IS_DIRSYNC(new_dir)) + (void) sdfat_sync_inode(old_inode); + else + mark_inode_dirty(old_inode); + + if ((S_ISDIR(old_inode->i_mode)) && (old_dir != new_dir)) { + drop_nlink(old_dir); + if 
(!new_inode) + inc_nlink(new_dir); + } + + inode_inc_iversion(old_dir); + old_dir->i_ctime = old_dir->i_mtime = ts; + if (IS_DIRSYNC(old_dir)) + (void) sdfat_sync_inode(old_dir); + else + mark_inode_dirty(old_dir); + + if (new_inode) { + sdfat_detach(new_inode); + + /* skip drop_nlink if new_inode already has been dropped */ + if (new_inode->i_nlink) { + drop_nlink(new_inode); + if (S_ISDIR(new_inode->i_mode)) + drop_nlink(new_inode); + } else { + EMSG("%s : abnormal access to an inode dropped\n", + __func__); + WARN_ON(new_inode->i_nlink == 0); + } + new_inode->i_ctime = ts; +#if 0 + (void) sdfat_sync_inode(new_inode); +#endif + } + +out: + __unlock_d_revalidate(old_dentry); + __unlock_d_revalidate(new_dentry); + __unlock_super(sb); + TMSG("%s exited with err(%d)\n", __func__, err); + return err; +} + +static int sdfat_cont_expand(struct inode *inode, loff_t size) +{ + struct address_space *mapping = inode->i_mapping; + loff_t start = i_size_read(inode), count = size - i_size_read(inode); + int err, err2; + + err = generic_cont_expand_simple(inode, size); + if (err) + return err; + + inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC; + mark_inode_dirty(inode); + + if (!IS_SYNC(inode)) + return 0; + + err = filemap_fdatawrite_range(mapping, start, start + count - 1); + err2 = sync_mapping_buffers(mapping); + err = (err)?(err):(err2); + err2 = write_inode_now(inode, 1); + err = (err)?(err):(err2); + if (err) + return err; + + return filemap_fdatawait_range(mapping, start, start + count - 1); +} + +static int sdfat_allow_set_time(struct sdfat_sb_info *sbi, struct inode *inode) +{ + mode_t allow_utime = sbi->options.allow_utime; + + if (!uid_eq(current_fsuid(), inode->i_uid)) { + if (in_group_p(inode->i_gid)) + allow_utime >>= 3; + if (allow_utime & MAY_WRITE) + return 1; + } + + /* use a default check */ + return 0; +} + +static int sdfat_sanitize_mode(const struct sdfat_sb_info *sbi, + struct inode *inode, umode_t *mode_ptr) +{ + mode_t i_mode, mask, perm; + + i_mode = inode->i_mode; + + if (S_ISREG(i_mode) || S_ISLNK(i_mode)) + mask = sbi->options.fs_fmask; + else + mask = sbi->options.fs_dmask; + + perm = *mode_ptr & ~(S_IFMT | mask); + + /* Of the r and x bits, all (subject to umask) must be present.*/ + if ((perm & (S_IRUGO | S_IXUGO)) != (i_mode & (S_IRUGO | S_IXUGO))) + return -EPERM; + + if (sdfat_mode_can_hold_ro(inode)) { + /* Of the w bits, either all (subject to umask) or none must be present. */ + if ((perm & S_IWUGO) && ((perm & S_IWUGO) != (S_IWUGO & ~mask))) + return -EPERM; + } else { + /* If sdfat_mode_can_hold_ro(inode) is false, can't change w bits. */ + if ((perm & S_IWUGO) != (S_IWUGO & ~mask)) + return -EPERM; + } + + *mode_ptr &= S_IFMT | perm; + + return 0; +} + +/* + * sdfat_block_truncate_page() zeroes out a mapping from file offset `from' + * up to the end of the block which corresponds to `from'. + * This is required during truncate to physically zeroout the tail end + * of that block so it doesn't yield old data if the file is later grown. 
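+ * block_truncate_page() performs the zeroing through the page cache,
+ * with sdfat_get_block() supplying the block mapping.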
+ * Also, avoid causing failure from fsx for cases of "data past EOF" + */ +static int sdfat_block_truncate_page(struct inode *inode, loff_t from) +{ + return block_truncate_page(inode->i_mapping, from, sdfat_get_block); +} + +static int sdfat_setattr(struct dentry *dentry, struct iattr *attr) +{ + + struct sdfat_sb_info *sbi = SDFAT_SB(dentry->d_sb); + struct inode *inode = dentry->d_inode; + unsigned int ia_valid; + int error; + loff_t old_size; + + TMSG("%s entered\n", __func__); + + if ((attr->ia_valid & ATTR_SIZE) + && (attr->ia_size > i_size_read(inode))) { + error = sdfat_cont_expand(inode, attr->ia_size); + if (error || attr->ia_valid == ATTR_SIZE) + goto out; + attr->ia_valid &= ~ATTR_SIZE; + } + + /* Check for setting the inode time. */ + ia_valid = attr->ia_valid; + if ((ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)) + && sdfat_allow_set_time(sbi, inode)) { + attr->ia_valid &= ~(ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET); + } + + error = setattr_prepare(dentry, attr); + attr->ia_valid = ia_valid; + if (error) + goto out; + + if (((attr->ia_valid & ATTR_UID) && + (!uid_eq(attr->ia_uid, sbi->options.fs_uid))) || + ((attr->ia_valid & ATTR_GID) && + (!gid_eq(attr->ia_gid, sbi->options.fs_gid))) || + ((attr->ia_valid & ATTR_MODE) && + (attr->ia_mode & ~(S_IFREG | S_IFLNK | S_IFDIR | S_IRWXUGO)))) { + error = -EPERM; + goto out; + } + + /* + * We don't return -EPERM here. Yes, strange, but this is too + * old behavior. + */ + if (attr->ia_valid & ATTR_MODE) { + if (sdfat_sanitize_mode(sbi, inode, &attr->ia_mode) < 0) + attr->ia_valid &= ~ATTR_MODE; + } + + SDFAT_I(inode)->fid.size = i_size_read(inode); + + /* patch 1.2.0 : fixed the problem of size mismatch. */ + if (attr->ia_valid & ATTR_SIZE) { + error = sdfat_block_truncate_page(inode, attr->ia_size); + if (error) + goto out; + + old_size = i_size_read(inode); + + /* TO CHECK evicting directory works correctly */ + MMSG("%s: inode(%p) truncate size (%llu->%llu)\n", __func__, + inode, (u64)old_size, (u64)attr->ia_size); + __sdfat_do_truncate(inode, old_size, attr->ia_size); + } + setattr_copy(inode, attr); + mark_inode_dirty(inode); +out: + TMSG("%s exited with err(%d)\n", __func__, error); + return error; +} + +static const struct inode_operations sdfat_dir_inode_operations = { + .create = sdfat_create, + .lookup = sdfat_lookup, + .unlink = sdfat_unlink, + .symlink = sdfat_symlink, + .mkdir = sdfat_mkdir, + .rmdir = sdfat_rmdir, + .rename = sdfat_rename, + .setattr = sdfat_setattr, + .getattr = sdfat_getattr, +#ifdef CONFIG_SDFAT_VIRTUAL_XATTR + .listxattr = sdfat_listxattr, +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0) + .setxattr = sdfat_setxattr, + .getxattr = sdfat_getxattr, + .removexattr = sdfat_removexattr, +#endif +#endif +}; + +/*======================================================================*/ +/* File Operations */ +/*======================================================================*/ +static const struct inode_operations sdfat_symlink_inode_operations = { +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) + .readlink = generic_readlink, +#endif +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 5, 0) + .get_link = sdfat_follow_link, +#else /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 5, 0) */ + .follow_link = sdfat_follow_link, +#endif +#ifdef CONFIG_SDFAT_VIRTUAL_XATTR + .listxattr = sdfat_listxattr, +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0) + .setxattr = sdfat_setxattr, + .getxattr = sdfat_getxattr, + .removexattr = sdfat_removexattr, +#endif +#endif +}; + +static int 
sdfat_file_release(struct inode *inode, struct file *filp)
+{
+	struct super_block *sb = inode->i_sb;
+
+	/* This code was moved here from sdfat_write_inode
+	 * to fix the size-mismatch problem.
+	 */
+	/* FIXME : Added bug_on to confirm that there is no size mismatch */
+	sdfat_debug_bug_on(SDFAT_I(inode)->fid.size != i_size_read(inode));
+	SDFAT_I(inode)->fid.size = i_size_read(inode);
+	fsapi_sync_fs(sb, 0);
+	return 0;
+}
+
+static const struct file_operations sdfat_file_operations = {
+	.llseek		= generic_file_llseek,
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 1, 0)
+	.read_iter	= generic_file_read_iter,
+	.write_iter	= generic_file_write_iter,
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0)
+	.read		= new_sync_read,
+	.write		= new_sync_write,
+	.read_iter	= generic_file_read_iter,
+	.write_iter	= generic_file_write_iter,
+#else /* LINUX_VERSION_CODE < KERNEL_VERSION(3, 16, 0) */
+	.read		= do_sync_read,
+	.write		= do_sync_write,
+	.aio_read	= generic_file_aio_read,
+	.aio_write	= generic_file_aio_write,
+#endif
+	.mmap		= sdfat_file_mmap,
+	.release	= sdfat_file_release,
+	.unlocked_ioctl	= sdfat_generic_ioctl,
+	.fsync		= sdfat_file_fsync,
+	.splice_read	= generic_file_splice_read,
+};
+
+static const struct address_space_operations sdfat_da_aops;
+static const struct address_space_operations sdfat_aops;
+
+static void sdfat_truncate(struct inode *inode, loff_t old_size)
+{
+	struct super_block *sb = inode->i_sb;
+	struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+	FS_INFO_T *fsi = &(sbi->fsi);
+	unsigned int blocksize = 1 << inode->i_blkbits;
+	loff_t aligned_size;
+	int err;
+
+	__lock_super(sb);
+
+	if (SDFAT_I(inode)->fid.start_clu == 0) {
+		/* Strange state:
+		 * start_clu is 0 here, not ~0 (the "not allocated" marker)
+		 */
+		sdfat_fs_error(sb, "tried to truncate zeroed cluster.");
+		goto out;
+	}
+
+	sdfat_debug_check_clusters(inode);
+
+	__cancel_dfr_work(inode, (loff_t)i_size_read(inode), (loff_t)old_size, __func__);
+
+	err = fsapi_truncate(inode, old_size, i_size_read(inode));
+	if (err)
+		goto out;
+
+	inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC;
+	if (IS_DIRSYNC(inode))
+		(void) sdfat_sync_inode(inode);
+	else
+		mark_inode_dirty(inode);
+
+	// FIXME: needs double-checking
+	// inode->i_blocks = ((SDFAT_I(inode)->i_size_ondisk + (fsi->cluster_size - 1))
+	inode->i_blocks = ((i_size_read(inode) + (fsi->cluster_size - 1)) &
+			~((loff_t)fsi->cluster_size - 1)) >> inode->i_blkbits;
+out:
+	/*
+	 * This protects against truncating a file bigger than it was then
+	 * trying to write into the hole.
+	 *
+	 * comment by sh.hong:
+	 * This seems to mean 'intra page/block' truncate and writing.
+	 * I couldn't find a reason to change the values prior to fsapi_truncate.
+	 * Therefore, I switched the order of operations
+	 * so that it's possible to utilize i_size_ondisk in fsapi_truncate
+	 */
+
+	aligned_size = i_size_read(inode);
+	if (aligned_size & (blocksize - 1)) {
+		aligned_size |= (blocksize - 1);
+		aligned_size++;
+	}
+
+	if (SDFAT_I(inode)->i_size_ondisk > i_size_read(inode))
+		SDFAT_I(inode)->i_size_ondisk = aligned_size;
+
+	sdfat_debug_check_clusters(inode);
+
+	if (SDFAT_I(inode)->i_size_aligned > i_size_read(inode))
+		SDFAT_I(inode)->i_size_aligned = aligned_size;
+
+	/* After truncation:
+	 * 1) Delayed allocation is OFF
+	 *    i_size = i_size_ondisk <= i_size_aligned
+	 *    (useless size var.)
+ * (block-aligned) + * 2) Delayed allocation is ON + * i_size = i_size_ondisk = i_size_aligned + * (will be block-aligned after write) + * or + * i_size_ondisk < i_size <= i_size_aligned (block_aligned) + * (will be block-aligned after write) + */ + + __unlock_super(sb); +} + +static const struct inode_operations sdfat_file_inode_operations = { + .setattr = sdfat_setattr, + .getattr = sdfat_getattr, +#ifdef CONFIG_SDFAT_VIRTUAL_XATTR + .listxattr = sdfat_listxattr, +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0) + .setxattr = sdfat_setxattr, + .getxattr = sdfat_getxattr, + .removexattr = sdfat_removexattr, +#endif +#endif +}; + +/*======================================================================*/ +/* Address Space Operations */ +/*======================================================================*/ +/* 2-level option flag */ +#define BMAP_NOT_CREATE 0 +#define BMAP_ADD_BLOCK 1 +#define BMAP_ADD_CLUSTER 2 +#define BLOCK_ADDED(bmap_ops) (bmap_ops) +static int sdfat_bmap(struct inode *inode, sector_t sector, sector_t *phys, + unsigned long *mapped_blocks, int *create) +{ + struct super_block *sb = inode->i_sb; + struct sdfat_sb_info *sbi = SDFAT_SB(sb); + FS_INFO_T *fsi = &(sbi->fsi); + const unsigned long blocksize = sb->s_blocksize; + const unsigned char blocksize_bits = sb->s_blocksize_bits; + sector_t last_block; + unsigned int cluster, clu_offset, sec_offset; + int err = 0; + + *phys = 0; + *mapped_blocks = 0; + + /* core code should handle EIO */ +#if 0 + if (fsi->prev_eio && BLOCK_ADDED(*create)) + return -EIO; +#endif + + if (((fsi->vol_type == FAT12) || (fsi->vol_type == FAT16)) && + (inode->i_ino == SDFAT_ROOT_INO)) { + if (sector < (fsi->dentries_in_root >> + (sb->s_blocksize_bits - DENTRY_SIZE_BITS))) { + *phys = sector + fsi->root_start_sector; + *mapped_blocks = 1; + } + return 0; + } + + last_block = (i_size_read(inode) + (blocksize - 1)) >> blocksize_bits; + if ((sector >= last_block) && (*create == BMAP_NOT_CREATE)) + return 0; + + /* Is this block already allocated? 
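+	 * Convert the sector to a cluster offset and look the physical
+	 * cluster up via the FAT core; a new cluster is allocated only
+	 * when the caller passed BMAP_ADD_CLUSTER.
+	 * E.g. with 8 sectors per cluster, sector 21 maps to cluster
+	 * offset 2 (21 >> 3), sector offset 5 (21 & 7) within it.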
	 */
+	clu_offset = sector >> fsi->sect_per_clus_bits;	/* cluster offset */
+
+	SDFAT_I(inode)->fid.size = i_size_read(inode);
+
+
+	if (unlikely(__check_dfr_on(inode,
+		(loff_t)((loff_t)clu_offset << fsi->cluster_size_bits),
+		(loff_t)((loff_t)(clu_offset + 1) << fsi->cluster_size_bits),
+		__func__))) {
+		err = __do_dfr_map_cluster(inode, clu_offset, &cluster);
+	} else {
+		if (*create & BMAP_ADD_CLUSTER)
+			err = fsapi_map_clus(inode, clu_offset, &cluster, 1);
+		else
+			err = fsapi_map_clus(inode, clu_offset, &cluster, ALLOC_NOWHERE);
+	}
+
+	if (err) {
+		if (err != -ENOSPC)
+			return -EIO;
+		return err;
+	}
+
+	/* FOR BIGDATA */
+	sdfat_statistics_set_rw(SDFAT_I(inode)->fid.flags,
+			clu_offset, *create & BMAP_ADD_CLUSTER);
+
+	if (!IS_CLUS_EOF(cluster)) {
+		/* sector offset in cluster */
+		sec_offset = sector & (fsi->sect_per_clus - 1);
+
+		*phys = CLUS_TO_SECT(fsi, cluster) + sec_offset;
+		*mapped_blocks = fsi->sect_per_clus - sec_offset;
+	}
+#if 0
+	else {
+		/* Debug purpose (new clu needed) */
+		ASSERT((*create & BMAP_ADD_CLUSTER) == 0);
+		ASSERT(sector >= last_block);
+	}
+#endif
+
+	if (sector < last_block)
+		*create = BMAP_NOT_CREATE;
+#if 0
+	else if (sector >= last_block)
+		*create = non-zero;
+
+	if (iblock <= last mapped-block)
+		*phys != 0
+		*create = BMAP_NOT_CREATE
+	else if (iblock <= last cluster)
+		*phys != 0
+		*create = non-zero
+#endif
+	return 0;
+}
+
+static int sdfat_da_prep_block(struct inode *inode, sector_t iblock,
+		struct buffer_head *bh_result, int create)
+{
+	struct super_block *sb = inode->i_sb;
+	struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+	FS_INFO_T *fsi = &(sbi->fsi);
+	unsigned long max_blocks = bh_result->b_size >> inode->i_blkbits;
+	unsigned long mapped_blocks;
+	sector_t phys;
+	loff_t pos;
+	int sec_offset;
+	int bmap_create = create ? BMAP_ADD_BLOCK : BMAP_NOT_CREATE;
+	int err = 0;
+
+	__lock_super(sb);
+
+	/* FAT32 only */
+	ASSERT(fsi->vol_type == FAT32);
+
+	err = sdfat_bmap(inode, iblock, &phys, &mapped_blocks, &bmap_create);
+	if (err) {
+		if (err != -ENOSPC)
+			sdfat_fs_error_ratelimit(sb, "%s: failed to bmap "
+					"(iblock:%u, err:%d)", __func__,
+					(u32)iblock, err);
+		goto unlock_ret;
+	}
+
+	sec_offset = iblock & (fsi->sect_per_clus - 1);
+
+	if (phys) {
+		/* the block is in the mapped cluster boundary */
+		max_blocks = min(mapped_blocks, max_blocks);
+		map_bh(bh_result, sb, phys);
+
+		BUG_ON(BLOCK_ADDED(bmap_create) && (sec_offset == 0));
+
+	} else if (create == 1) {
+		/* Not exist: new cluster needed */
+		if (!BLOCK_ADDED(bmap_create)) {
+			sector_t last_block;
+			last_block = (i_size_read(inode) + (sb->s_blocksize - 1))
+					>> sb->s_blocksize_bits;
+			sdfat_fs_error(sb, "%s: new cluster needed, but "
+				"bmap_create == BMAP_NOT_CREATE(iblock:%lld, "
+				"last_block:%lld)", __func__,
+				(s64)iblock, (s64)last_block);
+			err = -EIO;
+			goto unlock_ret;
+		}
+
+		// Reserved Cluster (only if iblock is the first sector in a clu)
+		if (sec_offset == 0) {
+			err = fsapi_reserve_clus(inode);
+			if (err) {
+				if (err != -ENOSPC)
+					sdfat_fs_error_ratelimit(sb,
+						"%s: failed to bmap "
+						"(iblock:%u, err:%d)", __func__,
+						(u32)iblock, err);
+
+				goto unlock_ret;
+			}
+		}
+
+		// Delayed mapping
+		map_bh(bh_result, sb, ~((sector_t) 0xffff));
+		set_buffer_new(bh_result);
+		set_buffer_delay(bh_result);
+
+	} else {
+		/* get_block on non-existing addr. with create==0 */
+		/*
+		 * CHECKME:
+		 * If the block is below i_size_aligned, shouldn't delayed
+		 * mapping be turned on here as well?
+		 * - Since 0-fill is always done, this is fine on FAT.
+		 *   Once an area in the middle has been fully written, it
+		 *   is never invalidated without first going down to disk.
+		 */
+		goto unlock_ret;
+	}
+
+
+	/* Newly added blocks */
+	if (BLOCK_ADDED(bmap_create)) {
+		set_buffer_new(bh_result);
+
+		SDFAT_I(inode)->i_size_aligned += max_blocks << sb->s_blocksize_bits;
+		if (phys) {
+			/* i_size_ondisk changes if a block added in the existing cluster */
+			#define num_clusters(value) ((value) ? (s32)((value - 1) >> fsi->cluster_size_bits) + 1 : 0)
+
+			/* FOR GRACEFUL ERROR HANDLING */
+			if (num_clusters(SDFAT_I(inode)->i_size_aligned) !=
+			    num_clusters(SDFAT_I(inode)->i_size_ondisk)) {
+				EMSG("%s: inode(%p) invalid size (create(%d) "
+					"bmap_create(%d) phys(%lld) aligned(%lld) "
+					"on_disk(%lld) iblock(%u) sec_off(%d))\n",
+					__func__, inode, create, bmap_create, (s64)phys,
+					(s64)SDFAT_I(inode)->i_size_aligned,
+					(s64)SDFAT_I(inode)->i_size_ondisk,
+					(u32)iblock,
+					(s32)sec_offset);
+				sdfat_debug_bug_on(1);
+			}
+			SDFAT_I(inode)->i_size_ondisk = SDFAT_I(inode)->i_size_aligned;
+		}
+
+		pos = (iblock + 1) << sb->s_blocksize_bits;
+		/* Debug purpose - defensive coding */
+		ASSERT(SDFAT_I(inode)->i_size_aligned == pos);
+		if (SDFAT_I(inode)->i_size_aligned < pos)
+			SDFAT_I(inode)->i_size_aligned = pos;
+		/* Debug end */
+
+#ifdef CONFIG_SDFAT_TRACE_IO
+		/* New page added (ASSERTION: 8 blocks per page) */
+		if ((sec_offset & 7) == 0)
+			sbi->stat_n_pages_added++;
+#endif
+	}
+
+	/* FOR GRACEFUL ERROR HANDLING */
+	if (i_size_read(inode) > SDFAT_I(inode)->i_size_aligned) {
+		sdfat_fs_error_ratelimit(sb, "%s: invalid size (inode(%p), "
+			"size(%llu) > aligned(%llu)\n", __func__, inode,
+			i_size_read(inode), SDFAT_I(inode)->i_size_aligned);
+		sdfat_debug_bug_on(1);
+	}
+
+	bh_result->b_size = max_blocks << sb->s_blocksize_bits;
+
+unlock_ret:
+	__unlock_super(sb);
+	return err;
+}
+
+static int sdfat_get_block(struct inode *inode, sector_t iblock,
+		struct buffer_head *bh_result, int create)
+{
+	struct super_block *sb = inode->i_sb;
+	unsigned long max_blocks = bh_result->b_size >> inode->i_blkbits;
+	int err = 0;
+	unsigned long mapped_blocks;
+	sector_t phys;
+	loff_t pos;
+	int bmap_create = create ? BMAP_ADD_CLUSTER : BMAP_NOT_CREATE;
+
+	__lock_super(sb);
+	err = sdfat_bmap(inode, iblock, &phys, &mapped_blocks, &bmap_create);
+	if (err) {
+		if (err != -ENOSPC)
+			sdfat_fs_error_ratelimit(sb, "%s: failed to bmap "
+					"(inode:%p iblock:%u, err:%d)",
+					__func__, inode, (u32)iblock, err);
+		goto unlock_ret;
+	}
+
+	if (phys) {
+		max_blocks = min(mapped_blocks, max_blocks);
+
+		/* Treat newly added block / cluster */
+		if (BLOCK_ADDED(bmap_create) || buffer_delay(bh_result)) {
+
+			/* Update i_size_ondisk */
+			pos = (iblock + 1) << sb->s_blocksize_bits;
+			if (SDFAT_I(inode)->i_size_ondisk < pos) {
+				/* Debug purpose */
+				if ((pos - SDFAT_I(inode)->i_size_ondisk) > bh_result->b_size) {
+					/* This never happens without DA */
+					MMSG("Jumping get_block\n");
+				}
+
+				SDFAT_I(inode)->i_size_ondisk = pos;
+				sdfat_debug_check_clusters(inode);
+			}
+
+			if (BLOCK_ADDED(bmap_create)) {
+				/* Old way (w/o DA)
+				 * create == 1 only if iblock > i_size
+				 * (in block unit)
+				 */
+
+				/* 20130723 CHECK
+				 * If this races with a truncate, buffer_delay()
+				 * can still be set while i_size < (position of
+				 * i_block).
+				 *
+				 * We are only rewriting an already-allocated
+				 * area, so this is not a big problem; in that
+				 * case, though, the area must have been
+				 * extended up to i_size_aligned beforehand.
+ */ + + /* FOR GRACEFUL ERROR HANDLING */ + if (buffer_delay(bh_result) && + (pos > SDFAT_I(inode)->i_size_aligned)) { + sdfat_fs_error(sb, "requested for bmap " + "out of range(pos:(%llu)>i_size_aligned(%llu)\n", + pos, SDFAT_I(inode)->i_size_aligned); + sdfat_debug_bug_on(1); + err = -EIO; + goto unlock_ret; + } + set_buffer_new(bh_result); + + /* + * adjust i_size_aligned if i_size_ondisk is + * bigger than it. (i.e. non-DA) + */ + if (SDFAT_I(inode)->i_size_ondisk > + SDFAT_I(inode)->i_size_aligned) { + SDFAT_I(inode)->i_size_aligned = + SDFAT_I(inode)->i_size_ondisk; + } + } + + if (buffer_delay(bh_result)) + clear_buffer_delay(bh_result); + +#if 0 + /* Debug purpose */ + if (SDFAT_I(inode)->i_size_ondisk > + SDFAT_I(inode)->i_size_aligned) { + /* Only after truncate + * and the two size variables should indicate + * same i_block + */ + unsigned int blocksize = 1 << inode->i_blkbits; + BUG_ON(SDFAT_I(inode)->i_size_ondisk - + SDFAT_I(inode)->i_size_aligned >= blocksize); + } +#endif + } + map_bh(bh_result, sb, phys); + } + + bh_result->b_size = max_blocks << sb->s_blocksize_bits; +unlock_ret: + __unlock_super(sb); + return err; +} + +static int sdfat_readpage(struct file *file, struct page *page) +{ + int ret; + + ret = mpage_readpage(page, sdfat_get_block); + return ret; +} + +static int sdfat_readpages(struct file *file, struct address_space *mapping, + struct list_head *pages, unsigned int nr_pages) +{ + int ret; + + ret = mpage_readpages(mapping, pages, nr_pages, sdfat_get_block); + return ret; +} + +static inline void sdfat_submit_fullpage_bio(struct block_device *bdev, + sector_t sector, unsigned int length, struct page *page) +{ + /* Single page bio submit */ + struct bio *bio; + + BUG_ON((length > PAGE_SIZE) || (length == 0)); + + /* + * If __GFP_WAIT is set, then bio_alloc will always be able to allocate + * a bio. This is due to the mempool guarantees. To make this work, callers + * must never allocate more than 1 bio at a time from this pool. + * + * #define GFP_NOIO (__GFP_WAIT) + */ + bio = bio_alloc(GFP_NOIO, 1); + + bio_set_dev(bio, bdev); + bio->bi_vcnt = 1; + bio->bi_io_vec[0].bv_page = page; /* Inline vec */ + bio->bi_io_vec[0].bv_len = length; /* PAGE_SIZE */ + bio->bi_io_vec[0].bv_offset = 0; + __sdfat_set_bio_iterate(bio, sector, length, 0, 0); + + bio->bi_end_io = sdfat_writepage_end_io; + __sdfat_submit_bio_write(bio); +} + +static int sdfat_writepage(struct page *page, struct writeback_control *wbc) +{ + struct inode * const inode = page->mapping->host; + struct super_block *sb = inode->i_sb; + loff_t i_size = i_size_read(inode); + const pgoff_t end_index = i_size >> PAGE_SHIFT; + const unsigned int blocks_per_page = PAGE_SIZE >> inode->i_blkbits; + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + struct buffer_head *bh, *head; + sector_t block, block_0, last_phys; + int ret; + unsigned int nr_blocks_towrite = blocks_per_page; + + /* Don't distinguish 0-filled/clean block. 
+ * Just write back the whole page + */ + if (fsi->cluster_size < PAGE_SIZE) + goto confused; + + if (!PageUptodate(page)) { + MMSG("%s: Not up-to-date page -> block_write_full_page\n", + __func__); + goto confused; + } + + if (page->index >= end_index) { + /* last page or outside i_size */ + unsigned int offset = i_size & (PAGE_SIZE-1); + + /* If a truncation is in progress */ + if (page->index > end_index || !offset) + goto confused; + + /* 0-fill after i_size */ + zero_user_segment(page, offset, PAGE_SIZE); + } + + if (!page_has_buffers(page)) { + MMSG("WP: No buffers -> block_write_full_page\n"); + goto confused; + } + + block = (sector_t)page->index << (PAGE_SHIFT - inode->i_blkbits); + block_0 = block; /* first block */ + head = page_buffers(page); + bh = head; + + last_phys = 0; + do { + BUG_ON(buffer_locked(bh)); + + if (!buffer_dirty(bh) || !buffer_uptodate(bh)) { + if (nr_blocks_towrite == blocks_per_page) + nr_blocks_towrite = (unsigned int) (block - block_0); + + BUG_ON(nr_blocks_towrite >= blocks_per_page); + + // !uptodate but dirty?? + if (buffer_dirty(bh)) + goto confused; + + // Nothing to writeback in this block + bh = bh->b_this_page; + block++; + continue; + } + + if (nr_blocks_towrite != blocks_per_page) + // Dirty -> Non-dirty -> Dirty again case + goto confused; + + /* Map if needed */ + if (!buffer_mapped(bh) || buffer_delay(bh)) { + BUG_ON(bh->b_size != (1 << (inode->i_blkbits))); + ret = sdfat_get_block(inode, block, bh, 1); + if (ret) + goto confused; + + if (buffer_new(bh)) { + clear_buffer_new(bh); + __sdfat_clean_bdev_aliases(bh->b_bdev, bh->b_blocknr); + } + } + + /* continuity check */ + if (((last_phys + 1) != bh->b_blocknr) && (last_phys != 0)) { + DMSG("Non-contiguous block mapping in single page"); + goto confused; + } + + last_phys = bh->b_blocknr; + bh = bh->b_this_page; + block++; + } while (bh != head); + + if (nr_blocks_towrite == 0) { + DMSG("Page dirty but no dirty bh? alloc_208\n"); + goto confused; + } + + + /* Write-back */ + do { + clear_buffer_dirty(bh); + bh = bh->b_this_page; + } while (bh != head); + + BUG_ON(PageWriteback(page)); + set_page_writeback(page); + + /** + * Turn off MAPPED flag in victim's bh if defrag on. + * Another write_begin can starts after get_block for defrag victims called. + * In this case, write_begin calls get_block and get original block number + * and previous defrag will be canceled. + */ + if (unlikely(__check_dfr_on(inode, + (loff_t)(page->index << PAGE_SHIFT), + (loff_t)((page->index + 1) << PAGE_SHIFT), + __func__))) { + do { + clear_buffer_mapped(bh); + bh = bh->b_this_page; + } while (bh != head); + } + + // Trace # of pages queued (Approx.) 
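+	// (matched by the atomic_dec() in __sdfat_writepage_end_io() at bio completion)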
+ atomic_inc(&SDFAT_SB(sb)->stat_n_pages_queued); + + sdfat_submit_fullpage_bio(head->b_bdev, + head->b_blocknr << (sb->s_blocksize_bits - SECTOR_SIZE_BITS), + nr_blocks_towrite << inode->i_blkbits, + page); + + unlock_page(page); + + return 0; + +confused: +#ifdef CONFIG_SDFAT_TRACE_IO + SDFAT_SB(sb)->stat_n_pages_confused++; +#endif + ret = block_write_full_page(page, sdfat_get_block, wbc); + return ret; +} + +static int sdfat_da_writepages(struct address_space *mapping, + struct writeback_control *wbc) +{ + MMSG("%s(inode:%p) with nr_to_write = 0x%08lx " + "(ku %d, bg %d, tag %d, rc %d )\n", + __func__, mapping->host, wbc->nr_to_write, + wbc->for_kupdate, wbc->for_background, wbc->tagged_writepages, + wbc->for_reclaim); + + ASSERT(mapping->a_ops == &sdfat_da_aops); + +#ifdef CONFIG_SDFAT_ALIGNED_MPAGE_WRITE + if (SDFAT_SB(mapping->host->i_sb)->options.adj_req) + return sdfat_mpage_writepages(mapping, wbc, sdfat_get_block); +#endif + return generic_writepages(mapping, wbc); +} + +static int sdfat_writepages(struct address_space *mapping, + struct writeback_control *wbc) +{ + MMSG("%s(inode:%p) with nr_to_write = 0x%08lx " + "(ku %d, bg %d, tag %d, rc %d )\n", + __func__, mapping->host, wbc->nr_to_write, + wbc->for_kupdate, wbc->for_background, wbc->tagged_writepages, + wbc->for_reclaim); + + ASSERT(mapping->a_ops == &sdfat_aops); + +#ifdef CONFIG_SDFAT_ALIGNED_MPAGE_WRITE + if (SDFAT_SB(mapping->host->i_sb)->options.adj_req) + return sdfat_mpage_writepages(mapping, wbc, sdfat_get_block); +#endif + return mpage_writepages(mapping, wbc, sdfat_get_block); +} + +static void sdfat_write_failed(struct address_space *mapping, loff_t to) +{ + struct inode *inode = mapping->host; + + if (to > i_size_read(inode)) { + __sdfat_truncate_pagecache(inode, to, i_size_read(inode)); + sdfat_truncate(inode, SDFAT_I(inode)->i_size_aligned); + } +} + +static int sdfat_check_writable(struct super_block *sb) +{ + if (fsapi_check_bdi_valid(sb)) + return -EIO; + + if (sb->s_flags & MS_RDONLY) + return -EROFS; + + return 0; +} + +static int __sdfat_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned int len, + unsigned int flags, struct page **pagep, + void **fsdata, get_block_t *get_block, + loff_t *bytes, const char *fname) +{ + struct super_block *sb = mapping->host->i_sb; + int ret; + + __cancel_dfr_work(mapping->host, pos, (loff_t)(pos + len), fname); + + ret = sdfat_check_writable(sb); + if (unlikely(ret < 0)) + return ret; + + *pagep = NULL; + ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, + get_block, bytes); + + if (ret < 0) + sdfat_write_failed(mapping, pos+len); + + return ret; +} + + +static int sdfat_da_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned int len, unsigned int flags, + struct page **pagep, void **fsdata) +{ + return __sdfat_write_begin(file, mapping, pos, len, flags, + pagep, fsdata, sdfat_da_prep_block, + &SDFAT_I(mapping->host)->i_size_aligned, + __func__); +} + + +static int sdfat_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned int len, unsigned int flags, + struct page **pagep, void **fsdata) +{ + return __sdfat_write_begin(file, mapping, pos, len, flags, + pagep, fsdata, sdfat_get_block, + &SDFAT_I(mapping->host)->i_size_ondisk, + __func__); +} + +static int sdfat_write_end(struct file *file, struct address_space *mapping, + loff_t pos, unsigned int len, unsigned int copied, + struct page *pagep, void *fsdata) +{ + struct inode *inode = mapping->host; + 
FILE_ID_T *fid = &(SDFAT_I(inode)->fid); + int err; + + err = generic_write_end(file, mapping, pos, len, copied, pagep, fsdata); + + /* FOR GRACEFUL ERROR HANDLING */ + if (SDFAT_I(inode)->i_size_aligned < i_size_read(inode)) { + sdfat_fs_error(inode->i_sb, "invalid size(size(%llu) " + "> aligned(%llu)\n", i_size_read(inode), + SDFAT_I(inode)->i_size_aligned); + sdfat_debug_bug_on(1); + } + + if (err < len) + sdfat_write_failed(mapping, pos+len); + + if (!(err < 0) && !(fid->attr & ATTR_ARCHIVE)) { + inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; + fid->attr |= ATTR_ARCHIVE; + mark_inode_dirty(inode); + } + + return err; +} + +static inline ssize_t __sdfat_direct_IO(int rw, struct kiocb *iocb, + struct inode *inode, void *iov_u, loff_t offset, + loff_t count, unsigned long nr_segs) +{ + struct address_space *mapping = inode->i_mapping; + loff_t size = offset + count; + ssize_t ret; + + if (rw == WRITE) { + /* + * FIXME: blockdev_direct_IO() doesn't use ->write_begin(), + * so we need to update the ->i_size_aligned to block boundary. + * + * But we must fill the remaining area or hole by nul for + * updating ->i_size_aligned + * + * Return 0, and fallback to normal buffered write. + */ + if (SDFAT_I(inode)->i_size_aligned < size) + return 0; + } + + /* + * sdFAT need to use the DIO_LOCKING for avoiding the race + * condition of sdfat_get_block() and ->truncate(). + */ + ret = __sdfat_blkdev_direct_IO(rw, iocb, inode, iov_u, offset, nr_segs); + if (ret < 0 && (rw & WRITE)) + sdfat_write_failed(mapping, size); + + return ret; +} + +static const struct address_space_operations sdfat_aops = { + .readpage = sdfat_readpage, + .readpages = sdfat_readpages, + .writepage = sdfat_writepage, + .writepages = sdfat_writepages, + .write_begin = sdfat_write_begin, + .write_end = sdfat_write_end, + .direct_IO = sdfat_direct_IO, + .bmap = sdfat_aop_bmap +}; + +static const struct address_space_operations sdfat_da_aops = { + .readpage = sdfat_readpage, + .readpages = sdfat_readpages, + .writepage = sdfat_writepage, + .writepages = sdfat_da_writepages, + .write_begin = sdfat_da_write_begin, + .write_end = sdfat_write_end, + .direct_IO = sdfat_direct_IO, + .bmap = sdfat_aop_bmap +}; + +/*======================================================================*/ +/* Super Operations */ +/*======================================================================*/ + +static inline unsigned long sdfat_hash(loff_t i_pos) +{ + return hash_32(i_pos, SDFAT_HASH_BITS); +} + +static void sdfat_attach(struct inode *inode, loff_t i_pos) +{ + struct sdfat_sb_info *sbi = SDFAT_SB(inode->i_sb); + struct hlist_head *head = sbi->inode_hashtable + sdfat_hash(i_pos); + + spin_lock(&sbi->inode_hash_lock); + SDFAT_I(inode)->i_pos = i_pos; + hlist_add_head(&SDFAT_I(inode)->i_hash_fat, head); + spin_unlock(&sbi->inode_hash_lock); +} + +static void sdfat_detach(struct inode *inode) +{ + struct sdfat_sb_info *sbi = SDFAT_SB(inode->i_sb); + + spin_lock(&sbi->inode_hash_lock); + hlist_del_init(&SDFAT_I(inode)->i_hash_fat); + SDFAT_I(inode)->i_pos = 0; + spin_unlock(&sbi->inode_hash_lock); +} + + +/* doesn't deal with root inode */ +static int sdfat_fill_inode(struct inode *inode, const FILE_ID_T *fid) +{ + struct sdfat_sb_info *sbi = SDFAT_SB(inode->i_sb); + FS_INFO_T *fsi = &(sbi->fsi); + DIR_ENTRY_T info; + u64 size = fid->size; + + memcpy(&(SDFAT_I(inode)->fid), fid, sizeof(FILE_ID_T)); + + SDFAT_I(inode)->i_pos = 0; + SDFAT_I(inode)->target = NULL; + inode->i_uid = sbi->options.fs_uid; + inode->i_gid = sbi->options.fs_gid; + 
inode_inc_iversion(inode); + inode->i_generation = get_seconds(); + + if (fsapi_read_inode(inode, &info) < 0) { + MMSG("%s: failed to read stat!\n", __func__); + return -EIO; + } + + if (info.Attr & ATTR_SUBDIR) { /* directory */ + inode->i_generation &= ~1; + inode->i_mode = sdfat_make_mode(sbi, info.Attr, S_IRWXUGO); + inode->i_op = &sdfat_dir_inode_operations; + inode->i_fop = &sdfat_dir_operations; + + set_nlink(inode, info.NumSubdirs); + } else if (info.Attr & ATTR_SYMLINK) { /* symbolic link */ + inode->i_op = &sdfat_symlink_inode_operations; + inode->i_generation |= 1; + inode->i_mode = sdfat_make_mode(sbi, info.Attr, S_IRWXUGO); + } else { /* regular file */ + inode->i_generation |= 1; + inode->i_mode = sdfat_make_mode(sbi, info.Attr, S_IRWXUGO); + inode->i_op = &sdfat_file_inode_operations; + inode->i_fop = &sdfat_file_operations; + + if (sbi->options.improved_allocation & SDFAT_ALLOC_DELAY) + inode->i_mapping->a_ops = &sdfat_da_aops; + else + inode->i_mapping->a_ops = &sdfat_aops; + + inode->i_mapping->nrpages = 0; + + } + + /* + * Use fid->size instead of info.Size + * because info.Size means the value saved on disk + */ + i_size_write(inode, size); + + /* ondisk and aligned size should be aligned with block size */ + if (size & (inode->i_sb->s_blocksize - 1)) { + size |= (inode->i_sb->s_blocksize - 1); + size++; + } + + SDFAT_I(inode)->i_size_aligned = size; + SDFAT_I(inode)->i_size_ondisk = size; + sdfat_debug_check_clusters(inode); + + sdfat_save_attr(inode, info.Attr); + + inode->i_blocks = ((i_size_read(inode) + (fsi->cluster_size - 1)) + & ~((loff_t)fsi->cluster_size - 1)) >> inode->i_blkbits; + + sdfat_time_fat2unix(sbi, &inode->i_mtime, &info.ModifyTimestamp); + sdfat_time_fat2unix(sbi, &inode->i_ctime, &info.CreateTimestamp); + sdfat_time_fat2unix(sbi, &inode->i_atime, &info.AccessTimestamp); + + __init_dfr_info(inode); + + return 0; +} + +static struct inode *sdfat_build_inode(struct super_block *sb, + const FILE_ID_T *fid, loff_t i_pos) { + struct inode *inode; + int err; + + inode = sdfat_iget(sb, i_pos); + if (inode) + goto out; + inode = new_inode(sb); + if (!inode) { + inode = ERR_PTR(-ENOMEM); + goto out; + } + inode->i_ino = iunique(sb, SDFAT_ROOT_INO); + inode_set_iversion(inode, 1); + err = sdfat_fill_inode(inode, fid); + if (err) { + iput(inode); + inode = ERR_PTR(err); + goto out; + } + sdfat_attach(inode, i_pos); + insert_inode_hash(inode); +out: + return inode; +} + +static struct inode *sdfat_alloc_inode(struct super_block *sb) +{ + struct sdfat_inode_info *ei; + + ei = kmem_cache_alloc(sdfat_inode_cachep, GFP_NOFS); + if (!ei) + return NULL; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0) + init_rwsem(&ei->truncate_lock); +#endif + return &ei->vfs_inode; +} + +static void sdfat_destroy_inode(struct inode *inode) +{ + if (SDFAT_I(inode)->target) { + kfree(SDFAT_I(inode)->target); + SDFAT_I(inode)->target = NULL; + } + + kmem_cache_free(sdfat_inode_cachep, SDFAT_I(inode)); +} + +static int __sdfat_write_inode(struct inode *inode, int sync) +{ + struct super_block *sb = inode->i_sb; + struct sdfat_sb_info *sbi = SDFAT_SB(sb); + DIR_ENTRY_T info; + + if (inode->i_ino == SDFAT_ROOT_INO) + return 0; + + info.Attr = sdfat_make_attr(inode); + info.Size = i_size_read(inode); + + sdfat_time_unix2fat(sbi, &inode->i_mtime, &info.ModifyTimestamp); + sdfat_time_unix2fat(sbi, &inode->i_ctime, &info.CreateTimestamp); + sdfat_time_unix2fat(sbi, &inode->i_atime, &info.AccessTimestamp); + + if (!__support_write_inode_sync(sb)) + sync = 0; + + /* FIXME : Do we need 
handling error? */ + return fsapi_write_inode(inode, &info, sync); +} + +static int sdfat_sync_inode(struct inode *inode) +{ + return __sdfat_write_inode(inode, 1); +} + +static int sdfat_write_inode(struct inode *inode, struct writeback_control *wbc) +{ + return __sdfat_write_inode(inode, wbc->sync_mode == WB_SYNC_ALL); +} + +static void sdfat_evict_inode(struct inode *inode) +{ + truncate_inode_pages(&inode->i_data, 0); + + if (!inode->i_nlink) { + loff_t old_size = i_size_read(inode); + + i_size_write(inode, 0); + + SDFAT_I(inode)->fid.size = old_size; + + __cancel_dfr_work(inode, 0, (loff_t)old_size, __func__); + + /* TO CHECK evicting directory works correctly */ + MMSG("%s: inode(%p) evict %s (size(%llu) to zero)\n", + __func__, inode, + S_ISDIR(inode->i_mode) ? "directory" : "file", + (u64)old_size); + fsapi_truncate(inode, old_size, 0); + } + + invalidate_inode_buffers(inode); + clear_inode(inode); + fsapi_invalidate_extent(inode); + sdfat_detach(inode); + + /* after end of this function, caller will remove inode hash */ + /* remove_inode_hash(inode); */ +} + + + +static void sdfat_put_super(struct super_block *sb) +{ + struct sdfat_sb_info *sbi = SDFAT_SB(sb); + int err; + + sdfat_log_msg(sb, KERN_INFO, "trying to unmount..."); + + __cancel_delayed_work_sync(sbi); + + if (__is_sb_dirty(sb)) + sdfat_write_super(sb); + + __free_dfr_mem_if_required(sb); + err = fsapi_umount(sb); + + if (sbi->nls_disk) { + unload_nls(sbi->nls_disk); + sbi->nls_disk = NULL; + sbi->options.codepage = sdfat_default_codepage; + } + if (sbi->nls_io) { + unload_nls(sbi->nls_io); + sbi->nls_io = NULL; + } + if (sbi->options.iocharset != sdfat_default_iocharset) { + kfree(sbi->options.iocharset); + sbi->options.iocharset = sdfat_default_iocharset; + } + + sb->s_fs_info = NULL; + + kobject_del(&sbi->sb_kobj); + kobject_put(&sbi->sb_kobj); + if (!sbi->use_vmalloc) + kfree(sbi); + else + vfree(sbi); + + sdfat_log_msg(sb, KERN_INFO, "unmounted successfully! %s", + err ? "(with previous I/O errors)" : ""); +} + +static inline void __flush_delayed_meta(struct super_block *sb, s32 sync) +{ +#ifdef CONFIG_SDFAT_DELAYED_META_DIRTY + fsapi_cache_flush(sb, sync); +#else + /* DO NOTHING */ +#endif +} + +static void sdfat_write_super(struct super_block *sb) +{ + int time = 0; + + __lock_super(sb); + + __set_sb_clean(sb); + +#ifdef CONFIG_SDFAT_DFR + if (atomic_read(&(SDFAT_SB(sb)->dfr_info.stat)) == DFR_SB_STAT_VALID) + fsapi_dfr_update_fat_next(sb); +#endif + + /* flush delayed FAT/DIR dirty */ + __flush_delayed_meta(sb, 0); + + if (!(sb->s_flags & MS_RDONLY)) + fsapi_sync_fs(sb, 0); + + __unlock_super(sb); + + time = jiffies; + + /* Issuing bdev requests is needed + * to guarantee DIR updates in time + * whether w/ or w/o delayed DIR dirty feature. + * (otherwise DIR updates could be delayed for 5 + 5 secs at max.) 
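+	 * ("5 + 5" presumably means one 5-sec periodic write_super
+	 * interval plus one 5-sec dirty write-back interval)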
+ */ + sync_blockdev(sb->s_bdev); + +#if (defined(CONFIG_SDFAT_DFR) && defined(CONFIG_SDFAT_DFR_DEBUG)) + /* SPO test */ + fsapi_dfr_spo_test(sb, DFR_SPO_FAT_NEXT, __func__); +#endif + MMSG("BD: sdfat_write_super (bdev_sync for %ld ms)\n", + (jiffies - time) * 1000 / HZ); +} + + +static void __dfr_update_fat_next(struct super_block *sb) +{ +#ifdef CONFIG_SDFAT_DFR + struct sdfat_sb_info *sbi = SDFAT_SB(sb); + + if (sbi->options.defrag && + (atomic_read(&sbi->dfr_info.stat) == DFR_SB_STAT_VALID)) { + fsapi_dfr_update_fat_next(sb); + } +#endif +} + +static void __dfr_update_fat_prev(struct super_block *sb, int wait) +{ +#ifdef CONFIG_SDFAT_DFR + struct sdfat_sb_info *sbi = SDFAT_SB(sb); + struct defrag_info *sb_dfr = &sbi->dfr_info; + /* static time available? */ + static int time; /* initialized by zero */ + int uevent = 0, total = 0, clean = 0, full = 0; + int spent = jiffies - time; + + if (!(sbi->options.defrag && wait)) + return; + + __lock_super(sb); + /* Update FAT for defrag */ + if (atomic_read(&(sbi->dfr_info.stat)) == DFR_SB_STAT_VALID) { + + fsapi_dfr_update_fat_prev(sb, 0); + + /* flush delayed FAT/DIR dirty */ + __flush_delayed_meta(sb, 0); + + /* Complete defrag req */ + fsapi_sync_fs(sb, 1); + atomic_set(&sb_dfr->stat, DFR_SB_STAT_REQ); + complete_all(&sbi->dfr_complete); + } else if (((spent < 0) || (spent > DFR_DEFAULT_TIMEOUT)) && + (atomic_read(&(sbi->dfr_info.stat)) == DFR_SB_STAT_IDLE)) { + uevent = fsapi_dfr_check_dfr_required(sb, &total, &clean, &full); + time = jiffies; + } + __unlock_super(sb); + + if (uevent) { + kobject_uevent(&SDFAT_SB(sb)->sb_kobj, KOBJ_CHANGE); + dfr_debug("uevent for defrag_daemon, total_au %d, " + "clean_au %d, full_au %d", total, clean, full); + } +#endif +} + +static int sdfat_sync_fs(struct super_block *sb, int wait) +{ + int err = 0; + + /* If there are some dirty buffers in the bdev inode */ + if (__is_sb_dirty(sb)) { + __lock_super(sb); + __set_sb_clean(sb); + + __dfr_update_fat_next(sb); + + err = fsapi_sync_fs(sb, 1); + +#if (defined(CONFIG_SDFAT_DFR) && defined(CONFIG_SDFAT_DFR_DEBUG)) + /* SPO test */ + fsapi_dfr_spo_test(sb, DFR_SPO_FAT_NEXT, __func__); +#endif + + __unlock_super(sb); + } + + __dfr_update_fat_prev(sb, wait); + + return err; +} + +static int sdfat_statfs(struct dentry *dentry, struct kstatfs *buf) +{ + /* + * patch 1.2.2 : + * fixed the slow-call problem because of volume-lock contention. + */ + struct super_block *sb = dentry->d_sb; + u64 id = huge_encode_dev(sb->s_bdev->bd_dev); + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + VOL_INFO_T info; + + /* fsapi_statfs will try to get a volume lock if needed */ + if (fsapi_statfs(sb, &info)) + return -EIO; + + if (fsi->prev_eio) + sdfat_msg(sb, KERN_INFO, "called statfs with previous" + " I/O error(0x%02X).", fsi->prev_eio); + + buf->f_type = sb->s_magic; + buf->f_bsize = info.ClusterSize; + buf->f_blocks = info.NumClusters; + buf->f_bfree = info.FreeClusters; + buf->f_bavail = info.FreeClusters; + buf->f_fsid.val[0] = (u32)id; + buf->f_fsid.val[1] = (u32)(id >> 32); + buf->f_namelen = 260; + + return 0; +} + +static int sdfat_remount(struct super_block *sb, int *flags, char *data) +{ + unsigned long prev_sb_flags; + char *orig_data = kstrdup(data, GFP_KERNEL); + struct sdfat_sb_info *sbi = SDFAT_SB(sb); + FS_INFO_T *fsi = &(sbi->fsi); + + *flags |= MS_NODIRATIME; + + prev_sb_flags = sb->s_flags; + + sdfat_remount_syncfs(sb); + + fsapi_set_vol_flags(sb, VOL_CLEAN, 1); + + sdfat_log_msg(sb, KERN_INFO, "re-mounted(%s->%s), eio=0x%x, Opts: %s", + (prev_sb_flags & MS_RDONLY) ? 
"ro" : "rw", + (*flags & MS_RDONLY) ? "ro" : "rw", + fsi->prev_eio, orig_data); + kfree(orig_data); + return 0; +} + +static int __sdfat_show_options(struct seq_file *m, struct super_block *sb) +{ + struct sdfat_sb_info *sbi = SDFAT_SB(sb); + struct sdfat_mount_options *opts = &sbi->options; + FS_INFO_T *fsi = &(sbi->fsi); + + /* Show partition info */ + seq_printf(m, ",fs=%s", sdfat_get_vol_type_str(fsi->vol_type)); + if (fsi->prev_eio) + seq_printf(m, ",eio=0x%x", fsi->prev_eio); + if (!uid_eq(opts->fs_uid, GLOBAL_ROOT_UID)) + seq_printf(m, ",uid=%u", + from_kuid_munged(&init_user_ns, opts->fs_uid)); + if (!gid_eq(opts->fs_gid, GLOBAL_ROOT_GID)) + seq_printf(m, ",gid=%u", + from_kgid_munged(&init_user_ns, opts->fs_gid)); + seq_printf(m, ",fmask=%04o", opts->fs_fmask); + seq_printf(m, ",dmask=%04o", opts->fs_dmask); + if (opts->allow_utime) + seq_printf(m, ",allow_utime=%04o", opts->allow_utime); + if (sbi->nls_disk) + seq_printf(m, ",codepage=%s", sbi->nls_disk->charset); + if (sbi->nls_io) + seq_printf(m, ",iocharset=%s", sbi->nls_io->charset); + if (opts->utf8) + seq_puts(m, ",utf8"); + if (sbi->fsi.vol_type != EXFAT) + seq_puts(m, ",shortname=winnt"); + seq_printf(m, ",namecase=%u", opts->casesensitive); + if (opts->tz_utc) + seq_puts(m, ",tz=UTC"); + if (opts->improved_allocation & SDFAT_ALLOC_DELAY) + seq_puts(m, ",delay"); + if (opts->improved_allocation & SDFAT_ALLOC_SMART) + seq_printf(m, ",smart,ausize=%u", opts->amap_opt.sect_per_au); + if (opts->defrag) + seq_puts(m, ",defrag"); + if (opts->adj_hidsect) + seq_puts(m, ",adj_hid"); + if (opts->adj_req) + seq_puts(m, ",adj_req"); + seq_printf(m, ",symlink=%u", opts->symlink); + seq_printf(m, ",bps=%ld", sb->s_blocksize); + if (opts->errors == SDFAT_ERRORS_CONT) + seq_puts(m, ",errors=continue"); + else if (opts->errors == SDFAT_ERRORS_PANIC) + seq_puts(m, ",errors=panic"); + else + seq_puts(m, ",errors=remount-ro"); + if (opts->discard) + seq_puts(m, ",discard"); + + return 0; +} + +static const struct super_operations sdfat_sops = { + .alloc_inode = sdfat_alloc_inode, + .destroy_inode = sdfat_destroy_inode, + .write_inode = sdfat_write_inode, + .evict_inode = sdfat_evict_inode, + .put_super = sdfat_put_super, +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 7, 0) + .write_super = sdfat_write_super, +#endif + .sync_fs = sdfat_sync_fs, + .statfs = sdfat_statfs, + .remount_fs = sdfat_remount, + .show_options = sdfat_show_options, +}; + +/*======================================================================*/ +/* SYSFS Operations */ +/*======================================================================*/ +#define SDFAT_ATTR(name, mode, show, store) \ +static struct sdfat_attr sdfat_attr_##name = __ATTR(name, mode, show, store) + +struct sdfat_attr { + struct attribute attr; + ssize_t (*show)(struct sdfat_sb_info *, char *); + ssize_t (*store)(struct sdfat_sb_info *, const char *, size_t); +}; + +static ssize_t sdfat_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) +{ + struct sdfat_sb_info *sbi = container_of(kobj, struct sdfat_sb_info, sb_kobj); + struct sdfat_attr *a = container_of(attr, struct sdfat_attr, attr); + + return a->show ? a->show(sbi, buf) : 0; +} + +static ssize_t sdfat_attr_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t len) +{ + struct sdfat_sb_info *sbi = container_of(kobj, struct sdfat_sb_info, sb_kobj); + struct sdfat_attr *a = container_of(attr, struct sdfat_attr, attr); + + return a->store ? 
a->store(sbi, buf, len) : len; +} + +static const struct sysfs_ops sdfat_attr_ops = { + .show = sdfat_attr_show, + .store = sdfat_attr_store, +}; + + +static ssize_t type_show(struct sdfat_sb_info *sbi, char *buf) +{ + FS_INFO_T *fsi = &(sbi->fsi); + + return snprintf(buf, PAGE_SIZE, "%s\n", sdfat_get_vol_type_str(fsi->vol_type)); +} +SDFAT_ATTR(type, 0444, type_show, NULL); + +static ssize_t eio_show(struct sdfat_sb_info *sbi, char *buf) +{ + FS_INFO_T *fsi = &(sbi->fsi); + + return snprintf(buf, PAGE_SIZE, "0x%x\n", fsi->prev_eio); +} +SDFAT_ATTR(eio, 0444, eio_show, NULL); + +static ssize_t fratio_show(struct sdfat_sb_info *sbi, char *buf) +{ + unsigned int n_total_au = 0; + unsigned int n_clean_au = 0; + unsigned int n_full_au = 0; + unsigned int n_dirty_au = 0; + unsigned int fr = 0; + + n_total_au = fsapi_get_au_stat(sbi->host_sb, VOL_AU_STAT_TOTAL); + n_clean_au = fsapi_get_au_stat(sbi->host_sb, VOL_AU_STAT_CLEAN); + n_full_au = fsapi_get_au_stat(sbi->host_sb, VOL_AU_STAT_FULL); + n_dirty_au = n_total_au - (n_full_au + n_clean_au); + + if (!n_dirty_au) + fr = 0; + else if (!n_clean_au) + fr = 100; + else + fr = (n_dirty_au * 100) / (n_clean_au + n_dirty_au); + + return snprintf(buf, PAGE_SIZE, "%u\n", fr); +} +SDFAT_ATTR(fratio, 0444, fratio_show, NULL); + +static ssize_t totalau_show(struct sdfat_sb_info *sbi, char *buf) +{ + unsigned int n_au = 0; + + n_au = fsapi_get_au_stat(sbi->host_sb, VOL_AU_STAT_TOTAL); + return snprintf(buf, PAGE_SIZE, "%u\n", n_au); +} +SDFAT_ATTR(totalau, 0444, totalau_show, NULL); + +static ssize_t cleanau_show(struct sdfat_sb_info *sbi, char *buf) +{ + unsigned int n_clean_au = 0; + + n_clean_au = fsapi_get_au_stat(sbi->host_sb, VOL_AU_STAT_CLEAN); + return snprintf(buf, PAGE_SIZE, "%u\n", n_clean_au); +} +SDFAT_ATTR(cleanau, 0444, cleanau_show, NULL); + +static ssize_t fullau_show(struct sdfat_sb_info *sbi, char *buf) +{ + unsigned int n_full_au = 0; + + n_full_au = fsapi_get_au_stat(sbi->host_sb, VOL_AU_STAT_FULL); + return snprintf(buf, PAGE_SIZE, "%u\n", n_full_au); +} +SDFAT_ATTR(fullau, 0444, fullau_show, NULL); + +static struct attribute *sdfat_attrs[] = { + &sdfat_attr_type.attr, + &sdfat_attr_eio.attr, + &sdfat_attr_fratio.attr, + &sdfat_attr_totalau.attr, + &sdfat_attr_cleanau.attr, + &sdfat_attr_fullau.attr, + NULL, +}; + +static struct kobj_type sdfat_ktype = { + .default_attrs = sdfat_attrs, + .sysfs_ops = &sdfat_attr_ops, +}; + +static ssize_t version_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) +{ + return snprintf(buff, PAGE_SIZE, "FS Version %s\n", SDFAT_VERSION); +} + +static struct kobj_attribute version_attr = __ATTR_RO(version); + +static struct attribute *attributes[] = { + &version_attr.attr, + NULL, +}; + +static struct attribute_group attr_group = { + .attrs = attributes, +}; + +/*======================================================================*/ +/* Super Block Read Operations */ +/*======================================================================*/ + +enum { + Opt_uid, + Opt_gid, + Opt_umask, + Opt_dmask, + Opt_fmask, + Opt_allow_utime, + Opt_codepage, + Opt_charset, + Opt_utf8, + Opt_namecase, + Opt_tz_utc, + Opt_adj_hidsect, + Opt_delay, + Opt_smart, + Opt_ausize, + Opt_packing, + Opt_defrag, + Opt_symlink, + Opt_debug, + Opt_err_cont, + Opt_err_panic, + Opt_err_ro, + Opt_err, + Opt_discard, + Opt_fs, + Opt_adj_req, +#ifdef CONFIG_SDFAT_USE_FOR_VFAT + Opt_shortname_lower, + Opt_shortname_win95, + Opt_shortname_winnt, + Opt_shortname_mixed, +#endif /* CONFIG_SDFAT_USE_FOR_VFAT */ +}; + 
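For illustration, a minimal sketch of how one option from the table below is consumed. demo_parse_one() is a hypothetical stand-in for the real parse_options() loop (which strsep()s the option string on commas and runs each piece through a switch like this); match_token(), match_int() and MAX_OPT_ARGS are the stock <linux/parser.h> API:

#include <linux/parser.h>

/* Hypothetical helper, not part of the patch: decode one "uid=1000"-style
 * option against the sdfat_tokens table defined below. */
static int demo_parse_one(struct sdfat_mount_options *opts, char *p)
{
	substring_t args[MAX_OPT_ARGS];
	int option;

	switch (match_token(p, sdfat_tokens, args)) {
	case Opt_uid:
		/* decode the "%u" capture of the "uid=%u" pattern */
		if (match_int(&args[0], &option))
			return -EINVAL;
		opts->fs_uid = make_kuid(current_user_ns(), option);
		return 0;
	default:
		return -EINVAL;	/* unknown or malformed option */
	}
}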
+static const match_table_t sdfat_tokens = { + {Opt_uid, "uid=%u"}, + {Opt_gid, "gid=%u"}, + {Opt_umask, "umask=%o"}, + {Opt_dmask, "dmask=%o"}, + {Opt_fmask, "fmask=%o"}, + {Opt_allow_utime, "allow_utime=%o"}, + {Opt_codepage, "codepage=%u"}, + {Opt_charset, "iocharset=%s"}, + {Opt_utf8, "utf8"}, + {Opt_namecase, "namecase=%u"}, + {Opt_tz_utc, "tz=UTC"}, + {Opt_adj_hidsect, "adj_hid"}, + {Opt_delay, "delay"}, + {Opt_smart, "smart"}, + {Opt_ausize, "ausize=%u"}, + {Opt_packing, "packing=%u"}, + {Opt_defrag, "defrag"}, + {Opt_symlink, "symlink=%u"}, + {Opt_debug, "debug"}, + {Opt_err_cont, "errors=continue"}, + {Opt_err_panic, "errors=panic"}, + {Opt_err_ro, "errors=remount-ro"}, + {Opt_discard, "discard"}, + {Opt_fs, "fs=%s"}, + {Opt_adj_req, "adj_req"}, +#ifdef CONFIG_SDFAT_USE_FOR_VFAT + {Opt_shortname_lower, "shortname=lower"}, + {Opt_shortname_win95, "shortname=win95"}, + {Opt_shortname_winnt, "shortname=winnt"}, + {Opt_shortname_mixed, "shortname=mixed"}, +#endif /* CONFIG_SDFAT_USE_FOR_VFAT */ + {Opt_err, NULL} +}; + +static int parse_options(struct super_block *sb, char *options, int silent, + int *debug, struct sdfat_mount_options *opts) +{ + char *p; + substring_t args[MAX_OPT_ARGS]; + int option, i; + char *tmpstr; + + opts->fs_uid = current_uid(); + opts->fs_gid = current_gid(); + opts->fs_fmask = opts->fs_dmask = current->fs->umask; + opts->allow_utime = (unsigned short) -1; + opts->codepage = sdfat_default_codepage; + opts->iocharset = sdfat_default_iocharset; + opts->casesensitive = 0; + opts->utf8 = 0; + opts->adj_hidsect = 0; + opts->tz_utc = 0; + opts->improved_allocation = 0; + opts->amap_opt.pack_ratio = 0; // Default packing + opts->amap_opt.sect_per_au = 0; + opts->amap_opt.misaligned_sect = 0; + opts->symlink = 0; + opts->errors = SDFAT_ERRORS_RO; + opts->discard = 0; + *debug = 0; + + if (!options) + goto out; + + while ((p = strsep(&options, ",")) != NULL) { + int token; + + if (!*p) + continue; + token = match_token(p, sdfat_tokens, args); + switch (token) { + case Opt_uid: + if (match_int(&args[0], &option)) + return 0; + opts->fs_uid = make_kuid(current_user_ns(), option); + break; + case Opt_gid: + if (match_int(&args[0], &option)) + return 0; + opts->fs_gid = make_kgid(current_user_ns(), option); + break; + case Opt_umask: + case Opt_dmask: + case Opt_fmask: + if (match_octal(&args[0], &option)) + return 0; + if (token != Opt_dmask) + opts->fs_fmask = option; + if (token != Opt_fmask) + opts->fs_dmask = option; + break; + case Opt_allow_utime: + if (match_octal(&args[0], &option)) + return 0; + opts->allow_utime = option & (S_IWGRP | S_IWOTH); + break; + case Opt_codepage: + if (match_int(&args[0], &option)) + return 0; + opts->codepage = option; + break; + case Opt_charset: + if (opts->iocharset != sdfat_default_iocharset) + kfree(opts->iocharset); + tmpstr = match_strdup(&args[0]); + if (!tmpstr) + return -ENOMEM; + opts->iocharset = tmpstr; + break; + case Opt_namecase: + if (match_int(&args[0], &option)) + return 0; + opts->casesensitive = (option > 0) ? 1:0; + break; + case Opt_utf8: + opts->utf8 = 1; + break; + case Opt_adj_hidsect: + opts->adj_hidsect = 1; + break; + case Opt_tz_utc: + opts->tz_utc = 1; + break; + case Opt_symlink: + if (match_int(&args[0], &option)) + return 0; + opts->symlink = option > 0 ? 
1 : 0; + break; + case Opt_delay: + opts->improved_allocation |= SDFAT_ALLOC_DELAY; + break; + case Opt_smart: + opts->improved_allocation |= SDFAT_ALLOC_SMART; + break; + case Opt_ausize: + if (match_int(&args[0], &option)) + return -EINVAL; + if (!is_power_of_2(option)) + return -EINVAL; + opts->amap_opt.sect_per_au = option; + IMSG("set AU size by option : %u sectors\n", option); + break; + case Opt_packing: + if (match_int(&args[0], &option)) + return 0; + opts->amap_opt.pack_ratio = option; + break; + case Opt_defrag: +#ifdef CONFIG_SDFAT_DFR + opts->defrag = 1; +#else + IMSG("defragmentation config is not enabled. ignore\n"); +#endif + break; + case Opt_err_cont: + opts->errors = SDFAT_ERRORS_CONT; + break; + case Opt_err_panic: + opts->errors = SDFAT_ERRORS_PANIC; + break; + case Opt_err_ro: + opts->errors = SDFAT_ERRORS_RO; + break; + case Opt_debug: + *debug = 1; + break; + case Opt_discard: + opts->discard = 1; + break; + case Opt_fs: + tmpstr = match_strdup(&args[0]); + if (!tmpstr) + return -ENOMEM; + for (i = 0; i < FS_TYPE_MAX; i++) { + if (!strcmp(tmpstr, FS_TYPE_STR[i])) { + opts->fs_type = (unsigned char)i; + sdfat_log_msg(sb, KERN_ERR, + "set fs-type by option : %s", + FS_TYPE_STR[i]); + break; + } + } + kfree(tmpstr); + if (i == FS_TYPE_MAX) { + sdfat_log_msg(sb, KERN_ERR, + "invalid fs-type, " + "only allow auto, exfat, vfat"); + return -EINVAL; + } + break; + case Opt_adj_req: +#ifdef CONFIG_SDFAT_ALIGNED_MPAGE_WRITE + opts->adj_req = 1; +#else + IMSG("adjust request config is not enabled. ignore\n"); +#endif + break; +#ifdef CONFIG_SDFAT_USE_FOR_VFAT + case Opt_shortname_lower: + case Opt_shortname_win95: + case Opt_shortname_mixed: + pr_warn("[SDFAT] DRAGONS AHEAD! sdFAT only understands \"shortname=winnt\"!\n"); + case Opt_shortname_winnt: + break; +#endif /* CONFIG_SDFAT_USE_FOR_VFAT */ + default: + if (!silent) { + sdfat_msg(sb, KERN_ERR, + "unrecognized mount option \"%s\" " + "or missing value", p); + } + return -EINVAL; + } + } + +out: + if (opts->allow_utime == (unsigned short) -1) + opts->allow_utime = ~opts->fs_dmask & (S_IWGRP | S_IWOTH); + + if (opts->utf8 && strcmp(opts->iocharset, sdfat_iocharset_with_utf8)) { + sdfat_msg(sb, KERN_WARNING, + "utf8 enabled, \"iocharset=%s\" is recommended", + sdfat_iocharset_with_utf8); + } + + if (opts->discard) { + struct request_queue *q = bdev_get_queue(sb->s_bdev); + + if (!blk_queue_discard(q)) + sdfat_msg(sb, KERN_WARNING, + "mounting with \"discard\" option, but " + "the device does not support discard"); + opts->discard = 0; + } + + return 0; +} + +static void sdfat_hash_init(struct super_block *sb) +{ + struct sdfat_sb_info *sbi = SDFAT_SB(sb); + int i; + + spin_lock_init(&sbi->inode_hash_lock); + for (i = 0; i < SDFAT_HASH_SIZE; i++) + INIT_HLIST_HEAD(&sbi->inode_hashtable[i]); +} + +static int sdfat_read_root(struct inode *inode) +{ + struct super_block *sb = inode->i_sb; + struct sdfat_sb_info *sbi = SDFAT_SB(sb); + sdfat_timespec_t ts; + FS_INFO_T *fsi = &(sbi->fsi); + DIR_ENTRY_T info; + + ts = CURRENT_TIME_SEC; + + SDFAT_I(inode)->fid.dir.dir = fsi->root_dir; + SDFAT_I(inode)->fid.dir.flags = 0x01; + SDFAT_I(inode)->fid.entry = -1; + SDFAT_I(inode)->fid.start_clu = fsi->root_dir; + SDFAT_I(inode)->fid.flags = 0x01; + SDFAT_I(inode)->fid.type = TYPE_DIR; + SDFAT_I(inode)->fid.version = 0; + SDFAT_I(inode)->fid.rwoffset = 0; + SDFAT_I(inode)->fid.hint_bmap.off = CLUS_EOF; + SDFAT_I(inode)->fid.hint_stat.eidx = 0; + SDFAT_I(inode)->fid.hint_stat.clu = fsi->root_dir; + SDFAT_I(inode)->fid.hint_femp.eidx = -1; 
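	/*
	 * The hint_* fields are per-inode lookup caches: hint_bmap for the
	 * offset-to-cluster walk, hint_stat for the readdir position and
	 * hint_femp for a remembered empty-dentry slot.  CLUS_EOF and an
	 * eidx of -1 mark them invalid so the first lookup starts fresh.
	 */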
+ + SDFAT_I(inode)->target = NULL; + + if (fsapi_read_inode(inode, &info) < 0) + return -EIO; + + inode->i_uid = sbi->options.fs_uid; + inode->i_gid = sbi->options.fs_gid; + inode_inc_iversion(inode); + inode->i_generation = 0; + inode->i_mode = sdfat_make_mode(sbi, ATTR_SUBDIR, S_IRWXUGO); + inode->i_op = &sdfat_dir_inode_operations; + inode->i_fop = &sdfat_dir_operations; + + i_size_write(inode, info.Size); + SDFAT_I(inode)->fid.size = info.Size; + inode->i_blocks = ((i_size_read(inode) + (fsi->cluster_size - 1)) + & ~((loff_t)fsi->cluster_size - 1)) >> inode->i_blkbits; + SDFAT_I(inode)->i_pos = ((loff_t) fsi->root_dir << 32) | 0xffffffff; + SDFAT_I(inode)->i_size_aligned = i_size_read(inode); + SDFAT_I(inode)->i_size_ondisk = i_size_read(inode); + + sdfat_save_attr(inode, ATTR_SUBDIR); + inode->i_mtime = inode->i_atime = inode->i_ctime = ts; + set_nlink(inode, info.NumSubdirs + 2); + return 0; +} + + + +static void setup_dops(struct super_block *sb) +{ + if (SDFAT_SB(sb)->options.casesensitive == 0) + sb->s_d_op = &sdfat_ci_dentry_ops; + else + sb->s_d_op = &sdfat_dentry_ops; +} + +static int sdfat_fill_super(struct super_block *sb, void *data, int silent) +{ + struct inode *root_inode = NULL; + struct sdfat_sb_info *sbi; + int debug; + int err; + char buf[50]; + struct block_device *bdev = sb->s_bdev; + dev_t bd_dev = bdev ? bdev->bd_dev : 0; + + sdfat_log_msg(sb, KERN_INFO, "trying to mount..."); + + /* + * GFP_KERNEL is ok here, because while we do hold the + * supeblock lock, memory pressure can't call back into + * the filesystem, since we're only just about to mount + * it and have no inodes etc active! + */ + sbi = kzalloc(sizeof(struct sdfat_sb_info), GFP_KERNEL); + if (!sbi) { + sdfat_log_msg(sb, KERN_INFO, + "trying to alloc sbi with vzalloc()"); + sbi = vzalloc(sizeof(struct sdfat_sb_info)); + if (!sbi) { + sdfat_log_msg(sb, KERN_ERR, "failed to mount! (ENOMEM)"); + return -ENOMEM; + } + sbi->use_vmalloc = 1; + } + + mutex_init(&sbi->s_vlock); + sb->s_fs_info = sbi; + sb->s_flags |= MS_NODIRATIME; + sb->s_magic = SDFAT_SUPER_MAGIC; + sb->s_op = &sdfat_sops; + ratelimit_state_init(&sbi->ratelimit, DEFAULT_RATELIMIT_INTERVAL, + DEFAULT_RATELIMIT_BURST); + err = parse_options(sb, data, silent, &debug, &sbi->options); + if (err) { + sdfat_log_msg(sb, KERN_ERR, "failed to parse options"); + goto failed_mount; + } + + setup_sdfat_xattr_handler(sb); + setup_sdfat_sync_super_wq(sb); + setup_dops(sb); + + err = fsapi_mount(sb); + if (err) { + sdfat_log_msg(sb, KERN_ERR, "failed to recognize fat type"); + goto failed_mount; + } + + /* set up enough so that it can read an inode */ + sdfat_hash_init(sb); + + /* + * The low byte of FAT's first entry must have same value with + * media-field. But in real world, too many devices is + * writing wrong value. So, removed that validity check. 
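 * (The media descriptor byte sits at offset 21 of the boot sector; a
 * strict check would compare it against the low byte of FAT[0], as the
 * line below sketches.)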
+ * + * if (FAT_FIRST_ENT(sb, media) != first) + */ + + err = -EINVAL; + sprintf(buf, "cp%d", sbi->options.codepage); + sbi->nls_disk = load_nls(buf); + if (!sbi->nls_disk) { + sdfat_log_msg(sb, KERN_ERR, "codepage %s not found", buf); + goto failed_mount2; + } + + sbi->nls_io = load_nls(sbi->options.iocharset); + if (!sbi->nls_io) { + sdfat_log_msg(sb, KERN_ERR, "IO charset %s not found", + sbi->options.iocharset); + goto failed_mount2; + } + + err = __alloc_dfr_mem_if_required(sb); + if (err) { + sdfat_log_msg(sb, KERN_ERR, "failed to initialize a memory for " + "defragmentation"); + goto failed_mount3; + } + + err = -ENOMEM; + root_inode = new_inode(sb); + if (!root_inode) { + sdfat_log_msg(sb, KERN_ERR, "failed to allocate root inode."); + goto failed_mount3; + } + + root_inode->i_ino = SDFAT_ROOT_INO; + inode_set_iversion(root_inode, 1); + + err = sdfat_read_root(root_inode); + if (err) { + sdfat_log_msg(sb, KERN_ERR, "failed to initialize root inode."); + goto failed_mount3; + } + + sdfat_attach(root_inode, SDFAT_I(root_inode)->i_pos); + insert_inode_hash(root_inode); + + err = -ENOMEM; + sb->s_root = __d_make_root(root_inode); + if (!sb->s_root) { + sdfat_msg(sb, KERN_ERR, "failed to get the root dentry"); + goto failed_mount3; + } + + /* + * Initialize filesystem attributes (for sysfs) + * ex: /sys/fs/sdfat/mmcblk1[179:17] + */ + sbi->sb_kobj.kset = sdfat_kset; + err = kobject_init_and_add(&sbi->sb_kobj, &sdfat_ktype, NULL, + "%s[%d:%d]", sb->s_id, MAJOR(bd_dev), MINOR(bd_dev)); + if (err) { + sdfat_msg(sb, KERN_ERR, "Unable to create sdfat attributes for" + " %s[%d:%d](%d)", sb->s_id, + MAJOR(bd_dev), MINOR(bd_dev), err); + goto failed_mount3; + } + + sdfat_log_msg(sb, KERN_INFO, "mounted successfully!"); + /* FOR BIGDATA */ + sdfat_statistics_set_mnt(&sbi->fsi); + sdfat_statistics_set_vol_size(sb); + return 0; + +failed_mount3: + __free_dfr_mem_if_required(sb); +failed_mount2: + fsapi_umount(sb); +failed_mount: + sdfat_log_msg(sb, KERN_INFO, "failed to mount! (%d)", err); + + if (root_inode) + iput(root_inode); + sb->s_root = NULL; + + if (sbi->nls_io) + unload_nls(sbi->nls_io); + if (sbi->nls_disk) + unload_nls(sbi->nls_disk); + if (sbi->options.iocharset != sdfat_default_iocharset) + kfree(sbi->options.iocharset); + sb->s_fs_info = NULL; + if (!sbi->use_vmalloc) + kfree(sbi); + else + vfree(sbi); + return err; +} + +static struct dentry *sdfat_fs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { + return mount_bdev(fs_type, flags, dev_name, data, sdfat_fill_super); +} + +static void init_once(void *foo) +{ + struct sdfat_inode_info *ei = (struct sdfat_inode_info *)foo; + + INIT_HLIST_NODE(&ei->i_hash_fat); + inode_init_once(&ei->vfs_inode); +} + +static int __init sdfat_init_inodecache(void) +{ + sdfat_inode_cachep = kmem_cache_create("sdfat_inode_cache", + sizeof(struct sdfat_inode_info), + 0, (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD), + init_once); + if (!sdfat_inode_cachep) + return -ENOMEM; + return 0; +} + +static void sdfat_destroy_inodecache(void) +{ + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. 
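 * (rcu_barrier() waits for every pending call_rcu() callback; the VFS
 * frees inodes from such a callback, so none can outlive the cache.)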
+ */ + rcu_barrier(); + kmem_cache_destroy(sdfat_inode_cachep); +} + +#ifdef CONFIG_SDFAT_DBG_IOCTL +static void sdfat_debug_kill_sb(struct super_block *sb) +{ + struct sdfat_sb_info *sbi = SDFAT_SB(sb); + struct block_device *bdev = sb->s_bdev; + + long flags; + + if (sbi) { + flags = sbi->debug_flags; + + if (flags & SDFAT_DEBUGFLAGS_INVALID_UMOUNT) { + /* invalidate_bdev drops all device cache include dirty. + * we use this to simulate device removal + */ + fsapi_cache_release(sb); + invalidate_bdev(bdev); + } + } + + kill_block_super(sb); +} +#endif /* CONFIG_SDFAT_DBG_IOCTL */ + +static struct file_system_type sdfat_fs_type = { + .owner = THIS_MODULE, + .name = "sdfat", + .mount = sdfat_fs_mount, +#ifdef CONFIG_SDFAT_DBG_IOCTL + .kill_sb = sdfat_debug_kill_sb, +#else + .kill_sb = kill_block_super, +#endif /* CONFIG_SDFAT_DBG_IOCTL */ + .fs_flags = FS_REQUIRES_DEV, +}; +MODULE_ALIAS_FS("sdfat"); + +#ifdef CONFIG_SDFAT_USE_FOR_EXFAT +static struct file_system_type exfat_fs_type = { + .owner = THIS_MODULE, + .name = "exfat", + .mount = sdfat_fs_mount, +#ifdef CONFIG_SDFAT_DBG_IOCTL + .kill_sb = sdfat_debug_kill_sb, +#else + .kill_sb = kill_block_super, +#endif /* CONFIG_SDFAT_DBG_IOCTL */ + .fs_flags = FS_REQUIRES_DEV, +}; +MODULE_ALIAS_FS("exfat"); +#endif /* CONFIG_SDFAT_USE_FOR_EXFAT */ + +#ifdef CONFIG_SDFAT_USE_FOR_VFAT +static struct file_system_type vfat_fs_type = { + .owner = THIS_MODULE, + .name = "vfat", + .mount = sdfat_fs_mount, +#ifdef CONFIG_SDFAT_DBG_IOCTL + .kill_sb = sdfat_debug_kill_sb, +#else + .kill_sb = kill_block_super, +#endif /* CONFIG_SDFAT_DBG_IOCTL */ + .fs_flags = FS_REQUIRES_DEV, +}; +MODULE_ALIAS_FS("vfat"); +#endif /* CONFIG_SDFAT_USE_FOR_VFAT */ + +static int __init init_sdfat_fs(void) +{ + int err; + + sdfat_log_version(); + err = fsapi_init(); + if (err) + goto error; + + sdfat_kset = kset_create_and_add("sdfat", NULL, fs_kobj); + if (!sdfat_kset) { + pr_err("[SDFAT] failed to create sdfat kset\n"); + err = -ENOMEM; + goto error; + } + + err = sysfs_create_group(&sdfat_kset->kobj, &attr_group); + if (err) { + pr_err("[SDFAT] failed to create sdfat version attributes\n"); + goto error; + } + + err = sdfat_statistics_init(sdfat_kset); + if (err) + goto error; + + err = sdfat_uevent_init(sdfat_kset); + if (err) + goto error; + + err = sdfat_init_inodecache(); + if (err) { + pr_err("[SDFAT] failed to initialize inode cache\n"); + goto error; + } + + err = register_filesystem(&sdfat_fs_type); + if (err) { + pr_err("[SDFAT] failed to register filesystem\n"); + goto error; + } + +#ifdef CONFIG_SDFAT_USE_FOR_EXFAT + err = register_filesystem(&exfat_fs_type); + if (err) { + pr_err("[SDFAT] failed to register for exfat filesystem\n"); + goto error; + } +#endif /* CONFIG_SDFAT_USE_FOR_EXFAT */ + +#ifdef CONFIG_SDFAT_USE_FOR_VFAT + err = register_filesystem(&vfat_fs_type); + if (err) { + pr_err("[SDFAT] failed to register for vfat filesystem\n"); + goto error; + } +#endif /* CONFIG_SDFAT_USE_FOR_VFAT */ + + return 0; +error: + sdfat_uevent_uninit(); + sdfat_statistics_uninit(); + + if (sdfat_kset) { + sysfs_remove_group(&sdfat_kset->kobj, &attr_group); + kset_unregister(sdfat_kset); + sdfat_kset = NULL; + } + + sdfat_destroy_inodecache(); + fsapi_shutdown(); + + pr_err("[SDFAT] failed to initialize FS driver(err:%d)\n", err); + return err; +} + +static void __exit exit_sdfat_fs(void) +{ + sdfat_uevent_uninit(); + sdfat_statistics_uninit(); + + if (sdfat_kset) { + sysfs_remove_group(&sdfat_kset->kobj, &attr_group); + kset_unregister(sdfat_kset); + sdfat_kset = NULL; 
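		/*
		 * Same guarded teardown as the init_sdfat_fs() error path;
		 * clearing the pointer keeps the guard safe to repeat.
		 */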
+ } + + sdfat_destroy_inodecache(); + unregister_filesystem(&sdfat_fs_type); + +#ifdef CONFIG_SDFAT_USE_FOR_EXFAT + unregister_filesystem(&exfat_fs_type); +#endif /* CONFIG_SDFAT_USE_FOR_EXFAT */ +#ifdef CONFIG_SDFAT_USE_FOR_VFAT + unregister_filesystem(&vfat_fs_type); +#endif /* CONFIG_SDFAT_USE_FOR_VFAT */ + fsapi_shutdown(); +} + +module_init(init_sdfat_fs); +module_exit(exit_sdfat_fs); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("FAT/exFAT filesystem support"); +MODULE_AUTHOR("Samsung Electronics Co., Ltd."); + diff --git a/fs/sdfat/sdfat.h b/fs/sdfat/sdfat.h new file mode 100644 index 000000000000..60f7811c7b99 --- /dev/null +++ b/fs/sdfat/sdfat.h @@ -0,0 +1,528 @@ +/* + * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef _SDFAT_H +#define _SDFAT_H + +#include <linux/buffer_head.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/nls.h> +#include <linux/fs.h> +#include <linux/mutex.h> +#include <linux/ratelimit.h> +#include <linux/version.h> +#include <linux/kobject.h> +#include "api.h" + +#ifdef CONFIG_SDFAT_DFR +#include "dfr.h" +#endif + +/* + * sdfat error flags + */ +#define SDFAT_ERRORS_CONT (1) /* ignore error and continue */ +#define SDFAT_ERRORS_PANIC (2) /* panic on error */ +#define SDFAT_ERRORS_RO (3) /* remount r/o on error */ + +/* + * sdfat allocator flags + */ +#define SDFAT_ALLOC_DELAY (1) /* Delayed allocation */ +#define SDFAT_ALLOC_SMART (2) /* Smart allocation */ + +/* + * sdfat allocator destination for smart allocation + */ +#define ALLOC_NOWHERE (0) +#define ALLOC_COLD (1) +#define ALLOC_HOT (16) +#define ALLOC_COLD_ALIGNED (1) +#define ALLOC_COLD_PACKING (2) +#define ALLOC_COLD_SEQ (4) + +/* + * sdfat nls lossy flag + */ +#define NLS_NAME_NO_LOSSY (0x00) /* no lossy */ +#define NLS_NAME_LOSSY (0x01) /* just detected incorrect filename(s) */ +#define NLS_NAME_OVERLEN (0x02) /* the length is over than its limit */ + +/* + * sdfat common MACRO + */ +#define CLUSTER_16(x) ((u16)((x) & 0xFFFFU)) +#define CLUSTER_32(x) ((u32)((x) & 0xFFFFFFFFU)) +#define CLUS_EOF CLUSTER_32(~0) +#define CLUS_BAD (0xFFFFFFF7U) +#define CLUS_FREE (0) +#define CLUS_BASE (2) +#define IS_CLUS_EOF(x) ((x) == CLUS_EOF) +#define IS_CLUS_BAD(x) ((x) == CLUS_BAD) +#define IS_CLUS_FREE(x) ((x) == CLUS_FREE) +#define IS_LAST_SECT_IN_CLUS(fsi, sec) \ + ((((sec) - (fsi)->data_start_sector + 1) \ + & ((1 << (fsi)->sect_per_clus_bits) - 1)) == 0) + +#define CLUS_TO_SECT(fsi, x) \ + ((((unsigned long long)(x) - CLUS_BASE) << (fsi)->sect_per_clus_bits) + (fsi)->data_start_sector) + +#define SECT_TO_CLUS(fsi, sec) \ + ((u32)((((sec) - (fsi)->data_start_sector) >> (fsi)->sect_per_clus_bits) + CLUS_BASE)) + +/* variables defined at sdfat.c */ +extern const char *FS_TYPE_STR[]; + +enum { + FS_TYPE_AUTO, + FS_TYPE_EXFAT, + FS_TYPE_VFAT, + FS_TYPE_MAX +}; + +/* + * sdfat mount in-memory data + */ +struct 
sdfat_mount_options { +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0) + kuid_t fs_uid; + kgid_t fs_gid; +#else /* LINUX_VERSION_CODE < KERNEL_VERSION(3, 5, 0) */ + uid_t fs_uid; + gid_t fs_gid; +#endif + unsigned short fs_fmask; + unsigned short fs_dmask; + unsigned short allow_utime; /* permission for setting the [am]time */ + unsigned short codepage; /* codepage for shortname conversions */ + char *iocharset; /* charset for filename input/display */ + struct { + unsigned int pack_ratio; + unsigned int sect_per_au; + unsigned int misaligned_sect; + } amap_opt; /* AMAP-related options (see amap.c) */ + + unsigned char utf8; + unsigned char casesensitive; + unsigned char adj_hidsect; + unsigned char tz_utc; + unsigned char improved_allocation; + unsigned char defrag; + unsigned char symlink; /* support symlink operation */ + unsigned char errors; /* on error: continue, panic, remount-ro */ + unsigned char discard; /* flag on if -o dicard specified and device support discard() */ + unsigned char fs_type; /* fs_type that user specified */ + unsigned short adj_req; /* support aligned mpage write */ +}; + +#define SDFAT_HASH_BITS 8 +#define SDFAT_HASH_SIZE (1UL << SDFAT_HASH_BITS) + +/* + * SDFAT file system superblock in-memory data + */ +struct sdfat_sb_info { + FS_INFO_T fsi; /* private filesystem info */ + + struct mutex s_vlock; /* volume lock */ + int use_vmalloc; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0) + int s_dirt; + struct mutex s_lock; /* superblock lock */ + int write_super_queued; /* Write_super work is pending? */ + struct delayed_work write_super_work; /* Work_queue data structrue for write_super() */ + spinlock_t work_lock; /* Lock for WQ */ +#endif + struct super_block *host_sb; /* sb pointer */ + struct sdfat_mount_options options; + struct nls_table *nls_disk; /* Codepage used on disk */ + struct nls_table *nls_io; /* Charset used for input and display */ + struct ratelimit_state ratelimit; + + spinlock_t inode_hash_lock; + struct hlist_head inode_hashtable[SDFAT_HASH_SIZE]; + struct kobject sb_kobj; +#ifdef CONFIG_SDFAT_DBG_IOCTL + long debug_flags; +#endif /* CONFIG_SDFAT_DBG_IOCTL */ + +#ifdef CONFIG_SDFAT_DFR + struct defrag_info dfr_info; + struct completion dfr_complete; + unsigned int *dfr_new_clus; + int dfr_new_idx; + unsigned int *dfr_page_wb; + void **dfr_pagep; + unsigned int dfr_hint_clus; + unsigned int dfr_hint_idx; + int dfr_reserved_clus; + +#ifdef CONFIG_SDFAT_DFR_DEBUG + int dfr_spo_flag; +#endif /* CONFIG_SDFAT_DFR_DEBUG */ + +#endif /* CONFIG_SDFAT_DFR */ + +#ifdef CONFIG_SDFAT_TRACE_IO + /* Statistics for allocator */ + unsigned int stat_n_pages_written; /* # of written pages in total */ + unsigned int stat_n_pages_added; /* # of added blocks in total */ + unsigned int stat_n_bdev_pages_written; /* # of written pages owned by bdev inode */ + unsigned int stat_n_pages_confused; +#endif + atomic_t stat_n_pages_queued; /* # of pages in the request queue (approx.) 
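 * (an atomic_t, so updates are safe, but readers sample it without
 * locking, which is presumably why it is only approximate)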
*/ +}; + +/* + * SDFAT file system inode in-memory data + */ +struct sdfat_inode_info { + FILE_ID_T fid; + char *target; + /* NOTE: i_size_ondisk is 64bits, so must hold ->inode_lock to access */ + loff_t i_size_ondisk; /* physically allocated size */ + loff_t i_size_aligned; /* block-aligned i_size (used in cont_write_begin) */ + loff_t i_pos; /* on-disk position of directory entry or 0 */ + struct hlist_node i_hash_fat; /* hash by i_location */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0) + struct rw_semaphore truncate_lock; /* protect bmap against truncate */ +#endif +#ifdef CONFIG_SDFAT_DFR + struct defrag_info dfr_info; +#endif + struct inode vfs_inode; +}; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 18, 0) +typedef struct timespec64 sdfat_timespec_t; +#else /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 18, 0) */ +typedef struct timespec sdfat_timespec_t; +#endif + +/* + * FIXME : needs on-disk-slot in-memory data + */ + +/* static inline functons */ +static inline const char *sdfat_get_vol_type_str(unsigned int type) +{ + if (type == EXFAT) + return "exfat"; + else if (type == FAT32) + return "vfat:32"; + else if (type == FAT16) + return "vfat:16"; + else if (type == FAT12) + return "vfat:12"; + + return "unknown"; +} + +static inline struct sdfat_sb_info *SDFAT_SB(struct super_block *sb) +{ + return (struct sdfat_sb_info *)sb->s_fs_info; +} + +static inline struct sdfat_inode_info *SDFAT_I(struct inode *inode) +{ + return container_of(inode, struct sdfat_inode_info, vfs_inode); +} + +/* + * If ->i_mode can't hold S_IWUGO (i.e. ATTR_RO), we use ->i_attrs to + * save ATTR_RO instead of ->i_mode. + * + * If it's directory and !sbi->options.rodir, ATTR_RO isn't read-only + * bit, it's just used as flag for app. + */ +static inline int sdfat_mode_can_hold_ro(struct inode *inode) +{ + struct sdfat_sb_info *sbi = SDFAT_SB(inode->i_sb); + + if (S_ISDIR(inode->i_mode)) + return 0; + + if ((~sbi->options.fs_fmask) & S_IWUGO) + return 1; + return 0; +} + +/* + * FIXME : needs to check symlink option. + */ +/* Convert attribute bits and a mask to the UNIX mode. 
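 *
 * Worked example, assuming fmask = dmask = 0022:
 *   sdfat_make_mode(sbi, ATTR_SUBDIR,   0777) => S_IFDIR | 0755
 *   sdfat_make_mode(sbi, ATTR_READONLY, 0777) => S_IFREG | 0555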
*/ +static inline mode_t sdfat_make_mode(struct sdfat_sb_info *sbi, + u32 attr, mode_t mode) +{ + if ((attr & ATTR_READONLY) && !(attr & ATTR_SUBDIR)) + mode &= ~S_IWUGO; + + if (attr & ATTR_SUBDIR) + return (mode & ~sbi->options.fs_dmask) | S_IFDIR; + else if (attr & ATTR_SYMLINK) + return (mode & ~sbi->options.fs_dmask) | S_IFLNK; + else + return (mode & ~sbi->options.fs_fmask) | S_IFREG; +} + +/* Return the FAT attribute byte for this inode */ +static inline u32 sdfat_make_attr(struct inode *inode) +{ + u32 attrs = SDFAT_I(inode)->fid.attr; + + if (S_ISDIR(inode->i_mode)) + attrs |= ATTR_SUBDIR; + if (sdfat_mode_can_hold_ro(inode) && !(inode->i_mode & S_IWUGO)) + attrs |= ATTR_READONLY; + return attrs; +} + +static inline void sdfat_save_attr(struct inode *inode, u32 attr) +{ + if (sdfat_mode_can_hold_ro(inode)) + SDFAT_I(inode)->fid.attr = attr & ATTR_RWMASK; + else + SDFAT_I(inode)->fid.attr = attr & (ATTR_RWMASK | ATTR_READONLY); +} + +/* sdfat/statistics.c */ +/* bigdata function */ +#ifdef CONFIG_SDFAT_STATISTICS +extern int sdfat_statistics_init(struct kset *sdfat_kset); +extern void sdfat_statistics_uninit(void); +extern void sdfat_statistics_set_mnt(FS_INFO_T *fsi); +extern void sdfat_statistics_set_mnt_ro(void); +extern void sdfat_statistics_set_mkdir(u8 flags); +extern void sdfat_statistics_set_create(u8 flags); +extern void sdfat_statistics_set_rw(u8 flags, u32 clu_offset, s32 create); +extern void sdfat_statistics_set_trunc(u8 flags, CHAIN_T *clu); +extern void sdfat_statistics_set_vol_size(struct super_block *sb); +#else +static inline int sdfat_statistics_init(struct kset *sdfat_kset) +{ + return 0; +} +static inline void sdfat_statistics_uninit(void) {}; +static inline void sdfat_statistics_set_mnt(FS_INFO_T *fsi) {}; +static inline void sdfat_statistics_set_mnt_ro(void) {}; +static inline void sdfat_statistics_set_mkdir(u8 flags) {}; +static inline void sdfat_statistics_set_create(u8 flags) {}; +static inline void sdfat_statistics_set_rw(u8 flags, u32 clu_offset, s32 create) {}; +static inline void sdfat_statistics_set_trunc(u8 flags, CHAIN_T *clu) {}; +static inline void sdfat_statistics_set_vol_size(struct super_block *sb) {}; +#endif + +/* sdfat/nls.c */ +/* NLS management function */ +s32 nls_cmp_sfn(struct super_block *sb, u8 *a, u8 *b); +s32 nls_cmp_uniname(struct super_block *sb, u16 *a, u16 *b); +s32 nls_uni16s_to_sfn(struct super_block *sb, UNI_NAME_T *p_uniname, DOS_NAME_T *p_dosname, s32 *p_lossy); +s32 nls_sfn_to_uni16s(struct super_block *sb, DOS_NAME_T *p_dosname, UNI_NAME_T *p_uniname); +s32 nls_uni16s_to_vfsname(struct super_block *sb, UNI_NAME_T *uniname, u8 *p_cstring, s32 len); +s32 nls_vfsname_to_uni16s(struct super_block *sb, const u8 *p_cstring, + const s32 len, UNI_NAME_T *uniname, s32 *p_lossy); + +/* sdfat/mpage.c */ +#ifdef CONFIG_SDFAT_ALIGNED_MPAGE_WRITE +int sdfat_mpage_writepages(struct address_space *mapping, + struct writeback_control *wbc, get_block_t *get_block); +#endif + +/* sdfat/xattr.c */ +#ifdef CONFIG_SDFAT_VIRTUAL_XATTR +void setup_sdfat_xattr_handler(struct super_block *sb); +extern int sdfat_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags); +extern ssize_t sdfat_getxattr(struct dentry *dentry, const char *name, void *value, size_t size); +extern ssize_t sdfat_listxattr(struct dentry *dentry, char *list, size_t size); +extern int sdfat_removexattr(struct dentry *dentry, const char *name); +#else +static inline void setup_sdfat_xattr_handler(struct super_block *sb) {}; +#endif + +/* 
sdfat/misc.c */ +#ifdef CONFIG_SDFAT_UEVENT +extern int sdfat_uevent_init(struct kset *sdfat_kset); +extern void sdfat_uevent_uninit(void); +extern void sdfat_uevent_ro_remount(struct super_block *sb); +#else +static inline int sdfat_uevent_init(struct kset *sdfat_kset) +{ + return 0; +} +static inline void sdfat_uevent_uninit(void) {}; +static inline void sdfat_uevent_ro_remount(struct super_block *sb) {}; +#endif +extern void +__sdfat_fs_error(struct super_block *sb, int report, const char *fmt, ...) + __printf(3, 4) __cold; +#define sdfat_fs_error(sb, fmt, args...) \ + __sdfat_fs_error(sb, 1, fmt, ## args) +#define sdfat_fs_error_ratelimit(sb, fmt, args...) \ + __sdfat_fs_error(sb, __ratelimit(&SDFAT_SB(sb)->ratelimit), fmt, ## args) +extern void +__sdfat_msg(struct super_block *sb, const char *lv, int st, const char *fmt, ...) + __printf(4, 5) __cold; +#define sdfat_msg(sb, lv, fmt, args...) \ + __sdfat_msg(sb, lv, 0, fmt, ## args) +#define sdfat_log_msg(sb, lv, fmt, args...) \ + __sdfat_msg(sb, lv, 1, fmt, ## args) +extern void sdfat_log_version(void); +extern void sdfat_time_fat2unix(struct sdfat_sb_info *sbi, sdfat_timespec_t *ts, + DATE_TIME_T *tp); +extern void sdfat_time_unix2fat(struct sdfat_sb_info *sbi, sdfat_timespec_t *ts, + DATE_TIME_T *tp); +extern TIMESTAMP_T *tm_now(struct sdfat_sb_info *sbi, TIMESTAMP_T *tm); + +#ifdef CONFIG_SDFAT_DEBUG + +#ifdef CONFIG_SDFAT_DBG_CAREFUL +void sdfat_debug_check_clusters(struct inode *inode); +#else +#define sdfat_debug_check_clusters(inode) +#endif /* CONFIG_SDFAT_DBG_CAREFUL */ + +#ifdef CONFIG_SDFAT_DBG_BUGON +#define sdfat_debug_bug_on(expr) BUG_ON(expr) +#else +#define sdfat_debug_bug_on(expr) +#endif + +#ifdef CONFIG_SDFAT_DBG_WARNON +#define sdfat_debug_warn_on(expr) WARN_ON(expr) +#else +#define sdfat_debug_warn_on(expr) +#endif + +#else /* CONFIG_SDFAT_DEBUG */ + +#define sdfat_debug_check_clusters(inode) +#define sdfat_debug_bug_on(expr) +#define sdfat_debug_warn_on(expr) + +#endif /* CONFIG_SDFAT_DEBUG */ + +#ifdef CONFIG_SDFAT_TRACE_ELAPSED_TIME +u32 sdfat_time_current_usec(struct timeval *tv); +extern struct timeval __t1; +extern struct timeval __t2; + +#define TIME_GET(tv) sdfat_time_current_usec(tv) +#define TIME_START(s) sdfat_time_current_usec(s) +#define TIME_END(e) sdfat_time_current_usec(e) +#define TIME_ELAPSED(s, e) ((u32)(((e)->tv_sec - (s)->tv_sec) * 1000000 + \ + ((e)->tv_usec - (s)->tv_usec))) +#define PRINT_TIME(n) pr_info("[SDFAT] Elapsed time %d = %d (usec)\n", n, (__t2 - __t1)) +#else /* CONFIG_SDFAT_TRACE_ELAPSED_TIME */ +#define TIME_GET(tv) (0) +#define TIME_START(s) +#define TIME_END(e) +#define TIME_ELAPSED(s, e) (0) +#define PRINT_TIME(n) +#endif /* CONFIG_SDFAT_TRACE_ELAPSED_TIME */ + +#define SDFAT_MSG_LV_NONE (0x00000000) +#define SDFAT_MSG_LV_ERR (0x00000001) +#define SDFAT_MSG_LV_INFO (0x00000002) +#define SDFAT_MSG_LV_DBG (0x00000003) +#define SDFAT_MSG_LV_MORE (0x00000004) +#define SDFAT_MSG_LV_TRACE (0x00000005) +#define SDFAT_MSG_LV_ALL (0x00000006) + +#define SDFAT_MSG_LEVEL SDFAT_MSG_LV_INFO + +#define SDFAT_TAG_NAME "SDFAT" +#define __S(x) #x +#define _S(x) __S(x) + +extern void __sdfat_dmsg(int level, const char *fmt, ...) __printf(2, 3) __cold; + +#define SDFAT_EMSG_T(level, ...) \ + __sdfat_dmsg(level, KERN_ERR "[" SDFAT_TAG_NAME "] [" _S(__FILE__) "(" _S(__LINE__) ")] " __VA_ARGS__) +#define SDFAT_DMSG_T(level, ...) \ + __sdfat_dmsg(level, KERN_INFO "[" SDFAT_TAG_NAME "] " __VA_ARGS__) + +#define SDFAT_EMSG(...) SDFAT_EMSG_T(SDFAT_MSG_LV_ERR, __VA_ARGS__) +#define SDFAT_IMSG(...) 
SDFAT_DMSG_T(SDFAT_MSG_LV_INFO, __VA_ARGS__) +#define SDFAT_DMSG(...) SDFAT_DMSG_T(SDFAT_MSG_LV_DBG, __VA_ARGS__) +#define SDFAT_MMSG(...) SDFAT_DMSG_T(SDFAT_MSG_LV_MORE, __VA_ARGS__) +#define SDFAT_TMSG(...) SDFAT_DMSG_T(SDFAT_MSG_LV_TRACE, __VA_ARGS__) + +#define EMSG(...) +#define IMSG(...) +#define DMSG(...) +#define MMSG(...) +#define TMSG(...) + +#define EMSG_VAR(exp) +#define IMSG_VAR(exp) +#define DMSG_VAR(exp) +#define MMSG_VAR(exp) +#define TMSG_VAR(exp) + +#ifdef CONFIG_SDFAT_DBG_MSG + + +#if (SDFAT_MSG_LEVEL >= SDFAT_MSG_LV_ERR) +#undef EMSG +#undef EMSG_VAR +#define EMSG(...) SDFAT_EMSG(__VA_ARGS__) +#define EMSG_VAR(exp) exp +#endif + +#if (SDFAT_MSG_LEVEL >= SDFAT_MSG_LV_INFO) +#undef IMSG +#undef IMSG_VAR +#define IMSG(...) SDFAT_IMSG(__VA_ARGS__) +#define IMSG_VAR(exp) exp +#endif + +#if (SDFAT_MSG_LEVEL >= SDFAT_MSG_LV_DBG) +#undef DMSG +#undef DMSG_VAR +#define DMSG(...) SDFAT_DMSG(__VA_ARGS__) +#define DMSG_VAR(exp) exp +#endif + +#if (SDFAT_MSG_LEVEL >= SDFAT_MSG_LV_MORE) +#undef MMSG +#undef MMSG_VAR +#define MMSG(...) SDFAT_MMSG(__VA_ARGS__) +#define MMSG_VAR(exp) exp +#endif + +/* should replace with trace function */ +#if (SDFAT_MSG_LEVEL >= SDFAT_MSG_LV_TRACE) +#undef TMSG +#undef TMSG_VAR +#define TMSG(...) SDFAT_TMSG(__VA_ARGS__) +#define TMSG_VAR(exp) exp +#endif + +#endif /* CONFIG_SDFAT_DBG_MSG */ + + +#define ASSERT(expr) { \ + if (!(expr)) { \ + pr_err("Assertion failed! %s\n", #expr); \ + BUG_ON(1); \ + } \ +} + +#endif /* !_SDFAT_H */ + diff --git a/fs/sdfat/sdfat_fs.h b/fs/sdfat/sdfat_fs.h new file mode 100644 index 000000000000..23b5cda2f58c --- /dev/null +++ b/fs/sdfat/sdfat_fs.h @@ -0,0 +1,423 @@ +/* + * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef _SDFAT_FS_H +#define _SDFAT_FS_H + +#include <linux/types.h> +#include <linux/magic.h> +#include <asm/byteorder.h> + +/*----------------------------------------------------------------------*/ +/* Constant & Macro Definitions */ +/*----------------------------------------------------------------------*/ +#ifndef MSDOS_SUPER_MAGIC +#define MSDOS_SUPER_MAGIC 0x4d44 /* MD */ +#endif + +#ifndef EXFAT_SUPER_MAGIC +#define EXFAT_SUPER_MAGIC (0x2011BAB0UL) +#endif /* EXFAT_SUPER_MAGIC */ + +#ifndef SDFAT_SUPER_MAGIC +#define SDFAT_SUPER_MAGIC (0x5EC5DFA4UL) +#endif /* SDFAT_SUPER_MAGIC */ + +#define SDFAT_ROOT_INO 1 + +/* FAT types */ +#define FAT12 0x01 // FAT12 +#define FAT16 0x0E // Win95 FAT16 (LBA) +#define FAT32 0x0C // Win95 FAT32 (LBA) +#define EXFAT 0x07 // exFAT + +/* directory file name */ +#define DOS_CUR_DIR_NAME ". " +#define DOS_PAR_DIR_NAME ".. " + +#ifdef __LITTLE_ENDIAN +#define UNI_CUR_DIR_NAME ".\0" +#define UNI_PAR_DIR_NAME ".\0.\0" +#else +#define UNI_CUR_DIR_NAME "\0." +#define UNI_PAR_DIR_NAME "\0.\0." 
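/* Both spellings are UTF-16 "." and ".." written out byte-by-byte, so the
 * byte order of each pair above must match the host endianness chosen by
 * the #ifdef.
 */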
+#endif + +/* file name lengths */ +/* NOTE : + * The maximum length of input or output is limited to 256 including NULL, + * But we allocate 4 extra bytes for utf8 translation reside in last position, + * because utf8 can uses memory upto 6 bytes per one character. + * Therefore, MAX_CHARSET_SIZE supports upto 6 bytes for utf8 + */ +#define MAX_UNINAME_BUF_SIZE (((MAX_NAME_LENGTH+1)*2)+4) +#define MAX_DOSNAME_BUF_SIZE ((DOS_NAME_LENGTH+2)+6) +#define MAX_VFSNAME_BUF_SIZE ((MAX_NAME_LENGTH+1)*MAX_CHARSET_SIZE) +#define MAX_CHARSET_SIZE 6 // max size of multi-byte character +#define MAX_NAME_LENGTH 255 // max len of file name excluding NULL +#define DOS_NAME_LENGTH 11 // DOS file name length excluding NULL + +#define SECTOR_SIZE_BITS 9 /* VFS sector size is 512 bytes */ + +#define DENTRY_SIZE 32 /* directory entry size */ +#define DENTRY_SIZE_BITS 5 + +#define MAX_FAT_DENTRIES 65536 /* FAT allows 65536 directory entries */ +#define MAX_EXFAT_DENTRIES 8388608 /* exFAT allows 8388608(256MB) directory entries */ + +/* PBR entries */ +#define PBR_SIGNATURE 0xAA55 +#define EXT_SIGNATURE 0xAA550000 +#define VOL_LABEL "NO NAME " /* size should be 11 */ +#define OEM_NAME "MSWIN4.1" /* size should be 8 */ +#define STR_FAT12 "FAT12 " /* size should be 8 */ +#define STR_FAT16 "FAT16 " /* size should be 8 */ +#define STR_FAT32 "FAT32 " /* size should be 8 */ +#define STR_EXFAT "EXFAT " /* size should be 8 */ + +#define VOL_CLEAN 0x0000 +#define VOL_DIRTY 0x0002 + +#define FAT_VOL_DIRTY 0x01 + +/* max number of clusters */ +#define FAT12_THRESHOLD 4087 // 2^12 - 1 + 2 (clu 0 & 1) +#define FAT16_THRESHOLD 65527 // 2^16 - 1 + 2 +#define FAT32_THRESHOLD 268435457 // 2^28 - 1 + 2 +#define EXFAT_THRESHOLD 268435457 // 2^28 - 1 + 2 + +/* dentry types */ +#define MSDOS_DELETED 0xE5 /* deleted mark */ +#define MSDOS_UNUSED 0x00 /* end of directory */ + +#define EXFAT_UNUSED 0x00 /* end of directory */ +#define IS_EXFAT_DELETED(x) ((x) < 0x80) /* deleted file (0x01~0x7F) */ +#define EXFAT_INVAL 0x80 /* invalid value */ +#define EXFAT_BITMAP 0x81 /* allocation bitmap */ +#define EXFAT_UPCASE 0x82 /* upcase table */ +#define EXFAT_VOLUME 0x83 /* volume label */ +#define EXFAT_FILE 0x85 /* file or dir */ +#define EXFAT_STREAM 0xC0 /* stream entry */ +#define EXFAT_NAME 0xC1 /* file name entry */ +#define EXFAT_ACL 0xC2 /* stream entry */ + +/* specific flag */ +#define MSDOS_LAST_LFN 0x40 + +/* file attributes */ +#define ATTR_NORMAL 0x0000 +#define ATTR_READONLY 0x0001 +#define ATTR_HIDDEN 0x0002 +#define ATTR_SYSTEM 0x0004 +#define ATTR_VOLUME 0x0008 +#define ATTR_SUBDIR 0x0010 +#define ATTR_ARCHIVE 0x0020 +#define ATTR_SYMLINK 0x0040 +#define ATTR_EXTEND (ATTR_READONLY | ATTR_HIDDEN | ATTR_SYSTEM | \ + ATTR_VOLUME) /* 0x000F */ + +#define ATTR_EXTEND_MASK (ATTR_EXTEND | ATTR_SUBDIR | ATTR_ARCHIVE) +#define ATTR_RWMASK (ATTR_HIDDEN | ATTR_SYSTEM | ATTR_VOLUME | \ + ATTR_SUBDIR | ATTR_ARCHIVE | ATTR_SYMLINK)/* 0x007E */ + +/* file creation modes */ +#define FM_REGULAR 0x00 +#define FM_SYMLINK 0x40 + +/* time modes */ +#define TM_CREATE 0 +#define TM_MODIFY 1 +#define TM_ACCESS 2 + +/* checksum types */ +#define CS_DIR_ENTRY 0 +#define CS_PBR_SECTOR 1 +#define CS_DEFAULT 2 + +/* + * ioctl command + */ +#define SDFAT_IOCTL_GET_VOLUME_ID _IOR('r', 0x12, __u32) +#define SDFAT_IOCTL_DFR_INFO _IOC(_IOC_NONE, 'E', 0x13, sizeof(u32)) +#define SDFAT_IOCTL_DFR_TRAV _IOC(_IOC_NONE, 'E', 0x14, sizeof(u32)) +#define SDFAT_IOCTL_DFR_REQ _IOC(_IOC_NONE, 'E', 0x15, sizeof(u32)) +#define SDFAT_IOCTL_DFR_SPO_FLAG _IOC(_IOC_NONE, 
'E', 0x16, sizeof(u32)) +#define SDFAT_IOCTL_PANIC _IOC(_IOC_NONE, 'E', 0x17, sizeof(u32)) + +/* + * ioctl command for debugging + */ + +/* + * IOCTL code 'f' used by + * - file systems typically #0~0x1F + * - embedded terminal devices #128~ + * - exts for debugging purpose #99 + * number 100 and 101 is available now but has possible conflicts + * + * NOTE : This is available only If CONFIG_SDFAT_DVBG_IOCTL is enabled. + * + */ +#define SDFAT_IOC_GET_DEBUGFLAGS _IOR('f', 100, long) +#define SDFAT_IOC_SET_DEBUGFLAGS _IOW('f', 101, long) + +#define SDFAT_DEBUGFLAGS_INVALID_UMOUNT 0x01 +#define SDFAT_DEBUGFLAGS_ERROR_RW 0x02 + +/*----------------------------------------------------------------------*/ +/* On-Disk Type Definitions */ +/*----------------------------------------------------------------------*/ + +/* FAT12/16 BIOS parameter block (64 bytes) */ +typedef struct { + __u8 jmp_boot[3]; + __u8 oem_name[8]; + + __u8 sect_size[2]; /* unaligned */ + __u8 sect_per_clus; + __le16 num_reserved; /* . */ + __u8 num_fats; + __u8 num_root_entries[2]; /* unaligned */ + __u8 num_sectors[2]; /* unaligned */ + __u8 media_type; + __le16 num_fat_sectors; + __le16 sectors_in_track; + __le16 num_heads; + __le32 num_hid_sectors; /* . */ + __le32 num_huge_sectors; + + __u8 phy_drv_no; + __u8 state; /* used by WindowsNT for mount state */ + __u8 ext_signature; + __u8 vol_serial[4]; + __u8 vol_label[11]; + __u8 vol_type[8]; + __le16 dummy; +} bpb16_t; + +/* FAT32 BIOS parameter block (64 bytes) */ +typedef struct { + __u8 jmp_boot[3]; + __u8 oem_name[8]; + + __u8 sect_size[2]; /* unaligned */ + __u8 sect_per_clus; + __le16 num_reserved; + __u8 num_fats; + __u8 num_root_entries[2]; /* unaligned */ + __u8 num_sectors[2]; /* unaligned */ + __u8 media_type; + __le16 num_fat_sectors; /* zero */ + __le16 sectors_in_track; + __le16 num_heads; + __le32 num_hid_sectors; /* . */ + __le32 num_huge_sectors; + + __le32 num_fat32_sectors; + __le16 ext_flags; + __u8 fs_version[2]; + __le32 root_cluster; /* . */ + __le16 fsinfo_sector; + __le16 backup_sector; + __le16 reserved[6]; /* . 
*/ +} bpb32_t; + +/* FAT32 EXTEND BIOS parameter block (32 bytes) */ +typedef struct { + __u8 phy_drv_no; + __u8 state; /* used by WindowsNT for mount state */ + __u8 ext_signature; + __u8 vol_serial[4]; + __u8 vol_label[11]; + __u8 vol_type[8]; + __le16 dummy[3]; +} bsx32_t; + +/* EXFAT BIOS parameter block (64 bytes) */ +typedef struct { + __u8 jmp_boot[3]; + __u8 oem_name[8]; + __u8 res_zero[53]; +} bpb64_t; + +/* EXFAT EXTEND BIOS parameter block (56 bytes) */ +typedef struct { + __le64 vol_offset; + __le64 vol_length; + __le32 fat_offset; + __le32 fat_length; + __le32 clu_offset; + __le32 clu_count; + __le32 root_cluster; + __le32 vol_serial; + __u8 fs_version[2]; + __le16 vol_flags; + __u8 sect_size_bits; + __u8 sect_per_clus_bits; + __u8 num_fats; + __u8 phy_drv_no; + __u8 perc_in_use; + __u8 reserved2[7]; +} bsx64_t; + +/* FAT32 PBR (64 bytes) */ +typedef struct { + bpb16_t bpb; +} pbr16_t; + +/* FAT32 PBR[BPB+BSX] (96 bytes) */ +typedef struct { + bpb32_t bpb; + bsx32_t bsx; +} pbr32_t; + +/* EXFAT PBR[BPB+BSX] (120 bytes) */ +typedef struct { + bpb64_t bpb; + bsx64_t bsx; +} pbr64_t; + +/* Common PBR[Partition Boot Record] (512 bytes) */ +typedef struct { + union { + __u8 raw[64]; + bpb16_t f16; + bpb32_t f32; + bpb64_t f64; + } bpb; + union { + __u8 raw[56]; + bsx32_t f32; + bsx64_t f64; + } bsx; + __u8 boot_code[390]; + __le16 signature; +} pbr_t; + +/* FAT32 filesystem information sector (512 bytes) */ +typedef struct { + __le32 signature1; // aligned + __u8 reserved1[480]; + __le32 signature2; // aligned + __le32 free_cluster; // aligned + __le32 next_cluster; // aligned + __u8 reserved2[14]; + __le16 signature3[2]; +} fat32_fsi_t; + +/* FAT directory entry (32 bytes) */ +typedef struct { + __u8 dummy[32]; +} DENTRY_T; + +typedef struct { + __u8 name[DOS_NAME_LENGTH]; /* 11 chars */ + __u8 attr; + __u8 lcase; + __u8 create_time_ms; + __le16 create_time; // aligned + __le16 create_date; // aligned + __le16 access_date; // aligned + __le16 start_clu_hi; // aligned + __le16 modify_time; // aligned + __le16 modify_date; // aligned + __le16 start_clu_lo; // aligned + __le32 size; // aligned +} DOS_DENTRY_T; + +/* FAT extended directory entry (32 bytes) */ +typedef struct { + __u8 order; + __u8 unicode_0_4[10]; + __u8 attr; + __u8 sysid; + __u8 checksum; + __le16 unicode_5_10[6]; // aligned + __le16 start_clu; // aligned + __le16 unicode_11_12[2]; // aligned +} EXT_DENTRY_T; + +/* EXFAT file directory entry (32 bytes) */ +typedef struct { + __u8 type; + __u8 num_ext; + __le16 checksum; // aligned + __le16 attr; // aligned + __le16 reserved1; + __le16 create_time; // aligned + __le16 create_date; // aligned + __le16 modify_time; // aligned + __le16 modify_date; // aligned + __le16 access_time; // aligned + __le16 access_date; // aligned + __u8 create_time_ms; + __u8 modify_time_ms; + __u8 create_tz; + __u8 modify_tz; + __u8 access_tz; + __u8 reserved2[7]; +} FILE_DENTRY_T; + +/* EXFAT stream extension directory entry (32 bytes) */ +typedef struct { + __u8 type; + __u8 flags; + __u8 reserved1; + __u8 name_len; + __le16 name_hash; // aligned + __le16 reserved2; + __le64 valid_size; // aligned + __le32 reserved3; // aligned + __le32 start_clu; // aligned + __le64 size; // aligned +} STRM_DENTRY_T; + +/* EXFAT file name directory entry (32 bytes) */ +typedef struct { + __u8 type; + __u8 flags; + __le16 unicode_0_14[15]; // aligned +} NAME_DENTRY_T; + +/* EXFAT allocation bitmap directory entry (32 bytes) */ +typedef struct { + __u8 type; + __u8 flags; + __u8 reserved[18]; + __le32 
start_clu; // aligned + __le64 size; // aligned +} BMAP_DENTRY_T; + +/* EXFAT up-case table directory entry (32 bytes) */ +typedef struct { + __u8 type; + __u8 reserved1[3]; + __le32 checksum; // aligned + __u8 reserved2[12]; + __le32 start_clu; // aligned + __le64 size; // aligned +} CASE_DENTRY_T; + +/* EXFAT volume label directory entry (32 bytes) */ +typedef struct { + __u8 type; + __u8 label_len; + __le16 unicode_0_10[11]; // aligned + __u8 reserved[8]; +} VOLM_DENTRY_T; + +#endif /* _SDFAT_FS_H */ diff --git a/fs/sdfat/statistics.c b/fs/sdfat/statistics.c new file mode 100644 index 000000000000..099d9a358a76 --- /dev/null +++ b/fs/sdfat/statistics.c @@ -0,0 +1,281 @@ +#include "sdfat.h" + +#define SDFAT_VF_CLUS_MAX 7 /* 512 Byte ~ 32 KByte */ +#define SDFAT_EF_CLUS_MAX 17 /* 512 Byte ~ 32 MByte */ + +enum { + SDFAT_MNT_FAT12, + SDFAT_MNT_FAT16, + SDFAT_MNT_FAT32, + SDFAT_MNT_EXFAT, + SDFAT_MNT_RO, + SDFAT_MNT_MAX +}; + +enum { + SDFAT_OP_EXFAT_MNT, + SDFAT_OP_MKDIR, + SDFAT_OP_CREATE, + SDFAT_OP_READ, + SDFAT_OP_WRITE, + SDFAT_OP_TRUNC, + SDFAT_OP_MAX +}; + +enum { + SDFAT_VOL_4G, + SDFAT_VOL_8G, + SDFAT_VOL_16G, + SDFAT_VOL_32G, + SDFAT_VOL_64G, + SDFAT_VOL_128G, + SDFAT_VOL_256G, + SDFAT_VOL_512G, + SDFAT_VOL_XTB, + SDFAT_VOL_MAX +}; + +static struct sdfat_statistics { + u32 clus_vfat[SDFAT_VF_CLUS_MAX]; + u32 clus_exfat[SDFAT_EF_CLUS_MAX]; + u32 mnt_cnt[SDFAT_MNT_MAX]; + u32 nofat_op[SDFAT_OP_MAX]; + u32 vol_size[SDFAT_VOL_MAX]; +} statistics; + +static struct kset *sdfat_statistics_kset; + +static ssize_t vfat_cl_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) +{ + return snprintf(buff, PAGE_SIZE, "\"VCL_512B_I\":\"%u\"," + "\"VCL_1K_I\":\"%u\",\"VCL_2K_I\":\"%u\"," + "\"VCL_4K_I\":\"%u\",\"VCL_8K_I\":\"%u\"," + "\"VCL_16K_I\":\"%u\",\"VCL_32K_I\":\"%u\"\n", + statistics.clus_vfat[0], statistics.clus_vfat[1], + statistics.clus_vfat[2], statistics.clus_vfat[3], + statistics.clus_vfat[4], statistics.clus_vfat[5], + statistics.clus_vfat[6]); +} + +static ssize_t exfat_cl_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) +{ + return snprintf(buff, PAGE_SIZE, "\"ECL_512B_I\":\"%u\"," + "\"ECL_1K_I\":\"%u\",\"ECL_2K_I\":\"%u\"," + "\"ECL_4K_I\":\"%u\",\"ECL_8K_I\":\"%u\"," + "\"ECL_16K_I\":\"%u\",\"ECL_32K_I\":\"%u\"," + "\"ECL_64K_I\":\"%u\",\"ECL_128K_I\":\"%u\"," + "\"ECL_256K_I\":\"%u\",\"ECL_512K_I\":\"%u\"," + "\"ECL_1M_I\":\"%u\",\"ECL_2M_I\":\"%u\"," + "\"ECL_4M_I\":\"%u\",\"ECL_8M_I\":\"%u\"," + "\"ECL_16M_I\":\"%u\",\"ECL_32M_I\":\"%u\"\n", + statistics.clus_exfat[0], statistics.clus_exfat[1], + statistics.clus_exfat[2], statistics.clus_exfat[3], + statistics.clus_exfat[4], statistics.clus_exfat[5], + statistics.clus_exfat[6], statistics.clus_exfat[7], + statistics.clus_exfat[8], statistics.clus_exfat[9], + statistics.clus_exfat[10], statistics.clus_exfat[11], + statistics.clus_exfat[12], statistics.clus_exfat[13], + statistics.clus_exfat[14], statistics.clus_exfat[15], + statistics.clus_exfat[16]); +} + +static ssize_t mount_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) +{ + return snprintf(buff, PAGE_SIZE, "\"FAT12_MNT_I\":\"%u\"," + "\"FAT16_MNT_I\":\"%u\",\"FAT32_MNT_I\":\"%u\"," + "\"EXFAT_MNT_I\":\"%u\",\"RO_MNT_I\":\"%u\"\n", + statistics.mnt_cnt[SDFAT_MNT_FAT12], + statistics.mnt_cnt[SDFAT_MNT_FAT16], + statistics.mnt_cnt[SDFAT_MNT_FAT32], + statistics.mnt_cnt[SDFAT_MNT_EXFAT], + statistics.mnt_cnt[SDFAT_MNT_RO]); +} + +static ssize_t nofat_op_show(struct kobject *kobj, + struct kobj_attribute *attr, 
char *buff) +{ + return snprintf(buff, PAGE_SIZE, "\"NOFAT_MOUNT_I\":\"%u\"," + "\"NOFAT_MKDIR_I\":\"%u\",\"NOFAT_CREATE_I\":\"%u\"," + "\"NOFAT_READ_I\":\"%u\",\"NOFAT_WRITE_I\":\"%u\"," + "\"NOFAT_TRUNC_I\":\"%u\"\n", + statistics.nofat_op[SDFAT_OP_EXFAT_MNT], + statistics.nofat_op[SDFAT_OP_MKDIR], + statistics.nofat_op[SDFAT_OP_CREATE], + statistics.nofat_op[SDFAT_OP_READ], + statistics.nofat_op[SDFAT_OP_WRITE], + statistics.nofat_op[SDFAT_OP_TRUNC]); +} + +static ssize_t vol_size_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) +{ + return snprintf(buff, PAGE_SIZE, "\"VOL_4G_I\":\"%u\"," + "\"VOL_8G_I\":\"%u\",\"VOL_16G_I\":\"%u\"," + "\"VOL_32G_I\":\"%u\",\"VOL_64G_I\":\"%u\"," + "\"VOL_128G_I\":\"%u\",\"VOL_256G_I\":\"%u\"," + "\"VOL_512G_I\":\"%u\",\"VOL_XTB_I\":\"%u\"\n", + statistics.vol_size[SDFAT_VOL_4G], + statistics.vol_size[SDFAT_VOL_8G], + statistics.vol_size[SDFAT_VOL_16G], + statistics.vol_size[SDFAT_VOL_32G], + statistics.vol_size[SDFAT_VOL_64G], + statistics.vol_size[SDFAT_VOL_128G], + statistics.vol_size[SDFAT_VOL_256G], + statistics.vol_size[SDFAT_VOL_512G], + statistics.vol_size[SDFAT_VOL_XTB]); +} + +static struct kobj_attribute vfat_cl_attr = __ATTR_RO(vfat_cl); +static struct kobj_attribute exfat_cl_attr = __ATTR_RO(exfat_cl); +static struct kobj_attribute mount_attr = __ATTR_RO(mount); +static struct kobj_attribute nofat_op_attr = __ATTR_RO(nofat_op); +static struct kobj_attribute vol_size_attr = __ATTR_RO(vol_size); + +static struct attribute *attributes_statistics[] = { + &vfat_cl_attr.attr, + &exfat_cl_attr.attr, + &mount_attr.attr, + &nofat_op_attr.attr, + &vol_size_attr.attr, + NULL, +}; + +static struct attribute_group attr_group_statistics = { + .attrs = attributes_statistics, +}; + +int sdfat_statistics_init(struct kset *sdfat_kset) +{ + int err; + + sdfat_statistics_kset = kset_create_and_add("statistics", NULL, &sdfat_kset->kobj); + if (!sdfat_statistics_kset) { + pr_err("[SDFAT] failed to create sdfat statistics kobj\n"); + return -ENOMEM; + } + + err = sysfs_create_group(&sdfat_statistics_kset->kobj, &attr_group_statistics); + if (err) { + pr_err("[SDFAT] failed to create sdfat statistics attributes\n"); + kset_unregister(sdfat_statistics_kset); + sdfat_statistics_kset = NULL; + return err; + } + + return 0; +} + +void sdfat_statistics_uninit(void) +{ + if (sdfat_statistics_kset) { + sysfs_remove_group(&sdfat_statistics_kset->kobj, &attr_group_statistics); + kset_unregister(sdfat_statistics_kset); + sdfat_statistics_kset = NULL; + } + memset(&statistics, 0, sizeof(struct sdfat_statistics)); +} + +void sdfat_statistics_set_mnt(FS_INFO_T *fsi) +{ + if (fsi->vol_type == EXFAT) { + statistics.mnt_cnt[SDFAT_MNT_EXFAT]++; + statistics.nofat_op[SDFAT_OP_EXFAT_MNT] = 1; + if (fsi->sect_per_clus_bits < SDFAT_EF_CLUS_MAX) + statistics.clus_exfat[fsi->sect_per_clus_bits]++; + else + statistics.clus_exfat[SDFAT_EF_CLUS_MAX - 1]++; + return; + } + + if (fsi->vol_type == FAT32) + statistics.mnt_cnt[SDFAT_MNT_FAT32]++; + else if (fsi->vol_type == FAT16) + statistics.mnt_cnt[SDFAT_MNT_FAT16]++; + else if (fsi->vol_type == FAT12) + statistics.mnt_cnt[SDFAT_MNT_FAT12]++; + + if (fsi->sect_per_clus_bits < SDFAT_VF_CLUS_MAX) + statistics.clus_vfat[fsi->sect_per_clus_bits]++; + else + statistics.clus_vfat[SDFAT_VF_CLUS_MAX - 1]++; +} + +void sdfat_statistics_set_mnt_ro(void) +{ + statistics.mnt_cnt[SDFAT_MNT_RO]++; +} + +void sdfat_statistics_set_mkdir(u8 flags) +{ + if (flags != 0x03) + return; + statistics.nofat_op[SDFAT_OP_MKDIR] = 1; +}
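An illustrative aside, not part of the patch: the magic 0x03 these statistics helpers test is, as far as the exFAT on-disk format goes, the stream-extension entry's GeneralSecondaryFlags pair AllocationPossible (bit 0) plus NoFatChain (bit 1), i.e. the entry's clusters form one contiguous run and the FAT is never consulted. A minimal sketch of that reading; the helper and macro names below are hypothetical, not from sdfat:

#include <linux/types.h>

/* Hypothetical names; only the 0x03 bit layout itself comes from the
 * exFAT stream extension entry's GeneralSecondaryFlags field. */
#define EXFAT_SF_ALLOC_POSSIBLE	0x01	/* bit 0: allocation possible */
#define EXFAT_SF_NO_FAT_CHAIN	0x02	/* bit 1: contiguous run, FAT unused */

static inline bool example_is_nofat_chain(u8 flags)
{
	/* Matches the "flags == 0x03" checks in the helpers around here. */
	return flags == (EXFAT_SF_ALLOC_POSSIBLE | EXFAT_SF_NO_FAT_CHAIN);
}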
+ +void sdfat_statistics_set_create(u8 flags) +{ + if (flags != 0x03) + return; + statistics.nofat_op[SDFAT_OP_CREATE] = 1; +} + +/* flags : file or dir flags, 0x03 means no fat-chain. + * clu_offset : file or dir logical cluster offset + * create : BMAP_ADD_CLUSTER or not + * + * A file or dir with BMAP_ADD_CLUSTER counts as a no-fat-chain write + * when it has the 0x03 flag and two or more clusters; the same + * condition without BMAP_ADD_CLUSTER counts as a no-fat-chain read. + */ +void sdfat_statistics_set_rw(u8 flags, u32 clu_offset, s32 create) +{ + if ((flags == 0x03) && (clu_offset > 1)) { + if (create) + statistics.nofat_op[SDFAT_OP_WRITE] = 1; + else + statistics.nofat_op[SDFAT_OP_READ] = 1; + } +} + +/* flags : file or dir flags, 0x03 means no fat-chain. + * clu : cluster chain + * + * Set no-fat-chain trunc when a file or dir has the 0x03 flag + * and two or more clusters. + */ +void sdfat_statistics_set_trunc(u8 flags, CHAIN_T *clu) +{ + if ((flags == 0x03) && (clu->size > 1)) + statistics.nofat_op[SDFAT_OP_TRUNC] = 1; +} + +void sdfat_statistics_set_vol_size(struct super_block *sb) +{ + u64 vol_size; + FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); + + vol_size = (u64)fsi->num_sectors << sb->s_blocksize_bits; + + if (vol_size <= ((u64)1 << 32)) + statistics.vol_size[SDFAT_VOL_4G]++; + else if (vol_size <= ((u64)1 << 33)) + statistics.vol_size[SDFAT_VOL_8G]++; + else if (vol_size <= ((u64)1 << 34)) + statistics.vol_size[SDFAT_VOL_16G]++; + else if (vol_size <= ((u64)1 << 35)) + statistics.vol_size[SDFAT_VOL_32G]++; + else if (vol_size <= ((u64)1 << 36)) + statistics.vol_size[SDFAT_VOL_64G]++; + else if (vol_size <= ((u64)1 << 37)) + statistics.vol_size[SDFAT_VOL_128G]++; + else if (vol_size <= ((u64)1 << 38)) + statistics.vol_size[SDFAT_VOL_256G]++; + else if (vol_size <= ((u64)1 << 39)) + statistics.vol_size[SDFAT_VOL_512G]++; + else + statistics.vol_size[SDFAT_VOL_XTB]++; +} diff --git a/fs/sdfat/upcase.h b/fs/sdfat/upcase.h new file mode 100644 index 000000000000..386772c57f8d --- /dev/null +++ b/fs/sdfat/upcase.h @@ -0,0 +1,407 @@ +/* + * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */
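The header that follows encodes the default exFAT up-case table together with helpers that split a UTF-16 code unit into a column index (high 8 bits) and a row index (low 8 bits), so a mounted volume can keep its table as a sparse two-level array. As a hedged sketch only, assuming a hypothetical column-indexed upcase_table built at mount time (only get_col_index()/get_row_index(), defined below, are real):

/* Illustrative composition, not part of the patch: a two-level
 * up-case lookup over a hypothetical mount-time table. */
static inline u16 example_towupper(u16 **upcase_table, u16 uni)
{
	u16 *col = upcase_table[get_col_index(uni)];

	/* Columns with no uppercase mappings may stay NULL (identity). */
	return col ? col[get_row_index(uni)] : uni;
}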
+ +#ifndef _UPCASE_H +#define _UPCASE_H + +/* Upcase table macros */ +#define SDFAT_NUM_UPCASE 2918 +#define HIGH_INDEX_BIT (8) +#define HIGH_INDEX_MASK (0xFF00) +#define LOW_INDEX_BIT (16-HIGH_INDEX_BIT) +#define UTBL_ROW_COUNT (1<<LOW_INDEX_BIT) +#define UTBL_COL_COUNT (1<<HIGH_INDEX_BIT) + +static inline u16 get_col_index(u16 i) +{ + return i >> LOW_INDEX_BIT; +} +static inline u16 get_row_index(u16 i) +{ + return i & ~HIGH_INDEX_MASK; +} + + +static const u8 uni_def_upcase[SDFAT_NUM_UPCASE<<1] = { + 0x00, 0x00, 0x01, 0x00, 0x02, 0x00, 0x03, 0x00, 0x04, 0x00, 0x05, 0x00, 0x06, 0x00, 0x07, 0x00, + 0x08, 0x00, 0x09, 0x00, 0x0A, 0x00, 0x0B, 0x00, 0x0C, 0x00, 0x0D, 0x00, 0x0E, 0x00, 0x0F, 0x00, + 0x10, 0x00, 0x11, 0x00, 0x12, 0x00, 0x13, 0x00, 0x14, 0x00, 0x15, 0x00, 0x16, 0x00, 0x17, 0x00, + 0x18, 0x00, 0x19, 0x00, 0x1A, 0x00, 0x1B, 0x00, 0x1C, 0x00, 0x1D, 0x00, 0x1E, 0x00, 0x1F, 0x00, + 0x20, 0x00, 0x21, 0x00, 0x22, 0x00, 0x23, 0x00, 0x24, 0x00, 0x25, 0x00, 0x26, 0x00, 0x27, 0x00, + 0x28, 0x00, 0x29, 0x00, 0x2A, 0x00, 0x2B, 0x00, 0x2C, 0x00, 0x2D, 0x00, 0x2E, 0x00, 0x2F, 0x00, + 0x30, 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x34, 0x00, 0x35, 0x00, 0x36, 0x00, 0x37, 0x00, + 0x38, 0x00, 0x39, 0x00, 0x3A, 0x00, 0x3B, 0x00, 0x3C, 0x00, 0x3D, 0x00, 0x3E, 0x00, 0x3F, 0x00, + 0x40, 0x00, 0x41, 0x00, 0x42, 0x00, 0x43, 0x00, 0x44, 0x00, 0x45, 0x00, 0x46, 0x00, 0x47, 0x00, + 0x48, 0x00, 0x49, 0x00, 0x4A, 0x00, 0x4B, 0x00, 0x4C, 0x00, 0x4D, 0x00, 0x4E, 0x00, 0x4F, 0x00, + 0x50, 0x00, 0x51, 0x00, 0x52, 0x00, 0x53, 0x00, 0x54, 0x00, 0x55, 0x00, 0x56, 0x00, 0x57, 0x00, + 0x58, 0x00, 0x59, 0x00, 0x5A, 0x00, 0x5B, 0x00, 0x5C, 0x00, 0x5D, 0x00, 0x5E, 0x00, 0x5F, 0x00, + 0x60, 0x00, 0x41, 0x00, 0x42, 0x00, 0x43, 0x00, 0x44, 0x00, 0x45, 0x00, 0x46, 0x00, 0x47, 0x00, + 0x48, 0x00, 0x49, 0x00, 0x4A, 0x00, 0x4B, 0x00, 0x4C, 0x00, 0x4D, 0x00, 0x4E, 0x00, 0x4F, 0x00, + 0x50, 0x00, 0x51, 0x00, 0x52, 0x00, 0x53, 0x00, 0x54, 0x00, 0x55, 0x00, 0x56, 0x00, 0x57, 0x00, + 0x58, 0x00, 0x59, 0x00, 0x5A, 0x00, 0x7B, 0x00, 0x7C, 0x00, 0x7D, 0x00, 0x7E, 0x00, 0x7F, 0x00, + 0x80, 0x00, 0x81, 0x00, 0x82, 0x00, 0x83, 0x00, 0x84, 0x00, 0x85, 0x00, 0x86, 0x00, 0x87, 0x00, + 0x88, 0x00, 0x89, 0x00, 0x8A, 0x00, 0x8B, 0x00, 0x8C, 0x00, 0x8D, 0x00, 0x8E, 0x00, 0x8F, 0x00, + 0x90, 0x00, 0x91, 0x00, 0x92, 0x00, 0x93, 0x00, 0x94, 0x00, 0x95, 0x00, 0x96, 0x00, 0x97, 0x00, + 0x98, 0x00, 0x99, 0x00, 0x9A, 0x00, 0x9B, 0x00, 0x9C, 0x00, 0x9D, 0x00, 0x9E, 0x00, 0x9F, 0x00, + 0xA0, 0x00, 0xA1, 0x00, 0xA2, 0x00, 0xA3, 0x00, 0xA4, 0x00, 0xA5, 0x00, 0xA6, 0x00, 0xA7, 0x00, + 0xA8, 0x00, 0xA9, 0x00, 0xAA, 0x00, 0xAB, 0x00, 0xAC, 0x00, 0xAD, 0x00, 0xAE, 0x00, 0xAF, 0x00, + 0xB0, 0x00, 0xB1, 0x00, 0xB2, 0x00, 0xB3, 0x00, 0xB4, 0x00, 0xB5, 0x00, 0xB6, 0x00, 0xB7, 0x00, + 0xB8, 0x00, 0xB9, 0x00, 0xBA, 0x00, 0xBB, 0x00, 0xBC, 0x00, 0xBD, 0x00, 0xBE, 0x00, 0xBF, 0x00, + 0xC0, 0x00, 0xC1, 0x00, 0xC2, 0x00, 0xC3, 0x00, 0xC4, 0x00, 0xC5, 0x00, 0xC6, 0x00, 0xC7, 0x00, + 0xC8, 0x00, 0xC9, 0x00, 0xCA, 0x00, 0xCB, 0x00, 0xCC, 0x00, 0xCD, 0x00, 0xCE, 0x00, 0xCF, 0x00, + 0xD0, 0x00, 0xD1, 0x00, 0xD2, 0x00, 0xD3, 0x00, 0xD4, 0x00, 0xD5, 0x00, 0xD6, 0x00, 0xD7, 0x00, + 0xD8, 0x00, 0xD9, 0x00, 0xDA, 0x00, 0xDB, 0x00, 0xDC, 0x00, 0xDD, 0x00, 0xDE, 0x00, 0xDF, 0x00, + 0xC0, 0x00, 0xC1, 0x00, 0xC2, 0x00, 0xC3, 0x00, 0xC4, 0x00, 0xC5, 0x00, 0xC6, 0x00, 0xC7, 0x00, + 0xC8, 0x00, 0xC9, 0x00, 0xCA, 0x00, 0xCB, 0x00, 0xCC, 0x00, 0xCD, 0x00, 0xCE, 0x00, 0xCF, 0x00, + 0xD0, 0x00, 0xD1, 0x00, 0xD2, 0x00, 0xD3, 0x00, 0xD4, 0x00, 0xD5, 0x00, 0xD6, 0x00, 0xF7, 0x00, +
0xD8, 0x00, 0xD9, 0x00, 0xDA, 0x00, 0xDB, 0x00, 0xDC, 0x00, 0xDD, 0x00, 0xDE, 0x00, 0x78, 0x01, + 0x00, 0x01, 0x00, 0x01, 0x02, 0x01, 0x02, 0x01, 0x04, 0x01, 0x04, 0x01, 0x06, 0x01, 0x06, 0x01, + 0x08, 0x01, 0x08, 0x01, 0x0A, 0x01, 0x0A, 0x01, 0x0C, 0x01, 0x0C, 0x01, 0x0E, 0x01, 0x0E, 0x01, + 0x10, 0x01, 0x10, 0x01, 0x12, 0x01, 0x12, 0x01, 0x14, 0x01, 0x14, 0x01, 0x16, 0x01, 0x16, 0x01, + 0x18, 0x01, 0x18, 0x01, 0x1A, 0x01, 0x1A, 0x01, 0x1C, 0x01, 0x1C, 0x01, 0x1E, 0x01, 0x1E, 0x01, + 0x20, 0x01, 0x20, 0x01, 0x22, 0x01, 0x22, 0x01, 0x24, 0x01, 0x24, 0x01, 0x26, 0x01, 0x26, 0x01, + 0x28, 0x01, 0x28, 0x01, 0x2A, 0x01, 0x2A, 0x01, 0x2C, 0x01, 0x2C, 0x01, 0x2E, 0x01, 0x2E, 0x01, + 0x30, 0x01, 0x31, 0x01, 0x32, 0x01, 0x32, 0x01, 0x34, 0x01, 0x34, 0x01, 0x36, 0x01, 0x36, 0x01, + 0x38, 0x01, 0x39, 0x01, 0x39, 0x01, 0x3B, 0x01, 0x3B, 0x01, 0x3D, 0x01, 0x3D, 0x01, 0x3F, 0x01, + 0x3F, 0x01, 0x41, 0x01, 0x41, 0x01, 0x43, 0x01, 0x43, 0x01, 0x45, 0x01, 0x45, 0x01, 0x47, 0x01, + 0x47, 0x01, 0x49, 0x01, 0x4A, 0x01, 0x4A, 0x01, 0x4C, 0x01, 0x4C, 0x01, 0x4E, 0x01, 0x4E, 0x01, + 0x50, 0x01, 0x50, 0x01, 0x52, 0x01, 0x52, 0x01, 0x54, 0x01, 0x54, 0x01, 0x56, 0x01, 0x56, 0x01, + 0x58, 0x01, 0x58, 0x01, 0x5A, 0x01, 0x5A, 0x01, 0x5C, 0x01, 0x5C, 0x01, 0x5E, 0x01, 0x5E, 0x01, + 0x60, 0x01, 0x60, 0x01, 0x62, 0x01, 0x62, 0x01, 0x64, 0x01, 0x64, 0x01, 0x66, 0x01, 0x66, 0x01, + 0x68, 0x01, 0x68, 0x01, 0x6A, 0x01, 0x6A, 0x01, 0x6C, 0x01, 0x6C, 0x01, 0x6E, 0x01, 0x6E, 0x01, + 0x70, 0x01, 0x70, 0x01, 0x72, 0x01, 0x72, 0x01, 0x74, 0x01, 0x74, 0x01, 0x76, 0x01, 0x76, 0x01, + 0x78, 0x01, 0x79, 0x01, 0x79, 0x01, 0x7B, 0x01, 0x7B, 0x01, 0x7D, 0x01, 0x7D, 0x01, 0x7F, 0x01, + 0x43, 0x02, 0x81, 0x01, 0x82, 0x01, 0x82, 0x01, 0x84, 0x01, 0x84, 0x01, 0x86, 0x01, 0x87, 0x01, + 0x87, 0x01, 0x89, 0x01, 0x8A, 0x01, 0x8B, 0x01, 0x8B, 0x01, 0x8D, 0x01, 0x8E, 0x01, 0x8F, 0x01, + 0x90, 0x01, 0x91, 0x01, 0x91, 0x01, 0x93, 0x01, 0x94, 0x01, 0xF6, 0x01, 0x96, 0x01, 0x97, 0x01, + 0x98, 0x01, 0x98, 0x01, 0x3D, 0x02, 0x9B, 0x01, 0x9C, 0x01, 0x9D, 0x01, 0x20, 0x02, 0x9F, 0x01, + 0xA0, 0x01, 0xA0, 0x01, 0xA2, 0x01, 0xA2, 0x01, 0xA4, 0x01, 0xA4, 0x01, 0xA6, 0x01, 0xA7, 0x01, + 0xA7, 0x01, 0xA9, 0x01, 0xAA, 0x01, 0xAB, 0x01, 0xAC, 0x01, 0xAC, 0x01, 0xAE, 0x01, 0xAF, 0x01, + 0xAF, 0x01, 0xB1, 0x01, 0xB2, 0x01, 0xB3, 0x01, 0xB3, 0x01, 0xB5, 0x01, 0xB5, 0x01, 0xB7, 0x01, + 0xB8, 0x01, 0xB8, 0x01, 0xBA, 0x01, 0xBB, 0x01, 0xBC, 0x01, 0xBC, 0x01, 0xBE, 0x01, 0xF7, 0x01, + 0xC0, 0x01, 0xC1, 0x01, 0xC2, 0x01, 0xC3, 0x01, 0xC4, 0x01, 0xC5, 0x01, 0xC4, 0x01, 0xC7, 0x01, + 0xC8, 0x01, 0xC7, 0x01, 0xCA, 0x01, 0xCB, 0x01, 0xCA, 0x01, 0xCD, 0x01, 0xCD, 0x01, 0xCF, 0x01, + 0xCF, 0x01, 0xD1, 0x01, 0xD1, 0x01, 0xD3, 0x01, 0xD3, 0x01, 0xD5, 0x01, 0xD5, 0x01, 0xD7, 0x01, + 0xD7, 0x01, 0xD9, 0x01, 0xD9, 0x01, 0xDB, 0x01, 0xDB, 0x01, 0x8E, 0x01, 0xDE, 0x01, 0xDE, 0x01, + 0xE0, 0x01, 0xE0, 0x01, 0xE2, 0x01, 0xE2, 0x01, 0xE4, 0x01, 0xE4, 0x01, 0xE6, 0x01, 0xE6, 0x01, + 0xE8, 0x01, 0xE8, 0x01, 0xEA, 0x01, 0xEA, 0x01, 0xEC, 0x01, 0xEC, 0x01, 0xEE, 0x01, 0xEE, 0x01, + 0xF0, 0x01, 0xF1, 0x01, 0xF2, 0x01, 0xF1, 0x01, 0xF4, 0x01, 0xF4, 0x01, 0xF6, 0x01, 0xF7, 0x01, + 0xF8, 0x01, 0xF8, 0x01, 0xFA, 0x01, 0xFA, 0x01, 0xFC, 0x01, 0xFC, 0x01, 0xFE, 0x01, 0xFE, 0x01, + 0x00, 0x02, 0x00, 0x02, 0x02, 0x02, 0x02, 0x02, 0x04, 0x02, 0x04, 0x02, 0x06, 0x02, 0x06, 0x02, + 0x08, 0x02, 0x08, 0x02, 0x0A, 0x02, 0x0A, 0x02, 0x0C, 0x02, 0x0C, 0x02, 0x0E, 0x02, 0x0E, 0x02, + 0x10, 0x02, 0x10, 0x02, 0x12, 0x02, 0x12, 0x02, 0x14, 0x02, 0x14, 0x02, 0x16, 0x02, 0x16, 0x02, + 0x18, 0x02, 0x18, 0x02, 
0x1A, 0x02, 0x1A, 0x02, 0x1C, 0x02, 0x1C, 0x02, 0x1E, 0x02, 0x1E, 0x02, + 0x20, 0x02, 0x21, 0x02, 0x22, 0x02, 0x22, 0x02, 0x24, 0x02, 0x24, 0x02, 0x26, 0x02, 0x26, 0x02, + 0x28, 0x02, 0x28, 0x02, 0x2A, 0x02, 0x2A, 0x02, 0x2C, 0x02, 0x2C, 0x02, 0x2E, 0x02, 0x2E, 0x02, + 0x30, 0x02, 0x30, 0x02, 0x32, 0x02, 0x32, 0x02, 0x34, 0x02, 0x35, 0x02, 0x36, 0x02, 0x37, 0x02, + 0x38, 0x02, 0x39, 0x02, 0x65, 0x2C, 0x3B, 0x02, 0x3B, 0x02, 0x3D, 0x02, 0x66, 0x2C, 0x3F, 0x02, + 0x40, 0x02, 0x41, 0x02, 0x41, 0x02, 0x43, 0x02, 0x44, 0x02, 0x45, 0x02, 0x46, 0x02, 0x46, 0x02, + 0x48, 0x02, 0x48, 0x02, 0x4A, 0x02, 0x4A, 0x02, 0x4C, 0x02, 0x4C, 0x02, 0x4E, 0x02, 0x4E, 0x02, + 0x50, 0x02, 0x51, 0x02, 0x52, 0x02, 0x81, 0x01, 0x86, 0x01, 0x55, 0x02, 0x89, 0x01, 0x8A, 0x01, + 0x58, 0x02, 0x8F, 0x01, 0x5A, 0x02, 0x90, 0x01, 0x5C, 0x02, 0x5D, 0x02, 0x5E, 0x02, 0x5F, 0x02, + 0x93, 0x01, 0x61, 0x02, 0x62, 0x02, 0x94, 0x01, 0x64, 0x02, 0x65, 0x02, 0x66, 0x02, 0x67, 0x02, + 0x97, 0x01, 0x96, 0x01, 0x6A, 0x02, 0x62, 0x2C, 0x6C, 0x02, 0x6D, 0x02, 0x6E, 0x02, 0x9C, 0x01, + 0x70, 0x02, 0x71, 0x02, 0x9D, 0x01, 0x73, 0x02, 0x74, 0x02, 0x9F, 0x01, 0x76, 0x02, 0x77, 0x02, + 0x78, 0x02, 0x79, 0x02, 0x7A, 0x02, 0x7B, 0x02, 0x7C, 0x02, 0x64, 0x2C, 0x7E, 0x02, 0x7F, 0x02, + 0xA6, 0x01, 0x81, 0x02, 0x82, 0x02, 0xA9, 0x01, 0x84, 0x02, 0x85, 0x02, 0x86, 0x02, 0x87, 0x02, + 0xAE, 0x01, 0x44, 0x02, 0xB1, 0x01, 0xB2, 0x01, 0x45, 0x02, 0x8D, 0x02, 0x8E, 0x02, 0x8F, 0x02, + 0x90, 0x02, 0x91, 0x02, 0xB7, 0x01, 0x93, 0x02, 0x94, 0x02, 0x95, 0x02, 0x96, 0x02, 0x97, 0x02, + 0x98, 0x02, 0x99, 0x02, 0x9A, 0x02, 0x9B, 0x02, 0x9C, 0x02, 0x9D, 0x02, 0x9E, 0x02, 0x9F, 0x02, + 0xA0, 0x02, 0xA1, 0x02, 0xA2, 0x02, 0xA3, 0x02, 0xA4, 0x02, 0xA5, 0x02, 0xA6, 0x02, 0xA7, 0x02, + 0xA8, 0x02, 0xA9, 0x02, 0xAA, 0x02, 0xAB, 0x02, 0xAC, 0x02, 0xAD, 0x02, 0xAE, 0x02, 0xAF, 0x02, + 0xB0, 0x02, 0xB1, 0x02, 0xB2, 0x02, 0xB3, 0x02, 0xB4, 0x02, 0xB5, 0x02, 0xB6, 0x02, 0xB7, 0x02, + 0xB8, 0x02, 0xB9, 0x02, 0xBA, 0x02, 0xBB, 0x02, 0xBC, 0x02, 0xBD, 0x02, 0xBE, 0x02, 0xBF, 0x02, + 0xC0, 0x02, 0xC1, 0x02, 0xC2, 0x02, 0xC3, 0x02, 0xC4, 0x02, 0xC5, 0x02, 0xC6, 0x02, 0xC7, 0x02, + 0xC8, 0x02, 0xC9, 0x02, 0xCA, 0x02, 0xCB, 0x02, 0xCC, 0x02, 0xCD, 0x02, 0xCE, 0x02, 0xCF, 0x02, + 0xD0, 0x02, 0xD1, 0x02, 0xD2, 0x02, 0xD3, 0x02, 0xD4, 0x02, 0xD5, 0x02, 0xD6, 0x02, 0xD7, 0x02, + 0xD8, 0x02, 0xD9, 0x02, 0xDA, 0x02, 0xDB, 0x02, 0xDC, 0x02, 0xDD, 0x02, 0xDE, 0x02, 0xDF, 0x02, + 0xE0, 0x02, 0xE1, 0x02, 0xE2, 0x02, 0xE3, 0x02, 0xE4, 0x02, 0xE5, 0x02, 0xE6, 0x02, 0xE7, 0x02, + 0xE8, 0x02, 0xE9, 0x02, 0xEA, 0x02, 0xEB, 0x02, 0xEC, 0x02, 0xED, 0x02, 0xEE, 0x02, 0xEF, 0x02, + 0xF0, 0x02, 0xF1, 0x02, 0xF2, 0x02, 0xF3, 0x02, 0xF4, 0x02, 0xF5, 0x02, 0xF6, 0x02, 0xF7, 0x02, + 0xF8, 0x02, 0xF9, 0x02, 0xFA, 0x02, 0xFB, 0x02, 0xFC, 0x02, 0xFD, 0x02, 0xFE, 0x02, 0xFF, 0x02, + 0x00, 0x03, 0x01, 0x03, 0x02, 0x03, 0x03, 0x03, 0x04, 0x03, 0x05, 0x03, 0x06, 0x03, 0x07, 0x03, + 0x08, 0x03, 0x09, 0x03, 0x0A, 0x03, 0x0B, 0x03, 0x0C, 0x03, 0x0D, 0x03, 0x0E, 0x03, 0x0F, 0x03, + 0x10, 0x03, 0x11, 0x03, 0x12, 0x03, 0x13, 0x03, 0x14, 0x03, 0x15, 0x03, 0x16, 0x03, 0x17, 0x03, + 0x18, 0x03, 0x19, 0x03, 0x1A, 0x03, 0x1B, 0x03, 0x1C, 0x03, 0x1D, 0x03, 0x1E, 0x03, 0x1F, 0x03, + 0x20, 0x03, 0x21, 0x03, 0x22, 0x03, 0x23, 0x03, 0x24, 0x03, 0x25, 0x03, 0x26, 0x03, 0x27, 0x03, + 0x28, 0x03, 0x29, 0x03, 0x2A, 0x03, 0x2B, 0x03, 0x2C, 0x03, 0x2D, 0x03, 0x2E, 0x03, 0x2F, 0x03, + 0x30, 0x03, 0x31, 0x03, 0x32, 0x03, 0x33, 0x03, 0x34, 0x03, 0x35, 0x03, 0x36, 0x03, 0x37, 0x03, + 0x38, 0x03, 0x39, 0x03, 0x3A, 0x03, 0x3B, 0x03, 
0x3C, 0x03, 0x3D, 0x03, 0x3E, 0x03, 0x3F, 0x03, + 0x40, 0x03, 0x41, 0x03, 0x42, 0x03, 0x43, 0x03, 0x44, 0x03, 0x45, 0x03, 0x46, 0x03, 0x47, 0x03, + 0x48, 0x03, 0x49, 0x03, 0x4A, 0x03, 0x4B, 0x03, 0x4C, 0x03, 0x4D, 0x03, 0x4E, 0x03, 0x4F, 0x03, + 0x50, 0x03, 0x51, 0x03, 0x52, 0x03, 0x53, 0x03, 0x54, 0x03, 0x55, 0x03, 0x56, 0x03, 0x57, 0x03, + 0x58, 0x03, 0x59, 0x03, 0x5A, 0x03, 0x5B, 0x03, 0x5C, 0x03, 0x5D, 0x03, 0x5E, 0x03, 0x5F, 0x03, + 0x60, 0x03, 0x61, 0x03, 0x62, 0x03, 0x63, 0x03, 0x64, 0x03, 0x65, 0x03, 0x66, 0x03, 0x67, 0x03, + 0x68, 0x03, 0x69, 0x03, 0x6A, 0x03, 0x6B, 0x03, 0x6C, 0x03, 0x6D, 0x03, 0x6E, 0x03, 0x6F, 0x03, + 0x70, 0x03, 0x71, 0x03, 0x72, 0x03, 0x73, 0x03, 0x74, 0x03, 0x75, 0x03, 0x76, 0x03, 0x77, 0x03, + 0x78, 0x03, 0x79, 0x03, 0x7A, 0x03, 0xFD, 0x03, 0xFE, 0x03, 0xFF, 0x03, 0x7E, 0x03, 0x7F, 0x03, + 0x80, 0x03, 0x81, 0x03, 0x82, 0x03, 0x83, 0x03, 0x84, 0x03, 0x85, 0x03, 0x86, 0x03, 0x87, 0x03, + 0x88, 0x03, 0x89, 0x03, 0x8A, 0x03, 0x8B, 0x03, 0x8C, 0x03, 0x8D, 0x03, 0x8E, 0x03, 0x8F, 0x03, + 0x90, 0x03, 0x91, 0x03, 0x92, 0x03, 0x93, 0x03, 0x94, 0x03, 0x95, 0x03, 0x96, 0x03, 0x97, 0x03, + 0x98, 0x03, 0x99, 0x03, 0x9A, 0x03, 0x9B, 0x03, 0x9C, 0x03, 0x9D, 0x03, 0x9E, 0x03, 0x9F, 0x03, + 0xA0, 0x03, 0xA1, 0x03, 0xA2, 0x03, 0xA3, 0x03, 0xA4, 0x03, 0xA5, 0x03, 0xA6, 0x03, 0xA7, 0x03, + 0xA8, 0x03, 0xA9, 0x03, 0xAA, 0x03, 0xAB, 0x03, 0x86, 0x03, 0x88, 0x03, 0x89, 0x03, 0x8A, 0x03, + 0xB0, 0x03, 0x91, 0x03, 0x92, 0x03, 0x93, 0x03, 0x94, 0x03, 0x95, 0x03, 0x96, 0x03, 0x97, 0x03, + 0x98, 0x03, 0x99, 0x03, 0x9A, 0x03, 0x9B, 0x03, 0x9C, 0x03, 0x9D, 0x03, 0x9E, 0x03, 0x9F, 0x03, + 0xA0, 0x03, 0xA1, 0x03, 0xA3, 0x03, 0xA3, 0x03, 0xA4, 0x03, 0xA5, 0x03, 0xA6, 0x03, 0xA7, 0x03, + 0xA8, 0x03, 0xA9, 0x03, 0xAA, 0x03, 0xAB, 0x03, 0x8C, 0x03, 0x8E, 0x03, 0x8F, 0x03, 0xCF, 0x03, + 0xD0, 0x03, 0xD1, 0x03, 0xD2, 0x03, 0xD3, 0x03, 0xD4, 0x03, 0xD5, 0x03, 0xD6, 0x03, 0xD7, 0x03, + 0xD8, 0x03, 0xD8, 0x03, 0xDA, 0x03, 0xDA, 0x03, 0xDC, 0x03, 0xDC, 0x03, 0xDE, 0x03, 0xDE, 0x03, + 0xE0, 0x03, 0xE0, 0x03, 0xE2, 0x03, 0xE2, 0x03, 0xE4, 0x03, 0xE4, 0x03, 0xE6, 0x03, 0xE6, 0x03, + 0xE8, 0x03, 0xE8, 0x03, 0xEA, 0x03, 0xEA, 0x03, 0xEC, 0x03, 0xEC, 0x03, 0xEE, 0x03, 0xEE, 0x03, + 0xF0, 0x03, 0xF1, 0x03, 0xF9, 0x03, 0xF3, 0x03, 0xF4, 0x03, 0xF5, 0x03, 0xF6, 0x03, 0xF7, 0x03, + 0xF7, 0x03, 0xF9, 0x03, 0xFA, 0x03, 0xFA, 0x03, 0xFC, 0x03, 0xFD, 0x03, 0xFE, 0x03, 0xFF, 0x03, + 0x00, 0x04, 0x01, 0x04, 0x02, 0x04, 0x03, 0x04, 0x04, 0x04, 0x05, 0x04, 0x06, 0x04, 0x07, 0x04, + 0x08, 0x04, 0x09, 0x04, 0x0A, 0x04, 0x0B, 0x04, 0x0C, 0x04, 0x0D, 0x04, 0x0E, 0x04, 0x0F, 0x04, + 0x10, 0x04, 0x11, 0x04, 0x12, 0x04, 0x13, 0x04, 0x14, 0x04, 0x15, 0x04, 0x16, 0x04, 0x17, 0x04, + 0x18, 0x04, 0x19, 0x04, 0x1A, 0x04, 0x1B, 0x04, 0x1C, 0x04, 0x1D, 0x04, 0x1E, 0x04, 0x1F, 0x04, + 0x20, 0x04, 0x21, 0x04, 0x22, 0x04, 0x23, 0x04, 0x24, 0x04, 0x25, 0x04, 0x26, 0x04, 0x27, 0x04, + 0x28, 0x04, 0x29, 0x04, 0x2A, 0x04, 0x2B, 0x04, 0x2C, 0x04, 0x2D, 0x04, 0x2E, 0x04, 0x2F, 0x04, + 0x10, 0x04, 0x11, 0x04, 0x12, 0x04, 0x13, 0x04, 0x14, 0x04, 0x15, 0x04, 0x16, 0x04, 0x17, 0x04, + 0x18, 0x04, 0x19, 0x04, 0x1A, 0x04, 0x1B, 0x04, 0x1C, 0x04, 0x1D, 0x04, 0x1E, 0x04, 0x1F, 0x04, + 0x20, 0x04, 0x21, 0x04, 0x22, 0x04, 0x23, 0x04, 0x24, 0x04, 0x25, 0x04, 0x26, 0x04, 0x27, 0x04, + 0x28, 0x04, 0x29, 0x04, 0x2A, 0x04, 0x2B, 0x04, 0x2C, 0x04, 0x2D, 0x04, 0x2E, 0x04, 0x2F, 0x04, + 0x00, 0x04, 0x01, 0x04, 0x02, 0x04, 0x03, 0x04, 0x04, 0x04, 0x05, 0x04, 0x06, 0x04, 0x07, 0x04, + 0x08, 0x04, 0x09, 0x04, 0x0A, 0x04, 0x0B, 0x04, 0x0C, 0x04, 0x0D, 0x04, 
0x0E, 0x04, 0x0F, 0x04, + 0x60, 0x04, 0x60, 0x04, 0x62, 0x04, 0x62, 0x04, 0x64, 0x04, 0x64, 0x04, 0x66, 0x04, 0x66, 0x04, + 0x68, 0x04, 0x68, 0x04, 0x6A, 0x04, 0x6A, 0x04, 0x6C, 0x04, 0x6C, 0x04, 0x6E, 0x04, 0x6E, 0x04, + 0x70, 0x04, 0x70, 0x04, 0x72, 0x04, 0x72, 0x04, 0x74, 0x04, 0x74, 0x04, 0x76, 0x04, 0x76, 0x04, + 0x78, 0x04, 0x78, 0x04, 0x7A, 0x04, 0x7A, 0x04, 0x7C, 0x04, 0x7C, 0x04, 0x7E, 0x04, 0x7E, 0x04, + 0x80, 0x04, 0x80, 0x04, 0x82, 0x04, 0x83, 0x04, 0x84, 0x04, 0x85, 0x04, 0x86, 0x04, 0x87, 0x04, + 0x88, 0x04, 0x89, 0x04, 0x8A, 0x04, 0x8A, 0x04, 0x8C, 0x04, 0x8C, 0x04, 0x8E, 0x04, 0x8E, 0x04, + 0x90, 0x04, 0x90, 0x04, 0x92, 0x04, 0x92, 0x04, 0x94, 0x04, 0x94, 0x04, 0x96, 0x04, 0x96, 0x04, + 0x98, 0x04, 0x98, 0x04, 0x9A, 0x04, 0x9A, 0x04, 0x9C, 0x04, 0x9C, 0x04, 0x9E, 0x04, 0x9E, 0x04, + 0xA0, 0x04, 0xA0, 0x04, 0xA2, 0x04, 0xA2, 0x04, 0xA4, 0x04, 0xA4, 0x04, 0xA6, 0x04, 0xA6, 0x04, + 0xA8, 0x04, 0xA8, 0x04, 0xAA, 0x04, 0xAA, 0x04, 0xAC, 0x04, 0xAC, 0x04, 0xAE, 0x04, 0xAE, 0x04, + 0xB0, 0x04, 0xB0, 0x04, 0xB2, 0x04, 0xB2, 0x04, 0xB4, 0x04, 0xB4, 0x04, 0xB6, 0x04, 0xB6, 0x04, + 0xB8, 0x04, 0xB8, 0x04, 0xBA, 0x04, 0xBA, 0x04, 0xBC, 0x04, 0xBC, 0x04, 0xBE, 0x04, 0xBE, 0x04, + 0xC0, 0x04, 0xC1, 0x04, 0xC1, 0x04, 0xC3, 0x04, 0xC3, 0x04, 0xC5, 0x04, 0xC5, 0x04, 0xC7, 0x04, + 0xC7, 0x04, 0xC9, 0x04, 0xC9, 0x04, 0xCB, 0x04, 0xCB, 0x04, 0xCD, 0x04, 0xCD, 0x04, 0xC0, 0x04, + 0xD0, 0x04, 0xD0, 0x04, 0xD2, 0x04, 0xD2, 0x04, 0xD4, 0x04, 0xD4, 0x04, 0xD6, 0x04, 0xD6, 0x04, + 0xD8, 0x04, 0xD8, 0x04, 0xDA, 0x04, 0xDA, 0x04, 0xDC, 0x04, 0xDC, 0x04, 0xDE, 0x04, 0xDE, 0x04, + 0xE0, 0x04, 0xE0, 0x04, 0xE2, 0x04, 0xE2, 0x04, 0xE4, 0x04, 0xE4, 0x04, 0xE6, 0x04, 0xE6, 0x04, + 0xE8, 0x04, 0xE8, 0x04, 0xEA, 0x04, 0xEA, 0x04, 0xEC, 0x04, 0xEC, 0x04, 0xEE, 0x04, 0xEE, 0x04, + 0xF0, 0x04, 0xF0, 0x04, 0xF2, 0x04, 0xF2, 0x04, 0xF4, 0x04, 0xF4, 0x04, 0xF6, 0x04, 0xF6, 0x04, + 0xF8, 0x04, 0xF8, 0x04, 0xFA, 0x04, 0xFA, 0x04, 0xFC, 0x04, 0xFC, 0x04, 0xFE, 0x04, 0xFE, 0x04, + 0x00, 0x05, 0x00, 0x05, 0x02, 0x05, 0x02, 0x05, 0x04, 0x05, 0x04, 0x05, 0x06, 0x05, 0x06, 0x05, + 0x08, 0x05, 0x08, 0x05, 0x0A, 0x05, 0x0A, 0x05, 0x0C, 0x05, 0x0C, 0x05, 0x0E, 0x05, 0x0E, 0x05, + 0x10, 0x05, 0x10, 0x05, 0x12, 0x05, 0x12, 0x05, 0x14, 0x05, 0x15, 0x05, 0x16, 0x05, 0x17, 0x05, + 0x18, 0x05, 0x19, 0x05, 0x1A, 0x05, 0x1B, 0x05, 0x1C, 0x05, 0x1D, 0x05, 0x1E, 0x05, 0x1F, 0x05, + 0x20, 0x05, 0x21, 0x05, 0x22, 0x05, 0x23, 0x05, 0x24, 0x05, 0x25, 0x05, 0x26, 0x05, 0x27, 0x05, + 0x28, 0x05, 0x29, 0x05, 0x2A, 0x05, 0x2B, 0x05, 0x2C, 0x05, 0x2D, 0x05, 0x2E, 0x05, 0x2F, 0x05, + 0x30, 0x05, 0x31, 0x05, 0x32, 0x05, 0x33, 0x05, 0x34, 0x05, 0x35, 0x05, 0x36, 0x05, 0x37, 0x05, + 0x38, 0x05, 0x39, 0x05, 0x3A, 0x05, 0x3B, 0x05, 0x3C, 0x05, 0x3D, 0x05, 0x3E, 0x05, 0x3F, 0x05, + 0x40, 0x05, 0x41, 0x05, 0x42, 0x05, 0x43, 0x05, 0x44, 0x05, 0x45, 0x05, 0x46, 0x05, 0x47, 0x05, + 0x48, 0x05, 0x49, 0x05, 0x4A, 0x05, 0x4B, 0x05, 0x4C, 0x05, 0x4D, 0x05, 0x4E, 0x05, 0x4F, 0x05, + 0x50, 0x05, 0x51, 0x05, 0x52, 0x05, 0x53, 0x05, 0x54, 0x05, 0x55, 0x05, 0x56, 0x05, 0x57, 0x05, + 0x58, 0x05, 0x59, 0x05, 0x5A, 0x05, 0x5B, 0x05, 0x5C, 0x05, 0x5D, 0x05, 0x5E, 0x05, 0x5F, 0x05, + 0x60, 0x05, 0x31, 0x05, 0x32, 0x05, 0x33, 0x05, 0x34, 0x05, 0x35, 0x05, 0x36, 0x05, 0x37, 0x05, + 0x38, 0x05, 0x39, 0x05, 0x3A, 0x05, 0x3B, 0x05, 0x3C, 0x05, 0x3D, 0x05, 0x3E, 0x05, 0x3F, 0x05, + 0x40, 0x05, 0x41, 0x05, 0x42, 0x05, 0x43, 0x05, 0x44, 0x05, 0x45, 0x05, 0x46, 0x05, 0x47, 0x05, + 0x48, 0x05, 0x49, 0x05, 0x4A, 0x05, 0x4B, 0x05, 0x4C, 0x05, 0x4D, 0x05, 0x4E, 0x05, 0x4F, 0x05, + 
0x50, 0x05, 0x51, 0x05, 0x52, 0x05, 0x53, 0x05, 0x54, 0x05, 0x55, 0x05, 0x56, 0x05, 0xFF, 0xFF, + 0xF6, 0x17, 0x63, 0x2C, 0x7E, 0x1D, 0x7F, 0x1D, 0x80, 0x1D, 0x81, 0x1D, 0x82, 0x1D, 0x83, 0x1D, + 0x84, 0x1D, 0x85, 0x1D, 0x86, 0x1D, 0x87, 0x1D, 0x88, 0x1D, 0x89, 0x1D, 0x8A, 0x1D, 0x8B, 0x1D, + 0x8C, 0x1D, 0x8D, 0x1D, 0x8E, 0x1D, 0x8F, 0x1D, 0x90, 0x1D, 0x91, 0x1D, 0x92, 0x1D, 0x93, 0x1D, + 0x94, 0x1D, 0x95, 0x1D, 0x96, 0x1D, 0x97, 0x1D, 0x98, 0x1D, 0x99, 0x1D, 0x9A, 0x1D, 0x9B, 0x1D, + 0x9C, 0x1D, 0x9D, 0x1D, 0x9E, 0x1D, 0x9F, 0x1D, 0xA0, 0x1D, 0xA1, 0x1D, 0xA2, 0x1D, 0xA3, 0x1D, + 0xA4, 0x1D, 0xA5, 0x1D, 0xA6, 0x1D, 0xA7, 0x1D, 0xA8, 0x1D, 0xA9, 0x1D, 0xAA, 0x1D, 0xAB, 0x1D, + 0xAC, 0x1D, 0xAD, 0x1D, 0xAE, 0x1D, 0xAF, 0x1D, 0xB0, 0x1D, 0xB1, 0x1D, 0xB2, 0x1D, 0xB3, 0x1D, + 0xB4, 0x1D, 0xB5, 0x1D, 0xB6, 0x1D, 0xB7, 0x1D, 0xB8, 0x1D, 0xB9, 0x1D, 0xBA, 0x1D, 0xBB, 0x1D, + 0xBC, 0x1D, 0xBD, 0x1D, 0xBE, 0x1D, 0xBF, 0x1D, 0xC0, 0x1D, 0xC1, 0x1D, 0xC2, 0x1D, 0xC3, 0x1D, + 0xC4, 0x1D, 0xC5, 0x1D, 0xC6, 0x1D, 0xC7, 0x1D, 0xC8, 0x1D, 0xC9, 0x1D, 0xCA, 0x1D, 0xCB, 0x1D, + 0xCC, 0x1D, 0xCD, 0x1D, 0xCE, 0x1D, 0xCF, 0x1D, 0xD0, 0x1D, 0xD1, 0x1D, 0xD2, 0x1D, 0xD3, 0x1D, + 0xD4, 0x1D, 0xD5, 0x1D, 0xD6, 0x1D, 0xD7, 0x1D, 0xD8, 0x1D, 0xD9, 0x1D, 0xDA, 0x1D, 0xDB, 0x1D, + 0xDC, 0x1D, 0xDD, 0x1D, 0xDE, 0x1D, 0xDF, 0x1D, 0xE0, 0x1D, 0xE1, 0x1D, 0xE2, 0x1D, 0xE3, 0x1D, + 0xE4, 0x1D, 0xE5, 0x1D, 0xE6, 0x1D, 0xE7, 0x1D, 0xE8, 0x1D, 0xE9, 0x1D, 0xEA, 0x1D, 0xEB, 0x1D, + 0xEC, 0x1D, 0xED, 0x1D, 0xEE, 0x1D, 0xEF, 0x1D, 0xF0, 0x1D, 0xF1, 0x1D, 0xF2, 0x1D, 0xF3, 0x1D, + 0xF4, 0x1D, 0xF5, 0x1D, 0xF6, 0x1D, 0xF7, 0x1D, 0xF8, 0x1D, 0xF9, 0x1D, 0xFA, 0x1D, 0xFB, 0x1D, + 0xFC, 0x1D, 0xFD, 0x1D, 0xFE, 0x1D, 0xFF, 0x1D, 0x00, 0x1E, 0x00, 0x1E, 0x02, 0x1E, 0x02, 0x1E, + 0x04, 0x1E, 0x04, 0x1E, 0x06, 0x1E, 0x06, 0x1E, 0x08, 0x1E, 0x08, 0x1E, 0x0A, 0x1E, 0x0A, 0x1E, + 0x0C, 0x1E, 0x0C, 0x1E, 0x0E, 0x1E, 0x0E, 0x1E, 0x10, 0x1E, 0x10, 0x1E, 0x12, 0x1E, 0x12, 0x1E, + 0x14, 0x1E, 0x14, 0x1E, 0x16, 0x1E, 0x16, 0x1E, 0x18, 0x1E, 0x18, 0x1E, 0x1A, 0x1E, 0x1A, 0x1E, + 0x1C, 0x1E, 0x1C, 0x1E, 0x1E, 0x1E, 0x1E, 0x1E, 0x20, 0x1E, 0x20, 0x1E, 0x22, 0x1E, 0x22, 0x1E, + 0x24, 0x1E, 0x24, 0x1E, 0x26, 0x1E, 0x26, 0x1E, 0x28, 0x1E, 0x28, 0x1E, 0x2A, 0x1E, 0x2A, 0x1E, + 0x2C, 0x1E, 0x2C, 0x1E, 0x2E, 0x1E, 0x2E, 0x1E, 0x30, 0x1E, 0x30, 0x1E, 0x32, 0x1E, 0x32, 0x1E, + 0x34, 0x1E, 0x34, 0x1E, 0x36, 0x1E, 0x36, 0x1E, 0x38, 0x1E, 0x38, 0x1E, 0x3A, 0x1E, 0x3A, 0x1E, + 0x3C, 0x1E, 0x3C, 0x1E, 0x3E, 0x1E, 0x3E, 0x1E, 0x40, 0x1E, 0x40, 0x1E, 0x42, 0x1E, 0x42, 0x1E, + 0x44, 0x1E, 0x44, 0x1E, 0x46, 0x1E, 0x46, 0x1E, 0x48, 0x1E, 0x48, 0x1E, 0x4A, 0x1E, 0x4A, 0x1E, + 0x4C, 0x1E, 0x4C, 0x1E, 0x4E, 0x1E, 0x4E, 0x1E, 0x50, 0x1E, 0x50, 0x1E, 0x52, 0x1E, 0x52, 0x1E, + 0x54, 0x1E, 0x54, 0x1E, 0x56, 0x1E, 0x56, 0x1E, 0x58, 0x1E, 0x58, 0x1E, 0x5A, 0x1E, 0x5A, 0x1E, + 0x5C, 0x1E, 0x5C, 0x1E, 0x5E, 0x1E, 0x5E, 0x1E, 0x60, 0x1E, 0x60, 0x1E, 0x62, 0x1E, 0x62, 0x1E, + 0x64, 0x1E, 0x64, 0x1E, 0x66, 0x1E, 0x66, 0x1E, 0x68, 0x1E, 0x68, 0x1E, 0x6A, 0x1E, 0x6A, 0x1E, + 0x6C, 0x1E, 0x6C, 0x1E, 0x6E, 0x1E, 0x6E, 0x1E, 0x70, 0x1E, 0x70, 0x1E, 0x72, 0x1E, 0x72, 0x1E, + 0x74, 0x1E, 0x74, 0x1E, 0x76, 0x1E, 0x76, 0x1E, 0x78, 0x1E, 0x78, 0x1E, 0x7A, 0x1E, 0x7A, 0x1E, + 0x7C, 0x1E, 0x7C, 0x1E, 0x7E, 0x1E, 0x7E, 0x1E, 0x80, 0x1E, 0x80, 0x1E, 0x82, 0x1E, 0x82, 0x1E, + 0x84, 0x1E, 0x84, 0x1E, 0x86, 0x1E, 0x86, 0x1E, 0x88, 0x1E, 0x88, 0x1E, 0x8A, 0x1E, 0x8A, 0x1E, + 0x8C, 0x1E, 0x8C, 0x1E, 0x8E, 0x1E, 0x8E, 0x1E, 0x90, 0x1E, 0x90, 0x1E, 0x92, 0x1E, 0x92, 0x1E, + 0x94, 0x1E, 0x94, 0x1E, 
0x96, 0x1E, 0x97, 0x1E, 0x98, 0x1E, 0x99, 0x1E, 0x9A, 0x1E, 0x9B, 0x1E, + 0x9C, 0x1E, 0x9D, 0x1E, 0x9E, 0x1E, 0x9F, 0x1E, 0xA0, 0x1E, 0xA0, 0x1E, 0xA2, 0x1E, 0xA2, 0x1E, + 0xA4, 0x1E, 0xA4, 0x1E, 0xA6, 0x1E, 0xA6, 0x1E, 0xA8, 0x1E, 0xA8, 0x1E, 0xAA, 0x1E, 0xAA, 0x1E, + 0xAC, 0x1E, 0xAC, 0x1E, 0xAE, 0x1E, 0xAE, 0x1E, 0xB0, 0x1E, 0xB0, 0x1E, 0xB2, 0x1E, 0xB2, 0x1E, + 0xB4, 0x1E, 0xB4, 0x1E, 0xB6, 0x1E, 0xB6, 0x1E, 0xB8, 0x1E, 0xB8, 0x1E, 0xBA, 0x1E, 0xBA, 0x1E, + 0xBC, 0x1E, 0xBC, 0x1E, 0xBE, 0x1E, 0xBE, 0x1E, 0xC0, 0x1E, 0xC0, 0x1E, 0xC2, 0x1E, 0xC2, 0x1E, + 0xC4, 0x1E, 0xC4, 0x1E, 0xC6, 0x1E, 0xC6, 0x1E, 0xC8, 0x1E, 0xC8, 0x1E, 0xCA, 0x1E, 0xCA, 0x1E, + 0xCC, 0x1E, 0xCC, 0x1E, 0xCE, 0x1E, 0xCE, 0x1E, 0xD0, 0x1E, 0xD0, 0x1E, 0xD2, 0x1E, 0xD2, 0x1E, + 0xD4, 0x1E, 0xD4, 0x1E, 0xD6, 0x1E, 0xD6, 0x1E, 0xD8, 0x1E, 0xD8, 0x1E, 0xDA, 0x1E, 0xDA, 0x1E, + 0xDC, 0x1E, 0xDC, 0x1E, 0xDE, 0x1E, 0xDE, 0x1E, 0xE0, 0x1E, 0xE0, 0x1E, 0xE2, 0x1E, 0xE2, 0x1E, + 0xE4, 0x1E, 0xE4, 0x1E, 0xE6, 0x1E, 0xE6, 0x1E, 0xE8, 0x1E, 0xE8, 0x1E, 0xEA, 0x1E, 0xEA, 0x1E, + 0xEC, 0x1E, 0xEC, 0x1E, 0xEE, 0x1E, 0xEE, 0x1E, 0xF0, 0x1E, 0xF0, 0x1E, 0xF2, 0x1E, 0xF2, 0x1E, + 0xF4, 0x1E, 0xF4, 0x1E, 0xF6, 0x1E, 0xF6, 0x1E, 0xF8, 0x1E, 0xF8, 0x1E, 0xFA, 0x1E, 0xFB, 0x1E, + 0xFC, 0x1E, 0xFD, 0x1E, 0xFE, 0x1E, 0xFF, 0x1E, 0x08, 0x1F, 0x09, 0x1F, 0x0A, 0x1F, 0x0B, 0x1F, + 0x0C, 0x1F, 0x0D, 0x1F, 0x0E, 0x1F, 0x0F, 0x1F, 0x08, 0x1F, 0x09, 0x1F, 0x0A, 0x1F, 0x0B, 0x1F, + 0x0C, 0x1F, 0x0D, 0x1F, 0x0E, 0x1F, 0x0F, 0x1F, 0x18, 0x1F, 0x19, 0x1F, 0x1A, 0x1F, 0x1B, 0x1F, + 0x1C, 0x1F, 0x1D, 0x1F, 0x16, 0x1F, 0x17, 0x1F, 0x18, 0x1F, 0x19, 0x1F, 0x1A, 0x1F, 0x1B, 0x1F, + 0x1C, 0x1F, 0x1D, 0x1F, 0x1E, 0x1F, 0x1F, 0x1F, 0x28, 0x1F, 0x29, 0x1F, 0x2A, 0x1F, 0x2B, 0x1F, + 0x2C, 0x1F, 0x2D, 0x1F, 0x2E, 0x1F, 0x2F, 0x1F, 0x28, 0x1F, 0x29, 0x1F, 0x2A, 0x1F, 0x2B, 0x1F, + 0x2C, 0x1F, 0x2D, 0x1F, 0x2E, 0x1F, 0x2F, 0x1F, 0x38, 0x1F, 0x39, 0x1F, 0x3A, 0x1F, 0x3B, 0x1F, + 0x3C, 0x1F, 0x3D, 0x1F, 0x3E, 0x1F, 0x3F, 0x1F, 0x38, 0x1F, 0x39, 0x1F, 0x3A, 0x1F, 0x3B, 0x1F, + 0x3C, 0x1F, 0x3D, 0x1F, 0x3E, 0x1F, 0x3F, 0x1F, 0x48, 0x1F, 0x49, 0x1F, 0x4A, 0x1F, 0x4B, 0x1F, + 0x4C, 0x1F, 0x4D, 0x1F, 0x46, 0x1F, 0x47, 0x1F, 0x48, 0x1F, 0x49, 0x1F, 0x4A, 0x1F, 0x4B, 0x1F, + 0x4C, 0x1F, 0x4D, 0x1F, 0x4E, 0x1F, 0x4F, 0x1F, 0x50, 0x1F, 0x59, 0x1F, 0x52, 0x1F, 0x5B, 0x1F, + 0x54, 0x1F, 0x5D, 0x1F, 0x56, 0x1F, 0x5F, 0x1F, 0x58, 0x1F, 0x59, 0x1F, 0x5A, 0x1F, 0x5B, 0x1F, + 0x5C, 0x1F, 0x5D, 0x1F, 0x5E, 0x1F, 0x5F, 0x1F, 0x68, 0x1F, 0x69, 0x1F, 0x6A, 0x1F, 0x6B, 0x1F, + 0x6C, 0x1F, 0x6D, 0x1F, 0x6E, 0x1F, 0x6F, 0x1F, 0x68, 0x1F, 0x69, 0x1F, 0x6A, 0x1F, 0x6B, 0x1F, + 0x6C, 0x1F, 0x6D, 0x1F, 0x6E, 0x1F, 0x6F, 0x1F, 0xBA, 0x1F, 0xBB, 0x1F, 0xC8, 0x1F, 0xC9, 0x1F, + 0xCA, 0x1F, 0xCB, 0x1F, 0xDA, 0x1F, 0xDB, 0x1F, 0xF8, 0x1F, 0xF9, 0x1F, 0xEA, 0x1F, 0xEB, 0x1F, + 0xFA, 0x1F, 0xFB, 0x1F, 0x7E, 0x1F, 0x7F, 0x1F, 0x88, 0x1F, 0x89, 0x1F, 0x8A, 0x1F, 0x8B, 0x1F, + 0x8C, 0x1F, 0x8D, 0x1F, 0x8E, 0x1F, 0x8F, 0x1F, 0x88, 0x1F, 0x89, 0x1F, 0x8A, 0x1F, 0x8B, 0x1F, + 0x8C, 0x1F, 0x8D, 0x1F, 0x8E, 0x1F, 0x8F, 0x1F, 0x98, 0x1F, 0x99, 0x1F, 0x9A, 0x1F, 0x9B, 0x1F, + 0x9C, 0x1F, 0x9D, 0x1F, 0x9E, 0x1F, 0x9F, 0x1F, 0x98, 0x1F, 0x99, 0x1F, 0x9A, 0x1F, 0x9B, 0x1F, + 0x9C, 0x1F, 0x9D, 0x1F, 0x9E, 0x1F, 0x9F, 0x1F, 0xA8, 0x1F, 0xA9, 0x1F, 0xAA, 0x1F, 0xAB, 0x1F, + 0xAC, 0x1F, 0xAD, 0x1F, 0xAE, 0x1F, 0xAF, 0x1F, 0xA8, 0x1F, 0xA9, 0x1F, 0xAA, 0x1F, 0xAB, 0x1F, + 0xAC, 0x1F, 0xAD, 0x1F, 0xAE, 0x1F, 0xAF, 0x1F, 0xB8, 0x1F, 0xB9, 0x1F, 0xB2, 0x1F, 0xBC, 0x1F, + 0xB4, 0x1F, 0xB5, 0x1F, 0xB6, 0x1F, 0xB7, 0x1F, 
0xB8, 0x1F, 0xB9, 0x1F, 0xBA, 0x1F, 0xBB, 0x1F, + 0xBC, 0x1F, 0xBD, 0x1F, 0xBE, 0x1F, 0xBF, 0x1F, 0xC0, 0x1F, 0xC1, 0x1F, 0xC2, 0x1F, 0xC3, 0x1F, + 0xC4, 0x1F, 0xC5, 0x1F, 0xC6, 0x1F, 0xC7, 0x1F, 0xC8, 0x1F, 0xC9, 0x1F, 0xCA, 0x1F, 0xCB, 0x1F, + 0xC3, 0x1F, 0xCD, 0x1F, 0xCE, 0x1F, 0xCF, 0x1F, 0xD8, 0x1F, 0xD9, 0x1F, 0xD2, 0x1F, 0xD3, 0x1F, + 0xD4, 0x1F, 0xD5, 0x1F, 0xD6, 0x1F, 0xD7, 0x1F, 0xD8, 0x1F, 0xD9, 0x1F, 0xDA, 0x1F, 0xDB, 0x1F, + 0xDC, 0x1F, 0xDD, 0x1F, 0xDE, 0x1F, 0xDF, 0x1F, 0xE8, 0x1F, 0xE9, 0x1F, 0xE2, 0x1F, 0xE3, 0x1F, + 0xE4, 0x1F, 0xEC, 0x1F, 0xE6, 0x1F, 0xE7, 0x1F, 0xE8, 0x1F, 0xE9, 0x1F, 0xEA, 0x1F, 0xEB, 0x1F, + 0xEC, 0x1F, 0xED, 0x1F, 0xEE, 0x1F, 0xEF, 0x1F, 0xF0, 0x1F, 0xF1, 0x1F, 0xF2, 0x1F, 0xF3, 0x1F, + 0xF4, 0x1F, 0xF5, 0x1F, 0xF6, 0x1F, 0xF7, 0x1F, 0xF8, 0x1F, 0xF9, 0x1F, 0xFA, 0x1F, 0xFB, 0x1F, + 0xF3, 0x1F, 0xFD, 0x1F, 0xFE, 0x1F, 0xFF, 0x1F, 0x00, 0x20, 0x01, 0x20, 0x02, 0x20, 0x03, 0x20, + 0x04, 0x20, 0x05, 0x20, 0x06, 0x20, 0x07, 0x20, 0x08, 0x20, 0x09, 0x20, 0x0A, 0x20, 0x0B, 0x20, + 0x0C, 0x20, 0x0D, 0x20, 0x0E, 0x20, 0x0F, 0x20, 0x10, 0x20, 0x11, 0x20, 0x12, 0x20, 0x13, 0x20, + 0x14, 0x20, 0x15, 0x20, 0x16, 0x20, 0x17, 0x20, 0x18, 0x20, 0x19, 0x20, 0x1A, 0x20, 0x1B, 0x20, + 0x1C, 0x20, 0x1D, 0x20, 0x1E, 0x20, 0x1F, 0x20, 0x20, 0x20, 0x21, 0x20, 0x22, 0x20, 0x23, 0x20, + 0x24, 0x20, 0x25, 0x20, 0x26, 0x20, 0x27, 0x20, 0x28, 0x20, 0x29, 0x20, 0x2A, 0x20, 0x2B, 0x20, + 0x2C, 0x20, 0x2D, 0x20, 0x2E, 0x20, 0x2F, 0x20, 0x30, 0x20, 0x31, 0x20, 0x32, 0x20, 0x33, 0x20, + 0x34, 0x20, 0x35, 0x20, 0x36, 0x20, 0x37, 0x20, 0x38, 0x20, 0x39, 0x20, 0x3A, 0x20, 0x3B, 0x20, + 0x3C, 0x20, 0x3D, 0x20, 0x3E, 0x20, 0x3F, 0x20, 0x40, 0x20, 0x41, 0x20, 0x42, 0x20, 0x43, 0x20, + 0x44, 0x20, 0x45, 0x20, 0x46, 0x20, 0x47, 0x20, 0x48, 0x20, 0x49, 0x20, 0x4A, 0x20, 0x4B, 0x20, + 0x4C, 0x20, 0x4D, 0x20, 0x4E, 0x20, 0x4F, 0x20, 0x50, 0x20, 0x51, 0x20, 0x52, 0x20, 0x53, 0x20, + 0x54, 0x20, 0x55, 0x20, 0x56, 0x20, 0x57, 0x20, 0x58, 0x20, 0x59, 0x20, 0x5A, 0x20, 0x5B, 0x20, + 0x5C, 0x20, 0x5D, 0x20, 0x5E, 0x20, 0x5F, 0x20, 0x60, 0x20, 0x61, 0x20, 0x62, 0x20, 0x63, 0x20, + 0x64, 0x20, 0x65, 0x20, 0x66, 0x20, 0x67, 0x20, 0x68, 0x20, 0x69, 0x20, 0x6A, 0x20, 0x6B, 0x20, + 0x6C, 0x20, 0x6D, 0x20, 0x6E, 0x20, 0x6F, 0x20, 0x70, 0x20, 0x71, 0x20, 0x72, 0x20, 0x73, 0x20, + 0x74, 0x20, 0x75, 0x20, 0x76, 0x20, 0x77, 0x20, 0x78, 0x20, 0x79, 0x20, 0x7A, 0x20, 0x7B, 0x20, + 0x7C, 0x20, 0x7D, 0x20, 0x7E, 0x20, 0x7F, 0x20, 0x80, 0x20, 0x81, 0x20, 0x82, 0x20, 0x83, 0x20, + 0x84, 0x20, 0x85, 0x20, 0x86, 0x20, 0x87, 0x20, 0x88, 0x20, 0x89, 0x20, 0x8A, 0x20, 0x8B, 0x20, + 0x8C, 0x20, 0x8D, 0x20, 0x8E, 0x20, 0x8F, 0x20, 0x90, 0x20, 0x91, 0x20, 0x92, 0x20, 0x93, 0x20, + 0x94, 0x20, 0x95, 0x20, 0x96, 0x20, 0x97, 0x20, 0x98, 0x20, 0x99, 0x20, 0x9A, 0x20, 0x9B, 0x20, + 0x9C, 0x20, 0x9D, 0x20, 0x9E, 0x20, 0x9F, 0x20, 0xA0, 0x20, 0xA1, 0x20, 0xA2, 0x20, 0xA3, 0x20, + 0xA4, 0x20, 0xA5, 0x20, 0xA6, 0x20, 0xA7, 0x20, 0xA8, 0x20, 0xA9, 0x20, 0xAA, 0x20, 0xAB, 0x20, + 0xAC, 0x20, 0xAD, 0x20, 0xAE, 0x20, 0xAF, 0x20, 0xB0, 0x20, 0xB1, 0x20, 0xB2, 0x20, 0xB3, 0x20, + 0xB4, 0x20, 0xB5, 0x20, 0xB6, 0x20, 0xB7, 0x20, 0xB8, 0x20, 0xB9, 0x20, 0xBA, 0x20, 0xBB, 0x20, + 0xBC, 0x20, 0xBD, 0x20, 0xBE, 0x20, 0xBF, 0x20, 0xC0, 0x20, 0xC1, 0x20, 0xC2, 0x20, 0xC3, 0x20, + 0xC4, 0x20, 0xC5, 0x20, 0xC6, 0x20, 0xC7, 0x20, 0xC8, 0x20, 0xC9, 0x20, 0xCA, 0x20, 0xCB, 0x20, + 0xCC, 0x20, 0xCD, 0x20, 0xCE, 0x20, 0xCF, 0x20, 0xD0, 0x20, 0xD1, 0x20, 0xD2, 0x20, 0xD3, 0x20, + 0xD4, 0x20, 0xD5, 0x20, 0xD6, 0x20, 0xD7, 0x20, 0xD8, 0x20, 0xD9, 0x20, 
0xDA, 0x20, 0xDB, 0x20, + 0xDC, 0x20, 0xDD, 0x20, 0xDE, 0x20, 0xDF, 0x20, 0xE0, 0x20, 0xE1, 0x20, 0xE2, 0x20, 0xE3, 0x20, + 0xE4, 0x20, 0xE5, 0x20, 0xE6, 0x20, 0xE7, 0x20, 0xE8, 0x20, 0xE9, 0x20, 0xEA, 0x20, 0xEB, 0x20, + 0xEC, 0x20, 0xED, 0x20, 0xEE, 0x20, 0xEF, 0x20, 0xF0, 0x20, 0xF1, 0x20, 0xF2, 0x20, 0xF3, 0x20, + 0xF4, 0x20, 0xF5, 0x20, 0xF6, 0x20, 0xF7, 0x20, 0xF8, 0x20, 0xF9, 0x20, 0xFA, 0x20, 0xFB, 0x20, + 0xFC, 0x20, 0xFD, 0x20, 0xFE, 0x20, 0xFF, 0x20, 0x00, 0x21, 0x01, 0x21, 0x02, 0x21, 0x03, 0x21, + 0x04, 0x21, 0x05, 0x21, 0x06, 0x21, 0x07, 0x21, 0x08, 0x21, 0x09, 0x21, 0x0A, 0x21, 0x0B, 0x21, + 0x0C, 0x21, 0x0D, 0x21, 0x0E, 0x21, 0x0F, 0x21, 0x10, 0x21, 0x11, 0x21, 0x12, 0x21, 0x13, 0x21, + 0x14, 0x21, 0x15, 0x21, 0x16, 0x21, 0x17, 0x21, 0x18, 0x21, 0x19, 0x21, 0x1A, 0x21, 0x1B, 0x21, + 0x1C, 0x21, 0x1D, 0x21, 0x1E, 0x21, 0x1F, 0x21, 0x20, 0x21, 0x21, 0x21, 0x22, 0x21, 0x23, 0x21, + 0x24, 0x21, 0x25, 0x21, 0x26, 0x21, 0x27, 0x21, 0x28, 0x21, 0x29, 0x21, 0x2A, 0x21, 0x2B, 0x21, + 0x2C, 0x21, 0x2D, 0x21, 0x2E, 0x21, 0x2F, 0x21, 0x30, 0x21, 0x31, 0x21, 0x32, 0x21, 0x33, 0x21, + 0x34, 0x21, 0x35, 0x21, 0x36, 0x21, 0x37, 0x21, 0x38, 0x21, 0x39, 0x21, 0x3A, 0x21, 0x3B, 0x21, + 0x3C, 0x21, 0x3D, 0x21, 0x3E, 0x21, 0x3F, 0x21, 0x40, 0x21, 0x41, 0x21, 0x42, 0x21, 0x43, 0x21, + 0x44, 0x21, 0x45, 0x21, 0x46, 0x21, 0x47, 0x21, 0x48, 0x21, 0x49, 0x21, 0x4A, 0x21, 0x4B, 0x21, + 0x4C, 0x21, 0x4D, 0x21, 0x32, 0x21, 0x4F, 0x21, 0x50, 0x21, 0x51, 0x21, 0x52, 0x21, 0x53, 0x21, + 0x54, 0x21, 0x55, 0x21, 0x56, 0x21, 0x57, 0x21, 0x58, 0x21, 0x59, 0x21, 0x5A, 0x21, 0x5B, 0x21, + 0x5C, 0x21, 0x5D, 0x21, 0x5E, 0x21, 0x5F, 0x21, 0x60, 0x21, 0x61, 0x21, 0x62, 0x21, 0x63, 0x21, + 0x64, 0x21, 0x65, 0x21, 0x66, 0x21, 0x67, 0x21, 0x68, 0x21, 0x69, 0x21, 0x6A, 0x21, 0x6B, 0x21, + 0x6C, 0x21, 0x6D, 0x21, 0x6E, 0x21, 0x6F, 0x21, 0x60, 0x21, 0x61, 0x21, 0x62, 0x21, 0x63, 0x21, + 0x64, 0x21, 0x65, 0x21, 0x66, 0x21, 0x67, 0x21, 0x68, 0x21, 0x69, 0x21, 0x6A, 0x21, 0x6B, 0x21, + 0x6C, 0x21, 0x6D, 0x21, 0x6E, 0x21, 0x6F, 0x21, 0x80, 0x21, 0x81, 0x21, 0x82, 0x21, 0x83, 0x21, + 0x83, 0x21, 0xFF, 0xFF, 0x4B, 0x03, 0xB6, 0x24, 0xB7, 0x24, 0xB8, 0x24, 0xB9, 0x24, 0xBA, 0x24, + 0xBB, 0x24, 0xBC, 0x24, 0xBD, 0x24, 0xBE, 0x24, 0xBF, 0x24, 0xC0, 0x24, 0xC1, 0x24, 0xC2, 0x24, + 0xC3, 0x24, 0xC4, 0x24, 0xC5, 0x24, 0xC6, 0x24, 0xC7, 0x24, 0xC8, 0x24, 0xC9, 0x24, 0xCA, 0x24, + 0xCB, 0x24, 0xCC, 0x24, 0xCD, 0x24, 0xCE, 0x24, 0xCF, 0x24, 0xFF, 0xFF, 0x46, 0x07, 0x00, 0x2C, + 0x01, 0x2C, 0x02, 0x2C, 0x03, 0x2C, 0x04, 0x2C, 0x05, 0x2C, 0x06, 0x2C, 0x07, 0x2C, 0x08, 0x2C, + 0x09, 0x2C, 0x0A, 0x2C, 0x0B, 0x2C, 0x0C, 0x2C, 0x0D, 0x2C, 0x0E, 0x2C, 0x0F, 0x2C, 0x10, 0x2C, + 0x11, 0x2C, 0x12, 0x2C, 0x13, 0x2C, 0x14, 0x2C, 0x15, 0x2C, 0x16, 0x2C, 0x17, 0x2C, 0x18, 0x2C, + 0x19, 0x2C, 0x1A, 0x2C, 0x1B, 0x2C, 0x1C, 0x2C, 0x1D, 0x2C, 0x1E, 0x2C, 0x1F, 0x2C, 0x20, 0x2C, + 0x21, 0x2C, 0x22, 0x2C, 0x23, 0x2C, 0x24, 0x2C, 0x25, 0x2C, 0x26, 0x2C, 0x27, 0x2C, 0x28, 0x2C, + 0x29, 0x2C, 0x2A, 0x2C, 0x2B, 0x2C, 0x2C, 0x2C, 0x2D, 0x2C, 0x2E, 0x2C, 0x5F, 0x2C, 0x60, 0x2C, + 0x60, 0x2C, 0x62, 0x2C, 0x63, 0x2C, 0x64, 0x2C, 0x65, 0x2C, 0x66, 0x2C, 0x67, 0x2C, 0x67, 0x2C, + 0x69, 0x2C, 0x69, 0x2C, 0x6B, 0x2C, 0x6B, 0x2C, 0x6D, 0x2C, 0x6E, 0x2C, 0x6F, 0x2C, 0x70, 0x2C, + 0x71, 0x2C, 0x72, 0x2C, 0x73, 0x2C, 0x74, 0x2C, 0x75, 0x2C, 0x75, 0x2C, 0x77, 0x2C, 0x78, 0x2C, + 0x79, 0x2C, 0x7A, 0x2C, 0x7B, 0x2C, 0x7C, 0x2C, 0x7D, 0x2C, 0x7E, 0x2C, 0x7F, 0x2C, 0x80, 0x2C, + 0x80, 0x2C, 0x82, 0x2C, 0x82, 0x2C, 0x84, 0x2C, 0x84, 0x2C, 0x86, 0x2C, 0x86, 0x2C, 0x88, 0x2C, + 
0x88, 0x2C, 0x8A, 0x2C, 0x8A, 0x2C, 0x8C, 0x2C, 0x8C, 0x2C, 0x8E, 0x2C, 0x8E, 0x2C, 0x90, 0x2C, + 0x90, 0x2C, 0x92, 0x2C, 0x92, 0x2C, 0x94, 0x2C, 0x94, 0x2C, 0x96, 0x2C, 0x96, 0x2C, 0x98, 0x2C, + 0x98, 0x2C, 0x9A, 0x2C, 0x9A, 0x2C, 0x9C, 0x2C, 0x9C, 0x2C, 0x9E, 0x2C, 0x9E, 0x2C, 0xA0, 0x2C, + 0xA0, 0x2C, 0xA2, 0x2C, 0xA2, 0x2C, 0xA4, 0x2C, 0xA4, 0x2C, 0xA6, 0x2C, 0xA6, 0x2C, 0xA8, 0x2C, + 0xA8, 0x2C, 0xAA, 0x2C, 0xAA, 0x2C, 0xAC, 0x2C, 0xAC, 0x2C, 0xAE, 0x2C, 0xAE, 0x2C, 0xB0, 0x2C, + 0xB0, 0x2C, 0xB2, 0x2C, 0xB2, 0x2C, 0xB4, 0x2C, 0xB4, 0x2C, 0xB6, 0x2C, 0xB6, 0x2C, 0xB8, 0x2C, + 0xB8, 0x2C, 0xBA, 0x2C, 0xBA, 0x2C, 0xBC, 0x2C, 0xBC, 0x2C, 0xBE, 0x2C, 0xBE, 0x2C, 0xC0, 0x2C, + 0xC0, 0x2C, 0xC2, 0x2C, 0xC2, 0x2C, 0xC4, 0x2C, 0xC4, 0x2C, 0xC6, 0x2C, 0xC6, 0x2C, 0xC8, 0x2C, + 0xC8, 0x2C, 0xCA, 0x2C, 0xCA, 0x2C, 0xCC, 0x2C, 0xCC, 0x2C, 0xCE, 0x2C, 0xCE, 0x2C, 0xD0, 0x2C, + 0xD0, 0x2C, 0xD2, 0x2C, 0xD2, 0x2C, 0xD4, 0x2C, 0xD4, 0x2C, 0xD6, 0x2C, 0xD6, 0x2C, 0xD8, 0x2C, + 0xD8, 0x2C, 0xDA, 0x2C, 0xDA, 0x2C, 0xDC, 0x2C, 0xDC, 0x2C, 0xDE, 0x2C, 0xDE, 0x2C, 0xE0, 0x2C, + 0xE0, 0x2C, 0xE2, 0x2C, 0xE2, 0x2C, 0xE4, 0x2C, 0xE5, 0x2C, 0xE6, 0x2C, 0xE7, 0x2C, 0xE8, 0x2C, + 0xE9, 0x2C, 0xEA, 0x2C, 0xEB, 0x2C, 0xEC, 0x2C, 0xED, 0x2C, 0xEE, 0x2C, 0xEF, 0x2C, 0xF0, 0x2C, + 0xF1, 0x2C, 0xF2, 0x2C, 0xF3, 0x2C, 0xF4, 0x2C, 0xF5, 0x2C, 0xF6, 0x2C, 0xF7, 0x2C, 0xF8, 0x2C, + 0xF9, 0x2C, 0xFA, 0x2C, 0xFB, 0x2C, 0xFC, 0x2C, 0xFD, 0x2C, 0xFE, 0x2C, 0xFF, 0x2C, 0xA0, 0x10, + 0xA1, 0x10, 0xA2, 0x10, 0xA3, 0x10, 0xA4, 0x10, 0xA5, 0x10, 0xA6, 0x10, 0xA7, 0x10, 0xA8, 0x10, + 0xA9, 0x10, 0xAA, 0x10, 0xAB, 0x10, 0xAC, 0x10, 0xAD, 0x10, 0xAE, 0x10, 0xAF, 0x10, 0xB0, 0x10, + 0xB1, 0x10, 0xB2, 0x10, 0xB3, 0x10, 0xB4, 0x10, 0xB5, 0x10, 0xB6, 0x10, 0xB7, 0x10, 0xB8, 0x10, + 0xB9, 0x10, 0xBA, 0x10, 0xBB, 0x10, 0xBC, 0x10, 0xBD, 0x10, 0xBE, 0x10, 0xBF, 0x10, 0xC0, 0x10, + 0xC1, 0x10, 0xC2, 0x10, 0xC3, 0x10, 0xC4, 0x10, 0xC5, 0x10, 0xFF, 0xFF, 0x1B, 0xD2, 0x21, 0xFF, + 0x22, 0xFF, 0x23, 0xFF, 0x24, 0xFF, 0x25, 0xFF, 0x26, 0xFF, 0x27, 0xFF, 0x28, 0xFF, 0x29, 0xFF, + 0x2A, 0xFF, 0x2B, 0xFF, 0x2C, 0xFF, 0x2D, 0xFF, 0x2E, 0xFF, 0x2F, 0xFF, 0x30, 0xFF, 0x31, 0xFF, + 0x32, 0xFF, 0x33, 0xFF, 0x34, 0xFF, 0x35, 0xFF, 0x36, 0xFF, 0x37, 0xFF, 0x38, 0xFF, 0x39, 0xFF, + 0x3A, 0xFF, 0x5B, 0xFF, 0x5C, 0xFF, 0x5D, 0xFF, 0x5E, 0xFF, 0x5F, 0xFF, 0x60, 0xFF, 0x61, 0xFF, + 0x62, 0xFF, 0x63, 0xFF, 0x64, 0xFF, 0x65, 0xFF, 0x66, 0xFF, 0x67, 0xFF, 0x68, 0xFF, 0x69, 0xFF, + 0x6A, 0xFF, 0x6B, 0xFF, 0x6C, 0xFF, 0x6D, 0xFF, 0x6E, 0xFF, 0x6F, 0xFF, 0x70, 0xFF, 0x71, 0xFF, + 0x72, 0xFF, 0x73, 0xFF, 0x74, 0xFF, 0x75, 0xFF, 0x76, 0xFF, 0x77, 0xFF, 0x78, 0xFF, 0x79, 0xFF, + 0x7A, 0xFF, 0x7B, 0xFF, 0x7C, 0xFF, 0x7D, 0xFF, 0x7E, 0xFF, 0x7F, 0xFF, 0x80, 0xFF, 0x81, 0xFF, + 0x82, 0xFF, 0x83, 0xFF, 0x84, 0xFF, 0x85, 0xFF, 0x86, 0xFF, 0x87, 0xFF, 0x88, 0xFF, 0x89, 0xFF, + 0x8A, 0xFF, 0x8B, 0xFF, 0x8C, 0xFF, 0x8D, 0xFF, 0x8E, 0xFF, 0x8F, 0xFF, 0x90, 0xFF, 0x91, 0xFF, + 0x92, 0xFF, 0x93, 0xFF, 0x94, 0xFF, 0x95, 0xFF, 0x96, 0xFF, 0x97, 0xFF, 0x98, 0xFF, 0x99, 0xFF, + 0x9A, 0xFF, 0x9B, 0xFF, 0x9C, 0xFF, 0x9D, 0xFF, 0x9E, 0xFF, 0x9F, 0xFF, 0xA0, 0xFF, 0xA1, 0xFF, + 0xA2, 0xFF, 0xA3, 0xFF, 0xA4, 0xFF, 0xA5, 0xFF, 0xA6, 0xFF, 0xA7, 0xFF, 0xA8, 0xFF, 0xA9, 0xFF, + 0xAA, 0xFF, 0xAB, 0xFF, 0xAC, 0xFF, 0xAD, 0xFF, 0xAE, 0xFF, 0xAF, 0xFF, 0xB0, 0xFF, 0xB1, 0xFF, + 0xB2, 0xFF, 0xB3, 0xFF, 0xB4, 0xFF, 0xB5, 0xFF, 0xB6, 0xFF, 0xB7, 0xFF, 0xB8, 0xFF, 0xB9, 0xFF, + 0xBA, 0xFF, 0xBB, 0xFF, 0xBC, 0xFF, 0xBD, 0xFF, 0xBE, 0xFF, 0xBF, 0xFF, 0xC0, 0xFF, 0xC1, 0xFF, + 0xC2, 0xFF, 0xC3, 0xFF, 
0xC4, 0xFF, 0xC5, 0xFF, 0xC6, 0xFF, 0xC7, 0xFF, 0xC8, 0xFF, 0xC9, 0xFF, + 0xCA, 0xFF, 0xCB, 0xFF, 0xCC, 0xFF, 0xCD, 0xFF, 0xCE, 0xFF, 0xCF, 0xFF, 0xD0, 0xFF, 0xD1, 0xFF, + 0xD2, 0xFF, 0xD3, 0xFF, 0xD4, 0xFF, 0xD5, 0xFF, 0xD6, 0xFF, 0xD7, 0xFF, 0xD8, 0xFF, 0xD9, 0xFF, + 0xDA, 0xFF, 0xDB, 0xFF, 0xDC, 0xFF, 0xDD, 0xFF, 0xDE, 0xFF, 0xDF, 0xFF, 0xE0, 0xFF, 0xE1, 0xFF, + 0xE2, 0xFF, 0xE3, 0xFF, 0xE4, 0xFF, 0xE5, 0xFF, 0xE6, 0xFF, 0xE7, 0xFF, 0xE8, 0xFF, 0xE9, 0xFF, + 0xEA, 0xFF, 0xEB, 0xFF, 0xEC, 0xFF, 0xED, 0xFF, 0xEE, 0xFF, 0xEF, 0xFF, 0xF0, 0xFF, 0xF1, 0xFF, + 0xF2, 0xFF, 0xF3, 0xFF, 0xF4, 0xFF, 0xF5, 0xFF, 0xF6, 0xFF, 0xF7, 0xFF, 0xF8, 0xFF, 0xF9, 0xFF, + 0xFA, 0xFF, 0xFB, 0xFF, 0xFC, 0xFF, 0xFD, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF +}; + +#endif /* _UPCASE_H */ diff --git a/fs/sdfat/version.h b/fs/sdfat/version.h new file mode 100644 index 000000000000..44e44e03d847 --- /dev/null +++ b/fs/sdfat/version.h @@ -0,0 +1,25 @@ +/* + * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +/************************************************************************/ +/* */ +/* PROJECT : exFAT & FAT12/16/32 File System */ +/* FILE : version.h */ +/* PURPOSE : sdFAT File Manager */ +/* */ +/************************************************************************/ +#define SDFAT_VERSION "2.3.0-lineage" diff --git a/fs/sdfat/xattr.c b/fs/sdfat/xattr.c new file mode 100644 index 000000000000..40bb850711be --- /dev/null +++ b/fs/sdfat/xattr.c @@ -0,0 +1,132 @@ +/* + * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */
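The xattr.c that follows implements a virtual attribute store: only security.selinux is recognized, stores are accepted but discarded, and reads always return the build-time CONFIG_SDFAT_VIRTUAL_XATTR_SELINUX_LABEL string. A small user-space sketch of the resulting behavior, not part of the patch; the mount path is made up, and the rejection of other names follows from can_support() below:

#include <stdio.h>
#include <sys/types.h>
#include <sys/xattr.h>

int main(void)
{
	char label[256];
	/* Hypothetical sdfat mount point. */
	ssize_t n = getxattr("/mnt/sdfat/file", "security.selinux",
			     label, sizeof(label));

	if (n >= 0)
		printf("selinux label: %.*s\n", (int)n, label);

	/* Any name other than security.selinux should fail (EOPNOTSUPP). */
	if (setxattr("/mnt/sdfat/file", "user.comment", "x", 1, 0) < 0)
		perror("setxattr");
	return 0;
}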
+ +/************************************************************************/ +/* */ +/* PROJECT : exFAT & FAT12/16/32 File System */ +/* FILE : xattr.c */ +/* PURPOSE : sdFAT code for supporting xattr (Extended File Attributes) */ +/* */ +/*----------------------------------------------------------------------*/ +/* NOTES */ +/* */ +/* */ +/************************************************************************/ + +#include <linux/file.h> +#include <linux/fs.h> +#include <linux/xattr.h> +#include <linux/dcache.h> +#include "sdfat.h" + +#ifndef CONFIG_SDFAT_VIRTUAL_XATTR_SELINUX_LABEL +#define CONFIG_SDFAT_VIRTUAL_XATTR_SELINUX_LABEL ("undefined") +#endif + +static const char default_xattr[] = CONFIG_SDFAT_VIRTUAL_XATTR_SELINUX_LABEL; + +static int can_support(const char *name) +{ + if (!name || strcmp(name, "security.selinux")) + return -1; + return 0; +} + +ssize_t sdfat_listxattr(struct dentry *dentry, char *list, size_t size) +{ + return 0; +} + + +/************************************************************************* + * INNER FUNCTIONS WHICH HAVE KERNEL VERSION DEPENDENCY + *************************************************************************/ +static int __sdfat_xattr_check_support(const char *name) +{ + if (can_support(name)) + return -EOPNOTSUPP; + + return 0; +} + +ssize_t __sdfat_getxattr(const char *name, void *value, size_t size) +{ + if (can_support(name)) + return -EOPNOTSUPP; + + if ((size > strlen(default_xattr)+1) && value) + strcpy(value, default_xattr); + + return strlen(default_xattr); +} + + +/************************************************************************* + * FUNCTIONS WHICH HAVE KERNEL VERSION DEPENDENCY + *************************************************************************/ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0) +static int sdfat_xattr_get(const struct xattr_handler *handler, + struct dentry *dentry, struct inode *inode, + const char *name, void *buffer, size_t size) +{ + return __sdfat_getxattr(name, buffer, size); +} + +static int sdfat_xattr_set(const struct xattr_handler *handler, + struct dentry *dentry, struct inode *inode, + const char *name, const void *value, size_t size, + int flags) +{ + return __sdfat_xattr_check_support(name); +} + +const struct xattr_handler sdfat_xattr_handler = { + .prefix = "", /* match anything */ + .get = sdfat_xattr_get, + .set = sdfat_xattr_set, +}; + +const struct xattr_handler *sdfat_xattr_handlers[] = { + &sdfat_xattr_handler, + NULL +}; + +void setup_sdfat_xattr_handler(struct super_block *sb) +{ + sb->s_xattr = sdfat_xattr_handlers; +} +#else /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0) */ +int sdfat_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags) +{ + return __sdfat_xattr_check_support(name); +} + +ssize_t sdfat_getxattr(struct dentry *dentry, const char *name, void *value, size_t size) +{ + return __sdfat_getxattr(name, value, size); +} + +int sdfat_removexattr(struct dentry *dentry, const char *name) +{ + return __sdfat_xattr_check_support(name); +} + +void setup_sdfat_xattr_handler(struct super_block *sb) +{ + /* DO NOTHING */ +} +#endif diff --git a/fs/select.c b/fs/select.c index 3d38808dbcb6..f7667f2c6d0d 100644 --- a/fs/select.c +++ b/fs/select.c @@ -239,7 +239,8 @@ int poll_schedule_timeout(struct poll_wqueues *pwq, int state, set_current_state(state); if (!pwq->triggered) - rc = schedule_hrtimeout_range(expires, slack, HRTIMER_MODE_ABS); + rc = freezable_schedule_hrtimeout_range(expires, slack, + HRTIMER_MODE_ABS);
__set_current_state(TASK_RUNNING); /* diff --git a/fs/seq_file.c b/fs/seq_file.c index 95e730506ad2..0004df800c88 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -683,11 +683,11 @@ EXPORT_SYMBOL(seq_puts); /* * A helper routine for putting decimal numbers without rich format of printf(). * only 'unsigned long long' is supported. - * This routine will put one byte delimiter + number into seq_file. + * This routine will put strlen(delimiter) + number into seq_file. * This routine is very quick when you show lots of numbers. * In usual cases, it will be better to use seq_printf(). It's easier to read. */ -void seq_put_decimal_ull(struct seq_file *m, char delimiter, +void seq_put_decimal_ull(struct seq_file *m, const char *delimiter, unsigned long long num) { int len; @@ -695,8 +695,15 @@ void seq_put_decimal_ull(struct seq_file *m, char delimiter, if (m->count + 2 >= m->size) /* we'll write 2 bytes at least */ goto overflow; - if (delimiter) - m->buf[m->count++] = delimiter; + len = strlen(delimiter); + if (m->count + len >= m->size) + goto overflow; + + memcpy(m->buf + m->count, delimiter, len); + m->count += len; + + if (m->count + 1 >= m->size) + goto overflow; if (num < 10) { m->buf[m->count++] = num + '0'; @@ -706,6 +713,7 @@ void seq_put_decimal_ull(struct seq_file *m, char delimiter, len = num_to_str(m->buf + m->count, m->size - m->count, num); if (!len) goto overflow; + m->count += len; return; @@ -714,19 +722,42 @@ overflow: } EXPORT_SYMBOL(seq_put_decimal_ull); -void seq_put_decimal_ll(struct seq_file *m, char delimiter, long long num) +void seq_put_decimal_ll(struct seq_file *m, const char *delimiter, long long num) { + int len; + + if (m->count + 3 >= m->size) /* we'll write 2 bytes at least */ + goto overflow; + + len = strlen(delimiter); + if (m->count + len >= m->size) + goto overflow; + + memcpy(m->buf + m->count, delimiter, len); + m->count += len; + + if (m->count + 2 >= m->size) + goto overflow; + if (num < 0) { - if (m->count + 3 >= m->size) { - seq_set_overflow(m); - return; - } - if (delimiter) - m->buf[m->count++] = delimiter; + m->buf[m->count++] = '-'; num = -num; - delimiter = '-'; } - seq_put_decimal_ull(m, delimiter, num); + + if (num < 10) { + m->buf[m->count++] = num + '0'; + return; + } + + len = num_to_str(m->buf + m->count, m->size - m->count, num); + if (!len) + goto overflow; + + m->count += len; + return; + +overflow: + seq_set_overflow(m); } EXPORT_SYMBOL(seq_put_decimal_ll); diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig index 6dd158a216f4..ffb093e72b6c 100644 --- a/fs/squashfs/Kconfig +++ b/fs/squashfs/Kconfig @@ -26,6 +26,34 @@ config SQUASHFS If unsure, say N. choice + prompt "File decompression options" + depends on SQUASHFS + help + Squashfs now supports two options for decompressing file + data. Traditionally Squashfs has decompressed into an + intermediate buffer and then memcopied it into the page cache. + Squashfs now supports the ability to decompress directly into + the page cache. + + If unsure, select "Decompress file data into an intermediate buffer" + +config SQUASHFS_FILE_CACHE + bool "Decompress file data into an intermediate buffer" + help + Decompress file data into an intermediate buffer and then + memcopy it into the page cache. + +config SQUASHFS_FILE_DIRECT + bool "Decompress files directly into the page cache" + help + Directly decompress file data into the page cache. + Doing so can significantly improve performance because + it eliminates a memcpy and it also removes the lock contention + on the single buffer. 
+ +endchoice + +choice prompt "Decompressor parallelisation options" depends on SQUASHFS help diff --git a/fs/squashfs/Makefile b/fs/squashfs/Makefile index fe51f1507ed1..246a6f329d89 100644 --- a/fs/squashfs/Makefile +++ b/fs/squashfs/Makefile @@ -5,7 +5,8 @@ obj-$(CONFIG_SQUASHFS) += squashfs.o squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o squashfs-y += namei.o super.o symlink.o decompressor.o -squashfs-y += file_direct.o page_actor.o +squashfs-$(CONFIG_SQUASHFS_FILE_CACHE) += file_cache.o +squashfs-$(CONFIG_SQUASHFS_FILE_DIRECT) += file_direct.o page_actor.o squashfs-$(CONFIG_SQUASHFS_DECOMP_SINGLE) += decompressor_single.o squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI) += decompressor_multi.o squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU) += decompressor_multi_percpu.o diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c index b3b95e2ae2ff..82bc942fc437 100644 --- a/fs/squashfs/block.c +++ b/fs/squashfs/block.c @@ -28,12 +28,9 @@ #include <linux/fs.h> #include <linux/vfs.h> -#include <linux/bio.h> #include <linux/slab.h> #include <linux/string.h> -#include <linux/pagemap.h> #include <linux/buffer_head.h> -#include <linux/workqueue.h> #include "squashfs_fs.h" #include "squashfs_fs_sb.h" @@ -41,381 +38,45 @@ #include "decompressor.h" #include "page_actor.h" -static struct workqueue_struct *squashfs_read_wq; - -struct squashfs_read_request { - struct super_block *sb; - u64 index; - int length; - int compressed; - int offset; - u64 read_end; - struct squashfs_page_actor *output; - enum { - SQUASHFS_COPY, - SQUASHFS_DECOMPRESS, - SQUASHFS_METADATA, - } data_processing; - bool synchronous; - - /* - * If the read is synchronous, it is possible to retrieve information - * about the request by setting these pointers. - */ - int *res; - int *bytes_read; - int *bytes_uncompressed; - - int nr_buffers; - struct buffer_head **bh; - struct work_struct offload; -}; - -struct squashfs_bio_request { - struct buffer_head **bh; - int nr_buffers; -}; - -static int squashfs_bio_submit(struct squashfs_read_request *req); - -int squashfs_init_read_wq(void) -{ - squashfs_read_wq = create_workqueue("SquashFS read wq"); - return !!squashfs_read_wq; -} - -void squashfs_destroy_read_wq(void) -{ - flush_workqueue(squashfs_read_wq); - destroy_workqueue(squashfs_read_wq); -} - -static void free_read_request(struct squashfs_read_request *req, int error) -{ - if (!req->synchronous) - squashfs_page_actor_free(req->output, error); - if (req->res) - *(req->res) = error; - kfree(req->bh); - kfree(req); -} - -static void squashfs_process_blocks(struct squashfs_read_request *req) -{ - int error = 0; - int bytes, i, length; - struct squashfs_sb_info *msblk = req->sb->s_fs_info; - struct squashfs_page_actor *actor = req->output; - struct buffer_head **bh = req->bh; - int nr_buffers = req->nr_buffers; - - for (i = 0; i < nr_buffers; ++i) { - if (!bh[i]) - continue; - wait_on_buffer(bh[i]); - if (!buffer_uptodate(bh[i])) - error = -EIO; - } - if (error) - goto cleanup; - - if (req->data_processing == SQUASHFS_METADATA) { - /* Extract the length of the metadata block */ - if (req->offset != msblk->devblksize - 1) { - length = le16_to_cpup((__le16 *) - (bh[0]->b_data + req->offset)); - } else { - length = (unsigned char)bh[0]->b_data[req->offset]; - length |= (unsigned char)bh[1]->b_data[0] << 8; - } - req->compressed = SQUASHFS_COMPRESSED(length); - req->data_processing = req->compressed ? 
SQUASHFS_DECOMPRESS - : SQUASHFS_COPY; - length = SQUASHFS_COMPRESSED_SIZE(length); - if (req->index + length + 2 > req->read_end) { - for (i = 0; i < nr_buffers; ++i) - put_bh(bh[i]); - kfree(bh); - req->length = length; - req->index += 2; - squashfs_bio_submit(req); - return; - } - req->length = length; - req->offset = (req->offset + 2) % PAGE_SIZE; - if (req->offset < 2) { - put_bh(bh[0]); - ++bh; - --nr_buffers; - } - } - if (req->bytes_read) - *(req->bytes_read) = req->length; - - if (req->data_processing == SQUASHFS_COPY) { - squashfs_bh_to_actor(bh, nr_buffers, req->output, req->offset, - req->length, msblk->devblksize); - } else if (req->data_processing == SQUASHFS_DECOMPRESS) { - req->length = squashfs_decompress(msblk, bh, nr_buffers, - req->offset, req->length, actor); - if (req->length < 0) { - error = -EIO; - goto cleanup; - } - } - - /* Last page may have trailing bytes not filled */ - bytes = req->length % PAGE_SIZE; - if (bytes && actor->page[actor->pages - 1]) - zero_user_segment(actor->page[actor->pages - 1], bytes, - PAGE_SIZE); - -cleanup: - if (req->bytes_uncompressed) - *(req->bytes_uncompressed) = req->length; - if (error) { - for (i = 0; i < nr_buffers; ++i) - if (bh[i]) - put_bh(bh[i]); - } - free_read_request(req, error); -} - -static void read_wq_handler(struct work_struct *work) -{ - squashfs_process_blocks(container_of(work, - struct squashfs_read_request, offload)); -} - -static void squashfs_bio_end_io(struct bio *bio) -{ - int i; - int error = bio->bi_error; - struct squashfs_bio_request *bio_req = bio->bi_private; - - bio_put(bio); - - for (i = 0; i < bio_req->nr_buffers; ++i) { - if (!bio_req->bh[i]) - continue; - if (!error) - set_buffer_uptodate(bio_req->bh[i]); - else - clear_buffer_uptodate(bio_req->bh[i]); - unlock_buffer(bio_req->bh[i]); - } - kfree(bio_req); -} - -static int bh_is_optional(struct squashfs_read_request *req, int idx) -{ - int start_idx, end_idx; - struct squashfs_sb_info *msblk = req->sb->s_fs_info; - - start_idx = (idx * msblk->devblksize - req->offset) >> PAGE_SHIFT; - end_idx = ((idx + 1) * msblk->devblksize - req->offset + 1) >> PAGE_SHIFT; - if (start_idx >= req->output->pages) - return 1; - if (start_idx < 0) - start_idx = end_idx; - if (end_idx >= req->output->pages) - end_idx = start_idx; - return !req->output->page[start_idx] && !req->output->page[end_idx]; -} - -static int actor_getblks(struct squashfs_read_request *req, u64 block) -{ - int i; - - req->bh = kmalloc_array(req->nr_buffers, sizeof(*(req->bh)), GFP_NOIO); - if (!req->bh) - return -ENOMEM; - - for (i = 0; i < req->nr_buffers; ++i) { - /* - * When dealing with an uncompressed block, the actor may - * contains NULL pages. There's no need to read the buffers - * associated with these pages. - */ - if (!req->compressed && bh_is_optional(req, i)) { - req->bh[i] = NULL; - continue; - } - req->bh[i] = sb_getblk(req->sb, block + i); - if (!req->bh[i]) { - while (--i) { - if (req->bh[i]) - put_bh(req->bh[i]); - } - return -1; - } - } - return 0; -} - -static int squashfs_bio_submit(struct squashfs_read_request *req) +/* + * Read the metadata block length, this is stored in the first two + * bytes of the metadata block. 
+ */ +static struct buffer_head *get_block_length(struct super_block *sb, + u64 *cur_index, int *offset, int *length) { - struct bio *bio = NULL; + struct squashfs_sb_info *msblk = sb->s_fs_info; struct buffer_head *bh; - struct squashfs_bio_request *bio_req = NULL; - int b = 0, prev_block = 0; - struct squashfs_sb_info *msblk = req->sb->s_fs_info; - - u64 read_start = round_down(req->index, msblk->devblksize); - u64 read_end = round_up(req->index + req->length, msblk->devblksize); - sector_t block = read_start >> msblk->devblksize_log2; - sector_t block_end = read_end >> msblk->devblksize_log2; - int offset = read_start - round_down(req->index, PAGE_SIZE); - int nr_buffers = block_end - block; - int blksz = msblk->devblksize; - int bio_max_pages = nr_buffers > BIO_MAX_PAGES ? BIO_MAX_PAGES - : nr_buffers; - /* Setup the request */ - req->read_end = read_end; - req->offset = req->index - read_start; - req->nr_buffers = nr_buffers; - if (actor_getblks(req, block) < 0) - goto getblk_failed; - - /* Create and submit the BIOs */ - for (b = 0; b < nr_buffers; ++b, offset += blksz) { - bh = req->bh[b]; - if (!bh || !trylock_buffer(bh)) - continue; - if (buffer_uptodate(bh)) { - unlock_buffer(bh); - continue; + bh = sb_bread(sb, *cur_index); + if (bh == NULL) + return NULL; + + if (msblk->devblksize - *offset == 1) { + *length = (unsigned char) bh->b_data[*offset]; + put_bh(bh); + bh = sb_bread(sb, ++(*cur_index)); + if (bh == NULL) + return NULL; + *length |= (unsigned char) bh->b_data[0] << 8; + *offset = 1; + } else { + *length = (unsigned char) bh->b_data[*offset] | + (unsigned char) bh->b_data[*offset + 1] << 8; + *offset += 2; + + if (*offset == msblk->devblksize) { + put_bh(bh); + bh = sb_bread(sb, ++(*cur_index)); + if (bh == NULL) + return NULL; + *offset = 0; } - offset %= PAGE_SIZE; - - /* Append the buffer to the current BIO if it is contiguous */ - if (bio && bio_req && prev_block + 1 == b) { - if (bio_add_page(bio, bh->b_page, blksz, offset)) { - bio_req->nr_buffers += 1; - prev_block = b; - continue; - } - } - - /* Otherwise, submit the current BIO and create a new one */ - if (bio) - submit_bio(READ, bio); - bio_req = kcalloc(1, sizeof(struct squashfs_bio_request), - GFP_NOIO); - if (!bio_req) - goto req_alloc_failed; - bio_req->bh = &req->bh[b]; - bio = bio_alloc(GFP_NOIO, bio_max_pages); - if (!bio) - goto bio_alloc_failed; - bio->bi_bdev = req->sb->s_bdev; - bio->bi_iter.bi_sector = (block + b) - << (msblk->devblksize_log2 - 9); - bio->bi_private = bio_req; - bio->bi_end_io = squashfs_bio_end_io; - - bio_add_page(bio, bh->b_page, blksz, offset); - bio_req->nr_buffers += 1; - prev_block = b; } - if (bio) - submit_bio(READ, bio); - if (req->synchronous) - squashfs_process_blocks(req); - else { - INIT_WORK(&req->offload, read_wq_handler); - schedule_work(&req->offload); - } - return 0; - -bio_alloc_failed: - kfree(bio_req); -req_alloc_failed: - unlock_buffer(bh); - while (--nr_buffers >= b) - if (req->bh[nr_buffers]) - put_bh(req->bh[nr_buffers]); - while (--b >= 0) - if (req->bh[b]) - wait_on_buffer(req->bh[b]); -getblk_failed: - free_read_request(req, -ENOMEM); - return -ENOMEM; + return bh; } -static int read_metadata_block(struct squashfs_read_request *req, - u64 *next_index) -{ - int ret, error, bytes_read = 0, bytes_uncompressed = 0; - struct squashfs_sb_info *msblk = req->sb->s_fs_info; - - if (req->index + 2 > msblk->bytes_used) { - free_read_request(req, -EINVAL); - return -EINVAL; - } - req->length = 2; - - /* Do not read beyond the end of the device */ - if (req->index 
+ req->length > msblk->bytes_used) - req->length = msblk->bytes_used - req->index; - req->data_processing = SQUASHFS_METADATA; - - /* - * Reading metadata is always synchronous because we don't know the - * length in advance and the function is expected to update - * 'next_index' and return the length. - */ - req->synchronous = true; - req->res = &error; - req->bytes_read = &bytes_read; - req->bytes_uncompressed = &bytes_uncompressed; - - TRACE("Metadata block @ 0x%llx, %scompressed size %d, src size %d\n", - req->index, req->compressed ? "" : "un", bytes_read, - req->output->length); - - ret = squashfs_bio_submit(req); - if (ret) - return ret; - if (error) - return error; - if (next_index) - *next_index += 2 + bytes_read; - return bytes_uncompressed; -} - -static int read_data_block(struct squashfs_read_request *req, int length, - u64 *next_index, bool synchronous) -{ - int ret, error = 0, bytes_uncompressed = 0, bytes_read = 0; - - req->compressed = SQUASHFS_COMPRESSED_BLOCK(length); - req->length = length = SQUASHFS_COMPRESSED_SIZE_BLOCK(length); - req->data_processing = req->compressed ? SQUASHFS_DECOMPRESS - : SQUASHFS_COPY; - - req->synchronous = synchronous; - if (synchronous) { - req->res = &error; - req->bytes_read = &bytes_read; - req->bytes_uncompressed = &bytes_uncompressed; - } - - TRACE("Data block @ 0x%llx, %scompressed size %d, src size %d\n", - req->index, req->compressed ? "" : "un", req->length, - req->output->length); - - ret = squashfs_bio_submit(req); - if (ret) - return ret; - if (synchronous) - ret = error ? error : bytes_uncompressed; - if (next_index) - *next_index += length; - return ret; -} /* * Read and decompress a metadata block or datablock. Length is non-zero @@ -426,50 +87,130 @@ static int read_data_block(struct squashfs_read_request *req, int length, * generated a larger block - this does occasionally happen with compression * algorithms). */ -static int __squashfs_read_data(struct super_block *sb, u64 index, int length, - u64 *next_index, struct squashfs_page_actor *output, bool sync) +int squashfs_read_data(struct super_block *sb, u64 index, int length, + u64 *next_index, struct squashfs_page_actor *output) { - struct squashfs_read_request *req; + struct squashfs_sb_info *msblk = sb->s_fs_info; + struct buffer_head **bh; + int offset = index & ((1 << msblk->devblksize_log2) - 1); + u64 cur_index = index >> msblk->devblksize_log2; + int bytes, compressed, b = 0, k = 0, avail, i; - req = kcalloc(1, sizeof(struct squashfs_read_request), GFP_KERNEL); - if (!req) { - if (!sync) - squashfs_page_actor_free(output, -ENOMEM); + bh = kcalloc(((output->length + msblk->devblksize - 1) + >> msblk->devblksize_log2) + 1, sizeof(*bh), GFP_KERNEL); + if (bh == NULL) return -ENOMEM; - } - req->sb = sb; - req->index = index; - req->output = output; + if (length) { + /* + * Datablock. + */ + bytes = -offset; + compressed = SQUASHFS_COMPRESSED_BLOCK(length); + length = SQUASHFS_COMPRESSED_SIZE_BLOCK(length); + if (next_index) + *next_index = index + length; + + TRACE("Block @ 0x%llx, %scompressed size %d, src size %d\n", + index, compressed ? "" : "un", length, output->length); + + if (length < 0 || length > output->length || + (index + length) > msblk->bytes_used) + goto read_failure; + + for (b = 0; bytes < length; b++, cur_index++) { + bh[b] = sb_getblk(sb, cur_index); + if (bh[b] == NULL) + goto block_release; + bytes += msblk->devblksize; + } + ll_rw_block(READ, b, bh); + } else { + /* + * Metadata block. 
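+ * Unlike a datablock, the length of a metadata block is not known in + * advance: it must first be read from the block's own 2-byte header via + * get_block_length(). bh[0] has already been read by sb_bread() inside that + * helper, so the ll_rw_block() below starts at bh[1]. 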
+ */ + if ((index + 2) > msblk->bytes_used) + goto read_failure; + + bh[0] = get_block_length(sb, &cur_index, &offset, &length); + if (bh[0] == NULL) + goto read_failure; + b = 1; + + bytes = msblk->devblksize - offset; + compressed = SQUASHFS_COMPRESSED(length); + length = SQUASHFS_COMPRESSED_SIZE(length); + if (next_index) + *next_index = index + length + 2; - if (next_index) - *next_index = index; + TRACE("Block @ 0x%llx, %scompressed size %d\n", index, + compressed ? "" : "un", length); - if (length) - length = read_data_block(req, length, next_index, sync); - else - length = read_metadata_block(req, next_index); + if (length < 0 || length > output->length || + (index + length) > msblk->bytes_used) + goto block_release; - if (length < 0) { - ERROR("squashfs_read_data failed to read block 0x%llx\n", - (unsigned long long)index); - return -EIO; + for (; bytes < length; b++) { + bh[b] = sb_getblk(sb, ++cur_index); + if (bh[b] == NULL) + goto block_release; + bytes += msblk->devblksize; + } + ll_rw_block(READ, b - 1, bh + 1); } - return length; -} + for (i = 0; i < b; i++) { + wait_on_buffer(bh[i]); + if (!buffer_uptodate(bh[i])) + goto block_release; + } -int squashfs_read_data(struct super_block *sb, u64 index, int length, - u64 *next_index, struct squashfs_page_actor *output) -{ - return __squashfs_read_data(sb, index, length, next_index, output, - true); -} + if (compressed) { + if (!msblk->stream) + goto read_failure; + length = squashfs_decompress(msblk, bh, b, offset, length, + output); + if (length < 0) + goto read_failure; + } else { + /* + * Block is uncompressed. + */ + int in, pg_offset = 0; + void *data = squashfs_first_page(output); + + for (bytes = length; k < b; k++) { + in = min(bytes, msblk->devblksize - offset); + bytes -= in; + while (in) { + if (pg_offset == PAGE_CACHE_SIZE) { + data = squashfs_next_page(output); + pg_offset = 0; + } + avail = min_t(int, in, PAGE_CACHE_SIZE - + pg_offset); + memcpy(data + pg_offset, bh[k]->b_data + offset, + avail); + in -= avail; + pg_offset += avail; + offset += avail; + } + offset = 0; + put_bh(bh[k]); + } + squashfs_finish_page(output); + } -int squashfs_read_data_async(struct super_block *sb, u64 index, int length, - u64 *next_index, struct squashfs_page_actor *output) -{ + kfree(bh); + return length; + +block_release: + for (; k < b; k++) + put_bh(bh[k]); - return __squashfs_read_data(sb, index, length, next_index, output, - false); +read_failure: + ERROR("squashfs_read_data failed to read block 0x%llx\n", + (unsigned long long) index); + kfree(bh); + return -EIO; } diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c index 7d78c3d28bbd..91ce49c05b7c 100644 --- a/fs/squashfs/cache.c +++ b/fs/squashfs/cache.c @@ -209,14 +209,17 @@ void squashfs_cache_put(struct squashfs_cache_entry *entry) */ void squashfs_cache_delete(struct squashfs_cache *cache) { - int i; + int i, j; if (cache == NULL) return; for (i = 0; i < cache->entries; i++) { - if (cache->entry[i].page) - free_page_array(cache->entry[i].page, cache->pages); + if (cache->entry[i].data) { + for (j = 0; j < cache->pages; j++) + kfree(cache->entry[i].data[j]); + kfree(cache->entry[i].data); + } kfree(cache->entry[i].actor); } @@ -233,7 +236,7 @@ void squashfs_cache_delete(struct squashfs_cache *cache) struct squashfs_cache *squashfs_cache_init(char *name, int entries, int block_size) { - int i; + int i, j; struct squashfs_cache *cache = kzalloc(sizeof(*cache), GFP_KERNEL); if (cache == NULL) { @@ -265,13 +268,22 @@ struct squashfs_cache *squashfs_cache_init(char *name, int 
entries, init_waitqueue_head(&cache->entry[i].wait_queue); entry->cache = cache; entry->block = SQUASHFS_INVALID_BLK; - entry->page = alloc_page_array(cache->pages, GFP_KERNEL); - if (!entry->page) { + entry->data = kcalloc(cache->pages, sizeof(void *), GFP_KERNEL); + if (entry->data == NULL) { ERROR("Failed to allocate %s cache entry\n", name); goto cleanup; } - entry->actor = squashfs_page_actor_init(entry->page, - cache->pages, 0, NULL); + + for (j = 0; j < cache->pages; j++) { + entry->data[j] = kmalloc(PAGE_CACHE_SIZE, GFP_KERNEL); + if (entry->data[j] == NULL) { + ERROR("Failed to allocate %s buffer\n", name); + goto cleanup; + } + } + + entry->actor = squashfs_page_actor_init(entry->data, + cache->pages, 0); if (entry->actor == NULL) { ERROR("Failed to allocate %s cache entry\n", name); goto cleanup; @@ -302,20 +314,18 @@ int squashfs_copy_data(void *buffer, struct squashfs_cache_entry *entry, return min(length, entry->length - offset); while (offset < entry->length) { - void *buff = kmap_atomic(entry->page[offset / PAGE_CACHE_SIZE]) - + (offset % PAGE_CACHE_SIZE); + void *buff = entry->data[offset / PAGE_CACHE_SIZE] + + (offset % PAGE_CACHE_SIZE); int bytes = min_t(int, entry->length - offset, PAGE_CACHE_SIZE - (offset % PAGE_CACHE_SIZE)); if (bytes >= remaining) { memcpy(buffer, buff, remaining); - kunmap_atomic(buff); remaining = 0; break; } memcpy(buffer, buff, bytes); - kunmap_atomic(buff); buffer += bytes; remaining -= bytes; offset += bytes; @@ -409,38 +419,43 @@ struct squashfs_cache_entry *squashfs_get_datablock(struct super_block *sb, void *squashfs_read_table(struct super_block *sb, u64 block, int length) { int pages = (length + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - struct page **page; - void *buff; - int res; + int i, res; + void *table, *buffer, **data; struct squashfs_page_actor *actor; - page = alloc_page_array(pages, GFP_KERNEL); - if (!page) + table = buffer = kmalloc(length, GFP_KERNEL); + if (table == NULL) return ERR_PTR(-ENOMEM); - actor = squashfs_page_actor_init(page, pages, length, NULL); - if (actor == NULL) { + data = kcalloc(pages, sizeof(void *), GFP_KERNEL); + if (data == NULL) { res = -ENOMEM; goto failed; } + actor = squashfs_page_actor_init(data, pages, length); + if (actor == NULL) { + res = -ENOMEM; + goto failed2; + } + + for (i = 0; i < pages; i++, buffer += PAGE_CACHE_SIZE) + data[i] = buffer; + res = squashfs_read_data(sb, block, length | SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, actor); + kfree(data); + kfree(actor); + if (res < 0) - goto failed2; + goto failed; - buff = kmalloc(length, GFP_KERNEL); - if (!buff) - goto failed2; - squashfs_actor_to_buf(actor, buff, length); - squashfs_page_actor_free(actor, 0); - free_page_array(page, pages); - return buff; + return table; failed2: - squashfs_page_actor_free(actor, 0); + kfree(data); failed: - free_page_array(page, pages); + kfree(table); return ERR_PTR(res); } diff --git a/fs/squashfs/decompressor.c b/fs/squashfs/decompressor.c index 7de35bf297aa..e9034bf6e5ae 100644 --- a/fs/squashfs/decompressor.c +++ b/fs/squashfs/decompressor.c @@ -24,8 +24,7 @@ #include <linux/types.h> #include <linux/mutex.h> #include <linux/slab.h> -#include <linux/highmem.h> -#include <linux/fs.h> +#include <linux/buffer_head.h> #include "squashfs_fs.h" #include "squashfs_fs_sb.h" @@ -95,44 +94,40 @@ const struct squashfs_decompressor *squashfs_lookup_decompressor(int id) static void *get_comp_opts(struct super_block *sb, unsigned short flags) { struct squashfs_sb_info *msblk = sb->s_fs_info; - void *comp_opts, *buffer 
= NULL; - struct page *page; + void *buffer = NULL, *comp_opts; struct squashfs_page_actor *actor = NULL; int length = 0; - if (!SQUASHFS_COMP_OPTS(flags)) - return squashfs_comp_opts(msblk, buffer, length); - /* * Read decompressor specific options from file system if present */ - - page = alloc_page(GFP_KERNEL); - if (!page) - return ERR_PTR(-ENOMEM); - - actor = squashfs_page_actor_init(&page, 1, 0, NULL); - if (actor == NULL) { - comp_opts = ERR_PTR(-ENOMEM); - goto actor_error; - } - - length = squashfs_read_data(sb, - sizeof(struct squashfs_super_block), 0, NULL, actor); - - if (length < 0) { - comp_opts = ERR_PTR(length); - goto read_error; + if (SQUASHFS_COMP_OPTS(flags)) { + buffer = kmalloc(PAGE_CACHE_SIZE, GFP_KERNEL); + if (buffer == NULL) { + comp_opts = ERR_PTR(-ENOMEM); + goto out; + } + + actor = squashfs_page_actor_init(&buffer, 1, 0); + if (actor == NULL) { + comp_opts = ERR_PTR(-ENOMEM); + goto out; + } + + length = squashfs_read_data(sb, + sizeof(struct squashfs_super_block), 0, NULL, actor); + + if (length < 0) { + comp_opts = ERR_PTR(length); + goto out; + } } - buffer = kmap_atomic(page); comp_opts = squashfs_comp_opts(msblk, buffer, length); - kunmap_atomic(buffer); -read_error: - squashfs_page_actor_free(actor, 0); -actor_error: - __free_page(page); +out: + kfree(actor); + kfree(buffer); return comp_opts; } diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c index 377be131d23b..979da17cbbf3 100644 --- a/fs/squashfs/file.c +++ b/fs/squashfs/file.c @@ -47,16 +47,12 @@ #include <linux/string.h> #include <linux/pagemap.h> #include <linux/mutex.h> -#include <linux/mm_inline.h> #include "squashfs_fs.h" #include "squashfs_fs_sb.h" #include "squashfs_fs_i.h" #include "squashfs.h" -// Backported from 4.5 -#define lru_to_page(head) (list_entry((head)->prev, struct page, lru)) - /* * Locate cache slot in range [offset, index] for specified inode. If * there's more than one return the slot closest to index. 
@@ -446,21 +442,6 @@ static int squashfs_readpage_fragment(struct page *page) return res; } -static int squashfs_readpages_fragment(struct page *page, - struct list_head *readahead_pages, struct address_space *mapping) -{ - if (!page) { - page = lru_to_page(readahead_pages); - list_del(&page->lru); - if (add_to_page_cache_lru(page, mapping, page->index, - mapping_gfp_constraint(mapping, GFP_KERNEL))) { - put_page(page); - return 0; - } - } - return squashfs_readpage_fragment(page); -} - static int squashfs_readpage_sparse(struct page *page, int index, int file_end) { struct inode *inode = page->mapping->host; @@ -473,105 +454,54 @@ static int squashfs_readpage_sparse(struct page *page, int index, int file_end) return 0; } -static int squashfs_readpages_sparse(struct page *page, - struct list_head *readahead_pages, int index, int file_end, - struct address_space *mapping) -{ - if (!page) { - page = lru_to_page(readahead_pages); - list_del(&page->lru); - if (add_to_page_cache_lru(page, mapping, page->index, - mapping_gfp_constraint(mapping, GFP_KERNEL))) { - put_page(page); - return 0; - } - } - return squashfs_readpage_sparse(page, index, file_end); -} - -static int __squashfs_readpages(struct file *file, struct page *page, - struct list_head *readahead_pages, unsigned int nr_pages, - struct address_space *mapping) +static int squashfs_readpage(struct file *file, struct page *page) { - struct inode *inode = mapping->host; + struct inode *inode = page->mapping->host; struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; + int index = page->index >> (msblk->block_log - PAGE_CACHE_SHIFT); int file_end = i_size_read(inode) >> msblk->block_log; int res; - - do { - struct page *cur_page = page ? page - : lru_to_page(readahead_pages); - int page_index = cur_page->index; - int index = page_index >> (msblk->block_log - PAGE_CACHE_SHIFT); - - if (page_index >= ((i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> - PAGE_CACHE_SHIFT)) - return 1; - - if (index < file_end || squashfs_i(inode)->fragment_block == - SQUASHFS_INVALID_BLK) { - u64 block = 0; - int bsize = read_blocklist(inode, index, &block); - - if (bsize < 0) - return -1; - - if (bsize == 0) { - res = squashfs_readpages_sparse(page, - readahead_pages, index, file_end, - mapping); - } else { - res = squashfs_readpages_block(page, - readahead_pages, &nr_pages, mapping, - page_index, block, bsize); - } - } else { - res = squashfs_readpages_fragment(page, - readahead_pages, mapping); - } - if (res) - return 0; - page = NULL; - } while (readahead_pages && !list_empty(readahead_pages)); - - return 0; -} - -static int squashfs_readpage(struct file *file, struct page *page) -{ - int ret; + void *pageaddr; TRACE("Entered squashfs_readpage, page index %lx, start block %llx\n", - page->index, squashfs_i(page->mapping->host)->start); + page->index, squashfs_i(inode)->start); - get_page(page); + if (page->index >= ((i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> + PAGE_CACHE_SHIFT)) + goto out; - ret = __squashfs_readpages(file, page, NULL, 1, page->mapping); - if (ret) { - flush_dcache_page(page); - if (ret < 0) - SetPageError(page); - else - SetPageUptodate(page); - zero_user_segment(page, 0, PAGE_CACHE_SIZE); - unlock_page(page); - put_page(page); - } + if (index < file_end || squashfs_i(inode)->fragment_block == + SQUASHFS_INVALID_BLK) { + u64 block = 0; + int bsize = read_blocklist(inode, index, &block); + if (bsize < 0) + goto error_out; - return 0; -} + if (bsize == 0) + res = squashfs_readpage_sparse(page, index, file_end); + else + res = 
squashfs_readpage_block(page, block, bsize); + } else + res = squashfs_readpage_fragment(page); + + if (!res) + return 0; + +error_out: + SetPageError(page); +out: + pageaddr = kmap_atomic(page); + memset(pageaddr, 0, PAGE_CACHE_SIZE); + kunmap_atomic(pageaddr); + flush_dcache_page(page); + if (!PageError(page)) + SetPageUptodate(page); + unlock_page(page); -static int squashfs_readpages(struct file *file, struct address_space *mapping, - struct list_head *pages, unsigned int nr_pages) -{ - TRACE("Entered squashfs_readpages, %u pages, first page index %lx\n", - nr_pages, lru_to_page(pages)->index); - __squashfs_readpages(file, NULL, pages, nr_pages, mapping); return 0; } const struct address_space_operations squashfs_aops = { - .readpage = squashfs_readpage, - .readpages = squashfs_readpages, + .readpage = squashfs_readpage }; diff --git a/fs/squashfs/file_cache.c b/fs/squashfs/file_cache.c new file mode 100644 index 000000000000..f2310d2a2019 --- /dev/null +++ b/fs/squashfs/file_cache.c @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2013 + * Phillip Lougher <phillip@squashfs.org.uk> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ + +#include <linux/fs.h> +#include <linux/vfs.h> +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/pagemap.h> +#include <linux/mutex.h> + +#include "squashfs_fs.h" +#include "squashfs_fs_sb.h" +#include "squashfs_fs_i.h" +#include "squashfs.h" + +/* Read separately compressed datablock and memcopy into page cache */ +int squashfs_readpage_block(struct page *page, u64 block, int bsize) +{ + struct inode *i = page->mapping->host; + struct squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb, + block, bsize); + int res = buffer->error; + + if (res) + ERROR("Unable to read page, block %llx, size %x\n", block, + bsize); + else + squashfs_copy_cache(page, buffer, buffer->length, 0); + + squashfs_cache_put(buffer); + return res; +} diff --git a/fs/squashfs/file_direct.c b/fs/squashfs/file_direct.c index c97af4c6ccd0..43e7a7eddac0 100644 --- a/fs/squashfs/file_direct.c +++ b/fs/squashfs/file_direct.c @@ -13,7 +13,6 @@ #include <linux/string.h> #include <linux/pagemap.h> #include <linux/mutex.h> -#include <linux/mm_inline.h> #include "squashfs_fs.h" #include "squashfs_fs_sb.h" @@ -21,139 +20,157 @@ #include "squashfs.h" #include "page_actor.h" -// Backported from 4.5 -#define lru_to_page(head) (list_entry((head)->prev, struct page, lru)) +static int squashfs_read_cache(struct page *target_page, u64 block, int bsize, + int pages, struct page **page); -static void release_actor_pages(struct page **page, int pages, int error) -{ - int i; - - for (i = 0; i < pages; i++) { - if (!page[i]) - continue; - flush_dcache_page(page[i]); - if (!error) - SetPageUptodate(page[i]); - else { - SetPageError(page[i]); - zero_user_segment(page[i], 0, PAGE_CACHE_SIZE); - } - unlock_page(page[i]); - put_page(page[i]); - } - kfree(page); -} +/* Read separately compressed datablock directly into page cache */ +int squashfs_readpage_block(struct page *target_page, u64 block, int bsize) -/* - * Create a "page actor" which will kmap and kunmap the - * page cache pages appropriately within the decompressor - */ -static struct squashfs_page_actor *actor_from_page_cache( - unsigned int actor_pages, struct page *target_page, - struct list_head *rpages, unsigned int *nr_pages, int start_index, - struct address_space *mapping) { + struct inode *inode = 
target_page->mapping->host; + struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; + + int file_end = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT; + int mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1; + int start_index = target_page->index & ~mask; + int end_index = start_index | mask; + int i, n, pages, missing_pages, bytes, res = -ENOMEM; struct page **page; struct squashfs_page_actor *actor; - int i, n; - gfp_t gfp = mapping_gfp_constraint(mapping, GFP_KERNEL); - - page = kmalloc_array(actor_pages, sizeof(void *), GFP_KERNEL); - if (!page) - return NULL; - - for (i = 0, n = start_index; i < actor_pages; i++, n++) { - if (target_page == NULL && rpages && !list_empty(rpages)) { - struct page *cur_page = lru_to_page(rpages); - - if (cur_page->index < start_index + actor_pages) { - list_del(&cur_page->lru); - --(*nr_pages); - if (add_to_page_cache_lru(cur_page, mapping, - cur_page->index, gfp)) - put_page(cur_page); - else - target_page = cur_page; - } else - rpages = NULL; - } + void *pageaddr; + + if (end_index > file_end) + end_index = file_end; + + pages = end_index - start_index + 1; - if (target_page && target_page->index == n) { - page[i] = target_page; - target_page = NULL; - } else { - page[i] = grab_cache_page_nowait(mapping, n); - if (page[i] == NULL) - continue; + page = kmalloc_array(pages, sizeof(void *), GFP_KERNEL); + if (page == NULL) + return res; + + /* + * Create a "page actor" which will kmap and kunmap the + * page cache pages appropriately within the decompressor + */ + actor = squashfs_page_actor_init_special(page, pages, 0); + if (actor == NULL) + goto out; + + /* Try to grab all the pages covered by the Squashfs block */ + for (missing_pages = 0, i = 0, n = start_index; i < pages; i++, n++) { + page[i] = (n == target_page->index) ? target_page : + grab_cache_page_nowait(target_page->mapping, n); + + if (page[i] == NULL) { + missing_pages++; + continue; } if (PageUptodate(page[i])) { unlock_page(page[i]); - put_page(page[i]); + page_cache_release(page[i]); page[i] = NULL; + missing_pages++; } } - actor = squashfs_page_actor_init(page, actor_pages, 0, - release_actor_pages); - if (!actor) { - release_actor_pages(page, actor_pages, -ENOMEM); - kfree(page); - return NULL; + if (missing_pages) { + /* + * Couldn't get one or more pages: either a missing page has + * been VM reclaimed while others are still in the page cache + * and uptodate, or we're racing with another thread in + * squashfs_readpage also trying to grab them. Fall back to + * using an intermediate buffer. 
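+ * The fallback, squashfs_read_cache() below, decompresses the block into + * a squashfs cache entry and copies it into whichever pages were grabbed; + * a page missed here will be filled in later by its own squashfs_readpage() + * call. 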
+ */ + res = squashfs_read_cache(target_page, block, bsize, pages, + page); + if (res < 0) + goto mark_errored; + + goto out; } - return actor; -} -int squashfs_readpages_block(struct page *target_page, - struct list_head *readahead_pages, - unsigned int *nr_pages, - struct address_space *mapping, - int page_index, u64 block, int bsize) + /* Decompress directly into the page cache buffers */ + res = squashfs_read_data(inode->i_sb, block, bsize, NULL, actor); + if (res < 0) + goto mark_errored; + + /* Last page may have trailing bytes not filled */ + bytes = res % PAGE_CACHE_SIZE; + if (bytes) { + pageaddr = kmap_atomic(page[pages - 1]); + memset(pageaddr + bytes, 0, PAGE_CACHE_SIZE - bytes); + kunmap_atomic(pageaddr); + } -{ - struct squashfs_page_actor *actor; - struct inode *inode = mapping->host; - struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; - int start_index, end_index, file_end, actor_pages, res; - int mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1; + /* Mark pages as uptodate, unlock and release */ + for (i = 0; i < pages; i++) { + flush_dcache_page(page[i]); + SetPageUptodate(page[i]); + unlock_page(page[i]); + if (page[i] != target_page) + page_cache_release(page[i]); + } - /* - * If readpage() is called on an uncompressed datablock, we can just - * read the pages instead of fetching the whole block. - * This greatly improves the performance when a process keep doing - * random reads because we only fetch the necessary data. - * The readahead algorithm will take care of doing speculative reads - * if necessary. - * We can't read more than 1 block even if readahead provides use more - * pages because we don't know yet if the next block is compressed or - * not. + kfree(actor); + kfree(page); + + return 0; + +mark_errored: + /* Decompression failed, mark pages as errored. 
Target_page is + * dealt with by the caller */ - if (bsize && !SQUASHFS_COMPRESSED_BLOCK(bsize)) { - u64 block_end = block + msblk->block_size; - - block += (page_index & mask) * PAGE_CACHE_SIZE; - actor_pages = (block_end - block) / PAGE_CACHE_SIZE; - if (*nr_pages < actor_pages) - actor_pages = *nr_pages; - start_index = page_index; - bsize = min_t(int, bsize, (PAGE_CACHE_SIZE * actor_pages) - | SQUASHFS_COMPRESSED_BIT_BLOCK); - } else { - file_end = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT; - start_index = page_index & ~mask; - end_index = start_index | mask; - if (end_index > file_end) - end_index = file_end; - actor_pages = end_index - start_index + 1; + for (i = 0; i < pages; i++) { + if (page[i] == NULL || page[i] == target_page) + continue; + flush_dcache_page(page[i]); + SetPageError(page[i]); + unlock_page(page[i]); + page_cache_release(page[i]); } - actor = actor_from_page_cache(actor_pages, target_page, - readahead_pages, nr_pages, start_index, - mapping); - if (!actor) - return -ENOMEM; +out: + kfree(actor); + kfree(page); + return res; +} + + +static int squashfs_read_cache(struct page *target_page, u64 block, int bsize, + int pages, struct page **page) +{ + struct inode *i = target_page->mapping->host; + struct squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb, + block, bsize); + int bytes = buffer->length, res = buffer->error, n, offset = 0; + void *pageaddr; + + if (res) { + ERROR("Unable to read page, block %llx, size %x\n", block, + bsize); + goto out; + } + + for (n = 0; n < pages && bytes > 0; n++, + bytes -= PAGE_CACHE_SIZE, offset += PAGE_CACHE_SIZE) { + int avail = min_t(int, bytes, PAGE_CACHE_SIZE); + + if (page[n] == NULL) + continue; + + pageaddr = kmap_atomic(page[n]); + squashfs_copy_data(pageaddr, buffer, offset, avail); + memset(pageaddr + avail, 0, PAGE_CACHE_SIZE - avail); + kunmap_atomic(pageaddr); + flush_dcache_page(page[n]); + SetPageUptodate(page[n]); + unlock_page(page[n]); + if (page[n] != target_page) + page_cache_release(page[n]); + } - res = squashfs_read_data_async(inode->i_sb, block, bsize, NULL, - actor); - return res < 0 ? 
res : 0; +out: + squashfs_cache_put(buffer); + return res; } diff --git a/fs/squashfs/lz4_wrapper.c b/fs/squashfs/lz4_wrapper.c index df4fa3c7ddd0..c31e2bc9c081 100644 --- a/fs/squashfs/lz4_wrapper.c +++ b/fs/squashfs/lz4_wrapper.c @@ -94,17 +94,39 @@ static int lz4_uncompress(struct squashfs_sb_info *msblk, void *strm, struct buffer_head **bh, int b, int offset, int length, struct squashfs_page_actor *output) { - int res; - size_t dest_len = output->length; struct squashfs_lz4 *stream = strm; + void *buff = stream->input, *data; + int avail, i, bytes = length, res; + size_t dest_len = output->length; + + for (i = 0; i < b; i++) { + avail = min(bytes, msblk->devblksize - offset); + memcpy(buff, bh[i]->b_data + offset, avail); + buff += avail; + bytes -= avail; + offset = 0; + put_bh(bh[i]); + } - squashfs_bh_to_buf(bh, b, stream->input, offset, length, - msblk->devblksize); res = lz4_decompress_unknownoutputsize(stream->input, length, stream->output, &dest_len); if (res) return -EIO; - squashfs_buf_to_actor(stream->output, output, dest_len); + + bytes = dest_len; + data = squashfs_first_page(output); + buff = stream->output; + while (data) { + if (bytes <= PAGE_CACHE_SIZE) { + memcpy(data, buff, bytes); + break; + } + memcpy(data, buff, PAGE_CACHE_SIZE); + buff += PAGE_CACHE_SIZE; + bytes -= PAGE_CACHE_SIZE; + data = squashfs_next_page(output); + } + squashfs_finish_page(output); return dest_len; } diff --git a/fs/squashfs/lzo_wrapper.c b/fs/squashfs/lzo_wrapper.c index 2c844d53a59e..244b9fbfff7b 100644 --- a/fs/squashfs/lzo_wrapper.c +++ b/fs/squashfs/lzo_wrapper.c @@ -79,19 +79,45 @@ static int lzo_uncompress(struct squashfs_sb_info *msblk, void *strm, struct buffer_head **bh, int b, int offset, int length, struct squashfs_page_actor *output) { - int res; - size_t out_len = output->length; struct squashfs_lzo *stream = strm; + void *buff = stream->input, *data; + int avail, i, bytes = length, res; + size_t out_len = output->length; + + for (i = 0; i < b; i++) { + avail = min(bytes, msblk->devblksize - offset); + memcpy(buff, bh[i]->b_data + offset, avail); + buff += avail; + bytes -= avail; + offset = 0; + put_bh(bh[i]); + } - squashfs_bh_to_buf(bh, b, stream->input, offset, length, - msblk->devblksize); res = lzo1x_decompress_safe(stream->input, (size_t)length, stream->output, &out_len); if (res != LZO_E_OK) - return -EIO; - squashfs_buf_to_actor(stream->output, output, out_len); + goto failed; - return out_len; + res = bytes = (int)out_len; + data = squashfs_first_page(output); + buff = stream->output; + while (data) { + if (bytes <= PAGE_CACHE_SIZE) { + memcpy(data, buff, bytes); + break; + } else { + memcpy(data, buff, PAGE_CACHE_SIZE); + buff += PAGE_CACHE_SIZE; + bytes -= PAGE_CACHE_SIZE; + data = squashfs_next_page(output); + } + } + squashfs_finish_page(output); + + return res; + +failed: + return -EIO; } const struct squashfs_decompressor squashfs_lzo_comp_ops = { diff --git a/fs/squashfs/page_actor.c b/fs/squashfs/page_actor.c index 53863508e400..5a1c11f56441 100644 --- a/fs/squashfs/page_actor.c +++ b/fs/squashfs/page_actor.c @@ -9,145 +9,92 @@ #include <linux/kernel.h> #include <linux/slab.h> #include <linux/pagemap.h> -#include <linux/buffer_head.h> #include "page_actor.h" -struct squashfs_page_actor *squashfs_page_actor_init(struct page **page, - int pages, int length, void (*release_pages)(struct page **, int, int)) -{ - struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL); - - if (actor == NULL) - return NULL; +/* + * This file contains implementations 
of page_actor for decompressing into + * an intermediate buffer, and for decompressing directly into the + * page cache. + * + * Calling code should avoid sleeping between calls to squashfs_first_page() + * and squashfs_finish_page(). + */ - actor->length = length ? : pages * PAGE_CACHE_SIZE; - actor->page = page; - actor->pages = pages; - actor->next_page = 0; - actor->pageaddr = NULL; - actor->release_pages = release_pages; - return actor; +/* Implementation of page_actor for decompressing into intermediate buffer */ +static void *cache_first_page(struct squashfs_page_actor *actor) +{ + actor->next_page = 1; + return actor->buffer[0]; } -void squashfs_page_actor_free(struct squashfs_page_actor *actor, int error) +static void *cache_next_page(struct squashfs_page_actor *actor) { - if (!actor) - return; + if (actor->next_page == actor->pages) + return NULL; - if (actor->release_pages) - actor->release_pages(actor->page, actor->pages, error); - kfree(actor); + return actor->buffer[actor->next_page++]; } -void squashfs_actor_to_buf(struct squashfs_page_actor *actor, void *buf, - int length) +static void cache_finish_page(struct squashfs_page_actor *actor) { - void *pageaddr; - int pos = 0, avail, i; - - for (i = 0; i < actor->pages && pos < length; ++i) { - avail = min_t(int, length - pos, PAGE_CACHE_SIZE); - if (actor->page[i]) { - pageaddr = kmap_atomic(actor->page[i]); - memcpy(buf + pos, pageaddr, avail); - kunmap_atomic(pageaddr); - } - pos += avail; - } + /* empty */ } -void squashfs_buf_to_actor(void *buf, struct squashfs_page_actor *actor, - int length) +struct squashfs_page_actor *squashfs_page_actor_init(void **buffer, + int pages, int length) { - void *pageaddr; - int pos = 0, avail, i; - - for (i = 0; i < actor->pages && pos < length; ++i) { - avail = min_t(int, length - pos, PAGE_CACHE_SIZE); - if (actor->page[i]) { - pageaddr = kmap_atomic(actor->page[i]); - memcpy(pageaddr, buf + pos, avail); - kunmap_atomic(pageaddr); - } - pos += avail; - } + struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL); + + if (actor == NULL) + return NULL; + + actor->length = length ? : pages * PAGE_CACHE_SIZE; + actor->buffer = buffer; + actor->pages = pages; + actor->next_page = 0; + actor->squashfs_first_page = cache_first_page; + actor->squashfs_next_page = cache_next_page; + actor->squashfs_finish_page = cache_finish_page; + return actor; } -void squashfs_bh_to_actor(struct buffer_head **bh, int nr_buffers, - struct squashfs_page_actor *actor, int offset, int length, int blksz) +/* Implementation of page_actor for decompressing directly into page cache. 
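+ * This variant kmap_atomic()s each page cache page as the decompressor asks + * for it and kunmap_atomic()s the previous one, which is why, as noted above, + * callers must not sleep between squashfs_first_page() and + * squashfs_finish_page(). 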
*/ +static void *direct_first_page(struct squashfs_page_actor *actor) { - void *kaddr = NULL; - int bytes = 0, pgoff = 0, b = 0, p = 0, avail, i; - - while (bytes < length) { - if (actor->page[p]) { - kaddr = kmap_atomic(actor->page[p]); - while (pgoff < PAGE_CACHE_SIZE && bytes < length) { - avail = min_t(int, blksz - offset, - PAGE_CACHE_SIZE - pgoff); - memcpy(kaddr + pgoff, bh[b]->b_data + offset, - avail); - pgoff += avail; - bytes += avail; - offset = (offset + avail) % blksz; - if (!offset) { - put_bh(bh[b]); - ++b; - } - } - kunmap_atomic(kaddr); - pgoff = 0; - } else { - for (i = 0; i < PAGE_CACHE_SIZE / blksz; ++i) { - if (bh[b]) - put_bh(bh[b]); - ++b; - } - bytes += PAGE_CACHE_SIZE; - } - ++p; - } + actor->next_page = 1; + return actor->pageaddr = kmap_atomic(actor->page[0]); } -void squashfs_bh_to_buf(struct buffer_head **bh, int nr_buffers, void *buf, - int offset, int length, int blksz) +static void *direct_next_page(struct squashfs_page_actor *actor) { - int i, avail, bytes = 0; - - for (i = 0; i < nr_buffers && bytes < length; ++i) { - avail = min_t(int, length - bytes, blksz - offset); - if (bh[i]) { - memcpy(buf + bytes, bh[i]->b_data + offset, avail); - put_bh(bh[i]); - } - bytes += avail; - offset = 0; - } + if (actor->pageaddr) + kunmap_atomic(actor->pageaddr); + + return actor->pageaddr = actor->next_page == actor->pages ? NULL : + kmap_atomic(actor->page[actor->next_page++]); } -void free_page_array(struct page **page, int nr_pages) +static void direct_finish_page(struct squashfs_page_actor *actor) { - int i; - - for (i = 0; i < nr_pages; ++i) - __free_page(page[i]); - kfree(page); + if (actor->pageaddr) + kunmap_atomic(actor->pageaddr); } -struct page **alloc_page_array(int nr_pages, int gfp_mask) +struct squashfs_page_actor *squashfs_page_actor_init_special(struct page **page, + int pages, int length) { - int i; - struct page **page; + struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL); - page = kcalloc(nr_pages, sizeof(struct page *), gfp_mask); - if (!page) + if (actor == NULL) return NULL; - for (i = 0; i < nr_pages; ++i) { - page[i] = alloc_page(gfp_mask); - if (!page[i]) { - free_page_array(page, i); - return NULL; - } - } - return page; + + actor->length = length ? : pages * PAGE_CACHE_SIZE; + actor->page = page; + actor->pages = pages; + actor->next_page = 0; + actor->pageaddr = NULL; + actor->squashfs_first_page = direct_first_page; + actor->squashfs_next_page = direct_next_page; + actor->squashfs_finish_page = direct_finish_page; + return actor; } diff --git a/fs/squashfs/page_actor.h b/fs/squashfs/page_actor.h index aa1ed790b5a3..26dd82008b82 100644 --- a/fs/squashfs/page_actor.h +++ b/fs/squashfs/page_actor.h @@ -5,61 +5,77 @@ * Phillip Lougher <phillip@squashfs.org.uk> * * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level squashfsory. + * the COPYING file in the top-level directory. 
*/ +#ifndef CONFIG_SQUASHFS_FILE_DIRECT struct squashfs_page_actor { - struct page **page; - void *pageaddr; + void **page; int pages; int length; int next_page; - void (*release_pages)(struct page **, int, int); }; -extern struct squashfs_page_actor *squashfs_page_actor_init(struct page **, - int, int, void (*)(struct page **, int, int)); -extern void squashfs_page_actor_free(struct squashfs_page_actor *, int); +static inline struct squashfs_page_actor *squashfs_page_actor_init(void **page, + int pages, int length) +{ + struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL); -extern void squashfs_actor_to_buf(struct squashfs_page_actor *, void *, int); -extern void squashfs_buf_to_actor(void *, struct squashfs_page_actor *, int); -extern void squashfs_bh_to_actor(struct buffer_head **, int, - struct squashfs_page_actor *, int, int, int); -extern void squashfs_bh_to_buf(struct buffer_head **, int, void *, int, int, - int); + if (actor == NULL) + return NULL; + + actor->length = length ? : pages * PAGE_CACHE_SIZE; + actor->page = page; + actor->pages = pages; + actor->next_page = 0; + return actor; +} -/* - * Calling code should avoid sleeping between calls to squashfs_first_page() - * and squashfs_finish_page(). - */ static inline void *squashfs_first_page(struct squashfs_page_actor *actor) { actor->next_page = 1; - return actor->pageaddr = actor->page[0] ? kmap_atomic(actor->page[0]) - : NULL; + return actor->page[0]; } static inline void *squashfs_next_page(struct squashfs_page_actor *actor) { - if (!IS_ERR_OR_NULL(actor->pageaddr)) - kunmap_atomic(actor->pageaddr); - - if (actor->next_page == actor->pages) - return actor->pageaddr = ERR_PTR(-ENODATA); - - actor->pageaddr = actor->page[actor->next_page] ? - kmap_atomic(actor->page[actor->next_page]) : NULL; - ++actor->next_page; - return actor->pageaddr; + return actor->next_page == actor->pages ? NULL : + actor->page[actor->next_page++]; } static inline void squashfs_finish_page(struct squashfs_page_actor *actor) { - if (!IS_ERR_OR_NULL(actor->pageaddr)) - kunmap_atomic(actor->pageaddr); + /* empty */ } +#else +struct squashfs_page_actor { + union { + void **buffer; + struct page **page; + }; + void *pageaddr; + void *(*squashfs_first_page)(struct squashfs_page_actor *); + void *(*squashfs_next_page)(struct squashfs_page_actor *); + void (*squashfs_finish_page)(struct squashfs_page_actor *); + int pages; + int length; + int next_page; +}; -extern struct page **alloc_page_array(int, int); -extern void free_page_array(struct page **, int); - +extern struct squashfs_page_actor *squashfs_page_actor_init(void **, int, int); +extern struct squashfs_page_actor *squashfs_page_actor_init_special(struct page + **, int, int); +static inline void *squashfs_first_page(struct squashfs_page_actor *actor) +{ + return actor->squashfs_first_page(actor); +} +static inline void *squashfs_next_page(struct squashfs_page_actor *actor) +{ + return actor->squashfs_next_page(actor); +} +static inline void squashfs_finish_page(struct squashfs_page_actor *actor) +{ + actor->squashfs_finish_page(actor); +} +#endif #endif diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h index 6093579c6c5d..887d6d270080 100644 --- a/fs/squashfs/squashfs.h +++ b/fs/squashfs/squashfs.h @@ -28,14 +28,8 @@ #define WARNING(s, args...) 
pr_warn("SQUASHFS: "s, ## args) /* block.c */ -extern int squashfs_init_read_wq(void); -extern void squashfs_destroy_read_wq(void); extern int squashfs_read_data(struct super_block *, u64, int, u64 *, struct squashfs_page_actor *); -extern int squashfs_read_data(struct super_block *, u64, int, u64 *, - struct squashfs_page_actor *); -extern int squashfs_read_data_async(struct super_block *, u64, int, u64 *, - struct squashfs_page_actor *); /* cache.c */ extern struct squashfs_cache *squashfs_cache_init(char *, int, int); @@ -76,9 +70,8 @@ extern __le64 *squashfs_read_fragment_index_table(struct super_block *, void squashfs_copy_cache(struct page *, struct squashfs_cache_entry *, int, int); -/* file_direct.c */ -extern int squashfs_readpages_block(struct page *, struct list_head *, - unsigned int *, struct address_space *, int, u64, int); +/* file_xxx.c */ +extern int squashfs_readpage_block(struct page *, u64, int); /* id.c */ extern int squashfs_get_id(struct super_block *, unsigned int, unsigned int *); diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h index 9dc66d0003a6..5234c19a0eab 100644 --- a/fs/squashfs/squashfs_fs_sb.h +++ b/fs/squashfs/squashfs_fs_sb.h @@ -49,7 +49,7 @@ struct squashfs_cache_entry { int num_waiters; wait_queue_head_t wait_queue; struct squashfs_cache *cache; - struct page **page; + void **data; struct squashfs_page_actor *actor; }; diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 9602ce9ddfc7..44500dcf1805 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -445,15 +445,9 @@ static int __init init_squashfs_fs(void) if (err) return err; - if (!squashfs_init_read_wq()) { - destroy_inodecache(); - return -ENOMEM; - } - err = register_filesystem(&squashfs_fs_type); if (err) { destroy_inodecache(); - squashfs_destroy_read_wq(); return err; } @@ -467,7 +461,6 @@ static void __exit exit_squashfs_fs(void) { unregister_filesystem(&squashfs_fs_type); destroy_inodecache(); - squashfs_destroy_read_wq(); } diff --git a/fs/squashfs/xz_wrapper.c b/fs/squashfs/xz_wrapper.c index 14cd373e1897..c609624e4b8a 100644 --- a/fs/squashfs/xz_wrapper.c +++ b/fs/squashfs/xz_wrapper.c @@ -55,7 +55,7 @@ static void *squashfs_xz_comp_opts(struct squashfs_sb_info *msblk, struct comp_opts *opts; int err = 0, n; - opts = kmalloc(sizeof(*opts), GFP_ATOMIC); + opts = kmalloc(sizeof(*opts), GFP_KERNEL); if (opts == NULL) { err = -ENOMEM; goto out2; @@ -136,7 +136,6 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm, enum xz_ret xz_err; int avail, total = 0, k = 0; struct squashfs_xz *stream = strm; - void *buf = NULL; xz_dec_reset(stream->state); stream->buf.in_pos = 0; @@ -157,20 +156,12 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm, if (stream->buf.out_pos == stream->buf.out_size) { stream->buf.out = squashfs_next_page(output); - if (!IS_ERR(stream->buf.out)) { + if (stream->buf.out != NULL) { stream->buf.out_pos = 0; total += PAGE_CACHE_SIZE; } } - if (!stream->buf.out) { - if (!buf) { - buf = kmalloc(PAGE_CACHE_SIZE, GFP_ATOMIC); - if (!buf) - goto out; - } - stream->buf.out = buf; - } xz_err = xz_dec_run(stream->state, &stream->buf); if (stream->buf.in_pos == stream->buf.in_size && k < b) @@ -182,13 +173,11 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm, if (xz_err != XZ_STREAM_END || k < b) goto out; - kfree(buf); return total + stream->buf.out_pos; out: for (; k < b; k++) put_bh(bh[k]); - kfree(buf); return -EIO; } diff --git 
a/fs/squashfs/zlib_wrapper.c b/fs/squashfs/zlib_wrapper.c index 09c892b5308e..8727caba6882 100644 --- a/fs/squashfs/zlib_wrapper.c +++ b/fs/squashfs/zlib_wrapper.c @@ -66,7 +66,6 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void *strm, struct buffer_head **bh, int b, int offset, int length, struct squashfs_page_actor *output) { - void *buf = NULL; int zlib_err, zlib_init = 0, k = 0; z_stream *stream = strm; @@ -85,19 +84,10 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void *strm, if (stream->avail_out == 0) { stream->next_out = squashfs_next_page(output); - if (!IS_ERR(stream->next_out)) + if (stream->next_out != NULL) stream->avail_out = PAGE_CACHE_SIZE; } - if (!stream->next_out) { - if (!buf) { - buf = kmalloc(PAGE_CACHE_SIZE, GFP_ATOMIC); - if (!buf) - goto out; - } - stream->next_out = buf; - } - if (!zlib_init) { zlib_err = zlib_inflateInit(stream); if (zlib_err != Z_OK) { @@ -125,13 +115,11 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void *strm, if (k < b) goto out; - kfree(buf); return stream->total_out; out: for (; k < b; k++) put_bh(bh[k]); - kfree(buf); return -EIO; } diff --git a/fs/super.c b/fs/super.c index eb27955a229a..689ec96c43f8 100644 --- a/fs/super.c +++ b/fs/super.c @@ -982,7 +982,7 @@ static int set_bdev_super(struct super_block *s, void *data) * We set the bdi here to the queue backing, file systems can * overwrite this in ->fill_super() */ - s->s_bdi = &bdev_get_queue(s->s_bdev)->backing_dev_info; + s->s_bdi = bdev_get_queue(s->s_bdev)->backing_dev_info; return 0; } diff --git a/fs/timerfd.c b/fs/timerfd.c index ab8dd1538381..147b72349d3b 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c @@ -44,6 +44,8 @@ struct timerfd_ctx { bool might_cancel; }; +static atomic_t instance_count = ATOMIC_INIT(0); + static LIST_HEAD(cancel_list); static DEFINE_SPINLOCK(cancel_lock); @@ -387,6 +389,9 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) { int ufd; struct timerfd_ctx *ctx; + char task_comm_buf[TASK_COMM_LEN]; + char file_name_buf[32]; + int instance; /* Check the TFD_* constants for consistency. */ BUILD_BUG_ON(TFD_CLOEXEC != O_CLOEXEC); @@ -423,7 +428,12 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) ctx->moffs = ktime_mono_to_real((ktime_t){ .tv64 = 0 }); - ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx, + instance = atomic_inc_return(&instance_count); + get_task_comm(task_comm_buf, current); + snprintf(file_name_buf, sizeof(file_name_buf), "[timerfd%d_%.*s]", + instance, (int)sizeof(task_comm_buf), task_comm_buf); + + ufd = anon_inode_getfd(file_name_buf, &timerfd_fops, ctx, O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS)); if (ufd < 0) kfree(ctx); diff --git a/fs/ubifs/key.h b/fs/ubifs/key.h index 92a8491a8f8c..c0a95e393347 100644 --- a/fs/ubifs/key.h +++ b/fs/ubifs/key.h @@ -34,6 +34,12 @@ * node. We use "r5" hash borrowed from reiserfs. */ +/* + * Lots of the key helpers take a struct ubifs_info *c as the first parameter, + * but it is currently unused. It is there for future extension to different + * c->key_format values; right now there is only one key type, UBIFS_SIMPLE_KEY_FMT. 
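+ * For example, a helper such as ino_key_init(c, &key, inum) takes 'c' only + * so that a different key layout could later be selected per filesystem. 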
+ */ + #ifndef __UBIFS_KEY_H__ #define __UBIFS_KEY_H__ diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c index b5bf23b34241..de6f82d4eda2 100644 --- a/fs/ubifs/xattr.c +++ b/fs/ubifs/xattr.c @@ -268,7 +268,7 @@ static int check_namespace(const struct qstr *nm) if (!strncmp(nm->name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN)) { - if (nm->name[sizeof(XATTR_TRUSTED_PREFIX) - 1] == '\0') + if (nm->name[XATTR_TRUSTED_PREFIX_LEN] == '\0') return -EINVAL; type = TRUSTED_XATTR; } else if (!strncmp(nm->name, XATTR_USER_PREFIX, @@ -278,7 +278,7 @@ static int check_namespace(const struct qstr *nm) type = USER_XATTR; } else if (!strncmp(nm->name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN)) { - if (nm->name[sizeof(XATTR_SECURITY_PREFIX) - 1] == '\0') + if (nm->name[XATTR_SECURITY_PREFIX_LEN] == '\0') return -EINVAL; type = SECURITY_XATTR; } else diff --git a/fs/udf/namei.c b/fs/udf/namei.c index 074560ad190e..af6fd442b9d8 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -611,7 +611,8 @@ static int udf_add_nondir(struct dentry *dentry, struct inode *inode) if (fibh.sbh != fibh.ebh) brelse(fibh.ebh); brelse(fibh.sbh); - d_instantiate_new(dentry, inode); + unlock_new_inode(inode); + d_instantiate(dentry, inode); return 0; } @@ -721,7 +722,8 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) inc_nlink(dir); dir->i_ctime = dir->i_mtime = current_fs_time(dir->i_sb); mark_inode_dirty(dir); - d_instantiate_new(dentry, inode); + unlock_new_inode(inode); + d_instantiate(dentry, inode); if (fibh.sbh != fibh.ebh) brelse(fibh.ebh); brelse(fibh.sbh); diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c index 2ec7689c25cf..47966554317c 100644 --- a/fs/ufs/namei.c +++ b/fs/ufs/namei.c @@ -38,7 +38,8 @@ static inline int ufs_add_nondir(struct dentry *dentry, struct inode *inode) { int err = ufs_add_link(dentry, inode); if (!err) { - d_instantiate_new(dentry, inode); + unlock_new_inode(inode); + d_instantiate(dentry, inode); return 0; } inode_dec_link_count(inode); @@ -190,7 +191,8 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode) if (err) goto out_fail; - d_instantiate_new(dentry, inode); + unlock_new_inode(inode); + d_instantiate(dentry, inode); return 0; out_fail: diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 9d20f75c1209..85877c5da0df 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -805,6 +805,18 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, goto out_unlock; /* + * UFFDIO_COPY will fill file holes even without + * PROT_WRITE. This check enforces that if this is a + * MAP_SHARED, the process has write permission to the backing + * file. If VM_MAYWRITE is set it also enforces that on a + * MAP_SHARED vma: there is no F_WRITE_SEAL and no further + * F_WRITE_SEAL can be taken until the vma is destroyed. + */ + ret = -EPERM; + if (unlikely(!(cur->vm_flags & VM_MAYWRITE))) + goto out_unlock; + + /* * Check that this vma isn't already owned by a * different userfaultfd. 
We can't allow more than one * userfaultfd to own a single vma simultaneously or we @@ -829,6 +841,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, BUG_ON(vma->vm_ops); BUG_ON(vma->vm_userfaultfd_ctx.ctx && vma->vm_userfaultfd_ctx.ctx != ctx); + WARN_ON(!(vma->vm_flags & VM_MAYWRITE)); /* * Nothing to do: this vma is already registered into this @@ -970,6 +983,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx, cond_resched(); BUG_ON(vma->vm_ops); + WARN_ON(!(vma->vm_flags & VM_MAYWRITE)); /* * Nothing to do: this vma is already registered into this diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index c5101a3295d8..62ba66e1c598 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c @@ -289,8 +289,10 @@ xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) if (type == ACL_TYPE_ACCESS) { umode_t mode; - + struct posix_acl *old_acl = acl; error = posix_acl_update_mode(inode, &mode, &acl); + if (!acl) + posix_acl_release(old_acl); if (error) return error; error = xfs_set_mode(inode, mode); |