diff options
Diffstat (limited to 'fs/f2fs/file.c')
-rw-r--r-- | fs/f2fs/file.c | 846 |
1 files changed, 542 insertions, 304 deletions
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index eadd80ca1f27..aa8a31be2eb2 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/file.c * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/fs.h> #include <linux/f2fs_fs.h> @@ -44,6 +41,8 @@ static int f2fs_filemap_fault(struct vm_area_struct *vma, err = filemap_fault(vma, vmf); up_read(&F2FS_I(inode)->i_mmap_sem); + trace_f2fs_filemap_fault(inode, vmf->pgoff, (unsigned long)err); + return err; } @@ -53,7 +52,7 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page = vmf->page; struct inode *inode = file_inode(vma->vm_file); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct dnode_of_data dn; + struct dnode_of_data dn = { .node_changed = false }; int err; if (unlikely(f2fs_cp_error(sbi))) { @@ -65,19 +64,6 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, f2fs_bug_on(sbi, f2fs_has_inline_data(inode)); - /* block allocation */ - f2fs_lock_op(sbi); - set_new_dnode(&dn, inode, NULL, NULL, 0); - err = f2fs_reserve_block(&dn, page->index); - if (err) { - f2fs_unlock_op(sbi); - goto out; - } - f2fs_put_dnode(&dn); - f2fs_unlock_op(sbi); - - f2fs_balance_fs(sbi, dn.node_changed); - file_update_time(vma->vm_file); down_read(&F2FS_I(inode)->i_mmap_sem); lock_page(page); @@ -89,16 +75,34 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, goto out_sem; } + /* block allocation */ + __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true); + set_new_dnode(&dn, inode, NULL, NULL, 0); + err = f2fs_get_block(&dn, page->index); + f2fs_put_dnode(&dn); + __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false); + if (err) { + unlock_page(page); + goto out_sem; + } + + /* fill the page */ + f2fs_wait_on_page_writeback(page, DATA, false, true); + + /* wait for GCed page writeback via META_MAPPING */ + f2fs_wait_on_block_writeback(inode, dn.data_blkaddr); + /* * check to see if the page is mapped already (no holes) */ if (PageMappedToDisk(page)) - goto mapped; + goto out_sem; /* page is wholly or partially inside EOF */ if (((loff_t)(page->index + 1) << PAGE_SHIFT) > i_size_read(inode)) { - unsigned offset; + loff_t offset; + offset = i_size_read(inode) & ~PAGE_MASK; zero_user_segment(page, offset, PAGE_SIZE); } @@ -107,21 +111,15 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, SetPageUptodate(page); f2fs_update_iostat(sbi, APP_MAPPED_IO, F2FS_BLKSIZE); + f2fs_update_time(sbi, REQ_TIME); trace_f2fs_vm_page_mkwrite(page, DATA); -mapped: - /* fill the page */ - f2fs_wait_on_page_writeback(page, DATA, false); - - /* wait for GCed page writeback via META_MAPPING */ - if (f2fs_post_read_required(inode)) - f2fs_wait_on_block_writeback(sbi, dn.data_blkaddr); - out_sem: up_read(&F2FS_I(inode)->i_mmap_sem); -out: + + f2fs_balance_fs(sbi, dn.node_changed); + sb_end_pagefault(inode->i_sb); - f2fs_update_time(sbi, REQ_TIME); err: return block_page_mkwrite_return(err); } @@ -160,17 +158,18 @@ static inline enum cp_reason_type need_do_checkpoint(struct inode *inode) cp_reason = CP_SB_NEED_CP; else if (file_wrong_pino(inode)) cp_reason = CP_WRONG_PINO; - else if (!space_for_roll_forward(sbi)) + else if (!f2fs_space_for_roll_forward(sbi)) cp_reason = CP_NO_SPC_ROLL; - else if (!is_checkpointed_node(sbi, F2FS_I(inode)->i_pino)) + else if (!f2fs_is_checkpointed_node(sbi, F2FS_I(inode)->i_pino)) cp_reason = CP_NODE_NEED_CP; else if (test_opt(sbi, FASTBOOT)) cp_reason = CP_FASTBOOT_MODE; else if (F2FS_OPTION(sbi).active_logs == 2) cp_reason = CP_SPEC_LOG_NUM; else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT && - need_dentry_mark(sbi, inode->i_ino) && - exist_written_data(sbi, F2FS_I(inode)->i_pino, TRANS_DIR_INO)) + f2fs_need_dentry_mark(sbi, inode->i_ino) && + f2fs_exist_written_data(sbi, F2FS_I(inode)->i_pino, + TRANS_DIR_INO)) cp_reason = CP_RECOVER_DIR; return cp_reason; @@ -181,7 +180,7 @@ static bool need_inode_page_update(struct f2fs_sb_info *sbi, nid_t ino) struct page *i = find_get_page(NODE_MAPPING(sbi), ino); bool ret = false; /* But we need to avoid that there are some inode updates */ - if ((i && PageDirty(i)) || need_inode_block_update(sbi, ino)) + if ((i && PageDirty(i)) || f2fs_need_inode_block_update(sbi, ino)) ret = true; f2fs_put_page(i, 0); return ret; @@ -214,8 +213,10 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end, .nr_to_write = LONG_MAX, .for_reclaim = 0, }; + unsigned int seq_id = 0; - if (unlikely(f2fs_readonly(inode->i_sb))) + if (unlikely(f2fs_readonly(inode->i_sb) || + is_sbi_flag_set(sbi, SBI_CP_DISABLED))) return 0; trace_f2fs_sync_file_enter(inode); @@ -244,14 +245,14 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end, * if there is no written data, don't waste time to write recovery info. */ if (!is_inode_flag_set(inode, FI_APPEND_WRITE) && - !exist_written_data(sbi, ino, APPEND_INO)) { + !f2fs_exist_written_data(sbi, ino, APPEND_INO)) { /* it may call write_inode just prior to fsync */ if (need_inode_page_update(sbi, ino)) goto go_write; if (is_inode_flag_set(inode, FI_UPDATE_WRITE) || - exist_written_data(sbi, ino, UPDATE_INO)) + f2fs_exist_written_data(sbi, ino, UPDATE_INO)) goto flush_out; goto out; } @@ -278,7 +279,9 @@ go_write: goto out; } sync_nodes: - ret = fsync_node_pages(sbi, inode, &wbc, atomic); + atomic_inc(&sbi->wb_sync_req[NODE]); + ret = f2fs_fsync_node_pages(sbi, inode, &wbc, atomic, &seq_id); + atomic_dec(&sbi->wb_sync_req[NODE]); if (ret) goto out; @@ -288,7 +291,7 @@ sync_nodes: goto out; } - if (need_inode_block_update(sbi, ino)) { + if (f2fs_need_inode_block_update(sbi, ino)) { f2fs_mark_inode_dirty_sync(inode, true); f2fs_write_inode(inode, NULL); goto sync_nodes; @@ -303,21 +306,21 @@ sync_nodes: * given fsync mark. */ if (!atomic) { - ret = wait_on_node_pages_writeback(sbi, ino); + ret = f2fs_wait_on_node_pages_writeback(sbi, seq_id); if (ret) goto out; } /* once recovery info is written, don't need to tack this */ - remove_ino_entry(sbi, ino, APPEND_INO); + f2fs_remove_ino_entry(sbi, ino, APPEND_INO); clear_inode_flag(inode, FI_APPEND_WRITE); flush_out: if (!atomic && F2FS_OPTION(sbi).fsync_mode != FSYNC_MODE_NOBARRIER) ret = f2fs_issue_flush(sbi, inode->i_ino); if (!ret) { - remove_ino_entry(sbi, ino, UPDATE_INO); + f2fs_remove_ino_entry(sbi, ino, UPDATE_INO); clear_inode_flag(inode, FI_UPDATE_WRITE); - remove_ino_entry(sbi, ino, FLUSH_INO); + f2fs_remove_ino_entry(sbi, ino, FLUSH_INO); } f2fs_update_time(sbi, REQ_TIME); out: @@ -336,28 +339,29 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) static pgoff_t __get_first_dirty_index(struct address_space *mapping, pgoff_t pgofs, int whence) { - struct pagevec pvec; + struct page *page; int nr_pages; if (whence != SEEK_DATA) return 0; /* find first dirty page index */ - pagevec_init(&pvec, 0); - nr_pages = pagevec_lookup_tag(&pvec, mapping, &pgofs, - PAGECACHE_TAG_DIRTY, 1); - pgofs = nr_pages ? pvec.pages[0]->index : ULONG_MAX; - pagevec_release(&pvec); + nr_pages = find_get_pages_tag(mapping, &pgofs, PAGECACHE_TAG_DIRTY, + 1, &page); + if (!nr_pages) + return ULONG_MAX; + pgofs = page->index; + put_page(page); return pgofs; } -static bool __found_offset(block_t blkaddr, pgoff_t dirty, pgoff_t pgofs, - int whence) +static bool __found_offset(struct f2fs_sb_info *sbi, block_t blkaddr, + pgoff_t dirty, pgoff_t pgofs, int whence) { switch (whence) { case SEEK_DATA: if ((blkaddr == NEW_ADDR && dirty == pgofs) || - (blkaddr != NEW_ADDR && blkaddr != NULL_ADDR)) + __is_valid_data_blkaddr(blkaddr)) return true; break; case SEEK_HOLE: @@ -397,13 +401,13 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) for (; data_ofs < isize; data_ofs = (loff_t)pgofs << PAGE_SHIFT) { set_new_dnode(&dn, inode, NULL, NULL, 0); - err = get_dnode_of_data(&dn, pgofs, LOOKUP_NODE); + err = f2fs_get_dnode_of_data(&dn, pgofs, LOOKUP_NODE); if (err && err != -ENOENT) { goto fail; } else if (err == -ENOENT) { /* direct node does not exists */ if (whence == SEEK_DATA) { - pgofs = get_next_page_offset(&dn, pgofs); + pgofs = f2fs_get_next_page_offset(&dn, pgofs); continue; } else { goto found; @@ -417,10 +421,19 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) dn.ofs_in_node++, pgofs++, data_ofs = (loff_t)pgofs << PAGE_SHIFT) { block_t blkaddr; + blkaddr = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node); - if (__found_offset(blkaddr, dirty, pgofs, whence)) { + if (__is_valid_data_blkaddr(blkaddr) && + !f2fs_is_valid_blkaddr(F2FS_I_SB(inode), + blkaddr, DATA_GENERIC_ENHANCE)) { + f2fs_put_dnode(&dn); + goto fail; + } + + if (__found_offset(F2FS_I_SB(inode), blkaddr, dirty, + pgofs, whence)) { f2fs_put_dnode(&dn); goto found; } @@ -491,7 +504,7 @@ static int f2fs_file_open(struct inode *inode, struct file *filp) return dquot_file_open(inode, filp); } -void truncate_data_blocks_range(struct dnode_of_data *dn, int count) +void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count) { struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); struct f2fs_node *raw_node; @@ -507,12 +520,19 @@ void truncate_data_blocks_range(struct dnode_of_data *dn, int count) for (; count > 0; count--, addr++, dn->ofs_in_node++) { block_t blkaddr = le32_to_cpu(*addr); + if (blkaddr == NULL_ADDR) continue; dn->data_blkaddr = NULL_ADDR; - set_data_blkaddr(dn); - invalidate_blocks(sbi, blkaddr); + f2fs_set_data_blkaddr(dn); + + if (__is_valid_data_blkaddr(blkaddr) && + !f2fs_is_valid_blkaddr(sbi, blkaddr, + DATA_GENERIC_ENHANCE)) + continue; + + f2fs_invalidate_blocks(sbi, blkaddr); if (dn->ofs_in_node == 0 && IS_INODE(dn->node_page)) clear_inode_flag(dn->inode, FI_FIRST_BLOCK_WRITTEN); nr_free++; @@ -524,7 +544,7 @@ void truncate_data_blocks_range(struct dnode_of_data *dn, int count) * once we invalidate valid blkaddr in range [ofs, ofs + count], * we will invalidate all blkaddr in the whole range. */ - fofs = start_bidx_of_node(ofs_of_node(dn->node_page), + fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) + ofs; f2fs_update_extent_cache_range(dn, fofs, 0, len); dec_valid_block_count(sbi, dn->inode, nr_free); @@ -536,15 +556,15 @@ void truncate_data_blocks_range(struct dnode_of_data *dn, int count) dn->ofs_in_node, nr_free); } -void truncate_data_blocks(struct dnode_of_data *dn) +void f2fs_truncate_data_blocks(struct dnode_of_data *dn) { - truncate_data_blocks_range(dn, ADDRS_PER_BLOCK); + f2fs_truncate_data_blocks_range(dn, ADDRS_PER_BLOCK(dn->inode)); } static int truncate_partial_data_page(struct inode *inode, u64 from, bool cache_only) { - unsigned offset = from & (PAGE_SIZE - 1); + loff_t offset = from & (PAGE_SIZE - 1); pgoff_t index = from >> PAGE_SHIFT; struct address_space *mapping = inode->i_mapping; struct page *page; @@ -560,11 +580,11 @@ static int truncate_partial_data_page(struct inode *inode, u64 from, return 0; } - page = get_lock_data_page(inode, index, true); + page = f2fs_get_lock_data_page(inode, index, true); if (IS_ERR(page)) return PTR_ERR(page) == -ENOENT ? 0 : PTR_ERR(page); truncate_out: - f2fs_wait_on_page_writeback(page, DATA, true); + f2fs_wait_on_page_writeback(page, DATA, true, true); zero_user(page, offset, PAGE_SIZE - offset); /* An encrypted inode should have a key and truncate the last page. */ @@ -575,7 +595,7 @@ truncate_out: return 0; } -int truncate_blocks(struct inode *inode, u64 from, bool lock) +int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct dnode_of_data dn; @@ -594,21 +614,21 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock) if (lock) f2fs_lock_op(sbi); - ipage = get_node_page(sbi, inode->i_ino); + ipage = f2fs_get_node_page(sbi, inode->i_ino); if (IS_ERR(ipage)) { err = PTR_ERR(ipage); goto out; } if (f2fs_has_inline_data(inode)) { - truncate_inline_inode(inode, ipage, from); + f2fs_truncate_inline_inode(inode, ipage, from); f2fs_put_page(ipage, 1); truncate_page = true; goto out; } set_new_dnode(&dn, inode, ipage, NULL, 0); - err = get_dnode_of_data(&dn, free_from, LOOKUP_NODE_RA); + err = f2fs_get_dnode_of_data(&dn, free_from, LOOKUP_NODE_RA); if (err) { if (err == -ENOENT) goto free_next; @@ -621,13 +641,13 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock) f2fs_bug_on(sbi, count < 0); if (dn.ofs_in_node || IS_INODE(dn.node_page)) { - truncate_data_blocks_range(&dn, count); + f2fs_truncate_data_blocks_range(&dn, count); free_from += count; } f2fs_put_dnode(&dn); free_next: - err = truncate_inode_blocks(inode, free_from); + err = f2fs_truncate_inode_blocks(inode, free_from); out: if (lock) f2fs_unlock_op(sbi); @@ -653,12 +673,11 @@ int f2fs_truncate(struct inode *inode) trace_f2fs_truncate(inode); -#ifdef CONFIG_F2FS_FAULT_INJECTION if (time_to_inject(F2FS_I_SB(inode), FAULT_TRUNCATE)) { f2fs_show_injection_info(FAULT_TRUNCATE); return -EIO; } -#endif + /* we should check inline_data size */ if (!f2fs_may_inline_data(inode)) { err = f2fs_convert_inline_inode(inode); @@ -666,7 +685,7 @@ int f2fs_truncate(struct inode *inode) return err; } - err = truncate_blocks(inode, i_size_read(inode), true); + err = f2fs_truncate_blocks(inode, i_size_read(inode), true); if (err) return err; @@ -685,27 +704,26 @@ int f2fs_getattr(struct vfsmount *mnt, unsigned int flags; if (f2fs_has_extra_attr(inode) && - f2fs_sb_has_inode_crtime(inode->i_sb) && + f2fs_sb_has_inode_crtime(F2FS_I_SB(inode)) && F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_crtime)) { stat->result_mask |= STATX_BTIME; stat->btime.tv_sec = fi->i_crtime.tv_sec; stat->btime.tv_nsec = fi->i_crtime.tv_nsec; } - flags = fi->i_flags & (FS_FL_USER_VISIBLE | FS_PROJINHERIT_FL); - if (flags & FS_APPEND_FL) + flags = fi->i_flags; + if (flags & F2FS_APPEND_FL) stat->attributes |= STATX_ATTR_APPEND; - if (flags & FS_COMPR_FL) + if (flags & F2FS_COMPR_FL) stat->attributes |= STATX_ATTR_COMPRESSED; - if (f2fs_encrypted_inode(inode)) + if (IS_ENCRYPTED(inode)) stat->attributes |= STATX_ATTR_ENCRYPTED; - if (flags & FS_IMMUTABLE_FL) + if (flags & F2FS_IMMUTABLE_FL) stat->attributes |= STATX_ATTR_IMMUTABLE; - if (flags & FS_NODUMP_FL) + if (flags & F2FS_NODUMP_FL) stat->attributes |= STATX_ATTR_NODUMP; stat->attributes_mask |= (STATX_ATTR_APPEND | - STATX_ATTR_COMPRESSED | STATX_ATTR_ENCRYPTED | STATX_ATTR_IMMUTABLE | STATX_ATTR_NODUMP); @@ -755,7 +773,6 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) { struct inode *inode = d_inode(dentry); int err; - bool size_changed = false; if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) return -EIO; @@ -777,28 +794,47 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) !uid_eq(attr->ia_uid, inode->i_uid)) || (attr->ia_valid & ATTR_GID && !gid_eq(attr->ia_gid, inode->i_gid))) { + f2fs_lock_op(F2FS_I_SB(inode)); err = dquot_transfer(inode, attr); - if (err) + if (err) { + set_sbi_flag(F2FS_I_SB(inode), + SBI_QUOTA_NEED_REPAIR); + f2fs_unlock_op(F2FS_I_SB(inode)); return err; + } + /* + * update uid/gid under lock_op(), so that dquot and inode can + * be updated atomically. + */ + if (attr->ia_valid & ATTR_UID) + inode->i_uid = attr->ia_uid; + if (attr->ia_valid & ATTR_GID) + inode->i_gid = attr->ia_gid; + f2fs_mark_inode_dirty_sync(inode, true); + f2fs_unlock_op(F2FS_I_SB(inode)); } if (attr->ia_valid & ATTR_SIZE) { - if (attr->ia_size <= i_size_read(inode)) { - down_write(&F2FS_I(inode)->i_mmap_sem); - truncate_setsize(inode, attr->ia_size); + bool to_smaller = (attr->ia_size <= i_size_read(inode)); + + down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + down_write(&F2FS_I(inode)->i_mmap_sem); + + truncate_setsize(inode, attr->ia_size); + + if (to_smaller) err = f2fs_truncate(inode); - up_write(&F2FS_I(inode)->i_mmap_sem); - if (err) - return err; - } else { - /* - * do not trim all blocks after i_size if target size is - * larger than i_size. - */ - down_write(&F2FS_I(inode)->i_mmap_sem); - truncate_setsize(inode, attr->ia_size); - up_write(&F2FS_I(inode)->i_mmap_sem); + /* + * do not trim all blocks after i_size if target size is + * larger than i_size. + */ + up_write(&F2FS_I(inode)->i_mmap_sem); + up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + if (err) + return err; + + if (!to_smaller) { /* should convert inline inode here */ if (!f2fs_may_inline_data(inode)) { err = f2fs_convert_inline_inode(inode); @@ -811,14 +847,12 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) down_write(&F2FS_I(inode)->i_sem); F2FS_I(inode)->last_disk_size = i_size_read(inode); up_write(&F2FS_I(inode)->i_sem); - - size_changed = true; } __setattr_copy(inode, attr); if (attr->ia_valid & ATTR_MODE) { - err = posix_acl_chmod(inode, get_inode_mode(inode)); + err = posix_acl_chmod(inode, f2fs_get_inode_mode(inode)); if (err || is_inode_flag_set(inode, FI_ACL_MODE)) { inode->i_mode = F2FS_I(inode)->i_acl_mode; clear_inode_flag(inode, FI_ACL_MODE); @@ -826,7 +860,7 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) } /* file size may changed here */ - f2fs_mark_inode_dirty_sync(inode, size_changed); + f2fs_mark_inode_dirty_sync(inode, true); /* inode change will produce dirty node pages flushed by checkpoint */ f2fs_balance_fs(F2FS_I_SB(inode), true); @@ -860,20 +894,20 @@ static int fill_zero(struct inode *inode, pgoff_t index, f2fs_balance_fs(sbi, true); f2fs_lock_op(sbi); - page = get_new_data_page(inode, NULL, index, false); + page = f2fs_get_new_data_page(inode, NULL, index, false); f2fs_unlock_op(sbi); if (IS_ERR(page)) return PTR_ERR(page); - f2fs_wait_on_page_writeback(page, DATA, true); + f2fs_wait_on_page_writeback(page, DATA, true, true); zero_user(page, start, len); set_page_dirty(page); f2fs_put_page(page, 1); return 0; } -int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end) +int f2fs_truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end) { int err; @@ -882,10 +916,11 @@ int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end) pgoff_t end_offset, count; set_new_dnode(&dn, inode, NULL, NULL, 0); - err = get_dnode_of_data(&dn, pg_start, LOOKUP_NODE); + err = f2fs_get_dnode_of_data(&dn, pg_start, LOOKUP_NODE); if (err) { if (err == -ENOENT) { - pg_start = get_next_page_offset(&dn, pg_start); + pg_start = f2fs_get_next_page_offset(&dn, + pg_start); continue; } return err; @@ -896,7 +931,7 @@ int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end) f2fs_bug_on(F2FS_I_SB(inode), count == 0 || count > end_offset); - truncate_data_blocks_range(&dn, count); + f2fs_truncate_data_blocks_range(&dn, count); f2fs_put_dnode(&dn); pg_start += count; @@ -947,14 +982,19 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len) blk_start = (loff_t)pg_start << PAGE_SHIFT; blk_end = (loff_t)pg_end << PAGE_SHIFT; + + down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); down_write(&F2FS_I(inode)->i_mmap_sem); + truncate_inode_pages_range(mapping, blk_start, blk_end - 1); f2fs_lock_op(sbi); - ret = truncate_hole(inode, pg_start, pg_end); + ret = f2fs_truncate_hole(inode, pg_start, pg_end); f2fs_unlock_op(sbi); + up_write(&F2FS_I(inode)->i_mmap_sem); + up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); } } @@ -970,13 +1010,14 @@ static int __read_out_blkaddrs(struct inode *inode, block_t *blkaddr, next_dnode: set_new_dnode(&dn, inode, NULL, NULL, 0); - ret = get_dnode_of_data(&dn, off, LOOKUP_NODE_RA); + ret = f2fs_get_dnode_of_data(&dn, off, LOOKUP_NODE_RA); if (ret && ret != -ENOENT) { return ret; } else if (ret == -ENOENT) { if (dn.max_level == 0) return -ENOENT; - done = min((pgoff_t)ADDRS_PER_BLOCK - dn.ofs_in_node, len); + done = min((pgoff_t)ADDRS_PER_BLOCK(inode) - dn.ofs_in_node, + len); blkaddr += done; do_replace += done; goto next; @@ -987,7 +1028,15 @@ next_dnode: for (i = 0; i < done; i++, blkaddr++, do_replace++, dn.ofs_in_node++) { *blkaddr = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node); - if (!is_checkpointed_data(sbi, *blkaddr)) { + + if (__is_valid_data_blkaddr(*blkaddr) && + !f2fs_is_valid_blkaddr(sbi, *blkaddr, + DATA_GENERIC_ENHANCE)) { + f2fs_put_dnode(&dn); + return -EFSCORRUPTED; + } + + if (!f2fs_is_checkpointed_data(sbi, *blkaddr)) { if (test_opt(sbi, LFS)) { f2fs_put_dnode(&dn); @@ -1020,10 +1069,10 @@ static int __roll_back_blkaddrs(struct inode *inode, block_t *blkaddr, continue; set_new_dnode(&dn, inode, NULL, NULL, 0); - ret = get_dnode_of_data(&dn, off + i, LOOKUP_NODE_RA); + ret = f2fs_get_dnode_of_data(&dn, off + i, LOOKUP_NODE_RA); if (ret) { dec_valid_block_count(sbi, inode, 1); - invalidate_blocks(sbi, *blkaddr); + f2fs_invalidate_blocks(sbi, *blkaddr); } else { f2fs_update_data_blkaddr(&dn, *blkaddr); } @@ -1053,18 +1102,23 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode, pgoff_t ilen; set_new_dnode(&dn, dst_inode, NULL, NULL, 0); - ret = get_dnode_of_data(&dn, dst + i, ALLOC_NODE); + ret = f2fs_get_dnode_of_data(&dn, dst + i, ALLOC_NODE); if (ret) return ret; - get_node_info(sbi, dn.nid, &ni); + ret = f2fs_get_node_info(sbi, dn.nid, &ni); + if (ret) { + f2fs_put_dnode(&dn); + return ret; + } + ilen = min((pgoff_t) ADDRS_PER_PAGE(dn.node_page, dst_inode) - dn.ofs_in_node, len - i); do { dn.data_blkaddr = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node); - truncate_data_blocks_range(&dn, 1); + f2fs_truncate_data_blocks_range(&dn, 1); if (do_replace[i]) { f2fs_i_blocks_write(src_inode, @@ -1087,10 +1141,11 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode, } else { struct page *psrc, *pdst; - psrc = get_lock_data_page(src_inode, src + i, true); + psrc = f2fs_get_lock_data_page(src_inode, + src + i, true); if (IS_ERR(psrc)) return PTR_ERR(psrc); - pdst = get_new_data_page(dst_inode, NULL, dst + i, + pdst = f2fs_get_new_data_page(dst_inode, NULL, dst + i, true); if (IS_ERR(pdst)) { f2fs_put_page(psrc, 1); @@ -1101,7 +1156,8 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode, f2fs_put_page(pdst, 1); f2fs_put_page(psrc, 1); - ret = truncate_hole(src_inode, src + i, src + i + 1); + ret = f2fs_truncate_hole(src_inode, + src + i, src + i + 1); if (ret) return ret; i++; @@ -1120,15 +1176,17 @@ static int __exchange_data_block(struct inode *src_inode, int ret; while (len) { - olen = min((pgoff_t)4 * ADDRS_PER_BLOCK, len); + olen = min((pgoff_t)4 * ADDRS_PER_BLOCK(src_inode), len); src_blkaddr = f2fs_kvzalloc(F2FS_I_SB(src_inode), - sizeof(block_t) * olen, GFP_KERNEL); + array_size(olen, sizeof(block_t)), + GFP_KERNEL); if (!src_blkaddr) return -ENOMEM; do_replace = f2fs_kvzalloc(F2FS_I_SB(src_inode), - sizeof(int) * olen, GFP_KERNEL); + array_size(olen, sizeof(int)), + GFP_KERNEL); if (!do_replace) { kvfree(src_blkaddr); return -ENOMEM; @@ -1154,31 +1212,39 @@ static int __exchange_data_block(struct inode *src_inode, return 0; roll_back: - __roll_back_blkaddrs(src_inode, src_blkaddr, do_replace, src, len); + __roll_back_blkaddrs(src_inode, src_blkaddr, do_replace, src, olen); kvfree(src_blkaddr); kvfree(do_replace); return ret; } -static int f2fs_do_collapse(struct inode *inode, pgoff_t start, pgoff_t end) +static int f2fs_do_collapse(struct inode *inode, loff_t offset, loff_t len) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - pgoff_t nrpages = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE; + pgoff_t nrpages = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); + pgoff_t start = offset >> PAGE_SHIFT; + pgoff_t end = (offset + len) >> PAGE_SHIFT; int ret; f2fs_balance_fs(sbi, true); - f2fs_lock_op(sbi); - f2fs_drop_extent_tree(inode); + /* avoid gc operation during block exchange */ + down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + down_write(&F2FS_I(inode)->i_mmap_sem); + f2fs_lock_op(sbi); + f2fs_drop_extent_tree(inode); + truncate_pagecache(inode, offset); ret = __exchange_data_block(inode, inode, end, start, nrpages - end, true); f2fs_unlock_op(sbi); + + up_write(&F2FS_I(inode)->i_mmap_sem); + up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); return ret; } static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len) { - pgoff_t pg_start, pg_end; loff_t new_size; int ret; @@ -1193,37 +1259,27 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len) if (ret) return ret; - pg_start = offset >> PAGE_SHIFT; - pg_end = (offset + len) >> PAGE_SHIFT; - - /* avoid gc operation during block exchange */ - down_write(&F2FS_I(inode)->dio_rwsem[WRITE]); - - down_write(&F2FS_I(inode)->i_mmap_sem); /* write out all dirty pages from offset */ ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX); if (ret) - goto out_unlock; - - truncate_pagecache(inode, offset); + return ret; - ret = f2fs_do_collapse(inode, pg_start, pg_end); + ret = f2fs_do_collapse(inode, offset, len); if (ret) - goto out_unlock; + return ret; /* write out all moved pages, if possible */ + down_write(&F2FS_I(inode)->i_mmap_sem); filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX); truncate_pagecache(inode, offset); new_size = i_size_read(inode) - len; truncate_pagecache(inode, new_size); - ret = truncate_blocks(inode, new_size, true); + ret = f2fs_truncate_blocks(inode, new_size, true); + up_write(&F2FS_I(inode)->i_mmap_sem); if (!ret) f2fs_i_size_write(inode, new_size); -out_unlock: - up_write(&F2FS_I(inode)->i_mmap_sem); - up_write(&F2FS_I(inode)->dio_rwsem[WRITE]); return ret; } @@ -1243,7 +1299,7 @@ static int f2fs_do_zero_range(struct dnode_of_data *dn, pgoff_t start, } dn->ofs_in_node = ofs_in_node; - ret = reserve_new_blocks(dn, count); + ret = f2fs_reserve_new_blocks(dn, count); if (ret) return ret; @@ -1252,7 +1308,7 @@ static int f2fs_do_zero_range(struct dnode_of_data *dn, pgoff_t start, dn->data_blkaddr = datablock_addr(dn->inode, dn->node_page, dn->ofs_in_node); /* - * reserve_new_blocks will not guarantee entire block + * f2fs_reserve_new_blocks will not guarantee entire block * allocation. */ if (dn->data_blkaddr == NULL_ADDR) { @@ -1260,9 +1316,9 @@ static int f2fs_do_zero_range(struct dnode_of_data *dn, pgoff_t start, break; } if (dn->data_blkaddr != NEW_ADDR) { - invalidate_blocks(sbi, dn->data_blkaddr); + f2fs_invalidate_blocks(sbi, dn->data_blkaddr); dn->data_blkaddr = NEW_ADDR; - set_data_blkaddr(dn); + f2fs_set_data_blkaddr(dn); } } @@ -1289,12 +1345,9 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, if (ret) return ret; - down_write(&F2FS_I(inode)->i_mmap_sem); ret = filemap_write_and_wait_range(mapping, offset, offset + len - 1); if (ret) - goto out_sem; - - truncate_pagecache_range(inode, offset, offset + len - 1); + return ret; pg_start = ((unsigned long long) offset) >> PAGE_SHIFT; pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT; @@ -1306,7 +1359,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, ret = fill_zero(inode, pg_start, off_start, off_end - off_start); if (ret) - goto out_sem; + return ret; new_size = max_t(loff_t, new_size, offset + len); } else { @@ -1314,7 +1367,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, ret = fill_zero(inode, pg_start++, off_start, PAGE_SIZE - off_start); if (ret) - goto out_sem; + return ret; new_size = max_t(loff_t, new_size, (loff_t)pg_start << PAGE_SHIFT); @@ -1325,12 +1378,21 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, unsigned int end_offset; pgoff_t end; + down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + down_write(&F2FS_I(inode)->i_mmap_sem); + + truncate_pagecache_range(inode, + (loff_t)index << PAGE_SHIFT, + ((loff_t)pg_end << PAGE_SHIFT) - 1); + f2fs_lock_op(sbi); set_new_dnode(&dn, inode, NULL, NULL, 0); - ret = get_dnode_of_data(&dn, index, ALLOC_NODE); + ret = f2fs_get_dnode_of_data(&dn, index, ALLOC_NODE); if (ret) { f2fs_unlock_op(sbi); + up_write(&F2FS_I(inode)->i_mmap_sem); + up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); goto out; } @@ -1339,7 +1401,10 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, ret = f2fs_do_zero_range(&dn, index, end); f2fs_put_dnode(&dn); + f2fs_unlock_op(sbi); + up_write(&F2FS_I(inode)->i_mmap_sem); + up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); f2fs_balance_fs(sbi, dn.node_changed); @@ -1367,9 +1432,6 @@ out: else f2fs_i_size_write(inode, new_size); } -out_sem: - up_write(&F2FS_I(inode)->i_mmap_sem); - return ret; } @@ -1398,25 +1460,26 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) f2fs_balance_fs(sbi, true); - /* avoid gc operation during block exchange */ - down_write(&F2FS_I(inode)->dio_rwsem[WRITE]); - down_write(&F2FS_I(inode)->i_mmap_sem); - ret = truncate_blocks(inode, i_size_read(inode), true); + ret = f2fs_truncate_blocks(inode, i_size_read(inode), true); + up_write(&F2FS_I(inode)->i_mmap_sem); if (ret) - goto out; + return ret; /* write out all dirty pages from offset */ ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX); if (ret) - goto out; - - truncate_pagecache(inode, offset); + return ret; pg_start = offset >> PAGE_SHIFT; pg_end = (offset + len) >> PAGE_SHIFT; delta = pg_end - pg_start; - idx = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE; + idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); + + /* avoid gc operation during block exchange */ + down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + down_write(&F2FS_I(inode)->i_mmap_sem); + truncate_pagecache(inode, offset); while (!ret && idx > pg_start) { nr = idx - pg_start; @@ -1431,16 +1494,17 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) idx + delta, nr, false); f2fs_unlock_op(sbi); } + up_write(&F2FS_I(inode)->i_mmap_sem); + up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); /* write out all moved pages, if possible */ + down_write(&F2FS_I(inode)->i_mmap_sem); filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX); truncate_pagecache(inode, offset); + up_write(&F2FS_I(inode)->i_mmap_sem); if (!ret) f2fs_i_size_write(inode, new_size); -out: - up_write(&F2FS_I(inode)->i_mmap_sem); - up_write(&F2FS_I(inode)->dio_rwsem[WRITE]); return ret; } @@ -1449,7 +1513,8 @@ static int expand_inode_data(struct inode *inode, loff_t offset, { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_map_blocks map = { .m_next_pgofs = NULL, - .m_next_extent = NULL, .m_seg_type = NO_CHECK_TYPE }; + .m_next_extent = NULL, .m_seg_type = NO_CHECK_TYPE, + .m_may_create = true }; pgoff_t pg_end; loff_t new_size = i_size_read(inode); loff_t off_end; @@ -1473,7 +1538,12 @@ static int expand_inode_data(struct inode *inode, loff_t offset, if (off_end) map.m_len++; - err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_AIO); + if (f2fs_is_pinned_file(inode)) + map.m_seg_type = CURSEG_COLD_DATA; + + err = f2fs_map_blocks(inode, &map, 1, (f2fs_is_pinned_file(inode) ? + F2FS_GET_BLOCK_PRE_DIO : + F2FS_GET_BLOCK_PRE_AIO)); if (err) { pgoff_t last_off; @@ -1483,7 +1553,7 @@ static int expand_inode_data(struct inode *inode, loff_t offset, last_off = map.m_lblk + map.m_len - 1; /* update new size to the failed position */ - new_size = (last_off == pg_end) ? offset + len: + new_size = (last_off == pg_end) ? offset + len : (loff_t)(last_off + 1) << PAGE_SHIFT; } else { new_size = ((loff_t)pg_end << PAGE_SHIFT) + off_end; @@ -1563,13 +1633,13 @@ static int f2fs_release_file(struct inode *inode, struct file *filp) /* some remained atomic pages should discarded */ if (f2fs_is_atomic_file(inode)) - drop_inmem_pages(inode); + f2fs_drop_inmem_pages(inode); if (f2fs_is_volatile_file(inode)) { - clear_inode_flag(inode, FI_VOLATILE_FILE); - stat_dec_volatile_write(inode); set_inode_flag(inode, FI_DROP_CACHE); filemap_fdatawrite(inode->i_mapping); clear_inode_flag(inode, FI_DROP_CACHE); + clear_inode_flag(inode, FI_VOLATILE_FILE); + stat_dec_volatile_write(inode); } return 0; } @@ -1586,63 +1656,156 @@ static int f2fs_file_flush(struct file *file, fl_owner_t id) */ if (f2fs_is_atomic_file(inode) && F2FS_I(inode)->inmem_task == current) - drop_inmem_pages(inode); + f2fs_drop_inmem_pages(inode); + return 0; +} + +static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask) +{ + struct f2fs_inode_info *fi = F2FS_I(inode); + + /* Is it quota file? Do not allow user to mess with it */ + if (IS_NOQUOTA(inode)) + return -EPERM; + + fi->i_flags = iflags | (fi->i_flags & ~mask); + + if (fi->i_flags & F2FS_PROJINHERIT_FL) + set_inode_flag(inode, FI_PROJ_INHERIT); + else + clear_inode_flag(inode, FI_PROJ_INHERIT); + + inode->i_ctime = current_time(inode); + f2fs_set_inode_flags(inode); + f2fs_mark_inode_dirty_sync(inode, true); return 0; } +/* FS_IOC_GETFLAGS and FS_IOC_SETFLAGS support */ + +/* + * To make a new on-disk f2fs i_flag gettable via FS_IOC_GETFLAGS, add an entry + * for it to f2fs_fsflags_map[], and add its FS_*_FL equivalent to + * F2FS_GETTABLE_FS_FL. To also make it settable via FS_IOC_SETFLAGS, also add + * its FS_*_FL equivalent to F2FS_SETTABLE_FS_FL. + */ + +static const struct { + u32 iflag; + u32 fsflag; +} f2fs_fsflags_map[] = { + { F2FS_SYNC_FL, FS_SYNC_FL }, + { F2FS_IMMUTABLE_FL, FS_IMMUTABLE_FL }, + { F2FS_APPEND_FL, FS_APPEND_FL }, + { F2FS_NODUMP_FL, FS_NODUMP_FL }, + { F2FS_NOATIME_FL, FS_NOATIME_FL }, + { F2FS_INDEX_FL, FS_INDEX_FL }, + { F2FS_DIRSYNC_FL, FS_DIRSYNC_FL }, + { F2FS_PROJINHERIT_FL, FS_PROJINHERIT_FL }, +}; + +#define F2FS_GETTABLE_FS_FL ( \ + FS_SYNC_FL | \ + FS_IMMUTABLE_FL | \ + FS_APPEND_FL | \ + FS_NODUMP_FL | \ + FS_NOATIME_FL | \ + FS_INDEX_FL | \ + FS_DIRSYNC_FL | \ + FS_PROJINHERIT_FL | \ + FS_ENCRYPT_FL | \ + FS_INLINE_DATA_FL | \ + FS_NOCOW_FL) + +#define F2FS_SETTABLE_FS_FL ( \ + FS_SYNC_FL | \ + FS_IMMUTABLE_FL | \ + FS_APPEND_FL | \ + FS_NODUMP_FL | \ + FS_NOATIME_FL | \ + FS_DIRSYNC_FL | \ + FS_PROJINHERIT_FL) + +/* Convert f2fs on-disk i_flags to FS_IOC_{GET,SET}FLAGS flags */ +static inline u32 f2fs_iflags_to_fsflags(u32 iflags) +{ + u32 fsflags = 0; + int i; + + for (i = 0; i < ARRAY_SIZE(f2fs_fsflags_map); i++) + if (iflags & f2fs_fsflags_map[i].iflag) + fsflags |= f2fs_fsflags_map[i].fsflag; + + return fsflags; +} + +/* Convert FS_IOC_{GET,SET}FLAGS flags to f2fs on-disk i_flags */ +static inline u32 f2fs_fsflags_to_iflags(u32 fsflags) +{ + u32 iflags = 0; + int i; + + for (i = 0; i < ARRAY_SIZE(f2fs_fsflags_map); i++) + if (fsflags & f2fs_fsflags_map[i].fsflag) + iflags |= f2fs_fsflags_map[i].iflag; + + return iflags; +} + static int f2fs_ioc_getflags(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); struct f2fs_inode_info *fi = F2FS_I(inode); - unsigned int flags = fi->i_flags & FS_FL_USER_VISIBLE; - return put_user(flags, (int __user *)arg); + u32 fsflags = f2fs_iflags_to_fsflags(fi->i_flags); + + if (IS_ENCRYPTED(inode)) + fsflags |= FS_ENCRYPT_FL; + if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) + fsflags |= FS_INLINE_DATA_FL; + if (is_inode_flag_set(inode, FI_PIN_FILE)) + fsflags |= FS_NOCOW_FL; + + fsflags &= F2FS_GETTABLE_FS_FL; + + return put_user(fsflags, (int __user *)arg); } static int f2fs_ioc_setflags(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); struct f2fs_inode_info *fi = F2FS_I(inode); - unsigned int flags; - unsigned int oldflags; + u32 fsflags, old_fsflags; + u32 iflags; int ret; if (!inode_owner_or_capable(inode)) return -EACCES; - if (get_user(flags, (int __user *)arg)) + if (get_user(fsflags, (int __user *)arg)) return -EFAULT; + if (fsflags & ~F2FS_GETTABLE_FS_FL) + return -EOPNOTSUPP; + fsflags &= F2FS_SETTABLE_FS_FL; + + iflags = f2fs_fsflags_to_iflags(fsflags); + if (f2fs_mask_flags(inode->i_mode, iflags) != iflags) + return -EOPNOTSUPP; + ret = mnt_want_write_file(filp); if (ret) return ret; inode_lock(inode); - /* Is it quota file? Do not allow user to mess with it */ - if (IS_NOQUOTA(inode)) { - ret = -EPERM; - goto unlock_out; - } - - flags = f2fs_mask_flags(inode->i_mode, flags); - - oldflags = fi->i_flags; - - if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) { - if (!capable(CAP_LINUX_IMMUTABLE)) { - ret = -EPERM; - goto unlock_out; - } - } - - flags = flags & FS_FL_USER_MODIFIABLE; - flags |= oldflags & ~FS_FL_USER_MODIFIABLE; - fi->i_flags = flags; + old_fsflags = f2fs_iflags_to_fsflags(fi->i_flags); + ret = vfs_ioc_setflags_prepare(inode, old_fsflags, fsflags); + if (ret) + goto out; - inode->i_ctime = current_time(inode); - f2fs_set_inode_flags(inode); - f2fs_mark_inode_dirty_sync(inode, false); -unlock_out: + ret = f2fs_setflags_common(inode, iflags, + f2fs_fsflags_to_iflags(F2FS_SETTABLE_FS_FL)); +out: inode_unlock(inode); mnt_drop_write_file(filp); return ret; @@ -1658,6 +1821,8 @@ static int f2fs_ioc_getversion(struct file *filp, unsigned long arg) static int f2fs_ioc_start_atomic_write(struct file *filp) { struct inode *inode = file_inode(filp); + struct f2fs_inode_info *fi = F2FS_I(inode); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); int ret; if (!inode_owner_or_capable(inode)) @@ -1672,31 +1837,42 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) inode_lock(inode); - if (f2fs_is_atomic_file(inode)) + if (f2fs_is_atomic_file(inode)) { + if (is_inode_flag_set(inode, FI_ATOMIC_REVOKE_REQUEST)) + ret = -EINVAL; goto out; + } ret = f2fs_convert_inline_inode(inode); if (ret) goto out; - set_inode_flag(inode, FI_ATOMIC_FILE); - set_inode_flag(inode, FI_HOT_DATA); - f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); + down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); - if (!get_dirty_pages(inode)) - goto inc_stat; - - f2fs_msg(F2FS_I_SB(inode)->sb, KERN_WARNING, - "Unexpected flush for atomic writes: ino=%lu, npages=%u", - inode->i_ino, get_dirty_pages(inode)); + /* + * Should wait end_io to count F2FS_WB_CP_DATA correctly by + * f2fs_is_atomic_file. + */ + if (get_dirty_pages(inode)) + f2fs_warn(F2FS_I_SB(inode), "Unexpected flush for atomic writes: ino=%lu, npages=%u", + inode->i_ino, get_dirty_pages(inode)); ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); if (ret) { - clear_inode_flag(inode, FI_ATOMIC_FILE); - clear_inode_flag(inode, FI_HOT_DATA); + up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); goto out; } -inc_stat: + spin_lock(&sbi->inode_lock[ATOMIC_FILE]); + if (list_empty(&fi->inmem_ilist)) + list_add_tail(&fi->inmem_ilist, &sbi->inode_list[ATOMIC_FILE]); + spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); + + /* add inode in inmem_list first and set atomic_file */ + set_inode_flag(inode, FI_ATOMIC_FILE); + clear_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST); + up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + + f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); F2FS_I(inode)->inmem_task = current; stat_inc_atomic_write(inode); stat_update_max_atomic_write(inode); @@ -1718,29 +1894,31 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp) if (ret) return ret; - inode_lock(inode); + f2fs_balance_fs(F2FS_I_SB(inode), true); - down_write(&F2FS_I(inode)->dio_rwsem[WRITE]); + inode_lock(inode); - if (f2fs_is_volatile_file(inode)) + if (f2fs_is_volatile_file(inode)) { + ret = -EINVAL; goto err_out; + } if (f2fs_is_atomic_file(inode)) { - ret = commit_inmem_pages(inode); + ret = f2fs_commit_inmem_pages(inode); if (ret) goto err_out; ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true); - if (!ret) { - clear_inode_flag(inode, FI_ATOMIC_FILE); - clear_inode_flag(inode, FI_HOT_DATA); - stat_dec_atomic_write(inode); - } + if (!ret) + f2fs_drop_inmem_pages(inode); } else { ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 1, false); } err_out: - up_write(&F2FS_I(inode)->dio_rwsem[WRITE]); + if (is_inode_flag_set(inode, FI_ATOMIC_REVOKE_REQUEST)) { + clear_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST); + ret = -EINVAL; + } inode_unlock(inode); mnt_drop_write_file(filp); return ret; @@ -1825,13 +2003,15 @@ static int f2fs_ioc_abort_volatile_write(struct file *filp) inode_lock(inode); if (f2fs_is_atomic_file(inode)) - drop_inmem_pages(inode); + f2fs_drop_inmem_pages(inode); if (f2fs_is_volatile_file(inode)) { clear_inode_flag(inode, FI_VOLATILE_FILE); stat_dec_volatile_write(inode); ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true); } + clear_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST); + inode_unlock(inode); mnt_drop_write_file(filp); @@ -1845,7 +2025,7 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct super_block *sb = sbi->sb; __u32 in; - int ret; + int ret = 0; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -1853,9 +2033,11 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) if (get_user(in, (__u32 __user *)arg)) return -EFAULT; - ret = mnt_want_write_file(filp); - if (ret) - return ret; + if (in != F2FS_GOING_DOWN_FULLSYNC) { + ret = mnt_want_write_file(filp); + if (ret) + return ret; + } switch (in) { case F2FS_GOING_DOWN_FULLSYNC: @@ -1866,6 +2048,7 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) } if (sb) { f2fs_stop_checkpoint(sbi, false); + set_sbi_flag(sbi, SBI_IS_SHUTDOWN); thaw_bdev(sb->s_bdev, sb); } break; @@ -1875,28 +2058,42 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) if (ret) goto out; f2fs_stop_checkpoint(sbi, false); + set_sbi_flag(sbi, SBI_IS_SHUTDOWN); break; case F2FS_GOING_DOWN_NOSYNC: f2fs_stop_checkpoint(sbi, false); + set_sbi_flag(sbi, SBI_IS_SHUTDOWN); break; case F2FS_GOING_DOWN_METAFLUSH: - sync_meta_pages(sbi, META, LONG_MAX, FS_META_IO); + f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_META_IO); f2fs_stop_checkpoint(sbi, false); + set_sbi_flag(sbi, SBI_IS_SHUTDOWN); break; + case F2FS_GOING_DOWN_NEED_FSCK: + set_sbi_flag(sbi, SBI_NEED_FSCK); + set_sbi_flag(sbi, SBI_CP_DISABLED_QUICK); + set_sbi_flag(sbi, SBI_IS_DIRTY); + /* do checkpoint only */ + ret = f2fs_sync_fs(sb, 1); + goto out; default: ret = -EINVAL; goto out; } - stop_gc_thread(sbi); - stop_discard_thread(sbi); + f2fs_stop_gc_thread(sbi); + f2fs_stop_discard_thread(sbi); - drop_discard_cmd(sbi); + f2fs_drop_discard_cmd(sbi); clear_opt(sbi, DISCARD); f2fs_update_time(sbi, REQ_TIME); out: - mnt_drop_write_file(filp); + if (in != F2FS_GOING_DOWN_FULLSYNC) + mnt_drop_write_file(filp); + + trace_f2fs_shutdown(sbi, in, ret); + return ret; } @@ -1911,7 +2108,7 @@ static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (!blk_queue_discard(q)) + if (!f2fs_hw_support_discard(F2FS_SB(sb))) return -EOPNOTSUPP; if (copy_from_user(&range, (struct fstrim_range __user *)arg, @@ -1950,7 +2147,7 @@ static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); - if (!f2fs_sb_has_encrypt(inode->i_sb)) + if (!f2fs_sb_has_encrypt(F2FS_I_SB(inode))) return -EOPNOTSUPP; f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); @@ -1960,7 +2157,7 @@ static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg) static int f2fs_ioc_get_encryption_policy(struct file *filp, unsigned long arg) { - if (!f2fs_sb_has_encrypt(file_inode(filp)->i_sb)) + if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp)))) return -EOPNOTSUPP; return fscrypt_ioctl_get_policy(filp, (void __user *)arg); } @@ -1971,7 +2168,7 @@ static int f2fs_ioc_get_encryption_pwsalt(struct file *filp, unsigned long arg) struct f2fs_sb_info *sbi = F2FS_I_SB(inode); int err; - if (!f2fs_sb_has_encrypt(inode->i_sb)) + if (!f2fs_sb_has_encrypt(sbi)) return -EOPNOTSUPP; err = mnt_want_write_file(filp); @@ -2055,15 +2252,15 @@ static int f2fs_ioc_gc_range(struct file *filp, unsigned long arg) if (f2fs_readonly(sbi->sb)) return -EROFS; + end = range.start + range.len; + if (range.start < MAIN_BLKADDR(sbi) || end >= MAX_BLKADDR(sbi)) { + return -EINVAL; + } + ret = mnt_want_write_file(filp); if (ret) return ret; - end = range.start + range.len; - if (range.start < MAIN_BLKADDR(sbi) || end >= MAX_BLKADDR(sbi)) { - ret = -EINVAL; - goto out; - } do_more: if (!range.sync) { if (!mutex_trylock(&sbi->gc_mutex)) { @@ -2075,7 +2272,7 @@ do_more: } ret = f2fs_gc(sbi, range.sync, true, GET_SEGNO(sbi, range.start)); - range.start += sbi->blocks_per_seg; + range.start += BLKS_PER_SEC(sbi); if (range.start <= end) goto do_more; out: @@ -2095,6 +2292,11 @@ static int f2fs_ioc_write_checkpoint(struct file *filp, unsigned long arg) if (f2fs_readonly(sbi->sb)) return -EROFS; + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { + f2fs_info(sbi, "Skipping Checkpoint. Checkpoints currently disabled."); + return -EINVAL; + } + ret = mnt_want_write_file(filp); if (ret) return ret; @@ -2111,8 +2313,9 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, { struct inode *inode = file_inode(filp); struct f2fs_map_blocks map = { .m_next_extent = NULL, - .m_seg_type = NO_CHECK_TYPE }; - struct extent_info ei = {0,0,0}; + .m_seg_type = NO_CHECK_TYPE , + .m_may_create = false }; + struct extent_info ei = {0, 0, 0}; pgoff_t pg_start, pg_end, next_pgofs; unsigned int blk_per_seg = sbi->blocks_per_seg; unsigned int total = 0, sec_num; @@ -2121,7 +2324,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, int err; /* if in-place-update policy is enabled, don't waste time here */ - if (should_update_inplace(inode, NULL)) + if (f2fs_should_update_inplace(inode, NULL)) return -EINVAL; pg_start = range->start >> PAGE_SHIFT; @@ -2179,7 +2382,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, if (!fragmented) goto out; - sec_num = (total + BLKS_PER_SEC(sbi) - 1) / BLKS_PER_SEC(sbi); + sec_num = DIV_ROUND_UP(total, BLKS_PER_SEC(sbi)); /* * make sure there are enough free section for LFS allocation, this can @@ -2216,7 +2419,7 @@ do_map: while (idx < map.m_lblk + map.m_len && cnt < blk_per_seg) { struct page *page; - page = get_lock_data_page(inode, idx, true); + page = f2fs_get_lock_data_page(inode, idx, true); if (IS_ERR(page)) { err = PTR_ERR(page); goto clear_out; @@ -2327,15 +2530,10 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in, } inode_lock(src); - down_write(&F2FS_I(src)->dio_rwsem[WRITE]); if (src != dst) { ret = -EBUSY; if (!inode_trylock(dst)) goto out; - if (!down_write_trylock(&F2FS_I(dst)->dio_rwsem[WRITE])) { - inode_unlock(dst); - goto out; - } } ret = -EINVAL; @@ -2380,6 +2578,14 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in, goto out_unlock; f2fs_balance_fs(sbi, true); + + down_write(&F2FS_I(src)->i_gc_rwsem[WRITE]); + if (src != dst) { + ret = -EBUSY; + if (!down_write_trylock(&F2FS_I(dst)->i_gc_rwsem[WRITE])) + goto out_src; + } + f2fs_lock_op(sbi); ret = __exchange_data_block(src, dst, pos_in >> F2FS_BLKSIZE_BITS, pos_out >> F2FS_BLKSIZE_BITS, @@ -2392,13 +2598,15 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in, f2fs_i_size_write(dst, dst_osize); } f2fs_unlock_op(sbi); + + if (src != dst) + up_write(&F2FS_I(dst)->i_gc_rwsem[WRITE]); +out_src: + up_write(&F2FS_I(src)->i_gc_rwsem[WRITE]); out_unlock: - if (src != dst) { - up_write(&F2FS_I(dst)->dio_rwsem[WRITE]); + if (src != dst) inode_unlock(dst); - } out: - up_write(&F2FS_I(src)->dio_rwsem[WRITE]); inode_unlock(src); return ret; } @@ -2461,16 +2669,17 @@ static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg) if (f2fs_readonly(sbi->sb)) return -EROFS; + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) + return -EINVAL; + if (copy_from_user(&range, (struct f2fs_flush_device __user *)arg, sizeof(range))) return -EFAULT; - if (sbi->s_ndevs <= 1 || sbi->s_ndevs - 1 <= range.dev_num || - sbi->segs_per_sec != 1) { - f2fs_msg(sbi->sb, KERN_WARNING, - "Can't flush %u in %d for segs_per_sec %u != 1\n", - range.dev_num, sbi->s_ndevs, - sbi->segs_per_sec); + if (!f2fs_is_multi_device(sbi) || sbi->s_ndevs - 1 <= range.dev_num || + __is_large_section(sbi)) { + f2fs_warn(sbi, "Can't flush %u in %d for segs_per_sec %u != 1", + range.dev_num, sbi->s_ndevs, sbi->segs_per_sec); return -EINVAL; } @@ -2525,12 +2734,13 @@ int f2fs_pin_file_control(struct inode *inode, bool inc) /* Use i_gc_failures for normal file as a risk signal. */ if (inc) - f2fs_i_gc_failures_write(inode, fi->i_gc_failures + 1); + f2fs_i_gc_failures_write(inode, + fi->i_gc_failures[GC_FAILURE_PIN] + 1); - if (fi->i_gc_failures > sbi->gc_pin_file_threshold) { - f2fs_msg(sbi->sb, KERN_WARNING, - "%s: Enable GC = ino %lx after %x GC trials\n", - __func__, inode->i_ino, fi->i_gc_failures); + if (fi->i_gc_failures[GC_FAILURE_PIN] > sbi->gc_pin_file_threshold) { + f2fs_warn(sbi, "%s: Enable GC = ino %lx after %x GC trials", + __func__, inode->i_ino, + fi->i_gc_failures[GC_FAILURE_PIN]); clear_inode_flag(inode, FI_PIN_FILE); return -EAGAIN; } @@ -2543,9 +2753,6 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) __u32 pin; int ret = 0; - if (!inode_owner_or_capable(inode)) - return -EACCES; - if (get_user(pin, (__u32 __user *)arg)) return -EFAULT; @@ -2561,14 +2768,14 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) inode_lock(inode); - if (should_update_outplace(inode, NULL)) { + if (f2fs_should_update_outplace(inode, NULL)) { ret = -EINVAL; goto out; } if (!pin) { clear_inode_flag(inode, FI_PIN_FILE); - F2FS_I(inode)->i_gc_failures = 1; + f2fs_i_gc_failures_write(inode, 0); goto done; } @@ -2581,7 +2788,7 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) goto out; set_inode_flag(inode, FI_PIN_FILE); - ret = F2FS_I(inode)->i_gc_failures; + ret = F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN]; done: f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); out: @@ -2596,7 +2803,7 @@ static int f2fs_ioc_get_pin_file(struct file *filp, unsigned long arg) __u32 pin = 0; if (is_inode_flag_set(inode, FI_PIN_FILE)) - pin = F2FS_I(inode)->i_gc_failures; + pin = F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN]; return put_user(pin, (u32 __user *)arg); } @@ -2615,14 +2822,15 @@ int f2fs_precache_extents(struct inode *inode) map.m_next_pgofs = NULL; map.m_next_extent = &m_next_extent; map.m_seg_type = NO_CHECK_TYPE; + map.m_may_create = false; end = F2FS_I_SB(inode)->max_file_blocks; while (map.m_lblk < end) { map.m_len = end - map.m_lblk; - down_write(&fi->dio_rwsem[WRITE]); + down_write(&fi->i_gc_rwsem[WRITE]); err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_PRECACHE); - up_write(&fi->dio_rwsem[WRITE]); + up_write(&fi->i_gc_rwsem[WRITE]); if (err) return err; @@ -2637,6 +2845,27 @@ static int f2fs_ioc_precache_extents(struct file *filp, unsigned long arg) return f2fs_precache_extents(file_inode(filp)); } +static int f2fs_ioc_resize_fs(struct file *filp, unsigned long arg) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(filp)); + __u64 block_count; + int ret; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (f2fs_readonly(sbi->sb)) + return -EROFS; + + if (copy_from_user(&block_count, (void __user *)arg, + sizeof(block_count))) + return -EFAULT; + + ret = f2fs_resize_fs(sbi, block_count); + + return ret; +} + long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp))))) @@ -2689,6 +2918,8 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return f2fs_ioc_set_pin_file(filp, arg); case F2FS_IOC_PRECACHE_EXTENTS: return f2fs_ioc_precache_extents(filp, arg); + case F2FS_IOC_RESIZE_FS: + return f2fs_ioc_resize_fs(filp, arg); default: return -ENOTTY; } @@ -2698,18 +2929,23 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); - struct blk_plug plug; ssize_t ret; - if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) - return -EIO; + if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) { + ret = -EIO; + goto out; + } - if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT)) - return -EINVAL; + if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT)) { + ret = -EINVAL; + goto out; + } if (!inode_trylock(inode)) { - if (iocb->ki_flags & IOCB_NOWAIT) - return -EAGAIN; + if (iocb->ki_flags & IOCB_NOWAIT) { + ret = -EAGAIN; + goto out; + } inode_lock(inode); } @@ -2722,16 +2958,16 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) if (iov_iter_fault_in_readable(from, iov_iter_count(from))) set_inode_flag(inode, FI_NO_PREALLOC); - if ((iocb->ki_flags & IOCB_NOWAIT) && - (iocb->ki_flags & IOCB_DIRECT)) { - if (!f2fs_overwrite_io(inode, iocb->ki_pos, + if ((iocb->ki_flags & IOCB_NOWAIT)) { + if (!f2fs_overwrite_io(inode, iocb->ki_pos, iov_iter_count(from)) || - f2fs_has_inline_data(inode) || - f2fs_force_buffered_io(inode, WRITE)) { - inode_unlock(inode); - return -EAGAIN; - } - + f2fs_has_inline_data(inode) || + f2fs_force_buffered_io(inode, iocb, from)) { + clear_inode_flag(inode, FI_NO_PREALLOC); + inode_unlock(inode); + ret = -EAGAIN; + goto out; + } } else { preallocated = true; target_size = iocb->ki_pos + iov_iter_count(from); @@ -2740,12 +2976,11 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) if (err) { clear_inode_flag(inode, FI_NO_PREALLOC); inode_unlock(inode); - return err; + ret = err; + goto out; } } - blk_start_plug(&plug); ret = __generic_file_write_iter(iocb, from); - blk_finish_plug(&plug); clear_inode_flag(inode, FI_NO_PREALLOC); /* if we couldn't write data, we should deallocate blocks. */ @@ -2756,7 +2991,9 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) f2fs_update_iostat(F2FS_I_SB(inode), APP_WRITE_IO, ret); } inode_unlock(inode); - +out: + trace_f2fs_file_write_iter(inode, iocb->ki_pos, + iov_iter_count(from), ret); if (ret > 0) { ssize_t err; @@ -2799,6 +3036,7 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case F2FS_IOC_GET_PIN_FILE: case F2FS_IOC_SET_PIN_FILE: case F2FS_IOC_PRECACHE_EXTENTS: + case F2FS_IOC_RESIZE_FS: break; default: return -ENOIOCTLCMD; |