diff options
Diffstat (limited to 'fs/f2fs/checkpoint.c')
| -rw-r--r-- | fs/f2fs/checkpoint.c | 532 |
1 files changed, 351 insertions, 181 deletions
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 494253a253f8..7d7d5dae1500 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/checkpoint.c * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/fs.h> #include <linux/bio.h> @@ -24,10 +21,11 @@ #include <trace/events/f2fs.h> static struct kmem_cache *ino_entry_slab; -struct kmem_cache *inode_entry_slab; +struct kmem_cache *f2fs_inode_entry_slab; void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io) { + f2fs_build_fault_attr(sbi, 0, 0); set_ckpt_flags(sbi, CP_ERROR_FLAG); if (!end_io) f2fs_flush_merged_writes(sbi); @@ -36,7 +34,7 @@ void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io) /* * We guarantee no failure on the returned page. */ -struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index) +struct page *f2fs_grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index) { struct address_space *mapping = META_MAPPING(sbi); struct page *page = NULL; @@ -46,7 +44,7 @@ repeat: cond_resched(); goto repeat; } - f2fs_wait_on_page_writeback(page, META, true); + f2fs_wait_on_page_writeback(page, META, true, true); if (!PageUptodate(page)) SetPageUptodate(page); return page; @@ -68,8 +66,9 @@ static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index, .old_blkaddr = index, .new_blkaddr = index, .encrypted_page = NULL, - .is_meta = is_meta, + .is_por = !is_meta, }; + int err; if (unlikely(!is_meta)) fio.op_flags &= ~REQ_META; @@ -84,9 +83,10 @@ repeat: fio.page = page; - if (f2fs_submit_page_bio(&fio)) { + err = f2fs_submit_page_bio(&fio); + if (err) { f2fs_put_page(page, 1); - goto repeat; + return ERR_PTR(err); } lock_page(page); @@ -95,29 +95,67 @@ repeat: goto repeat; } - /* - * if there is any IO error when accessing device, make our filesystem - * readonly and make sure do not write checkpoint with non-uptodate - * meta page. - */ - if (unlikely(!PageUptodate(page))) - f2fs_stop_checkpoint(sbi, false); + if (unlikely(!PageUptodate(page))) { + f2fs_put_page(page, 1); + return ERR_PTR(-EIO); + } out: return page; } -struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index) +struct page *f2fs_get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index) { return __get_meta_page(sbi, index, true); } +struct page *f2fs_get_meta_page_nofail(struct f2fs_sb_info *sbi, pgoff_t index) +{ + struct page *page; + int count = 0; + +retry: + page = __get_meta_page(sbi, index, true); + if (IS_ERR(page)) { + if (PTR_ERR(page) == -EIO && + ++count <= DEFAULT_RETRY_IO_COUNT) + goto retry; + f2fs_stop_checkpoint(sbi, false); + } + return page; +} + /* for POR only */ -struct page *get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index) +struct page *f2fs_get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index) { return __get_meta_page(sbi, index, false); } -bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type) +static bool __is_bitmap_valid(struct f2fs_sb_info *sbi, block_t blkaddr, + int type) +{ + struct seg_entry *se; + unsigned int segno, offset; + bool exist; + + if (type != DATA_GENERIC_ENHANCE && type != DATA_GENERIC_ENHANCE_READ) + return true; + + segno = GET_SEGNO(sbi, blkaddr); + offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr); + se = get_seg_entry(sbi, segno); + + exist = f2fs_test_bit(offset, se->cur_valid_map); + if (!exist && type == DATA_GENERIC_ENHANCE) { + f2fs_err(sbi, "Inconsistent error blkaddr:%u, sit bitmap:%d", + blkaddr, exist); + set_sbi_flag(sbi, SBI_NEED_FSCK); + WARN_ON(1); + } + return exist; +} + +bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, + block_t blkaddr, int type) { switch (type) { case META_NAT: @@ -141,6 +179,25 @@ bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type) blkaddr < MAIN_BLKADDR(sbi))) return false; break; + case DATA_GENERIC: + case DATA_GENERIC_ENHANCE: + case DATA_GENERIC_ENHANCE_READ: + if (unlikely(blkaddr >= MAX_BLKADDR(sbi) || + blkaddr < MAIN_BLKADDR(sbi))) { + f2fs_warn(sbi, "access invalid blkaddr:%u", + blkaddr); + set_sbi_flag(sbi, SBI_NEED_FSCK); + WARN_ON(1); + return false; + } else { + return __is_bitmap_valid(sbi, blkaddr, type); + } + break; + case META_GENERIC: + if (unlikely(blkaddr < SEG0_BLKADDR(sbi) || + blkaddr >= MAIN_BLKADDR(sbi))) + return false; + break; default: BUG(); } @@ -151,7 +208,7 @@ bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type) /* * Readahead CP/NAT/SIT/SSA pages */ -int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, +int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type, bool sync) { struct page *page; @@ -164,7 +221,7 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, REQ_RAHEAD, .encrypted_page = NULL, .in_list = false, - .is_meta = (type != META_POR), + .is_por = (type == META_POR), }; struct blk_plug plug; @@ -174,7 +231,7 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, blk_start_plug(&plug); for (; nrpages-- > 0; blkno++) { - if (!is_valid_blkaddr(sbi, blkno, type)) + if (!f2fs_is_valid_blkaddr(sbi, blkno, type)) goto out; switch (type) { @@ -220,7 +277,7 @@ out: return blkno - start; } -void ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index) +void f2fs_ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index) { struct page *page; bool readahead = false; @@ -231,7 +288,7 @@ void ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index) f2fs_put_page(page, 0); if (readahead) - ra_meta_pages(sbi, index, BIO_MAX_PAGES, META_POR, true); + f2fs_ra_meta_pages(sbi, index, BIO_MAX_PAGES, META_POR, true); } static int __f2fs_write_meta_page(struct page *page, @@ -242,22 +299,18 @@ static int __f2fs_write_meta_page(struct page *page, trace_f2fs_writepage(page, META); - if (unlikely(f2fs_cp_error(sbi))) { - dec_page_count(sbi, F2FS_DIRTY_META); - unlock_page(page); - return 0; - } + if (unlikely(f2fs_cp_error(sbi))) + goto redirty_out; if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) goto redirty_out; if (wbc->for_reclaim && page->index < GET_SUM_BLOCK(sbi, 0)) goto redirty_out; - write_meta_page(sbi, page, io_type); + f2fs_do_write_meta_page(sbi, page, io_type); dec_page_count(sbi, F2FS_DIRTY_META); if (wbc->for_reclaim) - f2fs_submit_merged_write_cond(sbi, page->mapping->host, - 0, page->index, META); + f2fs_submit_merged_write_cond(sbi, NULL, page, 0, META); unlock_page(page); @@ -287,8 +340,9 @@ static int f2fs_write_meta_pages(struct address_space *mapping, goto skip_write; /* collect a number of dirty meta pages and write together */ - if (wbc->for_kupdate || - get_pages(sbi, F2FS_DIRTY_META) < nr_pages_to_skip(sbi, META)) + if (wbc->sync_mode != WB_SYNC_ALL && + get_pages(sbi, F2FS_DIRTY_META) < + nr_pages_to_skip(sbi, META)) goto skip_write; /* if locked failed, cp will flush dirty pages instead */ @@ -297,7 +351,7 @@ static int f2fs_write_meta_pages(struct address_space *mapping, trace_f2fs_writepages(mapping->host, wbc, META); diff = nr_pages_to_write(sbi, META, wbc); - written = sync_meta_pages(sbi, META, wbc->nr_to_write, FS_META_IO); + written = f2fs_sync_meta_pages(sbi, META, wbc->nr_to_write, FS_META_IO); mutex_unlock(&sbi->cp_mutex); wbc->nr_to_write = max((long)0, wbc->nr_to_write - written - diff); return 0; @@ -308,13 +362,14 @@ skip_write: return 0; } -long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type, +long f2fs_sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type, long nr_to_write, enum iostat_type io_type) { struct address_space *mapping = META_MAPPING(sbi); - pgoff_t index = 0, end = ULONG_MAX, prev = ULONG_MAX; + pgoff_t index = 0, prev = ULONG_MAX; struct pagevec pvec; long nwritten = 0; + int nr_pages; struct writeback_control wbc = { .for_reclaim = 0, }; @@ -324,13 +379,9 @@ long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type, blk_start_plug(&plug); - while (index <= end) { - int i, nr_pages; - nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, - PAGECACHE_TAG_DIRTY, - min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); - if (unlikely(nr_pages == 0)) - break; + while ((nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, + PAGECACHE_TAG_DIRTY))) { + int i; for (i = 0; i < nr_pages; i++) { struct page *page = pvec.pages[i]; @@ -354,9 +405,8 @@ continue_unlock: goto continue_unlock; } - f2fs_wait_on_page_writeback(page, META, true); + f2fs_wait_on_page_writeback(page, META, true, true); - BUG_ON(PageWriteback(page)); if (!clear_page_dirty_for_io(page)) goto continue_unlock; @@ -390,7 +440,7 @@ static int f2fs_set_meta_page_dirty(struct page *page) if (!PageDirty(page)) { __set_page_dirty_nobuffers(page); inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_META); - SetPagePrivate(page); + f2fs_set_page_private(page, 0); f2fs_trace_pid(page); return 1; } @@ -461,20 +511,20 @@ static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) spin_unlock(&im->ino_lock); } -void add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) +void f2fs_add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) { /* add new dirty ino entry into list */ __add_ino_entry(sbi, ino, 0, type); } -void remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) +void f2fs_remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) { /* remove dirty ino entry from list */ __remove_ino_entry(sbi, ino, type); } /* mode should be APPEND_INO or UPDATE_INO */ -bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode) +bool f2fs_exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode) { struct inode_management *im = &sbi->im[mode]; struct ino_entry *e; @@ -485,7 +535,7 @@ bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode) return e ? true : false; } -void release_ino_entry(struct f2fs_sb_info *sbi, bool all) +void f2fs_release_ino_entry(struct f2fs_sb_info *sbi, bool all) { struct ino_entry *e, *tmp; int i; @@ -504,13 +554,13 @@ void release_ino_entry(struct f2fs_sb_info *sbi, bool all) } } -void set_dirty_device(struct f2fs_sb_info *sbi, nid_t ino, +void f2fs_set_dirty_device(struct f2fs_sb_info *sbi, nid_t ino, unsigned int devidx, int type) { __add_ino_entry(sbi, ino, devidx, type); } -bool is_dirty_device(struct f2fs_sb_info *sbi, nid_t ino, +bool f2fs_is_dirty_device(struct f2fs_sb_info *sbi, nid_t ino, unsigned int devidx, int type) { struct inode_management *im = &sbi->im[type]; @@ -525,20 +575,19 @@ bool is_dirty_device(struct f2fs_sb_info *sbi, nid_t ino, return is_dirty; } -int acquire_orphan_inode(struct f2fs_sb_info *sbi) +int f2fs_acquire_orphan_inode(struct f2fs_sb_info *sbi) { struct inode_management *im = &sbi->im[ORPHAN_INO]; int err = 0; spin_lock(&im->ino_lock); -#ifdef CONFIG_F2FS_FAULT_INJECTION if (time_to_inject(sbi, FAULT_ORPHAN)) { spin_unlock(&im->ino_lock); f2fs_show_injection_info(FAULT_ORPHAN); return -ENOSPC; } -#endif + if (unlikely(im->ino_num >= sbi->max_orphans)) err = -ENOSPC; else @@ -548,7 +597,7 @@ int acquire_orphan_inode(struct f2fs_sb_info *sbi) return err; } -void release_orphan_inode(struct f2fs_sb_info *sbi) +void f2fs_release_orphan_inode(struct f2fs_sb_info *sbi) { struct inode_management *im = &sbi->im[ORPHAN_INO]; @@ -558,14 +607,14 @@ void release_orphan_inode(struct f2fs_sb_info *sbi) spin_unlock(&im->ino_lock); } -void add_orphan_inode(struct inode *inode) +void f2fs_add_orphan_inode(struct inode *inode) { /* add new orphan ino entry into list */ __add_ino_entry(F2FS_I_SB(inode), inode->i_ino, 0, ORPHAN_INO); - update_inode_page(inode); + f2fs_update_inode_page(inode); } -void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) +void f2fs_remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) { /* remove orphan entry from orphan list */ __remove_ino_entry(sbi, ino, ORPHAN_INO); @@ -575,12 +624,7 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) { struct inode *inode; struct node_info ni; - int err = acquire_orphan_inode(sbi); - - if (err) - goto err_out; - - __add_ino_entry(sbi, ino, 0, ORPHAN_INO); + int err; inode = f2fs_iget_retry(sbi->sb, ino); if (IS_ERR(inode)) { @@ -593,34 +637,35 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) } err = dquot_initialize(inode); - if (err) + if (err) { + iput(inode); goto err_out; + } - dquot_initialize(inode); clear_nlink(inode); /* truncate all the data during iput */ iput(inode); - get_node_info(sbi, ino, &ni); + err = f2fs_get_node_info(sbi, ino, &ni); + if (err) + goto err_out; /* ENOMEM was fully retried in f2fs_evict_inode. */ if (ni.blk_addr != NULL_ADDR) { err = -EIO; goto err_out; } - __remove_ino_entry(sbi, ino, ORPHAN_INO); return 0; err_out: set_sbi_flag(sbi, SBI_NEED_FSCK); - f2fs_msg(sbi->sb, KERN_WARNING, - "%s: orphan failed (ino=%x), run fsck to fix.", - __func__, ino); + f2fs_warn(sbi, "%s: orphan failed (ino=%x), run fsck to fix.", + __func__, ino); return err; } -int recover_orphan_inodes(struct f2fs_sb_info *sbi) +int f2fs_recover_orphan_inodes(struct f2fs_sb_info *sbi) { block_t start_blk, orphan_blocks, i, j; unsigned int s_flags = sbi->sb->s_flags; @@ -632,8 +677,13 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi) if (!is_set_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG)) return 0; + if (bdev_read_only(sbi->sb->s_bdev)) { + f2fs_info(sbi, "write access unavailable, skipping orphan cleanup"); + return 0; + } + if (s_flags & MS_RDONLY) { - f2fs_msg(sbi->sb, KERN_INFO, "orphan cleanup on readonly fs"); + f2fs_info(sbi, "orphan cleanup on readonly fs"); sbi->sb->s_flags &= ~MS_RDONLY; } @@ -641,19 +691,28 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi) /* Needed for iput() to work correctly and not trash data */ sbi->sb->s_flags |= MS_ACTIVE; - /* Turn on quotas so that they are updated correctly */ + /* + * Turn on quotas which were not enabled for read-only mounts if + * filesystem has quota feature, so that they are updated correctly. + */ quota_enabled = f2fs_enable_quota_files(sbi, s_flags & MS_RDONLY); #endif start_blk = __start_cp_addr(sbi) + 1 + __cp_payload(sbi); orphan_blocks = __start_sum_addr(sbi) - 1 - __cp_payload(sbi); - ra_meta_pages(sbi, start_blk, orphan_blocks, META_CP, true); + f2fs_ra_meta_pages(sbi, start_blk, orphan_blocks, META_CP, true); for (i = 0; i < orphan_blocks; i++) { - struct page *page = get_meta_page(sbi, start_blk + i); + struct page *page; struct f2fs_orphan_block *orphan_blk; + page = f2fs_get_meta_page(sbi, start_blk + i); + if (IS_ERR(page)) { + err = PTR_ERR(page); + goto out; + } + orphan_blk = (struct f2fs_orphan_block *)page_address(page); for (j = 0; j < le32_to_cpu(orphan_blk->entry_count); j++) { nid_t ino = le32_to_cpu(orphan_blk->ino[j]); @@ -668,6 +727,8 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi) /* clear Orphan Flag */ clear_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG); out: + set_sbi_flag(sbi, SBI_IS_RECOVERED); + #ifdef CONFIG_QUOTA /* Turn quotas off */ if (quota_enabled) @@ -701,7 +762,7 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk) /* loop for each orphan inode entry and write them in Jornal block */ list_for_each_entry(orphan, head, list) { if (!page) { - page = grab_meta_page(sbi, start_blk++); + page = f2fs_grab_meta_page(sbi, start_blk++); orphan_blk = (struct f2fs_orphan_block *)page_address(page); memset(orphan_blk, 0, sizeof(*orphan_blk)); @@ -735,27 +796,46 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk) } } +static __u32 f2fs_checkpoint_chksum(struct f2fs_sb_info *sbi, + struct f2fs_checkpoint *ckpt) +{ + unsigned int chksum_ofs = le32_to_cpu(ckpt->checksum_offset); + __u32 chksum; + + chksum = f2fs_crc32(sbi, ckpt, chksum_ofs); + if (chksum_ofs < CP_CHKSUM_OFFSET) { + chksum_ofs += sizeof(chksum); + chksum = f2fs_chksum(sbi, chksum, (__u8 *)ckpt + chksum_ofs, + F2FS_BLKSIZE - chksum_ofs); + } + return chksum; +} + static int get_checkpoint_version(struct f2fs_sb_info *sbi, block_t cp_addr, struct f2fs_checkpoint **cp_block, struct page **cp_page, unsigned long long *version) { - unsigned long blk_size = sbi->blocksize; size_t crc_offset = 0; - __u32 crc = 0; + __u32 crc; + + *cp_page = f2fs_get_meta_page(sbi, cp_addr); + if (IS_ERR(*cp_page)) + return PTR_ERR(*cp_page); - *cp_page = get_meta_page(sbi, cp_addr); *cp_block = (struct f2fs_checkpoint *)page_address(*cp_page); crc_offset = le32_to_cpu((*cp_block)->checksum_offset); - if (crc_offset > (blk_size - sizeof(__le32))) { - f2fs_msg(sbi->sb, KERN_WARNING, - "invalid crc_offset: %zu", crc_offset); + if (crc_offset < CP_MIN_CHKSUM_OFFSET || + crc_offset > CP_CHKSUM_OFFSET) { + f2fs_put_page(*cp_page, 1); + f2fs_warn(sbi, "invalid crc_offset: %zu", crc_offset); return -EINVAL; } - crc = cur_cp_crc(*cp_block); - if (!f2fs_crc_valid(sbi, crc, *cp_block, crc_offset)) { - f2fs_msg(sbi->sb, KERN_WARNING, "invalid crc value"); + crc = f2fs_checkpoint_chksum(sbi, *cp_block); + if (crc != cur_cp_crc(*cp_block)) { + f2fs_put_page(*cp_page, 1); + f2fs_warn(sbi, "invalid crc value"); return -EINVAL; } @@ -774,14 +854,21 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, err = get_checkpoint_version(sbi, cp_addr, &cp_block, &cp_page_1, version); if (err) - goto invalid_cp1; + return NULL; + + if (le32_to_cpu(cp_block->cp_pack_total_block_count) > + sbi->blocks_per_seg) { + f2fs_warn(sbi, "invalid cp_pack_total_block_count:%u", + le32_to_cpu(cp_block->cp_pack_total_block_count)); + goto invalid_cp; + } pre_version = *version; cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1; err = get_checkpoint_version(sbi, cp_addr, &cp_block, &cp_page_2, version); if (err) - goto invalid_cp2; + goto invalid_cp; cur_version = *version; if (cur_version == pre_version) { @@ -789,14 +876,13 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, f2fs_put_page(cp_page_2, 1); return cp_page_1; } -invalid_cp2: f2fs_put_page(cp_page_2, 1); -invalid_cp1: +invalid_cp: f2fs_put_page(cp_page_1, 1); return NULL; } -int get_valid_checkpoint(struct f2fs_sb_info *sbi) +int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi) { struct f2fs_checkpoint *cp_block; struct f2fs_super_block *fsb = sbi->raw_super; @@ -807,8 +893,10 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi) unsigned int cp_blks = 1 + __cp_payload(sbi); block_t cp_blk_no; int i; + int err; - sbi->ckpt = f2fs_kzalloc(sbi, cp_blks * blk_size, GFP_KERNEL); + sbi->ckpt = f2fs_kzalloc(sbi, array_size(blk_size, cp_blks), + GFP_KERNEL); if (!sbi->ckpt) return -ENOMEM; /* @@ -833,21 +921,24 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi) } else if (cp2) { cur_page = cp2; } else { + err = -EFSCORRUPTED; goto fail_no_cp; } cp_block = (struct f2fs_checkpoint *)page_address(cur_page); memcpy(sbi->ckpt, cp_block, blk_size); - /* Sanity checking of checkpoint */ - if (sanity_check_ckpt(sbi)) - goto free_fail_no_cp; - if (cur_page == cp1) sbi->cur_cp_pack = 1; else sbi->cur_cp_pack = 2; + /* Sanity checking of checkpoint */ + if (f2fs_sanity_check_ckpt(sbi)) { + err = -EFSCORRUPTED; + goto free_fail_no_cp; + } + if (cp_blks <= 1) goto done; @@ -859,7 +950,11 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi) void *sit_bitmap_ptr; unsigned char *ckpt = (unsigned char *)sbi->ckpt; - cur_page = get_meta_page(sbi, cp_blk_no + i); + cur_page = f2fs_get_meta_page(sbi, cp_blk_no + i); + if (IS_ERR(cur_page)) { + err = PTR_ERR(cur_page); + goto free_fail_no_cp; + } sit_bitmap_ptr = page_address(cur_page); memcpy(ckpt + i * blk_size, sit_bitmap_ptr, blk_size); f2fs_put_page(cur_page, 1); @@ -873,8 +968,8 @@ free_fail_no_cp: f2fs_put_page(cp1, 1); f2fs_put_page(cp2, 1); fail_no_cp: - kfree(sbi->ckpt); - return -EINVAL; + kvfree(sbi->ckpt); + return err; } static void __add_dirty_inode(struct inode *inode, enum inode_type type) @@ -904,7 +999,7 @@ static void __remove_dirty_inode(struct inode *inode, enum inode_type type) stat_dec_dirty_inode(F2FS_I_SB(inode), type); } -void update_dirty_page(struct inode *inode, struct page *page) +void f2fs_update_dirty_page(struct inode *inode, struct page *page) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); enum inode_type type = S_ISDIR(inode->i_mode) ? DIR_INODE : FILE_INODE; @@ -919,11 +1014,11 @@ void update_dirty_page(struct inode *inode, struct page *page) inode_inc_dirty_pages(inode); spin_unlock(&sbi->inode_lock[type]); - SetPagePrivate(page); + f2fs_set_page_private(page, 0); f2fs_trace_pid(page); } -void remove_dirty_inode(struct inode *inode) +void f2fs_remove_dirty_inode(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); enum inode_type type = S_ISDIR(inode->i_mode) ? DIR_INODE : FILE_INODE; @@ -940,7 +1035,7 @@ void remove_dirty_inode(struct inode *inode) spin_unlock(&sbi->inode_lock[type]); } -int sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type) +int f2fs_sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type) { struct list_head *head; struct inode *inode; @@ -971,22 +1066,18 @@ retry: if (inode) { unsigned long cur_ino = inode->i_ino; - if (is_dir) - F2FS_I(inode)->cp_task = current; + F2FS_I(inode)->cp_task = current; filemap_fdatawrite(inode->i_mapping); - if (is_dir) - F2FS_I(inode)->cp_task = NULL; + F2FS_I(inode)->cp_task = NULL; iput(inode); /* We need to give cpu to another writers. */ - if (ino == cur_ino) { - congestion_wait(BLK_RW_ASYNC, HZ/50); + if (ino == cur_ino) cond_resched(); - } else { + else ino = cur_ino; - } } else { /* * We should submit bio, since it exists several @@ -1023,7 +1114,7 @@ int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi) /* it's on eviction */ if (is_inode_flag_set(inode, FI_DIRTY_INODE)) - update_inode_page(inode); + f2fs_update_inode_page(inode); iput(inode); } } @@ -1043,6 +1134,28 @@ static void __prepare_cp_block(struct f2fs_sb_info *sbi) ckpt->next_free_nid = cpu_to_le32(last_nid); } +static bool __need_flush_quota(struct f2fs_sb_info *sbi) +{ + bool ret = false; + + if (!is_journalled_quota(sbi)) + return false; + + down_write(&sbi->quota_sem); + if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH)) { + ret = false; + } else if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR)) { + ret = false; + } else if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_FLUSH)) { + clear_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH); + ret = true; + } else if (get_pages(sbi, F2FS_DIRTY_QDATA)) { + ret = true; + } + up_write(&sbi->quota_sem); + return ret; +} + /* * Freeze all the FS-operations for checkpoint. */ @@ -1054,20 +1167,40 @@ static int block_operations(struct f2fs_sb_info *sbi) .for_reclaim = 0, }; struct blk_plug plug; - int err = 0; + int err = 0, cnt = 0; blk_start_plug(&plug); -retry_flush_dents: +retry_flush_quotas: f2fs_lock_all(sbi); + if (__need_flush_quota(sbi)) { + int locked; + + if (++cnt > DEFAULT_RETRY_QUOTA_FLUSH_COUNT) { + set_sbi_flag(sbi, SBI_QUOTA_SKIP_FLUSH); + set_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH); + goto retry_flush_dents; + } + f2fs_unlock_all(sbi); + + /* only failed during mount/umount/freeze/quotactl */ + locked = down_read_trylock(&sbi->sb->s_umount); + f2fs_quota_sync(sbi->sb, -1); + if (locked) + up_read(&sbi->sb->s_umount); + cond_resched(); + goto retry_flush_quotas; + } + +retry_flush_dents: /* write all the dirty dentry pages */ if (get_pages(sbi, F2FS_DIRTY_DENTS)) { f2fs_unlock_all(sbi); - err = sync_dirty_inodes(sbi, DIR_INODE); + err = f2fs_sync_dirty_inodes(sbi, DIR_INODE); if (err) goto out; cond_resched(); - goto retry_flush_dents; + goto retry_flush_quotas; } /* @@ -1083,7 +1216,7 @@ retry_flush_dents: if (err) goto out; cond_resched(); - goto retry_flush_dents; + goto retry_flush_quotas; } retry_flush_nodes: @@ -1091,7 +1224,9 @@ retry_flush_nodes: if (get_pages(sbi, F2FS_DIRTY_NODES)) { up_write(&sbi->node_write); - err = sync_node_pages(sbi, &wbc, false, FS_CP_NODE_IO); + atomic_inc(&sbi->wb_sync_req[NODE]); + err = f2fs_sync_node_pages(sbi, &wbc, false, FS_CP_NODE_IO); + atomic_dec(&sbi->wb_sync_req[NODE]); if (err) { up_write(&sbi->node_change); f2fs_unlock_all(sbi); @@ -1118,7 +1253,7 @@ static void unblock_operations(struct f2fs_sb_info *sbi) f2fs_unlock_all(sbi); } -static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi) +void f2fs_wait_on_all_pages_writeback(struct f2fs_sb_info *sbi) { DEFINE_WAIT(wait); @@ -1128,6 +1263,9 @@ static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi) if (!get_pages(sbi, F2FS_WB_CP_DATA)) break; + if (unlikely(f2fs_cp_error(sbi))) + break; + io_schedule_timeout(5*HZ); } finish_wait(&sbi->cp_wait, &wait); @@ -1166,9 +1304,28 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc) else __clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); - if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) + if (is_sbi_flag_set(sbi, SBI_NEED_FSCK) || + is_sbi_flag_set(sbi, SBI_IS_RESIZEFS)) __set_ckpt_flags(ckpt, CP_FSCK_FLAG); + if (is_sbi_flag_set(sbi, SBI_CP_DISABLED)) + __set_ckpt_flags(ckpt, CP_DISABLED_FLAG); + else + __clear_ckpt_flags(ckpt, CP_DISABLED_FLAG); + + if (is_sbi_flag_set(sbi, SBI_CP_DISABLED_QUICK)) + __set_ckpt_flags(ckpt, CP_DISABLED_QUICK_FLAG); + else + __clear_ckpt_flags(ckpt, CP_DISABLED_QUICK_FLAG); + + if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH)) + __set_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG); + else + __clear_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG); + + if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR)) + __set_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG); + /* set this flag to activate crc|cp_ver for recovery */ __set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG); __clear_ckpt_flags(ckpt, CP_NOCRC_RECOVERY_FLAG); @@ -1185,24 +1342,28 @@ static void commit_checkpoint(struct f2fs_sb_info *sbi, /* * pagevec_lookup_tag and lock_page again will take - * some extra time. Therefore, update_meta_pages and - * sync_meta_pages are combined in this function. + * some extra time. Therefore, f2fs_update_meta_pages and + * f2fs_sync_meta_pages are combined in this function. */ - struct page *page = grab_meta_page(sbi, blk_addr); + struct page *page = f2fs_grab_meta_page(sbi, blk_addr); int err; + f2fs_wait_on_page_writeback(page, META, true, true); + memcpy(page_address(page), src, PAGE_SIZE); - set_page_dirty(page); - f2fs_wait_on_page_writeback(page, META, true); - f2fs_bug_on(sbi, PageWriteback(page)); + set_page_dirty(page); if (unlikely(!clear_page_dirty_for_io(page))) f2fs_bug_on(sbi, 1); /* writeout cp pack 2 page */ err = __f2fs_write_meta_page(page, &wbc, FS_CP_META_IO); - f2fs_bug_on(sbi, err); + if (unlikely(err && f2fs_cp_error(sbi))) { + f2fs_put_page(page, 1); + return; + } + f2fs_bug_on(sbi, err); f2fs_put_page(page, 0); /* submit checkpoint (with barrier if NOBARRIER is not set) */ @@ -1225,17 +1386,15 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) int err; /* Flush all the NAT/SIT pages */ - while (get_pages(sbi, F2FS_DIRTY_META)) { - sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO); - if (unlikely(f2fs_cp_error(sbi))) - return -EIO; - } + f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO); + f2fs_bug_on(sbi, get_pages(sbi, F2FS_DIRTY_META) && + !f2fs_cp_error(sbi)); /* * modify checkpoint * version number is already updated */ - ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi)); + ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi, true)); ckpt->free_segment_count = cpu_to_le32(free_segments(sbi)); for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) { ckpt->cur_node_segno[i] = @@ -1255,7 +1414,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) } /* 2 cp + n data seg summary + orphan inode blocks */ - data_sum_blocks = npages_for_summary_flush(sbi, false); + data_sum_blocks = f2fs_npages_for_summary_flush(sbi, false); spin_lock_irqsave(&sbi->cp_lock, flags); if (data_sum_blocks < NR_CURSEG_DATA_TYPE) __set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG); @@ -1283,7 +1442,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP)); get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP)); - crc32 = f2fs_crc32(sbi, ckpt, le32_to_cpu(ckpt->checksum_offset)); + crc32 = f2fs_checkpoint_chksum(sbi, ckpt); *((__le32 *)((unsigned char *)ckpt + le32_to_cpu(ckpt->checksum_offset))) = cpu_to_le32(crc32); @@ -1300,22 +1459,15 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) blk = start_blk + sbi->blocks_per_seg - nm_i->nat_bits_blocks; for (i = 0; i < nm_i->nat_bits_blocks; i++) - update_meta_page(sbi, nm_i->nat_bits + + f2fs_update_meta_page(sbi, nm_i->nat_bits + (i << F2FS_BLKSIZE_BITS), blk + i); - - /* Flush all the NAT BITS pages */ - while (get_pages(sbi, F2FS_DIRTY_META)) { - sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO); - if (unlikely(f2fs_cp_error(sbi))) - return -EIO; - } } /* write out checkpoint buffer at block 0 */ - update_meta_page(sbi, ckpt, start_blk++); + f2fs_update_meta_page(sbi, ckpt, start_blk++); for (i = 1; i < 1 + cp_payload_blks; i++) - update_meta_page(sbi, (char *)ckpt + i * F2FS_BLKSIZE, + f2fs_update_meta_page(sbi, (char *)ckpt + i * F2FS_BLKSIZE, start_blk++); if (orphan_num) { @@ -1323,7 +1475,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) start_blk += orphan_blocks; } - write_data_summaries(sbi, start_blk); + f2fs_write_data_summaries(sbi, start_blk); start_blk += data_sum_blocks; /* Record write statistics in the hot node summary */ @@ -1334,7 +1486,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) seg_i->journal->info.kbytes_written = cpu_to_le64(kbytes_written); if (__remain_node_summaries(cpc->reason)) { - write_node_summaries(sbi, start_blk); + f2fs_write_node_summaries(sbi, start_blk); start_blk += NR_CURSEG_NODE_TYPE; } @@ -1343,13 +1495,12 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) percpu_counter_set(&sbi->alloc_valid_block_count, 0); /* Here, we have one bio having CP pack except cp pack 2 page */ - sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO); + f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO); + f2fs_bug_on(sbi, get_pages(sbi, F2FS_DIRTY_META) && + !f2fs_cp_error(sbi)); /* wait for previous submitted meta pages writeback */ - wait_on_all_pages_writeback(sbi); - - if (unlikely(f2fs_cp_error(sbi))) - return -EIO; + f2fs_wait_on_all_pages_writeback(sbi); /* flush all device cache */ err = f2fs_flush_device_cache(sbi); @@ -1358,15 +1509,28 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* barrier and flush checkpoint cp pack 2 page if it can */ commit_checkpoint(sbi, ckpt, start_blk); - wait_on_all_pages_writeback(sbi); + f2fs_wait_on_all_pages_writeback(sbi); - release_ino_entry(sbi, false); + /* + * invalidate intermediate page cache borrowed from meta inode + * which are used for migration of encrypted inode's blocks. + */ + if (f2fs_sb_has_encrypt(sbi)) + invalidate_mapping_pages(META_MAPPING(sbi), + MAIN_BLKADDR(sbi), MAX_BLKADDR(sbi) - 1); - if (unlikely(f2fs_cp_error(sbi))) - return -EIO; + f2fs_release_ino_entry(sbi, false); + + f2fs_reset_fsync_node_info(sbi); clear_sbi_flag(sbi, SBI_IS_DIRTY); clear_sbi_flag(sbi, SBI_NEED_CP); + clear_sbi_flag(sbi, SBI_QUOTA_SKIP_FLUSH); + + spin_lock(&sbi->stat_lock); + sbi->unusable_block_count = 0; + spin_unlock(&sbi->stat_lock); + __set_cp_next_pack(sbi); /* @@ -1379,18 +1543,26 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) f2fs_bug_on(sbi, get_pages(sbi, F2FS_DIRTY_DENTS)); - return 0; + return unlikely(f2fs_cp_error(sbi)) ? -EIO : 0; } /* * We guarantee that this checkpoint procedure will not fail. */ -int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) +int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) { struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); unsigned long long ckpt_ver; int err = 0; + if (f2fs_readonly(sbi->sb) || f2fs_hw_is_readonly(sbi)) + return -EROFS; + + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { + if (cpc->reason != CP_PAUSE) + return 0; + f2fs_warn(sbi, "Start checkpoint disabled!"); + } mutex_lock(&sbi->cp_mutex); if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) && @@ -1401,10 +1573,6 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) err = -EIO; goto out; } - if (f2fs_readonly(sbi->sb)) { - err = -EROFS; - goto out; - } trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "start block_ops"); @@ -1418,7 +1586,7 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* this is the case of multiple fstrims without any changes */ if (cpc->reason & CP_DISCARD) { - if (!exist_trim_candidates(sbi, cpc)) { + if (!f2fs_exist_trim_candidates(sbi, cpc)) { unblock_operations(sbi); goto out; } @@ -1426,8 +1594,8 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) if (NM_I(sbi)->dirty_nat_cnt == 0 && SIT_I(sbi)->dirty_sentries == 0 && prefree_segments(sbi) == 0) { - flush_sit_entries(sbi, cpc); - clear_prefree_segments(sbi, cpc); + f2fs_flush_sit_entries(sbi, cpc); + f2fs_clear_prefree_segments(sbi, cpc); unblock_operations(sbi); goto out; } @@ -1442,22 +1610,24 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) ckpt->checkpoint_ver = cpu_to_le64(++ckpt_ver); /* write cached NAT/SIT entries to NAT/SIT area */ - flush_nat_entries(sbi, cpc); - flush_sit_entries(sbi, cpc); + err = f2fs_flush_nat_entries(sbi, cpc); + if (err) + goto stop; + + f2fs_flush_sit_entries(sbi, cpc); /* unlock all the fs_lock[] in do_checkpoint() */ err = do_checkpoint(sbi, cpc); if (err) - release_discard_addrs(sbi); + f2fs_release_discard_addrs(sbi); else - clear_prefree_segments(sbi, cpc); - + f2fs_clear_prefree_segments(sbi, cpc); +stop: unblock_operations(sbi); stat_inc_cp_count(sbi->stat_info); if (cpc->reason & CP_RECOVERY) - f2fs_msg(sbi->sb, KERN_NOTICE, - "checkpoint: version = %llx", ckpt_ver); + f2fs_notice(sbi, "checkpoint: version = %llx", ckpt_ver); /* do checkpoint periodically */ f2fs_update_time(sbi, CP_TIME); @@ -1467,7 +1637,7 @@ out: return err; } -void init_ino_entry_info(struct f2fs_sb_info *sbi) +void f2fs_init_ino_entry_info(struct f2fs_sb_info *sbi) { int i; @@ -1485,23 +1655,23 @@ void init_ino_entry_info(struct f2fs_sb_info *sbi) F2FS_ORPHANS_PER_BLOCK; } -int __init create_checkpoint_caches(void) +int __init f2fs_create_checkpoint_caches(void) { ino_entry_slab = f2fs_kmem_cache_create("f2fs_ino_entry", sizeof(struct ino_entry)); if (!ino_entry_slab) return -ENOMEM; - inode_entry_slab = f2fs_kmem_cache_create("f2fs_inode_entry", + f2fs_inode_entry_slab = f2fs_kmem_cache_create("f2fs_inode_entry", sizeof(struct inode_entry)); - if (!inode_entry_slab) { + if (!f2fs_inode_entry_slab) { kmem_cache_destroy(ino_entry_slab); return -ENOMEM; } return 0; } -void destroy_checkpoint_caches(void) +void f2fs_destroy_checkpoint_caches(void) { kmem_cache_destroy(ino_entry_slab); - kmem_cache_destroy(inode_entry_slab); + kmem_cache_destroy(f2fs_inode_entry_slab); } |
