From 8da9e3f7476137747b8502b87df80738861d324c Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 12 Oct 2016 17:18:56 -0700 Subject: f2fs: backport from (4c1fad64 - Merge tag 'for-f2fs-4.9' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs) Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 695 ++++++++++++++++++++++++++++++++---------------------- 1 file changed, 415 insertions(+), 280 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index f77b3258454a..b3c61ae37f92 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -86,6 +86,7 @@ static inline unsigned long __reverse_ffs(unsigned long word) /* * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c because * f2fs_set_bit makes MSB and LSB reversed in a byte. + * @size must be integral times of unsigned long. * Example: * MSB <--> LSB * f2fs_set_bit(0, bitmap) => 1000 0000 @@ -95,94 +96,73 @@ static unsigned long __find_rev_next_bit(const unsigned long *addr, unsigned long size, unsigned long offset) { const unsigned long *p = addr + BIT_WORD(offset); - unsigned long result = offset & ~(BITS_PER_LONG - 1); + unsigned long result = size; unsigned long tmp; if (offset >= size) return size; - size -= result; + size -= (offset & ~(BITS_PER_LONG - 1)); offset %= BITS_PER_LONG; - if (!offset) - goto aligned; - - tmp = __reverse_ulong((unsigned char *)p); - tmp &= ~0UL >> offset; - - if (size < BITS_PER_LONG) - goto found_first; - if (tmp) - goto found_middle; - - size -= BITS_PER_LONG; - result += BITS_PER_LONG; - p++; -aligned: - while (size & ~(BITS_PER_LONG-1)) { + + while (1) { + if (*p == 0) + goto pass; + tmp = __reverse_ulong((unsigned char *)p); + + tmp &= ~0UL >> offset; + if (size < BITS_PER_LONG) + tmp &= (~0UL << (BITS_PER_LONG - size)); if (tmp) - goto found_middle; - result += BITS_PER_LONG; + goto found; +pass: + if (size <= BITS_PER_LONG) + break; size -= BITS_PER_LONG; + offset = 0; p++; } - if (!size) - return result; - - tmp = __reverse_ulong((unsigned char *)p); -found_first: - tmp &= (~0UL << (BITS_PER_LONG - size)); - if (!tmp) /* Are any bits set? */ - return result + size; /* Nope. */ -found_middle: - return result + __reverse_ffs(tmp); + return result; +found: + return result - size + __reverse_ffs(tmp); } static unsigned long __find_rev_next_zero_bit(const unsigned long *addr, unsigned long size, unsigned long offset) { const unsigned long *p = addr + BIT_WORD(offset); - unsigned long result = offset & ~(BITS_PER_LONG - 1); + unsigned long result = size; unsigned long tmp; if (offset >= size) return size; - size -= result; + size -= (offset & ~(BITS_PER_LONG - 1)); offset %= BITS_PER_LONG; - if (!offset) - goto aligned; - - tmp = __reverse_ulong((unsigned char *)p); - tmp |= ~((~0UL << offset) >> offset); - - if (size < BITS_PER_LONG) - goto found_first; - if (tmp != ~0UL) - goto found_middle; - - size -= BITS_PER_LONG; - result += BITS_PER_LONG; - p++; -aligned: - while (size & ~(BITS_PER_LONG - 1)) { + + while (1) { + if (*p == ~0UL) + goto pass; + tmp = __reverse_ulong((unsigned char *)p); + + if (offset) + tmp |= ~0UL << (BITS_PER_LONG - offset); + if (size < BITS_PER_LONG) + tmp |= ~0UL >> size; if (tmp != ~0UL) - goto found_middle; - result += BITS_PER_LONG; + goto found; +pass: + if (size <= BITS_PER_LONG) + break; size -= BITS_PER_LONG; + offset = 0; p++; } - if (!size) - return result; - - tmp = __reverse_ulong((unsigned char *)p); -found_first: - tmp |= ~(~0UL << (BITS_PER_LONG - size)); - if (tmp == ~0UL) /* Are any bits zero? */ - return result + size; /* Nope. */ -found_middle: - return result + __reverse_ffz(tmp); + return result; +found: + return result - size + __reverse_ffz(tmp); } void register_inmem_page(struct inode *inode, struct page *page) @@ -211,69 +191,149 @@ void register_inmem_page(struct inode *inode, struct page *page) trace_f2fs_register_inmem_page(page, INMEM); } -int commit_inmem_pages(struct inode *inode, bool abort) +static int __revoke_inmem_pages(struct inode *inode, + struct list_head *head, bool drop, bool recover) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct inmem_pages *cur, *tmp; + int err = 0; + + list_for_each_entry_safe(cur, tmp, head, list) { + struct page *page = cur->page; + + if (drop) + trace_f2fs_commit_inmem_page(page, INMEM_DROP); + + lock_page(page); + + if (recover) { + struct dnode_of_data dn; + struct node_info ni; + + trace_f2fs_commit_inmem_page(page, INMEM_REVOKE); + + set_new_dnode(&dn, inode, NULL, NULL, 0); + if (get_dnode_of_data(&dn, page->index, LOOKUP_NODE)) { + err = -EAGAIN; + goto next; + } + get_node_info(sbi, dn.nid, &ni); + f2fs_replace_block(sbi, &dn, dn.data_blkaddr, + cur->old_addr, ni.version, true, true); + f2fs_put_dnode(&dn); + } +next: + /* we don't need to invalidate this in the sccessful status */ + if (drop || recover) + ClearPageUptodate(page); + set_page_private(page, 0); + ClearPagePrivate(page); + f2fs_put_page(page, 1); + + list_del(&cur->list); + kmem_cache_free(inmem_entry_slab, cur); + dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES); + } + return err; +} + +void drop_inmem_pages(struct inode *inode) +{ + struct f2fs_inode_info *fi = F2FS_I(inode); + + clear_inode_flag(inode, FI_ATOMIC_FILE); + + mutex_lock(&fi->inmem_lock); + __revoke_inmem_pages(inode, &fi->inmem_pages, true, false); + mutex_unlock(&fi->inmem_lock); +} + +static int __commit_inmem_pages(struct inode *inode, + struct list_head *revoke_list) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_inode_info *fi = F2FS_I(inode); struct inmem_pages *cur, *tmp; - bool submit_bio = false; struct f2fs_io_info fio = { .sbi = sbi, .type = DATA, .rw = WRITE_SYNC | REQ_PRIO, .encrypted_page = NULL, }; + bool submit_bio = false; int err = 0; - /* - * The abort is true only when f2fs_evict_inode is called. - * Basically, the f2fs_evict_inode doesn't produce any data writes, so - * that we don't need to call f2fs_balance_fs. - * Otherwise, f2fs_gc in f2fs_balance_fs can wait forever until this - * inode becomes free by iget_locked in f2fs_iget. - */ - if (!abort) { - f2fs_balance_fs(sbi); - f2fs_lock_op(sbi); - } - - mutex_lock(&fi->inmem_lock); list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) { - lock_page(cur->page); - if (!abort) { - if (cur->page->mapping == inode->i_mapping) { - set_page_dirty(cur->page); - f2fs_wait_on_page_writeback(cur->page, DATA); - if (clear_page_dirty_for_io(cur->page)) - inode_dec_dirty_pages(inode); - trace_f2fs_commit_inmem_page(cur->page, INMEM); - fio.page = cur->page; - err = do_write_data_page(&fio); - if (err) { - unlock_page(cur->page); - break; - } - clear_cold_data(cur->page); - submit_bio = true; + struct page *page = cur->page; + + lock_page(page); + if (page->mapping == inode->i_mapping) { + trace_f2fs_commit_inmem_page(page, INMEM); + + set_page_dirty(page); + f2fs_wait_on_page_writeback(page, DATA, true); + if (clear_page_dirty_for_io(page)) + inode_dec_dirty_pages(inode); + + fio.page = page; + err = do_write_data_page(&fio); + if (err) { + unlock_page(page); + break; } - } else { - trace_f2fs_commit_inmem_page(cur->page, INMEM_DROP); + + /* record old blkaddr for revoking */ + cur->old_addr = fio.old_blkaddr; + + clear_cold_data(page); + submit_bio = true; } - set_page_private(cur->page, 0); - ClearPagePrivate(cur->page); - f2fs_put_page(cur->page, 1); + unlock_page(page); + list_move_tail(&cur->list, revoke_list); + } - list_del(&cur->list); - kmem_cache_free(inmem_entry_slab, cur); - dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES); + if (submit_bio) + f2fs_submit_merged_bio_cond(sbi, inode, NULL, 0, DATA, WRITE); + + if (!err) + __revoke_inmem_pages(inode, revoke_list, false, false); + + return err; +} + +int commit_inmem_pages(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct f2fs_inode_info *fi = F2FS_I(inode); + struct list_head revoke_list; + int err; + + INIT_LIST_HEAD(&revoke_list); + f2fs_balance_fs(sbi, true); + f2fs_lock_op(sbi); + + mutex_lock(&fi->inmem_lock); + err = __commit_inmem_pages(inode, &revoke_list); + if (err) { + int ret; + /* + * try to revoke all committed pages, but still we could fail + * due to no memory or other reason, if that happened, EAGAIN + * will be returned, which means in such case, transaction is + * already not integrity, caller should use journal to do the + * recovery or rewrite & commit last transaction. For other + * error number, revoking was done by filesystem itself. + */ + ret = __revoke_inmem_pages(inode, &revoke_list, false, true); + if (ret) + err = ret; + + /* drop all uncommitted pages */ + __revoke_inmem_pages(inode, &fi->inmem_pages, true, false); } mutex_unlock(&fi->inmem_lock); - if (!abort) { - f2fs_unlock_op(sbi); - if (submit_bio) - f2fs_submit_merged_bio(sbi, DATA, WRITE); - } + f2fs_unlock_op(sbi); return err; } @@ -281,13 +341,25 @@ int commit_inmem_pages(struct inode *inode, bool abort) * This function balances dirty node and dentry pages. * In addition, it controls garbage collection. */ -void f2fs_balance_fs(struct f2fs_sb_info *sbi) +void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) { +#ifdef CONFIG_F2FS_FAULT_INJECTION + if (time_to_inject(sbi, FAULT_CHECKPOINT)) + f2fs_stop_checkpoint(sbi, false); +#endif + + if (!need) + return; + + /* balance_fs_bg is able to be pending */ + if (excess_cached_nats(sbi)) + f2fs_balance_fs_bg(sbi); + /* * We should do GC or end up with checkpoint, if there are so many dirty * dir/node pages without enough free segments. */ - if (has_not_enough_free_secs(sbi, 0)) { + if (has_not_enough_free_secs(sbi, 0, 0)) { mutex_lock(&sbi->gc_mutex); f2fs_gc(sbi, false); } @@ -304,14 +376,26 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK); if (!available_free_memory(sbi, FREE_NIDS)) - try_to_free_nids(sbi, NAT_ENTRY_PER_BLOCK * FREE_NID_PAGES); + try_to_free_nids(sbi, MAX_FREE_NIDS); + else + build_free_nids(sbi); /* checkpoint is the only way to shrink partial cached entries */ if (!available_free_memory(sbi, NAT_ENTRIES) || - excess_prefree_segs(sbi) || !available_free_memory(sbi, INO_ENTRIES) || - jiffies > sbi->cp_expires) + excess_prefree_segs(sbi) || + excess_dirty_nats(sbi) || + (is_idle(sbi) && f2fs_time_over(sbi, CP_TIME))) { + if (test_opt(sbi, DATA_FLUSH)) { + struct blk_plug plug; + + blk_start_plug(&plug); + sync_dirty_inodes(sbi, FILE_INODE); + blk_finish_plug(&plug); + } f2fs_sync_fs(sbi->sb, true); + stat_inc_bg_cp_count(sbi->stat_info); + } } static int issue_flush_thread(void *data) @@ -361,24 +445,28 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi) if (test_opt(sbi, NOBARRIER)) return 0; - if (!test_opt(sbi, FLUSH_MERGE)) { + if (!test_opt(sbi, FLUSH_MERGE) || !atomic_read(&fcc->submit_flush)) { struct bio *bio = f2fs_bio_alloc(0); int ret; + atomic_inc(&fcc->submit_flush); bio->bi_bdev = sbi->sb->s_bdev; ret = submit_bio_wait(WRITE_FLUSH, bio); + atomic_dec(&fcc->submit_flush); bio_put(bio); return ret; } init_completion(&cmd.wait); + atomic_inc(&fcc->submit_flush); llist_add(&cmd.llnode, &fcc->issue_list); if (!fcc->dispatch_list) wake_up(&fcc->flush_wait_queue); wait_for_completion(&cmd.wait); + atomic_dec(&fcc->submit_flush); return cmd.ret; } @@ -392,6 +480,7 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi) fcc = kzalloc(sizeof(struct flush_cmd_control), GFP_KERNEL); if (!fcc) return -ENOMEM; + atomic_set(&fcc->submit_flush, 0); init_waitqueue_head(&fcc->flush_wait_queue); init_llist_head(&fcc->issue_list); SM_I(sbi)->cmd_control_info = fcc; @@ -513,28 +602,6 @@ static int f2fs_issue_discard(struct f2fs_sb_info *sbi, return blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0); } -bool discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr) -{ - int err = -ENOTSUPP; - - if (test_opt(sbi, DISCARD)) { - struct seg_entry *se = get_seg_entry(sbi, - GET_SEGNO(sbi, blkaddr)); - unsigned int offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr); - - if (f2fs_test_bit(offset, se->discard_map)) - return false; - - err = f2fs_issue_discard(sbi, blkaddr, 1); - } - - if (err) { - update_meta_page(sbi, NULL, blkaddr); - return true; - } - return false; -} - static void __add_discard_entry(struct f2fs_sb_info *sbi, struct cp_control *cpc, struct seg_entry *se, unsigned int start, unsigned int end) @@ -573,7 +640,7 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc) bool force = (cpc->reason == CP_DISCARD); int i; - if (se->valid_blocks == max_blocks) + if (se->valid_blocks == max_blocks || !f2fs_discard_en(sbi)) return; if (!force) { @@ -593,6 +660,10 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc) break; end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1); + if (force && start && end != max_blocks + && (end - start) < cpc->trim_minlen) + continue; + __add_discard_entry(sbi, cpc, se, start, end); } } @@ -630,6 +701,8 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc) struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); unsigned long *prefree_map = dirty_i->dirty_segmap[PRE]; unsigned int start = 0, end = -1; + unsigned int secno, start_segno; + bool force = (cpc->reason == CP_DISCARD); mutex_lock(&dirty_i->seglist_lock); @@ -646,17 +719,31 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc) dirty_i->nr_dirty[PRE] -= end - start; - if (!test_opt(sbi, DISCARD)) + if (force || !test_opt(sbi, DISCARD)) continue; - f2fs_issue_discard(sbi, START_BLOCK(sbi, start), + if (!test_opt(sbi, LFS) || sbi->segs_per_sec == 1) { + f2fs_issue_discard(sbi, START_BLOCK(sbi, start), (end - start) << sbi->log_blocks_per_seg); + continue; + } +next: + secno = GET_SECNO(sbi, start); + start_segno = secno * sbi->segs_per_sec; + if (!IS_CURSEC(sbi, secno) && + !get_valid_blocks(sbi, start, sbi->segs_per_sec)) + f2fs_issue_discard(sbi, START_BLOCK(sbi, start_segno), + sbi->segs_per_sec << sbi->log_blocks_per_seg); + + start = start_segno + sbi->segs_per_sec; + if (start < end) + goto next; } mutex_unlock(&dirty_i->seglist_lock); /* send small discards */ list_for_each_entry_safe(entry, this, head, list) { - if (cpc->reason == CP_DISCARD && entry->len < cpc->trim_minlen) + if (force && entry->len < cpc->trim_minlen) goto skip; f2fs_issue_discard(sbi, entry->blkaddr, entry->len); cpc->trimmed += entry->len; @@ -711,12 +798,14 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) if (del > 0) { if (f2fs_test_and_set_bit(offset, se->cur_valid_map)) f2fs_bug_on(sbi, 1); - if (!f2fs_test_and_set_bit(offset, se->discard_map)) + if (f2fs_discard_en(sbi) && + !f2fs_test_and_set_bit(offset, se->discard_map)) sbi->discard_blks--; } else { if (!f2fs_test_and_clear_bit(offset, se->cur_valid_map)) f2fs_bug_on(sbi, 1); - if (f2fs_test_and_clear_bit(offset, se->discard_map)) + if (f2fs_discard_en(sbi) && + f2fs_test_and_clear_bit(offset, se->discard_map)) sbi->discard_blks++; } if (!f2fs_test_bit(offset, se->ckpt_valid_map)) @@ -817,12 +906,12 @@ int npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra) } } - sum_in_page = (PAGE_CACHE_SIZE - 2 * SUM_JOURNAL_SIZE - + sum_in_page = (PAGE_SIZE - 2 * SUM_JOURNAL_SIZE - SUM_FOOTER_SIZE) / SUMMARY_SIZE; if (valid_sum_count <= sum_in_page) return 1; else if ((valid_sum_count - sum_in_page) <= - (PAGE_CACHE_SIZE - SUM_FOOTER_SIZE) / SUMMARY_SIZE) + (PAGE_SIZE - SUM_FOOTER_SIZE) / SUMMARY_SIZE) return 2; return 3; } @@ -841,9 +930,9 @@ void update_meta_page(struct f2fs_sb_info *sbi, void *src, block_t blk_addr) void *dst = page_address(page); if (src) - memcpy(dst, src, PAGE_CACHE_SIZE); + memcpy(dst, src, PAGE_SIZE); else - memset(dst, 0, PAGE_CACHE_SIZE); + memset(dst, 0, PAGE_SIZE); set_page_dirty(page); f2fs_put_page(page, 1); } @@ -854,6 +943,31 @@ static void write_sum_page(struct f2fs_sb_info *sbi, update_meta_page(sbi, (void *)sum_blk, blk_addr); } +static void write_current_sum_page(struct f2fs_sb_info *sbi, + int type, block_t blk_addr) +{ + struct curseg_info *curseg = CURSEG_I(sbi, type); + struct page *page = grab_meta_page(sbi, blk_addr); + struct f2fs_summary_block *src = curseg->sum_blk; + struct f2fs_summary_block *dst; + + dst = (struct f2fs_summary_block *)page_address(page); + + mutex_lock(&curseg->curseg_mutex); + + down_read(&curseg->journal_rwsem); + memcpy(&dst->journal, curseg->journal, SUM_JOURNAL_SIZE); + up_read(&curseg->journal_rwsem); + + memcpy(dst->entries, src->entries, SUM_ENTRY_SIZE); + memcpy(&dst->footer, &src->footer, SUM_FOOTER_SIZE); + + mutex_unlock(&curseg->curseg_mutex); + + set_page_dirty(page); + f2fs_put_page(page, 1); +} + static int is_next_segment_free(struct f2fs_sb_info *sbi, int type) { struct curseg_info *curseg = CURSEG_I(sbi, type); @@ -886,9 +1000,8 @@ static void get_new_segment(struct f2fs_sb_info *sbi, if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) { segno = find_next_zero_bit(free_i->free_segmap, - MAIN_SEGS(sbi), *newseg + 1); - if (segno - *newseg < sbi->segs_per_sec - - (*newseg % sbi->segs_per_sec)) + (hint + 1) * sbi->segs_per_sec, *newseg + 1); + if (segno < (hint + 1) * sbi->segs_per_sec) goto got_it; } find_other_zone: @@ -1071,7 +1184,7 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type) struct curseg_info *curseg = CURSEG_I(sbi, type); const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops; - if (IS_NODESEG(type) || !has_not_enough_free_secs(sbi, 0)) + if (IS_NODESEG(type) || !has_not_enough_free_secs(sbi, 0, 0)) return v_ops->get_victim(sbi, &(curseg)->next_segno, BG_GC, type, SSR); @@ -1120,6 +1233,9 @@ void allocate_new_segments(struct f2fs_sb_info *sbi) { int i; + if (test_opt(sbi, LFS)) + return; + for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) __allocate_new_segments(sbi, i); } @@ -1134,6 +1250,7 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) __u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1; unsigned int start_segno, end_segno; struct cp_control cpc; + int err = 0; if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize) return -EINVAL; @@ -1142,6 +1259,12 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) if (end <= MAIN_BLKADDR(sbi)) goto out; + if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) { + f2fs_msg(sbi->sb, KERN_WARNING, + "Found FS corruption, run fsck to fix."); + goto out; + } + /* start/end segment number in main_area */ start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start); end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 : @@ -1164,12 +1287,16 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) sbi->segs_per_sec) - 1, end_segno); mutex_lock(&sbi->gc_mutex); - write_checkpoint(sbi, &cpc); + err = write_checkpoint(sbi, &cpc); mutex_unlock(&sbi->gc_mutex); + if (err) + break; + + schedule(); } out: range->len = F2FS_BLK_TO_BYTES(cpc.trimmed); - return 0; + return err; } static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type) @@ -1256,7 +1383,7 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, /* direct_io'ed data is aligned to the segment for better performance */ if (direct_io && curseg->next_blkoff && - !has_not_enough_free_secs(sbi, 0)) + !has_not_enough_free_secs(sbi, 0, 0)) __allocate_new_segments(sbi, type); *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); @@ -1292,11 +1419,17 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) { int type = __get_segment_type(fio->page, fio->type); - allocate_data_block(fio->sbi, fio->page, fio->blk_addr, - &fio->blk_addr, sum, type); + if (fio->type == NODE || fio->type == DATA) + mutex_lock(&fio->sbi->wio_mutex[fio->type]); + + allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr, + &fio->new_blkaddr, sum, type); /* writeout dirty page into bdev */ f2fs_submit_page_mbio(fio); + + if (fio->type == NODE || fio->type == DATA) + mutex_unlock(&fio->sbi->wio_mutex[fio->type]); } void write_meta_page(struct f2fs_sb_info *sbi, struct page *page) @@ -1305,7 +1438,8 @@ void write_meta_page(struct f2fs_sb_info *sbi, struct page *page) .sbi = sbi, .type = META, .rw = WRITE_SYNC | REQ_META | REQ_PRIO, - .blk_addr = page->index, + .old_blkaddr = page->index, + .new_blkaddr = page->index, .page = page, .encrypted_page = NULL, }; @@ -1335,19 +1469,19 @@ void write_data_page(struct dnode_of_data *dn, struct f2fs_io_info *fio) get_node_info(sbi, dn->nid, &ni); set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); do_write_page(&sum, fio); - dn->data_blkaddr = fio->blk_addr; + f2fs_update_data_blkaddr(dn, fio->new_blkaddr); } void rewrite_data_page(struct f2fs_io_info *fio) { + fio->new_blkaddr = fio->old_blkaddr; stat_inc_inplace_blocks(fio->sbi); f2fs_submit_page_mbio(fio); } -static void __f2fs_replace_block(struct f2fs_sb_info *sbi, - struct f2fs_summary *sum, +void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, block_t old_blkaddr, block_t new_blkaddr, - bool recover_curseg) + bool recover_curseg, bool recover_newaddr) { struct sit_info *sit_i = SIT_I(sbi); struct curseg_info *curseg; @@ -1390,7 +1524,7 @@ static void __f2fs_replace_block(struct f2fs_sb_info *sbi, curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr); __add_sum_entry(sbi, type, sum); - if (!recover_curseg) + if (!recover_curseg || recover_newaddr) update_sit_entry(sbi, new_blkaddr, 1); if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) update_sit_entry(sbi, old_blkaddr, -1); @@ -1414,66 +1548,30 @@ static void __f2fs_replace_block(struct f2fs_sb_info *sbi, void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn, block_t old_addr, block_t new_addr, - unsigned char version, bool recover_curseg) + unsigned char version, bool recover_curseg, + bool recover_newaddr) { struct f2fs_summary sum; set_summary(&sum, dn->nid, dn->ofs_in_node, version); - __f2fs_replace_block(sbi, &sum, old_addr, new_addr, recover_curseg); - - dn->data_blkaddr = new_addr; - set_data_blkaddr(dn); - f2fs_update_extent_cache(dn); -} - -static inline bool is_merged_page(struct f2fs_sb_info *sbi, - struct page *page, enum page_type type) -{ - enum page_type btype = PAGE_TYPE_OF_BIO(type); - struct f2fs_bio_info *io = &sbi->write_io[btype]; - struct bio_vec *bvec; - struct page *target; - int i; - - down_read(&io->io_rwsem); - if (!io->bio) { - up_read(&io->io_rwsem); - return false; - } - - bio_for_each_segment_all(bvec, io->bio, i) { - - if (bvec->bv_page->mapping) { - target = bvec->bv_page; - } else { - struct f2fs_crypto_ctx *ctx; - - /* encrypted page */ - ctx = (struct f2fs_crypto_ctx *)page_private( - bvec->bv_page); - target = ctx->w.control_page; - } - - if (page == target) { - up_read(&io->io_rwsem); - return true; - } - } + __f2fs_replace_block(sbi, &sum, old_addr, new_addr, + recover_curseg, recover_newaddr); - up_read(&io->io_rwsem); - return false; + f2fs_update_data_blkaddr(dn, new_addr); } void f2fs_wait_on_page_writeback(struct page *page, - enum page_type type) + enum page_type type, bool ordered) { if (PageWriteback(page)) { struct f2fs_sb_info *sbi = F2FS_P_SB(page); - if (is_merged_page(sbi, page, type)) - f2fs_submit_merged_bio(sbi, type, WRITE); - wait_on_page_writeback(page); + f2fs_submit_merged_bio_cond(sbi, NULL, page, 0, type, WRITE); + if (ordered) + wait_on_page_writeback(page); + else + wait_for_stable_page(page); } } @@ -1482,14 +1580,12 @@ void f2fs_wait_on_encrypted_page_writeback(struct f2fs_sb_info *sbi, { struct page *cpage; - if (blkaddr == NEW_ADDR) + if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) return; - f2fs_bug_on(sbi, blkaddr == NULL_ADDR); - cpage = find_lock_page(META_MAPPING(sbi), blkaddr); if (cpage) { - f2fs_wait_on_page_writeback(cpage, DATA); + f2fs_wait_on_page_writeback(cpage, DATA, true); f2fs_put_page(cpage, 1); } } @@ -1510,12 +1606,11 @@ static int read_compacted_summaries(struct f2fs_sb_info *sbi) /* Step 1: restore nat cache */ seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA); - memcpy(&seg_i->sum_blk->n_nats, kaddr, SUM_JOURNAL_SIZE); + memcpy(seg_i->journal, kaddr, SUM_JOURNAL_SIZE); /* Step 2: restore sit cache */ seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA); - memcpy(&seg_i->sum_blk->n_sits, kaddr + SUM_JOURNAL_SIZE, - SUM_JOURNAL_SIZE); + memcpy(seg_i->journal, kaddr + SUM_JOURNAL_SIZE, SUM_JOURNAL_SIZE); offset = 2 * SUM_JOURNAL_SIZE; /* Step 3: restore summary entries */ @@ -1539,7 +1634,7 @@ static int read_compacted_summaries(struct f2fs_sb_info *sbi) s = (struct f2fs_summary *)(kaddr + offset); seg_i->sum_blk->entries[j] = *s; offset += SUMMARY_SIZE; - if (offset + SUMMARY_SIZE <= PAGE_CACHE_SIZE - + if (offset + SUMMARY_SIZE <= PAGE_SIZE - SUM_FOOTER_SIZE) continue; @@ -1611,7 +1706,14 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type) /* set uncompleted segment to curseg */ curseg = CURSEG_I(sbi, type); mutex_lock(&curseg->curseg_mutex); - memcpy(curseg->sum_blk, sum, PAGE_CACHE_SIZE); + + /* update journal info */ + down_write(&curseg->journal_rwsem); + memcpy(curseg->journal, &sum->journal, SUM_JOURNAL_SIZE); + up_write(&curseg->journal_rwsem); + + memcpy(curseg->sum_blk->entries, sum->entries, SUM_ENTRY_SIZE); + memcpy(&curseg->sum_blk->footer, &sum->footer, SUM_FOOTER_SIZE); curseg->next_segno = segno; reset_curseg(sbi, type, 0); curseg->alloc_type = ckpt->alloc_type[type]; @@ -1626,7 +1728,7 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi) int type = CURSEG_HOT_DATA; int err; - if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG)) { + if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG)) { int npages = npages_for_summary_flush(sbi, true); if (npages >= 2) @@ -1666,13 +1768,12 @@ static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr) /* Step 1: write nat cache */ seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA); - memcpy(kaddr, &seg_i->sum_blk->n_nats, SUM_JOURNAL_SIZE); + memcpy(kaddr, seg_i->journal, SUM_JOURNAL_SIZE); written_size += SUM_JOURNAL_SIZE; /* Step 2: write sit cache */ seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA); - memcpy(kaddr + written_size, &seg_i->sum_blk->n_sits, - SUM_JOURNAL_SIZE); + memcpy(kaddr + written_size, seg_i->journal, SUM_JOURNAL_SIZE); written_size += SUM_JOURNAL_SIZE; /* Step 3: write summary entries */ @@ -1694,7 +1795,7 @@ static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr) *summary = seg_i->sum_blk->entries[j]; written_size += SUMMARY_SIZE; - if (written_size + SUMMARY_SIZE <= PAGE_CACHE_SIZE - + if (written_size + SUMMARY_SIZE <= PAGE_SIZE - SUM_FOOTER_SIZE) continue; @@ -1718,17 +1819,13 @@ static void write_normal_summaries(struct f2fs_sb_info *sbi, else end = type + NR_CURSEG_NODE_TYPE; - for (i = type; i < end; i++) { - struct curseg_info *sum = CURSEG_I(sbi, i); - mutex_lock(&sum->curseg_mutex); - write_sum_page(sbi, sum->sum_blk, blkaddr + (i - type)); - mutex_unlock(&sum->curseg_mutex); - } + for (i = type; i < end; i++) + write_current_sum_page(sbi, i, blkaddr + (i - type)); } void write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk) { - if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG)) + if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG)) write_compacted_summaries(sbi, start_blk); else write_normal_summaries(sbi, start_blk, CURSEG_HOT_DATA); @@ -1739,24 +1836,24 @@ void write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk) write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE); } -int lookup_journal_in_cursum(struct f2fs_summary_block *sum, int type, +int lookup_journal_in_cursum(struct f2fs_journal *journal, int type, unsigned int val, int alloc) { int i; if (type == NAT_JOURNAL) { - for (i = 0; i < nats_in_cursum(sum); i++) { - if (le32_to_cpu(nid_in_journal(sum, i)) == val) + for (i = 0; i < nats_in_cursum(journal); i++) { + if (le32_to_cpu(nid_in_journal(journal, i)) == val) return i; } - if (alloc && nats_in_cursum(sum) < NAT_JOURNAL_ENTRIES) - return update_nats_in_cursum(sum, 1); + if (alloc && __has_cursum_space(journal, 1, NAT_JOURNAL)) + return update_nats_in_cursum(journal, 1); } else if (type == SIT_JOURNAL) { - for (i = 0; i < sits_in_cursum(sum); i++) - if (le32_to_cpu(segno_in_journal(sum, i)) == val) + for (i = 0; i < sits_in_cursum(journal); i++) + if (le32_to_cpu(segno_in_journal(journal, i)) == val) return i; - if (alloc && sits_in_cursum(sum) < SIT_JOURNAL_ENTRIES) - return update_sits_in_cursum(sum, 1); + if (alloc && __has_cursum_space(journal, 1, SIT_JOURNAL)) + return update_sits_in_cursum(journal, 1); } return -1; } @@ -1785,7 +1882,7 @@ static struct page *get_next_sit_page(struct f2fs_sb_info *sbi, src_addr = page_address(src_page); dst_addr = page_address(dst_page); - memcpy(dst_addr, src_addr, PAGE_CACHE_SIZE); + memcpy(dst_addr, src_addr, PAGE_SIZE); set_page_dirty(dst_page); f2fs_put_page(src_page, 1); @@ -1860,20 +1957,22 @@ static void add_sits_in_set(struct f2fs_sb_info *sbi) static void remove_sits_in_journal(struct f2fs_sb_info *sbi) { struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); - struct f2fs_summary_block *sum = curseg->sum_blk; + struct f2fs_journal *journal = curseg->journal; int i; - for (i = sits_in_cursum(sum) - 1; i >= 0; i--) { + down_write(&curseg->journal_rwsem); + for (i = 0; i < sits_in_cursum(journal); i++) { unsigned int segno; bool dirtied; - segno = le32_to_cpu(segno_in_journal(sum, i)); + segno = le32_to_cpu(segno_in_journal(journal, i)); dirtied = __mark_sit_entry_dirty(sbi, segno); if (!dirtied) add_sit_entry(segno, &SM_I(sbi)->sit_entry_set); } - update_sits_in_cursum(sum, -sits_in_cursum(sum)); + update_sits_in_cursum(journal, -i); + up_write(&curseg->journal_rwsem); } /* @@ -1885,13 +1984,12 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) struct sit_info *sit_i = SIT_I(sbi); unsigned long *bitmap = sit_i->dirty_sentries_bitmap; struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); - struct f2fs_summary_block *sum = curseg->sum_blk; + struct f2fs_journal *journal = curseg->journal; struct sit_entry_set *ses, *tmp; struct list_head *head = &SM_I(sbi)->sit_entry_set; bool to_journal = true; struct seg_entry *se; - mutex_lock(&curseg->curseg_mutex); mutex_lock(&sit_i->sentry_lock); if (!sit_i->dirty_sentries) @@ -1908,7 +2006,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) * entries, remove all entries from journal and add and account * them in sit entry set. */ - if (!__has_cursum_space(sum, sit_i->dirty_sentries, SIT_JOURNAL)) + if (!__has_cursum_space(journal, sit_i->dirty_sentries, SIT_JOURNAL)) remove_sits_in_journal(sbi); /* @@ -1925,10 +2023,12 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) unsigned int segno = start_segno; if (to_journal && - !__has_cursum_space(sum, ses->entry_cnt, SIT_JOURNAL)) + !__has_cursum_space(journal, ses->entry_cnt, SIT_JOURNAL)) to_journal = false; - if (!to_journal) { + if (to_journal) { + down_write(&curseg->journal_rwsem); + } else { page = get_next_sit_page(sbi, start_segno); raw_sit = page_address(page); } @@ -1946,13 +2046,13 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) } if (to_journal) { - offset = lookup_journal_in_cursum(sum, + offset = lookup_journal_in_cursum(journal, SIT_JOURNAL, segno, 1); f2fs_bug_on(sbi, offset < 0); - segno_in_journal(sum, offset) = + segno_in_journal(journal, offset) = cpu_to_le32(segno); seg_info_to_raw_sit(se, - &sit_in_journal(sum, offset)); + &sit_in_journal(journal, offset)); } else { sit_offset = SIT_ENTRY_OFFSET(sit_i, segno); seg_info_to_raw_sit(se, @@ -1964,7 +2064,9 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) ses->entry_cnt--; } - if (!to_journal) + if (to_journal) + up_write(&curseg->journal_rwsem); + else f2fs_put_page(page, 1); f2fs_bug_on(sbi, ses->entry_cnt); @@ -1979,7 +2081,6 @@ out: add_discard_addrs(sbi, cpc); } mutex_unlock(&sit_i->sentry_lock); - mutex_unlock(&curseg->curseg_mutex); set_prefree_as_free_segments(sbi); } @@ -2015,12 +2116,16 @@ static int build_sit_info(struct f2fs_sb_info *sbi) = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); sit_i->sentries[start].ckpt_valid_map = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); - sit_i->sentries[start].discard_map - = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); if (!sit_i->sentries[start].cur_valid_map || - !sit_i->sentries[start].ckpt_valid_map || - !sit_i->sentries[start].discard_map) + !sit_i->sentries[start].ckpt_valid_map) return -ENOMEM; + + if (f2fs_discard_en(sbi)) { + sit_i->sentries[start].discard_map + = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); + if (!sit_i->sentries[start].discard_map) + return -ENOMEM; + } } sit_i->tmp_map = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); @@ -2108,9 +2213,14 @@ static int build_curseg(struct f2fs_sb_info *sbi) for (i = 0; i < NR_CURSEG_TYPE; i++) { mutex_init(&array[i].curseg_mutex); - array[i].sum_blk = kzalloc(PAGE_CACHE_SIZE, GFP_KERNEL); + array[i].sum_blk = kzalloc(PAGE_SIZE, GFP_KERNEL); if (!array[i].sum_blk) return -ENOMEM; + init_rwsem(&array[i].journal_rwsem); + array[i].journal = kzalloc(sizeof(struct f2fs_journal), + GFP_KERNEL); + if (!array[i].journal) + return -ENOMEM; array[i].segno = NULL_SEGNO; array[i].next_blkoff = 0; } @@ -2121,11 +2231,13 @@ static void build_sit_entries(struct f2fs_sb_info *sbi) { struct sit_info *sit_i = SIT_I(sbi); struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); - struct f2fs_summary_block *sum = curseg->sum_blk; + struct f2fs_journal *journal = curseg->journal; + struct seg_entry *se; + struct f2fs_sit_entry sit; int sit_blk_cnt = SIT_BLK_CNT(sbi); unsigned int i, start, end; unsigned int readed, start_blk = 0; - int nrpages = MAX_BIO_BLOCKS(sbi); + int nrpages = MAX_BIO_BLOCKS(sbi) * 8; do { readed = ra_meta_pages(sbi, start_blk, nrpages, META_SIT, true); @@ -2134,41 +2246,58 @@ static void build_sit_entries(struct f2fs_sb_info *sbi) end = (start_blk + readed) * sit_i->sents_per_block; for (; start < end && start < MAIN_SEGS(sbi); start++) { - struct seg_entry *se = &sit_i->sentries[start]; struct f2fs_sit_block *sit_blk; - struct f2fs_sit_entry sit; struct page *page; - mutex_lock(&curseg->curseg_mutex); - for (i = 0; i < sits_in_cursum(sum); i++) { - if (le32_to_cpu(segno_in_journal(sum, i)) - == start) { - sit = sit_in_journal(sum, i); - mutex_unlock(&curseg->curseg_mutex); - goto got_it; - } - } - mutex_unlock(&curseg->curseg_mutex); - + se = &sit_i->sentries[start]; page = get_current_sit_page(sbi, start); sit_blk = (struct f2fs_sit_block *)page_address(page); sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)]; f2fs_put_page(page, 1); -got_it: + check_block_count(sbi, start, &sit); seg_info_from_raw_sit(se, &sit); /* build discard map only one time */ - memcpy(se->discard_map, se->cur_valid_map, SIT_VBLOCK_MAP_SIZE); - sbi->discard_blks += sbi->blocks_per_seg - se->valid_blocks; - - if (sbi->segs_per_sec > 1) { - struct sec_entry *e = get_sec_entry(sbi, start); - e->valid_blocks += se->valid_blocks; + if (f2fs_discard_en(sbi)) { + memcpy(se->discard_map, se->cur_valid_map, + SIT_VBLOCK_MAP_SIZE); + sbi->discard_blks += sbi->blocks_per_seg - + se->valid_blocks; } + + if (sbi->segs_per_sec > 1) + get_sec_entry(sbi, start)->valid_blocks += + se->valid_blocks; } start_blk += readed; } while (start_blk < sit_blk_cnt); + + down_read(&curseg->journal_rwsem); + for (i = 0; i < sits_in_cursum(journal); i++) { + unsigned int old_valid_blocks; + + start = le32_to_cpu(segno_in_journal(journal, i)); + se = &sit_i->sentries[start]; + sit = sit_in_journal(journal, i); + + old_valid_blocks = se->valid_blocks; + + check_block_count(sbi, start, &sit); + seg_info_from_raw_sit(se, &sit); + + if (f2fs_discard_en(sbi)) { + memcpy(se->discard_map, se->cur_valid_map, + SIT_VBLOCK_MAP_SIZE); + sbi->discard_blks += old_valid_blocks - + se->valid_blocks; + } + + if (sbi->segs_per_sec > 1) + get_sec_entry(sbi, start)->valid_blocks += + se->valid_blocks - old_valid_blocks; + } + up_read(&curseg->journal_rwsem); } static void init_free_segmap(struct f2fs_sb_info *sbi) @@ -2301,7 +2430,11 @@ int build_segment_manager(struct f2fs_sb_info *sbi) sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr); sm_info->rec_prefree_segments = sm_info->main_segments * DEF_RECLAIM_PREFREE_SEGMENTS / 100; - sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC; + if (sm_info->rec_prefree_segments > DEF_MAX_RECLAIM_PREFREE_SEGMENTS) + sm_info->rec_prefree_segments = DEF_MAX_RECLAIM_PREFREE_SEGMENTS; + + if (!test_opt(sbi, LFS)) + sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC; sm_info->min_ipu_util = DEF_MIN_IPU_UTIL; sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS; @@ -2383,8 +2516,10 @@ static void destroy_curseg(struct f2fs_sb_info *sbi) if (!array) return; SM_I(sbi)->curseg_array = NULL; - for (i = 0; i < NR_CURSEG_TYPE; i++) + for (i = 0; i < NR_CURSEG_TYPE; i++) { kfree(array[i].sum_blk); + kfree(array[i].journal); + } kfree(array); } @@ -2450,7 +2585,7 @@ int __init create_segment_manager_caches(void) sit_entry_set_slab = f2fs_kmem_cache_create("sit_entry_set", sizeof(struct sit_entry_set)); if (!sit_entry_set_slab) - goto destory_discard_entry; + goto destroy_discard_entry; inmem_entry_slab = f2fs_kmem_cache_create("inmem_page_entry", sizeof(struct inmem_pages)); @@ -2460,7 +2595,7 @@ int __init create_segment_manager_caches(void) destroy_sit_entry_set: kmem_cache_destroy(sit_entry_set_slab); -destory_discard_entry: +destroy_discard_entry: kmem_cache_destroy(discard_entry_slab); fail: return -ENOMEM; -- cgit v1.2.3 From 372f295d622c643f865e3cb83b2cf9b23f5bc49b Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 11 Oct 2016 22:57:01 +0800 Subject: f2fs: give a chance to detach from dirty list If there is no dirty pages in inode, we should give a chance to detach the inode from global dirty list, otherwise it needs to call another unnecessary .writepages for detaching. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index b3c61ae37f92..75477ec6c535 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -272,8 +272,10 @@ static int __commit_inmem_pages(struct inode *inode, set_page_dirty(page); f2fs_wait_on_page_writeback(page, DATA, true); - if (clear_page_dirty_for_io(page)) + if (clear_page_dirty_for_io(page)) { inode_dec_dirty_pages(inode); + remove_dirty_inode(inode); + } fio.page = page; err = do_write_data_page(&fio); -- cgit v1.2.3 From b1b14da24aab69fbb84159fe5c57035dafc50276 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 11 Oct 2016 22:31:35 +0800 Subject: f2fs: don't interrupt free nids building during nid allocation Let build_free_nids support sync/async methods, in allocation flow of nids, we use synchronuous method, so that we can avoid looping in alloc_nid when free memory is low; in unblock_operations and f2fs_balance_fs_bg we use asynchronuous method in where low memory condition can interrupt us. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 75477ec6c535..48903702de27 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -380,7 +380,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) if (!available_free_memory(sbi, FREE_NIDS)) try_to_free_nids(sbi, MAX_FREE_NIDS); else - build_free_nids(sbi); + build_free_nids(sbi, false); /* checkpoint is the only way to shrink partial cached entries */ if (!available_free_memory(sbi, NAT_ENTRIES) || -- cgit v1.2.3 From 1d486e74cf8427152f96688d466a1c57a44a7642 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 18 Oct 2016 11:07:45 -0700 Subject: f2fs: use BIO_MAX_PAGES for bio allocation We don't need to allocate bio partially in order to maximize sequential writes. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 48903702de27..ec4d74c26067 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2239,10 +2239,10 @@ static void build_sit_entries(struct f2fs_sb_info *sbi) int sit_blk_cnt = SIT_BLK_CNT(sbi); unsigned int i, start, end; unsigned int readed, start_blk = 0; - int nrpages = MAX_BIO_BLOCKS(sbi) * 8; do { - readed = ra_meta_pages(sbi, start_blk, nrpages, META_SIT, true); + readed = ra_meta_pages(sbi, start_blk, BIO_MAX_PAGES, + META_SIT, true); start = start_blk * sit_i->sents_per_block; end = (start_blk + readed) * sit_i->sents_per_block; -- cgit v1.2.3 From ac0357e5d5e91b10d66cd6b01fe10424bb5215b7 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 28 Oct 2016 17:45:06 +0900 Subject: f2fs: Reset sequential zones on zoned block devices When a zoned block device is mounted, discarding sections contained in sequential zones must reset the zone write pointer. For sections contained in conventional zones, the regular discard is used if the drive supports it. Signed-off-by: Damien Le Moal Signed-off-by: Jaegeuk Kim Conflicts: fs/f2fs/segment.c --- fs/f2fs/segment.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index ec4d74c26067..8e4863bd36f5 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "f2fs.h" #include "segment.h" @@ -584,6 +585,45 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) mutex_unlock(&dirty_i->seglist_lock); } +#ifdef CONFIG_BLK_DEV_ZONED +static int f2fs_issue_discard_zone(struct f2fs_sb_info *sbi, + block_t blkstart, block_t blklen) +{ + sector_t sector = SECTOR_FROM_BLOCK(blkstart); + sector_t nr_sects = SECTOR_FROM_BLOCK(blklen); + struct block_device *bdev = sbi->sb->s_bdev; + + if (nr_sects != bdev_zone_size(bdev)) { + f2fs_msg(sbi->sb, KERN_INFO, + "Unaligned discard attempted (sector %llu + %llu)", + (unsigned long long)sector, + (unsigned long long)nr_sects); + return -EIO; + } + + /* + * We need to know the type of the zone: for conventional zones, + * use regular discard if the drive supports it. For sequential + * zones, reset the zone write pointer. + */ + switch (get_blkz_type(sbi, blkstart)) { + + case BLK_ZONE_TYPE_CONVENTIONAL: + if (!blk_queue_discard(bdev_get_queue(bdev))) + return 0; + return blkdev_issue_discard(bdev, sector, nr_sects, + GFP_NOFS, 0); + case BLK_ZONE_TYPE_SEQWRITE_REQ: + case BLK_ZONE_TYPE_SEQWRITE_PREF: + return blkdev_reset_zones(bdev, sector, + nr_sects, GFP_NOFS); + default: + /* Unknown zone type: broken device ? */ + return -EIO; + } +} +#endif + static int f2fs_issue_discard(struct f2fs_sb_info *sbi, block_t blkstart, block_t blklen) { @@ -601,6 +641,11 @@ static int f2fs_issue_discard(struct f2fs_sb_info *sbi, sbi->discard_blks--; } trace_f2fs_issue_discard(sbi->sb, blkstart, blklen); + +#ifdef CONFIG_BLK_DEV_ZONED + if (f2fs_sb_mounted_blkzoned(sbi->sb)) + return f2fs_issue_discard_zone(sbi, blkstart, blklen); +#endif return blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0); } -- cgit v1.2.3 From 55fac8071160fb3368531abedb3b72e7a7394004 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 28 Oct 2016 17:45:07 +0900 Subject: f2fs: Trace reset zone events Similarly to the regular discard, trace zone reset events. Signed-off-by: Damien Le Moal Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 8e4863bd36f5..06b9d16a19f6 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -615,6 +615,7 @@ static int f2fs_issue_discard_zone(struct f2fs_sb_info *sbi, GFP_NOFS, 0); case BLK_ZONE_TYPE_SEQWRITE_REQ: case BLK_ZONE_TYPE_SEQWRITE_PREF: + trace_f2fs_issue_reset_zone(sbi->sb, blkstart); return blkdev_reset_zones(bdev, sector, nr_sects, GFP_NOFS); default: -- cgit v1.2.3 From 4cd4b0465d2227a6ca686d72f81d3fd3b207e94c Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 4 Nov 2016 14:33:57 -0700 Subject: f2fs: assign segments correctly for direct_io Previously, we assigned CURSEG_WARM_DATA for direct_io, but if we have two or four logs, we do not use that type at all. Let's fix it. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 06b9d16a19f6..4bdf1191a36f 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1422,8 +1422,12 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, struct curseg_info *curseg; bool direct_io = (type == CURSEG_DIRECT_IO); - type = direct_io ? CURSEG_WARM_DATA : type; - + if (direct_io) { + if (sbi->active_logs <= 4) + type = CURSEG_HOT_DATA; + else + type = CURSEG_WARM_DATA; + } curseg = CURSEG_I(sbi, type); mutex_lock(&curseg->curseg_mutex); -- cgit v1.2.3 From 17aa419b53395bb52d0afb5863335d9260e1775c Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 11 Nov 2016 12:31:40 -0800 Subject: f2fs: revert segment allocation for direct IO Now we don't need to be too much careful about storage alignment for dio, since its speed becomes quite fast and we'd better avoid any misalignment first. Revert: 38aa0889b250 (f2fs: align direct_io'ed data to section) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 36 +++++++++--------------------------- 1 file changed, 9 insertions(+), 27 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 4bdf1191a36f..19ab2e63d8d7 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1267,25 +1267,21 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi, stat_inc_seg_type(sbi, curseg); } -static void __allocate_new_segments(struct f2fs_sb_info *sbi, int type) -{ - struct curseg_info *curseg = CURSEG_I(sbi, type); - unsigned int old_segno; - - old_segno = curseg->segno; - SIT_I(sbi)->s_ops->allocate_segment(sbi, type, true); - locate_dirty_segment(sbi, old_segno); -} - void allocate_new_segments(struct f2fs_sb_info *sbi) { + struct curseg_info *curseg; + unsigned int old_segno; int i; if (test_opt(sbi, LFS)) return; - for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) - __allocate_new_segments(sbi, i); + for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { + curseg = CURSEG_I(sbi, i); + old_segno = curseg->segno; + SIT_I(sbi)->s_ops->allocate_segment(sbi, i, true); + locate_dirty_segment(sbi, old_segno); + } } static const struct segment_allocation default_salloc_ops = { @@ -1419,25 +1415,11 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, struct f2fs_summary *sum, int type) { struct sit_info *sit_i = SIT_I(sbi); - struct curseg_info *curseg; - bool direct_io = (type == CURSEG_DIRECT_IO); - - if (direct_io) { - if (sbi->active_logs <= 4) - type = CURSEG_HOT_DATA; - else - type = CURSEG_WARM_DATA; - } - curseg = CURSEG_I(sbi, type); + struct curseg_info *curseg = CURSEG_I(sbi, type); mutex_lock(&curseg->curseg_mutex); mutex_lock(&sit_i->sentry_lock); - /* direct_io'ed data is aligned to the segment for better performance */ - if (direct_io && curseg->next_blkoff && - !has_not_enough_free_secs(sbi, 0, 0)) - __allocate_new_segments(sbi, type); - *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); /* -- cgit v1.2.3 From 79d47107adb6f58c8555be43e6b18fccf944ae8d Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 6 Oct 2016 19:02:05 -0700 Subject: f2fs: support multiple devices This patch implements multiple devices support for f2fs. Given multiple devices by mkfs.f2fs, f2fs shows them entirely as one big volume under one f2fs instance. Internal block management is very simple, but we will modify block allocation and background GC policy to boost IO speed by exploiting them accoording to each device speed. Signed-off-by: Jaegeuk Kim Conflicts: fs/f2fs/data.c fs/f2fs/segment.c --- fs/f2fs/segment.c | 112 ++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 83 insertions(+), 29 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 19ab2e63d8d7..30d0e9a76c62 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -401,6 +401,32 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) } } +static int __submit_flush_wait(struct block_device *bdev) +{ + struct bio *bio = f2fs_bio_alloc(0); + int ret; + + bio->bi_bdev = bdev; + ret = submit_bio_wait(WRITE_FLUSH, bio); + bio_put(bio); + return ret; +} + +static int submit_flush_wait(struct f2fs_sb_info *sbi) +{ + int ret = __submit_flush_wait(sbi->sb->s_bdev); + int i; + + if (sbi->s_ndevs && !ret) { + for (i = 1; i < sbi->s_ndevs; i++) { + ret = __submit_flush_wait(FDEV(i).bdev); + if (ret) + break; + } + } + return ret; +} + static int issue_flush_thread(void *data) { struct f2fs_sb_info *sbi = data; @@ -411,24 +437,18 @@ repeat: return 0; if (!llist_empty(&fcc->issue_list)) { - struct bio *bio; struct flush_cmd *cmd, *next; int ret; - bio = f2fs_bio_alloc(0); - fcc->dispatch_list = llist_del_all(&fcc->issue_list); fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list); - bio->bi_bdev = sbi->sb->s_bdev; - ret = submit_bio_wait(WRITE_FLUSH, bio); - + ret = submit_flush_wait(sbi); llist_for_each_entry_safe(cmd, next, fcc->dispatch_list, llnode) { cmd->ret = ret; complete(&cmd->wait); } - bio_put(bio); fcc->dispatch_list = NULL; } @@ -449,14 +469,11 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi) return 0; if (!test_opt(sbi, FLUSH_MERGE) || !atomic_read(&fcc->submit_flush)) { - struct bio *bio = f2fs_bio_alloc(0); int ret; atomic_inc(&fcc->submit_flush); - bio->bi_bdev = sbi->sb->s_bdev; - ret = submit_bio_wait(WRITE_FLUSH, bio); + ret = submit_flush_wait(sbi); atomic_dec(&fcc->submit_flush); - bio_put(bio); return ret; } @@ -586,18 +603,24 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) } #ifdef CONFIG_BLK_DEV_ZONED -static int f2fs_issue_discard_zone(struct f2fs_sb_info *sbi, - block_t blkstart, block_t blklen) +static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi, + struct block_device *bdev, block_t blkstart, block_t blklen) { - sector_t sector = SECTOR_FROM_BLOCK(blkstart); sector_t nr_sects = SECTOR_FROM_BLOCK(blklen); - struct block_device *bdev = sbi->sb->s_bdev; + sector_t sector; + int devi = 0; - if (nr_sects != bdev_zone_size(bdev)) { + if (sbi->s_ndevs) { + devi = f2fs_target_device_index(sbi, blkstart); + blkstart -= FDEV(devi).start_blk; + } + sector = SECTOR_FROM_BLOCK(blkstart); + + if (sector % bdev_zone_size(bdev) || nr_sects != bdev_zone_size(bdev)) { f2fs_msg(sbi->sb, KERN_INFO, - "Unaligned discard attempted (sector %llu + %llu)", - (unsigned long long)sector, - (unsigned long long)nr_sects); + "(%d) %s: Unaligned discard attempted (block %x + %x)", + devi, sbi->s_ndevs ? FDEV(devi).path: "", + blkstart, blklen); return -EIO; } @@ -606,7 +629,7 @@ static int f2fs_issue_discard_zone(struct f2fs_sb_info *sbi, * use regular discard if the drive supports it. For sequential * zones, reset the zone write pointer. */ - switch (get_blkz_type(sbi, blkstart)) { + switch (get_blkz_type(sbi, bdev, blkstart)) { case BLK_ZONE_TYPE_CONVENTIONAL: if (!blk_queue_discard(bdev_get_queue(bdev))) @@ -625,29 +648,60 @@ static int f2fs_issue_discard_zone(struct f2fs_sb_info *sbi, } #endif -static int f2fs_issue_discard(struct f2fs_sb_info *sbi, - block_t blkstart, block_t blklen) +static int __issue_discard_async(struct f2fs_sb_info *sbi, + struct block_device *bdev, block_t blkstart, block_t blklen) { sector_t start = SECTOR_FROM_BLOCK(blkstart); sector_t len = SECTOR_FROM_BLOCK(blklen); + +#ifdef CONFIG_BLK_DEV_ZONED + if (f2fs_sb_mounted_blkzoned(sbi->sb) && + bdev_zoned_model(bdev) != BLK_ZONED_NONE) + return __f2fs_issue_discard_zone(sbi, bdev, blkstart, blklen); +#endif + return blkdev_issue_discard(bdev, start, len, GFP_NOFS, 0); +} + +static int f2fs_issue_discard(struct f2fs_sb_info *sbi, + block_t blkstart, block_t blklen) +{ + sector_t start = blkstart, len = 0; + struct block_device *bdev; struct seg_entry *se; unsigned int offset; block_t i; + int err = 0; + + bdev = f2fs_target_device(sbi, blkstart, NULL); + + for (i = blkstart; i < blkstart + blklen; i++, len++) { + if (i != start) { + struct block_device *bdev2 = + f2fs_target_device(sbi, i, NULL); + + if (bdev2 != bdev) { + err = __issue_discard_async(sbi, bdev, + start, len); + if (err) + return err; + bdev = bdev2; + start = i; + len = 0; + } + } - for (i = blkstart; i < blkstart + blklen; i++) { se = get_seg_entry(sbi, GET_SEGNO(sbi, i)); offset = GET_BLKOFF_FROM_SEG0(sbi, i); if (!f2fs_test_and_set_bit(offset, se->discard_map)) sbi->discard_blks--; } - trace_f2fs_issue_discard(sbi->sb, blkstart, blklen); -#ifdef CONFIG_BLK_DEV_ZONED - if (f2fs_sb_mounted_blkzoned(sbi->sb)) - return f2fs_issue_discard_zone(sbi, blkstart, blklen); -#endif - return blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0); + if (len) + err = __issue_discard_async(sbi, bdev, start, len); + + trace_f2fs_issue_discard(sbi->sb, blkstart, blklen); + return err; } static void __add_discard_entry(struct f2fs_sb_info *sbi, -- cgit v1.2.3 From 3d89bca8b1feefa6b4a574207115de1458711c7f Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 14 Nov 2016 17:38:35 -0800 Subject: f2fs: avoid BG_GC in f2fs_balance_fs If many threads hit has_not_enough_free_secs() in f2fs_balance_fs() at the same time, all the threads would do FG_GC or BG_GC. In this critical path, we totally don't need to do BG_GC at all. Let's avoid that. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 30d0e9a76c62..27e1b7c56e4c 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -364,7 +364,7 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) */ if (has_not_enough_free_secs(sbi, 0, 0)) { mutex_lock(&sbi->gc_mutex); - f2fs_gc(sbi, false); + f2fs_gc(sbi, false, false); } } -- cgit v1.2.3 From 185a1b0664eef1c95a2ab55ffc274146333f921f Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 14 Nov 2016 18:20:10 -0800 Subject: f2fs: fix wrong written_valid_blocks counting Previously, written_valid_blocks was got by ckpt->valid_block_count. But if the last checkpoint has some NEW_ADDR due to power-cut, we can get wrong value. Fix it to get the number from actual written block count from sit entries. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 27e1b7c56e4c..58cae4a541a7 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2176,7 +2176,6 @@ out: static int build_sit_info(struct f2fs_sb_info *sbi) { struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); - struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); struct sit_info *sit_i; unsigned int sit_segs, start; char *src_bitmap, *dst_bitmap; @@ -2243,7 +2242,7 @@ static int build_sit_info(struct f2fs_sb_info *sbi) sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr); sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg; - sit_i->written_valid_blocks = le64_to_cpu(ckpt->valid_block_count); + sit_i->written_valid_blocks = 0; sit_i->sit_bitmap = dst_bitmap; sit_i->bitmap_size = bitmap_size; sit_i->dirty_sentries = 0; @@ -2397,6 +2396,9 @@ static void init_free_segmap(struct f2fs_sb_info *sbi) struct seg_entry *sentry = get_seg_entry(sbi, start); if (!sentry->valid_blocks) __set_free(sbi, start); + else + SIT_I(sbi)->written_valid_blocks += + sentry->valid_blocks; } /* set use the current segments */ -- cgit v1.2.3 From 2ea2e28982f0264aae0d1c3e413eddfa7c8c149e Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 16 Nov 2016 10:41:20 +0800 Subject: f2fs: don't wait writeback for datas during checkpoint Normally, while committing checkpoint, we will wait on all pages to be writebacked no matter the page is data or metadata, so in scenario where there are lots of data IO being submitted with metadata, we may suffer long latency for waiting writeback during checkpoint. Indeed, we only care about persistence for pages with metadata, but not pages with data, as file system consistent are only related to metadate, so in order to avoid encountering long latency in above scenario, let's recognize and reference metadata in submitted IOs, wait writeback only for metadatas. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Conflicts: fs/f2fs/data.c --- fs/f2fs/segment.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 58cae4a541a7..23e8892c4e60 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -288,7 +288,6 @@ static int __commit_inmem_pages(struct inode *inode, /* record old blkaddr for revoking */ cur->old_addr = fio.old_blkaddr; - clear_cold_data(page); submit_bio = true; } unlock_page(page); -- cgit v1.2.3 From 10a2e5e7a2d4a17f15df8b23002979c00d562d4d Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 18 Nov 2016 22:21:13 +0800 Subject: f2fs: drop duplicate header timer.h Drop duplicate header timer.h from segment.c. Signed-off-by: Geliang Tang Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 23e8892c4e60..ba715d60c738 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -16,7 +16,6 @@ #include #include #include -#include #include "f2fs.h" #include "segment.h" -- cgit v1.2.3 From b683e01e25446c1fbf67d1a895d5ee25d897c694 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 22 Nov 2016 15:20:16 +0100 Subject: f2fs: fix 32-bit build The addition of multiple-device support broke CONFIG_BLK_DEV_ZONED on 32-bit machines because of a 64-bit division: fs/f2fs/f2fs.o: In function `__issue_discard_async': extent_cache.c:(.text.__issue_discard_async+0xd4): undefined reference to `__aeabi_uldivmod' Fortunately, bdev_zone_size() is guaranteed to return a power-of-two number, so we can replace the % operator with a cheaper bit mask. Fixes: 792b84b74b54 ("f2fs: support multiple devices") Signed-off-by: Arnd Bergmann Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index ba715d60c738..4f557e5e789d 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -614,7 +614,8 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi, } sector = SECTOR_FROM_BLOCK(blkstart); - if (sector % bdev_zone_size(bdev) || nr_sects != bdev_zone_size(bdev)) { + if (sector & (bdev_zone_size(bdev) - 1) || + nr_sects != bdev_zone_size(bdev)) { f2fs_msg(sbi->sb, KERN_INFO, "(%d) %s: Unaligned discard attempted (block %x + %x)", devi, sbi->s_ndevs ? FDEV(devi).path: "", -- cgit v1.2.3 From a980f29780f397a57a0f53cc2ce7a85078cf7e5d Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 5 Dec 2016 11:37:14 -0800 Subject: f2fs: call sync_fs when f2fs is idle The sync_fs in f2fs_balance_fs_bg must avoid interrupting current user requests. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 4f557e5e789d..b95f07559d90 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -381,12 +381,15 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) else build_free_nids(sbi, false); + if (!is_idle(sbi)) + return; + /* checkpoint is the only way to shrink partial cached entries */ if (!available_free_memory(sbi, NAT_ENTRIES) || !available_free_memory(sbi, INO_ENTRIES) || excess_prefree_segs(sbi) || excess_dirty_nats(sbi) || - (is_idle(sbi) && f2fs_time_over(sbi, CP_TIME))) { + f2fs_time_over(sbi, CP_TIME)) { if (test_opt(sbi, DATA_FLUSH)) { struct blk_plug plug; -- cgit v1.2.3 From f96ce4c98613274ebbdc8cf527a8eb43e47ba4ba Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 7 Dec 2016 16:23:32 -0800 Subject: f2fs: fix to access nullified flush_cmd_control pointer f2fs_sync_file() remount_ro - f2fs_readonly - destroy_flush_cmd_control - f2fs_issue_flush - no fcc pointer! So, this patch doesn't free fcc in this case, but just stop its kernel thread which sends flush commands. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 33 +++++++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 8 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index b95f07559d90..a288de069164 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -486,8 +486,13 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi) if (!fcc->dispatch_list) wake_up(&fcc->flush_wait_queue); - wait_for_completion(&cmd.wait); - atomic_dec(&fcc->submit_flush); + if (fcc->f2fs_issue_flush) { + wait_for_completion(&cmd.wait); + atomic_dec(&fcc->submit_flush); + } else { + llist_del_all(&fcc->issue_list); + atomic_set(&fcc->submit_flush, 0); + } return cmd.ret; } @@ -498,6 +503,11 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi) struct flush_cmd_control *fcc; int err = 0; + if (SM_I(sbi)->cmd_control_info) { + fcc = SM_I(sbi)->cmd_control_info; + goto init_thread; + } + fcc = kzalloc(sizeof(struct flush_cmd_control), GFP_KERNEL); if (!fcc) return -ENOMEM; @@ -505,6 +515,7 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi) init_waitqueue_head(&fcc->flush_wait_queue); init_llist_head(&fcc->issue_list); SM_I(sbi)->cmd_control_info = fcc; +init_thread: fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi, "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev)); if (IS_ERR(fcc->f2fs_issue_flush)) { @@ -517,14 +528,20 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi) return err; } -void destroy_flush_cmd_control(struct f2fs_sb_info *sbi) +void destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free) { struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info; - if (fcc && fcc->f2fs_issue_flush) - kthread_stop(fcc->f2fs_issue_flush); - kfree(fcc); - SM_I(sbi)->cmd_control_info = NULL; + if (fcc && fcc->f2fs_issue_flush) { + struct task_struct *flush_thread = fcc->f2fs_issue_flush; + + fcc->f2fs_issue_flush = NULL; + kthread_stop(flush_thread); + } + if (free) { + kfree(fcc); + SM_I(sbi)->cmd_control_info = NULL; + } } static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, @@ -2658,7 +2675,7 @@ void destroy_segment_manager(struct f2fs_sb_info *sbi) if (!sm_info) return; - destroy_flush_cmd_control(sbi); + destroy_flush_cmd_control(sbi, true); destroy_dirty_segmap(sbi); destroy_curseg(sbi); destroy_free_segmap(sbi); -- cgit v1.2.3 From 7146292938e42afacadd7b3402f459e638f5b77a Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 11 Jan 2017 18:24:54 -0800 Subject: f2fs: resolve op and op_flags confilcts Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index a288de069164..70aec4a8de13 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -257,7 +257,8 @@ static int __commit_inmem_pages(struct inode *inode, struct f2fs_io_info fio = { .sbi = sbi, .type = DATA, - .rw = WRITE_SYNC | REQ_PRIO, + .op = REQ_OP_WRITE, + .op_flags = REQ_SYNC | REQ_NOIDLE | REQ_PRIO, .encrypted_page = NULL, }; bool submit_bio = false; @@ -407,6 +408,7 @@ static int __submit_flush_wait(struct block_device *bdev) struct bio *bio = f2fs_bio_alloc(0); int ret; + bio->bi_rw = REQ_OP_WRITE; bio->bi_bdev = bdev; ret = submit_bio_wait(WRITE_FLUSH, bio); bio_put(bio); @@ -1544,7 +1546,8 @@ void write_meta_page(struct f2fs_sb_info *sbi, struct page *page) struct f2fs_io_info fio = { .sbi = sbi, .type = META, - .rw = WRITE_SYNC | REQ_META | REQ_PRIO, + .op = REQ_OP_WRITE, + .op_flags = REQ_SYNC | REQ_NOIDLE | REQ_META | REQ_PRIO, .old_blkaddr = page->index, .new_blkaddr = page->index, .page = page, @@ -1552,7 +1555,7 @@ void write_meta_page(struct f2fs_sb_info *sbi, struct page *page) }; if (unlikely(page->index >= MAIN_BLKADDR(sbi))) - fio.rw &= ~REQ_META; + fio.op_flags &= ~REQ_META; set_page_writeback(page); f2fs_submit_page_mbio(&fio); -- cgit v1.2.3 From 373bb0247ae5d5ff0e371d613599fa44392e972e Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 11 Jan 2017 16:41:25 -0800 Subject: f2fs: support async discard based on v4.9 This patch is based on commit 275b66b09e85 (f2fs: support async discard). Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 183 +++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 174 insertions(+), 9 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 70aec4a8de13..13bea6e5120e 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -26,6 +26,7 @@ #define __reverse_ffz(x) __reverse_ffs(~(x)) static struct kmem_cache *discard_entry_slab; +static struct kmem_cache *bio_entry_slab; static struct kmem_cache *sit_entry_set_slab; static struct kmem_cache *inmem_entry_slab; @@ -622,6 +623,162 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) mutex_unlock(&dirty_i->seglist_lock); } +static struct bio_entry *__add_bio_entry(struct f2fs_sb_info *sbi, + struct bio *bio) +{ + struct list_head *wait_list = &(SM_I(sbi)->wait_list); + struct bio_entry *be = f2fs_kmem_cache_alloc(bio_entry_slab, GFP_NOFS); + + INIT_LIST_HEAD(&be->list); + be->bio = bio; + init_completion(&be->event); + list_add_tail(&be->list, wait_list); + + return be; +} + +void f2fs_wait_all_discard_bio(struct f2fs_sb_info *sbi) +{ + struct list_head *wait_list = &(SM_I(sbi)->wait_list); + struct bio_entry *be, *tmp; + + list_for_each_entry_safe(be, tmp, wait_list, list) { + struct bio *bio = be->bio; + int err; + + wait_for_completion_io(&be->event); + err = be->error; + if (err == -EOPNOTSUPP) + err = 0; + + if (err) + f2fs_msg(sbi->sb, KERN_INFO, + "Issue discard failed, ret: %d", err); + + bio_put(bio); + list_del(&be->list); + kmem_cache_free(bio_entry_slab, be); + } +} + +static void f2fs_submit_bio_wait_endio(struct bio *bio) +{ + struct bio_entry *be = (struct bio_entry *)bio->bi_private; + + be->error = bio->bi_error; + complete(&be->event); +} + +/* copied from block/blk-lib.c in 4.10-rc1 */ +static int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, + sector_t nr_sects, gfp_t gfp_mask, int flags, + struct bio **biop) +{ + struct request_queue *q = bdev_get_queue(bdev); + struct bio *bio = *biop; + unsigned int granularity; + int op = REQ_WRITE | REQ_DISCARD; + int alignment; + sector_t bs_mask; + + if (!q) + return -ENXIO; + + if (!blk_queue_discard(q)) + return -EOPNOTSUPP; + + if (flags & BLKDEV_DISCARD_SECURE) { + if (!blk_queue_secdiscard(q)) + return -EOPNOTSUPP; + op |= REQ_SECURE; + } + + bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1; + if ((sector | nr_sects) & bs_mask) + return -EINVAL; + + /* Zero-sector (unknown) and one-sector granularities are the same. */ + granularity = max(q->limits.discard_granularity >> 9, 1U); + alignment = (bdev_discard_alignment(bdev) >> 9) % granularity; + + while (nr_sects) { + unsigned int req_sects; + sector_t end_sect, tmp; + + /* Make sure bi_size doesn't overflow */ + req_sects = min_t(sector_t, nr_sects, UINT_MAX >> 9); + + /** + * If splitting a request, and the next starting sector would be + * misaligned, stop the discard at the previous aligned sector. + */ + end_sect = sector + req_sects; + tmp = end_sect; + if (req_sects < nr_sects && + sector_div(tmp, granularity) != alignment) { + end_sect = end_sect - alignment; + sector_div(end_sect, granularity); + end_sect = end_sect * granularity + alignment; + req_sects = end_sect - sector; + } + + if (bio) { + int ret = submit_bio_wait(0, bio); + bio_put(bio); + if (ret) + return ret; + } + bio = f2fs_bio_alloc(0); + bio->bi_iter.bi_sector = sector; + bio->bi_bdev = bdev; + bio_set_op_attrs(bio, op, 0); + + bio->bi_iter.bi_size = req_sects << 9; + nr_sects -= req_sects; + sector = end_sect; + + /* + * We can loop for a long time in here, if someone does + * full device discards (like mkfs). Be nice and allow + * us to schedule out to avoid softlocking if preempt + * is disabled. + */ + cond_resched(); + } + + *biop = bio; + return 0; +} + +/* this function is copied from blkdev_issue_discard from block/blk-lib.c */ +static int __f2fs_issue_discard_async(struct f2fs_sb_info *sbi, + struct block_device *bdev, block_t blkstart, block_t blklen) +{ + struct bio *bio = NULL; + int err; + + trace_f2fs_issue_discard(sbi->sb, blkstart, blklen); + + if (sbi->s_ndevs) { + int devi = f2fs_target_device_index(sbi, blkstart); + + blkstart -= FDEV(devi).start_blk; + } + err = __blkdev_issue_discard(bdev, + SECTOR_FROM_BLOCK(blkstart), + SECTOR_FROM_BLOCK(blklen), + GFP_NOFS, 0, &bio); + if (!err && bio) { + struct bio_entry *be = __add_bio_entry(sbi, bio); + + bio->bi_private = be; + bio->bi_end_io = f2fs_submit_bio_wait_endio; + submit_bio(REQ_SYNC, bio); + } + + return err; +} + #ifdef CONFIG_BLK_DEV_ZONED static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi, struct block_device *bdev, block_t blkstart, block_t blklen) @@ -655,8 +812,7 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi, case BLK_ZONE_TYPE_CONVENTIONAL: if (!blk_queue_discard(bdev_get_queue(bdev))) return 0; - return blkdev_issue_discard(bdev, sector, nr_sects, - GFP_NOFS, 0); + return __f2fs_issue_discard_async(sbi, bdev, blkstart, blklen); case BLK_ZONE_TYPE_SEQWRITE_REQ: case BLK_ZONE_TYPE_SEQWRITE_PREF: trace_f2fs_issue_reset_zone(sbi->sb, blkstart); @@ -672,15 +828,12 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi, static int __issue_discard_async(struct f2fs_sb_info *sbi, struct block_device *bdev, block_t blkstart, block_t blklen) { - sector_t start = SECTOR_FROM_BLOCK(blkstart); - sector_t len = SECTOR_FROM_BLOCK(blklen); - #ifdef CONFIG_BLK_DEV_ZONED if (f2fs_sb_mounted_blkzoned(sbi->sb) && bdev_zoned_model(bdev) != BLK_ZONED_NONE) return __f2fs_issue_discard_zone(sbi, bdev, blkstart, blklen); #endif - return blkdev_issue_discard(bdev, start, len, GFP_NOFS, 0); + return __f2fs_issue_discard_async(sbi, bdev, blkstart, blklen); } static int f2fs_issue_discard(struct f2fs_sb_info *sbi, @@ -720,8 +873,6 @@ static int f2fs_issue_discard(struct f2fs_sb_info *sbi, if (len) err = __issue_discard_async(sbi, bdev, start, len); - - trace_f2fs_issue_discard(sbi->sb, blkstart, blklen); return err; } @@ -822,11 +973,14 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc) struct list_head *head = &(SM_I(sbi)->discard_list); struct discard_entry *entry, *this; struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + struct blk_plug plug; unsigned long *prefree_map = dirty_i->dirty_segmap[PRE]; unsigned int start = 0, end = -1; unsigned int secno, start_segno; bool force = (cpc->reason == CP_DISCARD); + blk_start_plug(&plug); + mutex_lock(&dirty_i->seglist_lock); while (1) { @@ -875,6 +1029,8 @@ skip: SM_I(sbi)->nr_discards -= entry->len; kmem_cache_free(discard_entry_slab, entry); } + + blk_finish_plug(&plug); } static bool __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno) @@ -2551,6 +2707,7 @@ int build_segment_manager(struct f2fs_sb_info *sbi) sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS; INIT_LIST_HEAD(&sm_info->discard_list); + INIT_LIST_HEAD(&sm_info->wait_list); sm_info->nr_discards = 0; sm_info->max_discards = 0; @@ -2694,10 +2851,15 @@ int __init create_segment_manager_caches(void) if (!discard_entry_slab) goto fail; + bio_entry_slab = f2fs_kmem_cache_create("bio_entry", + sizeof(struct bio_entry)); + if (!bio_entry_slab) + goto destroy_discard_entry; + sit_entry_set_slab = f2fs_kmem_cache_create("sit_entry_set", sizeof(struct sit_entry_set)); if (!sit_entry_set_slab) - goto destroy_discard_entry; + goto destroy_bio_entry; inmem_entry_slab = f2fs_kmem_cache_create("inmem_page_entry", sizeof(struct inmem_pages)); @@ -2707,6 +2869,8 @@ int __init create_segment_manager_caches(void) destroy_sit_entry_set: kmem_cache_destroy(sit_entry_set_slab); +destroy_bio_entry: + kmem_cache_destroy(bio_entry_slab); destroy_discard_entry: kmem_cache_destroy(discard_entry_slab); fail: @@ -2716,6 +2880,7 @@ fail: void destroy_segment_manager_caches(void) { kmem_cache_destroy(sit_entry_set_slab); + kmem_cache_destroy(bio_entry_slab); kmem_cache_destroy(discard_entry_slab); kmem_cache_destroy(inmem_entry_slab); } -- cgit v1.2.3 From 0c61b0a37be7e2f7f55066dfa77c3697765724ff Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Thu, 22 Dec 2016 11:46:24 +0800 Subject: f2fs: fix a missing discard prefree segments If userspace issue a fstrim with a range not involve prefree segments, it will reuse these segments without discard. This patch fix it. Signed-off-by: Yunlei He Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 13bea6e5120e..f4e41f997ae3 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -996,9 +996,13 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc) dirty_i->nr_dirty[PRE] -= end - start; - if (force || !test_opt(sbi, DISCARD)) + if (!test_opt(sbi, DISCARD)) continue; + if (force && start >= cpc->trim_start && + (end - 1) <= cpc->trim_end) + continue; + if (!test_opt(sbi, LFS) || sbi->segs_per_sec == 1) { f2fs_issue_discard(sbi, START_BLOCK(sbi, start), (end - start) << sbi->log_blocks_per_seg); @@ -2343,8 +2347,12 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) f2fs_bug_on(sbi, sit_i->dirty_sentries); out: if (cpc->reason == CP_DISCARD) { + __u64 trim_start = cpc->trim_start; + for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++) add_discard_addrs(sbi, cpc); + + cpc->trim_start = trim_start; } mutex_unlock(&sit_i->sentry_lock); -- cgit v1.2.3 From c1e5d5278024fbe65f123c6f8e772b82f1f72106 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 21 Dec 2016 11:51:32 -0800 Subject: f2fs: reassign new segment for mode=lfs Otherwise we can remain wrong curseg->next_blkoff, resulting in fsck failure. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index f4e41f997ae3..e6d3f3d4b028 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1508,9 +1508,6 @@ void allocate_new_segments(struct f2fs_sb_info *sbi) unsigned int old_segno; int i; - if (test_opt(sbi, LFS)) - return; - for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { curseg = CURSEG_I(sbi, i); old_segno = curseg->segno; -- cgit v1.2.3 From 22f1947949fd050ca103d37f369dbd6d2024ea50 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 14 Dec 2016 10:12:56 -0800 Subject: f2fs: support IO alignment for DATA and NODE writes This patch implements IO alignment by filling dummy blocks in DATA and NODE write bios. If we can guarantee, for example, 32KB or 64KB for such the IOs, we can eliminate underlying dummy page problem which FTL conducts in order to close MLC or TLC partial written pages. Note that, - it requires "-o mode=lfs". - IO size should be power of 2, not exceed BIO_MAX_PAGES, 256. - read IO is still 4KB. - do checkpoint at fsync, if dummy NODE page was written. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index e6d3f3d4b028..a7bb97826445 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1684,15 +1684,20 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) { int type = __get_segment_type(fio->page, fio->type); + int err; if (fio->type == NODE || fio->type == DATA) mutex_lock(&fio->sbi->wio_mutex[fio->type]); - +reallocate: allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr, &fio->new_blkaddr, sum, type); /* writeout dirty page into bdev */ - f2fs_submit_page_mbio(fio); + err = f2fs_submit_page_mbio(fio); + if (err == -EAGAIN) { + fio->old_blkaddr = fio->new_blkaddr; + goto reallocate; + } if (fio->type == NODE || fio->type == DATA) mutex_unlock(&fio->sbi->wio_mutex[fio->type]); -- cgit v1.2.3 From 75e402e690b9f1713458f3adbef306a73de74f3c Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 28 Dec 2016 13:55:09 -0800 Subject: f2fs: show the max number of atomic operations This patch adds to show the max number of atomic operations which are conducting concurrently. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index a7bb97826445..353ec85b3835 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -243,6 +243,7 @@ void drop_inmem_pages(struct inode *inode) struct f2fs_inode_info *fi = F2FS_I(inode); clear_inode_flag(inode, FI_ATOMIC_FILE); + stat_dec_atomic_write(inode); mutex_lock(&fi->inmem_lock); __revoke_inmem_pages(inode, &fi->inmem_pages, true, false); -- cgit v1.2.3 From 5521ead70476162d3cef2324320784cb4dbd0c10 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 29 Dec 2016 14:07:53 -0800 Subject: f2fs: relax async discard commands more This patch relaxes async discard commands to avoid waiting its end_io during checkpoint. Instead of waiting them during checkpoint, it will be done when actually reusing them. Test on initial partition of nvme drive. # time fstrim /mnt/test Before : 6.158s After : 4.822s Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 353ec85b3835..fa3d4f8db389 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -625,20 +625,23 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) } static struct bio_entry *__add_bio_entry(struct f2fs_sb_info *sbi, - struct bio *bio) + struct bio *bio, block_t lstart, block_t len) { struct list_head *wait_list = &(SM_I(sbi)->wait_list); struct bio_entry *be = f2fs_kmem_cache_alloc(bio_entry_slab, GFP_NOFS); INIT_LIST_HEAD(&be->list); be->bio = bio; + be->lstart = lstart; + be->len = len; init_completion(&be->event); list_add_tail(&be->list, wait_list); return be; } -void f2fs_wait_all_discard_bio(struct f2fs_sb_info *sbi) +/* This should be covered by global mutex, &sit_i->sentry_lock */ +void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) { struct list_head *wait_list = &(SM_I(sbi)->wait_list); struct bio_entry *be, *tmp; @@ -647,7 +650,15 @@ void f2fs_wait_all_discard_bio(struct f2fs_sb_info *sbi) struct bio *bio = be->bio; int err; - wait_for_completion_io(&be->event); + if (!completion_done(&be->event)) { + if ((be->lstart <= blkaddr && + blkaddr < be->lstart + be->len) || + blkaddr == NULL_ADDR) + wait_for_completion_io(&be->event); + else + continue; + } + err = be->error; if (err == -EOPNOTSUPP) err = 0; @@ -756,6 +767,7 @@ static int __f2fs_issue_discard_async(struct f2fs_sb_info *sbi, struct block_device *bdev, block_t blkstart, block_t blklen) { struct bio *bio = NULL; + block_t lblkstart = blkstart; int err; trace_f2fs_issue_discard(sbi->sb, blkstart, blklen); @@ -770,13 +782,13 @@ static int __f2fs_issue_discard_async(struct f2fs_sb_info *sbi, SECTOR_FROM_BLOCK(blklen), GFP_NOFS, 0, &bio); if (!err && bio) { - struct bio_entry *be = __add_bio_entry(sbi, bio); + struct bio_entry *be = __add_bio_entry(sbi, bio, + lblkstart, blklen); bio->bi_private = be; bio->bi_end_io = f2fs_submit_bio_wait_endio; submit_bio(REQ_SYNC, bio); } - return err; } @@ -1655,6 +1667,8 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); + f2fs_wait_discard_bio(sbi, *new_blkaddr); + /* * __add_sum_entry should be resided under the curseg_mutex * because, this function updates a summary entry in the -- cgit v1.2.3 From 99a5dca4d9c6efd55dd548cf1e30ce86912f47ac Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 29 Dec 2016 22:06:15 -0800 Subject: f2fs: return fs_trim if there is no candidate If there is no candidate to submit discard command during f2sf_trim_fs, let's return without checkpoint. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index fa3d4f8db389..12f8d5ab7ccf 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -914,7 +914,8 @@ done: SM_I(sbi)->nr_discards += end - start; } -static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc) +static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc, + bool check_only) { int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long); int max_blocks = sbi->blocks_per_seg; @@ -928,12 +929,12 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc) int i; if (se->valid_blocks == max_blocks || !f2fs_discard_en(sbi)) - return; + return false; if (!force) { if (!test_opt(sbi, DISCARD) || !se->valid_blocks || SM_I(sbi)->nr_discards >= SM_I(sbi)->max_discards) - return; + return false; } /* SIT_VBLOCK_MAP_SIZE should be multiple of sizeof(unsigned long) */ @@ -951,8 +952,12 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc) && (end - start) < cpc->trim_minlen) continue; + if (check_only) + return true; + __add_discard_entry(sbi, cpc, se, start, end); } + return false; } void release_discard_addrs(struct f2fs_sb_info *sbi) @@ -1533,6 +1538,19 @@ static const struct segment_allocation default_salloc_ops = { .allocate_segment = allocate_segment_by_default, }; +bool exist_trim_candidates(struct f2fs_sb_info *sbi, struct cp_control *cpc) +{ + __u64 trim_start = cpc->trim_start; + + mutex_lock(&SIT_I(sbi)->sentry_lock); + for (; trim_start <= cpc->trim_end; trim_start++) + if (add_discard_addrs(sbi, cpc, true)) + break; + mutex_unlock(&SIT_I(sbi)->sentry_lock); + + return trim_start <= cpc->trim_end; +} + int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) { __u64 start = F2FS_BYTES_TO_BLK(range->start); @@ -2329,7 +2347,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* add discard candidates */ if (cpc->reason != CP_DISCARD) { cpc->trim_start = segno; - add_discard_addrs(sbi, cpc); + add_discard_addrs(sbi, cpc, false); } if (to_journal) { @@ -2367,7 +2385,7 @@ out: __u64 trim_start = cpc->trim_start; for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++) - add_discard_addrs(sbi, cpc); + add_discard_addrs(sbi, cpc, false); cpc->trim_start = trim_start; } -- cgit v1.2.3 From 72d48dabe998550f45038d44c98ec286e5161ce6 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 7 Jan 2017 18:50:26 +0800 Subject: f2fs: introduce FI_ATOMIC_COMMIT This patch introduces a new flag to indicate inode status of doing atomic write committing, so that, we can keep atomic write status for inode during atomic committing, then we can skip GCing pages of atomic write inode, that avoids random GCed datas being mixed with current transaction, so isolation of transaction can be kept. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 12f8d5ab7ccf..6a870677d58a 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -242,12 +242,12 @@ void drop_inmem_pages(struct inode *inode) { struct f2fs_inode_info *fi = F2FS_I(inode); - clear_inode_flag(inode, FI_ATOMIC_FILE); - stat_dec_atomic_write(inode); - mutex_lock(&fi->inmem_lock); __revoke_inmem_pages(inode, &fi->inmem_pages, true, false); mutex_unlock(&fi->inmem_lock); + + clear_inode_flag(inode, FI_ATOMIC_FILE); + stat_dec_atomic_write(inode); } static int __commit_inmem_pages(struct inode *inode, @@ -316,6 +316,8 @@ int commit_inmem_pages(struct inode *inode) f2fs_balance_fs(sbi, true); f2fs_lock_op(sbi); + set_inode_flag(inode, FI_ATOMIC_COMMIT); + mutex_lock(&fi->inmem_lock); err = __commit_inmem_pages(inode, &revoke_list); if (err) { @@ -337,6 +339,8 @@ int commit_inmem_pages(struct inode *inode) } mutex_unlock(&fi->inmem_lock); + clear_inode_flag(inode, FI_ATOMIC_COMMIT); + f2fs_unlock_op(sbi); return err; } -- cgit v1.2.3 From c50d5c09193e413467fca1a3fdfa5a69e59a6930 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 7 Jan 2017 18:51:01 +0800 Subject: f2fs: check in-memory block bitmap This patch adds a mirror for valid block bitmap, and use it to detect in-memory bitmap corruption which may be caused by bit-transition of cache or memory overflow. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 6a870677d58a..aae1c2ea7a1d 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1101,14 +1101,32 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) /* Update valid block bitmap */ if (del > 0) { - if (f2fs_test_and_set_bit(offset, se->cur_valid_map)) + if (f2fs_test_and_set_bit(offset, se->cur_valid_map)) { +#ifdef CONFIG_F2FS_CHECK_FS + if (f2fs_test_and_set_bit(offset, + se->cur_valid_map_mir)) + f2fs_bug_on(sbi, 1); + else + WARN_ON(1); +#else f2fs_bug_on(sbi, 1); +#endif + } if (f2fs_discard_en(sbi) && !f2fs_test_and_set_bit(offset, se->discard_map)) sbi->discard_blks--; } else { - if (!f2fs_test_and_clear_bit(offset, se->cur_valid_map)) + if (!f2fs_test_and_clear_bit(offset, se->cur_valid_map)) { +#ifdef CONFIG_F2FS_CHECK_FS + if (!f2fs_test_and_clear_bit(offset, + se->cur_valid_map_mir)) + f2fs_bug_on(sbi, 1); + else + WARN_ON(1); +#else f2fs_bug_on(sbi, 1); +#endif + } if (f2fs_discard_en(sbi) && f2fs_test_and_clear_bit(offset, se->discard_map)) sbi->discard_blks++; @@ -2432,6 +2450,13 @@ static int build_sit_info(struct f2fs_sb_info *sbi) !sit_i->sentries[start].ckpt_valid_map) return -ENOMEM; +#ifdef CONFIG_F2FS_CHECK_FS + sit_i->sentries[start].cur_valid_map_mir + = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); + if (!sit_i->sentries[start].cur_valid_map_mir) + return -ENOMEM; +#endif + if (f2fs_discard_en(sbi)) { sit_i->sentries[start].discard_map = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); @@ -2861,6 +2886,9 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi) if (sit_i->sentries) { for (start = 0; start < MAIN_SEGS(sbi); start++) { kfree(sit_i->sentries[start].cur_valid_map); +#ifdef CONFIG_F2FS_CHECK_FS + kfree(sit_i->sentries[start].cur_valid_map_mir); +#endif kfree(sit_i->sentries[start].ckpt_valid_map); kfree(sit_i->sentries[start].discard_map); } -- cgit v1.2.3 From e3d4c4b5f18cd817225906e341aadf8fd8a01345 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 7 Jan 2017 18:52:34 +0800 Subject: f2fs: check in-memory sit version bitmap This patch adds a mirror for sit version bitmap, and use it to detect in-memory bitmap corruption which may be caused by bit-transition of cache or memory overflow. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index aae1c2ea7a1d..c39bbffb0cac 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2421,7 +2421,7 @@ static int build_sit_info(struct f2fs_sb_info *sbi) struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); struct sit_info *sit_i; unsigned int sit_segs, start; - char *src_bitmap, *dst_bitmap; + char *src_bitmap; unsigned int bitmap_size; /* allocate memory for SIT information */ @@ -2483,17 +2483,22 @@ static int build_sit_info(struct f2fs_sb_info *sbi) bitmap_size = __bitmap_size(sbi, SIT_BITMAP); src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP); - dst_bitmap = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL); - if (!dst_bitmap) + sit_i->sit_bitmap = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL); + if (!sit_i->sit_bitmap) return -ENOMEM; +#ifdef CONFIG_F2FS_CHECK_FS + sit_i->sit_bitmap_mir = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL); + if (!sit_i->sit_bitmap_mir) + return -ENOMEM; +#endif + /* init SIT information */ sit_i->s_ops = &default_salloc_ops; sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr); sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg; sit_i->written_valid_blocks = 0; - sit_i->sit_bitmap = dst_bitmap; sit_i->bitmap_size = bitmap_size; sit_i->dirty_sentries = 0; sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK; @@ -2901,6 +2906,9 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi) SM_I(sbi)->sit_info = NULL; kfree(sit_i->sit_bitmap); +#ifdef CONFIG_F2FS_CHECK_FS + kfree(sit_i->sit_bitmap_mir); +#endif kfree(sit_i); } -- cgit v1.2.3 From 0d7a55b0135b91c5eb8ecaf1b6bfe0c0f5eca3fd Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 9 Jan 2017 14:13:03 -0800 Subject: f2fs: clean up flush/discard command namings This patch simply cleans up the names for flush/discard commands. Signed-off-by: Jaegeuk Kim Conflicts: fs/f2fs/segment.c --- fs/f2fs/segment.c | 98 +++++++++++++++++++++++++++---------------------------- 1 file changed, 49 insertions(+), 49 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index c39bbffb0cac..289b3facd2d8 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -26,7 +26,7 @@ #define __reverse_ffz(x) __reverse_ffs(~(x)) static struct kmem_cache *discard_entry_slab; -static struct kmem_cache *bio_entry_slab; +static struct kmem_cache *discard_cmd_slab; static struct kmem_cache *sit_entry_set_slab; static struct kmem_cache *inmem_entry_slab; @@ -439,7 +439,7 @@ static int submit_flush_wait(struct f2fs_sb_info *sbi) static int issue_flush_thread(void *data) { struct f2fs_sb_info *sbi = data; - struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info; + struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info; wait_queue_head_t *q = &fcc->flush_wait_queue; repeat: if (kthread_should_stop()) @@ -468,7 +468,7 @@ repeat: int f2fs_issue_flush(struct f2fs_sb_info *sbi) { - struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info; + struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info; struct flush_cmd cmd; trace_f2fs_issue_flush(sbi->sb, test_opt(sbi, NOBARRIER), @@ -511,8 +511,8 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi) struct flush_cmd_control *fcc; int err = 0; - if (SM_I(sbi)->cmd_control_info) { - fcc = SM_I(sbi)->cmd_control_info; + if (SM_I(sbi)->fcc_info) { + fcc = SM_I(sbi)->fcc_info; goto init_thread; } @@ -522,14 +522,14 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi) atomic_set(&fcc->submit_flush, 0); init_waitqueue_head(&fcc->flush_wait_queue); init_llist_head(&fcc->issue_list); - SM_I(sbi)->cmd_control_info = fcc; + SM_I(sbi)->fcc_info = fcc; init_thread: fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi, "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev)); if (IS_ERR(fcc->f2fs_issue_flush)) { err = PTR_ERR(fcc->f2fs_issue_flush); kfree(fcc); - SM_I(sbi)->cmd_control_info = NULL; + SM_I(sbi)->fcc_info = NULL; return err; } @@ -538,7 +538,7 @@ init_thread: void destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free) { - struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info; + struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info; if (fcc && fcc->f2fs_issue_flush) { struct task_struct *flush_thread = fcc->f2fs_issue_flush; @@ -548,7 +548,7 @@ void destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free) } if (free) { kfree(fcc); - SM_I(sbi)->cmd_control_info = NULL; + SM_I(sbi)->fcc_info = NULL; } } @@ -628,42 +628,43 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) mutex_unlock(&dirty_i->seglist_lock); } -static struct bio_entry *__add_bio_entry(struct f2fs_sb_info *sbi, +static struct discard_cmd *__add_discard_cmd(struct f2fs_sb_info *sbi, struct bio *bio, block_t lstart, block_t len) { - struct list_head *wait_list = &(SM_I(sbi)->wait_list); - struct bio_entry *be = f2fs_kmem_cache_alloc(bio_entry_slab, GFP_NOFS); + struct list_head *wait_list = &(SM_I(sbi)->discard_cmd_list); + struct discard_cmd *dc; - INIT_LIST_HEAD(&be->list); - be->bio = bio; - be->lstart = lstart; - be->len = len; - init_completion(&be->event); - list_add_tail(&be->list, wait_list); + dc = f2fs_kmem_cache_alloc(discard_cmd_slab, GFP_NOFS); + INIT_LIST_HEAD(&dc->list); + dc->bio = bio; + dc->lstart = lstart; + dc->len = len; + init_completion(&dc->wait); + list_add_tail(&dc->list, wait_list); - return be; + return dc; } /* This should be covered by global mutex, &sit_i->sentry_lock */ void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) { - struct list_head *wait_list = &(SM_I(sbi)->wait_list); - struct bio_entry *be, *tmp; + struct list_head *wait_list = &(SM_I(sbi)->discard_cmd_list); + struct discard_cmd *dc, *tmp; - list_for_each_entry_safe(be, tmp, wait_list, list) { - struct bio *bio = be->bio; + list_for_each_entry_safe(dc, tmp, wait_list, list) { + struct bio *bio = dc->bio; int err; - if (!completion_done(&be->event)) { - if ((be->lstart <= blkaddr && - blkaddr < be->lstart + be->len) || + if (!completion_done(&dc->wait)) { + if ((dc->lstart <= blkaddr && + blkaddr < dc->lstart + dc->len) || blkaddr == NULL_ADDR) - wait_for_completion_io(&be->event); + wait_for_completion_io(&dc->wait); else continue; } - err = be->error; + err = bio->bi_error; if (err == -EOPNOTSUPP) err = 0; @@ -672,17 +673,16 @@ void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) "Issue discard failed, ret: %d", err); bio_put(bio); - list_del(&be->list); - kmem_cache_free(bio_entry_slab, be); + list_del(&dc->list); + kmem_cache_free(discard_cmd_slab, dc); } } -static void f2fs_submit_bio_wait_endio(struct bio *bio) +static void f2fs_submit_discard_endio(struct bio *bio) { - struct bio_entry *be = (struct bio_entry *)bio->bi_private; + struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private; - be->error = bio->bi_error; - complete(&be->event); + complete(&dc->wait); } /* copied from block/blk-lib.c in 4.10-rc1 */ @@ -786,11 +786,11 @@ static int __f2fs_issue_discard_async(struct f2fs_sb_info *sbi, SECTOR_FROM_BLOCK(blklen), GFP_NOFS, 0, &bio); if (!err && bio) { - struct bio_entry *be = __add_bio_entry(sbi, bio, + struct discard_cmd *dc = __add_discard_cmd(sbi, bio, lblkstart, blklen); - bio->bi_private = be; - bio->bi_end_io = f2fs_submit_bio_wait_endio; + bio->bi_private = dc; + bio->bi_end_io = f2fs_submit_discard_endio; submit_bio(REQ_SYNC, bio); } return err; @@ -897,7 +897,7 @@ static void __add_discard_entry(struct f2fs_sb_info *sbi, struct cp_control *cpc, struct seg_entry *se, unsigned int start, unsigned int end) { - struct list_head *head = &SM_I(sbi)->discard_list; + struct list_head *head = &SM_I(sbi)->discard_entry_list; struct discard_entry *new, *last; if (!list_empty(head)) { @@ -966,7 +966,7 @@ static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc, void release_discard_addrs(struct f2fs_sb_info *sbi) { - struct list_head *head = &(SM_I(sbi)->discard_list); + struct list_head *head = &(SM_I(sbi)->discard_entry_list); struct discard_entry *entry, *this; /* drop caches */ @@ -992,7 +992,7 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi) void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc) { - struct list_head *head = &(SM_I(sbi)->discard_list); + struct list_head *head = &(SM_I(sbi)->discard_entry_list); struct discard_entry *entry, *this; struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); struct blk_plug plug; @@ -2783,8 +2783,8 @@ int build_segment_manager(struct f2fs_sb_info *sbi) sm_info->min_ipu_util = DEF_MIN_IPU_UTIL; sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS; - INIT_LIST_HEAD(&sm_info->discard_list); - INIT_LIST_HEAD(&sm_info->wait_list); + INIT_LIST_HEAD(&sm_info->discard_entry_list); + INIT_LIST_HEAD(&sm_info->discard_cmd_list); sm_info->nr_discards = 0; sm_info->max_discards = 0; @@ -2934,15 +2934,15 @@ int __init create_segment_manager_caches(void) if (!discard_entry_slab) goto fail; - bio_entry_slab = f2fs_kmem_cache_create("bio_entry", - sizeof(struct bio_entry)); - if (!bio_entry_slab) + discard_cmd_slab = f2fs_kmem_cache_create("discard_cmd", + sizeof(struct discard_cmd)); + if (!discard_cmd_slab) goto destroy_discard_entry; sit_entry_set_slab = f2fs_kmem_cache_create("sit_entry_set", sizeof(struct sit_entry_set)); if (!sit_entry_set_slab) - goto destroy_bio_entry; + goto destroy_discard_cmd; inmem_entry_slab = f2fs_kmem_cache_create("inmem_page_entry", sizeof(struct inmem_pages)); @@ -2952,8 +2952,8 @@ int __init create_segment_manager_caches(void) destroy_sit_entry_set: kmem_cache_destroy(sit_entry_set_slab); -destroy_bio_entry: - kmem_cache_destroy(bio_entry_slab); +destroy_discard_cmd: + kmem_cache_destroy(discard_cmd_slab); destroy_discard_entry: kmem_cache_destroy(discard_entry_slab); fail: @@ -2963,7 +2963,7 @@ fail: void destroy_segment_manager_caches(void) { kmem_cache_destroy(sit_entry_set_slab); - kmem_cache_destroy(bio_entry_slab); + kmem_cache_destroy(discard_cmd_slab); kmem_cache_destroy(discard_entry_slab); kmem_cache_destroy(inmem_entry_slab); } -- cgit v1.2.3 From c4cc29d19eaf010c1133823438f5a3adba155f05 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 9 Jan 2017 18:16:29 -0800 Subject: f2fs: remove batched discard in f2fs_trim_fs We don't need to do multiple checkpoints, since we don't actually wait for completion of discard commands during checkpoint. Instead, we still need to avoid very big discard commands, since that large discard can interfere block allocation. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 38 +++++++++++++------------------------- 1 file changed, 13 insertions(+), 25 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 289b3facd2d8..245ba28529b1 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -903,7 +903,8 @@ static void __add_discard_entry(struct f2fs_sb_info *sbi, if (!list_empty(head)) { last = list_last_entry(head, struct discard_entry, list); if (START_BLOCK(sbi, cpc->trim_start) + start == - last->blkaddr + last->len) { + last->blkaddr + last->len && + last->len <= MAX_DISCARD_BLOCKS(sbi)) { last->len += end - start; goto done; } @@ -1593,36 +1594,25 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) "Found FS corruption, run fsck to fix."); goto out; } + if (sbi->discard_blks == 0) + goto out; /* start/end segment number in main_area */ start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start); end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 : GET_SEGNO(sbi, end); + /* + * do checkpoint to issue discard commands safely since we now can + * use async discard. + */ cpc.reason = CP_DISCARD; cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen)); + cpc.trim_start = start_segno; + cpc.trim_end = end_segno; - /* do checkpoint to issue discard commands safely */ - for (; start_segno <= end_segno; start_segno = cpc.trim_end + 1) { - cpc.trim_start = start_segno; - - if (sbi->discard_blks == 0) - break; - else if (sbi->discard_blks < BATCHED_TRIM_BLOCKS(sbi)) - cpc.trim_end = end_segno; - else - cpc.trim_end = min_t(unsigned int, - rounddown(start_segno + - BATCHED_TRIM_SEGMENTS(sbi), - sbi->segs_per_sec) - 1, end_segno); - - mutex_lock(&sbi->gc_mutex); - err = write_checkpoint(sbi, &cpc); - mutex_unlock(&sbi->gc_mutex); - if (err) - break; - - schedule(); - } + mutex_lock(&sbi->gc_mutex); + err = write_checkpoint(sbi, &cpc); + mutex_unlock(&sbi->gc_mutex); out: range->len = F2FS_BLK_TO_BYTES(cpc.trimmed); return err; @@ -2788,8 +2778,6 @@ int build_segment_manager(struct f2fs_sb_info *sbi) sm_info->nr_discards = 0; sm_info->max_discards = 0; - sm_info->trim_sections = DEF_BATCHED_TRIM_SECTIONS; - INIT_LIST_HEAD(&sm_info->sit_entry_set); if (test_opt(sbi, FLUSH_MERGE) && !f2fs_readonly(sbi->sb)) { -- cgit v1.2.3 From 565f0225f95f1518132952e8fe6854c92a60fd46 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 11 Jan 2017 14:40:24 -0800 Subject: f2fs: factor out discard command info into discard_cmd_control This patch adds discard_cmd_control with the existing discarding controls. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 68 +++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 53 insertions(+), 15 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 245ba28529b1..dbe4b3e3198f 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -631,7 +631,8 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) static struct discard_cmd *__add_discard_cmd(struct f2fs_sb_info *sbi, struct bio *bio, block_t lstart, block_t len) { - struct list_head *wait_list = &(SM_I(sbi)->discard_cmd_list); + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + struct list_head *cmd_list = &(dcc->discard_cmd_list); struct discard_cmd *dc; dc = f2fs_kmem_cache_alloc(discard_cmd_slab, GFP_NOFS); @@ -640,7 +641,7 @@ static struct discard_cmd *__add_discard_cmd(struct f2fs_sb_info *sbi, dc->lstart = lstart; dc->len = len; init_completion(&dc->wait); - list_add_tail(&dc->list, wait_list); + list_add_tail(&dc->list, cmd_list); return dc; } @@ -648,7 +649,8 @@ static struct discard_cmd *__add_discard_cmd(struct f2fs_sb_info *sbi, /* This should be covered by global mutex, &sit_i->sentry_lock */ void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) { - struct list_head *wait_list = &(SM_I(sbi)->discard_cmd_list); + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + struct list_head *wait_list = &(dcc->discard_cmd_list); struct discard_cmd *dc, *tmp; list_for_each_entry_safe(dc, tmp, wait_list, list) { @@ -897,7 +899,7 @@ static void __add_discard_entry(struct f2fs_sb_info *sbi, struct cp_control *cpc, struct seg_entry *se, unsigned int start, unsigned int end) { - struct list_head *head = &SM_I(sbi)->discard_entry_list; + struct list_head *head = &SM_I(sbi)->dcc_info->discard_entry_list; struct discard_entry *new, *last; if (!list_empty(head)) { @@ -916,7 +918,7 @@ static void __add_discard_entry(struct f2fs_sb_info *sbi, new->len = end - start; list_add_tail(&new->list, head); done: - SM_I(sbi)->nr_discards += end - start; + SM_I(sbi)->dcc_info->nr_discards += end - start; } static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc, @@ -938,7 +940,8 @@ static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc, if (!force) { if (!test_opt(sbi, DISCARD) || !se->valid_blocks || - SM_I(sbi)->nr_discards >= SM_I(sbi)->max_discards) + SM_I(sbi)->dcc_info->nr_discards >= + SM_I(sbi)->dcc_info->max_discards) return false; } @@ -947,7 +950,8 @@ static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc, dmap[i] = force ? ~ckpt_map[i] & ~discard_map[i] : (cur_map[i] ^ ckpt_map[i]) & ckpt_map[i]; - while (force || SM_I(sbi)->nr_discards <= SM_I(sbi)->max_discards) { + while (force || SM_I(sbi)->dcc_info->nr_discards <= + SM_I(sbi)->dcc_info->max_discards) { start = __find_rev_next_bit(dmap, max_blocks, end + 1); if (start >= max_blocks) break; @@ -967,7 +971,7 @@ static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc, void release_discard_addrs(struct f2fs_sb_info *sbi) { - struct list_head *head = &(SM_I(sbi)->discard_entry_list); + struct list_head *head = &(SM_I(sbi)->dcc_info->discard_entry_list); struct discard_entry *entry, *this; /* drop caches */ @@ -993,7 +997,7 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi) void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc) { - struct list_head *head = &(SM_I(sbi)->discard_entry_list); + struct list_head *head = &(SM_I(sbi)->dcc_info->discard_entry_list); struct discard_entry *entry, *this; struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); struct blk_plug plug; @@ -1053,13 +1057,47 @@ next: cpc->trimmed += entry->len; skip: list_del(&entry->list); - SM_I(sbi)->nr_discards -= entry->len; + SM_I(sbi)->dcc_info->nr_discards -= entry->len; kmem_cache_free(discard_entry_slab, entry); } blk_finish_plug(&plug); } +int create_discard_cmd_control(struct f2fs_sb_info *sbi) +{ + struct discard_cmd_control *dcc; + int err = 0; + + if (SM_I(sbi)->dcc_info) { + dcc = SM_I(sbi)->dcc_info; + goto init_thread; + } + + dcc = kzalloc(sizeof(struct discard_cmd_control), GFP_KERNEL); + if (!dcc) + return -ENOMEM; + + INIT_LIST_HEAD(&dcc->discard_entry_list); + INIT_LIST_HEAD(&dcc->discard_cmd_list); + dcc->nr_discards = 0; + dcc->max_discards = 0; + + SM_I(sbi)->dcc_info = dcc; +init_thread: + return err; +} + +void destroy_discard_cmd_control(struct f2fs_sb_info *sbi, bool free) +{ + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + + if (free) { + kfree(dcc); + SM_I(sbi)->dcc_info = NULL; + } +} + static bool __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno) { struct sit_info *sit_i = SIT_I(sbi); @@ -2773,11 +2811,6 @@ int build_segment_manager(struct f2fs_sb_info *sbi) sm_info->min_ipu_util = DEF_MIN_IPU_UTIL; sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS; - INIT_LIST_HEAD(&sm_info->discard_entry_list); - INIT_LIST_HEAD(&sm_info->discard_cmd_list); - sm_info->nr_discards = 0; - sm_info->max_discards = 0; - INIT_LIST_HEAD(&sm_info->sit_entry_set); if (test_opt(sbi, FLUSH_MERGE) && !f2fs_readonly(sbi->sb)) { @@ -2786,6 +2819,10 @@ int build_segment_manager(struct f2fs_sb_info *sbi) return err; } + err = create_discard_cmd_control(sbi); + if (err) + return err; + err = build_sit_info(sbi); if (err) return err; @@ -2907,6 +2944,7 @@ void destroy_segment_manager(struct f2fs_sb_info *sbi) if (!sm_info) return; destroy_flush_cmd_control(sbi, true); + destroy_discard_cmd_control(sbi, true); destroy_dirty_segmap(sbi); destroy_curseg(sbi); destroy_free_segmap(sbi); -- cgit v1.2.3 From 587ad91ac9a8fe33865b05e086fea6384ecfbe48 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 9 Jan 2017 20:32:07 -0800 Subject: f2fs: add a kernel thread to issue discard commands asynchronously This patch adds a kernel thread to issue discard commands. It proposes three states, D_PREP, D_SUBMIT, and D_DONE to identify current bio status. Signed-off-by: Jaegeuk Kim Conflicts: fs/f2fs/segment.c --- fs/f2fs/segment.c | 127 +++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 96 insertions(+), 31 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index dbe4b3e3198f..bae15887ac98 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -628,7 +628,7 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) mutex_unlock(&dirty_i->seglist_lock); } -static struct discard_cmd *__add_discard_cmd(struct f2fs_sb_info *sbi, +static void __add_discard_cmd(struct f2fs_sb_info *sbi, struct bio *bio, block_t lstart, block_t len) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; @@ -638,12 +638,30 @@ static struct discard_cmd *__add_discard_cmd(struct f2fs_sb_info *sbi, dc = f2fs_kmem_cache_alloc(discard_cmd_slab, GFP_NOFS); INIT_LIST_HEAD(&dc->list); dc->bio = bio; + bio->bi_private = dc; dc->lstart = lstart; dc->len = len; + dc->state = D_PREP; init_completion(&dc->wait); + + mutex_lock(&dcc->cmd_lock); list_add_tail(&dc->list, cmd_list); + mutex_unlock(&dcc->cmd_lock); +} + +static void __remove_discard_cmd(struct f2fs_sb_info *sbi, struct discard_cmd *dc) +{ + int err = dc->bio->bi_error; - return dc; + if (err == -EOPNOTSUPP) + err = 0; + + if (err) + f2fs_msg(sbi->sb, KERN_INFO, + "Issue discard failed, ret: %d", err); + bio_put(dc->bio); + list_del(&dc->list); + kmem_cache_free(discard_cmd_slab, dc); } /* This should be covered by global mutex, &sit_i->sentry_lock */ @@ -653,31 +671,28 @@ void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) struct list_head *wait_list = &(dcc->discard_cmd_list); struct discard_cmd *dc, *tmp; + mutex_lock(&dcc->cmd_lock); list_for_each_entry_safe(dc, tmp, wait_list, list) { - struct bio *bio = dc->bio; - int err; - if (!completion_done(&dc->wait)) { - if ((dc->lstart <= blkaddr && - blkaddr < dc->lstart + dc->len) || - blkaddr == NULL_ADDR) + if (blkaddr == NULL_ADDR) { + if (dc->state == D_PREP) { + dc->state = D_SUBMIT; + submit_bio(REQ_SYNC, dc->bio); + } + wait_for_completion_io(&dc->wait); + + __remove_discard_cmd(sbi, dc); + continue; + } + + if (dc->lstart <= blkaddr && blkaddr < dc->lstart + dc->len) { + if (dc->state == D_SUBMIT) wait_for_completion_io(&dc->wait); else - continue; + __remove_discard_cmd(sbi, dc); } - - err = bio->bi_error; - if (err == -EOPNOTSUPP) - err = 0; - - if (err) - f2fs_msg(sbi->sb, KERN_INFO, - "Issue discard failed, ret: %d", err); - - bio_put(bio); - list_del(&dc->list); - kmem_cache_free(discard_cmd_slab, dc); } + mutex_unlock(&dcc->cmd_lock); } static void f2fs_submit_discard_endio(struct bio *bio) @@ -685,6 +700,7 @@ static void f2fs_submit_discard_endio(struct bio *bio) struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private; complete(&dc->wait); + dc->state = D_DONE; } /* copied from block/blk-lib.c in 4.10-rc1 */ @@ -768,6 +784,45 @@ static int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, return 0; } +static int issue_discard_thread(void *data) +{ + struct f2fs_sb_info *sbi = data; + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + wait_queue_head_t *q = &dcc->discard_wait_queue; + struct list_head *cmd_list = &dcc->discard_cmd_list; + struct discard_cmd *dc, *tmp; + struct blk_plug plug; + int iter = 0; +repeat: + if (kthread_should_stop()) + return 0; + + blk_start_plug(&plug); + + mutex_lock(&dcc->cmd_lock); + list_for_each_entry_safe(dc, tmp, cmd_list, list) { + if (dc->state == D_PREP) { + dc->state = D_SUBMIT; + submit_bio(REQ_SYNC, dc->bio); + if (iter++ > DISCARD_ISSUE_RATE) + break; + } else if (dc->state == D_DONE) { + __remove_discard_cmd(sbi, dc); + } + } + mutex_unlock(&dcc->cmd_lock); + + blk_finish_plug(&plug); + + iter = 0; + congestion_wait(BLK_RW_SYNC, HZ/50); + + wait_event_interruptible(*q, + kthread_should_stop() || !list_empty(&dcc->discard_cmd_list)); + goto repeat; +} + + /* this function is copied from blkdev_issue_discard from block/blk-lib.c */ static int __f2fs_issue_discard_async(struct f2fs_sb_info *sbi, struct block_device *bdev, block_t blkstart, block_t blklen) @@ -788,12 +843,9 @@ static int __f2fs_issue_discard_async(struct f2fs_sb_info *sbi, SECTOR_FROM_BLOCK(blklen), GFP_NOFS, 0, &bio); if (!err && bio) { - struct discard_cmd *dc = __add_discard_cmd(sbi, bio, - lblkstart, blklen); - - bio->bi_private = dc; bio->bi_end_io = f2fs_submit_discard_endio; - submit_bio(REQ_SYNC, bio); + __add_discard_cmd(sbi, bio, lblkstart, blklen); + wake_up(&SM_I(sbi)->dcc_info->discard_wait_queue); } return err; } @@ -1000,14 +1052,11 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc) struct list_head *head = &(SM_I(sbi)->dcc_info->discard_entry_list); struct discard_entry *entry, *this; struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); - struct blk_plug plug; unsigned long *prefree_map = dirty_i->dirty_segmap[PRE]; unsigned int start = 0, end = -1; unsigned int secno, start_segno; bool force = (cpc->reason == CP_DISCARD); - blk_start_plug(&plug); - mutex_lock(&dirty_i->seglist_lock); while (1) { @@ -1060,12 +1109,11 @@ skip: SM_I(sbi)->dcc_info->nr_discards -= entry->len; kmem_cache_free(discard_entry_slab, entry); } - - blk_finish_plug(&plug); } int create_discard_cmd_control(struct f2fs_sb_info *sbi) { + dev_t dev = sbi->sb->s_bdev->bd_dev; struct discard_cmd_control *dcc; int err = 0; @@ -1080,11 +1128,22 @@ int create_discard_cmd_control(struct f2fs_sb_info *sbi) INIT_LIST_HEAD(&dcc->discard_entry_list); INIT_LIST_HEAD(&dcc->discard_cmd_list); + mutex_init(&dcc->cmd_lock); dcc->nr_discards = 0; dcc->max_discards = 0; + init_waitqueue_head(&dcc->discard_wait_queue); SM_I(sbi)->dcc_info = dcc; init_thread: + dcc->f2fs_issue_discard = kthread_run(issue_discard_thread, sbi, + "f2fs_discard-%u:%u", MAJOR(dev), MINOR(dev)); + if (IS_ERR(dcc->f2fs_issue_discard)) { + err = PTR_ERR(dcc->f2fs_issue_discard); + kfree(dcc); + SM_I(sbi)->dcc_info = NULL; + return err; + } + return err; } @@ -1092,6 +1151,12 @@ void destroy_discard_cmd_control(struct f2fs_sb_info *sbi, bool free) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + if (dcc && dcc->f2fs_issue_discard) { + struct task_struct *discard_thread = dcc->f2fs_issue_discard; + + dcc->f2fs_issue_discard = NULL; + kthread_stop(discard_thread); + } if (free) { kfree(dcc); SM_I(sbi)->dcc_info = NULL; -- cgit v1.2.3 From 334173cc4ca16534d011446c301e85a7cba5c035 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 11 Jan 2017 10:20:04 -0800 Subject: f2fs: show # of on-going flush and discard bios This patch adds stat information for flush and discard commands. Signed-off-by: Jaegeuk Kim Conflicts: fs/f2fs/segment.c --- fs/f2fs/segment.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index bae15887ac98..5efc36f88b4a 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -653,6 +653,9 @@ static void __remove_discard_cmd(struct f2fs_sb_info *sbi, struct discard_cmd *d { int err = dc->bio->bi_error; + if (dc->state == D_DONE) + atomic_dec(&(SM_I(sbi)->dcc_info->submit_discard)); + if (err == -EOPNOTSUPP) err = 0; @@ -678,6 +681,7 @@ void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) if (dc->state == D_PREP) { dc->state = D_SUBMIT; submit_bio(REQ_SYNC, dc->bio); + atomic_inc(&dcc->submit_discard); } wait_for_completion_io(&dc->wait); @@ -804,6 +808,7 @@ repeat: if (dc->state == D_PREP) { dc->state = D_SUBMIT; submit_bio(REQ_SYNC, dc->bio); + atomic_inc(&dcc->submit_discard); if (iter++ > DISCARD_ISSUE_RATE) break; } else if (dc->state == D_DONE) { @@ -1129,6 +1134,7 @@ int create_discard_cmd_control(struct f2fs_sb_info *sbi) INIT_LIST_HEAD(&dcc->discard_entry_list); INIT_LIST_HEAD(&dcc->discard_cmd_list); mutex_init(&dcc->cmd_lock); + atomic_set(&dcc->submit_discard, 0); dcc->nr_discards = 0; dcc->max_discards = 0; -- cgit v1.2.3 From fb40e1231cbc48f58432e095a22aadcff0c0c557 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 25 Jan 2017 10:52:40 +0800 Subject: f2fs: fix null pointer dereference when issuing flush in ->fsync We only allocate flush merge control structure sbi::sm_info::fcc_info when flush_merge option is on, but in f2fs_issue_flush we still try to access member of the control structure without that option, it incurs panic as show below, fix it. Call Trace: __remove_ino_entry+0xa9/0xc0 [f2fs] f2fs_do_sync_file.isra.27+0x214/0x6d0 [f2fs] f2fs_sync_file+0x18/0x20 [f2fs] vfs_fsync_range+0x3d/0xb0 __do_page_fault+0x261/0x4d0 do_fsync+0x3d/0x70 SyS_fsync+0x10/0x20 do_syscall_64+0x6e/0x180 entry_SYSCALL64_slow_path+0x25/0x25 RIP: 0033:0x7f18ce260de0 RSP: 002b:00007ffdd4589258 EFLAGS: 00000246 ORIG_RAX: 000000000000004a RAX: ffffffffffffffda RBX: 0000000000000001 RCX: 00007f18ce260de0 RDX: 0000000000000006 RSI: 00000000016c0360 RDI: 0000000000000003 RBP: 00000000016c0360 R08: 000000000000ffff R09: 000000000000001f R10: 00007ffdd4589020 R11: 0000000000000246 R12: 00000000016c0100 R13: 0000000000000000 R14: 00000000016c1f00 R15: 00000000016c0100 Code: fb 81 e3 00 08 00 00 48 89 45 a0 0f 1f 44 00 00 31 c0 85 db 75 27 41 81 e7 00 04 00 00 74 0c 41 8b 45 20 85 c0 0f 85 81 00 00 00 41 ff 45 20 4c 89 e7 e8 f8 e9 ff ff f0 41 ff 4d 20 48 83 c4 RIP: f2fs_issue_flush+0x5b/0x170 [f2fs] RSP: ffffc90003b5fd78 CR2: 0000000000000020 ---[ end trace a09314c24f037648 ]--- Reported-by: Shuoran Liu Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 5efc36f88b4a..11f2eccd873d 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -477,7 +477,10 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi) if (test_opt(sbi, NOBARRIER)) return 0; - if (!test_opt(sbi, FLUSH_MERGE) || !atomic_read(&fcc->submit_flush)) { + if (!test_opt(sbi, FLUSH_MERGE)) + return submit_flush_wait(sbi); + + if (!atomic_read(&fcc->submit_flush)) { int ret; atomic_inc(&fcc->submit_flush); -- cgit v1.2.3 From 01940a21a97e8f03b8f8fe618728ff099f0e9481 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sun, 29 Jan 2017 14:27:02 +0900 Subject: f2fs: declare missing static function We missed two functions declared as static functions. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 11f2eccd873d..9f0d77b4eefd 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1119,7 +1119,7 @@ skip: } } -int create_discard_cmd_control(struct f2fs_sb_info *sbi) +static int create_discard_cmd_control(struct f2fs_sb_info *sbi) { dev_t dev = sbi->sb->s_bdev->bd_dev; struct discard_cmd_control *dcc; @@ -1156,7 +1156,7 @@ init_thread: return err; } -void destroy_discard_cmd_control(struct f2fs_sb_info *sbi, bool free) +static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi, bool free) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; -- cgit v1.2.3 From 040eb7fd6c297f6f2b7e899930892e87d6a52796 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 2 Feb 2017 16:40:55 -0800 Subject: f2fs: move flush tracepoint This patch moves the tracepoint location for flush command. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 9f0d77b4eefd..ab62f0be2b15 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -426,6 +426,9 @@ static int submit_flush_wait(struct f2fs_sb_info *sbi) int ret = __submit_flush_wait(sbi->sb->s_bdev); int i; + trace_f2fs_issue_flush(sbi->sb, test_opt(sbi, NOBARRIER), + test_opt(sbi, FLUSH_MERGE)); + if (sbi->s_ndevs && !ret) { for (i = 1; i < sbi->s_ndevs; i++) { ret = __submit_flush_wait(FDEV(i).bdev); @@ -471,9 +474,6 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi) struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info; struct flush_cmd cmd; - trace_f2fs_issue_flush(sbi->sb, test_opt(sbi, NOBARRIER), - test_opt(sbi, FLUSH_MERGE)); - if (test_opt(sbi, NOBARRIER)) return 0; -- cgit v1.2.3 From 6cd8a154387ddc8025c93327e86cfbe690dd7035 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 23 Feb 2017 09:52:35 -0800 Subject: f2fs: fix missing bio_alloc(1) For discard commands, we should use bio_alloc(1) in old versions. Fixes: 373bb0247a ("f2fs: support async discard based on v4.9") Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index ab62f0be2b15..83d34c80ec37 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -769,7 +769,7 @@ static int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, if (ret) return ret; } - bio = f2fs_bio_alloc(0); + bio = f2fs_bio_alloc(1); bio->bi_iter.bi_sector = sector; bio->bi_bdev = bdev; bio_set_op_attrs(bio, op, 0); -- cgit v1.2.3 From 5e95180bf64c01a1ed4238ff92b1524ebfb8e34f Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 23 Feb 2017 16:53:14 -0800 Subject: Revert "f2fs: remove batched discard in f2fs_trim_fs" This reverts commit c4cc29d19eaf010c1133823438f5a3adba155f05. Conflicts: fs/f2fs/f2fs.h fs/f2fs/segment.c fs/f2fs/super.c --- fs/f2fs/segment.c | 38 +++++++++++++++++++++++++------------- 1 file changed, 25 insertions(+), 13 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 83d34c80ec37..ab0fc88bfe17 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -965,8 +965,7 @@ static void __add_discard_entry(struct f2fs_sb_info *sbi, if (!list_empty(head)) { last = list_last_entry(head, struct discard_entry, list); if (START_BLOCK(sbi, cpc->trim_start) + start == - last->blkaddr + last->len && - last->len <= MAX_DISCARD_BLOCKS(sbi)) { + last->blkaddr + last->len) { last->len += end - start; goto done; } @@ -1706,25 +1705,36 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) "Found FS corruption, run fsck to fix."); goto out; } - if (sbi->discard_blks == 0) - goto out; /* start/end segment number in main_area */ start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start); end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 : GET_SEGNO(sbi, end); - /* - * do checkpoint to issue discard commands safely since we now can - * use async discard. - */ cpc.reason = CP_DISCARD; cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen)); - cpc.trim_start = start_segno; - cpc.trim_end = end_segno; - mutex_lock(&sbi->gc_mutex); - err = write_checkpoint(sbi, &cpc); - mutex_unlock(&sbi->gc_mutex); + /* do checkpoint to issue discard commands safely */ + for (; start_segno <= end_segno; start_segno = cpc.trim_end + 1) { + cpc.trim_start = start_segno; + + if (sbi->discard_blks == 0) + break; + else if (sbi->discard_blks < BATCHED_TRIM_BLOCKS(sbi)) + cpc.trim_end = end_segno; + else + cpc.trim_end = min_t(unsigned int, + rounddown(start_segno + + BATCHED_TRIM_SEGMENTS(sbi), + sbi->segs_per_sec) - 1, end_segno); + + mutex_lock(&sbi->gc_mutex); + err = write_checkpoint(sbi, &cpc); + mutex_unlock(&sbi->gc_mutex); + if (err) + break; + + schedule(); + } out: range->len = F2FS_BLK_TO_BYTES(cpc.trimmed); return err; @@ -2885,6 +2895,8 @@ int build_segment_manager(struct f2fs_sb_info *sbi) sm_info->min_ipu_util = DEF_MIN_IPU_UTIL; sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS; + sm_info->trim_sections = DEF_BATCHED_TRIM_SECTIONS; + INIT_LIST_HEAD(&sm_info->sit_entry_set); if (test_opt(sbi, FLUSH_MERGE) && !f2fs_readonly(sbi->sb)) { -- cgit v1.2.3 From 0166e6469f9dbbd0c98c71a4803f818f98749929 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 23 Feb 2017 16:58:09 -0800 Subject: f2fs: fix trim_fs assignment This is missing fix from upstream. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index ab0fc88bfe17..f69ddd77558f 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1675,14 +1675,19 @@ static const struct segment_allocation default_salloc_ops = { bool exist_trim_candidates(struct f2fs_sb_info *sbi, struct cp_control *cpc) { __u64 trim_start = cpc->trim_start; + bool has_candidate = false; mutex_lock(&SIT_I(sbi)->sentry_lock); - for (; trim_start <= cpc->trim_end; trim_start++) - if (add_discard_addrs(sbi, cpc, true)) + for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++) { + if (add_discard_addrs(sbi, cpc, true)) { + has_candidate = true; break; + } + } mutex_unlock(&SIT_I(sbi)->sentry_lock); - return trim_start <= cpc->trim_end; + cpc->trim_start = trim_start; + return has_candidate; } int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) -- cgit v1.2.3 From 5b7c84083345af3007d71394037cb2cec31e26f7 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 1 Feb 2017 16:51:22 -0800 Subject: f2fs: check last page index in cached bio to decide submission If the cached bio has the last page's index, then we need to submit it. Otherwise, we don't need to submit it and can wait for further IO merges. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index f69ddd77558f..ddada895787f 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -263,7 +263,7 @@ static int __commit_inmem_pages(struct inode *inode, .op_flags = REQ_SYNC | REQ_NOIDLE | REQ_PRIO, .encrypted_page = NULL, }; - bool submit_bio = false; + pgoff_t last_idx = ULONG_MAX; int err = 0; list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) { @@ -289,15 +289,15 @@ static int __commit_inmem_pages(struct inode *inode, /* record old blkaddr for revoking */ cur->old_addr = fio.old_blkaddr; - - submit_bio = true; + last_idx = page->index; } unlock_page(page); list_move_tail(&cur->list, revoke_list); } - if (submit_bio) - f2fs_submit_merged_bio_cond(sbi, inode, NULL, 0, DATA, WRITE); + if (last_idx != ULONG_MAX) + f2fs_submit_merged_bio_cond(sbi, inode, 0, last_idx, + DATA, WRITE); if (!err) __revoke_inmem_pages(inode, revoke_list, false, false); @@ -2011,7 +2011,8 @@ void f2fs_wait_on_page_writeback(struct page *page, if (PageWriteback(page)) { struct f2fs_sb_info *sbi = F2FS_P_SB(page); - f2fs_submit_merged_bio_cond(sbi, NULL, page, 0, type, WRITE); + f2fs_submit_merged_bio_cond(sbi, page->mapping->host, + 0, page->index, type, WRITE); if (ordered) wait_on_page_writeback(page); else -- cgit v1.2.3 From 5331a1d87fc592190b4eb7cbe6031ea1c6e6a70c Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 14 Feb 2017 19:32:51 -0800 Subject: f2fs: use SSR for warm node as well We have had node chains, but haven't used it so far due to stale node blocks. Now, we have crc|cp_ver in node footer and give random cp_ver at format time, we can start to use it again. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index ddada895787f..3c39982bd4b8 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1642,7 +1642,8 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi, if (force) new_curseg(sbi, type, true); - else if (type == CURSEG_WARM_NODE) + else if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) && + type == CURSEG_WARM_NODE) new_curseg(sbi, type, false); else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type)) new_curseg(sbi, type, false); -- cgit v1.2.3 From ff3bf2f2079260ccd00a2847f244dd0acb6c67b1 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 15 Feb 2017 11:14:06 -0800 Subject: f2fs: show actual device info in tracepoints This patch shows actual device information in the tracepoints. Signed-off-by: Jaegeuk Kim Conflicts: include/trace/events/f2fs.h --- fs/f2fs/segment.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 3c39982bd4b8..4fc23afc03e2 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -426,11 +426,11 @@ static int submit_flush_wait(struct f2fs_sb_info *sbi) int ret = __submit_flush_wait(sbi->sb->s_bdev); int i; - trace_f2fs_issue_flush(sbi->sb, test_opt(sbi, NOBARRIER), - test_opt(sbi, FLUSH_MERGE)); - if (sbi->s_ndevs && !ret) { for (i = 1; i < sbi->s_ndevs; i++) { + trace_f2fs_issue_flush(FDEV(i).bdev, + test_opt(sbi, NOBARRIER), + test_opt(sbi, FLUSH_MERGE)); ret = __submit_flush_wait(FDEV(i).bdev); if (ret) break; @@ -839,7 +839,7 @@ static int __f2fs_issue_discard_async(struct f2fs_sb_info *sbi, block_t lblkstart = blkstart; int err; - trace_f2fs_issue_discard(sbi->sb, blkstart, blklen); + trace_f2fs_issue_discard(bdev, blkstart, blklen); if (sbi->s_ndevs) { int devi = f2fs_target_device_index(sbi, blkstart); @@ -894,7 +894,7 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi, return __f2fs_issue_discard_async(sbi, bdev, blkstart, blklen); case BLK_ZONE_TYPE_SEQWRITE_REQ: case BLK_ZONE_TYPE_SEQWRITE_PREF: - trace_f2fs_issue_reset_zone(sbi->sb, blkstart); + trace_f2fs_issue_reset_zone(bdev, blkstart); return blkdev_reset_zones(bdev, sector, nr_sects, GFP_NOFS); default: -- cgit v1.2.3 From 1466b660bebe2334aad1f806c793be57f028f17d Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Tue, 21 Feb 2017 16:59:26 +0800 Subject: f2fs: put allocate_segment after refresh_sit_entry SIT information should be updated before segment allocation, since SSR needs latest valid block information. Current code does not update the old_blkaddr info in sit_entry, so adjust the allocate_segment to its proper location. Commit 5e443818fa0b2a2845561ee25bec181424fb2889 ("f2fs: handle dirty segments inside refresh_sit_entry") puts it into wrong location. Signed-off-by: Yunlong Song Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 4fc23afc03e2..22e9c31e189f 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1838,14 +1838,15 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, stat_inc_block_count(sbi, curseg); - if (!__has_curseg_space(sbi, type)) - sit_i->s_ops->allocate_segment(sbi, type, false); /* * SIT information should be updated before segment allocation, * since SSR needs latest valid block information. */ refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr); + if (!__has_curseg_space(sbi, type)) + sit_i->s_ops->allocate_segment(sbi, type, false); + mutex_unlock(&sit_i->sentry_lock); if (page && IS_NODESEG(type)) -- cgit v1.2.3 From e3e27c59487bf43b9b5a8bfc1d825e5720443332 Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Wed, 22 Feb 2017 20:50:49 +0800 Subject: f2fs: do SSR for data when there is enough free space In allocate_segment_by_default(), need_SSR() already detected it's time to do SSR. So, let's try to find victims for data segments more aggressively in time. Signed-off-by: Yunlong Song Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 22e9c31e189f..98eef04bffa3 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1619,7 +1619,7 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type) struct curseg_info *curseg = CURSEG_I(sbi, type); const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops; - if (IS_NODESEG(type) || !has_not_enough_free_secs(sbi, 0, 0)) + if (IS_NODESEG(type)) return v_ops->get_victim(sbi, &(curseg)->next_segno, BG_GC, type, SSR); -- cgit v1.2.3 From 62d8564f71ec326e7cb5ba2ab582942858ccccec Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 22 Feb 2017 16:39:11 -0800 Subject: f2fs: do SSR in higher priority Let's check SSR in prior to LFS allocation. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 98eef04bffa3..209fe59e45f4 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1403,17 +1403,6 @@ static void write_current_sum_page(struct f2fs_sb_info *sbi, f2fs_put_page(page, 1); } -static int is_next_segment_free(struct f2fs_sb_info *sbi, int type) -{ - struct curseg_info *curseg = CURSEG_I(sbi, type); - unsigned int segno = curseg->segno + 1; - struct free_segmap_info *free_i = FREE_I(sbi); - - if (segno < MAIN_SEGS(sbi) && segno % sbi->segs_per_sec) - return !test_bit(segno, free_i->free_segmap); - return 0; -} - /* * Find a new segment from the free segments bitmap to right order * This function should be returned with success, otherwise BUG @@ -1638,21 +1627,17 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type) static void allocate_segment_by_default(struct f2fs_sb_info *sbi, int type, bool force) { - struct curseg_info *curseg = CURSEG_I(sbi, type); - if (force) new_curseg(sbi, type, true); else if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) && type == CURSEG_WARM_NODE) new_curseg(sbi, type, false); - else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type)) - new_curseg(sbi, type, false); else if (need_SSR(sbi) && get_ssr_segment(sbi, type)) change_curseg(sbi, type, true); else new_curseg(sbi, type, false); - stat_inc_seg_type(sbi, curseg); + stat_inc_seg_type(sbi, CURSEG_I(sbi, type)); } void allocate_new_segments(struct f2fs_sb_info *sbi) -- cgit v1.2.3 From 3f2523b222146a4af5b0c0c65e82e641475a3c66 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 22 Feb 2017 17:10:18 -0800 Subject: f2fs: find data segments across all the types Previously, if type is CURSEG_HOT_DATA, we only check CURSEG_HOT_DATA only. This patch fixes to search all the different types for SSR. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 209fe59e45f4..aebbc3dbc2de 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1607,16 +1607,23 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type) { struct curseg_info *curseg = CURSEG_I(sbi, type); const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops; + int i; + + /* need_SSR() already forces to do this */ + if (v_ops->get_victim(sbi, &(curseg)->next_segno, BG_GC, type, SSR)) + return 1; if (IS_NODESEG(type)) - return v_ops->get_victim(sbi, - &(curseg)->next_segno, BG_GC, type, SSR); + return 0; /* For data segments, let's do SSR more intensively */ - for (; type >= CURSEG_HOT_DATA; type--) + for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { + if (i == type) + continue; if (v_ops->get_victim(sbi, &(curseg)->next_segno, - BG_GC, type, SSR)) + BG_GC, i, SSR)) return 1; + } return 0; } -- cgit v1.2.3 From 91ef1346c8a399e90c9e3611914dd21f20172029 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 22 Feb 2017 19:10:35 -0800 Subject: f2fs: avoid very large discard command This patch adds MAX_DISCARD_BLOCKS() to avoid issuing too much large single discard command. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index aebbc3dbc2de..63846d45b4ad 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -965,7 +965,8 @@ static void __add_discard_entry(struct f2fs_sb_info *sbi, if (!list_empty(head)) { last = list_last_entry(head, struct discard_entry, list); if (START_BLOCK(sbi, cpc->trim_start) + start == - last->blkaddr + last->len) { + last->blkaddr + last->len && + last->len < MAX_DISCARD_BLOCKS(sbi)) { last->len += end - start; goto done; } -- cgit v1.2.3 From 97a61adb90fb6d15e46b693a7a60b5bde6b1c8d2 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 22 Feb 2017 19:58:23 -0800 Subject: f2fs: wait for discard completion after submission We don't need to wait for each discard commands when unmounting the image. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 63846d45b4ad..609d09ab6012 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -676,8 +676,12 @@ void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; struct list_head *wait_list = &(dcc->discard_cmd_list); struct discard_cmd *dc, *tmp; + struct blk_plug plug; mutex_lock(&dcc->cmd_lock); + + blk_start_plug(&plug); + list_for_each_entry_safe(dc, tmp, wait_list, list) { if (blkaddr == NULL_ADDR) { @@ -686,9 +690,6 @@ void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) submit_bio(REQ_SYNC, dc->bio); atomic_inc(&dcc->submit_discard); } - wait_for_completion_io(&dc->wait); - - __remove_discard_cmd(sbi, dc); continue; } @@ -699,6 +700,15 @@ void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) __remove_discard_cmd(sbi, dc); } } + blk_finish_plug(&plug); + + /* this comes from f2fs_put_super */ + if (blkaddr == NULL_ADDR) { + list_for_each_entry_safe(dc, tmp, wait_list, list) { + wait_for_completion_io(&dc->wait); + __remove_discard_cmd(sbi, dc); + } + } mutex_unlock(&dcc->cmd_lock); } -- cgit v1.2.3 From 70dd5a4c5aa2440965085a36a17496be6ba9b760 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 22 Feb 2017 20:18:35 -0800 Subject: f2fs: check discard alignment only for SEQWRITE zones For converntional zones, we don't need to align discard commands to exact zone size. Signed-off-by: Jaegeuk Kim Conflicts: fs/f2fs/segment.c --- fs/f2fs/segment.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 609d09ab6012..d77b2cddf9df 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -872,24 +872,13 @@ static int __f2fs_issue_discard_async(struct f2fs_sb_info *sbi, static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi, struct block_device *bdev, block_t blkstart, block_t blklen) { - sector_t nr_sects = SECTOR_FROM_BLOCK(blklen); - sector_t sector; + sector_t sector, nr_sects; int devi = 0; if (sbi->s_ndevs) { devi = f2fs_target_device_index(sbi, blkstart); blkstart -= FDEV(devi).start_blk; } - sector = SECTOR_FROM_BLOCK(blkstart); - - if (sector & (bdev_zone_size(bdev) - 1) || - nr_sects != bdev_zone_size(bdev)) { - f2fs_msg(sbi->sb, KERN_INFO, - "(%d) %s: Unaligned discard attempted (block %x + %x)", - devi, sbi->s_ndevs ? FDEV(devi).path: "", - blkstart, blklen); - return -EIO; - } /* * We need to know the type of the zone: for conventional zones, @@ -904,6 +893,17 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi, return __f2fs_issue_discard_async(sbi, bdev, blkstart, blklen); case BLK_ZONE_TYPE_SEQWRITE_REQ: case BLK_ZONE_TYPE_SEQWRITE_PREF: + sector = SECTOR_FROM_BLOCK(blkstart); + nr_sects = SECTOR_FROM_BLOCK(blklen); + + if (sector & (bdev_zone_size(bdev) - 1) || + nr_sects != bdev_zone_size(bdev)) { + f2fs_msg(sbi->sb, KERN_INFO, + "(%d) %s: Unaligned discard attempted (block %x + %x)", + devi, sbi->s_ndevs ? FDEV(devi).path: "", + blkstart, blklen); + return -EIO; + } trace_f2fs_issue_reset_zone(bdev, blkstart); return blkdev_reset_zones(bdev, sector, nr_sects, GFP_NOFS); -- cgit v1.2.3 From cb9ca08d121ab076d027d3f37806fd315d9771fa Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 22 Feb 2017 17:02:32 -0800 Subject: f2fs: do SSR for node segments more aggresively This patch gives more SSR chances for node blocks. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index d77b2cddf9df..934749663b61 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1618,17 +1618,22 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type) { struct curseg_info *curseg = CURSEG_I(sbi, type); const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops; - int i; + int i, n; /* need_SSR() already forces to do this */ if (v_ops->get_victim(sbi, &(curseg)->next_segno, BG_GC, type, SSR)) return 1; - if (IS_NODESEG(type)) - return 0; + /* For node segments, let's do SSR more intensively */ + if (IS_NODESEG(type)) { + i = CURSEG_HOT_NODE; + n = CURSEG_COLD_NODE; + } else { + i = CURSEG_HOT_DATA; + n = CURSEG_COLD_DATA; + } - /* For data segments, let's do SSR more intensively */ - for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { + for (; i <= n; i++) { if (i == type) continue; if (v_ops->get_victim(sbi, &(curseg)->next_segno, -- cgit v1.2.3 From 273924c37731e8a4e578f31727167338a237d1dd Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 9 Feb 2017 10:38:09 -0800 Subject: f2fs: add bitmaps for empty or full NAT blocks This patches adds bitmaps to represent empty or full NAT blocks containing free nid entries. If we can find valid crc|cp_ver in the last block of checkpoint pack, we'll use these bitmaps when building free nids. In order to avoid checkpointing burden, up-to-date bitmaps will be flushed only during umount time. So, normally we can get this gain, but when power-cut happens, we rely on fsck.f2fs which recovers this bitmap again. After this patch, we build free nids from nid #0 at mount time to make more full NAT blocks, but in runtime, we check empty NAT blocks to load free nids without loading any NAT pages from disk. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 934749663b61..953599361fb0 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -386,7 +386,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) if (!available_free_memory(sbi, FREE_NIDS)) try_to_free_nids(sbi, MAX_FREE_NIDS); else - build_free_nids(sbi, false); + build_free_nids(sbi, false, false); if (!is_idle(sbi)) return; -- cgit v1.2.3 From 3a40c74cce8bf6a05114d70317fe4c2c8b6ca50d Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 25 Feb 2017 11:08:28 +0800 Subject: f2fs: show simple call stack in fault injection message Previously kernel message can show that in which function we do the injection, but unfortunately, most of the caller are the same, for tracking more information of injection path, it needs to show upper caller's name. This patch supports that ability. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Conflicts: fs/f2fs/data.c --- fs/f2fs/segment.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 953599361fb0..684a5165dd04 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -352,8 +352,10 @@ int commit_inmem_pages(struct inode *inode) void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) { #ifdef CONFIG_F2FS_FAULT_INJECTION - if (time_to_inject(sbi, FAULT_CHECKPOINT)) + if (time_to_inject(sbi, FAULT_CHECKPOINT)) { + f2fs_show_injection_info(FAULT_CHECKPOINT); f2fs_stop_checkpoint(sbi, false); + } #endif if (!need) -- cgit v1.2.3 From 377816fec3f305ab5f4f72356363bfa98b992db5 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 24 Feb 2017 18:46:00 +0800 Subject: f2fs: select target segment with closer temperature in SSR mode In SSR mode, we can allocate target segment which has different temperature type from the type of current block, in order to avoid mixing coldest and hottest data/node as much as possible, change SSR allocation policy to select closer temperature for current block prior. Signed-off-by: Yunlong Song Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 684a5165dd04..e4ef306ba234 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1620,7 +1620,8 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type) { struct curseg_info *curseg = CURSEG_I(sbi, type); const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops; - int i, n; + int i, cnt; + bool reversed = false; /* need_SSR() already forces to do this */ if (v_ops->get_victim(sbi, &(curseg)->next_segno, BG_GC, type, SSR)) @@ -1628,14 +1629,24 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type) /* For node segments, let's do SSR more intensively */ if (IS_NODESEG(type)) { - i = CURSEG_HOT_NODE; - n = CURSEG_COLD_NODE; + if (type >= CURSEG_WARM_NODE) { + reversed = true; + i = CURSEG_COLD_NODE; + } else { + i = CURSEG_HOT_NODE; + } + cnt = NR_CURSEG_NODE_TYPE; } else { - i = CURSEG_HOT_DATA; - n = CURSEG_COLD_DATA; + if (type >= CURSEG_WARM_DATA) { + reversed = true; + i = CURSEG_COLD_DATA; + } else { + i = CURSEG_HOT_DATA; + } + cnt = NR_CURSEG_DATA_TYPE; } - for (; i <= n; i++) { + for (; cnt-- > 0; reversed ? i-- : i++) { if (i == type) continue; if (v_ops->get_victim(sbi, &(curseg)->next_segno, -- cgit v1.2.3 From 7375ae65fa6dae808669b6837b49ca40fe227531 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 27 Feb 2017 11:57:11 -0800 Subject: f2fs: avoid to issue redundant discard commands If segs_per_sec is over 1 like under SMR, previously f2fs issues discard commands redundantly on the same section, since we didn't move end position for the previous discard command. E.g., start end | | prefree_bitmap = [01111100111100] And, after issue discard for this section, end start | | prefree_bitmap = [01111100111100] Select this section again by searching from (end + 1), start end | | prefree_bitmap = [01111100111100] Fixes: 36abef4e796d38 ("f2fs: introduce mode=lfs mount option") Cc: Cc: Damien Le Moal Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index e4ef306ba234..a09c726cc1c3 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1115,6 +1115,8 @@ next: start = start_segno + sbi->segs_per_sec; if (start < end) goto next; + else + end = start - 1; } mutex_unlock(&dirty_i->seglist_lock); -- cgit v1.2.3 From 4b056f06acfac3a7a5124c5eb1c1ee43807b3367 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 6 Mar 2017 11:59:56 -0800 Subject: f2fs: don't overwrite node block by SSR This patch fixes that SSR can overwrite previous warm node block consisting of a node chain since the last checkpoint. Fixes: 5b6c6be2d878 ("f2fs: use SSR for warm node as well") Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index a09c726cc1c3..de30f4a86219 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1242,6 +1242,12 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) if (f2fs_discard_en(sbi) && !f2fs_test_and_set_bit(offset, se->discard_map)) sbi->discard_blks--; + + /* don't overwrite by SSR to keep node chain */ + if (se->type == CURSEG_WARM_NODE) { + if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map)) + se->ckpt_valid_blocks++; + } } else { if (!f2fs_test_and_clear_bit(offset, se->cur_valid_map)) { #ifdef CONFIG_F2FS_CHECK_FS -- cgit v1.2.3 From ef250a614506801289ffb8cb1e8e20e1a4e341ec Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Wed, 8 Mar 2017 09:49:53 +0800 Subject: f2fs: fix the fault of calculating blkstart twice When the zone type is BLK_ZONE_TYPE_CONVENTIONAL, the blkstart is calculated twice. Signed-off-by: Kinglong Mee Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index de30f4a86219..b914cfb49096 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -875,6 +875,7 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi, struct block_device *bdev, block_t blkstart, block_t blklen) { sector_t sector, nr_sects; + block_t lblkstart = blkstart; int devi = 0; if (sbi->s_ndevs) { @@ -892,7 +893,7 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi, case BLK_ZONE_TYPE_CONVENTIONAL: if (!blk_queue_discard(bdev_get_queue(bdev))) return 0; - return __f2fs_issue_discard_async(sbi, bdev, blkstart, blklen); + return __f2fs_issue_discard_async(sbi, bdev, lblkstart, blklen); case BLK_ZONE_TYPE_SEQWRITE_REQ: case BLK_ZONE_TYPE_SEQWRITE_PREF: sector = SECTOR_FROM_BLOCK(blkstart); -- cgit v1.2.3 From 26012ec09c68b91929d33ad268a8e470c5df870e Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 17 Mar 2017 09:55:52 +0800 Subject: f2fs: fix stale ATOMIC_WRITTEN_PAGE private pointer When I forced to enable atomic operations intentionally, I could hit the below panic, since we didn't clear page->private in f2fs_invalidate_page called by file truncation. The panic occurs due to NULL mapping having page->private. BUG: unable to handle kernel paging request at ffffffffffffffff IP: drop_buffers+0x38/0xe0 PGD 5d00c067 PUD 5d00e067 PMD 0 CPU: 3 PID: 1648 Comm: fsstress Tainted: G D OE 4.10.0+ #5 Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 task: ffff9151952863c0 task.stack: ffffaaec40db4000 RIP: 0010:drop_buffers+0x38/0xe0 RSP: 0018:ffffaaec40db74c8 EFLAGS: 00010292 Call Trace: ? page_referenced+0x8b/0x170 try_to_free_buffers+0xc5/0xe0 try_to_release_page+0x49/0x50 shrink_page_list+0x8bc/0x9f0 shrink_inactive_list+0x1dd/0x500 ? shrink_active_list+0x2c0/0x430 shrink_node_memcg+0x5eb/0x7c0 shrink_node+0xe1/0x320 do_try_to_free_pages+0xef/0x2e0 try_to_free_pages+0xe9/0x190 __alloc_pages_slowpath+0x390/0xe70 __alloc_pages_nodemask+0x291/0x2b0 alloc_pages_current+0x95/0x140 __page_cache_alloc+0xc4/0xe0 pagecache_get_page+0xab/0x2a0 grab_cache_page_write_begin+0x20/0x40 get_read_data_page+0x2e6/0x4c0 [f2fs] ? f2fs_mark_inode_dirty_sync+0x16/0x30 [f2fs] ? truncate_data_blocks_range+0x238/0x2b0 [f2fs] get_lock_data_page+0x30/0x190 [f2fs] __exchange_data_block+0xaaf/0xf40 [f2fs] f2fs_fallocate+0x418/0xd00 [f2fs] vfs_fallocate+0x157/0x220 SyS_fallocate+0x48/0x80 Signed-off-by: Yunlei He Signed-off-by: Chao Yu [Chao Yu: use INMEM_INVALIDATE for better tracing] Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index b914cfb49096..7d7a8270bbbe 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -250,6 +250,36 @@ void drop_inmem_pages(struct inode *inode) stat_dec_atomic_write(inode); } +void drop_inmem_page(struct inode *inode, struct page *page) +{ + struct f2fs_inode_info *fi = F2FS_I(inode); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct list_head *head = &fi->inmem_pages; + struct inmem_pages *cur = NULL; + + f2fs_bug_on(sbi, !IS_ATOMIC_WRITTEN_PAGE(page)); + + mutex_lock(&fi->inmem_lock); + list_for_each_entry(cur, head, list) { + if (cur->page == page) + break; + } + + f2fs_bug_on(sbi, !cur || cur->page != page); + list_del(&cur->list); + mutex_unlock(&fi->inmem_lock); + + dec_page_count(sbi, F2FS_INMEM_PAGES); + kmem_cache_free(inmem_entry_slab, cur); + + ClearPageUptodate(page); + set_page_private(page, 0); + ClearPagePrivate(page); + f2fs_put_page(page, 0); + + trace_f2fs_commit_inmem_page(page, INMEM_INVALIDATE); +} + static int __commit_inmem_pages(struct inode *inode, struct list_head *revoke_list) { -- cgit v1.2.3 From 363f8e93f52dafc302716056f31e835b045cd2c3 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Sat, 4 Mar 2017 22:13:10 +0800 Subject: f2fs: make sure trace all f2fs_issue_flush The root device's issue flush trace is missing, add it and tracing the result from submit. Fixes d50aaeec90 ("f2fs: show actual device info in tracepoints") Signed-off-by: Kinglong Mee Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 7d7a8270bbbe..26eef87e82ec 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -441,7 +441,8 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) } } -static int __submit_flush_wait(struct block_device *bdev) +static int __submit_flush_wait(struct f2fs_sb_info *sbi, + struct block_device *bdev) { struct bio *bio = f2fs_bio_alloc(0); int ret; @@ -450,23 +451,24 @@ static int __submit_flush_wait(struct block_device *bdev) bio->bi_bdev = bdev; ret = submit_bio_wait(WRITE_FLUSH, bio); bio_put(bio); + + trace_f2fs_issue_flush(bdev, test_opt(sbi, NOBARRIER), + test_opt(sbi, FLUSH_MERGE), ret); return ret; } static int submit_flush_wait(struct f2fs_sb_info *sbi) { - int ret = __submit_flush_wait(sbi->sb->s_bdev); + int ret = __submit_flush_wait(sbi, sbi->sb->s_bdev); int i; - if (sbi->s_ndevs && !ret) { - for (i = 1; i < sbi->s_ndevs; i++) { - trace_f2fs_issue_flush(FDEV(i).bdev, - test_opt(sbi, NOBARRIER), - test_opt(sbi, FLUSH_MERGE)); - ret = __submit_flush_wait(FDEV(i).bdev); - if (ret) - break; - } + if (!sbi->s_ndevs || ret) + return ret; + + for (i = 1; i < sbi->s_ndevs; i++) { + ret = __submit_flush_wait(sbi, FDEV(i).bdev); + if (ret) + break; } return ret; } -- cgit v1.2.3 From 28fa89b32d44f7a09e6ef7357049a4ebce892e3b Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 7 Mar 2017 18:02:02 -0800 Subject: f2fs: allocate a bio for discarding when actually issuing it Let's allocate a bio when issuing discard commands later. Signed-off-by: Jaegeuk Kim Conflicts: fs/f2fs/segment.c --- fs/f2fs/segment.c | 192 +++++++++++++++++++++++++++++------------------------- 1 file changed, 102 insertions(+), 90 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 26eef87e82ec..f0c06e4785a9 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -666,7 +666,8 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) } static void __add_discard_cmd(struct f2fs_sb_info *sbi, - struct bio *bio, block_t lstart, block_t len) + struct block_device *bdev, block_t lstart, + block_t start, block_t len) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; struct list_head *cmd_list = &(dcc->discard_cmd_list); @@ -674,11 +675,12 @@ static void __add_discard_cmd(struct f2fs_sb_info *sbi, dc = f2fs_kmem_cache_alloc(discard_cmd_slab, GFP_NOFS); INIT_LIST_HEAD(&dc->list); - dc->bio = bio; - bio->bi_private = dc; + dc->bdev = bdev; dc->lstart = lstart; + dc->start = start; dc->len = len; dc->state = D_PREP; + dc->error = 0; init_completion(&dc->wait); mutex_lock(&dcc->cmd_lock); @@ -688,70 +690,27 @@ static void __add_discard_cmd(struct f2fs_sb_info *sbi, static void __remove_discard_cmd(struct f2fs_sb_info *sbi, struct discard_cmd *dc) { - int err = dc->bio->bi_error; - if (dc->state == D_DONE) atomic_dec(&(SM_I(sbi)->dcc_info->submit_discard)); - if (err == -EOPNOTSUPP) - err = 0; + if (dc->error == -EOPNOTSUPP) + dc->error = 0; - if (err) + if (dc->error) f2fs_msg(sbi->sb, KERN_INFO, - "Issue discard failed, ret: %d", err); - bio_put(dc->bio); + "Issue discard failed, ret: %d", dc->error); list_del(&dc->list); kmem_cache_free(discard_cmd_slab, dc); } -/* This should be covered by global mutex, &sit_i->sentry_lock */ -void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) -{ - struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; - struct list_head *wait_list = &(dcc->discard_cmd_list); - struct discard_cmd *dc, *tmp; - struct blk_plug plug; - - mutex_lock(&dcc->cmd_lock); - - blk_start_plug(&plug); - - list_for_each_entry_safe(dc, tmp, wait_list, list) { - - if (blkaddr == NULL_ADDR) { - if (dc->state == D_PREP) { - dc->state = D_SUBMIT; - submit_bio(REQ_SYNC, dc->bio); - atomic_inc(&dcc->submit_discard); - } - continue; - } - - if (dc->lstart <= blkaddr && blkaddr < dc->lstart + dc->len) { - if (dc->state == D_SUBMIT) - wait_for_completion_io(&dc->wait); - else - __remove_discard_cmd(sbi, dc); - } - } - blk_finish_plug(&plug); - - /* this comes from f2fs_put_super */ - if (blkaddr == NULL_ADDR) { - list_for_each_entry_safe(dc, tmp, wait_list, list) { - wait_for_completion_io(&dc->wait); - __remove_discard_cmd(sbi, dc); - } - } - mutex_unlock(&dcc->cmd_lock); -} - static void f2fs_submit_discard_endio(struct bio *bio) { struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private; complete(&dc->wait); + dc->error = bio->bi_error; dc->state = D_DONE; + bio_put(bio); } /* copied from block/blk-lib.c in 4.10-rc1 */ @@ -835,6 +794,88 @@ static int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, return 0; } +static void __submit_discard_cmd(struct f2fs_sb_info *sbi, + struct discard_cmd *dc) +{ + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + struct bio *bio = NULL; + + if (dc->state != D_PREP) + return; + + dc->error = __blkdev_issue_discard(dc->bdev, + SECTOR_FROM_BLOCK(dc->start), + SECTOR_FROM_BLOCK(dc->len), + GFP_NOFS, 0, &bio); + if (!dc->error) { + /* should keep before submission to avoid D_DONE right away */ + dc->state = D_SUBMIT; + atomic_inc(&dcc->submit_discard); + if (bio) { + bio->bi_private = dc; + bio->bi_end_io = f2fs_submit_discard_endio; + submit_bio(REQ_SYNC, bio); + } + } else { + __remove_discard_cmd(sbi, dc); + } +} + +static int __queue_discard_cmd(struct f2fs_sb_info *sbi, + struct block_device *bdev, block_t blkstart, block_t blklen) +{ + block_t lblkstart = blkstart; + + trace_f2fs_issue_discard(bdev, blkstart, blklen); + + if (sbi->s_ndevs) { + int devi = f2fs_target_device_index(sbi, blkstart); + + blkstart -= FDEV(devi).start_blk; + } + __add_discard_cmd(sbi, bdev, lblkstart, blkstart, blklen); + wake_up(&SM_I(sbi)->dcc_info->discard_wait_queue); + return 0; +} + +/* This should be covered by global mutex, &sit_i->sentry_lock */ +void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) +{ + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + struct list_head *wait_list = &(dcc->discard_cmd_list); + struct discard_cmd *dc, *tmp; + struct blk_plug plug; + + mutex_lock(&dcc->cmd_lock); + + blk_start_plug(&plug); + + list_for_each_entry_safe(dc, tmp, wait_list, list) { + + if (blkaddr == NULL_ADDR) { + __submit_discard_cmd(sbi, dc); + continue; + } + + if (dc->lstart <= blkaddr && blkaddr < dc->lstart + dc->len) { + if (dc->state == D_SUBMIT) + wait_for_completion_io(&dc->wait); + else + __remove_discard_cmd(sbi, dc); + } + } + blk_finish_plug(&plug); + + /* this comes from f2fs_put_super */ + if (blkaddr == NULL_ADDR) { + list_for_each_entry_safe(dc, tmp, wait_list, list) { + wait_for_completion_io(&dc->wait); + __remove_discard_cmd(sbi, dc); + } + } + mutex_unlock(&dcc->cmd_lock); +} + static int issue_discard_thread(void *data) { struct f2fs_sb_info *sbi = data; @@ -852,15 +893,14 @@ repeat: mutex_lock(&dcc->cmd_lock); list_for_each_entry_safe(dc, tmp, cmd_list, list) { - if (dc->state == D_PREP) { - dc->state = D_SUBMIT; - submit_bio(REQ_SYNC, dc->bio); - atomic_inc(&dcc->submit_discard); - if (iter++ > DISCARD_ISSUE_RATE) - break; - } else if (dc->state == D_DONE) { + + if (is_idle(sbi)) + __submit_discard_cmd(sbi, dc); + + if (dc->state == D_PREP && iter++ > DISCARD_ISSUE_RATE) + break; + if (dc->state == D_DONE) __remove_discard_cmd(sbi, dc); - } } mutex_unlock(&dcc->cmd_lock); @@ -874,34 +914,6 @@ repeat: goto repeat; } - -/* this function is copied from blkdev_issue_discard from block/blk-lib.c */ -static int __f2fs_issue_discard_async(struct f2fs_sb_info *sbi, - struct block_device *bdev, block_t blkstart, block_t blklen) -{ - struct bio *bio = NULL; - block_t lblkstart = blkstart; - int err; - - trace_f2fs_issue_discard(bdev, blkstart, blklen); - - if (sbi->s_ndevs) { - int devi = f2fs_target_device_index(sbi, blkstart); - - blkstart -= FDEV(devi).start_blk; - } - err = __blkdev_issue_discard(bdev, - SECTOR_FROM_BLOCK(blkstart), - SECTOR_FROM_BLOCK(blklen), - GFP_NOFS, 0, &bio); - if (!err && bio) { - bio->bi_end_io = f2fs_submit_discard_endio; - __add_discard_cmd(sbi, bio, lblkstart, blklen); - wake_up(&SM_I(sbi)->dcc_info->discard_wait_queue); - } - return err; -} - #ifdef CONFIG_BLK_DEV_ZONED static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi, struct block_device *bdev, block_t blkstart, block_t blklen) @@ -925,7 +937,7 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi, case BLK_ZONE_TYPE_CONVENTIONAL: if (!blk_queue_discard(bdev_get_queue(bdev))) return 0; - return __f2fs_issue_discard_async(sbi, bdev, lblkstart, blklen); + return __queue_discard_cmd(sbi, bdev, lblkstart, blklen); case BLK_ZONE_TYPE_SEQWRITE_REQ: case BLK_ZONE_TYPE_SEQWRITE_PREF: sector = SECTOR_FROM_BLOCK(blkstart); @@ -957,7 +969,7 @@ static int __issue_discard_async(struct f2fs_sb_info *sbi, bdev_zoned_model(bdev) != BLK_ZONED_NONE) return __f2fs_issue_discard_zone(sbi, bdev, blkstart, blklen); #endif - return __f2fs_issue_discard_async(sbi, bdev, blkstart, blklen); + return __queue_discard_cmd(sbi, bdev, blkstart, blklen); } static int f2fs_issue_discard(struct f2fs_sb_info *sbi, -- cgit v1.2.3 From 02f88520d6f38a485923badc286238a8833dd3ca Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Thu, 2 Mar 2017 10:36:20 +0800 Subject: f2fs: add a punch discard command function This patch add a function to punch discard command if one segment reuse before discard. Split this segment from multi-segments discard range, and discard the left bigger range. Signed-off-by: Yunlei He Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index f0c06e4785a9..5fc0173af7e3 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -838,6 +838,25 @@ static int __queue_discard_cmd(struct f2fs_sb_info *sbi, return 0; } +static void __punch_discard_cmd(struct f2fs_sb_info *sbi, + struct discard_cmd *dc, block_t blkaddr) +{ + block_t end_block = START_BLOCK(sbi, GET_SEGNO(sbi, blkaddr) + 1); + + if (dc->state == D_DONE || dc->lstart + dc->len <= end_block) { + __remove_discard_cmd(sbi, dc); + return; + } + + if (blkaddr - dc->lstart < dc->lstart + dc->len - end_block) { + dc->start += (end_block - dc->lstart); + dc->len -= (end_block - dc->lstart); + dc->lstart = end_block; + } else { + dc->len = blkaddr - dc->lstart; + } +} + /* This should be covered by global mutex, &sit_i->sentry_lock */ void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) { @@ -860,8 +879,7 @@ void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) if (dc->lstart <= blkaddr && blkaddr < dc->lstart + dc->len) { if (dc->state == D_SUBMIT) wait_for_completion_io(&dc->wait); - else - __remove_discard_cmd(sbi, dc); + __punch_discard_cmd(sbi, dc, blkaddr); } } blk_finish_plug(&plug); -- cgit v1.2.3 From 5bac5ad719c9fce5975f1c838a66b530d4a3d8f2 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 24 Mar 2017 20:41:45 -0400 Subject: f2fs: allocate node and hot data in the beginning of partition In order to give more spatial locality, this patch changes the block allocation policy which assigns beginning of partition for small and hot data/node blocks. In order to do this, we set noheap allocation by default and introduce another mount option, heap, to reset it back. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 5fc0173af7e3..a305b38737f8 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1598,6 +1598,14 @@ static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified) __set_sit_entry_type(sbi, type, curseg->segno, modified); } +static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type) +{ + if (type == CURSEG_HOT_DATA || IS_NODESEG(type)) + return 0; + + return CURSEG_I(sbi, type)->segno; +} + /* * Allocate a current working segment. * This function always allocates a free segment in LFS manner. @@ -1616,6 +1624,7 @@ static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec) if (test_opt(sbi, NOHEAP)) dir = ALLOC_RIGHT; + segno = __get_next_segno(sbi, type); get_new_segment(sbi, &segno, new_sec, dir); curseg->next_segno = segno; reset_curseg(sbi, type, 1); -- cgit v1.2.3 From 0d5b6b22f109d29112080ef74da07c2ac8916e3c Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 25 Mar 2017 17:19:58 +0800 Subject: f2fs: show issued flush/discard count Show historical count of flush command and discard command. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 37 +++++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 14 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index a305b38737f8..717d6cc51ef2 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -490,6 +490,8 @@ repeat: fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list); ret = submit_flush_wait(sbi); + atomic_inc(&fcc->issued_flush); + llist_for_each_entry_safe(cmd, next, fcc->dispatch_list, llnode) { cmd->ret = ret; @@ -507,25 +509,29 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi) { struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info; struct flush_cmd cmd; + int ret; if (test_opt(sbi, NOBARRIER)) return 0; - if (!test_opt(sbi, FLUSH_MERGE)) - return submit_flush_wait(sbi); - - if (!atomic_read(&fcc->submit_flush)) { - int ret; + if (!test_opt(sbi, FLUSH_MERGE)) { + ret = submit_flush_wait(sbi); + atomic_inc(&fcc->issued_flush); + return ret; + } - atomic_inc(&fcc->submit_flush); + if (!atomic_read(&fcc->issing_flush)) { + atomic_inc(&fcc->issing_flush); ret = submit_flush_wait(sbi); - atomic_dec(&fcc->submit_flush); + atomic_dec(&fcc->issing_flush); + + atomic_inc(&fcc->issued_flush); return ret; } init_completion(&cmd.wait); - atomic_inc(&fcc->submit_flush); + atomic_inc(&fcc->issing_flush); llist_add(&cmd.llnode, &fcc->issue_list); if (!fcc->dispatch_list) @@ -533,10 +539,10 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi) if (fcc->f2fs_issue_flush) { wait_for_completion(&cmd.wait); - atomic_dec(&fcc->submit_flush); + atomic_dec(&fcc->issing_flush); } else { llist_del_all(&fcc->issue_list); - atomic_set(&fcc->submit_flush, 0); + atomic_set(&fcc->issing_flush, 0); } return cmd.ret; @@ -556,7 +562,8 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi) fcc = kzalloc(sizeof(struct flush_cmd_control), GFP_KERNEL); if (!fcc) return -ENOMEM; - atomic_set(&fcc->submit_flush, 0); + atomic_set(&fcc->issued_flush, 0); + atomic_set(&fcc->issing_flush, 0); init_waitqueue_head(&fcc->flush_wait_queue); init_llist_head(&fcc->issue_list); SM_I(sbi)->fcc_info = fcc; @@ -691,7 +698,7 @@ static void __add_discard_cmd(struct f2fs_sb_info *sbi, static void __remove_discard_cmd(struct f2fs_sb_info *sbi, struct discard_cmd *dc) { if (dc->state == D_DONE) - atomic_dec(&(SM_I(sbi)->dcc_info->submit_discard)); + atomic_dec(&(SM_I(sbi)->dcc_info->issing_discard)); if (dc->error == -EOPNOTSUPP) dc->error = 0; @@ -810,7 +817,8 @@ static void __submit_discard_cmd(struct f2fs_sb_info *sbi, if (!dc->error) { /* should keep before submission to avoid D_DONE right away */ dc->state = D_SUBMIT; - atomic_inc(&dcc->submit_discard); + atomic_inc(&dcc->issued_discard); + atomic_inc(&dcc->issing_discard); if (bio) { bio->bi_private = dc; bio->bi_end_io = f2fs_submit_discard_endio; @@ -1214,7 +1222,8 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi) INIT_LIST_HEAD(&dcc->discard_entry_list); INIT_LIST_HEAD(&dcc->discard_cmd_list); mutex_init(&dcc->cmd_lock); - atomic_set(&dcc->submit_discard, 0); + atomic_set(&dcc->issued_discard, 0); + atomic_set(&dcc->issing_discard, 0); dcc->nr_discards = 0; dcc->max_discards = 0; -- cgit v1.2.3 From 54c1e9049e250dfbbef3f2a51da4efa72e6e4b0c Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 25 Mar 2017 17:19:59 +0800 Subject: f2fs: count discard command entry Adds to count discard command entry and show the number in debugfs, also fix to add cost of discard command cache into total comsumed memory footprint. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 717d6cc51ef2..036b41257d60 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -693,6 +693,8 @@ static void __add_discard_cmd(struct f2fs_sb_info *sbi, mutex_lock(&dcc->cmd_lock); list_add_tail(&dc->list, cmd_list); mutex_unlock(&dcc->cmd_lock); + + atomic_inc(&dcc->discard_cmd_cnt); } static void __remove_discard_cmd(struct f2fs_sb_info *sbi, struct discard_cmd *dc) @@ -708,6 +710,7 @@ static void __remove_discard_cmd(struct f2fs_sb_info *sbi, struct discard_cmd *d "Issue discard failed, ret: %d", dc->error); list_del(&dc->list); kmem_cache_free(discard_cmd_slab, dc); + atomic_dec(&SM_I(sbi)->dcc_info->discard_cmd_cnt); } static void f2fs_submit_discard_endio(struct bio *bio) @@ -1224,6 +1227,7 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi) mutex_init(&dcc->cmd_lock); atomic_set(&dcc->issued_discard, 0); atomic_set(&dcc->issing_discard, 0); + atomic_set(&dcc->discard_cmd_cnt, 0); dcc->nr_discards = 0; dcc->max_discards = 0; -- cgit v1.2.3 From 79bd5ed6e3181002b57cf693dff1f0a886007453 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 27 Mar 2017 18:14:04 +0800 Subject: f2fs: clean up destroy_discard_cmd_control Remove unneeded parameter and simply change flow in destroy_discard_cmd_control. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 036b41257d60..4934e8869240 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1246,20 +1246,22 @@ init_thread: return err; } -static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi, bool free) +static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; - if (dcc && dcc->f2fs_issue_discard) { + if (!dcc) + return; + + if (dcc->f2fs_issue_discard) { struct task_struct *discard_thread = dcc->f2fs_issue_discard; dcc->f2fs_issue_discard = NULL; kthread_stop(discard_thread); } - if (free) { - kfree(dcc); - SM_I(sbi)->dcc_info = NULL; - } + + kfree(dcc); + SM_I(sbi)->dcc_info = NULL; } static bool __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno) @@ -3152,7 +3154,7 @@ void destroy_segment_manager(struct f2fs_sb_info *sbi) if (!sm_info) return; destroy_flush_cmd_control(sbi, true); - destroy_discard_cmd_control(sbi, true); + destroy_discard_cmd_control(sbi); destroy_dirty_segmap(sbi); destroy_curseg(sbi); destroy_free_segmap(sbi); -- cgit v1.2.3 From 77deaff0083f66f951b6415382e26b29b41d29af Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 28 Mar 2017 18:18:50 +0800 Subject: f2fs: use bitmap in discard_entry This patch changes to use bitmap instead of extent in struct discard_entry to indicate discard range in one segment, for fragmented space, this implementation can save memory footprint. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 72 ++++++++++++++++++++++++++++++------------------------- 1 file changed, 40 insertions(+), 32 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 4934e8869240..0aab0bdb5da3 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1041,32 +1041,6 @@ static int f2fs_issue_discard(struct f2fs_sb_info *sbi, return err; } -static void __add_discard_entry(struct f2fs_sb_info *sbi, - struct cp_control *cpc, struct seg_entry *se, - unsigned int start, unsigned int end) -{ - struct list_head *head = &SM_I(sbi)->dcc_info->discard_entry_list; - struct discard_entry *new, *last; - - if (!list_empty(head)) { - last = list_last_entry(head, struct discard_entry, list); - if (START_BLOCK(sbi, cpc->trim_start) + start == - last->blkaddr + last->len && - last->len < MAX_DISCARD_BLOCKS(sbi)) { - last->len += end - start; - goto done; - } - } - - new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS); - INIT_LIST_HEAD(&new->list); - new->blkaddr = START_BLOCK(sbi, cpc->trim_start) + start; - new->len = end - start; - list_add_tail(&new->list, head); -done: - SM_I(sbi)->dcc_info->nr_discards += end - start; -} - static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc, bool check_only) { @@ -1079,6 +1053,8 @@ static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc, unsigned long *dmap = SIT_I(sbi)->tmp_map; unsigned int start = 0, end = -1; bool force = (cpc->reason == CP_DISCARD); + struct discard_entry *de = NULL; + struct list_head *head = &SM_I(sbi)->dcc_info->discard_entry_list; int i; if (se->valid_blocks == max_blocks || !f2fs_discard_en(sbi)) @@ -1110,7 +1086,17 @@ static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc, if (check_only) return true; - __add_discard_entry(sbi, cpc, se, start, end); + if (!de) { + de = f2fs_kmem_cache_alloc(discard_entry_slab, + GFP_F2FS_ZERO); + de->start_blkaddr = START_BLOCK(sbi, cpc->trim_start); + list_add_tail(&de->list, head); + } + + for (i = start; i < end; i++) + __set_bit_le(i, (void *)de->discard_map); + + SM_I(sbi)->dcc_info->nr_discards += end - start; } return false; } @@ -1196,13 +1182,35 @@ next: /* send small discards */ list_for_each_entry_safe(entry, this, head, list) { - if (force && entry->len < cpc->trim_minlen) - goto skip; - f2fs_issue_discard(sbi, entry->blkaddr, entry->len); - cpc->trimmed += entry->len; + unsigned int cur_pos = 0, next_pos, len, total_len = 0; + bool is_valid = test_bit_le(0, entry->discard_map); + +find_next: + if (is_valid) { + next_pos = find_next_zero_bit_le(entry->discard_map, + sbi->blocks_per_seg, cur_pos); + len = next_pos - cur_pos; + + if (force && len < cpc->trim_minlen) + goto skip; + + f2fs_issue_discard(sbi, entry->start_blkaddr + cur_pos, + len); + cpc->trimmed += len; + total_len += len; + } else { + next_pos = find_next_bit_le(entry->discard_map, + sbi->blocks_per_seg, cur_pos); + } skip: + cur_pos = next_pos; + is_valid = !is_valid; + + if (cur_pos < sbi->blocks_per_seg) + goto find_next; + list_del(&entry->list); - SM_I(sbi)->dcc_info->nr_discards -= entry->len; + SM_I(sbi)->dcc_info->nr_discards -= total_len; kmem_cache_free(discard_entry_slab, entry); } } -- cgit v1.2.3 From 669457e6c2af8bdf84090510a4955a249697683f Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 24 Mar 2017 20:05:13 -0400 Subject: f2fs: write small sized IO to hot log It would better split small and large IOs separately in order to get more consecutive big writes. The default threshold is set to 64KB, but configurable by sysfs/min_hot_blocks. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 0aab0bdb5da3..ed9db665ffe7 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1908,18 +1908,16 @@ static int __get_segment_type_6(struct page *page, enum page_type p_type) if (p_type == DATA) { struct inode *inode = page->mapping->host; - if (S_ISDIR(inode->i_mode)) - return CURSEG_HOT_DATA; - else if (is_cold_data(page) || file_is_cold(inode)) + if (is_cold_data(page) || file_is_cold(inode)) return CURSEG_COLD_DATA; - else - return CURSEG_WARM_DATA; + if (is_inode_flag_set(inode, FI_HOT_DATA)) + return CURSEG_HOT_DATA; + return CURSEG_WARM_DATA; } else { if (IS_DNODE(page)) return is_cold_node(page) ? CURSEG_WARM_NODE : CURSEG_HOT_NODE; - else - return CURSEG_COLD_NODE; + return CURSEG_COLD_NODE; } } @@ -3026,6 +3024,7 @@ int build_segment_manager(struct f2fs_sb_info *sbi) sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC; sm_info->min_ipu_util = DEF_MIN_IPU_UTIL; sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS; + sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS; sm_info->trim_sections = DEF_BATCHED_TRIM_SECTIONS; -- cgit v1.2.3 From 074a551c90b9b1bf413b7b44ec5ee3ab9c7526eb Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 30 Mar 2017 21:02:46 -0700 Subject: f2fs: submit bio of in-place-update pages This patch tries to split in-place-update bios from sequential bios. Suggested-by: Yunlei He Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index ed9db665ffe7..5e9635df9923 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2040,11 +2040,11 @@ void write_data_page(struct dnode_of_data *dn, struct f2fs_io_info *fio) f2fs_update_data_blkaddr(dn, fio->new_blkaddr); } -void rewrite_data_page(struct f2fs_io_info *fio) +int rewrite_data_page(struct f2fs_io_info *fio) { fio->new_blkaddr = fio->old_blkaddr; stat_inc_inplace_blocks(fio->sbi); - f2fs_submit_page_mbio(fio); + return f2fs_submit_page_bio(fio); } void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, -- cgit v1.2.3 From e5c2a70c4a8b85817054c1292f497309a7d871f9 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 4 Apr 2017 16:45:30 -0700 Subject: Revert "f2fs: put allocate_segment after refresh_sit_entry" This reverts commit 3436c4bdb30de421d46f58c9174669fbcfd40ce0. This makes a leak to register dirty segments. I reproduced the issue by modified postmark which injects a lot of file create/delete/update and finally triggers huge number of SSR allocations. Cc: # v4.10+ [Jaegeuk Kim: Change missing incorrect comment] Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 5e9635df9923..c79f1b05d667 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1960,15 +1960,14 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, stat_inc_block_count(sbi, curseg); + if (!__has_curseg_space(sbi, type)) + sit_i->s_ops->allocate_segment(sbi, type, false); /* - * SIT information should be updated before segment allocation, - * since SSR needs latest valid block information. + * SIT information should be updated after segment allocation, + * since we need to keep dirty segments precisely under SSR. */ refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr); - if (!__has_curseg_space(sbi, type)) - sit_i->s_ops->allocate_segment(sbi, type, false); - mutex_unlock(&sit_i->sentry_lock); if (page && IS_NODESEG(type)) -- cgit v1.2.3 From 4adc71ee11d7edb42a176efc970baa45ba9d6e9a Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 5 Apr 2017 18:19:48 +0800 Subject: f2fs: split discard_cmd_list Split discard_cmd_list to discard_{pend,wait}_list, so while sending/waiting discard command, we can avoid traversing unneeded entries in original list. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Conflicts: fs/f2fs/segment.c --- fs/f2fs/segment.c | 47 ++++++++++++++++++++++++++++++----------------- 1 file changed, 30 insertions(+), 17 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index c79f1b05d667..0582eecb5272 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -677,7 +677,7 @@ static void __add_discard_cmd(struct f2fs_sb_info *sbi, block_t start, block_t len) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; - struct list_head *cmd_list = &(dcc->discard_cmd_list); + struct list_head *pend_list = &(dcc->discard_pend_list); struct discard_cmd *dc; dc = f2fs_kmem_cache_alloc(discard_cmd_slab, GFP_NOFS); @@ -691,7 +691,7 @@ static void __add_discard_cmd(struct f2fs_sb_info *sbi, init_completion(&dc->wait); mutex_lock(&dcc->cmd_lock); - list_add_tail(&dc->list, cmd_list); + list_add_tail(&dc->list, pend_list); mutex_unlock(&dcc->cmd_lock); atomic_inc(&dcc->discard_cmd_cnt); @@ -826,6 +826,7 @@ static void __submit_discard_cmd(struct f2fs_sb_info *sbi, bio->bi_private = dc; bio->bi_end_io = f2fs_submit_discard_endio; submit_bio(REQ_SYNC, bio); + list_move_tail(&dc->list, &dcc->discard_wait_list); } } else { __remove_discard_cmd(sbi, dc); @@ -872,31 +873,37 @@ static void __punch_discard_cmd(struct f2fs_sb_info *sbi, void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; - struct list_head *wait_list = &(dcc->discard_cmd_list); + struct list_head *pend_list = &(dcc->discard_pend_list); + struct list_head *wait_list = &(dcc->discard_wait_list); struct discard_cmd *dc, *tmp; struct blk_plug plug; mutex_lock(&dcc->cmd_lock); - blk_start_plug(&plug); - - list_for_each_entry_safe(dc, tmp, wait_list, list) { + if (blkaddr == NULL_ADDR) + goto release_discard; - if (blkaddr == NULL_ADDR) { - __submit_discard_cmd(sbi, dc); - continue; - } + list_for_each_entry_safe(dc, tmp, pend_list, list) { + if (dc->lstart <= blkaddr && blkaddr < dc->lstart + dc->len) + __punch_discard_cmd(sbi, dc, blkaddr); + } + list_for_each_entry_safe(dc, tmp, wait_list, list) { if (dc->lstart <= blkaddr && blkaddr < dc->lstart + dc->len) { if (dc->state == D_SUBMIT) wait_for_completion_io(&dc->wait); __punch_discard_cmd(sbi, dc, blkaddr); } } - blk_finish_plug(&plug); +release_discard: /* this comes from f2fs_put_super */ if (blkaddr == NULL_ADDR) { + blk_start_plug(&plug); + list_for_each_entry_safe(dc, tmp, pend_list, list) + __submit_discard_cmd(sbi, dc); + blk_finish_plug(&plug); + list_for_each_entry_safe(dc, tmp, wait_list, list) { wait_for_completion_io(&dc->wait); __remove_discard_cmd(sbi, dc); @@ -910,7 +917,8 @@ static int issue_discard_thread(void *data) struct f2fs_sb_info *sbi = data; struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; wait_queue_head_t *q = &dcc->discard_wait_queue; - struct list_head *cmd_list = &dcc->discard_cmd_list; + struct list_head *pend_list = &dcc->discard_pend_list; + struct list_head *wait_list = &dcc->discard_wait_list; struct discard_cmd *dc, *tmp; struct blk_plug plug; int iter = 0; @@ -921,13 +929,17 @@ repeat: blk_start_plug(&plug); mutex_lock(&dcc->cmd_lock); - list_for_each_entry_safe(dc, tmp, cmd_list, list) { + list_for_each_entry_safe(dc, tmp, pend_list, list) { + f2fs_bug_on(sbi, dc->state != D_PREP); if (is_idle(sbi)) __submit_discard_cmd(sbi, dc); - if (dc->state == D_PREP && iter++ > DISCARD_ISSUE_RATE) + if (iter++ > DISCARD_ISSUE_RATE) break; + } + + list_for_each_entry_safe(dc, tmp, wait_list, list) { if (dc->state == D_DONE) __remove_discard_cmd(sbi, dc); } @@ -938,8 +950,8 @@ repeat: iter = 0; congestion_wait(BLK_RW_SYNC, HZ/50); - wait_event_interruptible(*q, - kthread_should_stop() || !list_empty(&dcc->discard_cmd_list)); + wait_event_interruptible(*q, kthread_should_stop() || + !list_empty(pend_list) || !list_empty(wait_list)); goto repeat; } @@ -1231,7 +1243,8 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi) return -ENOMEM; INIT_LIST_HEAD(&dcc->discard_entry_list); - INIT_LIST_HEAD(&dcc->discard_cmd_list); + INIT_LIST_HEAD(&dcc->discard_pend_list); + INIT_LIST_HEAD(&dcc->discard_wait_list); mutex_init(&dcc->cmd_lock); atomic_set(&dcc->issued_discard, 0); atomic_set(&dcc->issing_discard, 0); -- cgit v1.2.3 From 745d922434837feebaa8497563bf8f8b39cde782 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 5 Apr 2017 18:19:49 +0800 Subject: f2fs: introduce f2fs_wait_discard_bios Split f2fs_wait_discard_bios from f2fs_wait_discard_bio, just for cleanup, no logic change. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 37 ++++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 0582eecb5272..acf0c2c7d3b3 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -876,13 +876,9 @@ void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) struct list_head *pend_list = &(dcc->discard_pend_list); struct list_head *wait_list = &(dcc->discard_wait_list); struct discard_cmd *dc, *tmp; - struct blk_plug plug; mutex_lock(&dcc->cmd_lock); - if (blkaddr == NULL_ADDR) - goto release_discard; - list_for_each_entry_safe(dc, tmp, pend_list, list) { if (dc->lstart <= blkaddr && blkaddr < dc->lstart + dc->len) __punch_discard_cmd(sbi, dc, blkaddr); @@ -896,19 +892,30 @@ void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) } } -release_discard: - /* this comes from f2fs_put_super */ - if (blkaddr == NULL_ADDR) { - blk_start_plug(&plug); - list_for_each_entry_safe(dc, tmp, pend_list, list) - __submit_discard_cmd(sbi, dc); - blk_finish_plug(&plug); + mutex_unlock(&dcc->cmd_lock); +} - list_for_each_entry_safe(dc, tmp, wait_list, list) { - wait_for_completion_io(&dc->wait); - __remove_discard_cmd(sbi, dc); - } +/* This comes from f2fs_put_super */ +void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi) +{ + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + struct list_head *pend_list = &(dcc->discard_pend_list); + struct list_head *wait_list = &(dcc->discard_wait_list); + struct discard_cmd *dc, *tmp; + struct blk_plug plug; + + mutex_lock(&dcc->cmd_lock); + + blk_start_plug(&plug); + list_for_each_entry_safe(dc, tmp, pend_list, list) + __submit_discard_cmd(sbi, dc); + blk_finish_plug(&plug); + + list_for_each_entry_safe(dc, tmp, wait_list, list) { + wait_for_completion_io(&dc->wait); + __remove_discard_cmd(sbi, dc); } + mutex_unlock(&dcc->cmd_lock); } -- cgit v1.2.3 From e09409d5c38de68d0c0f4fe43c3ae6da521fb58a Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 5 Apr 2017 18:26:26 +0800 Subject: f2fs: prevent waiter encountering incorrect discard states In f2fs_submit_discard_endio, we will wake up waiter before setting discard command states, so waiter may use incorrect states. Change the order between complete() and states setting to fix this issue. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index acf0c2c7d3b3..dc2737cfaa67 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -717,9 +717,9 @@ static void f2fs_submit_discard_endio(struct bio *bio) { struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private; - complete(&dc->wait); dc->error = bio->bi_error; dc->state = D_DONE; + complete(&dc->wait); bio_put(bio); } @@ -886,8 +886,7 @@ void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) list_for_each_entry_safe(dc, tmp, wait_list, list) { if (dc->lstart <= blkaddr && blkaddr < dc->lstart + dc->len) { - if (dc->state == D_SUBMIT) - wait_for_completion_io(&dc->wait); + wait_for_completion_io(&dc->wait); __punch_discard_cmd(sbi, dc, blkaddr); } } @@ -947,8 +946,10 @@ repeat: } list_for_each_entry_safe(dc, tmp, wait_list, list) { - if (dc->state == D_DONE) + if (dc->state == D_DONE) { + wait_for_completion_io(&dc->wait); __remove_discard_cmd(sbi, dc); + } } mutex_unlock(&dcc->cmd_lock); -- cgit v1.2.3 From af381ca699eb25cb4ab153300b5c0b9d3c727bb2 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 7 Apr 2017 14:33:22 -0700 Subject: f2fs: clean up get_valid_blocks with consistent parameter This patch cleans up get_valid_blocks, which has no functional change. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index dc2737cfaa67..8ab152f209af 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -636,7 +636,7 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t])) dirty_i->nr_dirty[t]--; - if (get_valid_blocks(sbi, segno, sbi->segs_per_sec) == 0) + if (get_valid_blocks(sbi, segno, true) == 0) clear_bit(GET_SECNO(sbi, segno), dirty_i->victim_secmap); } @@ -657,7 +657,7 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) mutex_lock(&dirty_i->seglist_lock); - valid_blocks = get_valid_blocks(sbi, segno, 0); + valid_blocks = get_valid_blocks(sbi, segno, false); if (valid_blocks == 0) { __locate_dirty_segment(sbi, segno, PRE); @@ -1188,7 +1188,7 @@ next: secno = GET_SECNO(sbi, start); start_segno = secno * sbi->segs_per_sec; if (!IS_CURSEC(sbi, secno) && - !get_valid_blocks(sbi, start, sbi->segs_per_sec)) + !get_valid_blocks(sbi, start, true)) f2fs_issue_discard(sbi, START_BLOCK(sbi, start_segno), sbi->segs_per_sec << sbi->log_blocks_per_seg); @@ -2938,7 +2938,7 @@ static void init_dirty_segmap(struct f2fs_sb_info *sbi) if (segno >= MAIN_SEGS(sbi)) break; offset = segno + 1; - valid_blocks = get_valid_blocks(sbi, segno, 0); + valid_blocks = get_valid_blocks(sbi, segno, false); if (valid_blocks == sbi->blocks_per_seg || !valid_blocks) continue; if (valid_blocks > sbi->blocks_per_seg) { -- cgit v1.2.3 From d1c1a744c455f1f76098792f9b1bdd682fc95dfe Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 7 Apr 2017 15:08:17 -0700 Subject: f2fs: clean up some macros in terms of GET_SEGNO This patch cleans several macros by introducing: - BLKS_PER_SEC - GET_SEC_FROM_SEG - GET_SEG_FROM_SEC - GET_ZONE_FROM_SEC - GET_ZONE_FROM_SEG Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 8ab152f209af..40474e7c2033 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -637,7 +637,7 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, dirty_i->nr_dirty[t]--; if (get_valid_blocks(sbi, segno, true) == 0) - clear_bit(GET_SECNO(sbi, segno), + clear_bit(GET_SEC_FROM_SEG(sbi, segno), dirty_i->victim_secmap); } } @@ -1185,8 +1185,8 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc) continue; } next: - secno = GET_SECNO(sbi, start); - start_segno = secno * sbi->segs_per_sec; + secno = GET_SEC_FROM_SEG(sbi, start); + start_segno = GET_SEG_FROM_SEC(sbi, secno); if (!IS_CURSEC(sbi, secno) && !get_valid_blocks(sbi, start, true)) f2fs_issue_discard(sbi, START_BLOCK(sbi, start_segno), @@ -1541,8 +1541,8 @@ static void get_new_segment(struct f2fs_sb_info *sbi, struct free_segmap_info *free_i = FREE_I(sbi); unsigned int segno, secno, zoneno; unsigned int total_zones = MAIN_SECS(sbi) / sbi->secs_per_zone; - unsigned int hint = *newseg / sbi->segs_per_sec; - unsigned int old_zoneno = GET_ZONENO_FROM_SEGNO(sbi, *newseg); + unsigned int hint = GET_SEC_FROM_SEG(sbi, *newseg); + unsigned int old_zoneno = GET_ZONE_FROM_SEG(sbi, *newseg); unsigned int left_start = hint; bool init = true; int go_left = 0; @@ -1552,8 +1552,8 @@ static void get_new_segment(struct f2fs_sb_info *sbi, if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) { segno = find_next_zero_bit(free_i->free_segmap, - (hint + 1) * sbi->segs_per_sec, *newseg + 1); - if (segno < (hint + 1) * sbi->segs_per_sec) + GET_SEG_FROM_SEC(sbi, hint + 1), *newseg + 1); + if (segno < GET_SEG_FROM_SEC(sbi, hint + 1)) goto got_it; } find_other_zone: @@ -1584,8 +1584,8 @@ find_other_zone: secno = left_start; skip_left: hint = secno; - segno = secno * sbi->segs_per_sec; - zoneno = secno / sbi->secs_per_zone; + segno = GET_SEG_FROM_SEC(sbi, secno); + zoneno = GET_ZONE_FROM_SEC(sbi, secno); /* give up on finding another zone */ if (!init) @@ -1629,7 +1629,7 @@ static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified) struct summary_footer *sum_footer; curseg->segno = curseg->next_segno; - curseg->zone = GET_ZONENO_FROM_SEGNO(sbi, curseg->segno); + curseg->zone = GET_ZONE_FROM_SEG(sbi, curseg->segno); curseg->next_blkoff = 0; curseg->next_segno = NULL_SEGNO; -- cgit v1.2.3 From fa3a914e8bf8f0f06e9423fc857ca4f5d44d00d3 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 27 Mar 2017 18:14:05 +0800 Subject: f2fs: shrink blk plug region Don't use blk plug covering area where there won't be any IOs being issued. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 40474e7c2033..1e726e893eec 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -932,9 +932,8 @@ repeat: if (kthread_should_stop()) return 0; - blk_start_plug(&plug); - mutex_lock(&dcc->cmd_lock); + blk_start_plug(&plug); list_for_each_entry_safe(dc, tmp, pend_list, list) { f2fs_bug_on(sbi, dc->state != D_PREP); @@ -944,6 +943,7 @@ repeat: if (iter++ > DISCARD_ISSUE_RATE) break; } + blk_finish_plug(&plug); list_for_each_entry_safe(dc, tmp, wait_list, list) { if (dc->state == D_DONE) { @@ -953,8 +953,6 @@ repeat: } mutex_unlock(&dcc->cmd_lock); - blk_finish_plug(&plug); - iter = 0; congestion_wait(BLK_RW_SYNC, HZ/50); -- cgit v1.2.3 From 062eb908b28711e4c9323a79dfe69a265efd64d5 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 14 Apr 2017 23:24:55 +0800 Subject: f2fs: use rb-tree to track pending discard commands Introduce rb-tree based discard cache infrastructure to speed up lookup and merge operation of discard entry. Signed-off-by: Chao Yu [Jaegeuk Kim: initialize dc to avoid build warning] Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 223 ++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 183 insertions(+), 40 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 1e726e893eec..9adc3bcfb4f4 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -672,7 +672,7 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) mutex_unlock(&dirty_i->seglist_lock); } -static void __add_discard_cmd(struct f2fs_sb_info *sbi, +static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi, struct block_device *bdev, block_t lstart, block_t start, block_t len) { @@ -689,18 +689,46 @@ static void __add_discard_cmd(struct f2fs_sb_info *sbi, dc->state = D_PREP; dc->error = 0; init_completion(&dc->wait); - - mutex_lock(&dcc->cmd_lock); list_add_tail(&dc->list, pend_list); - mutex_unlock(&dcc->cmd_lock); - atomic_inc(&dcc->discard_cmd_cnt); + + return dc; +} + +static struct discard_cmd *__attach_discard_cmd(struct f2fs_sb_info *sbi, + struct block_device *bdev, block_t lstart, + block_t start, block_t len, + struct rb_node *parent, struct rb_node **p) +{ + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + struct discard_cmd *dc; + + dc = __create_discard_cmd(sbi, bdev, lstart, start, len); + + rb_link_node(&dc->rb_node, parent, p); + rb_insert_color(&dc->rb_node, &dcc->root); + + return dc; } -static void __remove_discard_cmd(struct f2fs_sb_info *sbi, struct discard_cmd *dc) +static void __detach_discard_cmd(struct discard_cmd_control *dcc, + struct discard_cmd *dc) { if (dc->state == D_DONE) - atomic_dec(&(SM_I(sbi)->dcc_info->issing_discard)); + atomic_dec(&dcc->issing_discard); + + list_del(&dc->list); + rb_erase(&dc->rb_node, &dcc->root); + + kmem_cache_free(discard_cmd_slab, dc); + + atomic_dec(&dcc->discard_cmd_cnt); +} + +static void __remove_discard_cmd(struct f2fs_sb_info *sbi, + struct discard_cmd *dc) +{ + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; if (dc->error == -EOPNOTSUPP) dc->error = 0; @@ -708,9 +736,7 @@ static void __remove_discard_cmd(struct f2fs_sb_info *sbi, struct discard_cmd *d if (dc->error) f2fs_msg(sbi->sb, KERN_INFO, "Issue discard failed, ret: %d", dc->error); - list_del(&dc->list); - kmem_cache_free(discard_cmd_slab, dc); - atomic_dec(&SM_I(sbi)->dcc_info->discard_cmd_cnt); + __detach_discard_cmd(dcc, dc); } static void f2fs_submit_discard_endio(struct bio *bio) @@ -833,62 +859,178 @@ static void __submit_discard_cmd(struct f2fs_sb_info *sbi, } } -static int __queue_discard_cmd(struct f2fs_sb_info *sbi, - struct block_device *bdev, block_t blkstart, block_t blklen) +static struct discard_cmd *__insert_discard_tree(struct f2fs_sb_info *sbi, + struct block_device *bdev, block_t lstart, + block_t start, block_t len, + struct rb_node **insert_p, + struct rb_node *insert_parent) { - block_t lblkstart = blkstart; + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + struct rb_node **p = &dcc->root.rb_node; + struct rb_node *parent = NULL; + struct discard_cmd *dc = NULL; - trace_f2fs_issue_discard(bdev, blkstart, blklen); + if (insert_p && insert_parent) { + parent = insert_parent; + p = insert_p; + goto do_insert; + } - if (sbi->s_ndevs) { - int devi = f2fs_target_device_index(sbi, blkstart); + p = __lookup_rb_tree_for_insert(sbi, &dcc->root, &parent, lstart); +do_insert: + dc = __attach_discard_cmd(sbi, bdev, lstart, start, len, parent, p); + if (!dc) + return NULL; - blkstart -= FDEV(devi).start_blk; - } - __add_discard_cmd(sbi, bdev, lblkstart, blkstart, blklen); - wake_up(&SM_I(sbi)->dcc_info->discard_wait_queue); - return 0; + return dc; } static void __punch_discard_cmd(struct f2fs_sb_info *sbi, struct discard_cmd *dc, block_t blkaddr) { - block_t end_block = START_BLOCK(sbi, GET_SEGNO(sbi, blkaddr) + 1); + struct discard_info di = dc->di; + bool modified = false; - if (dc->state == D_DONE || dc->lstart + dc->len <= end_block) { + if (dc->state == D_DONE || dc->len == 1) { __remove_discard_cmd(sbi, dc); return; } - if (blkaddr - dc->lstart < dc->lstart + dc->len - end_block) { - dc->start += (end_block - dc->lstart); - dc->len -= (end_block - dc->lstart); - dc->lstart = end_block; - } else { + if (blkaddr > di.lstart) { dc->len = blkaddr - dc->lstart; + modified = true; + } + + if (blkaddr < di.lstart + di.len - 1) { + if (modified) { + __insert_discard_tree(sbi, dc->bdev, blkaddr + 1, + di.start + blkaddr + 1 - di.lstart, + di.lstart + di.len - 1 - blkaddr, + NULL, NULL); + } else { + dc->lstart++; + dc->len--; + dc->start++; + } } } -/* This should be covered by global mutex, &sit_i->sentry_lock */ -void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) +static void __update_discard_tree_range(struct f2fs_sb_info *sbi, + struct block_device *bdev, block_t lstart, + block_t start, block_t len) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; - struct list_head *pend_list = &(dcc->discard_pend_list); - struct list_head *wait_list = &(dcc->discard_wait_list); - struct discard_cmd *dc, *tmp; + struct discard_cmd *prev_dc = NULL, *next_dc = NULL; + struct discard_cmd *dc; + struct discard_info di = {0}; + struct rb_node **insert_p = NULL, *insert_parent = NULL; + block_t end = lstart + len; mutex_lock(&dcc->cmd_lock); - list_for_each_entry_safe(dc, tmp, pend_list, list) { - if (dc->lstart <= blkaddr && blkaddr < dc->lstart + dc->len) - __punch_discard_cmd(sbi, dc, blkaddr); + dc = (struct discard_cmd *)__lookup_rb_tree_ret(&dcc->root, + NULL, lstart, + (struct rb_entry **)&prev_dc, + (struct rb_entry **)&next_dc, + &insert_p, &insert_parent, true); + if (dc) + prev_dc = dc; + + if (!prev_dc) { + di.lstart = lstart; + di.len = next_dc ? next_dc->lstart - lstart : len; + di.len = min(di.len, len); + di.start = start; } - list_for_each_entry_safe(dc, tmp, wait_list, list) { - if (dc->lstart <= blkaddr && blkaddr < dc->lstart + dc->len) { - wait_for_completion_io(&dc->wait); - __punch_discard_cmd(sbi, dc, blkaddr); + while (1) { + struct rb_node *node; + bool merged = false; + struct discard_cmd *tdc = NULL; + + if (prev_dc) { + di.lstart = prev_dc->lstart + prev_dc->len; + if (di.lstart < lstart) + di.lstart = lstart; + if (di.lstart >= end) + break; + + if (!next_dc || next_dc->lstart > end) + di.len = end - di.lstart; + else + di.len = next_dc->lstart - di.lstart; + di.start = start + di.lstart - lstart; + } + + if (!di.len) + goto next; + + if (prev_dc && prev_dc->state == D_PREP && + prev_dc->bdev == bdev && + __is_discard_back_mergeable(&di, &prev_dc->di)) { + prev_dc->di.len += di.len; + di = prev_dc->di; + tdc = prev_dc; + merged = true; + } + + if (next_dc && next_dc->state == D_PREP && + next_dc->bdev == bdev && + __is_discard_front_mergeable(&di, &next_dc->di)) { + next_dc->di.lstart = di.lstart; + next_dc->di.len += di.len; + next_dc->di.start = di.start; + if (tdc) + __remove_discard_cmd(sbi, tdc); + + merged = true; } + + if (!merged) + __insert_discard_tree(sbi, bdev, di.lstart, di.start, + di.len, NULL, NULL); + next: + prev_dc = next_dc; + if (!prev_dc) + break; + + node = rb_next(&prev_dc->rb_node); + next_dc = rb_entry_safe(node, struct discard_cmd, rb_node); + } + + mutex_unlock(&dcc->cmd_lock); +} + +static int __queue_discard_cmd(struct f2fs_sb_info *sbi, + struct block_device *bdev, block_t blkstart, block_t blklen) +{ + block_t lblkstart = blkstart; + + trace_f2fs_issue_discard(bdev, blkstart, blklen); + + if (sbi->s_ndevs) { + int devi = f2fs_target_device_index(sbi, blkstart); + + blkstart -= FDEV(devi).start_blk; + } + __update_discard_tree_range(sbi, bdev, lblkstart, blkstart, blklen); + wake_up(&SM_I(sbi)->dcc_info->discard_wait_queue); + return 0; +} + +/* This should be covered by global mutex, &sit_i->sentry_lock */ +void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) +{ + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + struct discard_cmd *dc; + + mutex_lock(&dcc->cmd_lock); + + dc = (struct discard_cmd *)__lookup_rb_tree(&dcc->root, NULL, blkaddr); + if (dc) { + if (dc->state != D_PREP) + wait_for_completion_io(&dc->wait); + __punch_discard_cmd(sbi, dc, blkaddr); } mutex_unlock(&dcc->cmd_lock); @@ -1257,6 +1399,7 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi) atomic_set(&dcc->discard_cmd_cnt, 0); dcc->nr_discards = 0; dcc->max_discards = 0; + dcc->root = RB_ROOT; init_waitqueue_head(&dcc->discard_wait_queue); SM_I(sbi)->dcc_info = dcc; -- cgit v1.2.3 From 2814d83ec77245d9855b2b053fcab47ae23614d9 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 15 Apr 2017 14:09:36 +0800 Subject: f2fs: clean up discard_cmd_control structure Avoid long variable name in discard_cmd_control structure, no logic change. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Conflicts: fs/f2fs/segment.c --- fs/f2fs/segment.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 9adc3bcfb4f4..d237efa523ca 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -677,7 +677,7 @@ static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi, block_t start, block_t len) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; - struct list_head *pend_list = &(dcc->discard_pend_list); + struct list_head *pend_list = &(dcc->pend_list); struct discard_cmd *dc; dc = f2fs_kmem_cache_alloc(discard_cmd_slab, GFP_NOFS); @@ -852,7 +852,7 @@ static void __submit_discard_cmd(struct f2fs_sb_info *sbi, bio->bi_private = dc; bio->bi_end_io = f2fs_submit_discard_endio; submit_bio(REQ_SYNC, bio); - list_move_tail(&dc->list, &dcc->discard_wait_list); + list_move_tail(&dc->list, &dcc->wait_list); } } else { __remove_discard_cmd(sbi, dc); @@ -1040,8 +1040,8 @@ void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; - struct list_head *pend_list = &(dcc->discard_pend_list); - struct list_head *wait_list = &(dcc->discard_wait_list); + struct list_head *pend_list = &(dcc->pend_list); + struct list_head *wait_list = &(dcc->wait_list); struct discard_cmd *dc, *tmp; struct blk_plug plug; @@ -1065,8 +1065,8 @@ static int issue_discard_thread(void *data) struct f2fs_sb_info *sbi = data; struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; wait_queue_head_t *q = &dcc->discard_wait_queue; - struct list_head *pend_list = &dcc->discard_pend_list; - struct list_head *wait_list = &dcc->discard_wait_list; + struct list_head *pend_list = &dcc->pend_list; + struct list_head *wait_list = &dcc->wait_list; struct discard_cmd *dc, *tmp; struct blk_plug plug; int iter = 0; @@ -1214,7 +1214,7 @@ static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc, unsigned int start = 0, end = -1; bool force = (cpc->reason == CP_DISCARD); struct discard_entry *de = NULL; - struct list_head *head = &SM_I(sbi)->dcc_info->discard_entry_list; + struct list_head *head = &SM_I(sbi)->dcc_info->entry_list; int i; if (se->valid_blocks == max_blocks || !f2fs_discard_en(sbi)) @@ -1263,7 +1263,7 @@ static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc, void release_discard_addrs(struct f2fs_sb_info *sbi) { - struct list_head *head = &(SM_I(sbi)->dcc_info->discard_entry_list); + struct list_head *head = &(SM_I(sbi)->dcc_info->entry_list); struct discard_entry *entry, *this; /* drop caches */ @@ -1289,7 +1289,7 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi) void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc) { - struct list_head *head = &(SM_I(sbi)->dcc_info->discard_entry_list); + struct list_head *head = &(SM_I(sbi)->dcc_info->entry_list); struct discard_entry *entry, *this; struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); unsigned long *prefree_map = dirty_i->dirty_segmap[PRE]; @@ -1390,9 +1390,9 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi) if (!dcc) return -ENOMEM; - INIT_LIST_HEAD(&dcc->discard_entry_list); - INIT_LIST_HEAD(&dcc->discard_pend_list); - INIT_LIST_HEAD(&dcc->discard_wait_list); + INIT_LIST_HEAD(&dcc->entry_list); + INIT_LIST_HEAD(&dcc->pend_list); + INIT_LIST_HEAD(&dcc->wait_list); mutex_init(&dcc->cmd_lock); atomic_set(&dcc->issued_discard, 0); atomic_set(&dcc->issing_discard, 0); -- cgit v1.2.3 From 0e9f98f97b6871261a5fd7420402039cc6a77198 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 15 Apr 2017 14:09:37 +0800 Subject: f2fs: in prior to issue big discard Keep issuing big size discard in prior instead of the one with random size, so that we expect that it will help to: - be quick to recycle unused large space in flash storage device. - give a chance for a) wait to merge small piece discards into bigger one, or b) avoid issuing discards while they have being reallocated by SSR. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 54 +++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 39 insertions(+), 15 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index d237efa523ca..c77037521dfe 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -677,9 +677,13 @@ static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi, block_t start, block_t len) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; - struct list_head *pend_list = &(dcc->pend_list); + struct list_head *pend_list; struct discard_cmd *dc; + f2fs_bug_on(sbi, !len); + + pend_list = &dcc->pend_list[plist_idx(len)]; + dc = f2fs_kmem_cache_alloc(discard_cmd_slab, GFP_NOFS); INIT_LIST_HEAD(&dc->list); dc->bdev = bdev; @@ -885,9 +889,16 @@ do_insert: return dc; } +static void __relocate_discard_cmd(struct discard_cmd_control *dcc, + struct discard_cmd *dc) +{ + list_move_tail(&dc->list, &dcc->pend_list[plist_idx(dc->len)]); +} + static void __punch_discard_cmd(struct f2fs_sb_info *sbi, struct discard_cmd *dc, block_t blkaddr) { + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; struct discard_info di = dc->di; bool modified = false; @@ -898,6 +909,7 @@ static void __punch_discard_cmd(struct f2fs_sb_info *sbi, if (blkaddr > di.lstart) { dc->len = blkaddr - dc->lstart; + __relocate_discard_cmd(dcc, dc); modified = true; } @@ -911,6 +923,7 @@ static void __punch_discard_cmd(struct f2fs_sb_info *sbi, dc->lstart++; dc->len--; dc->start++; + __relocate_discard_cmd(dcc, dc); } } } @@ -969,6 +982,7 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi, prev_dc->bdev == bdev && __is_discard_back_mergeable(&di, &prev_dc->di)) { prev_dc->di.len += di.len; + __relocate_discard_cmd(dcc, prev_dc); di = prev_dc->di; tdc = prev_dc; merged = true; @@ -980,6 +994,7 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi, next_dc->di.lstart = di.lstart; next_dc->di.len += di.len; next_dc->di.start = di.start; + __relocate_discard_cmd(dcc, next_dc); if (tdc) __remove_discard_cmd(sbi, tdc); @@ -1040,16 +1055,20 @@ void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; - struct list_head *pend_list = &(dcc->pend_list); + struct list_head *pend_list; struct list_head *wait_list = &(dcc->wait_list); struct discard_cmd *dc, *tmp; struct blk_plug plug; + int i; mutex_lock(&dcc->cmd_lock); blk_start_plug(&plug); - list_for_each_entry_safe(dc, tmp, pend_list, list) - __submit_discard_cmd(sbi, dc); + for (i = 0; i < MAX_PLIST_NUM; i++) { + pend_list = &dcc->pend_list[i]; + list_for_each_entry_safe(dc, tmp, pend_list, list) + __submit_discard_cmd(sbi, dc); + } blk_finish_plug(&plug); list_for_each_entry_safe(dc, tmp, wait_list, list) { @@ -1065,26 +1084,30 @@ static int issue_discard_thread(void *data) struct f2fs_sb_info *sbi = data; struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; wait_queue_head_t *q = &dcc->discard_wait_queue; - struct list_head *pend_list = &dcc->pend_list; + struct list_head *pend_list; struct list_head *wait_list = &dcc->wait_list; struct discard_cmd *dc, *tmp; struct blk_plug plug; - int iter = 0; + int iter = 0, i; repeat: if (kthread_should_stop()) return 0; mutex_lock(&dcc->cmd_lock); blk_start_plug(&plug); - list_for_each_entry_safe(dc, tmp, pend_list, list) { - f2fs_bug_on(sbi, dc->state != D_PREP); + for (i = MAX_PLIST_NUM - 1; i >= 0; i--) { + pend_list = &dcc->pend_list[i]; + list_for_each_entry_safe(dc, tmp, pend_list, list) { + f2fs_bug_on(sbi, dc->state != D_PREP); - if (is_idle(sbi)) - __submit_discard_cmd(sbi, dc); + if (is_idle(sbi)) + __submit_discard_cmd(sbi, dc); - if (iter++ > DISCARD_ISSUE_RATE) - break; + if (iter++ > DISCARD_ISSUE_RATE) + goto next_step; + } } +next_step: blk_finish_plug(&plug); list_for_each_entry_safe(dc, tmp, wait_list, list) { @@ -1099,7 +1122,7 @@ repeat: congestion_wait(BLK_RW_SYNC, HZ/50); wait_event_interruptible(*q, kthread_should_stop() || - !list_empty(pend_list) || !list_empty(wait_list)); + atomic_read(&dcc->discard_cmd_cnt)); goto repeat; } @@ -1379,7 +1402,7 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi) { dev_t dev = sbi->sb->s_bdev->bd_dev; struct discard_cmd_control *dcc; - int err = 0; + int err = 0, i; if (SM_I(sbi)->dcc_info) { dcc = SM_I(sbi)->dcc_info; @@ -1391,7 +1414,8 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi) return -ENOMEM; INIT_LIST_HEAD(&dcc->entry_list); - INIT_LIST_HEAD(&dcc->pend_list); + for (i = 0; i < MAX_PLIST_NUM; i++) + INIT_LIST_HEAD(&dcc->pend_list[i]); INIT_LIST_HEAD(&dcc->wait_list); mutex_init(&dcc->cmd_lock); atomic_set(&dcc->issued_discard, 0); -- cgit v1.2.3 From 048fe2a0a94798c9708f4ad3a0d3bcdd095f5da8 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 15 Apr 2017 14:09:38 +0800 Subject: f2fs: trace __submit_discard_cmd Add an even class f2fs_discard for introducing f2fs_queue_discard, then use f2fs_{queue,issue}_discard to trace __{queue,submit}_discard_cmd. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index c77037521dfe..e44fea9e2205 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -843,6 +843,8 @@ static void __submit_discard_cmd(struct f2fs_sb_info *sbi, if (dc->state != D_PREP) return; + trace_f2fs_issue_discard(dc->bdev, dc->start, dc->len); + dc->error = __blkdev_issue_discard(dc->bdev, SECTOR_FROM_BLOCK(dc->start), SECTOR_FROM_BLOCK(dc->len), @@ -1021,7 +1023,7 @@ static int __queue_discard_cmd(struct f2fs_sb_info *sbi, { block_t lblkstart = blkstart; - trace_f2fs_issue_discard(bdev, blkstart, blklen); + trace_f2fs_queue_discard(bdev, blkstart, blklen); if (sbi->s_ndevs) { int devi = f2fs_target_device_index(sbi, blkstart); -- cgit v1.2.3 From 73d23680deb6dada7423e3d7656ca8fd743cc58e Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 17 Apr 2017 18:21:43 +0800 Subject: f2fs: introduce __check_rb_tree_consistence Introduce __check_rb_tree_consistence to check consistence of rb-tree based discard cache in runtime. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index e44fea9e2205..df0bb6c4bb90 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -912,6 +912,7 @@ static void __punch_discard_cmd(struct f2fs_sb_info *sbi, if (blkaddr > di.lstart) { dc->len = blkaddr - dc->lstart; __relocate_discard_cmd(dcc, dc); + f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, &dcc->root)); modified = true; } @@ -921,11 +922,15 @@ static void __punch_discard_cmd(struct f2fs_sb_info *sbi, di.start + blkaddr + 1 - di.lstart, di.lstart + di.len - 1 - blkaddr, NULL, NULL); + f2fs_bug_on(sbi, + !__check_rb_tree_consistence(sbi, &dcc->root)); } else { dc->lstart++; dc->len--; dc->start++; __relocate_discard_cmd(dcc, dc); + f2fs_bug_on(sbi, + !__check_rb_tree_consistence(sbi, &dcc->root)); } } } @@ -985,6 +990,8 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi, __is_discard_back_mergeable(&di, &prev_dc->di)) { prev_dc->di.len += di.len; __relocate_discard_cmd(dcc, prev_dc); + f2fs_bug_on(sbi, + !__check_rb_tree_consistence(sbi, &dcc->root)); di = prev_dc->di; tdc = prev_dc; merged = true; @@ -999,13 +1006,17 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi, __relocate_discard_cmd(dcc, next_dc); if (tdc) __remove_discard_cmd(sbi, tdc); - + f2fs_bug_on(sbi, + !__check_rb_tree_consistence(sbi, &dcc->root)); merged = true; } - if (!merged) + if (!merged) { __insert_discard_tree(sbi, bdev, di.lstart, di.start, di.len, NULL, NULL); + f2fs_bug_on(sbi, + !__check_rb_tree_consistence(sbi, &dcc->root)); + } next: prev_dc = next_dc; if (!prev_dc) -- cgit v1.2.3 From b88a1ae0f2d2baa6aab582fc22c7af238547a1e3 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 18 Apr 2017 19:27:39 +0800 Subject: f2fs: add undiscard blocks stat This patch adds to account undiscard blocks. Signed-off-by: Chao Yu --- fs/f2fs/segment.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index df0bb6c4bb90..ba46aa20db1a 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -695,6 +695,7 @@ static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi, init_completion(&dc->wait); list_add_tail(&dc->list, pend_list); atomic_inc(&dcc->discard_cmd_cnt); + dcc->undiscard_blks += len; return dc; } @@ -723,6 +724,7 @@ static void __detach_discard_cmd(struct discard_cmd_control *dcc, list_del(&dc->list); rb_erase(&dc->rb_node, &dcc->root); + dcc->undiscard_blks -= dc->len; kmem_cache_free(discard_cmd_slab, dc); @@ -909,8 +911,11 @@ static void __punch_discard_cmd(struct f2fs_sb_info *sbi, return; } + dcc->undiscard_blks -= di.len; + if (blkaddr > di.lstart) { dc->len = blkaddr - dc->lstart; + dcc->undiscard_blks += dc->len; __relocate_discard_cmd(dcc, dc); f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, &dcc->root)); modified = true; @@ -928,6 +933,7 @@ static void __punch_discard_cmd(struct f2fs_sb_info *sbi, dc->lstart++; dc->len--; dc->start++; + dcc->undiscard_blks += dc->len; __relocate_discard_cmd(dcc, dc); f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, &dcc->root)); @@ -989,6 +995,7 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi, prev_dc->bdev == bdev && __is_discard_back_mergeable(&di, &prev_dc->di)) { prev_dc->di.len += di.len; + dcc->undiscard_blks += di.len; __relocate_discard_cmd(dcc, prev_dc); f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, &dcc->root)); @@ -1003,6 +1010,7 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi, next_dc->di.lstart = di.lstart; next_dc->di.len += di.len; next_dc->di.start = di.start; + dcc->undiscard_blks += di.len; __relocate_discard_cmd(dcc, next_dc); if (tdc) __remove_discard_cmd(sbi, tdc); @@ -1436,6 +1444,7 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi) atomic_set(&dcc->discard_cmd_cnt, 0); dcc->nr_discards = 0; dcc->max_discards = 0; + dcc->undiscard_blks = 0; dcc->root = RB_ROOT; init_waitqueue_head(&dcc->discard_wait_queue); -- cgit v1.2.3 From 1b73445838adfda63d6bba040e3aec87b7d9818a Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 13 Apr 2017 15:17:00 -0700 Subject: f2fs: add ioctl to flush data from faster device to cold area This patch adds an ioctl to flush data in faster device to cold area. User can give device number and number of segments to move. It doesn't move it if there is only one device. The parameter looks like: struct f2fs_flush_device { u32 dev_num; /* device number to flush */ u32 segments; /* # of segments to flush */ }; Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index ba46aa20db1a..888bde8cec34 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -401,7 +401,7 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) */ if (has_not_enough_free_secs(sbi, 0, 0)) { mutex_lock(&sbi->gc_mutex); - f2fs_gc(sbi, false, false); + f2fs_gc(sbi, false, false, NULL_SEGNO); } } @@ -1834,6 +1834,8 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type) if (type == CURSEG_HOT_DATA || IS_NODESEG(type)) return 0; + if (SIT_I(sbi)->last_victim[ALLOC_NEXT]) + return SIT_I(sbi)->last_victim[ALLOC_NEXT]; return CURSEG_I(sbi, type)->segno; } @@ -1931,12 +1933,15 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type) { struct curseg_info *curseg = CURSEG_I(sbi, type); const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops; + unsigned segno = NULL_SEGNO; int i, cnt; bool reversed = false; /* need_SSR() already forces to do this */ - if (v_ops->get_victim(sbi, &(curseg)->next_segno, BG_GC, type, SSR)) + if (v_ops->get_victim(sbi, &segno, BG_GC, type, SSR)) { + curseg->next_segno = segno; return 1; + } /* For node segments, let's do SSR more intensively */ if (IS_NODESEG(type)) { @@ -1960,9 +1965,10 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type) for (; cnt-- > 0; reversed ? i-- : i++) { if (i == type) continue; - if (v_ops->get_victim(sbi, &(curseg)->next_segno, - BG_GC, i, SSR)) + if (v_ops->get_victim(sbi, &segno, BG_GC, i, SSR)) { + curseg->next_segno = segno; return 1; + } } return 0; } -- cgit v1.2.3 From 0cfd113b84607c3827f01049bc9cd75559a906d1 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 20 Apr 2017 13:51:57 -0700 Subject: f2fs: fix out-of free segments This patch also reverts d0db7703ac1 ("f2fs: do SSR in higher priority"). This patch fixes out of free segments caused by many small file creation by 1) mkfs -s 1 2G 2) mount 3) untar - preoduce 60000 small files burstly 4) sync - flush node pages - flush imeta Here, when we do f2fs_balance_fs, we missed # of imeta blocks, resulting in skipping to check has_not_enough_free_secs. Another test is done by 1) mkfs -s 12 2G 2) mount 3) untar - preoduce 60000 small files burstly 4) sync - flush node pages - flush imeta In this case, this patch also fixes wrong block allocation under large section size. Reported-by: William Brana Cc: Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 888bde8cec34..f5dbb6ef8390 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -388,11 +388,8 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) } #endif - if (!need) - return; - /* balance_fs_bg is able to be pending */ - if (excess_cached_nats(sbi)) + if (need && excess_cached_nats(sbi)) f2fs_balance_fs_bg(sbi); /* @@ -1718,6 +1715,17 @@ static void write_current_sum_page(struct f2fs_sb_info *sbi, f2fs_put_page(page, 1); } +static int is_next_segment_free(struct f2fs_sb_info *sbi, int type) +{ + struct curseg_info *curseg = CURSEG_I(sbi, type); + unsigned int segno = curseg->segno + 1; + struct free_segmap_info *free_i = FREE_I(sbi); + + if (segno < MAIN_SEGS(sbi) && segno % sbi->segs_per_sec) + return !test_bit(segno, free_i->free_segmap); + return 0; +} + /* * Find a new segment from the free segments bitmap to right order * This function should be returned with success, otherwise BUG @@ -1831,6 +1839,10 @@ static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified) static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type) { + /* if segs_per_sec is large than 1, we need to keep original policy. */ + if (sbi->segs_per_sec != 1) + return CURSEG_I(sbi, type)->segno; + if (type == CURSEG_HOT_DATA || IS_NODESEG(type)) return 0; @@ -1980,17 +1992,21 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type) static void allocate_segment_by_default(struct f2fs_sb_info *sbi, int type, bool force) { + struct curseg_info *curseg = CURSEG_I(sbi, type); + if (force) new_curseg(sbi, type, true); else if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) && type == CURSEG_WARM_NODE) new_curseg(sbi, type, false); + else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type)) + new_curseg(sbi, type, false); else if (need_SSR(sbi) && get_ssr_segment(sbi, type)) change_curseg(sbi, type, true); else new_curseg(sbi, type, false); - stat_inc_seg_type(sbi, CURSEG_I(sbi, type)); + stat_inc_seg_type(sbi, curseg); } void allocate_new_segments(struct f2fs_sb_info *sbi) -- cgit v1.2.3 From e7a9ce2e7cc68d29ed27926a40eafb33c9bb62e0 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 25 Apr 2017 00:21:34 +0800 Subject: f2fs: delay awaking discard thread It's better to delay awaking discard thread while queuing discard commands in checkpoint, it will help to give more chances for merging big and small discard. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index f5dbb6ef8390..93c6d8a00722 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1047,7 +1047,6 @@ static int __queue_discard_cmd(struct f2fs_sb_info *sbi, blkstart -= FDEV(devi).start_blk; } __update_discard_tree_range(sbi, bdev, lblkstart, blkstart, blklen); - wake_up(&SM_I(sbi)->dcc_info->discard_wait_queue); return 0; } @@ -1414,6 +1413,8 @@ skip: SM_I(sbi)->dcc_info->nr_discards -= total_len; kmem_cache_free(discard_entry_slab, entry); } + + wake_up(&SM_I(sbi)->dcc_info->discard_wait_queue); } static int create_discard_cmd_control(struct f2fs_sb_info *sbi) -- cgit v1.2.3 From 9170805a6362eb449000f3e47a4a9c39e0f0dd8a Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 25 Apr 2017 00:21:35 +0800 Subject: f2fs: enable small discard by default This patch start to enable 4K granularity small discard by default when realtime discard is on, so, in seriously fragmented space, small size discard can be issued in time to avoid useless storage space occupying of invalid filesystem's data, then performance of flash storage can be recovered. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 93c6d8a00722..cc617da64d38 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1441,7 +1441,7 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi) atomic_set(&dcc->issing_discard, 0); atomic_set(&dcc->discard_cmd_cnt, 0); dcc->nr_discards = 0; - dcc->max_discards = 0; + dcc->max_discards = MAIN_SEGS(sbi) << sbi->log_blocks_per_seg; dcc->undiscard_blks = 0; dcc->root = RB_ROOT; -- cgit v1.2.3 From 72b8a76169d7b2743479a693f4b15d36ccf70e0c Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 25 Apr 2017 20:21:37 +0800 Subject: f2fs: introduce __issue_discard_cmd Just cleanup, no logic change. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 63 ++++++++++++++++++++++++++----------------------------- 1 file changed, 30 insertions(+), 33 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index cc617da64d38..b49818dd02c4 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1050,6 +1050,32 @@ static int __queue_discard_cmd(struct f2fs_sb_info *sbi, return 0; } +static void __issue_discard_cmd(struct f2fs_sb_info *sbi, bool issue_cond) +{ + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + struct list_head *pend_list; + struct discard_cmd *dc, *tmp; + struct blk_plug plug; + int i, iter = 0; + + mutex_lock(&dcc->cmd_lock); + blk_start_plug(&plug); + for (i = MAX_PLIST_NUM - 1; i >= 0; i--) { + pend_list = &dcc->pend_list[i]; + list_for_each_entry_safe(dc, tmp, pend_list, list) { + f2fs_bug_on(sbi, dc->state != D_PREP); + + if (!issue_cond || is_idle(sbi)) + __submit_discard_cmd(sbi, dc); + if (issue_cond && iter++ > DISCARD_ISSUE_RATE) + goto out; + } + } +out: + blk_finish_plug(&plug); + mutex_unlock(&dcc->cmd_lock); +} + /* This should be covered by global mutex, &sit_i->sentry_lock */ void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) { @@ -1072,27 +1098,16 @@ void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; - struct list_head *pend_list; struct list_head *wait_list = &(dcc->wait_list); struct discard_cmd *dc, *tmp; - struct blk_plug plug; - int i; - mutex_lock(&dcc->cmd_lock); - - blk_start_plug(&plug); - for (i = 0; i < MAX_PLIST_NUM; i++) { - pend_list = &dcc->pend_list[i]; - list_for_each_entry_safe(dc, tmp, pend_list, list) - __submit_discard_cmd(sbi, dc); - } - blk_finish_plug(&plug); + __issue_discard_cmd(sbi, false); + mutex_lock(&dcc->cmd_lock); list_for_each_entry_safe(dc, tmp, wait_list, list) { wait_for_completion_io(&dc->wait); __remove_discard_cmd(sbi, dc); } - mutex_unlock(&dcc->cmd_lock); } @@ -1101,32 +1116,15 @@ static int issue_discard_thread(void *data) struct f2fs_sb_info *sbi = data; struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; wait_queue_head_t *q = &dcc->discard_wait_queue; - struct list_head *pend_list; struct list_head *wait_list = &dcc->wait_list; struct discard_cmd *dc, *tmp; - struct blk_plug plug; - int iter = 0, i; repeat: if (kthread_should_stop()) return 0; - mutex_lock(&dcc->cmd_lock); - blk_start_plug(&plug); - for (i = MAX_PLIST_NUM - 1; i >= 0; i--) { - pend_list = &dcc->pend_list[i]; - list_for_each_entry_safe(dc, tmp, pend_list, list) { - f2fs_bug_on(sbi, dc->state != D_PREP); - - if (is_idle(sbi)) - __submit_discard_cmd(sbi, dc); - - if (iter++ > DISCARD_ISSUE_RATE) - goto next_step; - } - } -next_step: - blk_finish_plug(&plug); + __issue_discard_cmd(sbi, true); + mutex_lock(&dcc->cmd_lock); list_for_each_entry_safe(dc, tmp, wait_list, list) { if (dc->state == D_DONE) { wait_for_completion_io(&dc->wait); @@ -1135,7 +1133,6 @@ next_step: } mutex_unlock(&dcc->cmd_lock); - iter = 0; congestion_wait(BLK_RW_SYNC, HZ/50); wait_event_interruptible(*q, kthread_should_stop() || -- cgit v1.2.3 From 6cd09438a3311c769442e877eb5fc1ae32ccc3e7 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 25 Apr 2017 20:21:38 +0800 Subject: f2fs: introduce __wait_discard_cmd Just cleanup, no logic change. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 40 ++++++++++++++++++---------------------- 1 file changed, 18 insertions(+), 22 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index b49818dd02c4..a0a0592e1681 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1076,6 +1076,22 @@ out: mutex_unlock(&dcc->cmd_lock); } +static void __wait_discard_cmd(struct f2fs_sb_info *sbi, bool wait_cond) +{ + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + struct list_head *wait_list = &(dcc->wait_list); + struct discard_cmd *dc, *tmp; + + mutex_lock(&dcc->cmd_lock); + list_for_each_entry_safe(dc, tmp, wait_list, list) { + if (!wait_cond || dc->state == D_DONE) { + wait_for_completion_io(&dc->wait); + __remove_discard_cmd(sbi, dc); + } + } + mutex_unlock(&dcc->cmd_lock); +} + /* This should be covered by global mutex, &sit_i->sentry_lock */ void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) { @@ -1097,18 +1113,8 @@ void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) /* This comes from f2fs_put_super */ void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi) { - struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; - struct list_head *wait_list = &(dcc->wait_list); - struct discard_cmd *dc, *tmp; - __issue_discard_cmd(sbi, false); - - mutex_lock(&dcc->cmd_lock); - list_for_each_entry_safe(dc, tmp, wait_list, list) { - wait_for_completion_io(&dc->wait); - __remove_discard_cmd(sbi, dc); - } - mutex_unlock(&dcc->cmd_lock); + __wait_discard_cmd(sbi, false); } static int issue_discard_thread(void *data) @@ -1116,22 +1122,12 @@ static int issue_discard_thread(void *data) struct f2fs_sb_info *sbi = data; struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; wait_queue_head_t *q = &dcc->discard_wait_queue; - struct list_head *wait_list = &dcc->wait_list; - struct discard_cmd *dc, *tmp; repeat: if (kthread_should_stop()) return 0; __issue_discard_cmd(sbi, true); - - mutex_lock(&dcc->cmd_lock); - list_for_each_entry_safe(dc, tmp, wait_list, list) { - if (dc->state == D_DONE) { - wait_for_completion_io(&dc->wait); - __remove_discard_cmd(sbi, dc); - } - } - mutex_unlock(&dcc->cmd_lock); + __wait_discard_cmd(sbi, true); congestion_wait(BLK_RW_SYNC, HZ/50); -- cgit v1.2.3 From cf1770e0fa436b62f454732cd0e7842bed61430a Mon Sep 17 00:00:00 2001 From: Hou Pengyang Date: Tue, 25 Apr 2017 12:45:13 +0000 Subject: f2fs: lookup extent cache first under IPU scenario If a page is cold, NOT atomit written and need_ipu now, there is a high probability that IPU should be adapted. For IPU, we try to check extent tree to get the block index first, instead of reading the dnode page, where may lead to an useless dnode IO, since no need to update the dnode index for IPU. Signed-off-by: Hou Pengyang Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Conflicts: fs/f2fs/gc.c --- fs/f2fs/segment.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index a0a0592e1681..69ead09ba06f 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -311,6 +311,7 @@ static int __commit_inmem_pages(struct inode *inode, } fio.page = page; + fio.old_blkaddr = NULL_ADDR; err = do_write_data_page(&fio); if (err) { unlock_page(page); -- cgit v1.2.3 From 5abcd71d0fd8a642d848a13de041e2112df21a23 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 26 Apr 2017 11:11:12 -0700 Subject: f2fs: nullify fio->encrypted_page for each writes This makes sure each write request has nullified encrypted_page pointer. Signed-off-by: Jaegeuk Kim Conflicts: fs/f2fs/segment.c --- fs/f2fs/segment.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 69ead09ba06f..9fcc2f9aa732 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -290,8 +290,7 @@ static int __commit_inmem_pages(struct inode *inode, .sbi = sbi, .type = DATA, .op = REQ_OP_WRITE, - .op_flags = REQ_SYNC | REQ_NOIDLE | REQ_PRIO, - .encrypted_page = NULL, + .op_flags = REQ_SYNC | REQ_PRIO, }; pgoff_t last_idx = ULONG_MAX; int err = 0; @@ -312,6 +311,7 @@ static int __commit_inmem_pages(struct inode *inode, fio.page = page; fio.old_blkaddr = NULL_ADDR; + fio.encrypted_page = NULL; err = do_write_data_page(&fio); if (err) { unlock_page(page); -- cgit v1.2.3 From 7ec84ed608e4fe4b00189ed36363da553c092eaf Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 26 Apr 2017 17:39:54 +0800 Subject: f2fs: don't hold cmd_lock during waiting discard command Previously, with protection of cmd_lock, we will wait for end io of discard command which potentially may lead long latency, making worse concurrency. So, in this patch, we try to add reference into discard entry to prevent the entry being released by other thread, then we can avoid holding global cmd_lock during waiting discard to finish. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 9fcc2f9aa732..6a79d0b3b423 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -688,6 +688,7 @@ static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi, dc->lstart = lstart; dc->start = start; dc->len = len; + dc->ref = 0; dc->state = D_PREP; dc->error = 0; init_completion(&dc->wait); @@ -1086,6 +1087,8 @@ static void __wait_discard_cmd(struct f2fs_sb_info *sbi, bool wait_cond) mutex_lock(&dcc->cmd_lock); list_for_each_entry_safe(dc, tmp, wait_list, list) { if (!wait_cond || dc->state == D_DONE) { + if (dc->ref) + continue; wait_for_completion_io(&dc->wait); __remove_discard_cmd(sbi, dc); } @@ -1098,17 +1101,29 @@ void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; struct discard_cmd *dc; + bool need_wait = false; mutex_lock(&dcc->cmd_lock); - dc = (struct discard_cmd *)__lookup_rb_tree(&dcc->root, NULL, blkaddr); if (dc) { - if (dc->state != D_PREP) - wait_for_completion_io(&dc->wait); - __punch_discard_cmd(sbi, dc, blkaddr); + if (dc->state == D_PREP) { + __punch_discard_cmd(sbi, dc, blkaddr); + } else { + dc->ref++; + need_wait = true; + } } - mutex_unlock(&dcc->cmd_lock); + + if (need_wait) { + wait_for_completion_io(&dc->wait); + mutex_lock(&dcc->cmd_lock); + f2fs_bug_on(sbi, dc->state != D_DONE); + dc->ref--; + if (!dc->ref) + __remove_discard_cmd(sbi, dc); + mutex_unlock(&dcc->cmd_lock); + } } /* This comes from f2fs_put_super */ -- cgit v1.2.3 From 87c98567046f8f5890bbb7e5dab874ced61ccbf8 Mon Sep 17 00:00:00 2001 From: Hou Pengyang Date: Thu, 27 Apr 2017 00:17:21 +0800 Subject: f2fs: release cp and dnode lock before IPU We don't need to rewrite the page under cp_rwsem and dnode locks. Signed-off-by: Hou Pengyang Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 6a79d0b3b423..9e15496036ff 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -312,6 +312,7 @@ static int __commit_inmem_pages(struct inode *inode, fio.page = page; fio.old_blkaddr = NULL_ADDR; fio.encrypted_page = NULL; + fio.need_lock = false, err = do_write_data_page(&fio); if (err) { unlock_page(page); -- cgit v1.2.3 From 30d60edd7becac313a6a3adb9733ea123695b2f6 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 27 Apr 2017 20:40:39 +0800 Subject: f2fs: allow cpc->reason to indicate more than one reason Change to use different bits of cpc->reason to indicate different status, so cpc->reason can indicate more than one reason. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 9e15496036ff..444ea2c4f671 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1262,7 +1262,7 @@ static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc, unsigned long *discard_map = (unsigned long *)se->discard_map; unsigned long *dmap = SIT_I(sbi)->tmp_map; unsigned int start = 0, end = -1; - bool force = (cpc->reason == CP_DISCARD); + bool force = (cpc->reason & CP_DISCARD); struct discard_entry *de = NULL; struct list_head *head = &SM_I(sbi)->dcc_info->entry_list; int i; @@ -1345,7 +1345,7 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc) unsigned long *prefree_map = dirty_i->dirty_segmap[PRE]; unsigned int start = 0, end = -1; unsigned int secno, start_segno; - bool force = (cpc->reason == CP_DISCARD); + bool force = (cpc->reason & CP_DISCARD); mutex_lock(&dirty_i->seglist_lock); @@ -2849,7 +2849,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) se = get_seg_entry(sbi, segno); /* add discard candidates */ - if (cpc->reason != CP_DISCARD) { + if (!(cpc->reason & CP_DISCARD)) { cpc->trim_start = segno; add_discard_addrs(sbi, cpc, false); } @@ -2885,7 +2885,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) f2fs_bug_on(sbi, !list_empty(head)); f2fs_bug_on(sbi, sit_i->dirty_sentries); out: - if (cpc->reason == CP_DISCARD) { + if (cpc->reason & CP_DISCARD) { __u64 trim_start = cpc->trim_start; for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++) -- cgit v1.2.3 From a725708ca434461dbaaefa7ec5004373ec3ba054 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 28 Apr 2017 13:56:08 +0800 Subject: f2fs: introduce CP_TRIMMED_FLAG to avoid unneeded discard Introduce CP_TRIMMED_FLAG to indicate all invalid block were trimmed before umount, so once we do mount with image which contain the flag, we don't record invalid blocks as undiscard one, when fstrim is being triggered, we can avoid issuing redundant discard commands. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 444ea2c4f671..23e809f64ded 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3084,10 +3084,17 @@ static void build_sit_entries(struct f2fs_sb_info *sbi) /* build discard map only one time */ if (f2fs_discard_en(sbi)) { - memcpy(se->discard_map, se->cur_valid_map, - SIT_VBLOCK_MAP_SIZE); - sbi->discard_blks += sbi->blocks_per_seg - - se->valid_blocks; + if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) { + memset(se->discard_map, 0xff, + SIT_VBLOCK_MAP_SIZE); + } else { + memcpy(se->discard_map, + se->cur_valid_map, + SIT_VBLOCK_MAP_SIZE); + sbi->discard_blks += + sbi->blocks_per_seg - + se->valid_blocks; + } } if (sbi->segs_per_sec > 1) @@ -3111,10 +3118,15 @@ static void build_sit_entries(struct f2fs_sb_info *sbi) seg_info_from_raw_sit(se, &sit); if (f2fs_discard_en(sbi)) { - memcpy(se->discard_map, se->cur_valid_map, - SIT_VBLOCK_MAP_SIZE); - sbi->discard_blks += old_valid_blocks - - se->valid_blocks; + if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) { + memset(se->discard_map, 0xff, + SIT_VBLOCK_MAP_SIZE); + } else { + memcpy(se->discard_map, se->cur_valid_map, + SIT_VBLOCK_MAP_SIZE); + sbi->discard_blks += old_valid_blocks - + se->valid_blocks; + } } if (sbi->segs_per_sec > 1) -- cgit v1.2.3 From 19023fdfb16384ce695bd7ce07902518d39f5435 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 1 May 2017 18:09:44 -0700 Subject: f2fs: flush dirty nats periodically This patch flushes dirty nats in order to acquire available nids by writing checkpoint. Otherwise, we can have no chance to get freed nids. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 23e809f64ded..5fdc995b1f1e 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -419,7 +419,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) else build_free_nids(sbi, false, false); - if (!is_idle(sbi)) + if (!is_idle(sbi) && !excess_dirty_nats(sbi)) return; /* checkpoint is the only way to shrink partial cached entries */ -- cgit v1.2.3 From 6190400da0498c63e01f1984f3386e95c487d2f7 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 6 Jul 2017 12:24:49 -0700 Subject: f2fs, block_dump: give WRITE direction to submit_bio The block_dump in submit_bio uses rw, instead of bio->bi_rw. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 5fdc995b1f1e..c35e70e72e8b 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -809,7 +809,7 @@ static int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, } if (bio) { - int ret = submit_bio_wait(0, bio); + int ret = submit_bio_wait(op, bio); bio_put(bio); if (ret) return ret; -- cgit v1.2.3 From 4a6ac1475b49371eda81c62150e9a626882f2029 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 10 May 2017 11:28:38 -0700 Subject: f2fs: remove unnecessary read cases in merged IO flow Merged IO flow doesn't need to care about read IOs. f2fs_submit_merged_bio -> f2fs_submit_merged_write f2fs_submit_merged_bios -> f2fs_submit_merged_writes f2fs_submit_merged_bio_cond -> f2fs_submit_merged_write_cond Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index c35e70e72e8b..5331cbefd681 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -328,8 +328,7 @@ static int __commit_inmem_pages(struct inode *inode, } if (last_idx != ULONG_MAX) - f2fs_submit_merged_bio_cond(sbi, inode, 0, last_idx, - DATA, WRITE); + f2fs_submit_merged_write_cond(sbi, inode, 0, last_idx, DATA); if (!err) __revoke_inmem_pages(inode, revoke_list, false, false); @@ -2229,7 +2228,7 @@ reallocate: &fio->new_blkaddr, sum, type); /* writeout dirty page into bdev */ - err = f2fs_submit_page_mbio(fio); + err = f2fs_submit_page_write(fio); if (err == -EAGAIN) { fio->old_blkaddr = fio->new_blkaddr; goto reallocate; @@ -2256,7 +2255,7 @@ void write_meta_page(struct f2fs_sb_info *sbi, struct page *page) fio.op_flags &= ~REQ_META; set_page_writeback(page); - f2fs_submit_page_mbio(&fio); + f2fs_submit_page_write(&fio); } void write_node_page(unsigned int nid, struct f2fs_io_info *fio) @@ -2375,8 +2374,8 @@ void f2fs_wait_on_page_writeback(struct page *page, if (PageWriteback(page)) { struct f2fs_sb_info *sbi = F2FS_P_SB(page); - f2fs_submit_merged_bio_cond(sbi, page->mapping->host, - 0, page->index, type, WRITE); + f2fs_submit_merged_write_cond(sbi, page->mapping->host, + 0, page->index, type); if (ordered) wait_on_page_writeback(page); else -- cgit v1.2.3 From e61d6504368df2834b9c2b76debe8b1b557d08e0 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 10 May 2017 14:19:54 -0700 Subject: f2fs: use fio instead of multiple parameters This patch just changes using fio instead of parameters. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 41 +++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 5331cbefd681..ca5f815c6eab 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2118,61 +2118,62 @@ static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type) return false; } -static int __get_segment_type_2(struct page *page, enum page_type p_type) +static int __get_segment_type_2(struct f2fs_io_info *fio) { - if (p_type == DATA) + if (fio->type == DATA) return CURSEG_HOT_DATA; else return CURSEG_HOT_NODE; } -static int __get_segment_type_4(struct page *page, enum page_type p_type) +static int __get_segment_type_4(struct f2fs_io_info *fio) { - if (p_type == DATA) { - struct inode *inode = page->mapping->host; + if (fio->type == DATA) { + struct inode *inode = fio->page->mapping->host; if (S_ISDIR(inode->i_mode)) return CURSEG_HOT_DATA; else return CURSEG_COLD_DATA; } else { - if (IS_DNODE(page) && is_cold_node(page)) + if (IS_DNODE(fio->page) && is_cold_node(fio->page)) return CURSEG_WARM_NODE; else return CURSEG_COLD_NODE; } } -static int __get_segment_type_6(struct page *page, enum page_type p_type) +static int __get_segment_type_6(struct f2fs_io_info *fio) { - if (p_type == DATA) { - struct inode *inode = page->mapping->host; + if (fio->type == DATA) { + struct inode *inode = fio->page->mapping->host; - if (is_cold_data(page) || file_is_cold(inode)) + if (is_cold_data(fio->page) || file_is_cold(inode)) return CURSEG_COLD_DATA; if (is_inode_flag_set(inode, FI_HOT_DATA)) return CURSEG_HOT_DATA; return CURSEG_WARM_DATA; } else { - if (IS_DNODE(page)) - return is_cold_node(page) ? CURSEG_WARM_NODE : + if (IS_DNODE(fio->page)) + return is_cold_node(fio->page) ? CURSEG_WARM_NODE : CURSEG_HOT_NODE; return CURSEG_COLD_NODE; } } -static int __get_segment_type(struct page *page, enum page_type p_type) +static int __get_segment_type(struct f2fs_io_info *fio) { - switch (F2FS_P_SB(page)->active_logs) { + switch (fio->sbi->active_logs) { case 2: - return __get_segment_type_2(page, p_type); + return __get_segment_type_2(fio); case 4: - return __get_segment_type_4(page, p_type); + return __get_segment_type_4(fio); } + /* NR_CURSEG_TYPE(6) logs by default */ - f2fs_bug_on(F2FS_P_SB(page), - F2FS_P_SB(page)->active_logs != NR_CURSEG_TYPE); - return __get_segment_type_6(page, p_type); + f2fs_bug_on(fio->sbi, fio->sbi->active_logs != NR_CURSEG_TYPE); + + return __get_segment_type_6(fio); } void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, @@ -2218,7 +2219,7 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) { - int type = __get_segment_type(fio->page, fio->type); + int type = __get_segment_type(fio); int err; if (fio->type == NODE || fio->type == DATA) -- cgit v1.2.3 From a3b6a409692bee072eec659b9d18766d53f96c36 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 10 May 2017 11:18:25 -0700 Subject: f2fs: split bio cache Split DATA/NODE type bio cache according to different temperature, so write IOs with the same temperature can be merged in corresponding bio cache as much as possible, otherwise, different temperature write IOs submitting into one bio cache will always cause split of bio. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Conflicts: include/trace/events/f2fs.h --- fs/f2fs/segment.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index ca5f815c6eab..f6cbacf66ddc 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2163,17 +2163,29 @@ static int __get_segment_type_6(struct f2fs_io_info *fio) static int __get_segment_type(struct f2fs_io_info *fio) { + int type = 0; + switch (fio->sbi->active_logs) { case 2: - return __get_segment_type_2(fio); + type = __get_segment_type_2(fio); + break; case 4: - return __get_segment_type_4(fio); + type = __get_segment_type_4(fio); + break; + case 6: + type = __get_segment_type_6(fio); + break; + default: + f2fs_bug_on(fio->sbi, true); } - /* NR_CURSEG_TYPE(6) logs by default */ - f2fs_bug_on(fio->sbi, fio->sbi->active_logs != NR_CURSEG_TYPE); - - return __get_segment_type_6(fio); + if (IS_HOT(type)) + fio->temp = HOT; + else if (IS_WARM(type)) + fio->temp = WARM; + else + fio->temp = COLD; + return type; } void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, -- cgit v1.2.3 From 5d6951b8e115161e940f46690a2c971833769584 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 12 May 2017 13:51:34 -0700 Subject: f2fs: avoid f2fs_lock_op for IPU writes Currently, if we do get_node_of_data before f2fs_lock_op, there may be dead lock as follows, where process A would be in infinite loop, and B will NOT be awaked. Process A(cp): Process B: f2fs_lock_all(sbi) get_dnode_of_data <---- lock dn.node_page flush_nodes f2fs_lock_op So, this patch adds f2fs_trylock_op to avoid f2fs_lock_op done by IPU. Signed-off-by: Hou Pengyang Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index f6cbacf66ddc..1a280099da8f 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -312,7 +312,7 @@ static int __commit_inmem_pages(struct inode *inode, fio.page = page; fio.old_blkaddr = NULL_ADDR; fio.encrypted_page = NULL; - fio.need_lock = false, + fio.need_lock = LOCK_DONE; err = do_write_data_page(&fio); if (err) { unlock_page(page); -- cgit v1.2.3 From 842ce444fd86167f3e9dc858f22e4c90639764e0 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 17 May 2017 10:36:58 -0700 Subject: f2fs: try to freeze in gc and discard threads This allows to freeze gc and discard threads. Cc: stable@vger.kernel.org Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 1a280099da8f..46ee1139046c 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "f2fs.h" #include "segment.h" @@ -1138,18 +1139,24 @@ static int issue_discard_thread(void *data) struct f2fs_sb_info *sbi = data; struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; wait_queue_head_t *q = &dcc->discard_wait_queue; -repeat: - if (kthread_should_stop()) - return 0; - __issue_discard_cmd(sbi, true); - __wait_discard_cmd(sbi, true); + set_freezable(); - congestion_wait(BLK_RW_SYNC, HZ/50); + do { + wait_event_interruptible(*q, kthread_should_stop() || + freezing(current) || + atomic_read(&dcc->discard_cmd_cnt)); + if (try_to_freeze()) + continue; + if (kthread_should_stop()) + return 0; - wait_event_interruptible(*q, kthread_should_stop() || - atomic_read(&dcc->discard_cmd_cnt)); - goto repeat; + __issue_discard_cmd(sbi, true); + __wait_discard_cmd(sbi, true); + + congestion_wait(BLK_RW_SYNC, HZ/50); + } while (!kthread_should_stop()); + return 0; } #ifdef CONFIG_BLK_DEV_ZONED -- cgit v1.2.3 From b89cdaf6b93047d2f916d8c34100b1239665fd20 Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Fri, 19 May 2017 14:42:12 +0800 Subject: f2fs: combine huge num of discard rb tree consistence checks Came across a hungtask caused by huge number of rb tree traversing during adding discard addrs in cp. This patch combine these consistence checks and move it to discard thread. Signed-off-by: Yunlei He Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 46ee1139046c..3c24a8ca0283 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -917,7 +917,6 @@ static void __punch_discard_cmd(struct f2fs_sb_info *sbi, dc->len = blkaddr - dc->lstart; dcc->undiscard_blks += dc->len; __relocate_discard_cmd(dcc, dc); - f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, &dcc->root)); modified = true; } @@ -927,16 +926,12 @@ static void __punch_discard_cmd(struct f2fs_sb_info *sbi, di.start + blkaddr + 1 - di.lstart, di.lstart + di.len - 1 - blkaddr, NULL, NULL); - f2fs_bug_on(sbi, - !__check_rb_tree_consistence(sbi, &dcc->root)); } else { dc->lstart++; dc->len--; dc->start++; dcc->undiscard_blks += dc->len; __relocate_discard_cmd(dcc, dc); - f2fs_bug_on(sbi, - !__check_rb_tree_consistence(sbi, &dcc->root)); } } } @@ -997,8 +992,6 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi, prev_dc->di.len += di.len; dcc->undiscard_blks += di.len; __relocate_discard_cmd(dcc, prev_dc); - f2fs_bug_on(sbi, - !__check_rb_tree_consistence(sbi, &dcc->root)); di = prev_dc->di; tdc = prev_dc; merged = true; @@ -1014,16 +1007,12 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi, __relocate_discard_cmd(dcc, next_dc); if (tdc) __remove_discard_cmd(sbi, tdc); - f2fs_bug_on(sbi, - !__check_rb_tree_consistence(sbi, &dcc->root)); merged = true; } if (!merged) { __insert_discard_tree(sbi, bdev, di.lstart, di.start, di.len, NULL, NULL); - f2fs_bug_on(sbi, - !__check_rb_tree_consistence(sbi, &dcc->root)); } next: prev_dc = next_dc; @@ -1062,6 +1051,8 @@ static void __issue_discard_cmd(struct f2fs_sb_info *sbi, bool issue_cond) int i, iter = 0; mutex_lock(&dcc->cmd_lock); + f2fs_bug_on(sbi, + !__check_rb_tree_consistence(sbi, &dcc->root)); blk_start_plug(&plug); for (i = MAX_PLIST_NUM - 1; i >= 0; i--) { pend_list = &dcc->pend_list[i]; -- cgit v1.2.3 From f152939829d14d6fd6e0f8a461df1996acc8269d Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 19 May 2017 23:37:00 +0800 Subject: f2fs: split wio_mutex Split wio_mutex to adjust different temperature bio cache. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 3c24a8ca0283..00503627c1d1 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2233,7 +2233,7 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) int err; if (fio->type == NODE || fio->type == DATA) - mutex_lock(&fio->sbi->wio_mutex[fio->type]); + mutex_lock(&fio->sbi->wio_mutex[fio->type][fio->temp]); reallocate: allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr, &fio->new_blkaddr, sum, type); @@ -2246,7 +2246,7 @@ reallocate: } if (fio->type == NODE || fio->type == DATA) - mutex_unlock(&fio->sbi->wio_mutex[fio->type]); + mutex_unlock(&fio->sbi->wio_mutex[fio->type][fio->temp]); } void write_meta_page(struct f2fs_sb_info *sbi, struct page *page) -- cgit v1.2.3 From 843d3364d7996211e38545bcd484d2eebeb1e5a5 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 19 May 2017 23:37:01 +0800 Subject: f2fs: introduce io_list for serialize data/node IOs Serialize data/node IOs by using fifo list instead of mutex lock, it will help to enhance concurrency of f2fs, meanwhile keeping LFS IO semantics. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 00503627c1d1..1be5947ae1fe 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2188,7 +2188,8 @@ static int __get_segment_type(struct f2fs_io_info *fio) void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, block_t old_blkaddr, block_t *new_blkaddr, - struct f2fs_summary *sum, int type) + struct f2fs_summary *sum, int type, + struct f2fs_io_info *fio, bool add_list) { struct sit_info *sit_i = SIT_I(sbi); struct curseg_info *curseg = CURSEG_I(sbi, type); @@ -2224,6 +2225,17 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, if (page && IS_NODESEG(type)) fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg)); + if (add_list) { + struct f2fs_bio_info *io; + + INIT_LIST_HEAD(&fio->list); + fio->in_list = true; + io = sbi->write_io[fio->type] + fio->temp; + spin_lock(&io->io_lock); + list_add_tail(&fio->list, &io->io_list); + spin_unlock(&io->io_lock); + } + mutex_unlock(&curseg->curseg_mutex); } @@ -2232,11 +2244,9 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) int type = __get_segment_type(fio); int err; - if (fio->type == NODE || fio->type == DATA) - mutex_lock(&fio->sbi->wio_mutex[fio->type][fio->temp]); reallocate: allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr, - &fio->new_blkaddr, sum, type); + &fio->new_blkaddr, sum, type, fio, true); /* writeout dirty page into bdev */ err = f2fs_submit_page_write(fio); @@ -2244,9 +2254,6 @@ reallocate: fio->old_blkaddr = fio->new_blkaddr; goto reallocate; } - - if (fio->type == NODE || fio->type == DATA) - mutex_unlock(&fio->sbi->wio_mutex[fio->type][fio->temp]); } void write_meta_page(struct f2fs_sb_info *sbi, struct page *page) @@ -2260,6 +2267,7 @@ void write_meta_page(struct f2fs_sb_info *sbi, struct page *page) .new_blkaddr = page->index, .page = page, .encrypted_page = NULL, + .in_list = false, }; if (unlikely(page->index >= MAIN_BLKADDR(sbi))) -- cgit v1.2.3 From 732de6bf9e328f15a85606744ddab0559ab7bf65 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 19 May 2017 23:46:43 +0800 Subject: f2fs: show more info if fail to issue discard Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 1be5947ae1fe..fcdc45c8ba1b 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -741,7 +741,8 @@ static void __remove_discard_cmd(struct f2fs_sb_info *sbi, if (dc->error) f2fs_msg(sbi->sb, KERN_INFO, - "Issue discard failed, ret: %d", dc->error); + "Issue discard(%u, %u, %u) failed, ret: %d", + dc->lstart, dc->start, dc->len, dc->error); __detach_discard_cmd(dcc, dc); } -- cgit v1.2.3 From aa9d75d2f522fccac641f5cb535acd53b0238f2e Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 19 May 2017 23:46:44 +0800 Subject: f2fs: wake up all waiters in f2fs_submit_discard_endio There could be more than one waiter waiting discard IO completion, so we need use complete_all() instead of complete() in f2fs_submit_discard_endio to avoid hungtask. Fixes: ec9895add2c5 ("f2fs: don't hold cmd_lock during waiting discard command") Cc: Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index fcdc45c8ba1b..66cbd3da0404 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -752,7 +752,7 @@ static void f2fs_submit_discard_endio(struct bio *bio) dc->error = bio->bi_error; dc->state = D_DONE; - complete(&dc->wait); + complete_all(&dc->wait); bio_put(bio); } -- cgit v1.2.3 From b3df3669b73c7e48e6b808e1b17522f585bd69ea Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 19 May 2017 23:46:45 +0800 Subject: f2fs: wait discard IO completion without cmd_lock held Wait discard IO completion outside cmd_lock to avoid long latency of holding cmd_lock in IO busy scenario. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 66cbd3da0404..c8f5d8feac44 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1076,17 +1076,34 @@ static void __wait_discard_cmd(struct f2fs_sb_info *sbi, bool wait_cond) struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; struct list_head *wait_list = &(dcc->wait_list); struct discard_cmd *dc, *tmp; + bool need_wait; + +next: + need_wait = false; mutex_lock(&dcc->cmd_lock); list_for_each_entry_safe(dc, tmp, wait_list, list) { - if (!wait_cond || dc->state == D_DONE) { - if (dc->ref) - continue; + if (!wait_cond || (dc->state == D_DONE && !dc->ref)) { wait_for_completion_io(&dc->wait); __remove_discard_cmd(sbi, dc); + } else { + dc->ref++; + need_wait = true; + break; } } mutex_unlock(&dcc->cmd_lock); + + if (need_wait) { + wait_for_completion_io(&dc->wait); + mutex_lock(&dcc->cmd_lock); + f2fs_bug_on(sbi, dc->state != D_DONE); + dc->ref--; + if (!dc->ref) + __remove_discard_cmd(sbi, dc); + mutex_unlock(&dcc->cmd_lock); + goto next; + } } /* This should be covered by global mutex, &sit_i->sentry_lock */ -- cgit v1.2.3 From 46281c4ef52648cf84e16013bd196a78f9d84d3d Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 26 May 2017 17:04:40 +0900 Subject: f2fs: Do not issue small discards in LFS mode clear_prefree_segments() issues small discards after discarding full segments. These small discards may not be section aligned, so not zone aligned on a zoned block device, causing __f2fs_iissue_discard_zone() to fail. Fix this by not issuing small discards for a volume mounted with the BLKZONED feature enabled. Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index c8f5d8feac44..33b1628245e7 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1416,7 +1416,8 @@ find_next: sbi->blocks_per_seg, cur_pos); len = next_pos - cur_pos; - if (force && len < cpc->trim_minlen) + if (f2fs_sb_mounted_blkzoned(sbi->sb) || + (force && len < cpc->trim_minlen)) goto skip; f2fs_issue_discard(sbi, entry->start_blkaddr + cur_pos, -- cgit v1.2.3 From 83f42def8ae0598dfa66c3419c9df4078df60a48 Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Thu, 1 Jun 2017 16:43:51 +0800 Subject: f2fs: fix a panic caused by NULL flush_cmd_control Mount fs with option noflush_merge, boot failed for illegal address fcc in function f2fs_issue_flush: if (!test_opt(sbi, FLUSH_MERGE)) { ret = submit_flush_wait(sbi); atomic_inc(&fcc->issued_flush); -> Here, fcc illegal return ret; } Signed-off-by: Yunlei He Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 33b1628245e7..3e2121dcaded 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -566,6 +566,9 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi) init_waitqueue_head(&fcc->flush_wait_queue); init_llist_head(&fcc->issue_list); SM_I(sbi)->fcc_info = fcc; + if (!test_opt(sbi, FLUSH_MERGE)) + return err; + init_thread: fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi, "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev)); @@ -3319,7 +3322,7 @@ int build_segment_manager(struct f2fs_sb_info *sbi) INIT_LIST_HEAD(&sm_info->sit_entry_set); - if (test_opt(sbi, FLUSH_MERGE) && !f2fs_readonly(sbi->sb)) { + if (!f2fs_readonly(sbi->sb)) { err = create_flush_cmd_control(sbi); if (err) return err; -- cgit v1.2.3 From 1479b6ffeb2eba6575600fb941f73f47d74a91a0 Mon Sep 17 00:00:00 2001 From: Jin Qian Date: Thu, 1 Jun 2017 11:18:30 -0700 Subject: f2fs: sanity check size of nat and sit cache Make sure number of entires doesn't exceed max journal size. Cc: stable@vger.kernel.org Signed-off-by: Jin Qian Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 3e2121dcaded..f86fd003f932 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2574,6 +2574,8 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type) static int restore_curseg_summaries(struct f2fs_sb_info *sbi) { + struct f2fs_journal *sit_j = CURSEG_I(sbi, CURSEG_COLD_DATA)->journal; + struct f2fs_journal *nat_j = CURSEG_I(sbi, CURSEG_HOT_DATA)->journal; int type = CURSEG_HOT_DATA; int err; @@ -2600,6 +2602,11 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi) return err; } + /* sanity check for summary blocks */ + if (nats_in_cursum(nat_j) > NAT_JOURNAL_ENTRIES || + sits_in_cursum(sit_j) > SIT_JOURNAL_ENTRIES) + return -EINVAL; + return 0; } -- cgit v1.2.3 From 4798bcc8085173a5f747ba9a597bc8fc27cbd8d6 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 5 Jun 2017 18:29:06 +0800 Subject: f2fs: introduce __wait_one_discard_bio In order to avoid copied codes. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index f86fd003f932..e10d4e5f2193 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1074,6 +1074,20 @@ out: mutex_unlock(&dcc->cmd_lock); } +static void __wait_one_discard_bio(struct f2fs_sb_info *sbi, + struct discard_cmd *dc) +{ + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + + wait_for_completion_io(&dc->wait); + mutex_lock(&dcc->cmd_lock); + f2fs_bug_on(sbi, dc->state != D_DONE); + dc->ref--; + if (!dc->ref) + __remove_discard_cmd(sbi, dc); + mutex_unlock(&dcc->cmd_lock); +} + static void __wait_discard_cmd(struct f2fs_sb_info *sbi, bool wait_cond) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; @@ -1098,13 +1112,7 @@ next: mutex_unlock(&dcc->cmd_lock); if (need_wait) { - wait_for_completion_io(&dc->wait); - mutex_lock(&dcc->cmd_lock); - f2fs_bug_on(sbi, dc->state != D_DONE); - dc->ref--; - if (!dc->ref) - __remove_discard_cmd(sbi, dc); - mutex_unlock(&dcc->cmd_lock); + __wait_one_discard_bio(sbi, dc); goto next; } } @@ -1128,15 +1136,8 @@ void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) } mutex_unlock(&dcc->cmd_lock); - if (need_wait) { - wait_for_completion_io(&dc->wait); - mutex_lock(&dcc->cmd_lock); - f2fs_bug_on(sbi, dc->state != D_DONE); - dc->ref--; - if (!dc->ref) - __remove_discard_cmd(sbi, dc); - mutex_unlock(&dcc->cmd_lock); - } + if (need_wait) + __wait_one_discard_bio(sbi, dc); } /* This comes from f2fs_put_super */ -- cgit v1.2.3 From f3fb4448d8f0843b6aec068fdcb0c8ea8a5321da Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 5 Jun 2017 18:29:07 +0800 Subject: f2fs: add f2fs_bug_on in __remove_discard_cmd Recently, discard related codes have changed a lot, so add f2fs_bug_on to detect potential bug. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index e10d4e5f2193..9b08a6660d13 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -739,6 +739,8 @@ static void __remove_discard_cmd(struct f2fs_sb_info *sbi, { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + f2fs_bug_on(sbi, dc->ref); + if (dc->error == -EOPNOTSUPP) dc->error = 0; -- cgit v1.2.3 From 99ad6f555ab7f7ca7c337521dfe0c66a703dbdff Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Sat, 24 Jun 2017 15:57:19 +0800 Subject: f2fs: avoid redundant f2fs_flush after remount create_flush_cmd_control will create redundant issue_flush_thread after each remount with flush_merge option. Signed-off-by: Yunlong Song Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 9b08a6660d13..d73b4b29055c 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -555,6 +555,8 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi) if (SM_I(sbi)->fcc_info) { fcc = SM_I(sbi)->fcc_info; + if (fcc->f2fs_issue_flush) + return err; goto init_thread; } -- cgit v1.2.3 From 243d3acf5b181bbea3a4ad2ec8a3a84c26a22701 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 29 Jun 2017 23:17:45 +0800 Subject: f2fs: stop gc/discard thread in prior during umount This patch resolves kernel panic for xfstests/081, caused by recent f2fs_bug_on f2fs: add f2fs_bug_on in __remove_discard_cmd For fixing, we will stop gc/discard thread in prior in ->kill_sb in order to avoid referring and releasing race among them. Signed-off-by: Jaegeuk Kim Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index d73b4b29055c..09532f823cbc 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1144,6 +1144,18 @@ void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) __wait_one_discard_bio(sbi, dc); } +void stop_discard_thread(struct f2fs_sb_info *sbi) +{ + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + + if (dcc && dcc->f2fs_issue_discard) { + struct task_struct *discard_thread = dcc->f2fs_issue_discard; + + dcc->f2fs_issue_discard = NULL; + kthread_stop(discard_thread); + } +} + /* This comes from f2fs_put_super */ void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi) { @@ -1501,12 +1513,7 @@ static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi) if (!dcc) return; - if (dcc->f2fs_issue_discard) { - struct task_struct *discard_thread = dcc->f2fs_issue_discard; - - dcc->f2fs_issue_discard = NULL; - kthread_stop(discard_thread); - } + stop_discard_thread(sbi); kfree(dcc); SM_I(sbi)->dcc_info = NULL; -- cgit v1.2.3 From 3099c953ccfdd643e83329f55afa088e94904831 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 30 Jun 2017 17:19:02 +0800 Subject: f2fs: introduce __check_sit_bitmap After we introduce discard thread, discard command can be issued concurrently with data allocating, this patch adds new function to heck sit bitmap to ensure that userdata was invalid in which on-going discard command covered. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Conflicts: fs/f2fs/segment.c --- fs/f2fs/segment.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 09532f823cbc..0f6cded83c7b 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -844,6 +844,31 @@ static int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, return 0; } +void __check_sit_bitmap(struct f2fs_sb_info *sbi, + block_t start, block_t end) +{ +#ifdef CONFIG_F2FS_CHECK_FS + struct seg_entry *sentry; + unsigned int segno; + block_t blk = start; + unsigned long offset, size, max_blocks = sbi->blocks_per_seg; + unsigned long *map; + + while (blk < end) { + segno = GET_SEGNO(sbi, blk); + sentry = get_seg_entry(sbi, segno); + offset = GET_BLKOFF_FROM_SEG0(sbi, blk); + + size = min((unsigned long)(end - blk), max_blocks); + map = (unsigned long *)(sentry->cur_valid_map); + offset = __find_rev_next_bit(map, size, offset); + f2fs_bug_on(sbi, offset != size); + blk += size; + } +#endif +} + +/* this function is copied from blkdev_issue_discard from block/blk-lib.c */ static void __submit_discard_cmd(struct f2fs_sb_info *sbi, struct discard_cmd *dc) { @@ -869,6 +894,7 @@ static void __submit_discard_cmd(struct f2fs_sb_info *sbi, bio->bi_end_io = f2fs_submit_discard_endio; submit_bio(REQ_SYNC, bio); list_move_tail(&dc->list, &dcc->wait_list); + __check_sit_bitmap(sbi, dc->start, dc->start + dc->len); } } else { __remove_discard_cmd(sbi, dc); -- cgit v1.2.3 From 972aaba68e97f8eabdfcfe594a9b78d604ee613f Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 10 Jul 2017 19:16:28 -0700 Subject: f2fs: make more close to v4.13-rc1 Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 0f6cded83c7b..9744e8c9d308 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1243,8 +1243,8 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi, sector = SECTOR_FROM_BLOCK(blkstart); nr_sects = SECTOR_FROM_BLOCK(blklen); - if (sector & (bdev_zone_size(bdev) - 1) || - nr_sects != bdev_zone_size(bdev)) { + if (sector & (bdev_zone_sectors(bdev) - 1) || + nr_sects != bdev_zone_sectors(bdev)) { f2fs_msg(sbi->sb, KERN_INFO, "(%d) %s: Unaligned discard attempted (block %x + %x)", devi, sbi->s_ndevs ? FDEV(devi).path: "", @@ -2998,13 +2998,13 @@ static int build_sit_info(struct f2fs_sb_info *sbi) SM_I(sbi)->sit_info = sit_i; - sit_i->sentries = f2fs_kvzalloc(MAIN_SEGS(sbi) * + sit_i->sentries = kvzalloc(MAIN_SEGS(sbi) * sizeof(struct seg_entry), GFP_KERNEL); if (!sit_i->sentries) return -ENOMEM; bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi)); - sit_i->dirty_sentries_bitmap = f2fs_kvzalloc(bitmap_size, GFP_KERNEL); + sit_i->dirty_sentries_bitmap = kvzalloc(bitmap_size, GFP_KERNEL); if (!sit_i->dirty_sentries_bitmap) return -ENOMEM; @@ -3037,7 +3037,7 @@ static int build_sit_info(struct f2fs_sb_info *sbi) return -ENOMEM; if (sbi->segs_per_sec > 1) { - sit_i->sec_entries = f2fs_kvzalloc(MAIN_SECS(sbi) * + sit_i->sec_entries = kvzalloc(MAIN_SECS(sbi) * sizeof(struct sec_entry), GFP_KERNEL); if (!sit_i->sec_entries) return -ENOMEM; @@ -3088,12 +3088,12 @@ static int build_free_segmap(struct f2fs_sb_info *sbi) SM_I(sbi)->free_info = free_i; bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi)); - free_i->free_segmap = f2fs_kvmalloc(bitmap_size, GFP_KERNEL); + free_i->free_segmap = kvmalloc(bitmap_size, GFP_KERNEL); if (!free_i->free_segmap) return -ENOMEM; sec_bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi)); - free_i->free_secmap = f2fs_kvmalloc(sec_bitmap_size, GFP_KERNEL); + free_i->free_secmap = kvmalloc(sec_bitmap_size, GFP_KERNEL); if (!free_i->free_secmap) return -ENOMEM; @@ -3273,7 +3273,7 @@ static int init_victim_secmap(struct f2fs_sb_info *sbi) struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); unsigned int bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi)); - dirty_i->victim_secmap = f2fs_kvzalloc(bitmap_size, GFP_KERNEL); + dirty_i->victim_secmap = kvzalloc(bitmap_size, GFP_KERNEL); if (!dirty_i->victim_secmap) return -ENOMEM; return 0; @@ -3295,7 +3295,7 @@ static int build_dirty_segmap(struct f2fs_sb_info *sbi) bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi)); for (i = 0; i < NR_DIRTY_TYPE; i++) { - dirty_i->dirty_segmap[i] = f2fs_kvzalloc(bitmap_size, GFP_KERNEL); + dirty_i->dirty_segmap[i] = kvzalloc(bitmap_size, GFP_KERNEL); if (!dirty_i->dirty_segmap[i]) return -ENOMEM; } -- cgit v1.2.3 From 6bf7fc57146876306137d2229d42082165af5dbf Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 19 Jul 2017 10:59:55 -0700 Subject: f2fs: give a try to do atomic write in -ENOMEM case It'd be better to retry writing atomic pages when we get -ENOMEM. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 9744e8c9d308..bf9d66fa0af5 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -309,17 +309,21 @@ static int __commit_inmem_pages(struct inode *inode, inode_dec_dirty_pages(inode); remove_dirty_inode(inode); } - +retry: fio.page = page; fio.old_blkaddr = NULL_ADDR; fio.encrypted_page = NULL; fio.need_lock = LOCK_DONE; err = do_write_data_page(&fio); if (err) { + if (err == -ENOMEM) { + congestion_wait(BLK_RW_ASYNC, HZ/50); + cond_resched(); + goto retry; + } unlock_page(page); break; } - /* record old blkaddr for revoking */ cur->old_addr = fio.old_blkaddr; last_idx = page->index; -- cgit v1.2.3 From e088277a813b12d98f81f79da0c738b54974d56a Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 22 Jul 2017 08:52:23 +0800 Subject: f2fs: make background threads of f2fs being aware of freezing When ->freeze_fs is called from lvm for doing snapshot, it needs to make sure there will be no more changes in filesystem's data, however, previously, background threads like GC thread wasn't aware of freezing, so in environment with active background threads, data of snapshot becomes unstable. This patch fixes this issue by adding sb_{start,end}_intwrite in below background threads: - GC thread - flush thread - discard thread Note that, don't use sb_start_intwrite() in gc_thread_func() due to: generic/241 reports below bug: ====================================================== WARNING: possible circular locking dependency detected 4.13.0-rc1+ #32 Tainted: G O ------------------------------------------------------ f2fs_gc-250:0/22186 is trying to acquire lock: (&sbi->gc_mutex){+.+...}, at: [] f2fs_sync_fs+0x7b/0x1b0 [f2fs] but task is already holding lock: (sb_internal#2){++++.-}, at: [] gc_thread_func+0x159/0x4a0 [f2fs] which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #2 (sb_internal#2){++++.-}: __lock_acquire+0x405/0x7b0 lock_acquire+0xae/0x220 __sb_start_write+0x11d/0x1f0 f2fs_evict_inode+0x2d6/0x4e0 [f2fs] evict+0xa8/0x170 iput+0x1fb/0x2c0 f2fs_sync_inode_meta+0x3f/0xf0 [f2fs] write_checkpoint+0x1b1/0x750 [f2fs] f2fs_sync_fs+0x85/0x1b0 [f2fs] f2fs_do_sync_file.isra.24+0x137/0xa30 [f2fs] f2fs_sync_file+0x34/0x40 [f2fs] vfs_fsync_range+0x4a/0xa0 do_fsync+0x3c/0x60 SyS_fdatasync+0x15/0x20 do_fast_syscall_32+0xa1/0x1b0 entry_SYSENTER_32+0x4c/0x7b -> #1 (&sbi->cp_mutex){+.+...}: __lock_acquire+0x405/0x7b0 lock_acquire+0xae/0x220 __mutex_lock+0x4f/0x830 mutex_lock_nested+0x25/0x30 write_checkpoint+0x2f/0x750 [f2fs] f2fs_sync_fs+0x85/0x1b0 [f2fs] sync_filesystem+0x67/0x80 generic_shutdown_super+0x27/0x100 kill_block_super+0x22/0x50 kill_f2fs_super+0x3a/0x40 [f2fs] deactivate_locked_super+0x3d/0x70 deactivate_super+0x40/0x60 cleanup_mnt+0x39/0x70 __cleanup_mnt+0x10/0x20 task_work_run+0x69/0x80 exit_to_usermode_loop+0x57/0x92 do_fast_syscall_32+0x18c/0x1b0 entry_SYSENTER_32+0x4c/0x7b -> #0 (&sbi->gc_mutex){+.+...}: validate_chain.isra.36+0xc50/0xdb0 __lock_acquire+0x405/0x7b0 lock_acquire+0xae/0x220 __mutex_lock+0x4f/0x830 mutex_lock_nested+0x25/0x30 f2fs_sync_fs+0x7b/0x1b0 [f2fs] f2fs_balance_fs_bg+0xb9/0x200 [f2fs] gc_thread_func+0x302/0x4a0 [f2fs] kthread+0xe9/0x120 ret_from_fork+0x19/0x24 other info that might help us debug this: Chain exists of: &sbi->gc_mutex --> &sbi->cp_mutex --> sb_internal#2 Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(sb_internal#2); lock(&sbi->cp_mutex); lock(sb_internal#2); lock(&sbi->gc_mutex); *** DEADLOCK *** 1 lock held by f2fs_gc-250:0/22186: #0: (sb_internal#2){++++.-}, at: [] gc_thread_func+0x159/0x4a0 [f2fs] stack backtrace: CPU: 2 PID: 22186 Comm: f2fs_gc-250:0 Tainted: G O 4.13.0-rc1+ #32 Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 Call Trace: dump_stack+0x5f/0x92 print_circular_bug+0x1b3/0x1bd validate_chain.isra.36+0xc50/0xdb0 ? __this_cpu_preempt_check+0xf/0x20 __lock_acquire+0x405/0x7b0 lock_acquire+0xae/0x220 ? f2fs_sync_fs+0x7b/0x1b0 [f2fs] __mutex_lock+0x4f/0x830 ? f2fs_sync_fs+0x7b/0x1b0 [f2fs] mutex_lock_nested+0x25/0x30 ? f2fs_sync_fs+0x7b/0x1b0 [f2fs] f2fs_sync_fs+0x7b/0x1b0 [f2fs] f2fs_balance_fs_bg+0xb9/0x200 [f2fs] gc_thread_func+0x302/0x4a0 [f2fs] ? preempt_schedule_common+0x2f/0x4d ? f2fs_gc+0x540/0x540 [f2fs] kthread+0xe9/0x120 ? f2fs_gc+0x540/0x540 [f2fs] ? kthread_create_on_node+0x30/0x30 ret_from_fork+0x19/0x24 The deadlock occurs in below condition: GC Thread Thread B - sb_start_intwrite - f2fs_sync_file - f2fs_sync_fs - mutex_lock(&sbi->gc_mutex) - write_checkpoint - block_operations - f2fs_sync_inode_meta - iput - sb_start_intwrite - mutex_lock(&sbi->gc_mutex) Fix this by altering sb_start_intwrite to sb_start_write_trylock. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index bf9d66fa0af5..3573b95f4fab 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -485,6 +485,8 @@ repeat: if (kthread_should_stop()) return 0; + sb_start_intwrite(sbi->sb); + if (!llist_empty(&fcc->issue_list)) { struct flush_cmd *cmd, *next; int ret; @@ -503,6 +505,8 @@ repeat: fcc->dispatch_list = NULL; } + sb_end_intwrite(sbi->sb); + wait_event_interruptible(*q, kthread_should_stop() || !llist_empty(&fcc->issue_list)); goto repeat; @@ -1210,9 +1214,13 @@ static int issue_discard_thread(void *data) if (kthread_should_stop()) return 0; + sb_start_intwrite(sbi->sb); + __issue_discard_cmd(sbi, true); __wait_discard_cmd(sbi, true); + sb_end_intwrite(sbi->sb); + congestion_wait(BLK_RW_SYNC, HZ/50); } while (!kthread_should_stop()); return 0; -- cgit v1.2.3 From 12832f18b49d43473a9c59d7666887ee1d21d03c Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 31 Jul 2017 20:19:09 +0800 Subject: f2fs: support inode checksum This patch adds to support inode checksum in f2fs. Signed-off-by: Chao Yu [Jaegeuk Kim: fix verification flow] Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 3573b95f4fab..af7da1b62e94 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2294,9 +2294,12 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, mutex_unlock(&sit_i->sentry_lock); - if (page && IS_NODESEG(type)) + if (page && IS_NODESEG(type)) { fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg)); + f2fs_inode_chksum_set(sbi, page); + } + if (add_list) { struct f2fs_bio_info *io; -- cgit v1.2.3 From f18ec06e50207cb24e29523e7ad75237335d0b9c Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Wed, 2 Aug 2017 21:20:13 +0800 Subject: f2fs: update cur_valid_map_mir together with cur_valid_map When cur_valid_map passes the f2fs_test_and_set(,clear)_bit test, cur_valid_map_mir update is skipped unlikely, so fix it. The fix now changes the mirror check together with cur_valid_map all the time. Signed-off-by: Yunlong Song Signed-off-by: Chao Yu [Jaegeuk Kim: Fix unused variable and add unlikely for corner condition.] Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 44 ++++++++++++++++++++++++++++++-------------- 1 file changed, 30 insertions(+), 14 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index af7da1b62e94..1a9737f764d1 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1583,6 +1583,10 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) struct seg_entry *se; unsigned int segno, offset; long int new_vblocks; + bool exist; +#ifdef CONFIG_F2FS_CHECK_FS + bool mir_exist; +#endif segno = GET_SEGNO(sbi, blkaddr); @@ -1599,17 +1603,23 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) /* Update valid block bitmap */ if (del > 0) { - if (f2fs_test_and_set_bit(offset, se->cur_valid_map)) { + exist = f2fs_test_and_set_bit(offset, se->cur_valid_map); #ifdef CONFIG_F2FS_CHECK_FS - if (f2fs_test_and_set_bit(offset, - se->cur_valid_map_mir)) - f2fs_bug_on(sbi, 1); - else - WARN_ON(1); -#else + mir_exist = f2fs_test_and_set_bit(offset, + se->cur_valid_map_mir); + if (unlikely(exist != mir_exist)) { + f2fs_msg(sbi->sb, KERN_ERR, "Inconsistent error " + "when setting bitmap, blk:%u, old bit:%d", + blkaddr, exist); f2fs_bug_on(sbi, 1); + } #endif + if (unlikely(exist)) { + f2fs_msg(sbi->sb, KERN_ERR, + "Bitmap was wrongly set, blk:%u", blkaddr); + f2fs_bug_on(sbi, 1); } + if (f2fs_discard_en(sbi) && !f2fs_test_and_set_bit(offset, se->discard_map)) sbi->discard_blks--; @@ -1620,17 +1630,23 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) se->ckpt_valid_blocks++; } } else { - if (!f2fs_test_and_clear_bit(offset, se->cur_valid_map)) { + exist = f2fs_test_and_clear_bit(offset, se->cur_valid_map); #ifdef CONFIG_F2FS_CHECK_FS - if (!f2fs_test_and_clear_bit(offset, - se->cur_valid_map_mir)) - f2fs_bug_on(sbi, 1); - else - WARN_ON(1); -#else + mir_exist = f2fs_test_and_clear_bit(offset, + se->cur_valid_map_mir); + if (unlikely(exist != mir_exist)) { + f2fs_msg(sbi->sb, KERN_ERR, "Inconsistent error " + "when clearing bitmap, blk:%u, old bit:%d", + blkaddr, exist); f2fs_bug_on(sbi, 1); + } #endif + if (unlikely(!exist)) { + f2fs_msg(sbi->sb, KERN_ERR, + "Bitmap was wrongly cleared, blk:%u", blkaddr); + f2fs_bug_on(sbi, 1); } + if (f2fs_discard_en(sbi) && f2fs_test_and_clear_bit(offset, se->discard_map)) sbi->discard_blks++; -- cgit v1.2.3 From 98407fc7a07f5b7d21fc5caaf00844f3889e77a0 Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Wed, 2 Aug 2017 22:16:54 +0800 Subject: f2fs: do not change the valid_block value if cur_valid_map was wrongly set or cleared Signed-off-by: Yunlong Song Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 1a9737f764d1..09df86430ed0 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1618,6 +1618,8 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) f2fs_msg(sbi->sb, KERN_ERR, "Bitmap was wrongly set, blk:%u", blkaddr); f2fs_bug_on(sbi, 1); + se->valid_blocks--; + del = 0; } if (f2fs_discard_en(sbi) && @@ -1645,6 +1647,8 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) f2fs_msg(sbi->sb, KERN_ERR, "Bitmap was wrongly cleared, blk:%u", blkaddr); f2fs_bug_on(sbi, 1); + se->valid_blocks++; + del = 0; } if (f2fs_discard_en(sbi) && -- cgit v1.2.3 From d39f75a593462334d1baf72b67e57bd93e9a1b0d Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 2 Aug 2017 23:21:48 +0800 Subject: f2fs: add app/fs io stat This patch enables inner app/fs io stats and introduces below virtual fs nodes for exposing stats info: /sys/fs/f2fs//iostat_enable /proc/fs/f2fs//iostat_info Signed-off-by: Chao Yu [Jaegeuk Kim: fix wrong stat assignment] Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 09df86430ed0..edc7c3d254c7 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -292,6 +292,7 @@ static int __commit_inmem_pages(struct inode *inode, .type = DATA, .op = REQ_OP_WRITE, .op_flags = REQ_SYNC | REQ_PRIO, + .io_type = FS_DATA_IO, }; pgoff_t last_idx = ULONG_MAX; int err = 0; @@ -903,6 +904,8 @@ static void __submit_discard_cmd(struct f2fs_sb_info *sbi, submit_bio(REQ_SYNC, bio); list_move_tail(&dc->list, &dcc->wait_list); __check_sit_bitmap(sbi, dc->start, dc->start + dc->len); + + f2fs_update_iostat(sbi, FS_DISCARD, 1); } } else { __remove_discard_cmd(sbi, dc); @@ -2351,7 +2354,8 @@ reallocate: } } -void write_meta_page(struct f2fs_sb_info *sbi, struct page *page) +void write_meta_page(struct f2fs_sb_info *sbi, struct page *page, + enum iostat_type io_type) { struct f2fs_io_info fio = { .sbi = sbi, @@ -2370,6 +2374,8 @@ void write_meta_page(struct f2fs_sb_info *sbi, struct page *page) set_page_writeback(page); f2fs_submit_page_write(&fio); + + f2fs_update_iostat(sbi, io_type, F2FS_BLKSIZE); } void write_node_page(unsigned int nid, struct f2fs_io_info *fio) @@ -2378,6 +2384,8 @@ void write_node_page(unsigned int nid, struct f2fs_io_info *fio) set_summary(&sum, nid, 0, 0); do_write_page(&sum, fio); + + f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE); } void write_data_page(struct dnode_of_data *dn, struct f2fs_io_info *fio) @@ -2391,13 +2399,22 @@ void write_data_page(struct dnode_of_data *dn, struct f2fs_io_info *fio) set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); do_write_page(&sum, fio); f2fs_update_data_blkaddr(dn, fio->new_blkaddr); + + f2fs_update_iostat(sbi, fio->io_type, F2FS_BLKSIZE); } int rewrite_data_page(struct f2fs_io_info *fio) { + int err; + fio->new_blkaddr = fio->old_blkaddr; stat_inc_inplace_blocks(fio->sbi); - return f2fs_submit_page_bio(fio); + + err = f2fs_submit_page_bio(fio); + + f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE); + + return err; } void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, -- cgit v1.2.3 From c9881425b5b16c4dd9656d6ae0b95029157ccc50 Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Fri, 4 Aug 2017 17:07:15 +0800 Subject: f2fs: fix the size value in __check_sit_bitmap The current size value is not correct and will miss bitmap check. Signed-off-by: Yunlong Song Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index edc7c3d254c7..20f466ace8b0 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -868,11 +868,14 @@ void __check_sit_bitmap(struct f2fs_sb_info *sbi, sentry = get_seg_entry(sbi, segno); offset = GET_BLKOFF_FROM_SEG0(sbi, blk); - size = min((unsigned long)(end - blk), max_blocks); + if (end < START_BLOCK(sbi, segno + 1)) + size = GET_BLKOFF_FROM_SEG0(sbi, end); + else + size = max_blocks; map = (unsigned long *)(sentry->cur_valid_map); offset = __find_rev_next_bit(map, size, offset); f2fs_bug_on(sbi, offset != size); - blk += size; + blk = START_BLOCK(sbi, segno + 1); } #endif } -- cgit v1.2.3 From f542a0378dc8b18ffb09bcdbf23aa55c260d6acc Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 8 Aug 2017 19:09:08 +0800 Subject: f2fs: retry to revoke atomic commit in -ENOMEM case During atomic committing, if we encounter -ENOMEM in revoke path, it's better to give a chance to retry revoking. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 20f466ace8b0..03849778b881 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -213,9 +213,15 @@ static int __revoke_inmem_pages(struct inode *inode, struct node_info ni; trace_f2fs_commit_inmem_page(page, INMEM_REVOKE); - +retry: set_new_dnode(&dn, inode, NULL, NULL, 0); - if (get_dnode_of_data(&dn, page->index, LOOKUP_NODE)) { + err = get_dnode_of_data(&dn, page->index, LOOKUP_NODE); + if (err) { + if (err == -ENOMEM) { + congestion_wait(BLK_RW_ASYNC, HZ/50); + cond_resched(); + goto retry; + } err = -EAGAIN; goto next; } -- cgit v1.2.3 From 4ff6d9bf5af4c74a6a69e32a630975c92f9614d1 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 7 Aug 2017 23:09:56 +0800 Subject: f2fs: introduce discard_granularity sysfs entry Commit d618ebaf0aa8 ("f2fs: enable small discard by default") enables f2fs to issue 4K size discard in real-time discard mode. However, issuing smaller discard may cost more lifetime but releasing less free space in flash device. Since f2fs has ability of separating hot/cold data and garbage collection, we can expect that small-sized invalid region would expand soon with OPU, deletion or garbage collection on valid datas, so it's better to delay or skip issuing smaller size discards, it could help to reduce overmuch consumption of IO bandwidth and lifetime of flash storage. This patch makes f2fs selectng 64K size as its default minimal granularity, and issue discard with the size which is not smaller than minimal granularity. Also it exposes discard granularity as sysfs entry for configuration in different scenario. Jaegeuk Kim: We must issue all the accumulated discard commands when fstrim is called. So, I've added pend_list_tag[] to indicate whether we should issue the commands or not. If tag sets P_ACTIVE or P_TRIM, we have to issue them. P_TRIM is set once at a time, given fstrim trigger. In addition, issue_discard_thread is calling too much due to the number of discard commands remaining in the pending list. I added a timer to control it likewise gc_thread. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 91 ++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 77 insertions(+), 14 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 03849778b881..97d43373e10e 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1096,32 +1096,65 @@ static int __queue_discard_cmd(struct f2fs_sb_info *sbi, return 0; } -static void __issue_discard_cmd(struct f2fs_sb_info *sbi, bool issue_cond) +static int __issue_discard_cmd(struct f2fs_sb_info *sbi, bool issue_cond) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; struct list_head *pend_list; struct discard_cmd *dc, *tmp; struct blk_plug plug; - int i, iter = 0; + int iter = 0, issued = 0; + int i; mutex_lock(&dcc->cmd_lock); f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, &dcc->root)); blk_start_plug(&plug); - for (i = MAX_PLIST_NUM - 1; i >= 0; i--) { + for (i = MAX_PLIST_NUM - 1; + i >= 0 && plist_issue(dcc->pend_list_tag[i]); i--) { pend_list = &dcc->pend_list[i]; list_for_each_entry_safe(dc, tmp, pend_list, list) { f2fs_bug_on(sbi, dc->state != D_PREP); - if (!issue_cond || is_idle(sbi)) + /* Hurry up to finish fstrim */ + if (dcc->pend_list_tag[i] & P_TRIM) { + __submit_discard_cmd(sbi, dc); + issued++; + continue; + } + + if (!issue_cond || is_idle(sbi)) { + issued++; __submit_discard_cmd(sbi, dc); + } if (issue_cond && iter++ > DISCARD_ISSUE_RATE) goto out; } + if (list_empty(pend_list) && dcc->pend_list_tag[i] & P_TRIM) + dcc->pend_list_tag[i] &= (~P_TRIM); } out: blk_finish_plug(&plug); mutex_unlock(&dcc->cmd_lock); + + return issued; +} + +static void __drop_discard_cmd(struct f2fs_sb_info *sbi) +{ + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + struct list_head *pend_list; + struct discard_cmd *dc, *tmp; + int i; + + mutex_lock(&dcc->cmd_lock); + for (i = MAX_PLIST_NUM - 1; i >= 0; i--) { + pend_list = &dcc->pend_list[i]; + list_for_each_entry_safe(dc, tmp, pend_list, list) { + f2fs_bug_on(sbi, dc->state != D_PREP); + __remove_discard_cmd(sbi, dc); + } + } + mutex_unlock(&dcc->cmd_lock); } static void __wait_one_discard_bio(struct f2fs_sb_info *sbi, @@ -1206,34 +1239,56 @@ void stop_discard_thread(struct f2fs_sb_info *sbi) void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi) { __issue_discard_cmd(sbi, false); + __drop_discard_cmd(sbi); __wait_discard_cmd(sbi, false); } +static void mark_discard_range_all(struct f2fs_sb_info *sbi) +{ + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + int i; + + mutex_lock(&dcc->cmd_lock); + for (i = 0; i < MAX_PLIST_NUM; i++) + dcc->pend_list_tag[i] |= P_TRIM; + mutex_unlock(&dcc->cmd_lock); +} + static int issue_discard_thread(void *data) { struct f2fs_sb_info *sbi = data; struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; wait_queue_head_t *q = &dcc->discard_wait_queue; + unsigned int wait_ms = DEF_MIN_DISCARD_ISSUE_TIME; + int issued; set_freezable(); do { - wait_event_interruptible(*q, kthread_should_stop() || - freezing(current) || - atomic_read(&dcc->discard_cmd_cnt)); + wait_event_interruptible_timeout(*q, + kthread_should_stop() || freezing(current) || + dcc->discard_wake, + msecs_to_jiffies(wait_ms)); if (try_to_freeze()) continue; if (kthread_should_stop()) return 0; + if (dcc->discard_wake) + dcc->discard_wake = 0; + sb_start_intwrite(sbi->sb); - __issue_discard_cmd(sbi, true); - __wait_discard_cmd(sbi, true); + issued = __issue_discard_cmd(sbi, true); + if (issued) { + __wait_discard_cmd(sbi, true); + wait_ms = DEF_MIN_DISCARD_ISSUE_TIME; + } else { + wait_ms = DEF_MAX_DISCARD_ISSUE_TIME; + } sb_end_intwrite(sbi->sb); - congestion_wait(BLK_RW_SYNC, HZ/50); } while (!kthread_should_stop()); return 0; } @@ -1424,7 +1479,8 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi) void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc) { - struct list_head *head = &(SM_I(sbi)->dcc_info->entry_list); + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + struct list_head *head = &dcc->entry_list; struct discard_entry *entry, *this; struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); unsigned long *prefree_map = dirty_i->dirty_segmap[PRE]; @@ -1506,11 +1562,12 @@ skip: goto find_next; list_del(&entry->list); - SM_I(sbi)->dcc_info->nr_discards -= total_len; + dcc->nr_discards -= total_len; kmem_cache_free(discard_entry_slab, entry); } - wake_up(&SM_I(sbi)->dcc_info->discard_wait_queue); + dcc->discard_wake = 1; + wake_up_interruptible_all(&dcc->discard_wait_queue); } static int create_discard_cmd_control(struct f2fs_sb_info *sbi) @@ -1528,9 +1585,13 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi) if (!dcc) return -ENOMEM; + dcc->discard_granularity = DEFAULT_DISCARD_GRANULARITY; INIT_LIST_HEAD(&dcc->entry_list); - for (i = 0; i < MAX_PLIST_NUM; i++) + for (i = 0; i < MAX_PLIST_NUM; i++) { INIT_LIST_HEAD(&dcc->pend_list[i]); + if (i >= dcc->discard_granularity - 1) + dcc->pend_list_tag[i] |= P_ACTIVE; + } INIT_LIST_HEAD(&dcc->wait_list); mutex_init(&dcc->cmd_lock); atomic_set(&dcc->issued_discard, 0); @@ -2207,6 +2268,8 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) schedule(); } + /* It's time to issue all the filed discards */ + mark_discard_range_all(sbi); out: range->len = F2FS_BLK_TO_BYTES(cpc.trimmed); return err; -- cgit v1.2.3 From 8f8b9cda392501633678aa75ed8830d31e8e79f9 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 15 Aug 2017 21:27:19 -0700 Subject: f2fs: issue discard commands if gc_urgent is set It's time to issue all the discard commands, if user sets the idle time. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 97d43373e10e..abfa55174d0c 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -21,6 +21,7 @@ #include "f2fs.h" #include "segment.h" #include "node.h" +#include "gc.h" #include "trace.h" #include @@ -1274,8 +1275,11 @@ static int issue_discard_thread(void *data) if (kthread_should_stop()) return 0; - if (dcc->discard_wake) + if (dcc->discard_wake) { dcc->discard_wake = 0; + if (sbi->gc_thread && sbi->gc_thread->gc_urgent) + mark_discard_range_all(sbi); + } sb_start_intwrite(sbi->sb); -- cgit v1.2.3 From 440c08fb62d22b49bcb846b1805a20441d297172 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 21 Aug 2017 22:53:45 +0800 Subject: f2fs: fix out-of-order execution in f2fs_issue_flush In f2fs_issue_flush, due to out-of-order execution of CPU, wake_up can be called before we insert issue_list, result in long latency of wait_for_completion. Fix this by adding smp_mb() to force the order of related codes. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index abfa55174d0c..e9416ae025aa 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -549,7 +549,10 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi) atomic_inc(&fcc->issing_flush); llist_add(&cmd.llnode, &fcc->issue_list); - if (!fcc->dispatch_list) + /* update issue_list before we wake up issue_flush thread */ + smp_mb(); + + if (waitqueue_active(&fcc->flush_wait_queue)) wake_up(&fcc->flush_wait_queue); if (fcc->f2fs_issue_flush) { -- cgit v1.2.3 From 9071bb1c094f70ab08253b9335e62b8a79e20b15 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 18 Aug 2017 23:37:36 +0800 Subject: f2fs: clear FI_HOT_DATA correctly This patch fixes to clear FI_HOT_DATA correctly in below path: - error handling in f2fs_ioc_start_atomic_write - after commit atomic write in f2fs_ioc_commit_atomic_write - after drop atomic write in drop_inmem_pages Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index e9416ae025aa..78a0e8ee62b8 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -255,6 +255,7 @@ void drop_inmem_pages(struct inode *inode) mutex_unlock(&fi->inmem_lock); clear_inode_flag(inode, FI_ATOMIC_FILE); + clear_inode_flag(inode, FI_HOT_DATA); stat_dec_atomic_write(inode); } -- cgit v1.2.3 From 1e5c4e7c8dc42ce706d09ace943ab29fbe6aa6ac Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 22 Aug 2017 21:15:43 -0700 Subject: f2fs: wake up discard_thread iff there is a candidate This patch fixes to avoid needless wake ups. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 78a0e8ee62b8..00253111c227 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1574,8 +1574,7 @@ skip: kmem_cache_free(discard_entry_slab, entry); } - dcc->discard_wake = 1; - wake_up_interruptible_all(&dcc->discard_wait_queue); + wake_up_discard_thread(sbi, false); } static int create_discard_cmd_control(struct f2fs_sb_info *sbi) -- cgit v1.2.3 From bc0c8fe8b11e30ac7d881638ed422a7473827bb7 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 30 Aug 2017 18:04:48 +0800 Subject: f2fs: remove unneeded parameter of change_curseg allocate_segment_by_default is the only caller of change_curseg passing @reuse with 'false', but commit 763bfe1bc575 ("f2fs: remove reusing any prefree segments") removes the calling, after that, @reuse in change_curseg always be true, so, let's clean up the unneeded parameter. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 00253111c227..a44c6fd2f1c5 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2092,7 +2092,7 @@ static void __refresh_next_blkoff(struct f2fs_sb_info *sbi, * This function always allocates a used segment(from dirty seglist) by SSR * manner, so it should recover the existing segment information of valid blocks */ -static void change_curseg(struct f2fs_sb_info *sbi, int type, bool reuse) +static void change_curseg(struct f2fs_sb_info *sbi, int type) { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); struct curseg_info *curseg = CURSEG_I(sbi, type); @@ -2113,12 +2113,10 @@ static void change_curseg(struct f2fs_sb_info *sbi, int type, bool reuse) curseg->alloc_type = SSR; __next_free_blkoff(sbi, curseg, 0); - if (reuse) { - sum_page = get_sum_page(sbi, new_segno); - sum_node = (struct f2fs_summary_block *)page_address(sum_page); - memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE); - f2fs_put_page(sum_page, 1); - } + sum_page = get_sum_page(sbi, new_segno); + sum_node = (struct f2fs_summary_block *)page_address(sum_page); + memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE); + f2fs_put_page(sum_page, 1); } static int get_ssr_segment(struct f2fs_sb_info *sbi, int type) @@ -2182,7 +2180,7 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi, else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type)) new_curseg(sbi, type, false); else if (need_SSR(sbi) && get_ssr_segment(sbi, type)) - change_curseg(sbi, type, true); + change_curseg(sbi, type); else new_curseg(sbi, type, false); @@ -2535,7 +2533,7 @@ void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, /* change the current segment */ if (segno != curseg->segno) { curseg->next_segno = segno; - change_curseg(sbi, type, true); + change_curseg(sbi, type); } curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr); @@ -2554,7 +2552,7 @@ void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, if (recover_curseg) { if (old_cursegno != curseg->segno) { curseg->next_segno = old_cursegno; - change_curseg(sbi, type, true); + change_curseg(sbi, type); } curseg->next_blkoff = old_blkoff; } -- cgit v1.2.3 From 3b8bbd990ce57815f0b0b4b60029ba8d173f5912 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 31 Aug 2017 18:56:05 +0800 Subject: f2fs: avoid race in between atomic_read & atomic_inc Previously, we will miss merging flush command during fsync due to below race condition: Thread A Thread B Thread C - f2fs_issue_flush - atomic_read(&issing_flush) - f2fs_issue_flush - atomic_read(&issing_flush) - f2fs_issue_flush - atomic_read(&issing_flush) - atomic_inc(&issing_flush) - atomic_inc(&issing_flush) - atomic_inc(&issing_flush) - submit_flush_wait - submit_flush_wait - submit_flush_wait It needs to use atomic_inc_return instead to avoid such race. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index a44c6fd2f1c5..370b4ca0e294 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -536,8 +536,7 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi) return ret; } - if (!atomic_read(&fcc->issing_flush)) { - atomic_inc(&fcc->issing_flush); + if (atomic_inc_return(&fcc->issing_flush) == 1) { ret = submit_flush_wait(sbi); atomic_dec(&fcc->issing_flush); @@ -547,7 +546,6 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi) init_completion(&cmd.wait); - atomic_inc(&fcc->issing_flush); llist_add(&cmd.llnode, &fcc->issue_list); /* update issue_list before we wake up issue_flush thread */ -- cgit v1.2.3 From ccb0b5d09d8c46c03e91c8e7a62b57fedee0662e Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 31 Aug 2017 18:56:06 +0800 Subject: f2fs: fix to wake up all sleeping flusher In scenario of remount_ro vs flush, after flush_thread exits in ->remount_fs, flusher will only clean up golbal issue_list, but without waking up flushers waiting on that list, result in hang related user threads. In order to fix this issue, this patch enables the flusher to take charge of issue_flush thread: executes merged flush command, and wake up all sleeping flushers. Fixes: 5eba8c5d1fb3 ("f2fs: fix to access nullified flush_cmd_control pointer") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 370b4ca0e294..9d8d32b38073 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -558,8 +558,27 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi) wait_for_completion(&cmd.wait); atomic_dec(&fcc->issing_flush); } else { - llist_del_all(&fcc->issue_list); - atomic_set(&fcc->issing_flush, 0); + struct llist_node *list; + + list = llist_del_all(&fcc->issue_list); + if (!list) { + wait_for_completion(&cmd.wait); + atomic_dec(&fcc->issing_flush); + } else { + struct flush_cmd *tmp, *next; + + ret = submit_flush_wait(sbi); + + llist_for_each_entry_safe(tmp, next, list, llnode) { + if (tmp == &cmd) { + cmd.ret = ret; + atomic_dec(&fcc->issing_flush); + continue; + } + tmp->ret = ret; + complete(&tmp->wait); + } + } } return cmd.ret; -- cgit v1.2.3 From e2cd416ffa3262e4cffb03aec48f6ad15f996f95 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 5 Sep 2017 17:04:35 -0700 Subject: f2fs: use generic terms used for encrypted block management This patch renames functions regarding to buffer management via META_MAPPING used for encrypted blocks especially. We can actually use them in generic way. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 9d8d32b38073..e95470071030 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2608,8 +2608,7 @@ void f2fs_wait_on_page_writeback(struct page *page, } } -void f2fs_wait_on_encrypted_page_writeback(struct f2fs_sb_info *sbi, - block_t blkaddr) +void f2fs_wait_on_block_writeback(struct f2fs_sb_info *sbi, block_t blkaddr) { struct page *cpage; -- cgit v1.2.3 From ef75b9afda215c1446195bf487987af43a458959 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sat, 9 Sep 2017 12:03:23 -0700 Subject: f2fs: better to wait for fstrim completion In android, we'd better wait for fstrim completion instead of issuing the discard commands asynchronous. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index e95470071030..8ee473b1830f 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "f2fs.h" #include "segment.h" @@ -1141,6 +1142,9 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi, bool issue_cond) if (dcc->pend_list_tag[i] & P_TRIM) { __submit_discard_cmd(sbi, dc); issued++; + + if (fatal_signal_pending(current)) + break; continue; } @@ -1257,7 +1261,7 @@ void stop_discard_thread(struct f2fs_sb_info *sbi) } } -/* This comes from f2fs_put_super */ +/* This comes from f2fs_put_super and f2fs_trim_fs */ void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi) { __issue_discard_cmd(sbi, false); @@ -2292,6 +2296,7 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) } /* It's time to issue all the filed discards */ mark_discard_range_all(sbi); + f2fs_wait_discard_bios(sbi); out: range->len = F2FS_BLK_TO_BYTES(cpc.trimmed); return err; -- cgit v1.2.3 From 29f775fa640d4df8d5a1a0dad0b1c44143a00007 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sat, 9 Sep 2017 11:11:04 -0700 Subject: f2fs: speed up gc_urgent mode with SSR This patch activates SSR in gc_urgent mode. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 8ee473b1830f..3244cfb1885f 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -169,6 +169,21 @@ found: return result - size + __reverse_ffz(tmp); } +bool need_SSR(struct f2fs_sb_info *sbi) +{ + int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES); + int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS); + int imeta_secs = get_blocktype_secs(sbi, F2FS_DIRTY_IMETA); + + if (test_opt(sbi, LFS)) + return false; + if (sbi->gc_thread && sbi->gc_thread->gc_urgent) + return true; + + return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs + + 2 * reserved_sections(sbi)); +} + void register_inmem_page(struct inode *inode, struct page *page) { struct f2fs_inode_info *fi = F2FS_I(inode); -- cgit v1.2.3 From c7fd9e2b4a687666fbf12b73e443134580976606 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 12 Sep 2017 21:35:12 +0800 Subject: f2fs: hurry up to issue discard after io interruption Once we encounter I/O interruption during issuing discards, we will delay long time before next round, but if system status is I/O idle during the time, it may loses opportunity to issue discards. So this patch changes to hurry up to issue discard after io interruption. Besides, this patch also fixes to issue discards accurately with assigned rate. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 3244cfb1885f..059a219b7740 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1142,6 +1142,7 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi, bool issue_cond) struct blk_plug plug; int iter = 0, issued = 0; int i; + bool io_interrupted = false; mutex_lock(&dcc->cmd_lock); f2fs_bug_on(sbi, @@ -1163,11 +1164,20 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi, bool issue_cond) continue; } - if (!issue_cond || is_idle(sbi)) { + if (!issue_cond) { + __submit_discard_cmd(sbi, dc); issued++; + continue; + } + + if (is_idle(sbi)) { __submit_discard_cmd(sbi, dc); + issued++; + } else { + io_interrupted = true; } - if (issue_cond && iter++ > DISCARD_ISSUE_RATE) + + if (++iter >= DISCARD_ISSUE_RATE) goto out; } if (list_empty(pend_list) && dcc->pend_list_tag[i] & P_TRIM) @@ -1177,6 +1187,9 @@ out: blk_finish_plug(&plug); mutex_unlock(&dcc->cmd_lock); + if (!issued && io_interrupted) + issued = -1; + return issued; } -- cgit v1.2.3 From d5347b1e666dd8ef0d26fde2f4f55e7bbd987dce Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 2 Oct 2017 02:50:16 +0800 Subject: f2fs: fix potential panic during fstrim As Ju Hyung Park reported: "When 'fstrim' is called for manual trim, a BUG() can be triggered randomly with this patch. I'm seeing this issue on both x86 Desktop and arm64 Android phone. On x86 Desktop, this was caused during Ubuntu boot-up. I have a cronjob installed which calls 'fstrim -v /' during boot. On arm64 Android, this was caused during GC looping with 1ms gc_min_sleep_time & gc_max_sleep_time." Root cause of this issue is that f2fs_wait_discard_bios can only be used by f2fs_put_super, because during put_super there must be no other referrers, so it can ignore discard entry's reference count when removing the entry, otherwise in other caller we will hit bug_on in __remove_discard_cmd as there may be other issuer added reference count in discard entry. Thread A Thread B - issue_discard_thread - f2fs_ioc_fitrim - f2fs_trim_fs - f2fs_wait_discard_bios - __issue_discard_cmd - __submit_discard_cmd - __wait_discard_cmd - dc->ref++ - __wait_one_discard_bio - __wait_discard_cmd - __remove_discard_cmd - f2fs_bug_on(sbi, dc->ref) Fixes: 969d1b180d987c2be02de890d0fff0f66a0e80de Reported-by: Ju Hyung Park Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 059a219b7740..f5c494389483 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1290,11 +1290,11 @@ void stop_discard_thread(struct f2fs_sb_info *sbi) } /* This comes from f2fs_put_super and f2fs_trim_fs */ -void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi) +void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi, bool umount) { __issue_discard_cmd(sbi, false); __drop_discard_cmd(sbi); - __wait_discard_cmd(sbi, false); + __wait_discard_cmd(sbi, !umount); } static void mark_discard_range_all(struct f2fs_sb_info *sbi) @@ -2324,7 +2324,7 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) } /* It's time to issue all the filed discards */ mark_discard_range_all(sbi); - f2fs_wait_discard_bios(sbi); + f2fs_wait_discard_bios(sbi, false); out: range->len = F2FS_BLK_TO_BYTES(cpc.trimmed); return err; -- cgit v1.2.3 From 08bb9d68d51b2946f244f77865a48b23b29af1eb Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 29 Sep 2017 13:59:38 +0800 Subject: f2fs: enhance multiple device flush When multiple device feature is enabled, during ->fsync we will issue flush in all devices to make sure node/data of the file being persisted into storage. But some flushes of device could be unneeded as file's data may be not writebacked into those devices. So this patch adds and manage bitmap per inode in global cache to indicate which device is dirty and it needs to issue flush during ->fsync, hence, we could improve performance of fsync in scenario of multiple device. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 46 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 35 insertions(+), 11 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index f5c494389483..5351caa2ffd9 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -313,6 +313,7 @@ static int __commit_inmem_pages(struct inode *inode, struct inmem_pages *cur, *tmp; struct f2fs_io_info fio = { .sbi = sbi, + .ino = inode->i_ino, .type = DATA, .op = REQ_OP_WRITE, .op_flags = REQ_SYNC | REQ_PRIO, @@ -485,15 +486,17 @@ static int __submit_flush_wait(struct f2fs_sb_info *sbi, return ret; } -static int submit_flush_wait(struct f2fs_sb_info *sbi) +static int submit_flush_wait(struct f2fs_sb_info *sbi, nid_t ino) { - int ret = __submit_flush_wait(sbi, sbi->sb->s_bdev); + int ret = 0; int i; - if (!sbi->s_ndevs || ret) - return ret; + if (!sbi->s_ndevs) + return __submit_flush_wait(sbi, sbi->sb->s_bdev); - for (i = 1; i < sbi->s_ndevs; i++) { + for (i = 0; i < sbi->s_ndevs; i++) { + if (!is_dirty_device(sbi, ino, i, FLUSH_INO)) + continue; ret = __submit_flush_wait(sbi, FDEV(i).bdev); if (ret) break; @@ -519,7 +522,9 @@ repeat: fcc->dispatch_list = llist_del_all(&fcc->issue_list); fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list); - ret = submit_flush_wait(sbi); + cmd = llist_entry(fcc->dispatch_list, struct flush_cmd, llnode); + + ret = submit_flush_wait(sbi, cmd->ino); atomic_inc(&fcc->issued_flush); llist_for_each_entry_safe(cmd, next, @@ -537,7 +542,7 @@ repeat: goto repeat; } -int f2fs_issue_flush(struct f2fs_sb_info *sbi) +int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino) { struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info; struct flush_cmd cmd; @@ -547,19 +552,20 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi) return 0; if (!test_opt(sbi, FLUSH_MERGE)) { - ret = submit_flush_wait(sbi); + ret = submit_flush_wait(sbi, ino); atomic_inc(&fcc->issued_flush); return ret; } - if (atomic_inc_return(&fcc->issing_flush) == 1) { - ret = submit_flush_wait(sbi); + if (atomic_inc_return(&fcc->issing_flush) == 1 || sbi->s_ndevs > 1) { + ret = submit_flush_wait(sbi, ino); atomic_dec(&fcc->issing_flush); atomic_inc(&fcc->issued_flush); return ret; } + cmd.ino = ino; init_completion(&cmd.wait); llist_add(&cmd.llnode, &fcc->issue_list); @@ -583,7 +589,7 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi) } else { struct flush_cmd *tmp, *next; - ret = submit_flush_wait(sbi); + ret = submit_flush_wait(sbi, ino); llist_for_each_entry_safe(tmp, next, list, llnode) { if (tmp == &cmd) { @@ -2464,6 +2470,20 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, mutex_unlock(&curseg->curseg_mutex); } +static void update_device_state(struct f2fs_io_info *fio) +{ + struct f2fs_sb_info *sbi = fio->sbi; + unsigned int devidx; + + if (!sbi->s_ndevs) + return; + + devidx = f2fs_target_device_index(sbi, fio->new_blkaddr); + + /* update device state for fsync */ + set_dirty_device(sbi, fio->ino, devidx, FLUSH_INO); +} + static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) { int type = __get_segment_type(fio); @@ -2478,6 +2498,8 @@ reallocate: if (err == -EAGAIN) { fio->old_blkaddr = fio->new_blkaddr; goto reallocate; + } else if (!err) { + update_device_state(fio); } } @@ -2538,6 +2560,8 @@ int rewrite_data_page(struct f2fs_io_info *fio) stat_inc_inplace_blocks(fio->sbi); err = f2fs_submit_page_bio(fio); + if (!err) + update_device_state(fio); f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE); -- cgit v1.2.3 From 27eaad09380fe2f1fd8dbfb1e3e7ae6afd70ca80 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 29 Sep 2017 13:59:39 +0800 Subject: f2fs: fix to flush multiple device in checkpoint If f2fs manages multiple devices, in checkpoint, we need to issue flush in those devices which contain dirty data/node in their cache before we write checkpoint region, otherwise, filesystem metadata could be corrupted if hitting SPO after checkpoint. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 5351caa2ffd9..c009bdff2ff6 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -659,6 +659,28 @@ void destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free) } } +int f2fs_flush_device_cache(struct f2fs_sb_info *sbi) +{ + int ret = 0, i; + + if (!sbi->s_ndevs) + return 0; + + for (i = 1; i < sbi->s_ndevs; i++) { + if (!f2fs_test_bit(i, (char *)&sbi->dirty_device)) + continue; + ret = __submit_flush_wait(sbi, FDEV(i).bdev); + if (ret) + break; + + spin_lock(&sbi->dev_lock); + f2fs_clear_bit(i, (char *)&sbi->dirty_device); + spin_unlock(&sbi->dev_lock); + } + + return ret; +} + static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, enum dirty_type dirty_type) { @@ -2482,6 +2504,13 @@ static void update_device_state(struct f2fs_io_info *fio) /* update device state for fsync */ set_dirty_device(sbi, fio->ino, devidx, FLUSH_INO); + + /* update device state for checkpoint */ + if (!f2fs_test_bit(devidx, (char *)&sbi->dirty_device)) { + spin_lock(&sbi->dev_lock); + f2fs_set_bit(devidx, (char *)&sbi->dirty_device); + spin_unlock(&sbi->dev_lock); + } } static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) -- cgit v1.2.3 From 684447dad1385fef8a1c2bfaff770860b0beddc2 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 4 Oct 2017 09:08:32 +0800 Subject: f2fs: support issuing/waiting discard in range Fstrim intends to trim invalid blocks of filesystem only with specified range and granularity, but actually, it will issue all previous cached discard commands which may be out-of-range and be with unmatched granularity, it's unneeded. In order to fix above issues, this patch introduces new helps to support to issue and wait discard in range and adds a new fstrim_list for tracking in-flight discard from ->fstrim. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 127 +++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 106 insertions(+), 21 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index c009bdff2ff6..8bdc31d1c847 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -954,9 +954,11 @@ void __check_sit_bitmap(struct f2fs_sb_info *sbi, /* this function is copied from blkdev_issue_discard from block/blk-lib.c */ static void __submit_discard_cmd(struct f2fs_sb_info *sbi, - struct discard_cmd *dc) + struct discard_cmd *dc, bool fstrim) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + struct list_head *wait_list = fstrim ? &(dcc->fstrim_list) : + &(dcc->wait_list); struct bio *bio = NULL; if (dc->state != D_PREP) @@ -977,7 +979,7 @@ static void __submit_discard_cmd(struct f2fs_sb_info *sbi, bio->bi_private = dc; bio->bi_end_io = f2fs_submit_discard_endio; submit_bio(REQ_SYNC, bio); - list_move_tail(&dc->list, &dcc->wait_list); + list_move_tail(&dc->list, wait_list); __check_sit_bitmap(sbi, dc->start, dc->start + dc->len); f2fs_update_iostat(sbi, FS_DISCARD, 1); @@ -1162,6 +1164,68 @@ static int __queue_discard_cmd(struct f2fs_sb_info *sbi, return 0; } +static void __issue_discard_cmd_range(struct f2fs_sb_info *sbi, + unsigned int start, unsigned int end, + unsigned int granularity) +{ + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + struct discard_cmd *prev_dc = NULL, *next_dc = NULL; + struct rb_node **insert_p = NULL, *insert_parent = NULL; + struct discard_cmd *dc; + struct blk_plug plug; + int issued; + +next: + issued = 0; + + mutex_lock(&dcc->cmd_lock); + f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, &dcc->root)); + + dc = (struct discard_cmd *)__lookup_rb_tree_ret(&dcc->root, + NULL, start, + (struct rb_entry **)&prev_dc, + (struct rb_entry **)&next_dc, + &insert_p, &insert_parent, true); + if (!dc) + dc = next_dc; + + blk_start_plug(&plug); + + while (dc && dc->lstart <= end) { + struct rb_node *node; + + if (dc->len < granularity) + goto skip; + + if (dc->state != D_PREP) { + list_move_tail(&dc->list, &dcc->fstrim_list); + goto skip; + } + + __submit_discard_cmd(sbi, dc, true); + + if (++issued >= DISCARD_ISSUE_RATE) { + start = dc->lstart + dc->len; + + blk_finish_plug(&plug); + mutex_unlock(&dcc->cmd_lock); + + schedule(); + + goto next; + } +skip: + node = rb_next(&dc->rb_node); + dc = rb_entry_safe(node, struct discard_cmd, rb_node); + + if (fatal_signal_pending(current)) + break; + } + + blk_finish_plug(&plug); + mutex_unlock(&dcc->cmd_lock); +} + static int __issue_discard_cmd(struct f2fs_sb_info *sbi, bool issue_cond) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; @@ -1184,22 +1248,19 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi, bool issue_cond) /* Hurry up to finish fstrim */ if (dcc->pend_list_tag[i] & P_TRIM) { - __submit_discard_cmd(sbi, dc); + __submit_discard_cmd(sbi, dc, false); issued++; - - if (fatal_signal_pending(current)) - break; continue; } if (!issue_cond) { - __submit_discard_cmd(sbi, dc); + __submit_discard_cmd(sbi, dc, false); issued++; continue; } if (is_idle(sbi)) { - __submit_discard_cmd(sbi, dc); + __submit_discard_cmd(sbi, dc, false); issued++; } else { io_interrupted = true; @@ -1253,10 +1314,14 @@ static void __wait_one_discard_bio(struct f2fs_sb_info *sbi, mutex_unlock(&dcc->cmd_lock); } -static void __wait_discard_cmd(struct f2fs_sb_info *sbi, bool wait_cond) +static void __wait_discard_cmd_range(struct f2fs_sb_info *sbi, bool wait_cond, + block_t start, block_t end, + unsigned int granularity, + bool fstrim) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; - struct list_head *wait_list = &(dcc->wait_list); + struct list_head *wait_list = fstrim ? &(dcc->fstrim_list) : + &(dcc->wait_list); struct discard_cmd *dc, *tmp; bool need_wait; @@ -1265,6 +1330,10 @@ next: mutex_lock(&dcc->cmd_lock); list_for_each_entry_safe(dc, tmp, wait_list, list) { + if (dc->lstart + dc->len <= start || end <= dc->lstart) + continue; + if (dc->len < granularity) + continue; if (!wait_cond || (dc->state == D_DONE && !dc->ref)) { wait_for_completion_io(&dc->wait); __remove_discard_cmd(sbi, dc); @@ -1282,6 +1351,11 @@ next: } } +static void __wait_all_discard_cmd(struct f2fs_sb_info *sbi, bool wait_cond) +{ + __wait_discard_cmd_range(sbi, wait_cond, 0, UINT_MAX, 1, false); +} + /* This should be covered by global mutex, &sit_i->sentry_lock */ void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) { @@ -1317,12 +1391,12 @@ void stop_discard_thread(struct f2fs_sb_info *sbi) } } -/* This comes from f2fs_put_super and f2fs_trim_fs */ -void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi, bool umount) +/* This comes from f2fs_put_super */ +void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi) { __issue_discard_cmd(sbi, false); __drop_discard_cmd(sbi); - __wait_discard_cmd(sbi, !umount); + __wait_all_discard_cmd(sbi, false); } static void mark_discard_range_all(struct f2fs_sb_info *sbi) @@ -1366,7 +1440,7 @@ static int issue_discard_thread(void *data) issued = __issue_discard_cmd(sbi, true); if (issued) { - __wait_discard_cmd(sbi, true); + __wait_all_discard_cmd(sbi, true); wait_ms = DEF_MIN_DISCARD_ISSUE_TIME; } else { wait_ms = DEF_MAX_DISCARD_ISSUE_TIME; @@ -1677,6 +1751,7 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi) dcc->pend_list_tag[i] |= P_ACTIVE; } INIT_LIST_HEAD(&dcc->wait_list); + INIT_LIST_HEAD(&dcc->fstrim_list); mutex_init(&dcc->cmd_lock); atomic_set(&dcc->issued_discard, 0); atomic_set(&dcc->issing_discard, 0); @@ -2304,7 +2379,8 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) { __u64 start = F2FS_BYTES_TO_BLK(range->start); __u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1; - unsigned int start_segno, end_segno; + unsigned int start_segno, end_segno, cur_segno; + block_t start_block, end_block; struct cp_control cpc; int err = 0; @@ -2325,12 +2401,17 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start); end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 : GET_SEGNO(sbi, end); + + start_block = START_BLOCK(sbi, start_segno); + end_block = START_BLOCK(sbi, end_segno + 1); + cpc.reason = CP_DISCARD; cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen)); /* do checkpoint to issue discard commands safely */ - for (; start_segno <= end_segno; start_segno = cpc.trim_end + 1) { - cpc.trim_start = start_segno; + for (cur_segno = start_segno; cur_segno <= end_segno; + cur_segno = cpc.trim_end + 1) { + cpc.trim_start = cur_segno; if (sbi->discard_blks == 0) break; @@ -2338,7 +2419,7 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) cpc.trim_end = end_segno; else cpc.trim_end = min_t(unsigned int, - rounddown(start_segno + + rounddown(cur_segno + BATCHED_TRIM_SEGMENTS(sbi), sbi->segs_per_sec) - 1, end_segno); @@ -2350,9 +2431,13 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) schedule(); } - /* It's time to issue all the filed discards */ - mark_discard_range_all(sbi); - f2fs_wait_discard_bios(sbi, false); + + start_block = START_BLOCK(sbi, start_segno); + end_block = START_BLOCK(sbi, min(cur_segno, end_segno) + 1); + + __issue_discard_cmd_range(sbi, start_block, end_block, cpc.trim_minlen); + __wait_discard_cmd_range(sbi, true, start_block, end_block, + cpc.trim_minlen, true); out: range->len = F2FS_BLK_TO_BYTES(cpc.trimmed); return err; -- cgit v1.2.3 From 1e65afd14d32eb318caaebf16f5797e2c723fa20 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 4 Oct 2017 09:08:33 +0800 Subject: f2fs: wrap discard policy This patch wraps scattered optional parameters into discard policy as below, later, with it we expect that we can adjust these parameters with proper strategy in different scenario. struct discard_policy { unsigned int min_interval; /* used for candidates exist */ unsigned int max_interval; /* used for candidates not exist */ unsigned int max_requests; /* # of discards issued per round */ unsigned int io_aware_gran; /* minimum granularity discard not be aware of I/O */ bool io_aware; /* issue discard in idle time */ bool sync; /* submit discard with REQ_SYNC flag */ }; This patch doesn't change any logic of codes. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 38 +++++++++++++++++++++++++++++--------- 1 file changed, 29 insertions(+), 9 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 8bdc31d1c847..c1d648a7d214 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -960,6 +960,7 @@ static void __submit_discard_cmd(struct f2fs_sb_info *sbi, struct list_head *wait_list = fstrim ? &(dcc->fstrim_list) : &(dcc->wait_list); struct bio *bio = NULL; + int flag = dcc->dpolicy.sync ? REQ_SYNC : 0; if (dc->state != D_PREP) return; @@ -978,7 +979,7 @@ static void __submit_discard_cmd(struct f2fs_sb_info *sbi, if (bio) { bio->bi_private = dc; bio->bi_end_io = f2fs_submit_discard_endio; - submit_bio(REQ_SYNC, bio); + submit_bio(flag, bio); list_move_tail(&dc->list, wait_list); __check_sit_bitmap(sbi, dc->start, dc->start + dc->len); @@ -1172,6 +1173,7 @@ static void __issue_discard_cmd_range(struct f2fs_sb_info *sbi, struct discard_cmd *prev_dc = NULL, *next_dc = NULL; struct rb_node **insert_p = NULL, *insert_parent = NULL; struct discard_cmd *dc; + struct discard_policy *dpolicy = &dcc->dpolicy; struct blk_plug plug; int issued; @@ -1204,7 +1206,7 @@ next: __submit_discard_cmd(sbi, dc, true); - if (++issued >= DISCARD_ISSUE_RATE) { + if (++issued >= dpolicy->max_requests) { start = dc->lstart + dc->len; blk_finish_plug(&plug); @@ -1232,6 +1234,7 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi, bool issue_cond) struct list_head *pend_list; struct discard_cmd *dc, *tmp; struct blk_plug plug; + struct discard_policy *dpolicy = &dcc->dpolicy; int iter = 0, issued = 0; int i; bool io_interrupted = false; @@ -1259,14 +1262,16 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi, bool issue_cond) continue; } - if (is_idle(sbi)) { - __submit_discard_cmd(sbi, dc, false); - issued++; - } else { + if (dpolicy->io_aware && i < dpolicy->io_aware_gran && + !is_idle(sbi)) { io_interrupted = true; + goto skip; } - if (++iter >= DISCARD_ISSUE_RATE) + __submit_discard_cmd(sbi, dc, false); + issued++; +skip: + if (++iter >= dpolicy->max_requests) goto out; } if (list_empty(pend_list) && dcc->pend_list_tag[i] & P_TRIM) @@ -1415,6 +1420,7 @@ static int issue_discard_thread(void *data) struct f2fs_sb_info *sbi = data; struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; wait_queue_head_t *q = &dcc->discard_wait_queue; + struct discard_policy *dpolicy = &dcc->dpolicy; unsigned int wait_ms = DEF_MIN_DISCARD_ISSUE_TIME; int issued; @@ -1441,9 +1447,9 @@ static int issue_discard_thread(void *data) issued = __issue_discard_cmd(sbi, true); if (issued) { __wait_all_discard_cmd(sbi, true); - wait_ms = DEF_MIN_DISCARD_ISSUE_TIME; + wait_ms = dpolicy->min_interval; } else { - wait_ms = DEF_MAX_DISCARD_ISSUE_TIME; + wait_ms = dpolicy->max_interval; } sb_end_intwrite(sbi->sb); @@ -1728,6 +1734,18 @@ skip: wake_up_discard_thread(sbi, false); } +static void inline init_discard_policy(struct discard_cmd_control *dcc) +{ + struct discard_policy *dpolicy = &dcc->dpolicy; + + dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME; + dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME; + dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST; + dpolicy->io_aware_gran = MAX_PLIST_NUM; + dpolicy->io_aware = true; + dpolicy->sync = true; +} + static int create_discard_cmd_control(struct f2fs_sb_info *sbi) { dev_t dev = sbi->sb->s_bdev->bd_dev; @@ -1761,6 +1779,8 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi) dcc->undiscard_blks = 0; dcc->root = RB_ROOT; + init_discard_policy(dcc); + init_waitqueue_head(&dcc->discard_wait_queue); SM_I(sbi)->dcc_info = dcc; init_thread: -- cgit v1.2.3 From a34ab5ca4f94543741fa304c4cb2095f0bc82898 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 4 Oct 2017 09:08:34 +0800 Subject: f2fs: split discard policy There are many different scenarios such as fstrim, umount, urgent or background where we will issue discards, actually, they need use different policy in aspect of io aware, discard granularity, delay interval and so on. But now they just share one common discard policy, so there will be race when changing policy in between these scenarios, the interference of changing discard policy will be very serious. This patch changes to split discard policy for different scenarios. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 149 ++++++++++++++++++++++++++---------------------------- 1 file changed, 73 insertions(+), 76 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index c1d648a7d214..f1dbf8d5574e 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -954,13 +954,14 @@ void __check_sit_bitmap(struct f2fs_sb_info *sbi, /* this function is copied from blkdev_issue_discard from block/blk-lib.c */ static void __submit_discard_cmd(struct f2fs_sb_info *sbi, - struct discard_cmd *dc, bool fstrim) + struct discard_policy *dpolicy, + struct discard_cmd *dc) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; - struct list_head *wait_list = fstrim ? &(dcc->fstrim_list) : - &(dcc->wait_list); + struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ? + &(dcc->fstrim_list) : &(dcc->wait_list); struct bio *bio = NULL; - int flag = dcc->dpolicy.sync ? REQ_SYNC : 0; + int flag = dpolicy->sync ? REQ_SYNC : 0; if (dc->state != D_PREP) return; @@ -1166,14 +1167,13 @@ static int __queue_discard_cmd(struct f2fs_sb_info *sbi, } static void __issue_discard_cmd_range(struct f2fs_sb_info *sbi, - unsigned int start, unsigned int end, - unsigned int granularity) + struct discard_policy *dpolicy, + unsigned int start, unsigned int end) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; struct discard_cmd *prev_dc = NULL, *next_dc = NULL; struct rb_node **insert_p = NULL, *insert_parent = NULL; struct discard_cmd *dc; - struct discard_policy *dpolicy = &dcc->dpolicy; struct blk_plug plug; int issued; @@ -1196,7 +1196,7 @@ next: while (dc && dc->lstart <= end) { struct rb_node *node; - if (dc->len < granularity) + if (dc->len < dpolicy->granularity) goto skip; if (dc->state != D_PREP) { @@ -1204,7 +1204,7 @@ next: goto skip; } - __submit_discard_cmd(sbi, dc, true); + __submit_discard_cmd(sbi, dpolicy, dc); if (++issued >= dpolicy->max_requests) { start = dc->lstart + dc->len; @@ -1228,54 +1228,39 @@ skip: mutex_unlock(&dcc->cmd_lock); } -static int __issue_discard_cmd(struct f2fs_sb_info *sbi, bool issue_cond) +static int __issue_discard_cmd(struct f2fs_sb_info *sbi, + struct discard_policy *dpolicy) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; struct list_head *pend_list; struct discard_cmd *dc, *tmp; struct blk_plug plug; - struct discard_policy *dpolicy = &dcc->dpolicy; - int iter = 0, issued = 0; - int i; + int i, iter = 0, issued = 0; bool io_interrupted = false; mutex_lock(&dcc->cmd_lock); f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, &dcc->root)); blk_start_plug(&plug); - for (i = MAX_PLIST_NUM - 1; - i >= 0 && plist_issue(dcc->pend_list_tag[i]); i--) { + for (i = MAX_PLIST_NUM - 1; i >= 0; i--) { + if (i + 1 < dpolicy->granularity) + break; pend_list = &dcc->pend_list[i]; list_for_each_entry_safe(dc, tmp, pend_list, list) { f2fs_bug_on(sbi, dc->state != D_PREP); - /* Hurry up to finish fstrim */ - if (dcc->pend_list_tag[i] & P_TRIM) { - __submit_discard_cmd(sbi, dc, false); - issued++; - continue; - } - - if (!issue_cond) { - __submit_discard_cmd(sbi, dc, false); - issued++; - continue; - } - if (dpolicy->io_aware && i < dpolicy->io_aware_gran && !is_idle(sbi)) { io_interrupted = true; goto skip; } - __submit_discard_cmd(sbi, dc, false); + __submit_discard_cmd(sbi, dpolicy, dc); issued++; skip: if (++iter >= dpolicy->max_requests) goto out; } - if (list_empty(pend_list) && dcc->pend_list_tag[i] & P_TRIM) - dcc->pend_list_tag[i] &= (~P_TRIM); } out: blk_finish_plug(&plug); @@ -1319,14 +1304,13 @@ static void __wait_one_discard_bio(struct f2fs_sb_info *sbi, mutex_unlock(&dcc->cmd_lock); } -static void __wait_discard_cmd_range(struct f2fs_sb_info *sbi, bool wait_cond, - block_t start, block_t end, - unsigned int granularity, - bool fstrim) +static void __wait_discard_cmd_range(struct f2fs_sb_info *sbi, + struct discard_policy *dpolicy, + block_t start, block_t end) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; - struct list_head *wait_list = fstrim ? &(dcc->fstrim_list) : - &(dcc->wait_list); + struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ? + &(dcc->fstrim_list) : &(dcc->wait_list); struct discard_cmd *dc, *tmp; bool need_wait; @@ -1337,9 +1321,9 @@ next: list_for_each_entry_safe(dc, tmp, wait_list, list) { if (dc->lstart + dc->len <= start || end <= dc->lstart) continue; - if (dc->len < granularity) + if (dc->len < dpolicy->granularity) continue; - if (!wait_cond || (dc->state == D_DONE && !dc->ref)) { + if (dc->state == D_DONE && !dc->ref) { wait_for_completion_io(&dc->wait); __remove_discard_cmd(sbi, dc); } else { @@ -1356,9 +1340,10 @@ next: } } -static void __wait_all_discard_cmd(struct f2fs_sb_info *sbi, bool wait_cond) +static void __wait_all_discard_cmd(struct f2fs_sb_info *sbi, + struct discard_policy *dpolicy) { - __wait_discard_cmd_range(sbi, wait_cond, 0, UINT_MAX, 1, false); + __wait_discard_cmd_range(sbi, dpolicy, 0, UINT_MAX); } /* This should be covered by global mutex, &sit_i->sentry_lock */ @@ -1398,21 +1383,14 @@ void stop_discard_thread(struct f2fs_sb_info *sbi) /* This comes from f2fs_put_super */ void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi) -{ - __issue_discard_cmd(sbi, false); - __drop_discard_cmd(sbi); - __wait_all_discard_cmd(sbi, false); -} - -static void mark_discard_range_all(struct f2fs_sb_info *sbi) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; - int i; + struct discard_policy dpolicy; - mutex_lock(&dcc->cmd_lock); - for (i = 0; i < MAX_PLIST_NUM; i++) - dcc->pend_list_tag[i] |= P_TRIM; - mutex_unlock(&dcc->cmd_lock); + init_discard_policy(&dpolicy, DPOLICY_UMOUNT, dcc->discard_granularity); + __issue_discard_cmd(sbi, &dpolicy); + __drop_discard_cmd(sbi); + __wait_all_discard_cmd(sbi, &dpolicy); } static int issue_discard_thread(void *data) @@ -1420,13 +1398,16 @@ static int issue_discard_thread(void *data) struct f2fs_sb_info *sbi = data; struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; wait_queue_head_t *q = &dcc->discard_wait_queue; - struct discard_policy *dpolicy = &dcc->dpolicy; + struct discard_policy dpolicy; unsigned int wait_ms = DEF_MIN_DISCARD_ISSUE_TIME; int issued; set_freezable(); do { + init_discard_policy(&dpolicy, DPOLICY_BG, + dcc->discard_granularity); + wait_event_interruptible_timeout(*q, kthread_should_stop() || freezing(current) || dcc->discard_wake, @@ -1439,17 +1420,18 @@ static int issue_discard_thread(void *data) if (dcc->discard_wake) { dcc->discard_wake = 0; if (sbi->gc_thread && sbi->gc_thread->gc_urgent) - mark_discard_range_all(sbi); + init_discard_policy(&dpolicy, + DPOLICY_FORCE, 1); } sb_start_intwrite(sbi->sb); - issued = __issue_discard_cmd(sbi, true); + issued = __issue_discard_cmd(sbi, &dpolicy); if (issued) { - __wait_all_discard_cmd(sbi, true); - wait_ms = dpolicy->min_interval; + __wait_all_discard_cmd(sbi, &dpolicy); + wait_ms = dpolicy.min_interval; } else { - wait_ms = dpolicy->max_interval; + wait_ms = dpolicy.max_interval; } sb_end_intwrite(sbi->sb); @@ -1734,16 +1716,35 @@ skip: wake_up_discard_thread(sbi, false); } -static void inline init_discard_policy(struct discard_cmd_control *dcc) +void init_discard_policy(struct discard_policy *dpolicy, + int discard_type, unsigned int granularity) { - struct discard_policy *dpolicy = &dcc->dpolicy; - - dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME; - dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME; - dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST; - dpolicy->io_aware_gran = MAX_PLIST_NUM; - dpolicy->io_aware = true; + /* common policy */ + dpolicy->type = discard_type; dpolicy->sync = true; + dpolicy->granularity = granularity; + + if (discard_type == DPOLICY_BG) { + dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME; + dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME; + dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST; + dpolicy->io_aware_gran = MAX_PLIST_NUM; + dpolicy->io_aware = true; + } else if (discard_type == DPOLICY_FORCE) { + dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME; + dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME; + dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST; + dpolicy->io_aware_gran = MAX_PLIST_NUM; + dpolicy->io_aware = true; + } else if (discard_type == DPOLICY_FSTRIM) { + dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST; + dpolicy->io_aware_gran = MAX_PLIST_NUM; + dpolicy->io_aware = false; + } else if (discard_type == DPOLICY_UMOUNT) { + dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST; + dpolicy->io_aware_gran = MAX_PLIST_NUM; + dpolicy->io_aware = false; + } } static int create_discard_cmd_control(struct f2fs_sb_info *sbi) @@ -1763,11 +1764,8 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi) dcc->discard_granularity = DEFAULT_DISCARD_GRANULARITY; INIT_LIST_HEAD(&dcc->entry_list); - for (i = 0; i < MAX_PLIST_NUM; i++) { + for (i = 0; i < MAX_PLIST_NUM; i++) INIT_LIST_HEAD(&dcc->pend_list[i]); - if (i >= dcc->discard_granularity - 1) - dcc->pend_list_tag[i] |= P_ACTIVE; - } INIT_LIST_HEAD(&dcc->wait_list); INIT_LIST_HEAD(&dcc->fstrim_list); mutex_init(&dcc->cmd_lock); @@ -1779,8 +1777,6 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi) dcc->undiscard_blks = 0; dcc->root = RB_ROOT; - init_discard_policy(dcc); - init_waitqueue_head(&dcc->discard_wait_queue); SM_I(sbi)->dcc_info = dcc; init_thread: @@ -2402,6 +2398,7 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) unsigned int start_segno, end_segno, cur_segno; block_t start_block, end_block; struct cp_control cpc; + struct discard_policy dpolicy; int err = 0; if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize) @@ -2455,9 +2452,9 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) start_block = START_BLOCK(sbi, start_segno); end_block = START_BLOCK(sbi, min(cur_segno, end_segno) + 1); - __issue_discard_cmd_range(sbi, start_block, end_block, cpc.trim_minlen); - __wait_discard_cmd_range(sbi, true, start_block, end_block, - cpc.trim_minlen, true); + init_discard_policy(&dpolicy, DPOLICY_FSTRIM, cpc.trim_minlen); + __issue_discard_cmd_range(sbi, &dpolicy, start_block, end_block); + __wait_discard_cmd_range(sbi, &dpolicy, start_block, end_block); out: range->len = F2FS_BLK_TO_BYTES(cpc.trimmed); return err; -- cgit v1.2.3 From bd502c6e3e7a59aaf28b6d065384bd90f40790bf Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 4 Oct 2017 09:08:35 +0800 Subject: f2fs: reduce cmd_lock coverage in __issue_discard_cmd __submit_discard_cmd may lead long latency due to exhaustion of I/O request resource in block layer, so issuing all discard under cmd_lock may lead to hangtask, in order to avoid that, let's reduce it's coverage. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index f1dbf8d5574e..859ead471243 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1238,14 +1238,14 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi, int i, iter = 0, issued = 0; bool io_interrupted = false; - mutex_lock(&dcc->cmd_lock); - f2fs_bug_on(sbi, - !__check_rb_tree_consistence(sbi, &dcc->root)); - blk_start_plug(&plug); for (i = MAX_PLIST_NUM - 1; i >= 0; i--) { if (i + 1 < dpolicy->granularity) break; pend_list = &dcc->pend_list[i]; + + mutex_lock(&dcc->cmd_lock); + f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, &dcc->root)); + blk_start_plug(&plug); list_for_each_entry_safe(dc, tmp, pend_list, list) { f2fs_bug_on(sbi, dc->state != D_PREP); @@ -1259,12 +1259,14 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi, issued++; skip: if (++iter >= dpolicy->max_requests) - goto out; + break; } + blk_finish_plug(&plug); + mutex_unlock(&dcc->cmd_lock); + + if (iter >= dpolicy->max_requests) + break; } -out: - blk_finish_plug(&plug); - mutex_unlock(&dcc->cmd_lock); if (!issued && io_interrupted) issued = -1; -- cgit v1.2.3 From df74eacb207596ba0f4323bbb6b2bbc974c6f87b Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 4 Oct 2017 09:08:36 +0800 Subject: f2fs: trace f2fs_remove_discard This patch adds tracepoint to trace f2fs_remove_discard. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 859ead471243..41b3fc0cca62 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -822,6 +822,8 @@ static void __remove_discard_cmd(struct f2fs_sb_info *sbi, { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + trace_f2fs_remove_discard(dc->bdev, dc->start, dc->len); + f2fs_bug_on(sbi, dc->ref); if (dc->error == -EOPNOTSUPP) -- cgit v1.2.3 From 68e801abc520b06ba24af6fde667408e4372fe3a Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 4 Oct 2017 09:08:37 +0800 Subject: f2fs: give up CP_TRIMMED_FLAG if it drops discards In ->umount, once we drop remained discard entries, we should not set CP_TRIMMED_FLAG with another checkpoint. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 41b3fc0cca62..a065a2c01b5f 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1276,12 +1276,13 @@ skip: return issued; } -static void __drop_discard_cmd(struct f2fs_sb_info *sbi) +static bool __drop_discard_cmd(struct f2fs_sb_info *sbi) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; struct list_head *pend_list; struct discard_cmd *dc, *tmp; int i; + bool dropped = false; mutex_lock(&dcc->cmd_lock); for (i = MAX_PLIST_NUM - 1; i >= 0; i--) { @@ -1289,9 +1290,12 @@ static void __drop_discard_cmd(struct f2fs_sb_info *sbi) list_for_each_entry_safe(dc, tmp, pend_list, list) { f2fs_bug_on(sbi, dc->state != D_PREP); __remove_discard_cmd(sbi, dc); + dropped = true; } } mutex_unlock(&dcc->cmd_lock); + + return dropped; } static void __wait_one_discard_bio(struct f2fs_sb_info *sbi, @@ -1386,15 +1390,18 @@ void stop_discard_thread(struct f2fs_sb_info *sbi) } /* This comes from f2fs_put_super */ -void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi) +bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; struct discard_policy dpolicy; + bool dropped; init_discard_policy(&dpolicy, DPOLICY_UMOUNT, dcc->discard_granularity); __issue_discard_cmd(sbi, &dpolicy); - __drop_discard_cmd(sbi); + dropped = __drop_discard_cmd(sbi); __wait_all_discard_cmd(sbi, &dpolicy); + + return dropped; } static int issue_discard_thread(void *data) -- cgit v1.2.3 From 171b638fc49bdaf3302d7df8eb7b9d5bc2d3dfbe Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 18 Oct 2017 19:05:57 -0700 Subject: f2fs: limit # of inmemory pages If some abnormal users try lots of atomic write operations, f2fs is able to produce pinned pages in the main memory which affects system performance. This patch limits that as 20% over total memory size, and if f2fs reaches to the limit, it will drop all the inmemory pages. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index a065a2c01b5f..f0916b24f5b4 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -186,6 +186,7 @@ bool need_SSR(struct f2fs_sb_info *sbi) void register_inmem_page(struct inode *inode, struct page *page) { + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_inode_info *fi = F2FS_I(inode); struct inmem_pages *new; @@ -204,6 +205,10 @@ void register_inmem_page(struct inode *inode, struct page *page) mutex_lock(&fi->inmem_lock); get_page(page); list_add_tail(&new->list, &fi->inmem_pages); + spin_lock(&sbi->inode_lock[ATOMIC_FILE]); + if (list_empty(&fi->inmem_ilist)) + list_add_tail(&fi->inmem_ilist, &sbi->inode_list[ATOMIC_FILE]); + spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES); mutex_unlock(&fi->inmem_lock); @@ -262,12 +267,41 @@ next: return err; } +void drop_inmem_pages_all(struct f2fs_sb_info *sbi) +{ + struct list_head *head = &sbi->inode_list[ATOMIC_FILE]; + struct inode *inode; + struct f2fs_inode_info *fi; +next: + spin_lock(&sbi->inode_lock[ATOMIC_FILE]); + if (list_empty(head)) { + spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); + return; + } + fi = list_first_entry(head, struct f2fs_inode_info, inmem_ilist); + inode = igrab(&fi->vfs_inode); + spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); + + if (inode) { + drop_inmem_pages(inode); + iput(inode); + } + congestion_wait(BLK_RW_ASYNC, HZ/50); + cond_resched(); + goto next; +} + void drop_inmem_pages(struct inode *inode) { + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_inode_info *fi = F2FS_I(inode); mutex_lock(&fi->inmem_lock); __revoke_inmem_pages(inode, &fi->inmem_pages, true, false); + spin_lock(&sbi->inode_lock[ATOMIC_FILE]); + if (!list_empty(&fi->inmem_ilist)) + list_del_init(&fi->inmem_ilist); + spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); mutex_unlock(&fi->inmem_lock); clear_inode_flag(inode, FI_ATOMIC_FILE); @@ -399,6 +433,10 @@ int commit_inmem_pages(struct inode *inode) /* drop all uncommitted pages */ __revoke_inmem_pages(inode, &fi->inmem_pages, true, false); } + spin_lock(&sbi->inode_lock[ATOMIC_FILE]); + if (!list_empty(&fi->inmem_ilist)) + list_del_init(&fi->inmem_ilist); + spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); mutex_unlock(&fi->inmem_lock); clear_inode_flag(inode, FI_ATOMIC_COMMIT); -- cgit v1.2.3 From 91bea0c391b3c01b617237a107e76151c2b376b7 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 19 Oct 2017 12:58:21 +0200 Subject: f2fs: remove several redundant assignments There are several assignments to variables that are redundant as the values are never read when the variables are updated later and so the redundant statements can be safely removed. Cleans up clang warnings: fs/f2fs/segment.c:923:19: warning: Value stored to 'p' during its initialization is never read fs/f2fs/segment.c:2060:2: warning: Value stored to 'hint' is never read fs/f2fs/segment.c:2353:2: warning: Value stored to 'start_block' is never read fs/f2fs/segment.c:2354:2: warning: Value stored to 'end_block' is never read Signed-off-by: Colin Ian King Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index f0916b24f5b4..85295baa74c8 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1038,7 +1038,7 @@ static struct discard_cmd *__insert_discard_tree(struct f2fs_sb_info *sbi, struct rb_node *insert_parent) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; - struct rb_node **p = &dcc->root.rb_node; + struct rb_node **p; struct rb_node *parent = NULL; struct discard_cmd *dc = NULL; @@ -2175,7 +2175,6 @@ find_other_zone: } secno = left_start; skip_left: - hint = secno; segno = GET_SEG_FROM_SEC(sbi, secno); zoneno = GET_ZONE_FROM_SEC(sbi, secno); @@ -2468,9 +2467,6 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 : GET_SEGNO(sbi, end); - start_block = START_BLOCK(sbi, start_segno); - end_block = START_BLOCK(sbi, end_segno + 1); - cpc.reason = CP_DISCARD; cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen)); -- cgit v1.2.3 From 5612922fb0acb33f54fc0a67837b3510dde4b00b Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 28 Oct 2017 16:52:31 +0800 Subject: f2fs: support bio allocation error injection This patch adds to support bio allocation error injection to simulate out-of-memory test scenario. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 85295baa74c8..132e1e424ffe 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -511,7 +511,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) static int __submit_flush_wait(struct f2fs_sb_info *sbi, struct block_device *bdev) { - struct bio *bio = f2fs_bio_alloc(0); + struct bio *bio = f2fs_bio_alloc(sbi, 0, true); int ret; bio->bi_rw = REQ_OP_WRITE; @@ -943,7 +943,7 @@ static int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, if (ret) return ret; } - bio = f2fs_bio_alloc(1); + bio = bio_alloc(GFP_NOIO | __GFP_NOFAIL, 1); bio->bi_iter.bi_sector = sector; bio->bi_bdev = bdev; bio_set_op_attrs(bio, op, 0); -- cgit v1.2.3 From 90c28a18d2a499c53dbff24b382d1b8e4e9547d3 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 28 Oct 2017 16:52:32 +0800 Subject: f2fs: give correct trimmed blocks in fstrim We have supported to issue discard in specified range during fstrim, it needs to return caller with successfully trimmed bytes in that range instead of bytes of invalid blocks which are scanned in checkpoint. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 132e1e424ffe..27a6df3bbff3 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1336,21 +1336,27 @@ static bool __drop_discard_cmd(struct f2fs_sb_info *sbi) return dropped; } -static void __wait_one_discard_bio(struct f2fs_sb_info *sbi, +static unsigned int __wait_one_discard_bio(struct f2fs_sb_info *sbi, struct discard_cmd *dc) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + unsigned int len = 0; wait_for_completion_io(&dc->wait); mutex_lock(&dcc->cmd_lock); f2fs_bug_on(sbi, dc->state != D_DONE); dc->ref--; - if (!dc->ref) + if (!dc->ref) { + if (!dc->error) + len = dc->len; __remove_discard_cmd(sbi, dc); + } mutex_unlock(&dcc->cmd_lock); + + return len; } -static void __wait_discard_cmd_range(struct f2fs_sb_info *sbi, +static unsigned int __wait_discard_cmd_range(struct f2fs_sb_info *sbi, struct discard_policy *dpolicy, block_t start, block_t end) { @@ -1359,6 +1365,7 @@ static void __wait_discard_cmd_range(struct f2fs_sb_info *sbi, &(dcc->fstrim_list) : &(dcc->wait_list); struct discard_cmd *dc, *tmp; bool need_wait; + unsigned int trimmed = 0; next: need_wait = false; @@ -1371,6 +1378,8 @@ next: continue; if (dc->state == D_DONE && !dc->ref) { wait_for_completion_io(&dc->wait); + if (!dc->error) + trimmed += dc->len; __remove_discard_cmd(sbi, dc); } else { dc->ref++; @@ -1381,9 +1390,11 @@ next: mutex_unlock(&dcc->cmd_lock); if (need_wait) { - __wait_one_discard_bio(sbi, dc); + trimmed += __wait_one_discard_bio(sbi, dc); goto next; } + + return trimmed; } static void __wait_all_discard_cmd(struct f2fs_sb_info *sbi, @@ -1744,7 +1755,6 @@ find_next: f2fs_issue_discard(sbi, entry->start_blkaddr + cur_pos, len); - cpc->trimmed += len; total_len += len; } else { next_pos = find_next_bit_le(entry->discard_map, @@ -2447,12 +2457,12 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) block_t start_block, end_block; struct cp_control cpc; struct discard_policy dpolicy; + unsigned long long trimmed = 0; int err = 0; if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize) return -EINVAL; - cpc.trimmed = 0; if (end <= MAIN_BLKADDR(sbi)) goto out; @@ -2499,9 +2509,10 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) init_discard_policy(&dpolicy, DPOLICY_FSTRIM, cpc.trim_minlen); __issue_discard_cmd_range(sbi, &dpolicy, start_block, end_block); - __wait_discard_cmd_range(sbi, &dpolicy, start_block, end_block); + trimmed = __wait_discard_cmd_range(sbi, &dpolicy, + start_block, end_block); out: - range->len = F2FS_BLK_TO_BYTES(cpc.trimmed); + range->len = F2FS_BLK_TO_BYTES(trimmed); return err; } -- cgit v1.2.3 From ae66786296b4a210c75db6259300636ceb1abdba Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 28 Oct 2017 16:52:33 +0800 Subject: f2fs: export SSR allocation threshold This patch exports min_ssr_segments threshold in sysfs to let user control triggering SSR allocation flexibly. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 27a6df3bbff3..af536d427424 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -181,7 +181,7 @@ bool need_SSR(struct f2fs_sb_info *sbi) return true; return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs + - 2 * reserved_sections(sbi)); + SM_I(sbi)->min_ssr_sections + reserved_sections(sbi)); } void register_inmem_page(struct inode *inode, struct page *page) @@ -3751,6 +3751,7 @@ int build_segment_manager(struct f2fs_sb_info *sbi) sm_info->min_ipu_util = DEF_MIN_IPU_UTIL; sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS; sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS; + sm_info->min_ssr_sections = reserved_sections(sbi); sm_info->trim_sections = DEF_BATCHED_TRIM_SECTIONS; -- cgit v1.2.3 From c713fdb5a23cdda4ed85e04c7dec1094ab9c691f Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Mon, 30 Oct 2017 09:33:41 +0800 Subject: Revert "f2fs: handle dirty segments inside refresh_sit_entry" This reverts commit 5e443818fa0b2a2845561ee25bec181424fb2889 The commit should be reverted because call sequence of below two parts of code must be kept: a. update sit information, it needs to be updated before segment allocation since latter allocation may trigger SSR, and SSR allocation needs latest valid block information of all segments. b. update segment status, it needs to be updated after segment allocation since we can skip updating current opened segment status. Fixes: 5e443818fa0b ("f2fs: handle dirty segments inside refresh_sit_entry") Suggested-by: Chao Yu Signed-off-by: Yunlong Song Reviewed-by: Chao Yu [Jaegeuk Kim: remove refresh_sit_entry function] Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index af536d427424..f59b00aa502b 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1974,16 +1974,6 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) get_sec_entry(sbi, segno)->valid_blocks += del; } -void refresh_sit_entry(struct f2fs_sb_info *sbi, block_t old, block_t new) -{ - update_sit_entry(sbi, new, 1); - if (GET_SEGNO(sbi, old) != NULL_SEGNO) - update_sit_entry(sbi, old, -1); - - locate_dirty_segment(sbi, GET_SEGNO(sbi, old)); - locate_dirty_segment(sbi, GET_SEGNO(sbi, new)); -} - void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr) { unsigned int segno = GET_SEGNO(sbi, addr); @@ -2620,13 +2610,24 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, stat_inc_block_count(sbi, curseg); + /* + * SIT information should be updated before segment allocation, + * since SSR needs latest valid block information. + */ + update_sit_entry(sbi, *new_blkaddr, 1); + if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) + update_sit_entry(sbi, old_blkaddr, -1); + if (!__has_curseg_space(sbi, type)) sit_i->s_ops->allocate_segment(sbi, type, false); + /* - * SIT information should be updated after segment allocation, - * since we need to keep dirty segments precisely under SSR. + * segment dirty status should be updated after segment allocation, + * so we just need to update status only one time after previous + * segment being closed. */ - refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr); + locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr)); + locate_dirty_segment(sbi, GET_SEGNO(sbi, *new_blkaddr)); mutex_unlock(&sit_i->sentry_lock); -- cgit v1.2.3 From c5470498e59be4c3d9ebc9d7ee396dd8e6c6b1ea Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 30 Oct 2017 17:49:53 +0800 Subject: f2fs: use rw_semaphore to protect SIT cache There are some cases user didn't update SIT cache under this lock, so let's use rw_semaphore instead of mutex to enhance concurrently accessing. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index f59b00aa502b..7dfd4580380e 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1984,14 +1984,14 @@ void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr) return; /* add it into sit main buffer */ - mutex_lock(&sit_i->sentry_lock); + down_write(&sit_i->sentry_lock); update_sit_entry(sbi, addr, -1); /* add it into dirty seglist */ locate_dirty_segment(sbi, segno); - mutex_unlock(&sit_i->sentry_lock); + up_write(&sit_i->sentry_lock); } bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr) @@ -2004,7 +2004,7 @@ bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr) if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) return true; - mutex_lock(&sit_i->sentry_lock); + down_read(&sit_i->sentry_lock); segno = GET_SEGNO(sbi, blkaddr); se = get_seg_entry(sbi, segno); @@ -2013,7 +2013,7 @@ bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr) if (f2fs_test_bit(offset, se->ckpt_valid_map)) is_cp = true; - mutex_unlock(&sit_i->sentry_lock); + up_read(&sit_i->sentry_lock); return is_cp; } @@ -2409,12 +2409,16 @@ void allocate_new_segments(struct f2fs_sb_info *sbi) unsigned int old_segno; int i; + down_write(&SIT_I(sbi)->sentry_lock); + for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { curseg = CURSEG_I(sbi, i); old_segno = curseg->segno; SIT_I(sbi)->s_ops->allocate_segment(sbi, i, true); locate_dirty_segment(sbi, old_segno); } + + up_write(&SIT_I(sbi)->sentry_lock); } static const struct segment_allocation default_salloc_ops = { @@ -2426,14 +2430,14 @@ bool exist_trim_candidates(struct f2fs_sb_info *sbi, struct cp_control *cpc) __u64 trim_start = cpc->trim_start; bool has_candidate = false; - mutex_lock(&SIT_I(sbi)->sentry_lock); + down_write(&SIT_I(sbi)->sentry_lock); for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++) { if (add_discard_addrs(sbi, cpc, true)) { has_candidate = true; break; } } - mutex_unlock(&SIT_I(sbi)->sentry_lock); + up_write(&SIT_I(sbi)->sentry_lock); cpc->trim_start = trim_start; return has_candidate; @@ -2593,7 +2597,7 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, struct curseg_info *curseg = CURSEG_I(sbi, type); mutex_lock(&curseg->curseg_mutex); - mutex_lock(&sit_i->sentry_lock); + down_write(&sit_i->sentry_lock); *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); @@ -2629,7 +2633,7 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr)); locate_dirty_segment(sbi, GET_SEGNO(sbi, *new_blkaddr)); - mutex_unlock(&sit_i->sentry_lock); + up_write(&sit_i->sentry_lock); if (page && IS_NODESEG(type)) { fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg)); @@ -2787,7 +2791,7 @@ void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, curseg = CURSEG_I(sbi, type); mutex_lock(&curseg->curseg_mutex); - mutex_lock(&sit_i->sentry_lock); + down_write(&sit_i->sentry_lock); old_cursegno = curseg->segno; old_blkoff = curseg->next_blkoff; @@ -2819,7 +2823,7 @@ void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, curseg->next_blkoff = old_blkoff; } - mutex_unlock(&sit_i->sentry_lock); + up_write(&sit_i->sentry_lock); mutex_unlock(&curseg->curseg_mutex); } @@ -3274,7 +3278,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) bool to_journal = true; struct seg_entry *se; - mutex_lock(&sit_i->sentry_lock); + down_write(&sit_i->sentry_lock); if (!sit_i->dirty_sentries) goto out; @@ -3368,7 +3372,7 @@ out: cpc->trim_start = trim_start; } - mutex_unlock(&sit_i->sentry_lock); + up_write(&sit_i->sentry_lock); set_prefree_as_free_segments(sbi); } @@ -3461,7 +3465,7 @@ static int build_sit_info(struct f2fs_sb_info *sbi) sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK; sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time); sit_i->mounted_time = CURRENT_TIME_SEC.tv_sec; - mutex_init(&sit_i->sentry_lock); + init_rwsem(&sit_i->sentry_lock); return 0; } @@ -3702,7 +3706,7 @@ static void init_min_max_mtime(struct f2fs_sb_info *sbi) struct sit_info *sit_i = SIT_I(sbi); unsigned int segno; - mutex_lock(&sit_i->sentry_lock); + down_write(&sit_i->sentry_lock); sit_i->min_mtime = LLONG_MAX; @@ -3719,7 +3723,7 @@ static void init_min_max_mtime(struct f2fs_sb_info *sbi) sit_i->min_mtime = mtime; } sit_i->max_mtime = get_mtime(sbi); - mutex_unlock(&sit_i->sentry_lock); + up_write(&sit_i->sentry_lock); } int build_segment_manager(struct f2fs_sb_info *sbi) -- cgit v1.2.3 From 44889e487981b1aa258399696a35f1a7be96ea9f Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 2 Nov 2017 20:41:02 +0800 Subject: f2fs: remove dead code in update_meta_page After commit a468f0ef516f ("f2fs: use crc and cp version to determine roll-forward recovery"), last caller of update_meta_page passing @src with NULL is gone, so remove related dead code there. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 7dfd4580380e..9538e1ac652d 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2071,12 +2071,8 @@ struct page *get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno) void update_meta_page(struct f2fs_sb_info *sbi, void *src, block_t blk_addr) { struct page *page = grab_meta_page(sbi, blk_addr); - void *dst = page_address(page); - if (src) - memcpy(dst, src, PAGE_SIZE); - else - memset(dst, 0, PAGE_SIZE); + memcpy(page_address(page), src, PAGE_SIZE); set_page_dirty(page); f2fs_put_page(page, 1); } -- cgit v1.2.3 From 3e3b40557525c0bdb32f4b8d19f02b660245bc27 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 2 Nov 2017 20:41:03 +0800 Subject: f2fs: fix summary info corruption Sometimes, after running generic/270 of fstest, fsck reports summary info and actual position of block address in direct node becoming inconsistent. The root cause is race in between __f2fs_replace_block and change_curseg as below: Thread A Thread B - __clone_blkaddrs - f2fs_replace_block - __f2fs_replace_block - segnoA = GET_SEGNO(sbi, blkaddrA); - type = se->type:=CURSEG_HOT_DATA - if (!IS_CURSEG(sbi, segnoA)) type = CURSEG_WARM_DATA - allocate_data_block - allocate_segment - get_ssr_segment - change_curseg(segnoA, CURSEG_HOT_DATA) - change_curseg(segnoA, CURSEG_WARM_DATA) - reset_curseg - __set_sit_entry_type - change se->type from CURSEG_HOT_DATA to CURSEG_WARM_DATA So finally, hot curseg locates in segnoA, but type of segnoA becomes CURSEG_WARM_DATA. Then if we invoke __f2fs_replace_block(blkaddrB, blkaddrA, true, false), as blkaddrA locates in segnoA, so we will move warm type curseg to segnoA, then change its summary cache and writeback it to summary block. But segnoA is used by hot type curseg too, once it moves or persist, it will cover summary block content with inner old summary cache, result in inconsistent status. This patch tries to fix this issue by introduce global curseg lock to avoid race in between __f2fs_replace_block and change_curseg. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 9538e1ac652d..734c6a880633 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2592,6 +2592,8 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, struct sit_info *sit_i = SIT_I(sbi); struct curseg_info *curseg = CURSEG_I(sbi, type); + down_read(&SM_I(sbi)->curseg_lock); + mutex_lock(&curseg->curseg_mutex); down_write(&sit_i->sentry_lock); @@ -2649,6 +2651,8 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, } mutex_unlock(&curseg->curseg_mutex); + + up_read(&SM_I(sbi)->curseg_lock); } static void update_device_state(struct f2fs_io_info *fio) @@ -2756,6 +2760,18 @@ int rewrite_data_page(struct f2fs_io_info *fio) return err; } +static inline int __f2fs_get_curseg(struct f2fs_sb_info *sbi, + unsigned int segno) +{ + int i; + + for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) { + if (CURSEG_I(sbi, i)->segno == segno) + break; + } + return i; +} + void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, block_t old_blkaddr, block_t new_blkaddr, bool recover_curseg, bool recover_newaddr) @@ -2771,6 +2787,8 @@ void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, se = get_seg_entry(sbi, segno); type = se->type; + down_write(&SM_I(sbi)->curseg_lock); + if (!recover_curseg) { /* for recovery flow */ if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) { @@ -2780,8 +2798,13 @@ void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, type = CURSEG_WARM_DATA; } } else { - if (!IS_CURSEG(sbi, segno)) + if (IS_CURSEG(sbi, segno)) { + /* se->type is volatile as SSR allocation */ + type = __f2fs_get_curseg(sbi, segno); + f2fs_bug_on(sbi, type == NO_CHECK_TYPE); + } else { type = CURSEG_WARM_DATA; + } } curseg = CURSEG_I(sbi, type); @@ -2821,6 +2844,7 @@ void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, up_write(&sit_i->sentry_lock); mutex_unlock(&curseg->curseg_mutex); + up_write(&SM_I(sbi)->curseg_lock); } void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn, @@ -3758,6 +3782,8 @@ int build_segment_manager(struct f2fs_sb_info *sbi) INIT_LIST_HEAD(&sm_info->sit_entry_set); + init_rwsem(&sm_info->curseg_lock); + if (!f2fs_readonly(sbi->sb)) { err = create_flush_cmd_control(sbi); if (err) -- cgit v1.2.3 From 47af6c72d9440c90674d9b79da13ce8922491d24 Mon Sep 17 00:00:00 2001 From: Hyunchul Lee Date: Thu, 9 Nov 2017 14:51:27 +0900 Subject: f2fs: apply write hints to select the type of segments for buffered write Write hints helps F2FS to determine which type of segments would be selected for buffered write. This patch implements the mapping from write hints to segment types as shown below. hints segment type ----- ------------ WRITE_LIFE_SHORT CURSEG_HOT_DATA WRITE_LIFE_EXTREME CURSEG_COLD_DATA others CURSEG_WARM_DATA the F2FS poliy for hot/cold seperation has precedence over this hints. And hints are not applied in in-place update. Signed-off-by: Hyunchul Lee Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 734c6a880633..94939a5a96c8 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2514,6 +2514,20 @@ static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type) return false; } +#if 0 +int rw_hint_to_seg_type(enum rw_hint hint) +{ + switch (hint) { + case WRITE_LIFE_SHORT: + return CURSEG_HOT_DATA; + case WRITE_LIFE_EXTREME: + return CURSEG_COLD_DATA; + default: + return CURSEG_WARM_DATA; + } +} +#endif + static int __get_segment_type_2(struct f2fs_io_info *fio) { if (fio->type == DATA) @@ -2548,6 +2562,7 @@ static int __get_segment_type_6(struct f2fs_io_info *fio) return CURSEG_COLD_DATA; if (is_inode_flag_set(inode, FI_HOT_DATA)) return CURSEG_HOT_DATA; + /* rw_hint_to_seg_type(inode->i_write_hint); */ return CURSEG_WARM_DATA; } else { if (IS_DNODE(fio->page)) -- cgit v1.2.3 From 3bc01114a338a9ac336b3e139948e69ef0488a43 Mon Sep 17 00:00:00 2001 From: Hyunchul Lee Date: Tue, 28 Nov 2017 09:23:00 +0900 Subject: f2fs: apply write hints to select the type of segment for direct write When blocks are allocated for direct write, select the type of segment using the kiocb hint. But if an inode has FI_NO_ALLOC, use the inode hint. Signed-off-by: Hyunchul Lee Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 94939a5a96c8..82fb22b5e4ad 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2514,7 +2514,6 @@ static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type) return false; } -#if 0 int rw_hint_to_seg_type(enum rw_hint hint) { switch (hint) { @@ -2526,7 +2525,6 @@ int rw_hint_to_seg_type(enum rw_hint hint) return CURSEG_WARM_DATA; } } -#endif static int __get_segment_type_2(struct f2fs_io_info *fio) { -- cgit v1.2.3 From 8b33886c37cdff86070ca0fec4bdf7f644dea219 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 30 Nov 2017 19:28:17 +0800 Subject: f2fs: inject fault to kzalloc This patch introduces f2fs_kzalloc based on f2fs_kmalloc in order to support error injection for kzalloc(). Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 82fb22b5e4ad..19403f8e2161 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -657,7 +657,7 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi) goto init_thread; } - fcc = kzalloc(sizeof(struct flush_cmd_control), GFP_KERNEL); + fcc = f2fs_kzalloc(sbi, sizeof(struct flush_cmd_control), GFP_KERNEL); if (!fcc) return -ENOMEM; atomic_set(&fcc->issued_flush, 0); @@ -1817,7 +1817,7 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi) goto init_thread; } - dcc = kzalloc(sizeof(struct discard_cmd_control), GFP_KERNEL); + dcc = f2fs_kzalloc(sbi, sizeof(struct discard_cmd_control), GFP_KERNEL); if (!dcc) return -ENOMEM; @@ -3419,7 +3419,7 @@ static int build_sit_info(struct f2fs_sb_info *sbi) unsigned int bitmap_size; /* allocate memory for SIT information */ - sit_i = kzalloc(sizeof(struct sit_info), GFP_KERNEL); + sit_i = f2fs_kzalloc(sbi, sizeof(struct sit_info), GFP_KERNEL); if (!sit_i) return -ENOMEM; @@ -3437,29 +3437,30 @@ static int build_sit_info(struct f2fs_sb_info *sbi) for (start = 0; start < MAIN_SEGS(sbi); start++) { sit_i->sentries[start].cur_valid_map - = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); + = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); sit_i->sentries[start].ckpt_valid_map - = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); + = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); if (!sit_i->sentries[start].cur_valid_map || !sit_i->sentries[start].ckpt_valid_map) return -ENOMEM; #ifdef CONFIG_F2FS_CHECK_FS sit_i->sentries[start].cur_valid_map_mir - = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); + = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); if (!sit_i->sentries[start].cur_valid_map_mir) return -ENOMEM; #endif if (f2fs_discard_en(sbi)) { sit_i->sentries[start].discard_map - = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); + = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, + GFP_KERNEL); if (!sit_i->sentries[start].discard_map) return -ENOMEM; } } - sit_i->tmp_map = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); + sit_i->tmp_map = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); if (!sit_i->tmp_map) return -ENOMEM; @@ -3508,7 +3509,7 @@ static int build_free_segmap(struct f2fs_sb_info *sbi) unsigned int bitmap_size, sec_bitmap_size; /* allocate memory for free segmap information */ - free_i = kzalloc(sizeof(struct free_segmap_info), GFP_KERNEL); + free_i = f2fs_kzalloc(sbi, sizeof(struct free_segmap_info), GFP_KERNEL); if (!free_i) return -ENOMEM; @@ -3549,12 +3550,12 @@ static int build_curseg(struct f2fs_sb_info *sbi) for (i = 0; i < NR_CURSEG_TYPE; i++) { mutex_init(&array[i].curseg_mutex); - array[i].sum_blk = kzalloc(PAGE_SIZE, GFP_KERNEL); + array[i].sum_blk = f2fs_kzalloc(sbi, PAGE_SIZE, GFP_KERNEL); if (!array[i].sum_blk) return -ENOMEM; init_rwsem(&array[i].journal_rwsem); - array[i].journal = kzalloc(sizeof(struct f2fs_journal), - GFP_KERNEL); + array[i].journal = f2fs_kzalloc(sbi, + sizeof(struct f2fs_journal), GFP_KERNEL); if (!array[i].journal) return -ENOMEM; array[i].segno = NULL_SEGNO; @@ -3712,7 +3713,8 @@ static int build_dirty_segmap(struct f2fs_sb_info *sbi) unsigned int bitmap_size, i; /* allocate memory for dirty segments list information */ - dirty_i = kzalloc(sizeof(struct dirty_seglist_info), GFP_KERNEL); + dirty_i = f2fs_kzalloc(sbi, sizeof(struct dirty_seglist_info), + GFP_KERNEL); if (!dirty_i) return -ENOMEM; @@ -3766,7 +3768,7 @@ int build_segment_manager(struct f2fs_sb_info *sbi) struct f2fs_sm_info *sm_info; int err; - sm_info = kzalloc(sizeof(struct f2fs_sm_info), GFP_KERNEL); + sm_info = f2fs_kzalloc(sbi, sizeof(struct f2fs_sm_info), GFP_KERNEL); if (!sm_info) return -ENOMEM; -- cgit v1.2.3 From 5d4e487b9929cced66ccdeb29e0ef429fed2f504 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 30 Nov 2017 19:28:18 +0800 Subject: f2fs: inject fault to kvmalloc This patch supports to inject fault into kvmalloc/kvzalloc. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 19403f8e2161..ac12f3deac75 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3425,13 +3425,14 @@ static int build_sit_info(struct f2fs_sb_info *sbi) SM_I(sbi)->sit_info = sit_i; - sit_i->sentries = kvzalloc(MAIN_SEGS(sbi) * + sit_i->sentries = f2fs_kvzalloc(sbi, MAIN_SEGS(sbi) * sizeof(struct seg_entry), GFP_KERNEL); if (!sit_i->sentries) return -ENOMEM; bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi)); - sit_i->dirty_sentries_bitmap = kvzalloc(bitmap_size, GFP_KERNEL); + sit_i->dirty_sentries_bitmap = f2fs_kvzalloc(sbi, bitmap_size, + GFP_KERNEL); if (!sit_i->dirty_sentries_bitmap) return -ENOMEM; @@ -3465,7 +3466,7 @@ static int build_sit_info(struct f2fs_sb_info *sbi) return -ENOMEM; if (sbi->segs_per_sec > 1) { - sit_i->sec_entries = kvzalloc(MAIN_SECS(sbi) * + sit_i->sec_entries = f2fs_kvzalloc(sbi, MAIN_SECS(sbi) * sizeof(struct sec_entry), GFP_KERNEL); if (!sit_i->sec_entries) return -ENOMEM; @@ -3516,12 +3517,12 @@ static int build_free_segmap(struct f2fs_sb_info *sbi) SM_I(sbi)->free_info = free_i; bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi)); - free_i->free_segmap = kvmalloc(bitmap_size, GFP_KERNEL); + free_i->free_segmap = f2fs_kvmalloc(sbi, bitmap_size, GFP_KERNEL); if (!free_i->free_segmap) return -ENOMEM; sec_bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi)); - free_i->free_secmap = kvmalloc(sec_bitmap_size, GFP_KERNEL); + free_i->free_secmap = f2fs_kvmalloc(sbi, sec_bitmap_size, GFP_KERNEL); if (!free_i->free_secmap) return -ENOMEM; @@ -3701,7 +3702,7 @@ static int init_victim_secmap(struct f2fs_sb_info *sbi) struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); unsigned int bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi)); - dirty_i->victim_secmap = kvzalloc(bitmap_size, GFP_KERNEL); + dirty_i->victim_secmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL); if (!dirty_i->victim_secmap) return -ENOMEM; return 0; @@ -3724,7 +3725,8 @@ static int build_dirty_segmap(struct f2fs_sb_info *sbi) bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi)); for (i = 0; i < NR_DIRTY_TYPE; i++) { - dirty_i->dirty_segmap[i] = kvzalloc(bitmap_size, GFP_KERNEL); + dirty_i->dirty_segmap[i] = f2fs_kvzalloc(sbi, bitmap_size, + GFP_KERNEL); if (!dirty_i->dirty_segmap[i]) return -ENOMEM; } -- cgit v1.2.3 From e7db649b5fb191a56fb83ed47c3bbe08f4b7c955 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 30 Nov 2017 19:28:19 +0800 Subject: f2fs: spread f2fs_k{m,z}alloc Use f2fs_k{m,z}alloc as much as possible to increase fault injection points. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index ac12f3deac75..fac18cc58c44 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3543,7 +3543,7 @@ static int build_curseg(struct f2fs_sb_info *sbi) struct curseg_info *array; int i; - array = kcalloc(NR_CURSEG_TYPE, sizeof(*array), GFP_KERNEL); + array = f2fs_kzalloc(sbi, sizeof(*array) * NR_CURSEG_TYPE, GFP_KERNEL); if (!array) return -ENOMEM; -- cgit v1.2.3 From e81cafbeba4bf252b24778a17aef3f623a0815e3 Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Wed, 6 Dec 2017 11:31:29 +0800 Subject: f2fs: no need return value in restore summary process No need return value in restore summary process Signed-off-by: Yunlei He Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index fac18cc58c44..2206c297ec16 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2904,7 +2904,7 @@ void f2fs_wait_on_block_writeback(struct f2fs_sb_info *sbi, block_t blkaddr) } } -static int read_compacted_summaries(struct f2fs_sb_info *sbi) +static void read_compacted_summaries(struct f2fs_sb_info *sbi) { struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); struct curseg_info *seg_i; @@ -2961,7 +2961,6 @@ static int read_compacted_summaries(struct f2fs_sb_info *sbi) } } f2fs_put_page(page, 1); - return 0; } static int read_normal_summaries(struct f2fs_sb_info *sbi, int type) @@ -3007,13 +3006,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type) ns->ofs_in_node = 0; } } else { - int err; - - err = restore_node_summary(sbi, segno, sum); - if (err) { - f2fs_put_page(new, 1); - return err; - } + restore_node_summary(sbi, segno, sum); } } @@ -3052,8 +3045,7 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi) META_CP, true); /* restore for compacted data summary */ - if (read_compacted_summaries(sbi)) - return -EINVAL; + read_compacted_summaries(sbi); type = CURSEG_HOT_NODE; } -- cgit v1.2.3 From e2bb618a0a6bb232c22b37d27a5a631f2fc198af Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 19 Dec 2017 19:16:34 -0800 Subject: f2fs: return error during fill_super Let's avoid BUG_ON during fill_super, when on-disk was totall corrupted. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 2206c297ec16..6af71864b501 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3557,7 +3557,7 @@ static int build_curseg(struct f2fs_sb_info *sbi) return restore_curseg_summaries(sbi); } -static void build_sit_entries(struct f2fs_sb_info *sbi) +static int build_sit_entries(struct f2fs_sb_info *sbi) { struct sit_info *sit_i = SIT_I(sbi); struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); @@ -3567,6 +3567,7 @@ static void build_sit_entries(struct f2fs_sb_info *sbi) int sit_blk_cnt = SIT_BLK_CNT(sbi); unsigned int i, start, end; unsigned int readed, start_blk = 0; + int err = 0; do { readed = ra_meta_pages(sbi, start_blk, BIO_MAX_PAGES, @@ -3585,7 +3586,9 @@ static void build_sit_entries(struct f2fs_sb_info *sbi) sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)]; f2fs_put_page(page, 1); - check_block_count(sbi, start, &sit); + err = check_block_count(sbi, start, &sit); + if (err) + return err; seg_info_from_raw_sit(se, &sit); /* build discard map only one time */ @@ -3620,7 +3623,9 @@ static void build_sit_entries(struct f2fs_sb_info *sbi) old_valid_blocks = se->valid_blocks; - check_block_count(sbi, start, &sit); + err = check_block_count(sbi, start, &sit); + if (err) + break; seg_info_from_raw_sit(se, &sit); if (f2fs_discard_en(sbi)) { @@ -3640,6 +3645,7 @@ static void build_sit_entries(struct f2fs_sb_info *sbi) se->valid_blocks - old_valid_blocks; } up_read(&curseg->journal_rwsem); + return err; } static void init_free_segmap(struct f2fs_sb_info *sbi) @@ -3814,7 +3820,9 @@ int build_segment_manager(struct f2fs_sb_info *sbi) return err; /* reinit free segmap based on SIT */ - build_sit_entries(sbi); + err = build_sit_entries(sbi); + if (err) + return err; init_free_segmap(sbi); err = build_dirty_segmap(sbi); -- cgit v1.2.3 From 06a366757ff766936c307afef902300f602cb6a2 Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Thu, 4 Jan 2018 15:02:02 +0800 Subject: f2fs: check segment type in __f2fs_replace_block In some case, the node blocks has wrong blkaddr whose segment type is NODE, e.g., recover inode has missing xattr flag and the blkaddr is in the xattr range. Since fsck.f2fs does not check the recovery nodes, this will cause __f2fs_replace_block change the curseg of node and do the update_sit_entry(sbi, new_blkaddr, 1) with no next_blkoff refresh, as a result, when recovery process write checkpoint and sync nodes, the next_blkoff of curseg is used in the segment bit map, then it will cause f2fs_bug_on. So let's check segment type in __f2fs_replace_block. Signed-off-by: Yunlong Song Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 6af71864b501..96b01c7bea42 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2820,6 +2820,7 @@ void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, } } + f2fs_bug_on(sbi, !IS_DATASEG(type)); curseg = CURSEG_I(sbi, type); mutex_lock(&curseg->curseg_mutex); -- cgit v1.2.3 From b78e9302e2e358d45ea4377bf2c20d045f1c3b8a Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 5 Jan 2018 09:41:20 +0000 Subject: f2fs: make local functions static Fixes the following sparse warnings: fs/f2fs/segment.c:887:6: warning: symbol '__check_sit_bitmap' was not declared. Should it be static? fs/f2fs/segment.c:1327:6: warning: symbol 'f2fs_wait_discard_bio' was not declared. Should it be static? fs/f2fs/super.c:1661:5: warning: symbol 'f2fs_get_projid' was not declared. Should it be static? Signed-off-by: Wei Yongjun Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 96b01c7bea42..116e50470360 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -965,7 +965,7 @@ static int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, return 0; } -void __check_sit_bitmap(struct f2fs_sb_info *sbi, +static void __check_sit_bitmap(struct f2fs_sb_info *sbi, block_t start, block_t end) { #ifdef CONFIG_F2FS_CHECK_FS @@ -1404,7 +1404,7 @@ static void __wait_all_discard_cmd(struct f2fs_sb_info *sbi, } /* This should be covered by global mutex, &sit_i->sentry_lock */ -void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) +static void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; struct discard_cmd *dc; -- cgit v1.2.3 From d4f19f6266abaf573312c78723e09fb6498980ab Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 8 Jan 2018 18:48:33 +0800 Subject: f2fs: avoid high cpu usage in discard thread We take very long time to finish generic/476, this is because we will check consistence of all discard entries in global rb tree while traversing all different granularity pending lists, even when the list is empty, in order to avoid that unneeded overhead, we have to skip the check when coming up an empty list. generic/476 time consumption: cost Before patch & w/o consistence check 57s Before patch & w/ consistence check 1426s After patch 78s Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 116e50470360..d13e36b292a4 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1284,6 +1284,8 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi, pend_list = &dcc->pend_list[i]; mutex_lock(&dcc->cmd_lock); + if (list_empty(pend_list)) + goto next; f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, &dcc->root)); blk_start_plug(&plug); list_for_each_entry_safe(dc, tmp, pend_list, list) { @@ -1302,6 +1304,7 @@ skip: break; } blk_finish_plug(&plug); +next: mutex_unlock(&dcc->cmd_lock); if (iter >= dpolicy->max_requests) -- cgit v1.2.3 From d49132d45cb07dc77904bf9b6501df2dd77b251b Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Wed, 10 Jan 2018 16:49:10 +0900 Subject: f2fs: handle newly created page when revoking inmem pages When committing inmem pages is successful, we revoke already committed blocks in __revoke_inmem_pages() and finally replace the committed ones with the old blocks using f2fs_replace_block(). However, if the committed block was newly created one, the address of the old block is NEW_ADDR and __f2fs_replace_block() cannot handle NEW_ADDR as new_blkaddr properly and a kernel panic occurrs. Signed-off-by: Daeho Jeong Tested-by: Shu Tan Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index d13e36b292a4..7638ebb1c343 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -248,7 +248,11 @@ retry: goto next; } get_node_info(sbi, dn.nid, &ni); - f2fs_replace_block(sbi, &dn, dn.data_blkaddr, + if (cur->old_addr == NEW_ADDR) { + invalidate_blocks(sbi, dn.data_blkaddr); + f2fs_update_data_blkaddr(&dn, NEW_ADDR); + } else + f2fs_replace_block(sbi, &dn, dn.data_blkaddr, cur->old_addr, ni.version, true, true); f2fs_put_dnode(&dn); } -- cgit v1.2.3 From 8069a0e983d999641331e3a7c8cda42de0ae1166 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 18 Jan 2018 17:23:29 +0800 Subject: f2fs: stop gc/discard thread after fs shutdown Once filesystem shuts down, daemons like gc/discard thread should be aware of it, and do exit, in addtion, drop all cached pending discard commands and turn off real-time discard mode. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 7638ebb1c343..cfc19d8d4625 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1343,6 +1343,11 @@ static bool __drop_discard_cmd(struct f2fs_sb_info *sbi) return dropped; } +void drop_discard_cmd(struct f2fs_sb_info *sbi) +{ + __drop_discard_cmd(sbi); +} + static unsigned int __wait_one_discard_bio(struct f2fs_sb_info *sbi, struct discard_cmd *dc) { -- cgit v1.2.3 From fa043fae90300e9b49218e204409a5066121b0a7 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 25 Jan 2018 18:57:26 +0800 Subject: f2fs: clean up duplicated assignment in init_discard_policy Remove duplicated codes of assignment for .max_requests and .io_aware_gran. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index cfc19d8d4625..31c69c6660e7 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1795,25 +1795,20 @@ void init_discard_policy(struct discard_policy *dpolicy, dpolicy->sync = true; dpolicy->granularity = granularity; + dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST; + dpolicy->io_aware_gran = MAX_PLIST_NUM; + if (discard_type == DPOLICY_BG) { dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME; dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME; - dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST; - dpolicy->io_aware_gran = MAX_PLIST_NUM; dpolicy->io_aware = true; } else if (discard_type == DPOLICY_FORCE) { dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME; dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME; - dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST; - dpolicy->io_aware_gran = MAX_PLIST_NUM; dpolicy->io_aware = true; } else if (discard_type == DPOLICY_FSTRIM) { - dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST; - dpolicy->io_aware_gran = MAX_PLIST_NUM; dpolicy->io_aware = false; } else if (discard_type == DPOLICY_UMOUNT) { - dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST; - dpolicy->io_aware_gran = MAX_PLIST_NUM; dpolicy->io_aware = false; } } -- cgit v1.2.3 From 1062a0c018296c6719e49b25bed206d414c18898 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 25 Jan 2018 18:57:27 +0800 Subject: f2fs: stop issuing discard if fs is readonly If filesystem is readonly, stop to issue discard in daemon. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 31c69c6660e7..6662c6caf477 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1486,6 +1486,8 @@ static int issue_discard_thread(void *data) msecs_to_jiffies(wait_ms)); if (try_to_freeze()) continue; + if (f2fs_readonly(sbi->sb)) + continue; if (kthread_should_stop()) return 0; -- cgit v1.2.3 From 9fb0de175172c63132cc84b630e8c50834269e1b Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Thu, 25 Jan 2018 17:27:11 +0800 Subject: f2fs: rebuild sit page from sit info in mem This patch rebuild sit page from sit info in mem instead of issue a read io. I test this method and the result is as below: Pre: mmc_perf_test-12061 [001] ...1 976.819992: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = start flush sit mmc_perf_test-12061 [001] ...1 976.856446: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = end flush sit mmc_perf_test-12061 [003] ...1 998.976946: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = start flush sit mmc_perf_test-12061 [003] ...1 999.023269: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = end flush sit mmc_perf_test-12061 [003] ...1 1022.060772: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = start flush sit mmc_perf_test-12061 [003] ...1 1022.111034: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = end flush sit mmc_perf_test-12061 [002] ...1 1070.127643: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = start flush sit mmc_perf_test-12061 [003] ...1 1070.187352: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = end flush sit mmc_perf_test-12061 [003] ...1 1095.942124: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = start flush sit mmc_perf_test-12061 [003] ...1 1095.995975: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = end flush sit mmc_perf_test-12061 [003] ...1 1122.535091: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = start flush sit mmc_perf_test-12061 [003] ...1 1122.586521: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = end flush sit mmc_perf_test-12061 [001] ...1 1147.897487: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = start flush sit mmc_perf_test-12061 [001] ...1 1147.959438: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = end flush sit mmc_perf_test-12061 [003] ...1 1177.926951: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = start flush sit mmc_perf_test-12061 [002] ...1 1177.976823: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = end flush sit mmc_perf_test-12061 [002] ...1 1204.176087: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = start flush sit mmc_perf_test-12061 [002] ...1 1204.239046: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = end flush sit Some sit flush consume more than 50ms. Now: mmc_perf_test-2187 [007] ...1 196.840684: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = start flush sit mmc_perf_test-2187 [007] ...1 196.841258: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = end flush sit mmc_perf_test-2187 [007] ...1 219.430582: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = start flush sit mmc_perf_test-2187 [007] ...1 219.431144: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = end flush sit mmc_perf_test-2187 [002] ...1 243.638678: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = start flush sit mmc_perf_test-2187 [000] ...1 243.638980: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = end flush sit mmc_perf_test-2187 [002] ...1 265.392180: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = start flush sit mmc_perf_test-2187 [002] ...1 265.392245: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = end flush sit mmc_perf_test-2187 [000] ...1 290.309051: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = start flush sit mmc_perf_test-2187 [000] ...1 290.309116: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = end flush sit mmc_perf_test-2187 [003] ...1 317.144209: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = start flush sit mmc_perf_test-2187 [003] ...1 317.145913: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = end flush sit mmc_perf_test-2187 [005] ...1 343.224954: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = start flush sit mmc_perf_test-2187 [005] ...1 343.225574: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = end flush sit mmc_perf_test-2187 [000] ...1 370.239846: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = start flush sit mmc_perf_test-2187 [000] ...1 370.241138: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = end flush sit mmc_perf_test-2187 [001] ...1 397.029043: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = start flush sit mmc_perf_test-2187 [001] ...1 397.030750: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = end flush sit mmc_perf_test-2187 [003] ...1 425.386377: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = start flush sit mmc_perf_test-2187 [003] ...1 425.387735: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = end flush sit Most sit flush consume no more than 1ms. Signed-off-by: Yunlei He Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 6662c6caf477..bf98f6f34b7e 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3191,28 +3191,19 @@ static struct page *get_next_sit_page(struct f2fs_sb_info *sbi, unsigned int start) { struct sit_info *sit_i = SIT_I(sbi); - struct page *src_page, *dst_page; + struct page *page; pgoff_t src_off, dst_off; - void *src_addr, *dst_addr; src_off = current_sit_addr(sbi, start); dst_off = next_sit_addr(sbi, src_off); - /* get current sit block page without lock */ - src_page = get_meta_page(sbi, src_off); - dst_page = grab_meta_page(sbi, dst_off); - f2fs_bug_on(sbi, PageDirty(src_page)); - - src_addr = page_address(src_page); - dst_addr = page_address(dst_page); - memcpy(dst_addr, src_addr, PAGE_SIZE); - - set_page_dirty(dst_page); - f2fs_put_page(src_page, 1); + page = grab_meta_page(sbi, dst_off); + seg_info_to_sit_page(sbi, page, start); + set_page_dirty(page); set_to_next_sit(sit_i, start); - return dst_page; + return page; } static struct sit_entry_set *grab_sit_entry_set(void) -- cgit v1.2.3 From 41dda11641377f1233f14aae8fe8b3d0a2989ff8 Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Mon, 29 Jan 2018 11:37:45 +0800 Subject: f2fs: fix heap mode to reset it back Commit 7a20b8a61eff81bdb7097a578752a74860e9d142 ("f2fs: allocate node and hot data in the beginning of partition") introduces another mount option, heap, to reset it back. But it does not do anything for heap mode, so fix it. Cc: stable@vger.kernel.org Signed-off-by: Yunlong Song Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index bf98f6f34b7e..4e27b6721ba1 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2244,7 +2244,8 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type) if (sbi->segs_per_sec != 1) return CURSEG_I(sbi, type)->segno; - if (type == CURSEG_HOT_DATA || IS_NODESEG(type)) + if (test_opt(sbi, NOHEAP) && + (type == CURSEG_HOT_DATA || IS_NODESEG(type))) return 0; if (SIT_I(sbi)->last_victim[ALLOC_NEXT]) -- cgit v1.2.3 From 22fa74c2b0975f8ae05e55860b84ba2557c940ad Mon Sep 17 00:00:00 2001 From: Hyunchul Lee Date: Wed, 31 Jan 2018 11:36:57 +0900 Subject: f2fs: support passing down write hints given by users to block layer Add the 'whint_mode' mount option that controls which write hints are passed down to block layer. There are "off" and "user-based" mode. The default mode is "off". 1) whint_mode=off. F2FS only passes down WRITE_LIFE_NOT_SET. 2) whint_mode=user-based. F2FS tries to pass down hints given by users. User F2FS Block ---- ---- ----- META WRITE_LIFE_NOT_SET HOT_NODE " WARM_NODE " COLD_NODE " ioctl(COLD) COLD_DATA WRITE_LIFE_EXTREME extension list " " -- buffered io WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET WRITE_LIFE_NONE " " WRITE_LIFE_MEDIUM " " WRITE_LIFE_LONG " " -- direct io WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET WRITE_LIFE_NONE " WRITE_LIFE_NONE WRITE_LIFE_MEDIUM " WRITE_LIFE_MEDIUM WRITE_LIFE_LONG " WRITE_LIFE_LONG Many thanks to Chao Yu and Jaegeuk Kim for comments to implement this patch. Signed-off-by: Hyunchul Lee Reviewed-by: Chao Yu [Jaegeuk Kim: avoid build warning] [Chao Yu: fix to restore whint_mode in ->remount_fs] Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 4e27b6721ba1..5dc604058205 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2536,6 +2536,62 @@ int rw_hint_to_seg_type(enum rw_hint hint) } } +/* This returns write hints for each segment type. This hints will be + * passed down to block layer. There are mapping tables which depend on + * the mount option 'whint_mode'. + * + * 1) whint_mode=off. F2FS only passes down WRITE_LIFE_NOT_SET. + * + * 2) whint_mode=user-based. F2FS tries to pass down hints given by users. + * + * User F2FS Block + * ---- ---- ----- + * META WRITE_LIFE_NOT_SET + * HOT_NODE " + * WARM_NODE " + * COLD_NODE " + * ioctl(COLD) COLD_DATA WRITE_LIFE_EXTREME + * extension list " " + * + * -- buffered io + * WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME + * WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT + * WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET + * WRITE_LIFE_NONE " " + * WRITE_LIFE_MEDIUM " " + * WRITE_LIFE_LONG " " + * + * -- direct io + * WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME + * WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT + * WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET + * WRITE_LIFE_NONE " WRITE_LIFE_NONE + * WRITE_LIFE_MEDIUM " WRITE_LIFE_MEDIUM + * WRITE_LIFE_LONG " WRITE_LIFE_LONG + * + */ + +enum rw_hint io_type_to_rw_hint(struct f2fs_sb_info *sbi, + enum page_type type, enum temp_type temp) +{ + if (sbi->whint_mode == WHINT_MODE_USER) { + if (type == DATA) { + switch (temp) { + case COLD: + return WRITE_LIFE_EXTREME; + case HOT: + return WRITE_LIFE_SHORT; + default: + return WRITE_LIFE_NOT_SET; + } + } else { + return WRITE_LIFE_NOT_SET; + } + } else { + return WRITE_LIFE_NOT_SET; + } +} + static int __get_segment_type_2(struct f2fs_io_info *fio) { if (fio->type == DATA) @@ -2724,6 +2780,7 @@ void write_meta_page(struct f2fs_sb_info *sbi, struct page *page, struct f2fs_io_info fio = { .sbi = sbi, .type = META, + .temp = HOT, .op = REQ_OP_WRITE, .op_flags = REQ_SYNC | REQ_NOIDLE | REQ_META | REQ_PRIO, .old_blkaddr = page->index, @@ -2772,6 +2829,8 @@ int rewrite_data_page(struct f2fs_io_info *fio) int err; fio->new_blkaddr = fio->old_blkaddr; + /* i/o temperature is needed for passing down write hints */ + __get_segment_type(fio); stat_inc_inplace_blocks(fio->sbi); err = f2fs_submit_page_bio(fio); -- cgit v1.2.3 From 92b12bb1a23e6e808e40d2c01f231b881e44abb2 Mon Sep 17 00:00:00 2001 From: Hyunchul Lee Date: Wed, 31 Jan 2018 11:36:58 +0900 Subject: f2fs: support passing down write hints to block layer with F2FS policy Add 'whint_mode=fs-based' mount option. In this mode, F2FS passes down write hints with its policy. * whint_mode=fs-based. F2FS passes down hints with its policy. User F2FS Block ---- ---- ----- META WRITE_LIFE_MEDIUM; HOT_NODE WRITE_LIFE_NOT_SET WARM_NODE " COLD_NODE WRITE_LIFE_NONE ioctl(COLD) COLD_DATA WRITE_LIFE_EXTREME extension list " " -- buffered io WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_LONG WRITE_LIFE_NONE " " WRITE_LIFE_MEDIUM " " WRITE_LIFE_LONG " " -- direct io WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET WRITE_LIFE_NONE " WRITE_LIFE_NONE WRITE_LIFE_MEDIUM " WRITE_LIFE_MEDIUM WRITE_LIFE_LONG " WRITE_LIFE_LONG Many thanks to Chao Yu and Jaegeuk Kim for comments to implement this patch. Signed-off-by: Hyunchul Lee Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 57 ++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 48 insertions(+), 9 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 5dc604058205..3a150018fd2c 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2569,6 +2569,32 @@ int rw_hint_to_seg_type(enum rw_hint hint) * WRITE_LIFE_MEDIUM " WRITE_LIFE_MEDIUM * WRITE_LIFE_LONG " WRITE_LIFE_LONG * + * 3) whint_mode=fs-based. F2FS passes down hints with its policy. + * + * User F2FS Block + * ---- ---- ----- + * META WRITE_LIFE_MEDIUM; + * HOT_NODE WRITE_LIFE_NOT_SET + * WARM_NODE " + * COLD_NODE WRITE_LIFE_NONE + * ioctl(COLD) COLD_DATA WRITE_LIFE_EXTREME + * extension list " " + * + * -- buffered io + * WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME + * WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT + * WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_LONG + * WRITE_LIFE_NONE " " + * WRITE_LIFE_MEDIUM " " + * WRITE_LIFE_LONG " " + * + * -- direct io + * WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME + * WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT + * WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET + * WRITE_LIFE_NONE " WRITE_LIFE_NONE + * WRITE_LIFE_MEDIUM " WRITE_LIFE_MEDIUM + * WRITE_LIFE_LONG " WRITE_LIFE_LONG */ enum rw_hint io_type_to_rw_hint(struct f2fs_sb_info *sbi, @@ -2576,20 +2602,33 @@ enum rw_hint io_type_to_rw_hint(struct f2fs_sb_info *sbi, { if (sbi->whint_mode == WHINT_MODE_USER) { if (type == DATA) { - switch (temp) { - case COLD: - return WRITE_LIFE_EXTREME; - case HOT: - return WRITE_LIFE_SHORT; - default: + if (temp == WARM) return WRITE_LIFE_NOT_SET; - } + else if (temp == HOT) + return WRITE_LIFE_SHORT; + else if (temp == COLD) + return WRITE_LIFE_EXTREME; } else { return WRITE_LIFE_NOT_SET; } - } else { - return WRITE_LIFE_NOT_SET; + } else if (sbi->whint_mode == WHINT_MODE_FS) { + if (type == DATA) { + if (temp == WARM) + return WRITE_LIFE_LONG; + else if (temp == HOT) + return WRITE_LIFE_SHORT; + else if (temp == COLD) + return WRITE_LIFE_EXTREME; + } else if (type == NODE) { + if (temp == WARM || temp == HOT) + return WRITE_LIFE_NOT_SET; + else if (temp == COLD) + return WRITE_LIFE_NONE; + } else if (type == META) { + return WRITE_LIFE_MEDIUM; + } } + return WRITE_LIFE_NOT_SET; } static int __get_segment_type_2(struct f2fs_io_info *fio) -- cgit v1.2.3 From e5081a52ac0965739126d52db39b32a12e7a06b7 Mon Sep 17 00:00:00 2001 From: Sheng Yong Date: Tue, 6 Feb 2018 12:31:17 +0800 Subject: f2fs: clean up f2fs_sb_has_xxx functions This patch introduces F2FS_FEATURE_FUNCS to clean up the definitions of different f2fs_sb_has_xxx functions. Signed-off-by: Sheng Yong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 3a150018fd2c..d4e09133c013 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1565,7 +1565,7 @@ static int __issue_discard_async(struct f2fs_sb_info *sbi, struct block_device *bdev, block_t blkstart, block_t blklen) { #ifdef CONFIG_BLK_DEV_ZONED - if (f2fs_sb_mounted_blkzoned(sbi->sb) && + if (f2fs_sb_has_blkzoned(sbi->sb) && bdev_zoned_model(bdev) != BLK_ZONED_NONE) return __f2fs_issue_discard_zone(sbi, bdev, blkstart, blklen); #endif @@ -1763,7 +1763,7 @@ find_next: sbi->blocks_per_seg, cur_pos); len = next_pos - cur_pos; - if (f2fs_sb_mounted_blkzoned(sbi->sb) || + if (f2fs_sb_has_blkzoned(sbi->sb) || (force && len < cpc->trim_minlen)) goto skip; -- cgit v1.2.3 From 0ffdffc8f106628a4c6bc3eed2eb3cf88393d2f3 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sun, 18 Feb 2018 08:50:49 -0800 Subject: f2fs: add mount option for segment allocation policy This patch adds an mount option, "alloc_mode=%s" having two options, "default" and "reuse". In "alloc_mode=reuse" case, f2fs starts to allocate segments from 0'th segment all the time to reassign segments. It'd be useful for small-sized eMMC parts. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index d4e09133c013..da498a1de469 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2250,6 +2250,11 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type) if (SIT_I(sbi)->last_victim[ALLOC_NEXT]) return SIT_I(sbi)->last_victim[ALLOC_NEXT]; + + /* find segments from 0 to reuse freed segments */ + if (sbi->alloc_mode == ALLOC_MODE_REUSE) + return 0; + return CURSEG_I(sbi, type)->segno; } -- cgit v1.2.3 From 10b2d001d6ace7f509bda9321a729b6949cc6ea0 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 22 Feb 2018 23:30:55 -0800 Subject: f2fs: issue discard aggressively in the gc_urgent mode This patch avoids to skip discard commands when user sets gc_urgent mode. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index da498a1de469..c217a91088af 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1491,12 +1491,11 @@ static int issue_discard_thread(void *data) if (kthread_should_stop()) return 0; - if (dcc->discard_wake) { + if (dcc->discard_wake) dcc->discard_wake = 0; - if (sbi->gc_thread && sbi->gc_thread->gc_urgent) - init_discard_policy(&dpolicy, - DPOLICY_FORCE, 1); - } + + if (sbi->gc_thread && sbi->gc_thread->gc_urgent) + init_discard_policy(&dpolicy, DPOLICY_FORCE, 1); sb_start_intwrite(sbi->sb); @@ -1807,7 +1806,7 @@ void init_discard_policy(struct discard_policy *dpolicy, } else if (discard_type == DPOLICY_FORCE) { dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME; dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME; - dpolicy->io_aware = true; + dpolicy->io_aware = false; } else if (discard_type == DPOLICY_FSTRIM) { dpolicy->io_aware = false; } else if (discard_type == DPOLICY_UMOUNT) { -- cgit v1.2.3 From 076a6f32fe5d2d8c43f44e625c67d796eeb8f1ed Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 28 Feb 2018 17:07:27 +0800 Subject: f2fs: support hot file extension This patch supports to recognize hot file extension in f2fs, so that we can allocate proper hot segment location for its data, which can lead to better hot/cold seperation in filesystem. In addition, we changes a bit on query/add/del operation method for extension_list sysfs entry as below: - Query: cat /sys/fs/f2fs//extension_list - Add: echo 'extension' > /sys/fs/f2fs//extension_list - Del: echo '!extension' > /sys/fs/f2fs//extension_list - Add: echo '[h/c]extension' > /sys/fs/f2fs//extension_list - Del: echo '[h/c]!extension' > /sys/fs/f2fs//extension_list - [h] means add/del hot file extension - [c] means add/del cold file extension Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index c217a91088af..2d753f9b7499 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2667,7 +2667,8 @@ static int __get_segment_type_6(struct f2fs_io_info *fio) if (is_cold_data(fio->page) || file_is_cold(inode)) return CURSEG_COLD_DATA; - if (is_inode_flag_set(inode, FI_HOT_DATA)) + if (file_is_hot(inode) || + is_inode_flag_set(inode, FI_HOT_DATA)) return CURSEG_HOT_DATA; /* rw_hint_to_seg_type(inode->i_write_hint); */ return CURSEG_WARM_DATA; -- cgit v1.2.3 From 5738be52b3e88fab6008a95bab75548ef2f47826 Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Wed, 7 Mar 2018 16:22:50 +0800 Subject: f2fs: Don't overwrite all types of node to keep node chain Currently, we enable node SSR by default, and mixed different types of node segment to do SSR more intensively. Although reuse warm node is not allowed, warm node chain will be destroyed by errors introduced by other types node chain. So we'd better forbid reusing all types of node to keep warm node chain. Signed-off-by: Yunlei He Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 2d753f9b7499..92a46a7ba931 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1942,7 +1942,7 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) sbi->discard_blks--; /* don't overwrite by SSR to keep node chain */ - if (se->type == CURSEG_WARM_NODE) { + if (IS_NODESEG(se->type)) { if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map)) se->ckpt_valid_blocks++; } -- cgit v1.2.3 From d909e9410634d321ae6931e87bb0ad5eaac3fa62 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 8 Mar 2018 14:22:56 +0800 Subject: f2fs: wrap all options with f2fs_sb_info.mount_opt This patch merges miscellaneous mount options into struct f2fs_mount_info, After this patch, once we add new mount option, we don't need to worry about recovery of it in remount_fs(), since we will recover the f2fs_sb_info.mount_opt including all options. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 92a46a7ba931..3389721893d3 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2251,7 +2251,7 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type) return SIT_I(sbi)->last_victim[ALLOC_NEXT]; /* find segments from 0 to reuse freed segments */ - if (sbi->alloc_mode == ALLOC_MODE_REUSE) + if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE) return 0; return CURSEG_I(sbi, type)->segno; @@ -2604,7 +2604,7 @@ int rw_hint_to_seg_type(enum rw_hint hint) enum rw_hint io_type_to_rw_hint(struct f2fs_sb_info *sbi, enum page_type type, enum temp_type temp) { - if (sbi->whint_mode == WHINT_MODE_USER) { + if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_USER) { if (type == DATA) { if (temp == WARM) return WRITE_LIFE_NOT_SET; @@ -2615,7 +2615,7 @@ enum rw_hint io_type_to_rw_hint(struct f2fs_sb_info *sbi, } else { return WRITE_LIFE_NOT_SET; } - } else if (sbi->whint_mode == WHINT_MODE_FS) { + } else if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_FS) { if (type == DATA) { if (temp == WARM) return WRITE_LIFE_LONG; @@ -2684,7 +2684,7 @@ static int __get_segment_type(struct f2fs_io_info *fio) { int type = 0; - switch (fio->sbi->active_logs) { + switch (F2FS_OPTION(fio->sbi).active_logs) { case 2: type = __get_segment_type_2(fio); break; -- cgit v1.2.3 From ee2e74b3f00e663207d7832f613e75a5df3ae3fb Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Mon, 26 Mar 2018 17:32:23 +0800 Subject: f2fs: Add a segment type check in inplace write This patch add a segment type check in IPU, in case of something wrong with blkadd in dnode. Signed-off-by: Yunlei He Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 3389721893d3..d7bac60ad719 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2871,10 +2871,15 @@ void write_data_page(struct dnode_of_data *dn, struct f2fs_io_info *fio) int rewrite_data_page(struct f2fs_io_info *fio) { int err; + struct f2fs_sb_info *sbi = fio->sbi; fio->new_blkaddr = fio->old_blkaddr; /* i/o temperature is needed for passing down write hints */ __get_segment_type(fio); + + f2fs_bug_on(sbi, !IS_DATASEG(get_seg_entry(sbi, + GET_SEGNO(sbi, fio->new_blkaddr))->type)); + stat_inc_inplace_blocks(fio->sbi); err = f2fs_submit_page_bio(fio); -- cgit v1.2.3 From a44b418c31458b213ab59659776f5597e7e78b32 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 11 Apr 2018 23:09:04 -0700 Subject: f2fs: clear PageError on writepage - part 2 This patch clears PageError in some pages tagged by read path, but when we write the pages with valid contents, writepage should clear the bit likewise ext4. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index d7bac60ad719..01bc94df9f00 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2838,6 +2838,7 @@ void write_meta_page(struct f2fs_sb_info *sbi, struct page *page, fio.op_flags &= ~REQ_META; set_page_writeback(page); + ClearPageError(page); f2fs_submit_page_write(&fio); f2fs_update_iostat(sbi, io_type, F2FS_BLKSIZE); -- cgit v1.2.3 From bb53d06b5f21161295e6dea0eda941351cc9d3a1 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 24 May 2018 13:57:26 -0700 Subject: f2fs: let fstrim issue discard commands in lower priority The fstrim gathers huge number of large discard commands, and tries to issue without IO awareness, which results in long user-perceive IO latencies on READ, WRITE, and FLUSH in UFS. We've observed some of commands take several seconds due to long discard latency. This patch limits the maximum size to 2MB per candidate, and check IO congestion when issuing them to disk. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 139 +++++++++++++++++++++++++++++------------------------- 1 file changed, 75 insertions(+), 64 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 01bc94df9f00..0889f4b8dbf3 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1210,68 +1210,6 @@ static int __queue_discard_cmd(struct f2fs_sb_info *sbi, return 0; } -static void __issue_discard_cmd_range(struct f2fs_sb_info *sbi, - struct discard_policy *dpolicy, - unsigned int start, unsigned int end) -{ - struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; - struct discard_cmd *prev_dc = NULL, *next_dc = NULL; - struct rb_node **insert_p = NULL, *insert_parent = NULL; - struct discard_cmd *dc; - struct blk_plug plug; - int issued; - -next: - issued = 0; - - mutex_lock(&dcc->cmd_lock); - f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, &dcc->root)); - - dc = (struct discard_cmd *)__lookup_rb_tree_ret(&dcc->root, - NULL, start, - (struct rb_entry **)&prev_dc, - (struct rb_entry **)&next_dc, - &insert_p, &insert_parent, true); - if (!dc) - dc = next_dc; - - blk_start_plug(&plug); - - while (dc && dc->lstart <= end) { - struct rb_node *node; - - if (dc->len < dpolicy->granularity) - goto skip; - - if (dc->state != D_PREP) { - list_move_tail(&dc->list, &dcc->fstrim_list); - goto skip; - } - - __submit_discard_cmd(sbi, dpolicy, dc); - - if (++issued >= dpolicy->max_requests) { - start = dc->lstart + dc->len; - - blk_finish_plug(&plug); - mutex_unlock(&dcc->cmd_lock); - - schedule(); - - goto next; - } -skip: - node = rb_next(&dc->rb_node); - dc = rb_entry_safe(node, struct discard_cmd, rb_node); - - if (fatal_signal_pending(current)) - break; - } - - blk_finish_plug(&plug); - mutex_unlock(&dcc->cmd_lock); -} - static int __issue_discard_cmd(struct f2fs_sb_info *sbi, struct discard_policy *dpolicy) { @@ -1412,7 +1350,18 @@ next: static void __wait_all_discard_cmd(struct f2fs_sb_info *sbi, struct discard_policy *dpolicy) { - __wait_discard_cmd_range(sbi, dpolicy, 0, UINT_MAX); + struct discard_policy dp; + + if (dpolicy) { + __wait_discard_cmd_range(sbi, dpolicy, 0, UINT_MAX); + return; + } + + /* wait all */ + init_discard_policy(&dp, DPOLICY_FSTRIM, 1); + __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX); + init_discard_policy(&dp, DPOLICY_UMOUNT, 1); + __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX); } /* This should be covered by global mutex, &sit_i->sentry_lock */ @@ -1460,8 +1409,9 @@ bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi) init_discard_policy(&dpolicy, DPOLICY_UMOUNT, dcc->discard_granularity); __issue_discard_cmd(sbi, &dpolicy); dropped = __drop_discard_cmd(sbi); - __wait_all_discard_cmd(sbi, &dpolicy); + /* just to make sure there is no pending discard commands */ + __wait_all_discard_cmd(sbi, NULL); return dropped; } @@ -2453,6 +2403,67 @@ bool exist_trim_candidates(struct f2fs_sb_info *sbi, struct cp_control *cpc) return has_candidate; } +static void __issue_discard_cmd_range(struct f2fs_sb_info *sbi, + struct discard_policy *dpolicy, + unsigned int start, unsigned int end) +{ + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + struct discard_cmd *prev_dc = NULL, *next_dc = NULL; + struct rb_node **insert_p = NULL, *insert_parent = NULL; + struct discard_cmd *dc; + struct blk_plug plug; + int issued; + +next: + issued = 0; + + mutex_lock(&dcc->cmd_lock); + f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, &dcc->root)); + + dc = (struct discard_cmd *)__lookup_rb_tree_ret(&dcc->root, + NULL, start, + (struct rb_entry **)&prev_dc, + (struct rb_entry **)&next_dc, + &insert_p, &insert_parent, true); + if (!dc) + dc = next_dc; + + blk_start_plug(&plug); + + while (dc && dc->lstart <= end) { + struct rb_node *node; + + if (dc->len < dpolicy->granularity) + goto skip; + + if (dc->state != D_PREP) { + list_move_tail(&dc->list, &dcc->fstrim_list); + goto skip; + } + + __submit_discard_cmd(sbi, dpolicy, dc); + + if (++issued >= dpolicy->max_requests) { + start = dc->lstart + dc->len; + + blk_finish_plug(&plug); + mutex_unlock(&dcc->cmd_lock); + __wait_all_discard_cmd(sbi, NULL); + congestion_wait(BLK_RW_ASYNC, HZ/50); + goto next; + } +skip: + node = rb_next(&dc->rb_node); + dc = rb_entry_safe(node, struct discard_cmd, rb_node); + + if (fatal_signal_pending(current)) + break; + } + + blk_finish_plug(&plug); + mutex_unlock(&dcc->cmd_lock); +} + int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) { __u64 start = F2FS_BYTES_TO_BLK(range->start); -- cgit v1.2.3 From 31e2713935ea102ddb29dc5bf496d335a213f7f9 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 29 May 2018 09:58:42 -0700 Subject: f2fs: issue discard commands proactively in high fs utilization In the high utilization like over 80%, we don't expect huge # of large discard commands, but do many small pending discards which affects FTL GCs a lot. Let's issue them in that case. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 71 ++++++++++++++++++++++++++++++------------------------- 1 file changed, 39 insertions(+), 32 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 0889f4b8dbf3..8df1a168256b 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -996,6 +996,38 @@ static void __check_sit_bitmap(struct f2fs_sb_info *sbi, #endif } +static void __init_discard_policy(struct f2fs_sb_info *sbi, + struct discard_policy *dpolicy, + int discard_type, unsigned int granularity) +{ + /* common policy */ + dpolicy->type = discard_type; + dpolicy->sync = true; + dpolicy->granularity = granularity; + + dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST; + dpolicy->io_aware_gran = MAX_PLIST_NUM; + + if (discard_type == DPOLICY_BG) { + dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME; + dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME; + dpolicy->io_aware = true; + if (utilization(sbi) > DEF_DISCARD_URGENT_UTIL) { + dpolicy->granularity = 1; + dpolicy->max_interval = DEF_MIN_DISCARD_ISSUE_TIME; + } + } else if (discard_type == DPOLICY_FORCE) { + dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME; + dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME; + dpolicy->io_aware = false; + } else if (discard_type == DPOLICY_FSTRIM) { + dpolicy->io_aware = false; + } else if (discard_type == DPOLICY_UMOUNT) { + dpolicy->io_aware = false; + } +} + + /* this function is copied from blkdev_issue_discard from block/blk-lib.c */ static void __submit_discard_cmd(struct f2fs_sb_info *sbi, struct discard_policy *dpolicy, @@ -1358,9 +1390,9 @@ static void __wait_all_discard_cmd(struct f2fs_sb_info *sbi, } /* wait all */ - init_discard_policy(&dp, DPOLICY_FSTRIM, 1); + __init_discard_policy(sbi, &dp, DPOLICY_FSTRIM, 1); __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX); - init_discard_policy(&dp, DPOLICY_UMOUNT, 1); + __init_discard_policy(sbi, &dp, DPOLICY_UMOUNT, 1); __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX); } @@ -1406,7 +1438,8 @@ bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi) struct discard_policy dpolicy; bool dropped; - init_discard_policy(&dpolicy, DPOLICY_UMOUNT, dcc->discard_granularity); + __init_discard_policy(sbi, &dpolicy, DPOLICY_UMOUNT, + dcc->discard_granularity); __issue_discard_cmd(sbi, &dpolicy); dropped = __drop_discard_cmd(sbi); @@ -1427,7 +1460,7 @@ static int issue_discard_thread(void *data) set_freezable(); do { - init_discard_policy(&dpolicy, DPOLICY_BG, + __init_discard_policy(sbi, &dpolicy, DPOLICY_BG, dcc->discard_granularity); wait_event_interruptible_timeout(*q, @@ -1445,7 +1478,7 @@ static int issue_discard_thread(void *data) dcc->discard_wake = 0; if (sbi->gc_thread && sbi->gc_thread->gc_urgent) - init_discard_policy(&dpolicy, DPOLICY_FORCE, 1); + __init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE, 1); sb_start_intwrite(sbi->sb); @@ -1738,32 +1771,6 @@ skip: wake_up_discard_thread(sbi, false); } -void init_discard_policy(struct discard_policy *dpolicy, - int discard_type, unsigned int granularity) -{ - /* common policy */ - dpolicy->type = discard_type; - dpolicy->sync = true; - dpolicy->granularity = granularity; - - dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST; - dpolicy->io_aware_gran = MAX_PLIST_NUM; - - if (discard_type == DPOLICY_BG) { - dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME; - dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME; - dpolicy->io_aware = true; - } else if (discard_type == DPOLICY_FORCE) { - dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME; - dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME; - dpolicy->io_aware = false; - } else if (discard_type == DPOLICY_FSTRIM) { - dpolicy->io_aware = false; - } else if (discard_type == DPOLICY_UMOUNT) { - dpolicy->io_aware = false; - } -} - static int create_discard_cmd_control(struct f2fs_sb_info *sbi) { dev_t dev = sbi->sb->s_bdev->bd_dev; @@ -2522,7 +2529,7 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) start_block = START_BLOCK(sbi, start_segno); end_block = START_BLOCK(sbi, min(cur_segno, end_segno) + 1); - init_discard_policy(&dpolicy, DPOLICY_FSTRIM, cpc.trim_minlen); + __init_discard_policy(sbi, &dpolicy, DPOLICY_FSTRIM, cpc.trim_minlen); __issue_discard_cmd_range(sbi, &dpolicy, start_block, end_block); trimmed = __wait_discard_cmd_range(sbi, &dpolicy, start_block, end_block); -- cgit v1.2.3 From 4738f527db84ecb5d40691b8e5bf3e9bfced2243 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 9 Apr 2018 10:25:23 +0800 Subject: f2fs: don't split checkpoint in fstrim Now, we issue discard asynchronously in separated thread instead of in checkpoint, after that, we won't encounter long latency in checkpoint due to huge number of synchronous discard command handling, so, we don't need to split checkpoint to do trim in batch, merge it and obsolete related sysfs entry. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 39 ++++++++++++--------------------------- 1 file changed, 12 insertions(+), 27 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 8df1a168256b..30f07dd5da3f 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2475,7 +2475,7 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) { __u64 start = F2FS_BYTES_TO_BLK(range->start); __u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1; - unsigned int start_segno, end_segno, cur_segno; + unsigned int start_segno, end_segno; block_t start_block, end_block; struct cp_control cpc; struct discard_policy dpolicy; @@ -2501,40 +2501,27 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) cpc.reason = CP_DISCARD; cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen)); + cpc.trim_start = start_segno; + cpc.trim_end = end_segno; - /* do checkpoint to issue discard commands safely */ - for (cur_segno = start_segno; cur_segno <= end_segno; - cur_segno = cpc.trim_end + 1) { - cpc.trim_start = cur_segno; - - if (sbi->discard_blks == 0) - break; - else if (sbi->discard_blks < BATCHED_TRIM_BLOCKS(sbi)) - cpc.trim_end = end_segno; - else - cpc.trim_end = min_t(unsigned int, - rounddown(cur_segno + - BATCHED_TRIM_SEGMENTS(sbi), - sbi->segs_per_sec) - 1, end_segno); - - mutex_lock(&sbi->gc_mutex); - err = write_checkpoint(sbi, &cpc); - mutex_unlock(&sbi->gc_mutex); - if (err) - break; + if (sbi->discard_blks == 0) + goto out; - schedule(); - } + mutex_lock(&sbi->gc_mutex); + err = write_checkpoint(sbi, &cpc); + mutex_unlock(&sbi->gc_mutex); + if (err) + goto out; start_block = START_BLOCK(sbi, start_segno); - end_block = START_BLOCK(sbi, min(cur_segno, end_segno) + 1); + end_block = START_BLOCK(sbi, end_segno + 1); __init_discard_policy(sbi, &dpolicy, DPOLICY_FSTRIM, cpc.trim_minlen); __issue_discard_cmd_range(sbi, &dpolicy, start_block, end_block); trimmed = __wait_discard_cmd_range(sbi, &dpolicy, start_block, end_block); -out: range->len = F2FS_BLK_TO_BYTES(trimmed); +out: return err; } @@ -3922,8 +3909,6 @@ int build_segment_manager(struct f2fs_sb_info *sbi) sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS; sm_info->min_ssr_sections = reserved_sections(sbi); - sm_info->trim_sections = DEF_BATCHED_TRIM_SECTIONS; - INIT_LIST_HEAD(&sm_info->sit_entry_set); init_rwsem(&sm_info->curseg_lock); -- cgit v1.2.3 From 85d2070f60c66b469e21a0c8e67c25c7cd5b4c45 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 10 Apr 2018 15:43:09 +0800 Subject: f2fs: turn down IO priority of discard from background In order to avoid interfering normal r/w IO, let's turn down IO priority of discard issued from background. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 30f07dd5da3f..478a4504ba9a 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1012,6 +1012,7 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi, dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME; dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME; dpolicy->io_aware = true; + dpolicy->sync = false; if (utilization(sbi) > DEF_DISCARD_URGENT_UTIL) { dpolicy->granularity = 1; dpolicy->max_interval = DEF_MIN_DISCARD_ISSUE_TIME; -- cgit v1.2.3 From 73450231fffffe6a2863d493cd053596f2a2de57 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 31 May 2018 10:20:48 -0700 Subject: f2fs: run fstrim asynchronously if runtime discard is on We don't need to wait for whole bunch of discard candidates in fstrim, since runtime discard will issue them in idle time. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 478a4504ba9a..a02d5c1a7ed2 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2519,9 +2519,18 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) __init_discard_policy(sbi, &dpolicy, DPOLICY_FSTRIM, cpc.trim_minlen); __issue_discard_cmd_range(sbi, &dpolicy, start_block, end_block); - trimmed = __wait_discard_cmd_range(sbi, &dpolicy, + + /* + * We filed discard candidates, but actually we don't need to wait for + * all of them, since they'll be issued in idle time along with runtime + * discard option. User configuration looks like using runtime discard + * or periodic fstrim instead of it. + */ + if (!test_opt(sbi, DISCARD)) { + trimmed = __wait_discard_cmd_range(sbi, &dpolicy, start_block, end_block); - range->len = F2FS_BLK_TO_BYTES(trimmed); + range->len = F2FS_BLK_TO_BYTES(trimmed); + } out: return err; } -- cgit v1.2.3 From fdf61219dc2512cd29b8b03a460a51af8ddca876 Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Wed, 4 Apr 2018 17:29:05 +0800 Subject: f2fs: issue all big range discards in umount process This patch modify max_requests to UINT_MAX, to issue all big range discards in umount. Signed-off-by: Yunlei He Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index a02d5c1a7ed2..3c2e44f76ff3 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1024,6 +1024,7 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi, } else if (discard_type == DPOLICY_FSTRIM) { dpolicy->io_aware = false; } else if (discard_type == DPOLICY_UMOUNT) { + dpolicy->max_requests = UINT_MAX; dpolicy->io_aware = false; } } -- cgit v1.2.3 From 298032d4d4a6dc6da4f7298da0200ef56e93006d Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 9 Apr 2018 20:25:06 +0800 Subject: f2fs: don't use GFP_ZERO for page caches Related to https://lkml.org/lkml/2018/4/8/661 Sometimes, we need to write meta data to new allocated block address, then we will allocate a zeroed page in inner inode's address space, and fill partial data in it, and leave other place with zero value which means some fields are initial status. There are two inner inodes (meta inode and node inode) setting __GFP_ZERO, I have just checked them, for both of them, we can avoid using __GFP_ZERO, and do initialization by ourselves to avoid unneeded/redundant zeroing from mm. Cc: Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 3c2e44f76ff3..a1f9c8a19383 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2059,6 +2059,7 @@ static void write_current_sum_page(struct f2fs_sb_info *sbi, struct f2fs_summary_block *dst; dst = (struct f2fs_summary_block *)page_address(page); + memset(dst, 0, PAGE_SIZE); mutex_lock(&curseg->curseg_mutex); @@ -3214,6 +3215,7 @@ static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr) page = grab_meta_page(sbi, blkaddr++); kaddr = (unsigned char *)page_address(page); + memset(kaddr, 0, PAGE_SIZE); /* Step 1: write nat cache */ seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA); @@ -3238,6 +3240,7 @@ static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr) if (!page) { page = grab_meta_page(sbi, blkaddr++); kaddr = (unsigned char *)page_address(page); + memset(kaddr, 0, PAGE_SIZE); written_size = 0; } summary = (struct f2fs_summary *)(kaddr + written_size); -- cgit v1.2.3 From 9190cadf38db9a3b321c8882b1d27219a5e6f436 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sun, 8 Apr 2018 20:39:03 +0800 Subject: f2fs: correct return value of f2fs_trim_fs Correct return value in two cases: - return EINVAL if end boundary is out-of-range. - return EIO if fs needs off-line check. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index a1f9c8a19383..f1fe260537e0 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2489,12 +2489,12 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) return -EINVAL; if (end <= MAIN_BLKADDR(sbi)) - goto out; + return -EINVAL; if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) { f2fs_msg(sbi->sb, KERN_WARNING, "Found FS corruption, run fsck to fix."); - goto out; + return -EIO; } /* start/end segment number in main_area */ -- cgit v1.2.3 From ea2813111f1f31e04892c955291356322eec23b8 Mon Sep 17 00:00:00 2001 From: Zhikang Zhang Date: Mon, 9 Apr 2018 04:28:41 +0800 Subject: f2fs: check cur_valid_map_mir & raw_sit block count when flush sit entries We should check valid_map_mir and block count to ensure the flushed raw_sit is correct. Signed-off-by: Zhikang Zhang Signed-off-by: Yunlei He Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index f1fe260537e0..7d6c1e4b1374 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3481,6 +3481,11 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) int offset, sit_offset; se = get_seg_entry(sbi, segno); +#ifdef CONFIG_F2FS_CHECK_FS + if (memcmp(se->cur_valid_map, se->cur_valid_map_mir, + SIT_VBLOCK_MAP_SIZE)) + f2fs_bug_on(sbi, 1); +#endif /* add discard candidates */ if (!(cpc->reason & CP_DISCARD)) { @@ -3496,10 +3501,14 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) cpu_to_le32(segno); seg_info_to_raw_sit(se, &sit_in_journal(journal, offset)); + check_block_count(sbi, segno, + &sit_in_journal(journal, offset)); } else { sit_offset = SIT_ENTRY_OFFSET(sit_i, segno); seg_info_to_raw_sit(se, &raw_sit->entries[sit_offset]); + check_block_count(sbi, segno, + &raw_sit->entries[sit_offset]); } __clear_bit(segno, bitmap); -- cgit v1.2.3 From aa857e0f3b0993899e39659b2f671e8cc9870ac3 Mon Sep 17 00:00:00 2001 From: Sheng Yong Date: Tue, 17 Apr 2018 17:12:27 +0800 Subject: f2fs: check if inmem_pages list is empty correctly `cur' will never be NULL, we should check inmem_pages list instead. Signed-off-by: Sheng Yong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 7d6c1e4b1374..a7f0e5932642 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -328,7 +328,7 @@ void drop_inmem_page(struct inode *inode, struct page *page) break; } - f2fs_bug_on(sbi, !cur || cur->page != page); + f2fs_bug_on(sbi, list_empty(head) || cur->page != page); list_del(&cur->list); mutex_unlock(&fi->inmem_lock); -- cgit v1.2.3 From 937f4ef79e257735e03149815fb231c8d02e3a1f Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Fri, 13 Apr 2018 11:08:05 +0800 Subject: f2fs: stop issue discard if something wrong with f2fs v4->v5: move data corruption check to __submit_discard_cmd, in order to control discard io submitted more accurately, besides, increase async thread wait time if data corruption detected. This patch stop async thread and umount process to issue discard if something wrong with f2fs, which is similar to fstrim. Signed-off-by: Yunlei He Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index a7f0e5932642..d4b787b00c5a 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1044,6 +1044,9 @@ static void __submit_discard_cmd(struct f2fs_sb_info *sbi, if (dc->state != D_PREP) return; + if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) + return; + trace_f2fs_issue_discard(dc->bdev, dc->start, dc->len); dc->error = __blkdev_issue_discard(dc->bdev, @@ -1475,6 +1478,10 @@ static int issue_discard_thread(void *data) continue; if (kthread_should_stop()) return 0; + if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) { + wait_ms = dpolicy.max_interval; + continue; + } if (dcc->discard_wake) dcc->discard_wake = 0; -- cgit v1.2.3 From b025f6dfc018e49f53549c846c3ad6045aab39cd Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 23 Apr 2018 10:36:14 +0800 Subject: f2fs: clean up commit_inmem_pages() This patch moves error handling from commit_inmem_pages() into __commit_inmem_page() for cleanup, no logic change. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 46 ++++++++++++++++++++++------------------------ 1 file changed, 22 insertions(+), 24 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index d4b787b00c5a..7f6f029aa866 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -343,8 +343,7 @@ void drop_inmem_page(struct inode *inode, struct page *page) trace_f2fs_commit_inmem_page(page, INMEM_INVALIDATE); } -static int __commit_inmem_pages(struct inode *inode, - struct list_head *revoke_list) +static int __commit_inmem_pages(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_inode_info *fi = F2FS_I(inode); @@ -357,9 +356,12 @@ static int __commit_inmem_pages(struct inode *inode, .op_flags = REQ_SYNC | REQ_PRIO, .io_type = FS_DATA_IO, }; + struct list_head revoke_list; pgoff_t last_idx = ULONG_MAX; int err = 0; + INIT_LIST_HEAD(&revoke_list); + list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) { struct page *page = cur->page; @@ -393,14 +395,28 @@ retry: last_idx = page->index; } unlock_page(page); - list_move_tail(&cur->list, revoke_list); + list_move_tail(&cur->list, &revoke_list); } if (last_idx != ULONG_MAX) f2fs_submit_merged_write_cond(sbi, inode, 0, last_idx, DATA); - if (!err) - __revoke_inmem_pages(inode, revoke_list, false, false); + if (err) { + /* + * try to revoke all committed pages, but still we could fail + * due to no memory or other reason, if that happened, EAGAIN + * will be returned, which means in such case, transaction is + * already not integrity, caller should use journal to do the + * recovery or rewrite & commit last transaction. For other + * error number, revoking was done by filesystem itself. + */ + err = __revoke_inmem_pages(inode, &revoke_list, false, true); + + /* drop all uncommitted pages */ + __revoke_inmem_pages(inode, &fi->inmem_pages, true, false); + } else { + __revoke_inmem_pages(inode, &revoke_list, false, false); + } return err; } @@ -409,34 +425,16 @@ int commit_inmem_pages(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_inode_info *fi = F2FS_I(inode); - struct list_head revoke_list; int err; - INIT_LIST_HEAD(&revoke_list); f2fs_balance_fs(sbi, true); f2fs_lock_op(sbi); set_inode_flag(inode, FI_ATOMIC_COMMIT); mutex_lock(&fi->inmem_lock); - err = __commit_inmem_pages(inode, &revoke_list); - if (err) { - int ret; - /* - * try to revoke all committed pages, but still we could fail - * due to no memory or other reason, if that happened, EAGAIN - * will be returned, which means in such case, transaction is - * already not integrity, caller should use journal to do the - * recovery or rewrite & commit last transaction. For other - * error number, revoking was done by filesystem itself. - */ - ret = __revoke_inmem_pages(inode, &revoke_list, false, true); - if (ret) - err = ret; + err = __commit_inmem_pages(inode); - /* drop all uncommitted pages */ - __revoke_inmem_pages(inode, &fi->inmem_pages, true, false); - } spin_lock(&sbi->inode_lock[ATOMIC_FILE]); if (!list_empty(&fi->inmem_ilist)) list_del_init(&fi->inmem_ilist); -- cgit v1.2.3 From 78f8b0f46fa23f9dc5c8b501db414e9546ba44b0 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 24 Apr 2018 15:44:16 -0600 Subject: f2fs: sanity check on sit entry syzbot hit the following crash on upstream commit 87ef12027b9b1dd0e0b12cf311fbcb19f9d92539 (Wed Apr 18 19:48:17 2018 +0000) Merge tag 'ceph-for-4.17-rc2' of git://github.com/ceph/ceph-client syzbot dashboard link: https://syzkaller.appspot.com/bug?extid=83699adeb2d13579c31e C reproducer: https://syzkaller.appspot.com/x/repro.c?id=5805208181407744 syzkaller reproducer: https://syzkaller.appspot.com/x/repro.syz?id=6005073343676416 Raw console output: https://syzkaller.appspot.com/x/log.txt?id=6555047731134464 Kernel config: https://syzkaller.appspot.com/x/.config?id=1808800213120130118 compiler: gcc (GCC) 8.0.1 20180413 (experimental) IMPORTANT: if you fix the bug, please add the following tag to the commit: Reported-by: syzbot+83699adeb2d13579c31e@syzkaller.appspotmail.com It will help syzbot understand when the bug is fixed. See footer for details. If you forward the report, please keep this part and the footer. F2FS-fs (loop0): Magic Mismatch, valid(0xf2f52010) - read(0x0) F2FS-fs (loop0): Can't find valid F2FS filesystem in 1th superblock F2FS-fs (loop0): invalid crc value BUG: unable to handle kernel paging request at ffffed006b2a50c0 PGD 21ffee067 P4D 21ffee067 PUD 21fbeb067 PMD 0 Oops: 0000 [#1] SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 0 PID: 4514 Comm: syzkaller989480 Not tainted 4.17.0-rc1+ #8 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:build_sit_entries fs/f2fs/segment.c:3653 [inline] RIP: 0010:build_segment_manager+0x7ef7/0xbf70 fs/f2fs/segment.c:3852 RSP: 0018:ffff8801b102e5b0 EFLAGS: 00010a06 RAX: 1ffff1006b2a50c0 RBX: 0000000000000004 RCX: 0000000000000001 RDX: 0000000000000000 RSI: 0000000000000001 RDI: ffff8801ac74243e RBP: ffff8801b102f410 R08: ffff8801acbd46c0 R09: fffffbfff14d9af8 R10: fffffbfff14d9af8 R11: ffff8801acbd46c0 R12: ffff8801ac742a80 R13: ffff8801d9519100 R14: dffffc0000000000 R15: ffff880359528600 FS: 0000000001e04880(0000) GS:ffff8801dae00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffed006b2a50c0 CR3: 00000001ac6ac000 CR4: 00000000001406f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: f2fs_fill_super+0x4095/0x7bf0 fs/f2fs/super.c:2803 mount_bdev+0x30c/0x3e0 fs/super.c:1165 f2fs_mount+0x34/0x40 fs/f2fs/super.c:3020 mount_fs+0xae/0x328 fs/super.c:1268 vfs_kern_mount.part.34+0xd4/0x4d0 fs/namespace.c:1037 vfs_kern_mount fs/namespace.c:1027 [inline] do_new_mount fs/namespace.c:2517 [inline] do_mount+0x564/0x3070 fs/namespace.c:2847 ksys_mount+0x12d/0x140 fs/namespace.c:3063 __do_sys_mount fs/namespace.c:3077 [inline] __se_sys_mount fs/namespace.c:3074 [inline] __x64_sys_mount+0xbe/0x150 fs/namespace.c:3074 do_syscall_64+0x1b1/0x800 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x443d6a RSP: 002b:00007ffd312813c8 EFLAGS: 00000297 ORIG_RAX: 00000000000000a5 RAX: ffffffffffffffda RBX: 0000000020000c00 RCX: 0000000000443d6a RDX: 0000000020000000 RSI: 0000000020000100 RDI: 00007ffd312813d0 RBP: 0000000000000003 R08: 0000000020016a00 R09: 000000000000000a R10: 0000000000000000 R11: 0000000000000297 R12: 0000000000000004 R13: 0000000000402c60 R14: 0000000000000000 R15: 0000000000000000 RIP: build_sit_entries fs/f2fs/segment.c:3653 [inline] RSP: ffff8801b102e5b0 RIP: build_segment_manager+0x7ef7/0xbf70 fs/f2fs/segment.c:3852 RSP: ffff8801b102e5b0 CR2: ffffed006b2a50c0 ---[ end trace a2034989e196ff17 ]--- Reported-and-tested-by: syzbot+83699adeb2d13579c31e@syzkaller.appspotmail.com Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 7f6f029aa866..ae3cf8dce38e 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3762,6 +3762,15 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) unsigned int old_valid_blocks; start = le32_to_cpu(segno_in_journal(journal, i)); + if (start >= MAIN_SEGS(sbi)) { + f2fs_msg(sbi->sb, KERN_ERR, + "Wrong journal entry on segno %u", + start); + set_sbi_flag(sbi, SBI_NEED_FSCK); + err = -EINVAL; + break; + } + se = &sit_i->sentries[start]; sit = sit_in_journal(journal, i); -- cgit v1.2.3 From 26bf4e8a96aada18cf1b23a920f1f3ee50b5b739 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 24 Apr 2018 21:34:05 -0600 Subject: f2fs: sanity check for total valid node blocks This patch enhances sanity check for SIT entries. syzbot hit the following crash on upstream commit 83beed7b2b26f232d782127792dd0cd4362fdc41 (Fri Apr 20 17:56:32 2018 +0000) Merge branch 'fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/evalenti/linux-soc-thermal syzbot dashboard link: https://syzkaller.appspot.com/bug?extid=bf9253040425feb155ad syzkaller reproducer: https://syzkaller.appspot.com/x/repro.syz?id=5692130282438656 Raw console output: https://syzkaller.appspot.com/x/log.txt?id=5095924598571008 Kernel config: https://syzkaller.appspot.com/x/.config?id=1808800213120130118 compiler: gcc (GCC) 8.0.1 20180413 (experimental) IMPORTANT: if you fix the bug, please add the following tag to the commit: Reported-by: syzbot+bf9253040425feb155ad@syzkaller.appspotmail.com It will help syzbot understand when the bug is fixed. See footer for details. If you forward the report, please keep this part and the footer. F2FS-fs (loop0): invalid crc value F2FS-fs (loop0): Try to recover 1th superblock, ret: 0 F2FS-fs (loop0): Mounted with checkpoint version = d F2FS-fs (loop0): Bitmap was wrongly cleared, blk:9740 ------------[ cut here ]------------ kernel BUG at fs/f2fs/segment.c:1884! invalid opcode: 0000 [#1] SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 1 PID: 4508 Comm: syz-executor0 Not tainted 4.17.0-rc1+ #10 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:update_sit_entry+0x1215/0x1590 fs/f2fs/segment.c:1882 RSP: 0018:ffff8801af526708 EFLAGS: 00010282 RAX: ffffed0035ea4cc0 RBX: ffff8801ad454f90 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffffffff82eeb87e RDI: ffffed0035ea4cb6 RBP: ffff8801af526760 R08: ffff8801ad4a2480 R09: ffffed003b5e4f90 R10: ffffed003b5e4f90 R11: ffff8801daf27c87 R12: ffff8801adb8d380 R13: 0000000000000001 R14: 0000000000000008 R15: 00000000ffffffff FS: 00000000014af940(0000) GS:ffff8801daf00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f06bc223000 CR3: 00000001adb02000 CR4: 00000000001406e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: allocate_data_block+0x66f/0x2050 fs/f2fs/segment.c:2663 do_write_page+0x105/0x1b0 fs/f2fs/segment.c:2727 write_node_page+0x129/0x350 fs/f2fs/segment.c:2770 __write_node_page+0x7da/0x1370 fs/f2fs/node.c:1398 sync_node_pages+0x18cf/0x1eb0 fs/f2fs/node.c:1652 block_operations+0x429/0xa60 fs/f2fs/checkpoint.c:1088 write_checkpoint+0x3ba/0x5380 fs/f2fs/checkpoint.c:1405 f2fs_sync_fs+0x2fb/0x6a0 fs/f2fs/super.c:1077 __sync_filesystem fs/sync.c:39 [inline] sync_filesystem+0x265/0x310 fs/sync.c:67 generic_shutdown_super+0xd7/0x520 fs/super.c:429 kill_block_super+0xa4/0x100 fs/super.c:1191 kill_f2fs_super+0x9f/0xd0 fs/f2fs/super.c:3030 deactivate_locked_super+0x97/0x100 fs/super.c:316 deactivate_super+0x188/0x1b0 fs/super.c:347 cleanup_mnt+0xbf/0x160 fs/namespace.c:1174 __cleanup_mnt+0x16/0x20 fs/namespace.c:1181 task_work_run+0x1e4/0x290 kernel/task_work.c:113 tracehook_notify_resume include/linux/tracehook.h:191 [inline] exit_to_usermode_loop+0x2bd/0x310 arch/x86/entry/common.c:166 prepare_exit_to_usermode arch/x86/entry/common.c:196 [inline] syscall_return_slowpath arch/x86/entry/common.c:265 [inline] do_syscall_64+0x6ac/0x800 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x457d97 RSP: 002b:00007ffd46f9c8e8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000457d97 RDX: 00000000014b09a3 RSI: 0000000000000002 RDI: 00007ffd46f9da50 RBP: 00007ffd46f9da50 R08: 0000000000000000 R09: 0000000000000009 R10: 0000000000000005 R11: 0000000000000246 R12: 00000000014b0940 R13: 0000000000000000 R14: 0000000000000002 R15: 000000000000658e RIP: update_sit_entry+0x1215/0x1590 fs/f2fs/segment.c:1882 RSP: ffff8801af526708 ---[ end trace f498328bb02610a2 ]--- Reported-and-tested-by: syzbot+bf9253040425feb155ad@syzkaller.appspotmail.com Reported-and-tested-by: syzbot+7d6d31d3bc702f566ce3@syzkaller.appspotmail.com Reported-and-tested-by: syzbot+0a725420475916460f12@syzkaller.appspotmail.com Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index ae3cf8dce38e..29a648e01415 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3712,6 +3712,7 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) unsigned int i, start, end; unsigned int readed, start_blk = 0; int err = 0; + block_t total_node_blocks = 0; do { readed = ra_meta_pages(sbi, start_blk, BIO_MAX_PAGES, @@ -3734,6 +3735,8 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) if (err) return err; seg_info_from_raw_sit(se, &sit); + if (IS_NODESEG(se->type)) + total_node_blocks += se->valid_blocks; /* build discard map only one time */ if (f2fs_discard_en(sbi)) { @@ -3775,11 +3778,15 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) sit = sit_in_journal(journal, i); old_valid_blocks = se->valid_blocks; + if (IS_NODESEG(se->type)) + total_node_blocks -= old_valid_blocks; err = check_block_count(sbi, start, &sit); if (err) break; seg_info_from_raw_sit(se, &sit); + if (IS_NODESEG(se->type)) + total_node_blocks += se->valid_blocks; if (f2fs_discard_en(sbi)) { if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) { @@ -3798,6 +3805,15 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) se->valid_blocks - old_valid_blocks; } up_read(&curseg->journal_rwsem); + + if (!err && total_node_blocks != valid_node_count(sbi)) { + f2fs_msg(sbi->sb, KERN_ERR, + "SIT is corrupted node# %u vs %u", + total_node_blocks, valid_node_count(sbi)); + set_sbi_flag(sbi, SBI_NEED_FSCK); + err = -EINVAL; + } + return err; } -- cgit v1.2.3 From 03279ce90b4666931c32cebf089c49a223db0c09 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 25 Apr 2018 19:38:17 +0800 Subject: f2fs: fix potential overflow In build_sit_entries(), if valid_blocks in SIT block is smaller than valid_blocks in journal, for below calculation: sbi->discard_blks += old_valid_blocks - se->valid_blocks; There will be two times potential overflow: - old_valid_blocks - se->valid_blocks will overflow, and be a very large number. - sbi->discard_blks += result will overflow again, comes out a correct result accidently. Anyway, it should be fixed. Fixes: d600af236da5 ("f2fs: avoid unneeded loop in build_sit_entries") Fixes: 1f43e2ad7bff ("f2fs: introduce CP_TRIMMED_FLAG to avoid unneeded discard") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 29a648e01415..b6a420d65f4e 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3795,14 +3795,17 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) } else { memcpy(se->discard_map, se->cur_valid_map, SIT_VBLOCK_MAP_SIZE); - sbi->discard_blks += old_valid_blocks - - se->valid_blocks; + sbi->discard_blks += old_valid_blocks; + sbi->discard_blks -= se->valid_blocks; } } - if (sbi->segs_per_sec > 1) + if (sbi->segs_per_sec > 1) { get_sec_entry(sbi, start)->valid_blocks += - se->valid_blocks - old_valid_blocks; + se->valid_blocks; + get_sec_entry(sbi, start)->valid_blocks -= + old_valid_blocks; + } } up_read(&curseg->journal_rwsem); -- cgit v1.2.3 From 2cf64590361ec367f3d2b91ab29777eb087222bb Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 25 Apr 2018 17:38:29 +0800 Subject: f2fs: introduce release_discard_addr() for cleanup Introduce release_discard_addr() to include common codes for cleanup. Signed-off-by: Chao Yu [Fengguang Wu: declare static function, reported by kbuild test robot] Signed-off-by: Fengguang Wu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index b6a420d65f4e..aa5da6ea4ff8 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1661,16 +1661,20 @@ static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc, return false; } +static void release_discard_addr(struct discard_entry *entry) +{ + list_del(&entry->list); + kmem_cache_free(discard_entry_slab, entry); +} + void release_discard_addrs(struct f2fs_sb_info *sbi) { struct list_head *head = &(SM_I(sbi)->dcc_info->entry_list); struct discard_entry *entry, *this; /* drop caches */ - list_for_each_entry_safe(entry, this, head, list) { - list_del(&entry->list); - kmem_cache_free(discard_entry_slab, entry); - } + list_for_each_entry_safe(entry, this, head, list) + release_discard_addr(entry); } /* @@ -1770,9 +1774,8 @@ skip: if (cur_pos < sbi->blocks_per_seg) goto find_next; - list_del(&entry->list); + release_discard_addr(entry); dcc->nr_discards -= total_len; - kmem_cache_free(discard_entry_slab, entry); } wake_up_discard_thread(sbi, false); -- cgit v1.2.3 From 9bb86b63dc0f16877a3014611bce29921c1b2ffa Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 26 Apr 2018 17:05:50 +0800 Subject: f2fs: treat volatile file's data as hot one Volatile file's data will be updated oftenly, so it'd better to place its data into hot data segment. In addition, for atomic file, we change to check FI_ATOMIC_FILE instead of FI_HOT_DATA to make code readability better. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index aa5da6ea4ff8..917d7acb12cf 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -309,7 +309,6 @@ void drop_inmem_pages(struct inode *inode) mutex_unlock(&fi->inmem_lock); clear_inode_flag(inode, FI_ATOMIC_FILE); - clear_inode_flag(inode, FI_HOT_DATA); stat_dec_atomic_write(inode); } @@ -2693,7 +2692,9 @@ static int __get_segment_type_6(struct f2fs_io_info *fio) if (is_cold_data(fio->page) || file_is_cold(inode)) return CURSEG_COLD_DATA; if (file_is_hot(inode) || - is_inode_flag_set(inode, FI_HOT_DATA)) + is_inode_flag_set(inode, FI_HOT_DATA) || + is_inode_flag_set(inode, FI_ATOMIC_FILE) || + is_inode_flag_set(inode, FI_VOLATILE_FILE)) return CURSEG_HOT_DATA; /* rw_hint_to_seg_type(inode->i_write_hint); */ return CURSEG_WARM_DATA; -- cgit v1.2.3 From a5d0ccbc189a02a0931d7a3ee092d64f89d69f0f Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 15 May 2018 18:59:55 +0800 Subject: f2fs: fix to initialize min_mtime with ULLONG_MAX Since sit_i.min_mtime's type is unsigned long long, so we should initialize it with max value of the type ULLONG_MAX instead of LLONG_MAX. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 917d7acb12cf..719022d62d8f 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3919,7 +3919,7 @@ static void init_min_max_mtime(struct f2fs_sb_info *sbi) down_write(&sit_i->sentry_lock); - sit_i->min_mtime = LLONG_MAX; + sit_i->min_mtime = ULLONG_MAX; for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) { unsigned int i; -- cgit v1.2.3 From 461247b21fde524b9022dcadb2a8e751ab520a55 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 23 May 2018 22:25:08 +0800 Subject: f2fs: clean up with is_valid_blkaddr() - rename is_valid_blkaddr() to is_valid_meta_blkaddr() for readability. - introduce is_valid_blkaddr() for cleanup. No logic change in this patch. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 719022d62d8f..4412c506c6ad 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1975,7 +1975,7 @@ bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr) struct seg_entry *se; bool is_cp = false; - if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) + if (!is_valid_blkaddr(blkaddr)) return true; down_read(&sit_i->sentry_lock); @@ -3040,7 +3040,7 @@ void f2fs_wait_on_block_writeback(struct f2fs_sb_info *sbi, block_t blkaddr) { struct page *cpage; - if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) + if (!is_valid_blkaddr(blkaddr)) return; cpage = find_lock_page(META_MAPPING(sbi), blkaddr); -- cgit v1.2.3 From c4408c238722fdfd3302be50dcce1f89c12d6666 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 23 Apr 2018 10:36:13 +0800 Subject: f2fs: fix to wait page writeback during revoking atomic write After revoking atomic write, related LBA can be reused by others, so we need to wait page writeback before reusing the LBA, in order to avoid interference between old atomic written in-flight IO and new IO. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 4412c506c6ad..a31517e231b6 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -230,6 +230,8 @@ static int __revoke_inmem_pages(struct inode *inode, lock_page(page); + f2fs_wait_on_page_writeback(page, DATA, true); + if (recover) { struct dnode_of_data dn; struct node_info ni; -- cgit v1.2.3 From 1f62e4702a34d1fc33be8734777fd858b1147bb7 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 26 May 2018 09:00:13 +0800 Subject: f2fs: keep migration IO order in LFS mode For non-migration IO, we will keep order of data/node blocks' submitting as allocation sequence by sorting IOs in per log io_list list, but for migration IO, it could be out-of-order. In LFS mode, we should keep all IOs including migration IO be ordered, so that this patch fixes to add an additional lock to keep submitting order. Signed-off-by: Chao Yu Signed-off-by: Yunlong Song Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index a31517e231b6..ce5a2bd19e4b 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2831,7 +2831,10 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) { int type = __get_segment_type(fio); int err; + bool keep_order = (test_opt(fio->sbi, LFS) && type == CURSEG_COLD_DATA); + if (keep_order) + down_read(&fio->sbi->io_order_lock); reallocate: allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr, &fio->new_blkaddr, sum, type, fio, true); @@ -2844,6 +2847,8 @@ reallocate: } else if (!err) { update_device_state(fio); } + if (keep_order) + up_read(&fio->sbi->io_order_lock); } void write_meta_page(struct f2fs_sb_info *sbi, struct page *page, -- cgit v1.2.3 From 405909e7f53293a13c9a0fad5c81ce1472e9fd32 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 7 May 2018 14:22:40 -0700 Subject: f2fs: introduce sbi->gc_mode to determine the policy This is to avoid sbi->gc_thread pointer access. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index ce5a2bd19e4b..8656295c76e3 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -177,7 +177,7 @@ bool need_SSR(struct f2fs_sb_info *sbi) if (test_opt(sbi, LFS)) return false; - if (sbi->gc_thread && sbi->gc_thread->gc_urgent) + if (sbi->gc_mode == GC_URGENT) return true; return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs + @@ -1485,7 +1485,7 @@ static int issue_discard_thread(void *data) if (dcc->discard_wake) dcc->discard_wake = 0; - if (sbi->gc_thread && sbi->gc_thread->gc_urgent) + if (sbi->gc_mode == GC_URGENT) __init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE, 1); sb_start_intwrite(sbi->sb); -- cgit v1.2.3 From b125dfb20d18db91eac671aa241346cd1e1c0106 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 7 May 2018 20:28:54 +0800 Subject: f2fs: avoid stucking GC due to atomic write f2fs doesn't allow abuse on atomic write class interface, so except limiting in-mem pages' total memory usage capacity, we need to limit atomic-write usage as well when filesystem is seriously fragmented, otherwise we may run into infinite loop during foreground GC because target blocks in victim segment are belong to atomic opened file for long time. Now, we will detect failure due to atomic write in foreground GC, if the count exceeds threshold, we will drop all atomic written data in cache, by this, I expect it can keep our system running safely to prevent Dos attack. In addition, his patch adds to show GC skip information in debugfs, now it just shows count of skipped caused by atomic write. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 8656295c76e3..e2317c6c1080 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -273,7 +273,7 @@ next: return err; } -void drop_inmem_pages_all(struct f2fs_sb_info *sbi) +void drop_inmem_pages_all(struct f2fs_sb_info *sbi, bool gc_failure) { struct list_head *head = &sbi->inode_list[ATOMIC_FILE]; struct inode *inode; @@ -289,9 +289,17 @@ next: spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); if (inode) { + if (gc_failure) { + if (fi->i_gc_failures[GC_FAILURE_ATOMIC]) + goto drop; + goto skip; + } +drop: + set_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST); drop_inmem_pages(inode); iput(inode); } +skip: congestion_wait(BLK_RW_ASYNC, HZ/50); cond_resched(); goto next; @@ -311,6 +319,7 @@ void drop_inmem_pages(struct inode *inode) mutex_unlock(&fi->inmem_lock); clear_inode_flag(inode, FI_ATOMIC_FILE); + fi->i_gc_failures[GC_FAILURE_ATOMIC] = 0; stat_dec_atomic_write(inode); } -- cgit v1.2.3 From b25a1872e9a518c8ea5c76bb8441209db3117574 Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Sun, 8 Apr 2018 15:11:11 +0800 Subject: f2fs: let discard thread wait a little longer if dev is busy This patch modify discard thread wait policy as below: issued io_interrupted wait time(ms) 1. 8 0 50 2. (0,8) 1 50 3. 0 1 500 (dev is busy) 4. 0 0 60000 (no candidates) Signed-off-by: Yunlei He Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index e2317c6c1080..fe3b6c3e7553 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1018,6 +1018,7 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi, if (discard_type == DPOLICY_BG) { dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME; + dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME; dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME; dpolicy->io_aware = true; dpolicy->sync = false; @@ -1027,6 +1028,7 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi, } } else if (discard_type == DPOLICY_FORCE) { dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME; + dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME; dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME; dpolicy->io_aware = false; } else if (discard_type == DPOLICY_FSTRIM) { @@ -1500,9 +1502,11 @@ static int issue_discard_thread(void *data) sb_start_intwrite(sbi->sb); issued = __issue_discard_cmd(sbi, &dpolicy); - if (issued) { + if (issued > 0) { __wait_all_discard_cmd(sbi, &dpolicy); wait_ms = dpolicy.min_interval; + } else if (issued == -1){ + wait_ms = dpolicy.mid_interval; } else { wait_ms = dpolicy.max_interval; } -- cgit v1.2.3 From e72a2cca82d8e8809be75012ded23781434d31fa Mon Sep 17 00:00:00 2001 From: Sheng Yong Date: Tue, 8 May 2018 17:51:34 +0800 Subject: f2fs: clear discard_wake earlier If SBI_NEED_FSCK is set, discard_wake will never be cleared. As a result, the condition of wait_event_interruptible_timeout() is always true, which gets discard thread run too frequently. Signed-off-by: Sheng Yong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index fe3b6c3e7553..0caabb0f42bc 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1482,6 +1482,10 @@ static int issue_discard_thread(void *data) kthread_should_stop() || freezing(current) || dcc->discard_wake, msecs_to_jiffies(wait_ms)); + + if (dcc->discard_wake) + dcc->discard_wake = 0; + if (try_to_freeze()) continue; if (f2fs_readonly(sbi->sb)) @@ -1493,9 +1497,6 @@ static int issue_discard_thread(void *data) continue; } - if (dcc->discard_wake) - dcc->discard_wake = 0; - if (sbi->gc_mode == GC_URGENT) __init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE, 1); -- cgit v1.2.3 From c74034518fdc8b21a2b3f0aace06965cea5fa09d Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 26 May 2018 18:03:34 +0800 Subject: f2fs: fix to don't trigger writeback during recovery - f2fs_fill_super - recover_fsync_data - recover_data - del_fsync_inode - iput - iput_final - write_inode_now - f2fs_write_inode - f2fs_balance_fs - f2fs_balance_fs_bg - sync_dirty_inodes With data_flush mount option, during recovery, in order to avoid entering above writeback flow, let's detect recovery status and do skip in f2fs_balance_fs_bg. Signed-off-by: Chao Yu Signed-off-by: Yunlei He Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 0caabb0f42bc..4557704a852e 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -486,6 +486,9 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) { + if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) + return; + /* try to shrink extent cache when there is no enough memory */ if (!available_free_memory(sbi, EXTENT_CACHE)) f2fs_shrink_extent_tree(sbi, EXTENT_CACHE_SHRINK_NUMBER); -- cgit v1.2.3 From b7f55946709538653ce33f06e67219c15b140039 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 28 May 2018 23:47:18 +0800 Subject: f2fs: fix to let caller retry allocating block address Configure io_bits with 2 and enable LFS mode, generic/013 reports below dmesg: BUG: unable to handle kernel NULL pointer dereference at 00000104 *pdpt = 0000000029b7b001 *pde = 0000000000000000 Oops: 0002 [#1] PREEMPT SMP Modules linked in: crc32_generic zram f2fs(O) rfcomm bnep bluetooth ecdh_generic snd_intel8x0 snd_ac97_codec ac97_bus snd_pcm snd_seq_midi snd_seq_midi_event snd_rawmidi snd_seq pcbc joydev snd_seq_device aesni_intel snd_timer aes_i586 snd crypto_simd cryptd soundcore i2c_piix4 serio_raw mac_hid video parport_pc ppdev lp parport hid_generic psmouse usbhid hid e1000 CPU: 0 PID: 11161 Comm: fsstress Tainted: G O 4.17.0-rc2 #38 Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 EIP: f2fs_submit_page_write+0x28d/0x550 [f2fs] EFLAGS: 00010206 CPU: 0 EAX: e863dcd8 EBX: 00000000 ECX: 00000100 EDX: 00000200 ESI: e863dcf4 EDI: f6f82768 EBP: e863dbb0 ESP: e863db74 DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068 CR0: 80050033 CR2: 00000104 CR3: 29a62020 CR4: 000406f0 Call Trace: do_write_page+0x6f/0xc0 [f2fs] write_data_page+0x4a/0xd0 [f2fs] do_write_data_page+0x327/0x630 [f2fs] __write_data_page+0x34b/0x820 [f2fs] __f2fs_write_data_pages+0x42d/0x8c0 [f2fs] f2fs_write_data_pages+0x27/0x30 [f2fs] do_writepages+0x1a/0x70 __filemap_fdatawrite_range+0x94/0xd0 filemap_write_and_wait_range+0x3d/0xa0 __generic_file_write_iter+0x11a/0x1f0 f2fs_file_write_iter+0xdd/0x3b0 [f2fs] __vfs_write+0xd2/0x150 vfs_write+0x9b/0x190 ksys_write+0x45/0x90 sys_write+0x16/0x20 do_fast_syscall_32+0xaa/0x22c entry_SYSENTER_32+0x4c/0x7b EIP: 0xb7fc8c51 EFLAGS: 00000246 CPU: 0 EAX: ffffffda EBX: 00000003 ECX: 09cde000 EDX: 00001000 ESI: 00000003 EDI: 00001000 EBP: 00000000 ESP: bfbded38 DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 007b Code: e8 f9 77 34 c9 8b 45 e0 8b 80 b8 00 00 00 39 45 d8 0f 84 bb 02 00 00 8b 45 e0 8b 80 b8 00 00 00 8d 50 d8 8b 08 89 55 f0 8b 50 04 <89> 51 04 89 0a c7 00 00 01 00 00 c7 40 04 00 02 00 00 8b 45 dc EIP: f2fs_submit_page_write+0x28d/0x550 [f2fs] SS:ESP: 0068:e863db74 CR2: 0000000000000104 ---[ end trace 4cac79c0d1305ee6 ]--- allocate_data_block will submit all sequential pending IOs sorted by a FIFO list, If we failed to submit other user's IO due to unaligned write, we will retry to allocate new block address for current IO, then it will initialize fio.list again, if fio was in the list before, it can break FIFO list, result in above panic. Thread A Thread B - do_write_page - allocate_data_block - list_add_tail : fioA cached in FIFO list. - do_write_page - allocate_data_block - list_add_tail : fioB cached in FIFO list. - f2fs_submit_page_write : fail to submit IO - allocate_data_block - INIT_LIST_HEAD - f2fs_submit_page_write - list_del <-- NULL pointer dereference This patch adds fio.retry parameter to indicate failure status for each IO, and avoid bailing out if there is still pending IO in FIFO list for fixing. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 4557704a852e..507f697178b6 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2812,6 +2812,7 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, INIT_LIST_HEAD(&fio->list); fio->in_list = true; + fio->retry = false; io = sbi->write_io[fio->type] + fio->temp; spin_lock(&io->io_lock); list_add_tail(&fio->list, &io->io_list); @@ -2847,7 +2848,6 @@ static void update_device_state(struct f2fs_io_info *fio) static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) { int type = __get_segment_type(fio); - int err; bool keep_order = (test_opt(fio->sbi, LFS) && type == CURSEG_COLD_DATA); if (keep_order) @@ -2857,13 +2857,14 @@ reallocate: &fio->new_blkaddr, sum, type, fio, true); /* writeout dirty page into bdev */ - err = f2fs_submit_page_write(fio); - if (err == -EAGAIN) { + f2fs_submit_page_write(fio); + if (fio->retry) { fio->old_blkaddr = fio->new_blkaddr; goto reallocate; - } else if (!err) { - update_device_state(fio); } + + update_device_state(fio); + if (keep_order) up_read(&fio->sbi->io_order_lock); } -- cgit v1.2.3 From c35da89531b3cf7939498e4e1f39bf9338ebc10f Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 30 May 2018 00:20:41 +0800 Subject: f2fs: clean up symbol namespace As Ted reported: "Hi, I was looking at f2fs's sources recently, and I noticed that there is a very large number of non-static symbols which don't have a f2fs prefix. There's well over a hundred (see attached below). As one example, in fs/f2fs/dir.c there is: unsigned char get_de_type(struct f2fs_dir_entry *de) This function is clearly only useful for f2fs, but it has a generic name. This means that if any other file system tries to have the same symbol name, there will be a symbol conflict and the kernel would not successfully build. It also means that when someone is looking f2fs sources, it's not at all obvious whether a function such as read_data_page(), invalidate_blocks(), is a generic kernel function found in the fs, mm, or block layers, or a f2fs specific function. You might want to fix this at some point. Hopefully Kent's bcachefs isn't similarly using genericly named functions, since that might cause conflicts with f2fs's functions --- but just as this would be a problem that we would rightly insist that Kent fix, this is something that we should have rightly insisted that f2fs should have fixed before it was integrated into the mainline kernel. acquire_orphan_inode add_ino_entry add_orphan_inode allocate_data_block allocate_new_segments alloc_nid alloc_nid_done alloc_nid_failed available_free_memory ...." This patch adds "f2fs_" prefix for all non-static symbols in order to: a) avoid conflict with other kernel generic symbols; b) to indicate the function is f2fs specific one instead of generic one; Reported-by: Theodore Ts'o Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 185 ++++++++++++++++++++++++++++-------------------------- 1 file changed, 96 insertions(+), 89 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 507f697178b6..8672bf574426 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -169,7 +169,7 @@ found: return result - size + __reverse_ffz(tmp); } -bool need_SSR(struct f2fs_sb_info *sbi) +bool f2fs_need_SSR(struct f2fs_sb_info *sbi) { int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES); int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS); @@ -184,7 +184,7 @@ bool need_SSR(struct f2fs_sb_info *sbi) SM_I(sbi)->min_ssr_sections + reserved_sections(sbi)); } -void register_inmem_page(struct inode *inode, struct page *page) +void f2fs_register_inmem_page(struct inode *inode, struct page *page) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_inode_info *fi = F2FS_I(inode); @@ -239,7 +239,8 @@ static int __revoke_inmem_pages(struct inode *inode, trace_f2fs_commit_inmem_page(page, INMEM_REVOKE); retry: set_new_dnode(&dn, inode, NULL, NULL, 0); - err = get_dnode_of_data(&dn, page->index, LOOKUP_NODE); + err = f2fs_get_dnode_of_data(&dn, page->index, + LOOKUP_NODE); if (err) { if (err == -ENOMEM) { congestion_wait(BLK_RW_ASYNC, HZ/50); @@ -249,9 +250,9 @@ retry: err = -EAGAIN; goto next; } - get_node_info(sbi, dn.nid, &ni); + f2fs_get_node_info(sbi, dn.nid, &ni); if (cur->old_addr == NEW_ADDR) { - invalidate_blocks(sbi, dn.data_blkaddr); + f2fs_invalidate_blocks(sbi, dn.data_blkaddr); f2fs_update_data_blkaddr(&dn, NEW_ADDR); } else f2fs_replace_block(sbi, &dn, dn.data_blkaddr, @@ -273,7 +274,7 @@ next: return err; } -void drop_inmem_pages_all(struct f2fs_sb_info *sbi, bool gc_failure) +void f2fs_drop_inmem_pages_all(struct f2fs_sb_info *sbi, bool gc_failure) { struct list_head *head = &sbi->inode_list[ATOMIC_FILE]; struct inode *inode; @@ -296,7 +297,7 @@ next: } drop: set_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST); - drop_inmem_pages(inode); + f2fs_drop_inmem_pages(inode); iput(inode); } skip: @@ -305,7 +306,7 @@ skip: goto next; } -void drop_inmem_pages(struct inode *inode) +void f2fs_drop_inmem_pages(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_inode_info *fi = F2FS_I(inode); @@ -323,7 +324,7 @@ void drop_inmem_pages(struct inode *inode) stat_dec_atomic_write(inode); } -void drop_inmem_page(struct inode *inode, struct page *page) +void f2fs_drop_inmem_page(struct inode *inode, struct page *page) { struct f2fs_inode_info *fi = F2FS_I(inode); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); @@ -353,7 +354,7 @@ void drop_inmem_page(struct inode *inode, struct page *page) trace_f2fs_commit_inmem_page(page, INMEM_INVALIDATE); } -static int __commit_inmem_pages(struct inode *inode) +static int __f2fs_commit_inmem_pages(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_inode_info *fi = F2FS_I(inode); @@ -383,14 +384,14 @@ static int __commit_inmem_pages(struct inode *inode) f2fs_wait_on_page_writeback(page, DATA, true); if (clear_page_dirty_for_io(page)) { inode_dec_dirty_pages(inode); - remove_dirty_inode(inode); + f2fs_remove_dirty_inode(inode); } retry: fio.page = page; fio.old_blkaddr = NULL_ADDR; fio.encrypted_page = NULL; fio.need_lock = LOCK_DONE; - err = do_write_data_page(&fio); + err = f2fs_do_write_data_page(&fio); if (err) { if (err == -ENOMEM) { congestion_wait(BLK_RW_ASYNC, HZ/50); @@ -431,7 +432,7 @@ retry: return err; } -int commit_inmem_pages(struct inode *inode) +int f2fs_commit_inmem_pages(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_inode_info *fi = F2FS_I(inode); @@ -443,7 +444,7 @@ int commit_inmem_pages(struct inode *inode) set_inode_flag(inode, FI_ATOMIC_COMMIT); mutex_lock(&fi->inmem_lock); - err = __commit_inmem_pages(inode); + err = __f2fs_commit_inmem_pages(inode); spin_lock(&sbi->inode_lock[ATOMIC_FILE]); if (!list_empty(&fi->inmem_ilist)) @@ -490,24 +491,24 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) return; /* try to shrink extent cache when there is no enough memory */ - if (!available_free_memory(sbi, EXTENT_CACHE)) + if (!f2fs_available_free_memory(sbi, EXTENT_CACHE)) f2fs_shrink_extent_tree(sbi, EXTENT_CACHE_SHRINK_NUMBER); /* check the # of cached NAT entries */ - if (!available_free_memory(sbi, NAT_ENTRIES)) - try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK); + if (!f2fs_available_free_memory(sbi, NAT_ENTRIES)) + f2fs_try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK); - if (!available_free_memory(sbi, FREE_NIDS)) - try_to_free_nids(sbi, MAX_FREE_NIDS); + if (!f2fs_available_free_memory(sbi, FREE_NIDS)) + f2fs_try_to_free_nids(sbi, MAX_FREE_NIDS); else - build_free_nids(sbi, false, false); + f2fs_build_free_nids(sbi, false, false); if (!is_idle(sbi) && !excess_dirty_nats(sbi)) return; /* checkpoint is the only way to shrink partial cached entries */ - if (!available_free_memory(sbi, NAT_ENTRIES) || - !available_free_memory(sbi, INO_ENTRIES) || + if (!f2fs_available_free_memory(sbi, NAT_ENTRIES) || + !f2fs_available_free_memory(sbi, INO_ENTRIES) || excess_prefree_segs(sbi) || excess_dirty_nats(sbi) || f2fs_time_over(sbi, CP_TIME)) { @@ -515,7 +516,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) struct blk_plug plug; blk_start_plug(&plug); - sync_dirty_inodes(sbi, FILE_INODE); + f2fs_sync_dirty_inodes(sbi, FILE_INODE); blk_finish_plug(&plug); } f2fs_sync_fs(sbi->sb, true); @@ -548,7 +549,7 @@ static int submit_flush_wait(struct f2fs_sb_info *sbi, nid_t ino) return __submit_flush_wait(sbi, sbi->sb->s_bdev); for (i = 0; i < sbi->s_ndevs; i++) { - if (!is_dirty_device(sbi, ino, i, FLUSH_INO)) + if (!f2fs_is_dirty_device(sbi, ino, i, FLUSH_INO)) continue; ret = __submit_flush_wait(sbi, FDEV(i).bdev); if (ret) @@ -659,7 +660,7 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino) return cmd.ret; } -int create_flush_cmd_control(struct f2fs_sb_info *sbi) +int f2fs_create_flush_cmd_control(struct f2fs_sb_info *sbi) { dev_t dev = sbi->sb->s_bdev->bd_dev; struct flush_cmd_control *fcc; @@ -696,7 +697,7 @@ init_thread: return err; } -void destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free) +void f2fs_destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free) { struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info; @@ -1102,7 +1103,7 @@ static struct discard_cmd *__insert_discard_tree(struct f2fs_sb_info *sbi, goto do_insert; } - p = __lookup_rb_tree_for_insert(sbi, &dcc->root, &parent, lstart); + p = f2fs_lookup_rb_tree_for_insert(sbi, &dcc->root, &parent, lstart); do_insert: dc = __attach_discard_cmd(sbi, bdev, lstart, start, len, parent, p); if (!dc) @@ -1167,7 +1168,7 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi, mutex_lock(&dcc->cmd_lock); - dc = (struct discard_cmd *)__lookup_rb_tree_ret(&dcc->root, + dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root, NULL, lstart, (struct rb_entry **)&prev_dc, (struct rb_entry **)&next_dc, @@ -1278,7 +1279,8 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi, mutex_lock(&dcc->cmd_lock); if (list_empty(pend_list)) goto next; - f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, &dcc->root)); + f2fs_bug_on(sbi, + !f2fs_check_rb_tree_consistence(sbi, &dcc->root)); blk_start_plug(&plug); list_for_each_entry_safe(dc, tmp, pend_list, list) { f2fs_bug_on(sbi, dc->state != D_PREP); @@ -1331,7 +1333,7 @@ static bool __drop_discard_cmd(struct f2fs_sb_info *sbi) return dropped; } -void drop_discard_cmd(struct f2fs_sb_info *sbi) +void f2fs_drop_discard_cmd(struct f2fs_sb_info *sbi) { __drop_discard_cmd(sbi); } @@ -1422,7 +1424,8 @@ static void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) bool need_wait = false; mutex_lock(&dcc->cmd_lock); - dc = (struct discard_cmd *)__lookup_rb_tree(&dcc->root, NULL, blkaddr); + dc = (struct discard_cmd *)f2fs_lookup_rb_tree(&dcc->root, + NULL, blkaddr); if (dc) { if (dc->state == D_PREP) { __punch_discard_cmd(sbi, dc, blkaddr); @@ -1437,7 +1440,7 @@ static void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) __wait_one_discard_bio(sbi, dc); } -void stop_discard_thread(struct f2fs_sb_info *sbi) +void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; @@ -1685,7 +1688,7 @@ static void release_discard_addr(struct discard_entry *entry) kmem_cache_free(discard_entry_slab, entry); } -void release_discard_addrs(struct f2fs_sb_info *sbi) +void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi) { struct list_head *head = &(SM_I(sbi)->dcc_info->entry_list); struct discard_entry *entry, *this; @@ -1696,7 +1699,7 @@ void release_discard_addrs(struct f2fs_sb_info *sbi) } /* - * Should call clear_prefree_segments after checkpoint is done. + * Should call f2fs_clear_prefree_segments after checkpoint is done. */ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi) { @@ -1709,7 +1712,8 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi) mutex_unlock(&dirty_i->seglist_lock); } -void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc) +void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi, + struct cp_control *cpc) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; struct list_head *head = &dcc->entry_list; @@ -1851,7 +1855,7 @@ static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi) if (!dcc) return; - stop_discard_thread(sbi); + f2fs_stop_discard_thread(sbi); kfree(dcc); SM_I(sbi)->dcc_info = NULL; @@ -1967,7 +1971,7 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) get_sec_entry(sbi, segno)->valid_blocks += del; } -void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr) +void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr) { unsigned int segno = GET_SEGNO(sbi, addr); struct sit_info *sit_i = SIT_I(sbi); @@ -1987,7 +1991,7 @@ void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr) up_write(&sit_i->sentry_lock); } -bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr) +bool f2fs_is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr) { struct sit_info *sit_i = SIT_I(sbi); unsigned int segno, offset; @@ -2026,7 +2030,7 @@ static void __add_sum_entry(struct f2fs_sb_info *sbi, int type, /* * Calculate the number of current summary pages for writing */ -int npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra) +int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra) { int valid_sum_count = 0; int i, sum_in_page; @@ -2056,14 +2060,15 @@ int npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra) /* * Caller should put this summary page */ -struct page *get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno) +struct page *f2fs_get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno) { - return get_meta_page(sbi, GET_SUM_BLOCK(sbi, segno)); + return f2fs_get_meta_page(sbi, GET_SUM_BLOCK(sbi, segno)); } -void update_meta_page(struct f2fs_sb_info *sbi, void *src, block_t blk_addr) +void f2fs_update_meta_page(struct f2fs_sb_info *sbi, + void *src, block_t blk_addr) { - struct page *page = grab_meta_page(sbi, blk_addr); + struct page *page = f2fs_grab_meta_page(sbi, blk_addr); memcpy(page_address(page), src, PAGE_SIZE); set_page_dirty(page); @@ -2073,14 +2078,14 @@ void update_meta_page(struct f2fs_sb_info *sbi, void *src, block_t blk_addr) static void write_sum_page(struct f2fs_sb_info *sbi, struct f2fs_summary_block *sum_blk, block_t blk_addr) { - update_meta_page(sbi, (void *)sum_blk, blk_addr); + f2fs_update_meta_page(sbi, (void *)sum_blk, blk_addr); } static void write_current_sum_page(struct f2fs_sb_info *sbi, int type, block_t blk_addr) { struct curseg_info *curseg = CURSEG_I(sbi, type); - struct page *page = grab_meta_page(sbi, blk_addr); + struct page *page = f2fs_grab_meta_page(sbi, blk_addr); struct f2fs_summary_block *src = curseg->sum_blk; struct f2fs_summary_block *dst; @@ -2325,7 +2330,7 @@ static void change_curseg(struct f2fs_sb_info *sbi, int type) curseg->alloc_type = SSR; __next_free_blkoff(sbi, curseg, 0); - sum_page = get_sum_page(sbi, new_segno); + sum_page = f2fs_get_sum_page(sbi, new_segno); sum_node = (struct f2fs_summary_block *)page_address(sum_page); memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE); f2fs_put_page(sum_page, 1); @@ -2339,7 +2344,7 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type) int i, cnt; bool reversed = false; - /* need_SSR() already forces to do this */ + /* f2fs_need_SSR() already forces to do this */ if (v_ops->get_victim(sbi, &segno, BG_GC, type, SSR)) { curseg->next_segno = segno; return 1; @@ -2391,7 +2396,7 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi, new_curseg(sbi, type, false); else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type)) new_curseg(sbi, type, false); - else if (need_SSR(sbi) && get_ssr_segment(sbi, type)) + else if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type)) change_curseg(sbi, type); else new_curseg(sbi, type, false); @@ -2399,7 +2404,7 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi, stat_inc_seg_type(sbi, curseg); } -void allocate_new_segments(struct f2fs_sb_info *sbi) +void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi) { struct curseg_info *curseg; unsigned int old_segno; @@ -2421,7 +2426,8 @@ static const struct segment_allocation default_salloc_ops = { .allocate_segment = allocate_segment_by_default, }; -bool exist_trim_candidates(struct f2fs_sb_info *sbi, struct cp_control *cpc) +bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi, + struct cp_control *cpc) { __u64 trim_start = cpc->trim_start; bool has_candidate = false; @@ -2454,9 +2460,9 @@ next: issued = 0; mutex_lock(&dcc->cmd_lock); - f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, &dcc->root)); + f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi, &dcc->root)); - dc = (struct discard_cmd *)__lookup_rb_tree_ret(&dcc->root, + dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root, NULL, start, (struct rb_entry **)&prev_dc, (struct rb_entry **)&next_dc, @@ -2537,7 +2543,7 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) goto out; mutex_lock(&sbi->gc_mutex); - err = write_checkpoint(sbi, &cpc); + err = f2fs_write_checkpoint(sbi, &cpc); mutex_unlock(&sbi->gc_mutex); if (err) goto out; @@ -2571,7 +2577,7 @@ static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type) return false; } -int rw_hint_to_seg_type(enum rw_hint hint) +int f2fs_rw_hint_to_seg_type(enum rw_hint hint) { switch (hint) { case WRITE_LIFE_SHORT: @@ -2644,7 +2650,7 @@ int rw_hint_to_seg_type(enum rw_hint hint) * WRITE_LIFE_LONG " WRITE_LIFE_LONG */ -enum rw_hint io_type_to_rw_hint(struct f2fs_sb_info *sbi, +enum rw_hint f2fs_io_type_to_rw_hint(struct f2fs_sb_info *sbi, enum page_type type, enum temp_type temp) { if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_USER) { @@ -2715,7 +2721,7 @@ static int __get_segment_type_6(struct f2fs_io_info *fio) is_inode_flag_set(inode, FI_ATOMIC_FILE) || is_inode_flag_set(inode, FI_VOLATILE_FILE)) return CURSEG_HOT_DATA; - /* rw_hint_to_seg_type(inode->i_write_hint); */ + /* f2fs_rw_hint_to_seg_type(inode->i_write_hint); */ return CURSEG_WARM_DATA; } else { if (IS_DNODE(fio->page)) @@ -2752,7 +2758,7 @@ static int __get_segment_type(struct f2fs_io_info *fio) return type; } -void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, +void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, block_t old_blkaddr, block_t *new_blkaddr, struct f2fs_summary *sum, int type, struct f2fs_io_info *fio, bool add_list) @@ -2835,7 +2841,7 @@ static void update_device_state(struct f2fs_io_info *fio) devidx = f2fs_target_device_index(sbi, fio->new_blkaddr); /* update device state for fsync */ - set_dirty_device(sbi, fio->ino, devidx, FLUSH_INO); + f2fs_set_dirty_device(sbi, fio->ino, devidx, FLUSH_INO); /* update device state for checkpoint */ if (!f2fs_test_bit(devidx, (char *)&sbi->dirty_device)) { @@ -2853,7 +2859,7 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) if (keep_order) down_read(&fio->sbi->io_order_lock); reallocate: - allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr, + f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr, &fio->new_blkaddr, sum, type, fio, true); /* writeout dirty page into bdev */ @@ -2869,7 +2875,7 @@ reallocate: up_read(&fio->sbi->io_order_lock); } -void write_meta_page(struct f2fs_sb_info *sbi, struct page *page, +void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct page *page, enum iostat_type io_type) { struct f2fs_io_info fio = { @@ -2895,7 +2901,7 @@ void write_meta_page(struct f2fs_sb_info *sbi, struct page *page, f2fs_update_iostat(sbi, io_type, F2FS_BLKSIZE); } -void write_node_page(unsigned int nid, struct f2fs_io_info *fio) +void f2fs_do_write_node_page(unsigned int nid, struct f2fs_io_info *fio) { struct f2fs_summary sum; @@ -2905,14 +2911,15 @@ void write_node_page(unsigned int nid, struct f2fs_io_info *fio) f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE); } -void write_data_page(struct dnode_of_data *dn, struct f2fs_io_info *fio) +void f2fs_outplace_write_data(struct dnode_of_data *dn, + struct f2fs_io_info *fio) { struct f2fs_sb_info *sbi = fio->sbi; struct f2fs_summary sum; struct node_info ni; f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR); - get_node_info(sbi, dn->nid, &ni); + f2fs_get_node_info(sbi, dn->nid, &ni); set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); do_write_page(&sum, fio); f2fs_update_data_blkaddr(dn, fio->new_blkaddr); @@ -2920,7 +2927,7 @@ void write_data_page(struct dnode_of_data *dn, struct f2fs_io_info *fio) f2fs_update_iostat(sbi, fio->io_type, F2FS_BLKSIZE); } -int rewrite_data_page(struct f2fs_io_info *fio) +int f2fs_inplace_write_data(struct f2fs_io_info *fio) { int err; struct f2fs_sb_info *sbi = fio->sbi; @@ -2955,7 +2962,7 @@ static inline int __f2fs_get_curseg(struct f2fs_sb_info *sbi, return i; } -void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, +void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, block_t old_blkaddr, block_t new_blkaddr, bool recover_curseg, bool recover_newaddr) { @@ -3040,7 +3047,7 @@ void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn, set_summary(&sum, dn->nid, dn->ofs_in_node, version); - __f2fs_replace_block(sbi, &sum, old_addr, new_addr, + f2fs_do_replace_block(sbi, &sum, old_addr, new_addr, recover_curseg, recover_newaddr); f2fs_update_data_blkaddr(dn, new_addr); @@ -3086,7 +3093,7 @@ static void read_compacted_summaries(struct f2fs_sb_info *sbi) start = start_sum_block(sbi); - page = get_meta_page(sbi, start++); + page = f2fs_get_meta_page(sbi, start++); kaddr = (unsigned char *)page_address(page); /* Step 1: restore nat cache */ @@ -3126,7 +3133,7 @@ static void read_compacted_summaries(struct f2fs_sb_info *sbi) f2fs_put_page(page, 1); page = NULL; - page = get_meta_page(sbi, start++); + page = f2fs_get_meta_page(sbi, start++); kaddr = (unsigned char *)page_address(page); offset = 0; } @@ -3165,7 +3172,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type) blk_addr = GET_SUM_BLOCK(sbi, segno); } - new = get_meta_page(sbi, blk_addr); + new = f2fs_get_meta_page(sbi, blk_addr); sum = (struct f2fs_summary_block *)page_address(new); if (IS_NODESEG(type)) { @@ -3177,7 +3184,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type) ns->ofs_in_node = 0; } } else { - restore_node_summary(sbi, segno, sum); + f2fs_restore_node_summary(sbi, segno, sum); } } @@ -3209,10 +3216,10 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi) int err; if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG)) { - int npages = npages_for_summary_flush(sbi, true); + int npages = f2fs_npages_for_summary_flush(sbi, true); if (npages >= 2) - ra_meta_pages(sbi, start_sum_block(sbi), npages, + f2fs_ra_meta_pages(sbi, start_sum_block(sbi), npages, META_CP, true); /* restore for compacted data summary */ @@ -3221,7 +3228,7 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi) } if (__exist_node_summaries(sbi)) - ra_meta_pages(sbi, sum_blk_addr(sbi, NR_CURSEG_TYPE, type), + f2fs_ra_meta_pages(sbi, sum_blk_addr(sbi, NR_CURSEG_TYPE, type), NR_CURSEG_TYPE - type, META_CP, true); for (; type <= CURSEG_COLD_NODE; type++) { @@ -3247,7 +3254,7 @@ static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr) int written_size = 0; int i, j; - page = grab_meta_page(sbi, blkaddr++); + page = f2fs_grab_meta_page(sbi, blkaddr++); kaddr = (unsigned char *)page_address(page); memset(kaddr, 0, PAGE_SIZE); @@ -3272,7 +3279,7 @@ static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr) for (j = 0; j < blkoff; j++) { if (!page) { - page = grab_meta_page(sbi, blkaddr++); + page = f2fs_grab_meta_page(sbi, blkaddr++); kaddr = (unsigned char *)page_address(page); memset(kaddr, 0, PAGE_SIZE); written_size = 0; @@ -3309,7 +3316,7 @@ static void write_normal_summaries(struct f2fs_sb_info *sbi, write_current_sum_page(sbi, i, blkaddr + (i - type)); } -void write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk) +void f2fs_write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk) { if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG)) write_compacted_summaries(sbi, start_blk); @@ -3317,12 +3324,12 @@ void write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk) write_normal_summaries(sbi, start_blk, CURSEG_HOT_DATA); } -void write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk) +void f2fs_write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk) { write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE); } -int lookup_journal_in_cursum(struct f2fs_journal *journal, int type, +int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type, unsigned int val, int alloc) { int i; @@ -3347,7 +3354,7 @@ int lookup_journal_in_cursum(struct f2fs_journal *journal, int type, static struct page *get_current_sit_page(struct f2fs_sb_info *sbi, unsigned int segno) { - return get_meta_page(sbi, current_sit_addr(sbi, segno)); + return f2fs_get_meta_page(sbi, current_sit_addr(sbi, segno)); } static struct page *get_next_sit_page(struct f2fs_sb_info *sbi, @@ -3360,7 +3367,7 @@ static struct page *get_next_sit_page(struct f2fs_sb_info *sbi, src_off = current_sit_addr(sbi, start); dst_off = next_sit_addr(sbi, src_off); - page = grab_meta_page(sbi, dst_off); + page = f2fs_grab_meta_page(sbi, dst_off); seg_info_to_sit_page(sbi, page, start); set_page_dirty(page); @@ -3456,7 +3463,7 @@ static void remove_sits_in_journal(struct f2fs_sb_info *sbi) * CP calls this function, which flushes SIT entries including sit_journal, * and moves prefree segs to free segs. */ -void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) +void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) { struct sit_info *sit_i = SIT_I(sbi); unsigned long *bitmap = sit_i->dirty_sentries_bitmap; @@ -3528,7 +3535,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) } if (to_journal) { - offset = lookup_journal_in_cursum(journal, + offset = f2fs_lookup_journal_in_cursum(journal, SIT_JOURNAL, segno, 1); f2fs_bug_on(sbi, offset < 0); segno_in_journal(journal, offset) = @@ -3744,7 +3751,7 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) block_t total_node_blocks = 0; do { - readed = ra_meta_pages(sbi, start_blk, BIO_MAX_PAGES, + readed = f2fs_ra_meta_pages(sbi, start_blk, BIO_MAX_PAGES, META_SIT, true); start = start_blk * sit_i->sents_per_block; @@ -3962,7 +3969,7 @@ static void init_min_max_mtime(struct f2fs_sb_info *sbi) up_write(&sit_i->sentry_lock); } -int build_segment_manager(struct f2fs_sb_info *sbi) +int f2fs_build_segment_manager(struct f2fs_sb_info *sbi) { struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); @@ -3999,7 +4006,7 @@ int build_segment_manager(struct f2fs_sb_info *sbi) init_rwsem(&sm_info->curseg_lock); if (!f2fs_readonly(sbi->sb)) { - err = create_flush_cmd_control(sbi); + err = f2fs_create_flush_cmd_control(sbi); if (err) return err; } @@ -4124,13 +4131,13 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi) kfree(sit_i); } -void destroy_segment_manager(struct f2fs_sb_info *sbi) +void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi) { struct f2fs_sm_info *sm_info = SM_I(sbi); if (!sm_info) return; - destroy_flush_cmd_control(sbi, true); + f2fs_destroy_flush_cmd_control(sbi, true); destroy_discard_cmd_control(sbi); destroy_dirty_segmap(sbi); destroy_curseg(sbi); @@ -4140,7 +4147,7 @@ void destroy_segment_manager(struct f2fs_sb_info *sbi) kfree(sm_info); } -int __init create_segment_manager_caches(void) +int __init f2fs_create_segment_manager_caches(void) { discard_entry_slab = f2fs_kmem_cache_create("discard_entry", sizeof(struct discard_entry)); @@ -4173,7 +4180,7 @@ fail: return -ENOMEM; } -void destroy_segment_manager_caches(void) +void f2fs_destroy_segment_manager_caches(void) { kmem_cache_destroy(sit_entry_set_slab); kmem_cache_destroy(discard_cmd_slab); -- cgit v1.2.3 From 588ecdfd7d023e7ed43fc516823d7df3c9d14fc3 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 4 Jun 2018 23:20:17 +0800 Subject: f2fs: fix to update mtime correctly If we change system time to the past, get_mtime() will return a overflowed time, and SIT_I(sbi)->max_mtime will be udpated incorrectly, this patch fixes the two issues. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 8672bf574426..9a3dc92ecf23 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1902,8 +1902,9 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) (new_vblocks > sbi->blocks_per_seg))); se->valid_blocks = new_vblocks; - se->mtime = get_mtime(sbi); - SIT_I(sbi)->max_mtime = se->mtime; + se->mtime = get_mtime(sbi, false); + if (se->mtime > SIT_I(sbi)->max_mtime) + SIT_I(sbi)->max_mtime = se->mtime; /* Update valid block bitmap */ if (del > 0) { @@ -3965,7 +3966,7 @@ static void init_min_max_mtime(struct f2fs_sb_info *sbi) if (sit_i->min_mtime > mtime) sit_i->min_mtime = mtime; } - sit_i->max_mtime = get_mtime(sbi); + sit_i->max_mtime = get_mtime(sbi, false); up_write(&sit_i->sentry_lock); } -- cgit v1.2.3 From f15443db99c35cd3bf44d76bc4f6d181f89e4acd Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 12 Jun 2018 14:28:23 -0700 Subject: treewide: Use array_size() in f2fs_kzalloc() The f2fs_kzalloc() function has no 2-factor argument form, so multiplication factors need to be wrapped in array_size(). This patch replaces cases of: f2fs_kzalloc(handle, a * b, gfp) with: f2fs_kzalloc(handle, array_size(a, b), gfp) as well as handling cases of: f2fs_kzalloc(handle, a * b * c, gfp) with: f2fs_kzalloc(handle, array3_size(a, b, c), gfp) This does, however, attempt to ignore constant size factors like: f2fs_kzalloc(handle, 4 * 1024, gfp) though any constants defined via macros get caught up in the conversion. Any factors with a sizeof() of "unsigned char", "char", and "u8" were dropped, since they're redundant. The Coccinelle script used for this was: // Fix redundant parens around sizeof(). @@ expression HANDLE; type TYPE; expression THING, E; @@ ( f2fs_kzalloc(HANDLE, - (sizeof(TYPE)) * E + sizeof(TYPE) * E , ...) | f2fs_kzalloc(HANDLE, - (sizeof(THING)) * E + sizeof(THING) * E , ...) ) // Drop single-byte sizes and redundant parens. @@ expression HANDLE; expression COUNT; typedef u8; typedef __u8; @@ ( f2fs_kzalloc(HANDLE, - sizeof(u8) * (COUNT) + COUNT , ...) | f2fs_kzalloc(HANDLE, - sizeof(__u8) * (COUNT) + COUNT , ...) | f2fs_kzalloc(HANDLE, - sizeof(char) * (COUNT) + COUNT , ...) | f2fs_kzalloc(HANDLE, - sizeof(unsigned char) * (COUNT) + COUNT , ...) | f2fs_kzalloc(HANDLE, - sizeof(u8) * COUNT + COUNT , ...) | f2fs_kzalloc(HANDLE, - sizeof(__u8) * COUNT + COUNT , ...) | f2fs_kzalloc(HANDLE, - sizeof(char) * COUNT + COUNT , ...) | f2fs_kzalloc(HANDLE, - sizeof(unsigned char) * COUNT + COUNT , ...) ) // 2-factor product with sizeof(type/expression) and identifier or constant. @@ expression HANDLE; type TYPE; expression THING; identifier COUNT_ID; constant COUNT_CONST; @@ ( f2fs_kzalloc(HANDLE, - sizeof(TYPE) * (COUNT_ID) + array_size(COUNT_ID, sizeof(TYPE)) , ...) | f2fs_kzalloc(HANDLE, - sizeof(TYPE) * COUNT_ID + array_size(COUNT_ID, sizeof(TYPE)) , ...) | f2fs_kzalloc(HANDLE, - sizeof(TYPE) * (COUNT_CONST) + array_size(COUNT_CONST, sizeof(TYPE)) , ...) | f2fs_kzalloc(HANDLE, - sizeof(TYPE) * COUNT_CONST + array_size(COUNT_CONST, sizeof(TYPE)) , ...) | f2fs_kzalloc(HANDLE, - sizeof(THING) * (COUNT_ID) + array_size(COUNT_ID, sizeof(THING)) , ...) | f2fs_kzalloc(HANDLE, - sizeof(THING) * COUNT_ID + array_size(COUNT_ID, sizeof(THING)) , ...) | f2fs_kzalloc(HANDLE, - sizeof(THING) * (COUNT_CONST) + array_size(COUNT_CONST, sizeof(THING)) , ...) | f2fs_kzalloc(HANDLE, - sizeof(THING) * COUNT_CONST + array_size(COUNT_CONST, sizeof(THING)) , ...) ) // 2-factor product, only identifiers. @@ expression HANDLE; identifier SIZE, COUNT; @@ f2fs_kzalloc(HANDLE, - SIZE * COUNT + array_size(COUNT, SIZE) , ...) // 3-factor product with 1 sizeof(type) or sizeof(expression), with // redundant parens removed. @@ expression HANDLE; expression THING; identifier STRIDE, COUNT; type TYPE; @@ ( f2fs_kzalloc(HANDLE, - sizeof(TYPE) * (COUNT) * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | f2fs_kzalloc(HANDLE, - sizeof(TYPE) * (COUNT) * STRIDE + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | f2fs_kzalloc(HANDLE, - sizeof(TYPE) * COUNT * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | f2fs_kzalloc(HANDLE, - sizeof(TYPE) * COUNT * STRIDE + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | f2fs_kzalloc(HANDLE, - sizeof(THING) * (COUNT) * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) | f2fs_kzalloc(HANDLE, - sizeof(THING) * (COUNT) * STRIDE + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) | f2fs_kzalloc(HANDLE, - sizeof(THING) * COUNT * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) | f2fs_kzalloc(HANDLE, - sizeof(THING) * COUNT * STRIDE + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) ) // 3-factor product with 2 sizeof(variable), with redundant parens removed. @@ expression HANDLE; expression THING1, THING2; identifier COUNT; type TYPE1, TYPE2; @@ ( f2fs_kzalloc(HANDLE, - sizeof(TYPE1) * sizeof(TYPE2) * COUNT + array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2)) , ...) | f2fs_kzalloc(HANDLE, - sizeof(TYPE1) * sizeof(THING2) * (COUNT) + array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2)) , ...) | f2fs_kzalloc(HANDLE, - sizeof(THING1) * sizeof(THING2) * COUNT + array3_size(COUNT, sizeof(THING1), sizeof(THING2)) , ...) | f2fs_kzalloc(HANDLE, - sizeof(THING1) * sizeof(THING2) * (COUNT) + array3_size(COUNT, sizeof(THING1), sizeof(THING2)) , ...) | f2fs_kzalloc(HANDLE, - sizeof(TYPE1) * sizeof(THING2) * COUNT + array3_size(COUNT, sizeof(TYPE1), sizeof(THING2)) , ...) | f2fs_kzalloc(HANDLE, - sizeof(TYPE1) * sizeof(THING2) * (COUNT) + array3_size(COUNT, sizeof(TYPE1), sizeof(THING2)) , ...) ) // 3-factor product, only identifiers, with redundant parens removed. @@ expression HANDLE; identifier STRIDE, SIZE, COUNT; @@ ( f2fs_kzalloc(HANDLE, - (COUNT) * STRIDE * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) | f2fs_kzalloc(HANDLE, - COUNT * (STRIDE) * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) | f2fs_kzalloc(HANDLE, - COUNT * STRIDE * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | f2fs_kzalloc(HANDLE, - (COUNT) * (STRIDE) * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) | f2fs_kzalloc(HANDLE, - COUNT * (STRIDE) * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | f2fs_kzalloc(HANDLE, - (COUNT) * STRIDE * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | f2fs_kzalloc(HANDLE, - (COUNT) * (STRIDE) * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | f2fs_kzalloc(HANDLE, - COUNT * STRIDE * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) ) // Any remaining multi-factor products, first at least 3-factor products // when they're not all constants... @@ expression HANDLE; expression E1, E2, E3; constant C1, C2, C3; @@ ( f2fs_kzalloc(HANDLE, C1 * C2 * C3, ...) | f2fs_kzalloc(HANDLE, - E1 * E2 * E3 + array3_size(E1, E2, E3) , ...) ) // And then all remaining 2 factors products when they're not all constants. @@ expression HANDLE; expression E1, E2; constant C1, C2; @@ ( f2fs_kzalloc(HANDLE, C1 * C2, ...) | f2fs_kzalloc(HANDLE, - E1 * E2 + array_size(E1, E2) , ...) ) Signed-off-by: Kees Cook --- fs/f2fs/segment.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 9a3dc92ecf23..97ec716ac0c1 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3716,7 +3716,8 @@ static int build_curseg(struct f2fs_sb_info *sbi) struct curseg_info *array; int i; - array = f2fs_kzalloc(sbi, sizeof(*array) * NR_CURSEG_TYPE, GFP_KERNEL); + array = f2fs_kzalloc(sbi, array_size(NR_CURSEG_TYPE, sizeof(*array)), + GFP_KERNEL); if (!array) return -ENOMEM; -- cgit v1.2.3 From 6944da0a68ca00f8f27bd71e0e0e292ea14b5ca5 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 12 Jun 2018 14:28:35 -0700 Subject: treewide: Use array_size in f2fs_kvzalloc() The f2fs_kvzalloc() function has no 2-factor argument form, so multiplication factors need to be wrapped in array_size(). This patch replaces cases of: f2fs_kvzalloc(handle, a * b, gfp) with: f2fs_kvzalloc(handle, array_size(a, b), gfp) as well as handling cases of: f2fs_kvzalloc(handle, a * b * c, gfp) with: f2fs_kvzalloc(handle, array3_size(a, b, c), gfp) This does, however, attempt to ignore constant size factors like: f2fs_kvzalloc(handle, 4 * 1024, gfp) though any constants defined via macros get caught up in the conversion. Any factors with a sizeof() of "unsigned char", "char", and "u8" were dropped, since they're redundant. The Coccinelle script used for this was: // Fix redundant parens around sizeof(). @@ expression HANDLE; type TYPE; expression THING, E; @@ ( f2fs_kvzalloc(HANDLE, - (sizeof(TYPE)) * E + sizeof(TYPE) * E , ...) | f2fs_kvzalloc(HANDLE, - (sizeof(THING)) * E + sizeof(THING) * E , ...) ) // Drop single-byte sizes and redundant parens. @@ expression HANDLE; expression COUNT; typedef u8; typedef __u8; @@ ( f2fs_kvzalloc(HANDLE, - sizeof(u8) * (COUNT) + COUNT , ...) | f2fs_kvzalloc(HANDLE, - sizeof(__u8) * (COUNT) + COUNT , ...) | f2fs_kvzalloc(HANDLE, - sizeof(char) * (COUNT) + COUNT , ...) | f2fs_kvzalloc(HANDLE, - sizeof(unsigned char) * (COUNT) + COUNT , ...) | f2fs_kvzalloc(HANDLE, - sizeof(u8) * COUNT + COUNT , ...) | f2fs_kvzalloc(HANDLE, - sizeof(__u8) * COUNT + COUNT , ...) | f2fs_kvzalloc(HANDLE, - sizeof(char) * COUNT + COUNT , ...) | f2fs_kvzalloc(HANDLE, - sizeof(unsigned char) * COUNT + COUNT , ...) ) // 2-factor product with sizeof(type/expression) and identifier or constant. @@ expression HANDLE; type TYPE; expression THING; identifier COUNT_ID; constant COUNT_CONST; @@ ( f2fs_kvzalloc(HANDLE, - sizeof(TYPE) * (COUNT_ID) + array_size(COUNT_ID, sizeof(TYPE)) , ...) | f2fs_kvzalloc(HANDLE, - sizeof(TYPE) * COUNT_ID + array_size(COUNT_ID, sizeof(TYPE)) , ...) | f2fs_kvzalloc(HANDLE, - sizeof(TYPE) * (COUNT_CONST) + array_size(COUNT_CONST, sizeof(TYPE)) , ...) | f2fs_kvzalloc(HANDLE, - sizeof(TYPE) * COUNT_CONST + array_size(COUNT_CONST, sizeof(TYPE)) , ...) | f2fs_kvzalloc(HANDLE, - sizeof(THING) * (COUNT_ID) + array_size(COUNT_ID, sizeof(THING)) , ...) | f2fs_kvzalloc(HANDLE, - sizeof(THING) * COUNT_ID + array_size(COUNT_ID, sizeof(THING)) , ...) | f2fs_kvzalloc(HANDLE, - sizeof(THING) * (COUNT_CONST) + array_size(COUNT_CONST, sizeof(THING)) , ...) | f2fs_kvzalloc(HANDLE, - sizeof(THING) * COUNT_CONST + array_size(COUNT_CONST, sizeof(THING)) , ...) ) // 2-factor product, only identifiers. @@ expression HANDLE; identifier SIZE, COUNT; @@ f2fs_kvzalloc(HANDLE, - SIZE * COUNT + array_size(COUNT, SIZE) , ...) // 3-factor product with 1 sizeof(type) or sizeof(expression), with // redundant parens removed. @@ expression HANDLE; expression THING; identifier STRIDE, COUNT; type TYPE; @@ ( f2fs_kvzalloc(HANDLE, - sizeof(TYPE) * (COUNT) * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | f2fs_kvzalloc(HANDLE, - sizeof(TYPE) * (COUNT) * STRIDE + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | f2fs_kvzalloc(HANDLE, - sizeof(TYPE) * COUNT * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | f2fs_kvzalloc(HANDLE, - sizeof(TYPE) * COUNT * STRIDE + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | f2fs_kvzalloc(HANDLE, - sizeof(THING) * (COUNT) * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) | f2fs_kvzalloc(HANDLE, - sizeof(THING) * (COUNT) * STRIDE + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) | f2fs_kvzalloc(HANDLE, - sizeof(THING) * COUNT * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) | f2fs_kvzalloc(HANDLE, - sizeof(THING) * COUNT * STRIDE + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) ) // 3-factor product with 2 sizeof(variable), with redundant parens removed. @@ expression HANDLE; expression THING1, THING2; identifier COUNT; type TYPE1, TYPE2; @@ ( f2fs_kvzalloc(HANDLE, - sizeof(TYPE1) * sizeof(TYPE2) * COUNT + array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2)) , ...) | f2fs_kvzalloc(HANDLE, - sizeof(TYPE1) * sizeof(THING2) * (COUNT) + array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2)) , ...) | f2fs_kvzalloc(HANDLE, - sizeof(THING1) * sizeof(THING2) * COUNT + array3_size(COUNT, sizeof(THING1), sizeof(THING2)) , ...) | f2fs_kvzalloc(HANDLE, - sizeof(THING1) * sizeof(THING2) * (COUNT) + array3_size(COUNT, sizeof(THING1), sizeof(THING2)) , ...) | f2fs_kvzalloc(HANDLE, - sizeof(TYPE1) * sizeof(THING2) * COUNT + array3_size(COUNT, sizeof(TYPE1), sizeof(THING2)) , ...) | f2fs_kvzalloc(HANDLE, - sizeof(TYPE1) * sizeof(THING2) * (COUNT) + array3_size(COUNT, sizeof(TYPE1), sizeof(THING2)) , ...) ) // 3-factor product, only identifiers, with redundant parens removed. @@ expression HANDLE; identifier STRIDE, SIZE, COUNT; @@ ( f2fs_kvzalloc(HANDLE, - (COUNT) * STRIDE * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) | f2fs_kvzalloc(HANDLE, - COUNT * (STRIDE) * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) | f2fs_kvzalloc(HANDLE, - COUNT * STRIDE * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | f2fs_kvzalloc(HANDLE, - (COUNT) * (STRIDE) * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) | f2fs_kvzalloc(HANDLE, - COUNT * (STRIDE) * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | f2fs_kvzalloc(HANDLE, - (COUNT) * STRIDE * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | f2fs_kvzalloc(HANDLE, - (COUNT) * (STRIDE) * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | f2fs_kvzalloc(HANDLE, - COUNT * STRIDE * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) ) // Any remaining multi-factor products, first at least 3-factor products // when they're not all constants... @@ expression HANDLE; expression E1, E2, E3; constant C1, C2, C3; @@ ( f2fs_kvzalloc(HANDLE, C1 * C2 * C3, ...) | f2fs_kvzalloc(HANDLE, - E1 * E2 * E3 + array3_size(E1, E2, E3) , ...) ) // And then all remaining 2 factors products when they're not all constants. @@ expression HANDLE; expression E1, E2; constant C1, C2; @@ ( f2fs_kvzalloc(HANDLE, C1 * C2, ...) | f2fs_kvzalloc(HANDLE, - E1 * E2 + array_size(E1, E2) , ...) ) Signed-off-by: Kees Cook --- fs/f2fs/segment.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 97ec716ac0c1..3d0c42ef0474 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3598,8 +3598,10 @@ static int build_sit_info(struct f2fs_sb_info *sbi) SM_I(sbi)->sit_info = sit_i; - sit_i->sentries = f2fs_kvzalloc(sbi, MAIN_SEGS(sbi) * - sizeof(struct seg_entry), GFP_KERNEL); + sit_i->sentries = + f2fs_kvzalloc(sbi, array_size(sizeof(struct seg_entry), + MAIN_SEGS(sbi)), + GFP_KERNEL); if (!sit_i->sentries) return -ENOMEM; @@ -3639,8 +3641,10 @@ static int build_sit_info(struct f2fs_sb_info *sbi) return -ENOMEM; if (sbi->segs_per_sec > 1) { - sit_i->sec_entries = f2fs_kvzalloc(sbi, MAIN_SECS(sbi) * - sizeof(struct sec_entry), GFP_KERNEL); + sit_i->sec_entries = + f2fs_kvzalloc(sbi, array_size(sizeof(struct sec_entry), + MAIN_SECS(sbi)), + GFP_KERNEL); if (!sit_i->sec_entries) return -ENOMEM; } -- cgit v1.2.3