From 69e9c6c6dc87587d8846e447febedefe037c14f8 Mon Sep 17 00:00:00 2001 From: Stefan Behrens Date: Thu, 5 Sep 2013 16:58:43 +0200 Subject: Btrfs: eliminate the exceptional root_tree refs=0 The fact that btrfs_root_refs() returned 0 for the tree_root caused bugs in the past, therefore it is set to 1 with this patch and (hopefully) all affected code is adapted to this change. I verified this change by temporarily adding WARN_ON() checks everywhere where btrfs_root_refs() is used, checking whether the logic of the code is changed by btrfs_root_refs() returning 1 instead of 0 for root->root_key.objectid == BTRFS_ROOT_TREE_OBJECTID. With these added checks, I ran the xfstests './check -g auto'. The two roots chunk_root and log_root_tree that are only referenced by the superblock and the log_roots below the log_root_tree still have btrfs_root_refs() == 0, only the tree_root is changed. Signed-off-by: Stefan Behrens Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 62176ad89846..f724397b396b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2670,6 +2670,7 @@ retry_root_backup: btrfs_set_root_node(&tree_root->root_item, tree_root->node); tree_root->commit_root = btrfs_root_node(tree_root); + btrfs_set_root_refs(&tree_root->root_item, 1); location.objectid = BTRFS_EXTENT_TREE_OBJECTID; location.type = BTRFS_ROOT_ITEM_KEY; -- cgit v1.2.3 From 06ea65a398a2501e94beee3a425d07e1846ff25a Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 19 Sep 2013 16:07:01 -0400 Subject: Btrfs: add a sanity test for btrfs_split_item While looking at somebodys corruption I became completely convinced that btrfs_split_item was broken, so I wrote this test to verify that it was working as it was supposed to. Thankfully it appears to be working as intended, so just add this test to make sure nobody breaks it in the future. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 38 ++++++++++++++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index f724397b396b..db2095486a4f 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1229,14 +1229,18 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, atomic_set(&root->refs, 1); root->log_transid = 0; root->last_log_commit = 0; - extent_io_tree_init(&root->dirty_log_pages, - fs_info->btree_inode->i_mapping); + if (fs_info) + extent_io_tree_init(&root->dirty_log_pages, + fs_info->btree_inode->i_mapping); memset(&root->root_key, 0, sizeof(root->root_key)); memset(&root->root_item, 0, sizeof(root->root_item)); memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); memset(&root->root_kobj, 0, sizeof(root->root_kobj)); - root->defrag_trans_start = fs_info->generation; + if (fs_info) + root->defrag_trans_start = fs_info->generation; + else + root->defrag_trans_start = 0; init_completion(&root->kobj_unregister); root->defrag_running = 0; root->root_key.objectid = objectid; @@ -1253,6 +1257,22 @@ static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info) return root; } +#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS +/* Should only be used by the testing infrastructure */ +struct btrfs_root *btrfs_alloc_dummy_root(void) +{ + struct btrfs_root *root; + + root = btrfs_alloc_root(NULL); + if (!root) + return ERR_PTR(-ENOMEM); + __setup_root(4096, 4096, 4096, 4096, root, NULL, 1); + root->dummy_root = 1; + + return root; +} +#endif + struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, u64 objectid) @@ -3670,10 +3690,20 @@ int btrfs_set_buffer_uptodate(struct extent_buffer *buf) void btrfs_mark_buffer_dirty(struct extent_buffer *buf) { - struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; + struct btrfs_root *root; u64 transid = btrfs_header_generation(buf); int was_dirty; +#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS + /* + * This is a fast path so only do this check if we have sanity tests + * enabled. Normal people shouldn't be marking dummy buffers as dirty + * outside of the sanity tests. + */ + if (unlikely(test_bit(EXTENT_BUFFER_DUMMY, &buf->bflags))) + return; +#endif + root = BTRFS_I(buf->pages[0]->mapping->host)->root; btrfs_assert_tree_locked(buf); if (transid != root->fs_info->generation) WARN(1, KERN_CRIT "btrfs transid mismatch buffer %llu, " -- cgit v1.2.3 From 4e121c06adf53aae478ebce3035116595d063413 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 27 Sep 2013 16:32:39 -0400 Subject: Btrfs: cleanup transaction on abort If we abort not during a transaction commit we won't clean up anything until we unmount. Unfortunately if we abort in the middle of writing out an ordered extent we won't clean it up and if somebody is waiting on that ordered extent they will wait forever. To fix this just make the transaction kthread call the cleanup transaction stuff if it notices theres an error, and make btrfs_end_transaction wake up the transaction kthread if there is an error. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index db2095486a4f..113cd43530ba 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1800,6 +1800,9 @@ sleep: wake_up_process(root->fs_info->cleaner_kthread); mutex_unlock(&root->fs_info->transaction_kthread_mutex); + if (unlikely(test_bit(BTRFS_FS_STATE_ERROR, + &root->fs_info->fs_state))) + btrfs_cleanup_transaction(root); if (!try_to_freeze()) { set_current_state(TASK_INTERRUPTIBLE); if (!kthread_should_stop() && -- cgit v1.2.3 From 1de2cfde93c20a0357ff1dffed901598470facf3 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 27 Sep 2013 16:36:02 -0400 Subject: Btrfs: don't delete ordered roots from list during cleanup During transaction cleanup after an abort we are just removing roots from the ordered roots list which is incorrect. We have a BUG_ON() to make sure that the root is still part of the ordered roots list when we put our ordered extent which we were tripping in this case. So do like we do everywhere else and just move it to the tail of the ordered roots list and allow the normal cleanup to take care of stuff. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 113cd43530ba..01a26e2eb310 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3836,7 +3836,8 @@ static void btrfs_destroy_all_ordered_extents(struct btrfs_fs_info *fs_info) while (!list_empty(&splice)) { root = list_first_entry(&splice, struct btrfs_root, ordered_root); - list_del_init(&root->ordered_root); + list_move_tail(&root->ordered_root, + &fs_info->ordered_roots); btrfs_destroy_ordered_extents(root); -- cgit v1.2.3 From 724e2315db3d59a8201d4a87c7c7a873e60e1ce0 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 30 Sep 2013 11:36:38 -0400 Subject: Btrfs: fix two use-after-free bugs with transaction cleanup I was noticing the slab redzone stuff going off every once and a while during transaction aborts. This was caused by two things 1) We would walk the pending snapshots and set their error to -ECANCELED. We don't need to do this, the snapshot stuff waits for a transaction commit and if there is a problem we just free our pending snapshot object and exit. Doing this was causing us to touch the pending snapshot object after the thing had already been freed. 2) We were freeing the transaction manually with wanton disregard for it's use_count reference counter. To fix this I cleaned up the transaction freeing loop to either wait for the transaction commit to finish if it was in the middle of that (since it will be cleaned and freed up there) or to do the cleanup oursevles. I also moved the global "kill all things dirty everywhere" stuff outside of the transaction cleanup loop since that only needs to be done once. With this patch I'm no longer seeing slab corruption because of use after frees. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 111 +++++++++++++++++++---------------------------------- 1 file changed, 40 insertions(+), 71 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 01a26e2eb310..b131d716d5a5 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -64,7 +64,6 @@ static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t, static void btrfs_destroy_ordered_extents(struct btrfs_root *root); static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, struct btrfs_root *root); -static void btrfs_evict_pending_snapshots(struct btrfs_transaction *t); static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root); static int btrfs_destroy_marked_extents(struct btrfs_root *root, struct extent_io_tree *dirty_pages, @@ -3915,24 +3914,6 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, return ret; } -static void btrfs_evict_pending_snapshots(struct btrfs_transaction *t) -{ - struct btrfs_pending_snapshot *snapshot; - struct list_head splice; - - INIT_LIST_HEAD(&splice); - - list_splice_init(&t->pending_snapshots, &splice); - - while (!list_empty(&splice)) { - snapshot = list_entry(splice.next, - struct btrfs_pending_snapshot, - list); - snapshot->error = -ECANCELED; - list_del_init(&snapshot->list); - } -} - static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root) { struct btrfs_inode *btrfs_inode; @@ -4062,6 +4043,8 @@ again: void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans, struct btrfs_root *root) { + btrfs_destroy_ordered_operations(cur_trans, root); + btrfs_destroy_delayed_refs(cur_trans, root); btrfs_block_rsv_release(root, &root->fs_info->trans_block_rsv, cur_trans->dirty_pages.dirty_bytes); @@ -4069,8 +4052,6 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans, cur_trans->state = TRANS_STATE_COMMIT_START; wake_up(&root->fs_info->transaction_blocked_wait); - btrfs_evict_pending_snapshots(cur_trans); - cur_trans->state = TRANS_STATE_UNBLOCKED; wake_up(&root->fs_info->transaction_wait); @@ -4094,63 +4075,51 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans, static int btrfs_cleanup_transaction(struct btrfs_root *root) { struct btrfs_transaction *t; - LIST_HEAD(list); mutex_lock(&root->fs_info->transaction_kthread_mutex); spin_lock(&root->fs_info->trans_lock); - list_splice_init(&root->fs_info->trans_list, &list); - root->fs_info->running_transaction = NULL; - spin_unlock(&root->fs_info->trans_lock); - - while (!list_empty(&list)) { - t = list_entry(list.next, struct btrfs_transaction, list); - - btrfs_destroy_ordered_operations(t, root); - - btrfs_destroy_all_ordered_extents(root->fs_info); - - btrfs_destroy_delayed_refs(t, root); - - /* - * FIXME: cleanup wait for commit - * We needn't acquire the lock here, because we are during - * the umount, there is no other task which will change it. - */ - t->state = TRANS_STATE_COMMIT_START; - smp_mb(); - if (waitqueue_active(&root->fs_info->transaction_blocked_wait)) - wake_up(&root->fs_info->transaction_blocked_wait); - - btrfs_evict_pending_snapshots(t); - - t->state = TRANS_STATE_UNBLOCKED; - smp_mb(); - if (waitqueue_active(&root->fs_info->transaction_wait)) - wake_up(&root->fs_info->transaction_wait); - - btrfs_destroy_delayed_inodes(root); - btrfs_assert_delayed_root_empty(root); - - btrfs_destroy_all_delalloc_inodes(root->fs_info); - - btrfs_destroy_marked_extents(root, &t->dirty_pages, - EXTENT_DIRTY); - - btrfs_destroy_pinned_extent(root, - root->fs_info->pinned_extents); - - t->state = TRANS_STATE_COMPLETED; - smp_mb(); - if (waitqueue_active(&t->commit_wait)) - wake_up(&t->commit_wait); + while (!list_empty(&root->fs_info->trans_list)) { + t = list_first_entry(&root->fs_info->trans_list, + struct btrfs_transaction, list); + if (t->state >= TRANS_STATE_COMMIT_START) { + atomic_inc(&t->use_count); + spin_unlock(&root->fs_info->trans_lock); + btrfs_wait_for_commit(root, t->transid); + btrfs_put_transaction(t); + spin_lock(&root->fs_info->trans_lock); + continue; + } + if (t == root->fs_info->running_transaction) { + t->state = TRANS_STATE_COMMIT_DOING; + spin_unlock(&root->fs_info->trans_lock); + /* + * We wait for 0 num_writers since we don't hold a trans + * handle open currently for this transaction. + */ + wait_event(t->writer_wait, + atomic_read(&t->num_writers) == 0); + } else { + spin_unlock(&root->fs_info->trans_lock); + } + btrfs_cleanup_one_transaction(t, root); - atomic_set(&t->use_count, 0); + spin_lock(&root->fs_info->trans_lock); + if (t == root->fs_info->running_transaction) + root->fs_info->running_transaction = NULL; list_del_init(&t->list); - memset(t, 0, sizeof(*t)); - kmem_cache_free(btrfs_transaction_cachep, t); - } + spin_unlock(&root->fs_info->trans_lock); + btrfs_put_transaction(t); + trace_btrfs_transaction_commit(root); + spin_lock(&root->fs_info->trans_lock); + } + spin_unlock(&root->fs_info->trans_lock); + btrfs_destroy_all_ordered_extents(root->fs_info); + btrfs_destroy_delayed_inodes(root); + btrfs_assert_delayed_root_empty(root); + btrfs_destroy_pinned_extent(root, root->fs_info->pinned_extents); + btrfs_destroy_all_delalloc_inodes(root->fs_info); mutex_unlock(&root->fs_info->transaction_kthread_mutex); return 0; -- cgit v1.2.3 From 0a4e558609dd4df30a58a07d9eb14c5ddc2c1241 Mon Sep 17 00:00:00 2001 From: Ross Kirk Date: Tue, 24 Sep 2013 10:12:38 +0100 Subject: btrfs: remove unused parameter from btrfs_header_fsid Remove unused parameter, 'eb'. Unused since introduction in 5f39d397dfbe140a14edecd4e73c34ce23c4f9ee Updated to be rebased against current upstream and correct diff supplied this time! Signed-off-by: Ross Kirk Reviewed-by: Eric Sandeen Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index b131d716d5a5..ade6c0e79616 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -495,7 +495,7 @@ static int check_tree_block_fsid(struct btrfs_root *root, u8 fsid[BTRFS_UUID_SIZE]; int ret = 1; - read_extent_buffer(eb, fsid, btrfs_header_fsid(eb), BTRFS_FSID_SIZE); + read_extent_buffer(eb, fsid, btrfs_header_fsid(), BTRFS_FSID_SIZE); while (fs_devices) { if (!memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE)) { ret = 0; @@ -1311,7 +1311,7 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, btrfs_set_header_owner(leaf, objectid); root->node = leaf; - write_extent_buffer(leaf, fs_info->fsid, btrfs_header_fsid(leaf), + write_extent_buffer(leaf, fs_info->fsid, btrfs_header_fsid(), BTRFS_FSID_SIZE); write_extent_buffer(leaf, fs_info->chunk_tree_uuid, btrfs_header_chunk_tree_uuid(leaf), @@ -1398,7 +1398,7 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, root->node = leaf; write_extent_buffer(root->node, root->fs_info->fsid, - btrfs_header_fsid(root->node), BTRFS_FSID_SIZE); + btrfs_header_fsid(), BTRFS_FSID_SIZE); btrfs_mark_buffer_dirty(root->node); btrfs_tree_unlock(root->node); return root; -- cgit v1.2.3 From 778ba82b1796e75e719a52679ae431371ca73988 Mon Sep 17 00:00:00 2001 From: Filipe David Borba Manana Date: Sun, 6 Oct 2013 22:22:33 +0100 Subject: Btrfs: improve inode hash function/inode lookup Currently the hash value used for adding an inode to the VFS's inode hash table consists of the plain inode number, which is a 64 bits integer. This results in hash table buckets (hlist_head lists) with too many elements for at least 2 important scenarios: 1) When we have many subvolumes. Each subvolume has its own btree where its files and directories are added to, and each has its own objectid (inode number) namespace. This means that if we have N subvolumes, and all have inode number X associated to a file or directory, the corresponding inodes all map to the same hash table entry, resulting in a bucket (hlist_head list) with N elements; 2) On 32 bits machines. Th VFS hash values are unsigned longs, which are 32 bits wide on 32 bits machines, and the inode (objectid) numbers are 64 bits unsigned integers. We simply cast the inode numbers to hash values, which means that for all inodes with the same 32 bits lower half, the same hash bucket is used for all of them. For example, all inodes with a number (objectid) between 0x0000_0000_ffff_ffff and 0xffff_ffff_ffff_ffff will end up in the same hash table bucket. This change ensures the inode's hash value depends both on the objectid (inode number) and its subvolume's (btree root) objectid. For 32 bits machines, this change gives better entropy by making the hash value depend on both the upper and lower 32 bits of the 64 bits hash previously computed. Signed-off-by: Filipe David Borba Manana Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index ade6c0e79616..d205bddc7776 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2294,7 +2294,7 @@ int open_ctree(struct super_block *sb, sizeof(struct btrfs_key)); set_bit(BTRFS_INODE_DUMMY, &BTRFS_I(fs_info->btree_inode)->runtime_flags); - insert_inode_hash(fs_info->btree_inode); + btrfs_insert_inode_hash(fs_info->btree_inode); spin_lock_init(&fs_info->block_group_cache_lock); fs_info->block_group_cache_tree = RB_ROOT; -- cgit v1.2.3 From eb58bb371a04d3bbab44ec0c5672ce69487bac1e Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 7 Oct 2013 10:45:07 -0400 Subject: Btrfs: do not free the dirty bytes from the trans block rsv on cleanup The transactions should be cleaning up their reservations on failure, this just causes us to have warnings on unmount because we go negative by free'ing reservations that have already been free'ed. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index d205bddc7776..fdc75ab09483 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -4046,8 +4046,6 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans, btrfs_destroy_ordered_operations(cur_trans, root); btrfs_destroy_delayed_refs(cur_trans, root); - btrfs_block_rsv_release(root, &root->fs_info->trans_block_rsv, - cur_trans->dirty_pages.dirty_bytes); cur_trans->state = TRANS_STATE_COMMIT_START; wake_up(&root->fs_info->transaction_blocked_wait); -- cgit v1.2.3 From 2b1360da35877cd969ed83884d8989ba778254d0 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 7 Oct 2013 15:14:44 -0400 Subject: Btrfs: free up block groups after everything If we abort a transaction we will do the tree log cleanup at unmount, but this happens after we free up the block groups. This makes all the leak detection warnings go off because we think we've leaked space but in reality we just haven't cleaned it up yet. So instead do the block group cleanup stuff after free'ing the fs roots so we don't get these warnings. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index fdc75ab09483..419968e3eaa0 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3637,12 +3637,12 @@ int close_ctree(struct btrfs_root *root) percpu_counter_sum(&fs_info->delalloc_bytes)); } - btrfs_free_block_groups(fs_info); - btrfs_stop_all_workers(fs_info); del_fs_roots(fs_info); + btrfs_free_block_groups(fs_info); + free_root_pointers(fs_info, 1); iput(fs_info->btree_inode); -- cgit v1.2.3 From 96192499c27e8b58d71f4370f29ca86d4ca915d7 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 16 Oct 2013 13:53:28 -0400 Subject: Btrfs: stop all workers after we free block groups Stefan was hitting a panic in the async worker stuff because we had outstanding read bios while we were stopping the worker threads. You could reproduce this easily if you mount -o nospace_cache and ran generic/273. This is because the caching thread stuff is still going and we were stopping all the worker threads. We need to stop the workers after this work is done, and the free block groups code will wait for all the caching threads to stop first so we don't run into this problem. With this patch we no longer panic. Thanks, Cc: stable@vger.kernel.org Reported-by: Stefan Behrens Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 419968e3eaa0..b0ea9f4bbc70 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3637,12 +3637,12 @@ int close_ctree(struct btrfs_root *root) percpu_counter_sum(&fs_info->delalloc_bytes)); } - btrfs_stop_all_workers(fs_info); - del_fs_roots(fs_info); btrfs_free_block_groups(fs_info); + btrfs_stop_all_workers(fs_info); + free_root_pointers(fs_info, 1); iput(fs_info->btree_inode); -- cgit v1.2.3 From 452c75c3d2187089f6e846710e6ea7883bf30f8a Mon Sep 17 00:00:00 2001 From: Chandra Seetharaman Date: Mon, 7 Oct 2013 10:45:25 -0500 Subject: Btrfs: Simplify the logic in alloc_extent_buffer() for existing extent buffer case alloc_extent_buffer() uses radix_tree_lookup() when radix_tree_insert() fails with EEXIST. That part of the code is very similar to the code in find_extent_buffer(). This patch replaces radix_tree_lookup() and surrounding code in alloc_extent_buffer() with find_extent_buffer(). Note that radix_tree_lookup() does not need to be protected by tree->buffer_lock. It is protected by eb->refs. While at it, this patch - changes the other usage of radix_tree_lookup() in alloc_extent_buffer() with find_extent_buffer() to reduce redundancy. - removes the unused argument 'len' to find_extent_buffer(). Signed-Off-by: Chandra Seetharaman Reviewed-by: Zach Brown Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index b0ea9f4bbc70..ebc784aba3f5 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1104,8 +1104,7 @@ struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, { struct inode *btree_inode = root->fs_info->btree_inode; struct extent_buffer *eb; - eb = find_extent_buffer(&BTRFS_I(btree_inode)->io_tree, - bytenr, blocksize); + eb = find_extent_buffer(&BTRFS_I(btree_inode)->io_tree, bytenr); return eb; } -- cgit v1.2.3 From 8b558c5f097b636209b654f4d7775ac96054d6e3 Mon Sep 17 00:00:00 2001 From: Zach Brown Date: Wed, 16 Oct 2013 12:10:34 -0700 Subject: btrfs: remove fs/btrfs/compat.h fs/btrfs/compat.h only contained trivial macro wrappers of drop_nlink() and inc_nlink(). This doesn't belong in mainline. Signed-off-by: Zach Brown Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index ebc784aba3f5..62c4aba221bb 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -33,7 +33,6 @@ #include #include #include -#include "compat.h" #include "ctree.h" #include "disk-io.h" #include "transaction.h" -- cgit v1.2.3 From 9b011adfe14977fcda977234609d43ca52463a3d Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Fri, 25 Oct 2013 19:12:02 +0800 Subject: Btrfs: remove scrub_super_lock holding in btrfs_sync_log() Originally, we introduced scrub_super_lock to synchronize tree log code with scrubbing super. However we can replace scrub_super_lock with device_list_mutex, because writing super will hold this mutex, this will reduce an extra lock holding when writing supers in sync log code. Signed-off-by: Wang Shilong Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 62c4aba221bb..a3ad3a27944d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2250,7 +2250,6 @@ int open_ctree(struct super_block *sb, atomic_set(&fs_info->scrubs_paused, 0); atomic_set(&fs_info->scrub_cancel_req, 0); init_waitqueue_head(&fs_info->scrub_pause_wait); - init_rwsem(&fs_info->scrub_super_lock); fs_info->scrub_workers_refcnt = 0; #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY fs_info->check_integrity_print_mask = 0; -- cgit v1.2.3 From 2e9f5954978cd5b0c26e6eeb9fd4ccbdce0f5ecc Mon Sep 17 00:00:00 2001 From: Rashika Date: Thu, 31 Oct 2013 02:45:20 +0530 Subject: btrfs: Add helper function for free_root_pointers() The function free_root_pointers() in disk-io.h contains redundant code. Therefore, this patch adds a helper function free_root_extent_buffers() to free_root_pointers() to eliminate redundancy. Reviewed-by: Zach Brown Signed-off-by: Rashika Kheria Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 60 +++++++++++++++++------------------------------------- 1 file changed, 19 insertions(+), 41 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index a3ad3a27944d..22443fac9408 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2033,50 +2033,28 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info) btrfs_stop_workers(&fs_info->qgroup_rescan_workers); } +static void free_root_extent_buffers(struct btrfs_root *root) +{ + if (root) { + free_extent_buffer(root->node); + free_extent_buffer(root->commit_root); + root->node = NULL; + root->commit_root = NULL; + } +} + /* helper to cleanup tree roots */ static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root) { - free_extent_buffer(info->tree_root->node); - free_extent_buffer(info->tree_root->commit_root); - info->tree_root->node = NULL; - info->tree_root->commit_root = NULL; - - if (info->dev_root) { - free_extent_buffer(info->dev_root->node); - free_extent_buffer(info->dev_root->commit_root); - info->dev_root->node = NULL; - info->dev_root->commit_root = NULL; - } - if (info->extent_root) { - free_extent_buffer(info->extent_root->node); - free_extent_buffer(info->extent_root->commit_root); - info->extent_root->node = NULL; - info->extent_root->commit_root = NULL; - } - if (info->csum_root) { - free_extent_buffer(info->csum_root->node); - free_extent_buffer(info->csum_root->commit_root); - info->csum_root->node = NULL; - info->csum_root->commit_root = NULL; - } - if (info->quota_root) { - free_extent_buffer(info->quota_root->node); - free_extent_buffer(info->quota_root->commit_root); - info->quota_root->node = NULL; - info->quota_root->commit_root = NULL; - } - if (info->uuid_root) { - free_extent_buffer(info->uuid_root->node); - free_extent_buffer(info->uuid_root->commit_root); - info->uuid_root->node = NULL; - info->uuid_root->commit_root = NULL; - } - if (chunk_root) { - free_extent_buffer(info->chunk_root->node); - free_extent_buffer(info->chunk_root->commit_root); - info->chunk_root->node = NULL; - info->chunk_root->commit_root = NULL; - } + free_root_extent_buffers(info->tree_root); + + free_root_extent_buffers(info->dev_root); + free_root_extent_buffers(info->extent_root); + free_root_extent_buffers(info->csum_root); + free_root_extent_buffers(info->quota_root); + free_root_extent_buffers(info->uuid_root); + if (chunk_root) + free_root_extent_buffers(info->chunk_root); } static void del_fs_roots(struct btrfs_fs_info *fs_info) -- cgit v1.2.3 From fae7f21cece9a4c181a8d8131870c7247e153f65 Mon Sep 17 00:00:00 2001 From: Dulshani Gunawardhana Date: Thu, 31 Oct 2013 10:30:08 +0530 Subject: btrfs: Use WARN_ON()'s return value in place of WARN_ON(1) Use WARN_ON()'s return value in place of WARN_ON(1) for cleaner source code that outputs a more descriptive warnings. Also fix the styling warning of redundant braces that came up as a result of this fix. Signed-off-by: Dulshani Gunawardhana Reviewed-by: Zach Brown Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 22443fac9408..77fa2d99252f 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -475,14 +475,8 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page) if (page != eb->pages[0]) return 0; found_start = btrfs_header_bytenr(eb); - if (found_start != start) { - WARN_ON(1); + if (WARN_ON(found_start != start || !PageUptodate(page))) return 0; - } - if (!PageUptodate(page)) { - WARN_ON(1); - return 0; - } csum_tree_block(root, eb, 0); return 0; } -- cgit v1.2.3 From f570e757b54ca6eceabc9b19a6342f14e836c196 Mon Sep 17 00:00:00 2001 From: Rashika Date: Thu, 31 Oct 2013 16:50:42 +0530 Subject: btrfs: Remove useless variable in write_ctree_super() The function write_ctree_super() in disk-io.c uses variable ret to return the result of function write_all_supers(). Since, this variable serves no purpose, hence the patch removes it and returns the call of the called function. Reviewed-by: Zach Brown Signed-off-by: Rashika Kheria Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 77fa2d99252f..4c4ed0bb3da1 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3440,10 +3440,7 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors) int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root, int max_mirrors) { - int ret; - - ret = write_all_supers(root, max_mirrors); - return ret; + return write_all_supers(root, max_mirrors); } /* Drop a fs root from the radix tree and free it. */ -- cgit v1.2.3 From d52c1bcc647ca41ee563bc38d8231e5f3816112c Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Tue, 5 Nov 2013 11:45:53 +0800 Subject: Btrfs: avoid heavy operations in btrfs_commit_super The 'git blame' history shows that, the old transaction commit code has to do twice to ensure roots are updated and we have to flush metadata and super block manually, however, right now all of these can be handled well inside the transaction commit code without extra efforts. And the error handling part remains same with the current code, -- 'return to caller once we get error'. This saves us a transaction commit and a flush of super block, which are both heavy operations according to ftrace output analysis. Signed-off-by: Liu Bo Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 21 +-------------------- 1 file changed, 1 insertion(+), 20 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 4c4ed0bb3da1..8072cfa8a3b1 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3517,7 +3517,6 @@ int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info) int btrfs_commit_super(struct btrfs_root *root) { struct btrfs_trans_handle *trans; - int ret; mutex_lock(&root->fs_info->cleaner_mutex); btrfs_run_delayed_iputs(root); @@ -3531,25 +3530,7 @@ int btrfs_commit_super(struct btrfs_root *root) trans = btrfs_join_transaction(root); if (IS_ERR(trans)) return PTR_ERR(trans); - ret = btrfs_commit_transaction(trans, root); - if (ret) - return ret; - /* run commit again to drop the original snapshot */ - trans = btrfs_join_transaction(root); - if (IS_ERR(trans)) - return PTR_ERR(trans); - ret = btrfs_commit_transaction(trans, root); - if (ret) - return ret; - ret = btrfs_write_and_wait_transaction(NULL, root); - if (ret) { - btrfs_error(root->fs_info, ret, - "Failed to sync btree inode to disk."); - return ret; - } - - ret = write_ctree_super(NULL, root, 0); - return ret; + return btrfs_commit_transaction(trans, root); } int close_ctree(struct btrfs_root *root) -- cgit v1.2.3