From 538042801a479ebd316582ad10a9c3156b5b7548 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Thu, 1 Mar 2012 14:56:28 +0100 Subject: btrfs: avoid NULL deref in btrfs_reserve_extent with DEBUG_ENOSPC __find_space_info can return NULL but we don't check it before calling dump_space_info(). Signed-off-by: Jeff Mahoney --- fs/btrfs/extent-tree.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 37e0a800d34e..c32a9bffe9e4 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5838,7 +5838,8 @@ again: printk(KERN_ERR "btrfs allocation failed flags %llu, " "wanted %llu\n", (unsigned long long)data, (unsigned long long)num_bytes); - dump_space_info(sinfo, num_bytes, 1); + if (sinfo) + dump_space_info(sinfo, num_bytes, 1); } } -- cgit v1.2.3 From 143bede527b054a271053f41bfaca2b57baa9408 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Thu, 1 Mar 2012 14:56:26 +0100 Subject: btrfs: return void in functions without error conditions Signed-off-by: Jeff Mahoney --- fs/btrfs/extent-tree.c | 60 +++++++++++++++++++++++--------------------------- 1 file changed, 27 insertions(+), 33 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c32a9bffe9e4..0daa1df16439 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1010,7 +1010,7 @@ static int convert_extent_item_v0(struct btrfs_trans_handle *trans, return ret; BUG_ON(ret); - ret = btrfs_extend_item(trans, root, path, new_size); + btrfs_extend_item(trans, root, path, new_size); leaf = path->nodes[0]; item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); @@ -1592,13 +1592,13 @@ out: * helper to add new inline back ref */ static noinline_for_stack -int setup_inline_extent_backref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_path *path, - struct btrfs_extent_inline_ref *iref, - u64 parent, u64 root_objectid, - u64 owner, u64 offset, int refs_to_add, - struct btrfs_delayed_extent_op *extent_op) +void setup_inline_extent_backref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_extent_inline_ref *iref, + u64 parent, u64 root_objectid, + u64 owner, u64 offset, int refs_to_add, + struct btrfs_delayed_extent_op *extent_op) { struct extent_buffer *leaf; struct btrfs_extent_item *ei; @@ -1608,7 +1608,6 @@ int setup_inline_extent_backref(struct btrfs_trans_handle *trans, u64 refs; int size; int type; - int ret; leaf = path->nodes[0]; ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); @@ -1617,7 +1616,7 @@ int setup_inline_extent_backref(struct btrfs_trans_handle *trans, type = extent_ref_type(parent, owner); size = btrfs_extent_inline_ref_size(type); - ret = btrfs_extend_item(trans, root, path, size); + btrfs_extend_item(trans, root, path, size); ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); refs = btrfs_extent_refs(leaf, ei); @@ -1652,7 +1651,6 @@ int setup_inline_extent_backref(struct btrfs_trans_handle *trans, btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid); } btrfs_mark_buffer_dirty(leaf); - return 0; } static int lookup_extent_backref(struct btrfs_trans_handle *trans, @@ -1687,12 +1685,12 @@ static int lookup_extent_backref(struct btrfs_trans_handle *trans, * helper to update/remove inline back ref */ static noinline_for_stack -int update_inline_extent_backref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_path *path, - struct btrfs_extent_inline_ref *iref, - int refs_to_mod, - struct btrfs_delayed_extent_op *extent_op) +void update_inline_extent_backref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_extent_inline_ref *iref, + int refs_to_mod, + struct btrfs_delayed_extent_op *extent_op) { struct extent_buffer *leaf; struct btrfs_extent_item *ei; @@ -1703,7 +1701,6 @@ int update_inline_extent_backref(struct btrfs_trans_handle *trans, u32 item_size; int size; int type; - int ret; u64 refs; leaf = path->nodes[0]; @@ -1745,10 +1742,9 @@ int update_inline_extent_backref(struct btrfs_trans_handle *trans, memmove_extent_buffer(leaf, ptr, ptr + size, end - ptr - size); item_size -= size; - ret = btrfs_truncate_item(trans, root, path, item_size, 1); + btrfs_truncate_item(trans, root, path, item_size, 1); } btrfs_mark_buffer_dirty(leaf); - return 0; } static noinline_for_stack @@ -1768,13 +1764,13 @@ int insert_inline_extent_backref(struct btrfs_trans_handle *trans, root_objectid, owner, offset, 1); if (ret == 0) { BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID); - ret = update_inline_extent_backref(trans, root, path, iref, - refs_to_add, extent_op); + update_inline_extent_backref(trans, root, path, iref, + refs_to_add, extent_op); } else if (ret == -ENOENT) { - ret = setup_inline_extent_backref(trans, root, path, iref, - parent, root_objectid, - owner, offset, refs_to_add, - extent_op); + setup_inline_extent_backref(trans, root, path, iref, parent, + root_objectid, owner, offset, + refs_to_add, extent_op); + ret = 0; } return ret; } @@ -1804,12 +1800,12 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans, struct btrfs_extent_inline_ref *iref, int refs_to_drop, int is_data) { - int ret; + int ret = 0; BUG_ON(!is_data && refs_to_drop != 1); if (iref) { - ret = update_inline_extent_backref(trans, root, path, iref, - -refs_to_drop, NULL); + update_inline_extent_backref(trans, root, path, iref, + -refs_to_drop, NULL); } else if (is_data) { ret = remove_extent_data_ref(trans, root, path, refs_to_drop); } else { @@ -4734,7 +4730,7 @@ static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, return ret; } -int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, +void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_root *root) { struct btrfs_fs_info *fs_info = root->fs_info; @@ -4764,7 +4760,6 @@ int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, up_write(&fs_info->extent_commit_sem); update_global_block_rsv(fs_info); - return 0; } static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) @@ -7189,7 +7184,7 @@ u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo) return free_bytes; } -int btrfs_set_block_group_rw(struct btrfs_root *root, +void btrfs_set_block_group_rw(struct btrfs_root *root, struct btrfs_block_group_cache *cache) { struct btrfs_space_info *sinfo = cache->space_info; @@ -7205,7 +7200,6 @@ int btrfs_set_block_group_rw(struct btrfs_root *root, cache->ro = 0; spin_unlock(&cache->lock); spin_unlock(&sinfo->lock); - return 0; } /* -- cgit v1.2.3 From 2c536799f1bde905bbacf7af3aa6be3f4de66005 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Mon, 3 Oct 2011 23:22:41 -0400 Subject: btrfs: btrfs_drop_snapshot should return int Commit cb1b69f4 (Btrfs: forced readonly when btrfs_drop_snapshot() fails) made btrfs_drop_snapshot return void because there were no callers checking the return value. That is the wrong order to handle error propogation since the caller will have no idea that an error has occured and continue on as if nothing went wrong. Signed-off-by: Jeff Mahoney --- fs/btrfs/extent-tree.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 0daa1df16439..cd6f8ae0a78d 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -6734,7 +6734,7 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans, * also make sure backrefs for the shared block and all lower level * blocks are properly updated. */ -void btrfs_drop_snapshot(struct btrfs_root *root, +int btrfs_drop_snapshot(struct btrfs_root *root, struct btrfs_block_rsv *block_rsv, int update_ref, int for_reloc) { @@ -6902,7 +6902,7 @@ out_free: out: if (err) btrfs_std_error(root->fs_info, err); - return; + return err; } /* -- cgit v1.2.3 From 79787eaab46121d4713ed03c8fc63b9ec3eaec76 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Mon, 12 Mar 2012 16:03:00 +0100 Subject: btrfs: replace many BUG_ONs with proper error handling btrfs currently handles most errors with BUG_ON. This patch is a work-in- progress but aims to handle most errors other than internal logic errors and ENOMEM more gracefully. This iteration prevents most crashes but can run into lockups with the page lock on occasion when the timing "works out." Signed-off-by: Jeff Mahoney --- fs/btrfs/extent-tree.c | 316 ++++++++++++++++++++++++++++++++++--------------- 1 file changed, 223 insertions(+), 93 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index cd6f8ae0a78d..4b3f1eedced0 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -245,7 +245,7 @@ static int exclude_super_stripes(struct btrfs_root *root, cache->bytes_super += stripe_len; ret = add_excluded_extent(root, cache->key.objectid, stripe_len); - BUG_ON(ret); + BUG_ON(ret); /* -ENOMEM */ } for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { @@ -253,13 +253,13 @@ static int exclude_super_stripes(struct btrfs_root *root, ret = btrfs_rmap_block(&root->fs_info->mapping_tree, cache->key.objectid, bytenr, 0, &logical, &nr, &stripe_len); - BUG_ON(ret); + BUG_ON(ret); /* -ENOMEM */ while (nr--) { cache->bytes_super += stripe_len; ret = add_excluded_extent(root, logical[nr], stripe_len); - BUG_ON(ret); + BUG_ON(ret); /* -ENOMEM */ } kfree(logical); @@ -321,7 +321,7 @@ static u64 add_new_free_space(struct btrfs_block_group_cache *block_group, total_added += size; ret = btrfs_add_free_space(block_group, start, size); - BUG_ON(ret); + BUG_ON(ret); /* -ENOMEM or logic error */ start = extent_end + 1; } else { break; @@ -332,7 +332,7 @@ static u64 add_new_free_space(struct btrfs_block_group_cache *block_group, size = end - start; total_added += size; ret = btrfs_add_free_space(block_group, start, size); - BUG_ON(ret); + BUG_ON(ret); /* -ENOMEM or logic error */ } return total_added; @@ -474,7 +474,8 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, int ret = 0; caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS); - BUG_ON(!caching_ctl); + if (!caching_ctl) + return -ENOMEM; INIT_LIST_HEAD(&caching_ctl->list); mutex_init(&caching_ctl->mutex); @@ -982,7 +983,7 @@ static int convert_extent_item_v0(struct btrfs_trans_handle *trans, ret = btrfs_next_leaf(root, path); if (ret < 0) return ret; - BUG_ON(ret > 0); + BUG_ON(ret > 0); /* Corruption */ leaf = path->nodes[0]; } btrfs_item_key_to_cpu(leaf, &found_key, @@ -1008,7 +1009,7 @@ static int convert_extent_item_v0(struct btrfs_trans_handle *trans, new_size + extra_size, 1); if (ret < 0) return ret; - BUG_ON(ret); + BUG_ON(ret); /* Corruption */ btrfs_extend_item(trans, root, path, new_size); @@ -1478,7 +1479,11 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans, err = ret; goto out; } - BUG_ON(ret); + if (ret && !insert) { + err = -ENOENT; + goto out; + } + BUG_ON(ret); /* Corruption */ leaf = path->nodes[0]; item_size = btrfs_item_size_nr(leaf, path->slots[0]); @@ -1831,6 +1836,7 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, /* Tell the block device(s) that the sectors can be discarded */ ret = btrfs_map_block(&root->fs_info->mapping_tree, REQ_DISCARD, bytenr, &num_bytes, &bbio, 0); + /* Error condition is -ENOMEM */ if (!ret) { struct btrfs_bio_stripe *stripe = bbio->stripes; int i; @@ -1846,7 +1852,7 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, if (!ret) discarded_bytes += stripe->length; else if (ret != -EOPNOTSUPP) - break; + break; /* Logic errors or -ENOMEM, or -EIO but I don't know how that could happen JDM */ /* * Just in case we get back EOPNOTSUPP for some reason, @@ -1865,6 +1871,7 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, return ret; } +/* Can return -ENOMEM */ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, u64 parent, @@ -1940,7 +1947,8 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, ret = insert_extent_backref(trans, root->fs_info->extent_root, path, bytenr, parent, root_objectid, owner, offset, refs_to_add); - BUG_ON(ret); + if (ret) + btrfs_abort_transaction(trans, root, ret); out: btrfs_free_path(path); return err; @@ -2027,6 +2035,9 @@ static int run_delayed_extent_op(struct btrfs_trans_handle *trans, int ret; int err = 0; + if (trans->aborted) + return 0; + path = btrfs_alloc_path(); if (!path) return -ENOMEM; @@ -2124,7 +2135,11 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans, struct btrfs_delayed_extent_op *extent_op, int insert_reserved) { - int ret; + int ret = 0; + + if (trans->aborted) + return 0; + if (btrfs_delayed_ref_is_head(node)) { struct btrfs_delayed_ref_head *head; /* @@ -2142,11 +2157,10 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans, ret = btrfs_del_csums(trans, root, node->bytenr, node->num_bytes); - BUG_ON(ret); } } mutex_unlock(&head->mutex); - return 0; + return ret; } if (node->type == BTRFS_TREE_BLOCK_REF_KEY || @@ -2193,6 +2207,10 @@ again: return NULL; } +/* + * Returns 0 on success or if called with an already aborted transaction. + * Returns -ENOMEM or -EIO on failure and will abort the transaction. + */ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct list_head *cluster) @@ -2281,9 +2299,13 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, ret = run_delayed_extent_op(trans, root, ref, extent_op); - BUG_ON(ret); kfree(extent_op); + if (ret) { + printk(KERN_DEBUG "btrfs: run_delayed_extent_op returned %d\n", ret); + return ret; + } + goto next; } @@ -2304,11 +2326,16 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, ret = run_one_delayed_ref(trans, root, ref, extent_op, must_insert_reserved); - BUG_ON(ret); btrfs_put_delayed_ref(ref); kfree(extent_op); count++; + + if (ret) { + printk(KERN_DEBUG "btrfs: run_one_delayed_ref returned %d\n", ret); + return ret; + } + next: do_chunk_alloc(trans, root->fs_info->extent_root, 2 * 1024 * 1024, @@ -2343,6 +2370,9 @@ static void wait_for_more_refs(struct btrfs_delayed_ref_root *delayed_refs, * 0, which means to process everything in the tree at the start * of the run (but not newly added entries), or it can be some target * number you'd like to process. + * + * Returns 0 on success or if called with an aborted transaction + * Returns <0 on error and aborts the transaction */ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, struct btrfs_root *root, unsigned long count) @@ -2358,6 +2388,10 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, unsigned long num_refs = 0; int consider_waiting; + /* We'll clean this up in btrfs_cleanup_transaction */ + if (trans->aborted) + return 0; + if (root == root->fs_info->extent_root) root = root->fs_info->tree_root; @@ -2415,7 +2449,11 @@ again: } ret = run_clustered_refs(trans, root, &cluster); - BUG_ON(ret < 0); + if (ret < 0) { + spin_unlock(&delayed_refs->lock); + btrfs_abort_transaction(trans, root, ret); + return ret; + } count -= min_t(unsigned long, ret, count); @@ -2580,7 +2618,7 @@ static noinline int check_committed_ref(struct btrfs_trans_handle *trans, ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); if (ret < 0) goto out; - BUG_ON(ret == 0); + BUG_ON(ret == 0); /* Corruption */ ret = -ENOENT; if (path->slots[0] == 0) @@ -2734,7 +2772,6 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, } return 0; fail: - BUG(); return ret; } @@ -2763,7 +2800,7 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans, ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1); if (ret < 0) goto fail; - BUG_ON(ret); + BUG_ON(ret); /* Corruption */ leaf = path->nodes[0]; bi = btrfs_item_ptr_offset(leaf, path->slots[0]); @@ -2771,8 +2808,10 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans, btrfs_mark_buffer_dirty(leaf); btrfs_release_path(path); fail: - if (ret) + if (ret) { + btrfs_abort_transaction(trans, root, ret); return ret; + } return 0; } @@ -2945,7 +2984,8 @@ again: if (last == 0) { err = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); - BUG_ON(err); + if (err) /* File system offline */ + goto out; } cache = btrfs_lookup_first_block_group(root->fs_info, last); @@ -2972,7 +3012,9 @@ again: last = cache->key.objectid + cache->key.offset; err = write_one_cache_group(trans, root, path, cache); - BUG_ON(err); + if (err) /* File system offline */ + goto out; + btrfs_put_block_group(cache); } @@ -2985,7 +3027,8 @@ again: if (last == 0) { err = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); - BUG_ON(err); + if (err) /* File system offline */ + goto out; } cache = btrfs_lookup_first_block_group(root->fs_info, last); @@ -3010,20 +3053,21 @@ again: continue; } - btrfs_write_out_cache(root, trans, cache, path); + err = btrfs_write_out_cache(root, trans, cache, path); /* * If we didn't have an error then the cache state is still * NEED_WRITE, so we can set it to WRITTEN. */ - if (cache->disk_cache_state == BTRFS_DC_NEED_WRITE) + if (!err && cache->disk_cache_state == BTRFS_DC_NEED_WRITE) cache->disk_cache_state = BTRFS_DC_WRITTEN; last = cache->key.objectid + cache->key.offset; btrfs_put_block_group(cache); } +out: btrfs_free_path(path); - return 0; + return err; } int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr) @@ -3407,9 +3451,9 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, if (!space_info) { ret = update_space_info(extent_root->fs_info, flags, 0, 0, &space_info); - BUG_ON(ret); + BUG_ON(ret); /* -ENOMEM */ } - BUG_ON(!space_info); + BUG_ON(!space_info); /* Logic error */ again: spin_lock(&space_info->lock); @@ -3674,8 +3718,10 @@ again: ret = wait_event_interruptible(space_info->wait, !space_info->flush); /* Must have been interrupted, return */ - if (ret) + if (ret) { + printk(KERN_DEBUG "btrfs: %s returning -EINTR\n", __func__); return -EINTR; + } spin_lock(&space_info->lock); } @@ -3832,8 +3878,9 @@ out: return ret; } -static struct btrfs_block_rsv *get_block_rsv(struct btrfs_trans_handle *trans, - struct btrfs_root *root) +static struct btrfs_block_rsv *get_block_rsv( + const struct btrfs_trans_handle *trans, + const struct btrfs_root *root) { struct btrfs_block_rsv *block_rsv = NULL; @@ -4200,6 +4247,7 @@ void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, trans->bytes_reserved = 0; } +/* Can only return 0 or -ENOSPC */ int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans, struct inode *inode) { @@ -4536,7 +4584,7 @@ static int update_block_group(struct btrfs_trans_handle *trans, while (total) { cache = btrfs_lookup_block_group(info, bytenr); if (!cache) - return -1; + return -ENOENT; if (cache->flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10)) @@ -4639,7 +4687,7 @@ int btrfs_pin_extent(struct btrfs_root *root, struct btrfs_block_group_cache *cache; cache = btrfs_lookup_block_group(root->fs_info, bytenr); - BUG_ON(!cache); + BUG_ON(!cache); /* Logic error */ pin_down_extent(root, cache, bytenr, num_bytes, reserved); @@ -4657,7 +4705,7 @@ int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans, struct btrfs_block_group_cache *cache; cache = btrfs_lookup_block_group(root->fs_info, bytenr); - BUG_ON(!cache); + BUG_ON(!cache); /* Logic error */ /* * pull in the free space cache (if any) so that our pin @@ -4702,6 +4750,7 @@ static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, { struct btrfs_space_info *space_info = cache->space_info; int ret = 0; + spin_lock(&space_info->lock); spin_lock(&cache->lock); if (reserve != RESERVE_FREE) { @@ -4774,7 +4823,7 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) if (cache) btrfs_put_block_group(cache); cache = btrfs_lookup_block_group(fs_info, start); - BUG_ON(!cache); + BUG_ON(!cache); /* Logic error */ } len = cache->key.objectid + cache->key.offset - start; @@ -4811,6 +4860,9 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, u64 end; int ret; + if (trans->aborted) + return 0; + if (fs_info->pinned_extents == &fs_info->freed_extents[0]) unpin = &fs_info->freed_extents[1]; else @@ -4896,7 +4948,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, ret = remove_extent_backref(trans, extent_root, path, NULL, refs_to_drop, is_data); - BUG_ON(ret); + if (ret) + goto abort; btrfs_release_path(path); path->leave_spinning = 1; @@ -4914,10 +4967,11 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, btrfs_print_leaf(extent_root, path->nodes[0]); } - BUG_ON(ret); + if (ret < 0) + goto abort; extent_slot = path->slots[0]; } - } else { + } else if (ret == -ENOENT) { btrfs_print_leaf(extent_root, path->nodes[0]); WARN_ON(1); printk(KERN_ERR "btrfs unable to find ref byte nr %llu " @@ -4927,6 +4981,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, (unsigned long long)root_objectid, (unsigned long long)owner_objectid, (unsigned long long)owner_offset); + } else { + goto abort; } leaf = path->nodes[0]; @@ -4936,7 +4992,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, BUG_ON(found_extent || extent_slot != path->slots[0]); ret = convert_extent_item_v0(trans, extent_root, path, owner_objectid, 0); - BUG_ON(ret < 0); + if (ret < 0) + goto abort; btrfs_release_path(path); path->leave_spinning = 1; @@ -4953,7 +5010,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, (unsigned long long)bytenr); btrfs_print_leaf(extent_root, path->nodes[0]); } - BUG_ON(ret); + if (ret < 0) + goto abort; extent_slot = path->slots[0]; leaf = path->nodes[0]; item_size = btrfs_item_size_nr(leaf, extent_slot); @@ -4990,7 +5048,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, ret = remove_extent_backref(trans, extent_root, path, iref, refs_to_drop, is_data); - BUG_ON(ret); + if (ret) + goto abort; } } else { if (found_extent) { @@ -5007,12 +5066,14 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, ret = btrfs_del_items(trans, extent_root, path, path->slots[0], num_to_del); - BUG_ON(ret); + if (ret) + goto abort; btrfs_release_path(path); if (is_data) { ret = btrfs_del_csums(trans, root, bytenr, num_bytes); - BUG_ON(ret); + if (ret) + goto abort; } else { invalidate_mapping_pages(info->btree_inode->i_mapping, bytenr >> PAGE_CACHE_SHIFT, @@ -5020,10 +5081,16 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, } ret = update_block_group(trans, root, bytenr, num_bytes, 0); - BUG_ON(ret); + if (ret) + goto abort; } +out: btrfs_free_path(path); return ret; + +abort: + btrfs_abort_transaction(trans, extent_root, ret); + goto out; } /* @@ -5119,7 +5186,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, parent, root->root_key.objectid, btrfs_header_level(buf), BTRFS_DROP_DELAYED_REF, NULL, for_cow); - BUG_ON(ret); + BUG_ON(ret); /* -ENOMEM */ } if (!last_ref) @@ -5153,6 +5220,7 @@ out: btrfs_put_block_group(cache); } +/* Can return -ENOMEM */ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, u64 owner, u64 offset, int for_cow) @@ -5174,14 +5242,12 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, num_bytes, parent, root_objectid, (int)owner, BTRFS_DROP_DELAYED_REF, NULL, for_cow); - BUG_ON(ret); } else { ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr, num_bytes, parent, root_objectid, owner, offset, BTRFS_DROP_DELAYED_REF, NULL, for_cow); - BUG_ON(ret); } return ret; } @@ -5418,6 +5484,7 @@ have_block_group: found_uncached_bg = true; ret = cache_block_group(block_group, trans, orig_root, 1); + BUG_ON(ret < 0); /* -ENOMEM */ if (block_group->cached == BTRFS_CACHE_FINISHED) goto alloc; @@ -5439,7 +5506,7 @@ have_block_group: if (loop > LOOP_FIND_IDEAL) { ret = cache_block_group(block_group, trans, orig_root, 0); - BUG_ON(ret); + BUG_ON(ret); /* -ENOMEM */ } /* @@ -5712,6 +5779,11 @@ loop: ret = do_chunk_alloc(trans, root, num_bytes + 2 * 1024 * 1024, data, CHUNK_ALLOC_LIMITED); + if (ret < 0) { + btrfs_abort_transaction(trans, + root, ret); + goto out; + } allowed_chunk_alloc = 0; if (ret == 1) done_chunk_alloc = 1; @@ -5740,6 +5812,7 @@ loop: } else if (ins->objectid) { ret = 0; } +out: return ret; } @@ -5806,10 +5879,15 @@ again: * the only place that sets empty_size is btrfs_realloc_node, which * is not called recursively on allocations */ - if (empty_size || root->ref_cows) + if (empty_size || root->ref_cows) { ret = do_chunk_alloc(trans, root->fs_info->extent_root, num_bytes + 2 * 1024 * 1024, data, CHUNK_ALLOC_NO_FORCE); + if (ret < 0 && ret != -ENOSPC) { + btrfs_abort_transaction(trans, root, ret); + return ret; + } + } WARN_ON(num_bytes < root->sectorsize); ret = find_free_extent(trans, root, num_bytes, empty_size, @@ -5821,8 +5899,12 @@ again: num_bytes = num_bytes >> 1; num_bytes = num_bytes & ~(root->sectorsize - 1); num_bytes = max(num_bytes, min_alloc_size); - do_chunk_alloc(trans, root->fs_info->extent_root, + ret = do_chunk_alloc(trans, root->fs_info->extent_root, num_bytes, data, CHUNK_ALLOC_FORCE); + if (ret < 0 && ret != -ENOSPC) { + btrfs_abort_transaction(trans, root, ret); + return ret; + } if (num_bytes == min_alloc_size) final_tried = true; goto again; @@ -5913,7 +5995,10 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, path->leave_spinning = 1; ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path, ins, size); - BUG_ON(ret); + if (ret) { + btrfs_free_path(path); + return ret; + } leaf = path->nodes[0]; extent_item = btrfs_item_ptr(leaf, path->slots[0], @@ -5943,7 +6028,7 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, btrfs_free_path(path); ret = update_block_group(trans, root, ins->objectid, ins->offset, 1); - if (ret) { + if (ret) { /* -ENOENT, logic error */ printk(KERN_ERR "btrfs update block group failed for %llu " "%llu\n", (unsigned long long)ins->objectid, (unsigned long long)ins->offset); @@ -5974,7 +6059,10 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, path->leave_spinning = 1; ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path, ins, size); - BUG_ON(ret); + if (ret) { + btrfs_free_path(path); + return ret; + } leaf = path->nodes[0]; extent_item = btrfs_item_ptr(leaf, path->slots[0], @@ -6004,7 +6092,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, btrfs_free_path(path); ret = update_block_group(trans, root, ins->objectid, ins->offset, 1); - if (ret) { + if (ret) { /* -ENOENT, logic error */ printk(KERN_ERR "btrfs update block group failed for %llu " "%llu\n", (unsigned long long)ins->objectid, (unsigned long long)ins->offset); @@ -6052,28 +6140,28 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, if (!caching_ctl) { BUG_ON(!block_group_cache_done(block_group)); ret = btrfs_remove_free_space(block_group, start, num_bytes); - BUG_ON(ret); + BUG_ON(ret); /* -ENOMEM */ } else { mutex_lock(&caching_ctl->mutex); if (start >= caching_ctl->progress) { ret = add_excluded_extent(root, start, num_bytes); - BUG_ON(ret); + BUG_ON(ret); /* -ENOMEM */ } else if (start + num_bytes <= caching_ctl->progress) { ret = btrfs_remove_free_space(block_group, start, num_bytes); - BUG_ON(ret); + BUG_ON(ret); /* -ENOMEM */ } else { num_bytes = caching_ctl->progress - start; ret = btrfs_remove_free_space(block_group, start, num_bytes); - BUG_ON(ret); + BUG_ON(ret); /* -ENOMEM */ start = caching_ctl->progress; num_bytes = ins->objectid + ins->offset - caching_ctl->progress; ret = add_excluded_extent(root, start, num_bytes); - BUG_ON(ret); + BUG_ON(ret); /* -ENOMEM */ } mutex_unlock(&caching_ctl->mutex); @@ -6082,7 +6170,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, ret = btrfs_update_reserved_bytes(block_group, ins->offset, RESERVE_ALLOC_NO_ACCOUNT); - BUG_ON(ret); + BUG_ON(ret); /* logic error */ btrfs_put_block_group(block_group); ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, 0, owner, offset, ins, 1); @@ -6218,7 +6306,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, buf = btrfs_init_new_buffer(trans, root, ins.objectid, blocksize, level); - BUG_ON(IS_ERR(buf)); + BUG_ON(IS_ERR(buf)); /* -ENOMEM */ if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) { if (parent == 0) @@ -6230,7 +6318,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { struct btrfs_delayed_extent_op *extent_op; extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); - BUG_ON(!extent_op); + BUG_ON(!extent_op); /* -ENOMEM */ if (key) memcpy(&extent_op->key, key, sizeof(extent_op->key)); else @@ -6245,7 +6333,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, ins.offset, parent, root_objectid, level, BTRFS_ADD_DELAYED_EXTENT, extent_op, for_cow); - BUG_ON(ret); + BUG_ON(ret); /* -ENOMEM */ } return buf; } @@ -6315,7 +6403,9 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans, /* We don't lock the tree block, it's OK to be racy here */ ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize, &refs, &flags); - BUG_ON(ret); + /* We don't care about errors in readahead. */ + if (ret < 0) + continue; BUG_ON(refs == 0); if (wc->stage == DROP_REFERENCE) { @@ -6382,7 +6472,9 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans, eb->start, eb->len, &wc->refs[level], &wc->flags[level]); - BUG_ON(ret); + BUG_ON(ret == -ENOMEM); + if (ret) + return ret; BUG_ON(wc->refs[level] == 0); } @@ -6401,12 +6493,12 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans, if (!(wc->flags[level] & flag)) { BUG_ON(!path->locks[level]); ret = btrfs_inc_ref(trans, root, eb, 1, wc->for_reloc); - BUG_ON(ret); + BUG_ON(ret); /* -ENOMEM */ ret = btrfs_dec_ref(trans, root, eb, 0, wc->for_reloc); - BUG_ON(ret); + BUG_ON(ret); /* -ENOMEM */ ret = btrfs_set_disk_extent_flags(trans, root, eb->start, eb->len, flag, 0); - BUG_ON(ret); + BUG_ON(ret); /* -ENOMEM */ wc->flags[level] |= flag; } @@ -6478,7 +6570,11 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize, &wc->refs[level - 1], &wc->flags[level - 1]); - BUG_ON(ret); + if (ret < 0) { + btrfs_tree_unlock(next); + return ret; + } + BUG_ON(wc->refs[level - 1] == 0); *lookup_info = 0; @@ -6547,7 +6643,7 @@ skip: ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent, root->root_key.objectid, level - 1, 0, 0); - BUG_ON(ret); + BUG_ON(ret); /* -ENOMEM */ } btrfs_tree_unlock(next); free_extent_buffer(next); @@ -6605,7 +6701,10 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, eb->start, eb->len, &wc->refs[level], &wc->flags[level]); - BUG_ON(ret); + if (ret < 0) { + btrfs_tree_unlock_rw(eb, path->locks[level]); + return ret; + } BUG_ON(wc->refs[level] == 0); if (wc->refs[level] == 1) { btrfs_tree_unlock_rw(eb, path->locks[level]); @@ -6625,7 +6724,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, else ret = btrfs_dec_ref(trans, root, eb, 0, wc->for_reloc); - BUG_ON(ret); + BUG_ON(ret); /* -ENOMEM */ } /* make block locked assertion in clean_tree_block happy */ if (!path->locks[level] && @@ -6762,7 +6861,10 @@ int btrfs_drop_snapshot(struct btrfs_root *root, } trans = btrfs_start_transaction(tree_root, 0); - BUG_ON(IS_ERR(trans)); + if (IS_ERR(trans)) { + err = PTR_ERR(trans); + goto out_free; + } if (block_rsv) trans->block_rsv = block_rsv; @@ -6787,7 +6889,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, path->lowest_level = 0; if (ret < 0) { err = ret; - goto out_free; + goto out_end_trans; } WARN_ON(ret > 0); @@ -6807,7 +6909,10 @@ int btrfs_drop_snapshot(struct btrfs_root *root, path->nodes[level]->len, &wc->refs[level], &wc->flags[level]); - BUG_ON(ret); + if (ret < 0) { + err = ret; + goto out_end_trans; + } BUG_ON(wc->refs[level] == 0); if (level == root_item->drop_level) @@ -6858,26 +6963,40 @@ int btrfs_drop_snapshot(struct btrfs_root *root, ret = btrfs_update_root(trans, tree_root, &root->root_key, root_item); - BUG_ON(ret); + if (ret) { + btrfs_abort_transaction(trans, tree_root, ret); + err = ret; + goto out_end_trans; + } btrfs_end_transaction_throttle(trans, tree_root); trans = btrfs_start_transaction(tree_root, 0); - BUG_ON(IS_ERR(trans)); + if (IS_ERR(trans)) { + err = PTR_ERR(trans); + goto out_free; + } if (block_rsv) trans->block_rsv = block_rsv; } } btrfs_release_path(path); - BUG_ON(err); + if (err) + goto out_end_trans; ret = btrfs_del_root(trans, tree_root, &root->root_key); - BUG_ON(ret); + if (ret) { + btrfs_abort_transaction(trans, tree_root, ret); + goto out_end_trans; + } if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) { ret = btrfs_find_last_root(tree_root, root->root_key.objectid, NULL, NULL); - BUG_ON(ret < 0); - if (ret > 0) { + if (ret < 0) { + btrfs_abort_transaction(trans, tree_root, ret); + err = ret; + goto out_end_trans; + } else if (ret > 0) { /* if we fail to delete the orphan item this time * around, it'll get picked up the next time. * @@ -6895,8 +7014,9 @@ int btrfs_drop_snapshot(struct btrfs_root *root, free_extent_buffer(root->commit_root); kfree(root); } -out_free: +out_end_trans: btrfs_end_transaction_throttle(trans, tree_root); +out_free: kfree(wc); btrfs_free_path(path); out: @@ -7099,12 +7219,16 @@ int btrfs_set_block_group_ro(struct btrfs_root *root, BUG_ON(cache->ro); trans = btrfs_join_transaction(root); - BUG_ON(IS_ERR(trans)); + if (IS_ERR(trans)) + return PTR_ERR(trans); alloc_flags = update_block_group_flags(root, cache->flags); - if (alloc_flags != cache->flags) - do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, - CHUNK_ALLOC_FORCE); + if (alloc_flags != cache->flags) { + ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, + CHUNK_ALLOC_FORCE); + if (ret < 0) + goto out; + } ret = set_block_group_ro(cache, 0); if (!ret) @@ -7567,7 +7691,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) ret = update_space_info(info, cache->flags, found_key.offset, btrfs_block_group_used(&cache->item), &space_info); - BUG_ON(ret); + BUG_ON(ret); /* -ENOMEM */ cache->space_info = space_info; spin_lock(&cache->space_info->lock); cache->space_info->bytes_readonly += cache->bytes_super; @@ -7576,7 +7700,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) __link_block_group(space_info, cache); ret = btrfs_add_block_group_cache(root->fs_info, cache); - BUG_ON(ret); + BUG_ON(ret); /* Logic error */ set_avail_alloc_bits(root->fs_info, cache->flags); if (btrfs_chunk_readonly(root, cache->key.objectid)) @@ -7658,7 +7782,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, ret = update_space_info(root->fs_info, cache->flags, size, bytes_used, &cache->space_info); - BUG_ON(ret); + BUG_ON(ret); /* -ENOMEM */ update_global_block_rsv(root->fs_info); spin_lock(&cache->space_info->lock); @@ -7668,11 +7792,14 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, __link_block_group(cache->space_info, cache); ret = btrfs_add_block_group_cache(root->fs_info, cache); - BUG_ON(ret); + BUG_ON(ret); /* Logic error */ ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item, sizeof(cache->item)); - BUG_ON(ret); + if (ret) { + btrfs_abort_transaction(trans, extent_root, ret); + return ret; + } set_avail_alloc_bits(extent_root->fs_info, type); @@ -7753,7 +7880,10 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, inode = lookup_free_space_inode(tree_root, block_group, path); if (!IS_ERR(inode)) { ret = btrfs_orphan_add(trans, inode); - BUG_ON(ret); + if (ret) { + btrfs_add_delayed_iput(inode); + goto out; + } clear_nlink(inode); /* One for the block groups ref */ spin_lock(&block_group->lock); -- cgit v1.2.3 From 285ff5af6ce358e73f53b55c9efadd4335f4c2ff Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 13 Jan 2012 15:27:45 -0500 Subject: Btrfs: remove the ideal caching code This is a relic from before we had the disk space cache and it was to make bootup times when you had btrfs as root not be so damned slow. Now that we have the disk space cache this isn't a problem anymore and really having this code casues uneeded fragmentation and complexity, so just remove it. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/extent-tree.c | 93 +++++--------------------------------------------- 1 file changed, 8 insertions(+), 85 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 37e0a800d34e..eccef6c06237 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5260,11 +5260,10 @@ static int get_block_group_index(struct btrfs_block_group_cache *cache) } enum btrfs_loop_type { - LOOP_FIND_IDEAL = 0, - LOOP_CACHING_NOWAIT = 1, - LOOP_CACHING_WAIT = 2, - LOOP_ALLOC_CHUNK = 3, - LOOP_NO_EMPTY_SIZE = 4, + LOOP_CACHING_NOWAIT = 0, + LOOP_CACHING_WAIT = 1, + LOOP_ALLOC_CHUNK = 2, + LOOP_NO_EMPTY_SIZE = 3, }; /* @@ -5300,8 +5299,6 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, bool failed_alloc = false; bool use_cluster = true; bool have_caching_bg = false; - u64 ideal_cache_percent = 0; - u64 ideal_cache_offset = 0; WARN_ON(num_bytes < root->sectorsize); btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); @@ -5351,7 +5348,6 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, empty_cluster = 0; if (search_start == hint_byte) { -ideal_cache: block_group = btrfs_lookup_block_group(root->fs_info, search_start); used_block_group = block_group; @@ -5363,8 +5359,7 @@ ideal_cache: * picked out then we don't care that the block group is cached. */ if (block_group && block_group_bits(block_group, data) && - (block_group->cached != BTRFS_CACHE_NO || - search_start == ideal_cache_offset)) { + block_group->cached != BTRFS_CACHE_NO) { down_read(&space_info->groups_sem); if (list_empty(&block_group->list) || block_group->ro) { @@ -5418,44 +5413,12 @@ search: have_block_group: cached = block_group_cache_done(block_group); if (unlikely(!cached)) { - u64 free_percent; - found_uncached_bg = true; ret = cache_block_group(block_group, trans, - orig_root, 1); - if (block_group->cached == BTRFS_CACHE_FINISHED) - goto alloc; - - free_percent = btrfs_block_group_used(&block_group->item); - free_percent *= 100; - free_percent = div64_u64(free_percent, - block_group->key.offset); - free_percent = 100 - free_percent; - if (free_percent > ideal_cache_percent && - likely(!block_group->ro)) { - ideal_cache_offset = block_group->key.objectid; - ideal_cache_percent = free_percent; - } - - /* - * The caching workers are limited to 2 threads, so we - * can queue as much work as we care to. - */ - if (loop > LOOP_FIND_IDEAL) { - ret = cache_block_group(block_group, trans, - orig_root, 0); - BUG_ON(ret); - } - - /* - * If loop is set for cached only, try the next block - * group. - */ - if (loop == LOOP_FIND_IDEAL) - goto loop; + orig_root, 0); + BUG_ON(ret); } -alloc: if (unlikely(block_group->ro)) goto loop; @@ -5661,9 +5624,7 @@ loop: if (!ins->objectid && ++index < BTRFS_NR_RAID_TYPES) goto search; - /* LOOP_FIND_IDEAL, only search caching/cached bg's, and don't wait for - * for them to make caching progress. Also - * determine the best possible bg to cache + /* * LOOP_CACHING_NOWAIT, search partially cached block groups, kicking * caching kthreads as we move along * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching @@ -5673,45 +5634,7 @@ loop: */ if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE) { index = 0; - if (loop == LOOP_FIND_IDEAL && found_uncached_bg) { - found_uncached_bg = false; - loop++; - if (!ideal_cache_percent) - goto search; - - /* - * 1 of the following 2 things have happened so far - * - * 1) We found an ideal block group for caching that - * is mostly full and will cache quickly, so we might - * as well wait for it. - * - * 2) We searched for cached only and we didn't find - * anything, and we didn't start any caching kthreads - * either, so chances are we will loop through and - * start a couple caching kthreads, and then come back - * around and just wait for them. This will be slower - * because we will have 2 caching kthreads reading at - * the same time when we could have just started one - * and waited for it to get far enough to give us an - * allocation, so go ahead and go to the wait caching - * loop. - */ - loop = LOOP_CACHING_WAIT; - search_start = ideal_cache_offset; - ideal_cache_percent = 0; - goto ideal_cache; - } else if (loop == LOOP_FIND_IDEAL) { - /* - * Didn't find a uncached bg, wait on anything we find - * next. - */ - loop = LOOP_CACHING_WAIT; - goto search; - } - loop++; - if (loop == LOOP_ALLOC_CHUNK) { if (allowed_chunk_alloc) { ret = do_chunk_alloc(trans, root, num_bytes + -- cgit v1.2.3 From 81c9ad237c604adec79fd4d4034264c6669e0ab3 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 18 Jan 2012 10:56:06 -0500 Subject: Btrfs: remove search_start and search_end from find_free_extent and callers We have been passing nothing but (u64)-1 to find_free_extent for search_end in all of the callers, so it's completely useless, and we've always been passing 0 in as search_start, so just remove them as function arguments and move search_start into find_free_extent. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/extent-tree.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index eccef6c06237..9b7e7682fda0 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5277,7 +5277,6 @@ enum btrfs_loop_type { static noinline int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *orig_root, u64 num_bytes, u64 empty_size, - u64 search_start, u64 search_end, u64 hint_byte, struct btrfs_key *ins, u64 data) { @@ -5286,6 +5285,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_free_cluster *last_ptr = NULL; struct btrfs_block_group_cache *block_group = NULL; struct btrfs_block_group_cache *used_block_group; + u64 search_start = 0; int empty_cluster = 2 * 1024 * 1024; int allowed_chunk_alloc = 0; int done_chunk_alloc = 0; @@ -5569,11 +5569,6 @@ unclustered_alloc: } checks: search_start = stripe_align(root, offset); - /* move on to the next group */ - if (search_start + num_bytes >= search_end) { - btrfs_add_free_space(used_block_group, offset, num_bytes); - goto loop; - } /* move on to the next group */ if (search_start + num_bytes > @@ -5721,12 +5716,10 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 num_bytes, u64 min_alloc_size, u64 empty_size, u64 hint_byte, - u64 search_end, struct btrfs_key *ins, - u64 data) + struct btrfs_key *ins, u64 data) { bool final_tried = false; int ret; - u64 search_start = 0; data = btrfs_get_alloc_profile(root, data); again: @@ -5741,8 +5734,7 @@ again: WARN_ON(num_bytes < root->sectorsize); ret = find_free_extent(trans, root, num_bytes, empty_size, - search_start, search_end, hint_byte, - ins, data); + hint_byte, ins, data); if (ret == -ENOSPC) { if (!final_tried) { @@ -6137,7 +6129,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, return ERR_CAST(block_rsv); ret = btrfs_reserve_extent(trans, root, blocksize, blocksize, - empty_size, hint, (u64)-1, &ins, 0); + empty_size, hint, &ins, 0); if (ret) { unuse_block_rsv(root->fs_info, block_rsv, blocksize); return ERR_PTR(ret); -- cgit v1.2.3 From 3083ee2e18b701122a3b841db83448543a87a583 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 9 Mar 2012 16:01:49 -0500 Subject: Btrfs: introduce free_extent_buffer_stale Because btrfs cow's we can end up with extent buffers that are no longer necessary just sitting around in memory. So instead of evicting these pages, we could end up evicting things we actually care about. Thus we have free_extent_buffer_stale for use when we are freeing tree blocks. This will make it so that the ref for the eb being in the radix tree is dropped as soon as possible and then is freed when the refcount hits 0 instead of waiting to be released by releasepage. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/extent-tree.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 9b7e7682fda0..1b831ac4c079 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5018,10 +5018,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, if (is_data) { ret = btrfs_del_csums(trans, root, bytenr, num_bytes); BUG_ON(ret); - } else { - invalidate_mapping_pages(info->btree_inode->i_mapping, - bytenr >> PAGE_CACHE_SHIFT, - (bytenr + num_bytes - 1) >> PAGE_CACHE_SHIFT); } ret = update_block_group(trans, root, bytenr, num_bytes, 0); @@ -6022,6 +6018,7 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level); btrfs_tree_lock(buf); clean_tree_block(trans, root, buf); + clear_bit(EXTENT_BUFFER_STALE, &buf->bflags); btrfs_set_lock_blocking(buf); btrfs_set_buffer_uptodate(buf); -- cgit v1.2.3 From e3176ca2769e420f64eba4b093bbddea6d7a89c3 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 27 Mar 2012 17:09:16 +0300 Subject: Btrfs: stop silently switching single chunks to raid0 on balance This has been causing a lot of confusion for quite a while now and a lot of users were surprised by this (some of them were even stuck in a ENOSPC situation which they couldn't easily get out of). The addition of restriper gives users a clear choice between raid0 and drive concat setup so there's absolutely no excuse for us to keep doing this. Signed-off-by: Ilya Dryomov --- fs/btrfs/extent-tree.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 1b831ac4c079..4269777f185e 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -6941,7 +6941,6 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) if (flags & (BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10)) return stripped | BTRFS_BLOCK_GROUP_DUP; - return flags; } else { /* they already had raid on here, just return */ if (flags & stripped) @@ -6954,9 +6953,9 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) if (flags & BTRFS_BLOCK_GROUP_DUP) return stripped | BTRFS_BLOCK_GROUP_RAID1; - /* turn single device chunks into raid0 */ - return stripped | BTRFS_BLOCK_GROUP_RAID0; + /* this is drive concat, leave it alone */ } + return flags; } -- cgit v1.2.3 From 899c81eac890bcfa5f3636f4c43f68e8393ac1f8 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 27 Mar 2012 17:09:16 +0300 Subject: Btrfs: add wrappers for working with alloc profiles Add functions to abstract the conversion between chunk and extended allocation profile formats and switch everybody to use them. Signed-off-by: Ilya Dryomov --- fs/btrfs/extent-tree.c | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 4269777f185e..9f16fdb463c7 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3098,11 +3098,8 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) { - u64 extra_flags = flags & BTRFS_BLOCK_GROUP_PROFILE_MASK; - - /* chunk -> extended profile */ - if (extra_flags == 0) - extra_flags = BTRFS_AVAIL_ALLOC_BIT_SINGLE; + u64 extra_flags = chunk_to_extended(flags) & + BTRFS_EXTENDED_PROFILE_MASK; if (flags & BTRFS_BLOCK_GROUP_DATA) fs_info->avail_data_alloc_bits |= extra_flags; @@ -3181,9 +3178,7 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) } out: - /* extended -> chunk profile */ - flags &= ~BTRFS_AVAIL_ALLOC_BIT_SINGLE; - return flags; + return extended_to_chunk(flags); } static u64 get_alloc_profile(struct btrfs_root *root, u64 flags) @@ -6914,11 +6909,8 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) tgt = BTRFS_BLOCK_GROUP_METADATA | bctl->meta.target; } - if (tgt) { - /* extended -> chunk profile */ - tgt &= ~BTRFS_AVAIL_ALLOC_BIT_SINGLE; - return tgt; - } + if (tgt) + return extended_to_chunk(tgt); } /* @@ -7597,11 +7589,8 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, static void clear_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) { - u64 extra_flags = flags & BTRFS_BLOCK_GROUP_PROFILE_MASK; - - /* chunk -> extended profile */ - if (extra_flags == 0) - extra_flags = BTRFS_AVAIL_ALLOC_BIT_SINGLE; + u64 extra_flags = chunk_to_extended(flags) & + BTRFS_EXTENDED_PROFILE_MASK; if (flags & BTRFS_BLOCK_GROUP_DATA) fs_info->avail_data_alloc_bits &= ~extra_flags; -- cgit v1.2.3 From e8920a640be5d4ebe3fee0670639a81d4ffc904c Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 27 Mar 2012 17:09:17 +0300 Subject: Btrfs: make profile_is_valid() check more strict "0" is a valid value for an on-disk chunk profile, but it is not a valid extended profile. (We have a separate bit for single chunks in extended case) Also rename it to alloc_profile_is_valid() for clarity. Signed-off-by: Ilya Dryomov --- fs/btrfs/extent-tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 9f16fdb463c7..8c5bd8fa8245 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3400,7 +3400,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, int wait_for_alloc = 0; int ret = 0; - BUG_ON(!profile_is_valid(flags, 0)); + BUG_ON(!alloc_profile_is_valid(flags, 0)); space_info = __find_space_info(extent_root->fs_info, flags); if (!space_info) { -- cgit v1.2.3 From 0c460c0d70e10463e44bdf1d406e9c5ec03b1af6 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 27 Mar 2012 17:09:17 +0300 Subject: Btrfs: move alloc_profile_is_valid() to volumes.c Header file is not a good place to define functions. This also moves a call to alloc_profile_is_valid() down the stack and removes a redundant check from __btrfs_alloc_chunk() - alloc_profile_is_valid() takes it into account. Signed-off-by: Ilya Dryomov --- fs/btrfs/extent-tree.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 8c5bd8fa8245..304710cae653 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3400,8 +3400,6 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, int wait_for_alloc = 0; int ret = 0; - BUG_ON(!alloc_profile_is_valid(flags, 0)); - space_info = __find_space_info(extent_root->fs_info, flags); if (!space_info) { ret = update_space_info(extent_root->fs_info, flags, -- cgit v1.2.3 From fc67c450837ec034174060a25889a55eed741a1d Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 27 Mar 2012 17:09:17 +0300 Subject: Btrfs: add get_restripe_target() helper Add get_restripe_target() helper and switch everybody to use it. Signed-off-by: Ilya Dryomov --- fs/btrfs/extent-tree.c | 94 +++++++++++++++++++++++++++----------------------- 1 file changed, 50 insertions(+), 44 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 304710cae653..faf52e030315 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3109,6 +3109,35 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) fs_info->avail_system_alloc_bits |= extra_flags; } +/* + * returns target flags in extended format or 0 if restripe for this + * chunk_type is not in progress + */ +static u64 get_restripe_target(struct btrfs_fs_info *fs_info, u64 flags) +{ + struct btrfs_balance_control *bctl = fs_info->balance_ctl; + u64 target = 0; + + BUG_ON(!mutex_is_locked(&fs_info->volume_mutex) && + !spin_is_locked(&fs_info->balance_lock)); + + if (!bctl) + return 0; + + if (flags & BTRFS_BLOCK_GROUP_DATA && + bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) { + target = BTRFS_BLOCK_GROUP_DATA | bctl->data.target; + } else if (flags & BTRFS_BLOCK_GROUP_SYSTEM && + bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) { + target = BTRFS_BLOCK_GROUP_SYSTEM | bctl->sys.target; + } else if (flags & BTRFS_BLOCK_GROUP_METADATA && + bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) { + target = BTRFS_BLOCK_GROUP_METADATA | bctl->meta.target; + } + + return target; +} + /* * @flags: available profiles in extended format (see ctree.h) * @@ -3125,31 +3154,19 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) */ u64 num_devices = root->fs_info->fs_devices->rw_devices + root->fs_info->fs_devices->missing_devices; + u64 target; - /* pick restriper's target profile if it's available */ + /* + * see if restripe for this chunk_type is in progress, if so + * try to reduce to the target profile + */ spin_lock(&root->fs_info->balance_lock); - if (root->fs_info->balance_ctl) { - struct btrfs_balance_control *bctl = root->fs_info->balance_ctl; - u64 tgt = 0; - - if ((flags & BTRFS_BLOCK_GROUP_DATA) && - (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) && - (flags & bctl->data.target)) { - tgt = BTRFS_BLOCK_GROUP_DATA | bctl->data.target; - } else if ((flags & BTRFS_BLOCK_GROUP_SYSTEM) && - (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) && - (flags & bctl->sys.target)) { - tgt = BTRFS_BLOCK_GROUP_SYSTEM | bctl->sys.target; - } else if ((flags & BTRFS_BLOCK_GROUP_METADATA) && - (bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) && - (flags & bctl->meta.target)) { - tgt = BTRFS_BLOCK_GROUP_METADATA | bctl->meta.target; - } - - if (tgt) { + target = get_restripe_target(root->fs_info, flags); + if (target) { + /* pick target profile only if it's already available */ + if ((flags & target) & BTRFS_EXTENDED_PROFILE_MASK) { spin_unlock(&root->fs_info->balance_lock); - flags = tgt; - goto out; + return extended_to_chunk(target); } } spin_unlock(&root->fs_info->balance_lock); @@ -3177,7 +3194,6 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) flags &= ~BTRFS_BLOCK_GROUP_RAID0; } -out: return extended_to_chunk(flags); } @@ -6888,28 +6904,15 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans, static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) { u64 num_devices; - u64 stripped = BTRFS_BLOCK_GROUP_RAID0 | - BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10; - - if (root->fs_info->balance_ctl) { - struct btrfs_balance_control *bctl = root->fs_info->balance_ctl; - u64 tgt = 0; - - /* pick restriper's target profile and return */ - if (flags & BTRFS_BLOCK_GROUP_DATA && - bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) { - tgt = BTRFS_BLOCK_GROUP_DATA | bctl->data.target; - } else if (flags & BTRFS_BLOCK_GROUP_SYSTEM && - bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) { - tgt = BTRFS_BLOCK_GROUP_SYSTEM | bctl->sys.target; - } else if (flags & BTRFS_BLOCK_GROUP_METADATA && - bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) { - tgt = BTRFS_BLOCK_GROUP_METADATA | bctl->meta.target; - } + u64 stripped; - if (tgt) - return extended_to_chunk(tgt); - } + /* + * if restripe for this chunk_type is on pick target profile and + * return, otherwise do the usual balance + */ + stripped = get_restripe_target(root->fs_info, flags); + if (stripped) + return extended_to_chunk(stripped); /* * we add in the count of missing devices because we want @@ -6919,6 +6922,9 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) num_devices = root->fs_info->fs_devices->rw_devices + root->fs_info->fs_devices->missing_devices; + stripped = BTRFS_BLOCK_GROUP_RAID0 | + BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10; + if (num_devices == 1) { stripped |= BTRFS_BLOCK_GROUP_DUP; stripped = flags & ~stripped; -- cgit v1.2.3 From 7738a53a3a3aa8d82350280ff4bc7df9c3094123 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 27 Mar 2012 17:09:17 +0300 Subject: Btrfs: add __get_block_group_index() helper Add __get_block_group_index() helper to be able to derive block group index from an arbitary set of flags. Implement get_block_group_index() in terms of it. Signed-off-by: Ilya Dryomov --- fs/btrfs/extent-tree.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index faf52e030315..c44aa9680789 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5248,22 +5248,29 @@ wait_block_group_cache_done(struct btrfs_block_group_cache *cache) return 0; } -static int get_block_group_index(struct btrfs_block_group_cache *cache) +static int __get_block_group_index(u64 flags) { int index; - if (cache->flags & BTRFS_BLOCK_GROUP_RAID10) + + if (flags & BTRFS_BLOCK_GROUP_RAID10) index = 0; - else if (cache->flags & BTRFS_BLOCK_GROUP_RAID1) + else if (flags & BTRFS_BLOCK_GROUP_RAID1) index = 1; - else if (cache->flags & BTRFS_BLOCK_GROUP_DUP) + else if (flags & BTRFS_BLOCK_GROUP_DUP) index = 2; - else if (cache->flags & BTRFS_BLOCK_GROUP_RAID0) + else if (flags & BTRFS_BLOCK_GROUP_RAID0) index = 3; else index = 4; + return index; } +static int get_block_group_index(struct btrfs_block_group_cache *cache) +{ + return __get_block_group_index(cache->flags); +} + enum btrfs_loop_type { LOOP_CACHING_NOWAIT = 0, LOOP_CACHING_WAIT = 1, -- cgit v1.2.3 From 4a5e98f5d61f698452e564e0cde34c16a6b65752 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 27 Mar 2012 17:09:17 +0300 Subject: Btrfs: improve the logic in btrfs_can_relocate() Currently if we don't have enough space allocated we go ahead and loop though devices in the hopes of finding enough space for a chunk of the *same* type as the one we are trying to relocate. The problem with that is that if we are trying to restripe the chunk its target type can be more relaxed than the current one (eg require less devices or less space). So, when restriping, run checks against the target profile instead of the current one. Signed-off-by: Ilya Dryomov --- fs/btrfs/extent-tree.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c44aa9680789..9454045f091a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -7136,6 +7136,7 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) u64 min_free; u64 dev_min = 1; u64 dev_nr = 0; + u64 target; int index; int full = 0; int ret = 0; @@ -7176,13 +7177,11 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) /* * ok we don't have enough space, but maybe we have free space on our * devices to allocate new chunks for relocation, so loop through our - * alloc devices and guess if we have enough space. However, if we - * were marked as full, then we know there aren't enough chunks, and we - * can just return. + * alloc devices and guess if we have enough space. if this block + * group is going to be restriped, run checks against the target + * profile instead of the current one. */ ret = -1; - if (full) - goto out; /* * index: @@ -7192,7 +7191,20 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) * 3: raid0 * 4: single */ - index = get_block_group_index(block_group); + target = get_restripe_target(root->fs_info, block_group->flags); + if (target) { + index = __get_block_group_index(extended_to_chunk(target)); + } else { + /* + * this is just a balance, so if we were marked as full + * we know there is no space for a new chunk + */ + if (full) + goto out; + + index = get_block_group_index(block_group); + } + if (index == 0) { dev_min = 4; /* Divide by 2 */ -- cgit v1.2.3 From 2bcc0328c3a043880796a602c75fbeb1537aa1e1 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Thu, 29 Mar 2012 09:57:44 -0400 Subject: Btrfs: show useful info in space reservation tracepoint o For space info, the type of space info is useful for debug. o For transaction handle, its transid is useful. Signed-off-by: Liu Bo Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 29 ++++++++++------------------- 1 file changed, 10 insertions(+), 19 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 8fe517bd8521..7c233407beee 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3363,8 +3363,7 @@ commit_trans: } data_sinfo->bytes_may_use += bytes; trace_btrfs_space_reservation(root->fs_info, "space_info", - (u64)(unsigned long)data_sinfo, - bytes, 1); + data_sinfo->flags, bytes, 1); spin_unlock(&data_sinfo->lock); return 0; @@ -3385,8 +3384,7 @@ void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes) spin_lock(&data_sinfo->lock); data_sinfo->bytes_may_use -= bytes; trace_btrfs_space_reservation(root->fs_info, "space_info", - (u64)(unsigned long)data_sinfo, - bytes, 0); + data_sinfo->flags, bytes, 0); spin_unlock(&data_sinfo->lock); } @@ -3751,9 +3749,7 @@ again: if (used + orig_bytes <= space_info->total_bytes) { space_info->bytes_may_use += orig_bytes; trace_btrfs_space_reservation(root->fs_info, - "space_info", - (u64)(unsigned long)space_info, - orig_bytes, 1); + "space_info", space_info->flags, orig_bytes, 1); ret = 0; } else { /* @@ -3822,9 +3818,7 @@ again: if (used + num_bytes < space_info->total_bytes + avail) { space_info->bytes_may_use += orig_bytes; trace_btrfs_space_reservation(root->fs_info, - "space_info", - (u64)(unsigned long)space_info, - orig_bytes, 1); + "space_info", space_info->flags, orig_bytes, 1); ret = 0; } else { wait_ordered = true; @@ -3970,8 +3964,7 @@ static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info, spin_lock(&space_info->lock); space_info->bytes_may_use -= num_bytes; trace_btrfs_space_reservation(fs_info, "space_info", - (u64)(unsigned long)space_info, - num_bytes, 0); + space_info->flags, num_bytes, 0); space_info->reservation_progress++; spin_unlock(&space_info->lock); } @@ -4189,14 +4182,14 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info) block_rsv->reserved += num_bytes; sinfo->bytes_may_use += num_bytes; trace_btrfs_space_reservation(fs_info, "space_info", - (u64)(unsigned long)sinfo, num_bytes, 1); + sinfo->flags, num_bytes, 1); } if (block_rsv->reserved >= block_rsv->size) { num_bytes = block_rsv->reserved - block_rsv->size; sinfo->bytes_may_use -= num_bytes; trace_btrfs_space_reservation(fs_info, "space_info", - (u64)(unsigned long)sinfo, num_bytes, 0); + sinfo->flags, num_bytes, 0); sinfo->reservation_progress++; block_rsv->reserved = block_rsv->size; block_rsv->full = 1; @@ -4250,8 +4243,7 @@ void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, return; trace_btrfs_space_reservation(root->fs_info, "transaction", - (u64)(unsigned long)trans, - trans->bytes_reserved, 0); + trans->transid, trans->bytes_reserved, 0); btrfs_block_rsv_release(root, trans->block_rsv, trans->bytes_reserved); trans->bytes_reserved = 0; } @@ -4770,9 +4762,8 @@ static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, space_info->bytes_reserved += num_bytes; if (reserve == RESERVE_ALLOC) { trace_btrfs_space_reservation(cache->fs_info, - "space_info", - (u64)(unsigned long)space_info, - num_bytes, 0); + "space_info", space_info->flags, + num_bytes, 0); space_info->bytes_may_use -= num_bytes; } } -- cgit v1.2.3 From 15d1ff8111aad85d8b40ee396758990d17a2caac Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Thu, 29 Mar 2012 09:57:44 -0400 Subject: Btrfs: fix deadlock during allocating chunks This deadlock comes from xfstests 251. We'll hold the chunk_mutex throughout the whole of a chunk allocation. But if we find that we've used up system chunk space, we need to allocate a new system chunk, but this will lead to a recursion of chunk allocation and end up with a deadlock on chunk_mutex. So instead we need to allocate the system chunk first if we find we're in ENOSPC. Signed-off-by: Liu Bo Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 7c233407beee..a84420491c11 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3445,6 +3445,50 @@ static int should_alloc_chunk(struct btrfs_root *root, return 1; } +static u64 get_system_chunk_thresh(struct btrfs_root *root, u64 type) +{ + u64 num_dev; + + if (type & BTRFS_BLOCK_GROUP_RAID10 || + type & BTRFS_BLOCK_GROUP_RAID0) + num_dev = root->fs_info->fs_devices->rw_devices; + else if (type & BTRFS_BLOCK_GROUP_RAID1) + num_dev = 2; + else + num_dev = 1; /* DUP or single */ + + /* metadata for updaing devices and chunk tree */ + return btrfs_calc_trans_metadata_size(root, num_dev + 1); +} + +static void check_system_chunk(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 type) +{ + struct btrfs_space_info *info; + u64 left; + u64 thresh; + + info = __find_space_info(root->fs_info, BTRFS_BLOCK_GROUP_SYSTEM); + spin_lock(&info->lock); + left = info->total_bytes - info->bytes_used - info->bytes_pinned - + info->bytes_reserved - info->bytes_readonly; + spin_unlock(&info->lock); + + thresh = get_system_chunk_thresh(root, type); + if (left < thresh && btrfs_test_opt(root, ENOSPC_DEBUG)) { + printk(KERN_INFO "left=%llu, need=%llu, flags=%llu\n", + left, thresh, type); + dump_space_info(info, 0, 0); + } + + if (left < thresh) { + u64 flags; + + flags = btrfs_get_alloc_profile(root->fs_info->chunk_root, 0); + btrfs_alloc_chunk(trans, root, flags); + } +} + static int do_chunk_alloc(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root, u64 alloc_bytes, u64 flags, int force) @@ -3515,6 +3559,12 @@ again: force_metadata_allocation(fs_info); } + /* + * Check if we have enough space in SYSTEM chunk because we may need + * to update devices. + */ + check_system_chunk(trans, extent_root, flags); + ret = btrfs_alloc_chunk(trans, extent_root, flags); if (ret < 0 && ret != -ENOSPC) goto out; -- cgit v1.2.3