diff options
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r-- | fs/btrfs/extent-tree.c | 246 |
1 files changed, 203 insertions, 43 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 47cdc6f3390b..978bbfed5a2c 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2342,7 +2342,13 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans, ins.type = BTRFS_EXTENT_ITEM_KEY; } - BUG_ON(node->ref_mod != 1); + if (node->ref_mod != 1) { + btrfs_err(root->fs_info, + "btree block(%llu) has %d references rather than 1: action %d ref_root %llu parent %llu", + node->bytenr, node->ref_mod, node->action, ref_root, + parent); + return -EIO; + } if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) { BUG_ON(!extent_op || !extent_op->update_flags); ret = alloc_reserved_tree_block(trans, root, @@ -2520,11 +2526,11 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, if (ref && ref->seq && btrfs_check_delayed_seq(fs_info, delayed_refs, ref->seq)) { spin_unlock(&locked_ref->lock); - btrfs_delayed_ref_unlock(locked_ref); spin_lock(&delayed_refs->lock); locked_ref->processing = 0; delayed_refs->num_heads_ready++; spin_unlock(&delayed_refs->lock); + btrfs_delayed_ref_unlock(locked_ref); locked_ref = NULL; cond_resched(); count++; @@ -2570,7 +2576,10 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, */ if (must_insert_reserved) locked_ref->must_insert_reserved = 1; + spin_lock(&delayed_refs->lock); locked_ref->processing = 0; + delayed_refs->num_heads_ready++; + spin_unlock(&delayed_refs->lock); btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret); btrfs_delayed_ref_unlock(locked_ref); return ret; @@ -3358,13 +3367,6 @@ again: goto again; } - /* We've already setup this transaction, go ahead and exit */ - if (block_group->cache_generation == trans->transid && - i_size_read(inode)) { - dcs = BTRFS_DC_SETUP; - goto out_put; - } - /* * We want to set the generation to 0, that way if anything goes wrong * from here on out we know not to trust this cache when we load up next @@ -3388,6 +3390,13 @@ again: } WARN_ON(ret); + /* We've already setup this transaction, go ahead and exit */ + if (block_group->cache_generation == trans->transid && + i_size_read(inode)) { + dcs = BTRFS_DC_SETUP; + goto out_put; + } + if (i_size_read(inode) > 0) { ret = btrfs_check_trunc_cache_free_space(root, &root->fs_info->global_block_rsv); @@ -3851,6 +3860,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, info->space_info_kobj, "%s", alloc_name(found->flags)); if (ret) { + percpu_counter_destroy(&found->total_bytes_pinned); kfree(found); return ret; } @@ -4124,7 +4134,7 @@ commit_trans: data_sinfo->flags, bytes, 1); spin_unlock(&data_sinfo->lock); - return ret; + return 0; } /* @@ -4388,6 +4398,7 @@ again: if (wait_for_alloc) { mutex_unlock(&fs_info->chunk_mutex); wait_for_alloc = 0; + cond_resched(); goto again; } @@ -7830,6 +7841,20 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root, buf = btrfs_find_create_tree_block(root, bytenr); if (!buf) return ERR_PTR(-ENOMEM); + + /* + * Extra safety check in case the extent tree is corrupted and extent + * allocator chooses to use a tree block which is already used and + * locked. + */ + if (buf->lock_owner == current->pid) { + btrfs_err_rl(root->fs_info, +"tree block %llu owner %llu already locked by pid=%d, extent tree corruption detected", + buf->start, btrfs_header_owner(buf), current->pid); + free_extent_buffer(buf); + return ERR_PTR(-EUCLEAN); + } + btrfs_set_header_generation(buf, trans->transid); btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level); btrfs_tree_lock(buf); @@ -8486,14 +8511,13 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, ret = btrfs_lookup_extent_info(trans, root, bytenr, level - 1, 1, &wc->refs[level - 1], &wc->flags[level - 1]); - if (ret < 0) { - btrfs_tree_unlock(next); - return ret; - } + if (ret < 0) + goto out_unlock; if (unlikely(wc->refs[level - 1] == 0)) { btrfs_err(root->fs_info, "Missing references."); - BUG(); + ret = -EIO; + goto out_unlock; } *lookup_info = 0; @@ -8545,7 +8569,12 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, } level--; - BUG_ON(level != btrfs_header_level(next)); + ASSERT(level == btrfs_header_level(next)); + if (level != btrfs_header_level(next)) { + btrfs_err(root->fs_info, "mismatched level"); + ret = -EIO; + goto out_unlock; + } path->nodes[level] = next; path->slots[level] = 0; path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; @@ -8560,8 +8589,15 @@ skip: if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) { parent = path->nodes[level]->start; } else { - BUG_ON(root->root_key.objectid != + ASSERT(root->root_key.objectid == btrfs_header_owner(path->nodes[level])); + if (root->root_key.objectid != + btrfs_header_owner(path->nodes[level])) { + btrfs_err(root->fs_info, + "mismatched block owner"); + ret = -EIO; + goto out_unlock; + } parent = 0; } @@ -8578,12 +8614,18 @@ skip: } ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent, root->root_key.objectid, level - 1, 0); - BUG_ON(ret); /* -ENOMEM */ + if (ret) + goto out_unlock; } + + *lookup_info = 1; + ret = 1; + +out_unlock: btrfs_tree_unlock(next); free_extent_buffer(next); - *lookup_info = 1; - return 1; + + return ret; } /* @@ -8682,15 +8724,14 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, if (eb == root->node) { if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) parent = eb->start; - else - BUG_ON(root->root_key.objectid != - btrfs_header_owner(eb)); + else if (root->root_key.objectid != btrfs_header_owner(eb)) + goto owner_mismatch; } else { if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF) parent = path->nodes[level + 1]->start; - else - BUG_ON(root->root_key.objectid != - btrfs_header_owner(path->nodes[level + 1])); + else if (root->root_key.objectid != + btrfs_header_owner(path->nodes[level + 1])) + goto owner_mismatch; } btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1); @@ -8698,6 +8739,11 @@ out: wc->refs[level] = 0; wc->flags[level] = 0; return 0; + +owner_mismatch: + btrfs_err_rl(root->fs_info, "unexpected tree owner, have %llu expect %llu", + btrfs_header_owner(eb), root->root_key.objectid); + return -EUCLEAN; } static noinline int walk_down_tree(struct btrfs_trans_handle *trans, @@ -8751,6 +8797,8 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans, ret = walk_up_proc(trans, root, path, wc); if (ret > 0) return 0; + if (ret < 0) + return ret; if (path->locks[level]) { btrfs_tree_unlock_rw(path->nodes[level], @@ -9439,6 +9487,8 @@ static int find_first_block_group(struct btrfs_root *root, int ret = 0; struct btrfs_key found_key; struct extent_buffer *leaf; + struct btrfs_block_group_item bg; + u64 flags; int slot; ret = btrfs_search_slot(NULL, root, key, path, 0, 0); @@ -9460,7 +9510,47 @@ static int find_first_block_group(struct btrfs_root *root, if (found_key.objectid >= key->objectid && found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) { - ret = 0; + struct extent_map_tree *em_tree; + struct extent_map *em; + + em_tree = &root->fs_info->mapping_tree.map_tree; + read_lock(&em_tree->lock); + em = lookup_extent_mapping(em_tree, found_key.objectid, + found_key.offset); + read_unlock(&em_tree->lock); + if (!em) { + btrfs_err(root->fs_info, + "logical %llu len %llu found bg but no related chunk", + found_key.objectid, found_key.offset); + ret = -ENOENT; + } else if (em->start != found_key.objectid || + em->len != found_key.offset) { + btrfs_err(root->fs_info, + "block group %llu len %llu mismatch with chunk %llu len %llu", + found_key.objectid, found_key.offset, + em->start, em->len); + ret = -EUCLEAN; + } else { + read_extent_buffer(leaf, &bg, + btrfs_item_ptr_offset(leaf, slot), + sizeof(bg)); + flags = btrfs_block_group_flags(&bg) & + BTRFS_BLOCK_GROUP_TYPE_MASK; + + if (flags != (em->map_lookup->type & + BTRFS_BLOCK_GROUP_TYPE_MASK)) { + btrfs_err(root->fs_info, +"block group %llu len %llu type flags 0x%llx mismatch with chunk type flags 0x%llx", + found_key.objectid, + found_key.offset, flags, + (BTRFS_BLOCK_GROUP_TYPE_MASK & + em->map_lookup->type)); + ret = -EUCLEAN; + } else { + ret = 0; + } + } + free_extent_map(em); goto out; } path->slots[0]++; @@ -9479,6 +9569,7 @@ void btrfs_put_block_group_cache(struct btrfs_fs_info *info) block_group = btrfs_lookup_first_block_group(info, last); while (block_group) { + wait_block_group_cache_done(block_group); spin_lock(&block_group->lock); if (block_group->iref) break; @@ -9674,6 +9765,62 @@ btrfs_create_block_group_cache(struct btrfs_root *root, u64 start, u64 size) return cache; } + +/* + * Iterate all chunks and verify that each of them has the corresponding block + * group + */ +static int check_chunk_block_group_mappings(struct btrfs_fs_info *fs_info) +{ + struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; + struct extent_map *em; + struct btrfs_block_group_cache *bg; + u64 start = 0; + int ret = 0; + + while (1) { + read_lock(&map_tree->map_tree.lock); + /* + * lookup_extent_mapping will return the first extent map + * intersecting the range, so setting @len to 1 is enough to + * get the first chunk. + */ + em = lookup_extent_mapping(&map_tree->map_tree, start, 1); + read_unlock(&map_tree->map_tree.lock); + if (!em) + break; + + bg = btrfs_lookup_block_group(fs_info, em->start); + if (!bg) { + btrfs_err(fs_info, + "chunk start=%llu len=%llu doesn't have corresponding block group", + em->start, em->len); + ret = -EUCLEAN; + free_extent_map(em); + break; + } + if (bg->key.objectid != em->start || + bg->key.offset != em->len || + (bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK) != + (em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK)) { + btrfs_err(fs_info, +"chunk start=%llu len=%llu flags=0x%llx doesn't match block group start=%llu len=%llu flags=0x%llx", + em->start, em->len, + em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK, + bg->key.objectid, bg->key.offset, + bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK); + ret = -EUCLEAN; + free_extent_map(em); + btrfs_put_block_group(bg); + break; + } + start = em->start + em->len; + free_extent_map(em); + btrfs_put_block_group(bg); + } + return ret; +} + int btrfs_read_block_groups(struct btrfs_root *root) { struct btrfs_path *path; @@ -9686,6 +9833,11 @@ int btrfs_read_block_groups(struct btrfs_root *root) struct extent_buffer *leaf; int need_clear = 0; u64 cache_gen; + u64 feature; + int mixed; + + feature = btrfs_super_incompat_flags(info->super_copy); + mixed = !!(feature & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS); root = info->extent_root; key.objectid = 0; @@ -9739,6 +9891,15 @@ int btrfs_read_block_groups(struct btrfs_root *root) btrfs_item_ptr_offset(leaf, path->slots[0]), sizeof(cache->item)); cache->flags = btrfs_block_group_flags(&cache->item); + if (!mixed && + ((cache->flags & BTRFS_BLOCK_GROUP_METADATA) && + (cache->flags & BTRFS_BLOCK_GROUP_DATA))) { + btrfs_err(info, +"bg %llu is a mixed block group but filesystem hasn't enabled mixed block groups", + cache->key.objectid); + ret = -EINVAL; + goto error; + } key.objectid = found_key.objectid + found_key.offset; btrfs_release_path(path); @@ -9846,7 +10007,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) } init_global_block_rsv(info); - ret = 0; + ret = check_chunk_block_group_mappings(info); error: btrfs_free_path(path); return ret; @@ -9855,7 +10016,7 @@ error: void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - struct btrfs_block_group_cache *block_group, *tmp; + struct btrfs_block_group_cache *block_group; struct btrfs_root *extent_root = root->fs_info->extent_root; struct btrfs_block_group_item item; struct btrfs_key key; @@ -9863,7 +10024,10 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans, bool can_flush_pending_bgs = trans->can_flush_pending_bgs; trans->can_flush_pending_bgs = false; - list_for_each_entry_safe(block_group, tmp, &trans->new_bgs, bg_list) { + while (!list_empty(&trans->new_bgs)) { + block_group = list_first_entry(&trans->new_bgs, + struct btrfs_block_group_cache, + bg_list); if (ret) goto next; @@ -10328,7 +10492,7 @@ btrfs_start_trans_remove_block_group(struct btrfs_fs_info *fs_info, * more device items and remove one chunk item), but this is done at * btrfs_remove_chunk() through a call to check_system_chunk(). */ - map = (struct map_lookup *)em->bdev; + map = em->map_lookup; num_items = 3 + map->num_stripes; free_extent_map(em); @@ -10374,7 +10538,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) /* Don't want to race with allocators so take the groups_sem */ down_write(&space_info->groups_sem); spin_lock(&block_group->lock); - if (block_group->reserved || + if (block_group->reserved || block_group->pinned || btrfs_block_group_used(&block_group->item) || block_group->ro || list_is_singular(&block_group->list)) { @@ -10573,6 +10737,10 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, *trimmed = 0; + /* Discard not supported = nothing to do. */ + if (!blk_queue_discard(bdev_get_queue(device->bdev))) + return 0; + /* Not writeable = nothing to do. */ if (!device->writeable) return 0; @@ -10644,17 +10812,9 @@ int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range) u64 start; u64 end; u64 trimmed = 0; - u64 total_bytes = btrfs_super_total_bytes(fs_info->super_copy); int ret = 0; - /* - * try to trim all FS space, our block group may start from non-zero. - */ - if (range->len == total_bytes) - cache = btrfs_lookup_first_block_group(fs_info, range->start); - else - cache = btrfs_lookup_block_group(fs_info, range->start); - + cache = btrfs_lookup_first_block_group(fs_info, range->start); while (cache) { if (cache->key.objectid >= (range->start + range->len)) { btrfs_put_block_group(cache); @@ -10695,8 +10855,8 @@ int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range) } mutex_lock(&root->fs_info->fs_devices->device_list_mutex); - devices = &root->fs_info->fs_devices->alloc_list; - list_for_each_entry(device, devices, dev_alloc_list) { + devices = &root->fs_info->fs_devices->devices; + list_for_each_entry(device, devices, dev_list) { ret = btrfs_trim_free_extents(device, range->minlen, &group_trimmed); if (ret) |