summaryrefslogtreecommitdiff
path: root/fs/btrfs/extent-tree.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r--fs/btrfs/extent-tree.c246
1 files changed, 203 insertions, 43 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 47cdc6f3390b..978bbfed5a2c 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2342,7 +2342,13 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
ins.type = BTRFS_EXTENT_ITEM_KEY;
}
- BUG_ON(node->ref_mod != 1);
+ if (node->ref_mod != 1) {
+ btrfs_err(root->fs_info,
+ "btree block(%llu) has %d references rather than 1: action %d ref_root %llu parent %llu",
+ node->bytenr, node->ref_mod, node->action, ref_root,
+ parent);
+ return -EIO;
+ }
if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
BUG_ON(!extent_op || !extent_op->update_flags);
ret = alloc_reserved_tree_block(trans, root,
@@ -2520,11 +2526,11 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
if (ref && ref->seq &&
btrfs_check_delayed_seq(fs_info, delayed_refs, ref->seq)) {
spin_unlock(&locked_ref->lock);
- btrfs_delayed_ref_unlock(locked_ref);
spin_lock(&delayed_refs->lock);
locked_ref->processing = 0;
delayed_refs->num_heads_ready++;
spin_unlock(&delayed_refs->lock);
+ btrfs_delayed_ref_unlock(locked_ref);
locked_ref = NULL;
cond_resched();
count++;
@@ -2570,7 +2576,10 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
*/
if (must_insert_reserved)
locked_ref->must_insert_reserved = 1;
+ spin_lock(&delayed_refs->lock);
locked_ref->processing = 0;
+ delayed_refs->num_heads_ready++;
+ spin_unlock(&delayed_refs->lock);
btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret);
btrfs_delayed_ref_unlock(locked_ref);
return ret;
@@ -3358,13 +3367,6 @@ again:
goto again;
}
- /* We've already setup this transaction, go ahead and exit */
- if (block_group->cache_generation == trans->transid &&
- i_size_read(inode)) {
- dcs = BTRFS_DC_SETUP;
- goto out_put;
- }
-
/*
* We want to set the generation to 0, that way if anything goes wrong
* from here on out we know not to trust this cache when we load up next
@@ -3388,6 +3390,13 @@ again:
}
WARN_ON(ret);
+ /* We've already setup this transaction, go ahead and exit */
+ if (block_group->cache_generation == trans->transid &&
+ i_size_read(inode)) {
+ dcs = BTRFS_DC_SETUP;
+ goto out_put;
+ }
+
if (i_size_read(inode) > 0) {
ret = btrfs_check_trunc_cache_free_space(root,
&root->fs_info->global_block_rsv);
@@ -3851,6 +3860,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
info->space_info_kobj, "%s",
alloc_name(found->flags));
if (ret) {
+ percpu_counter_destroy(&found->total_bytes_pinned);
kfree(found);
return ret;
}
@@ -4124,7 +4134,7 @@ commit_trans:
data_sinfo->flags, bytes, 1);
spin_unlock(&data_sinfo->lock);
- return ret;
+ return 0;
}
/*
@@ -4388,6 +4398,7 @@ again:
if (wait_for_alloc) {
mutex_unlock(&fs_info->chunk_mutex);
wait_for_alloc = 0;
+ cond_resched();
goto again;
}
@@ -7830,6 +7841,20 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
buf = btrfs_find_create_tree_block(root, bytenr);
if (!buf)
return ERR_PTR(-ENOMEM);
+
+ /*
+ * Extra safety check in case the extent tree is corrupted and extent
+ * allocator chooses to use a tree block which is already used and
+ * locked.
+ */
+ if (buf->lock_owner == current->pid) {
+ btrfs_err_rl(root->fs_info,
+"tree block %llu owner %llu already locked by pid=%d, extent tree corruption detected",
+ buf->start, btrfs_header_owner(buf), current->pid);
+ free_extent_buffer(buf);
+ return ERR_PTR(-EUCLEAN);
+ }
+
btrfs_set_header_generation(buf, trans->transid);
btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level);
btrfs_tree_lock(buf);
@@ -8486,14 +8511,13 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
ret = btrfs_lookup_extent_info(trans, root, bytenr, level - 1, 1,
&wc->refs[level - 1],
&wc->flags[level - 1]);
- if (ret < 0) {
- btrfs_tree_unlock(next);
- return ret;
- }
+ if (ret < 0)
+ goto out_unlock;
if (unlikely(wc->refs[level - 1] == 0)) {
btrfs_err(root->fs_info, "Missing references.");
- BUG();
+ ret = -EIO;
+ goto out_unlock;
}
*lookup_info = 0;
@@ -8545,7 +8569,12 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
}
level--;
- BUG_ON(level != btrfs_header_level(next));
+ ASSERT(level == btrfs_header_level(next));
+ if (level != btrfs_header_level(next)) {
+ btrfs_err(root->fs_info, "mismatched level");
+ ret = -EIO;
+ goto out_unlock;
+ }
path->nodes[level] = next;
path->slots[level] = 0;
path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
@@ -8560,8 +8589,15 @@ skip:
if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
parent = path->nodes[level]->start;
} else {
- BUG_ON(root->root_key.objectid !=
+ ASSERT(root->root_key.objectid ==
btrfs_header_owner(path->nodes[level]));
+ if (root->root_key.objectid !=
+ btrfs_header_owner(path->nodes[level])) {
+ btrfs_err(root->fs_info,
+ "mismatched block owner");
+ ret = -EIO;
+ goto out_unlock;
+ }
parent = 0;
}
@@ -8578,12 +8614,18 @@ skip:
}
ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent,
root->root_key.objectid, level - 1, 0);
- BUG_ON(ret); /* -ENOMEM */
+ if (ret)
+ goto out_unlock;
}
+
+ *lookup_info = 1;
+ ret = 1;
+
+out_unlock:
btrfs_tree_unlock(next);
free_extent_buffer(next);
- *lookup_info = 1;
- return 1;
+
+ return ret;
}
/*
@@ -8682,15 +8724,14 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
if (eb == root->node) {
if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
parent = eb->start;
- else
- BUG_ON(root->root_key.objectid !=
- btrfs_header_owner(eb));
+ else if (root->root_key.objectid != btrfs_header_owner(eb))
+ goto owner_mismatch;
} else {
if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
parent = path->nodes[level + 1]->start;
- else
- BUG_ON(root->root_key.objectid !=
- btrfs_header_owner(path->nodes[level + 1]));
+ else if (root->root_key.objectid !=
+ btrfs_header_owner(path->nodes[level + 1]))
+ goto owner_mismatch;
}
btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1);
@@ -8698,6 +8739,11 @@ out:
wc->refs[level] = 0;
wc->flags[level] = 0;
return 0;
+
+owner_mismatch:
+ btrfs_err_rl(root->fs_info, "unexpected tree owner, have %llu expect %llu",
+ btrfs_header_owner(eb), root->root_key.objectid);
+ return -EUCLEAN;
}
static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
@@ -8751,6 +8797,8 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
ret = walk_up_proc(trans, root, path, wc);
if (ret > 0)
return 0;
+ if (ret < 0)
+ return ret;
if (path->locks[level]) {
btrfs_tree_unlock_rw(path->nodes[level],
@@ -9439,6 +9487,8 @@ static int find_first_block_group(struct btrfs_root *root,
int ret = 0;
struct btrfs_key found_key;
struct extent_buffer *leaf;
+ struct btrfs_block_group_item bg;
+ u64 flags;
int slot;
ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
@@ -9460,7 +9510,47 @@ static int find_first_block_group(struct btrfs_root *root,
if (found_key.objectid >= key->objectid &&
found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
- ret = 0;
+ struct extent_map_tree *em_tree;
+ struct extent_map *em;
+
+ em_tree = &root->fs_info->mapping_tree.map_tree;
+ read_lock(&em_tree->lock);
+ em = lookup_extent_mapping(em_tree, found_key.objectid,
+ found_key.offset);
+ read_unlock(&em_tree->lock);
+ if (!em) {
+ btrfs_err(root->fs_info,
+ "logical %llu len %llu found bg but no related chunk",
+ found_key.objectid, found_key.offset);
+ ret = -ENOENT;
+ } else if (em->start != found_key.objectid ||
+ em->len != found_key.offset) {
+ btrfs_err(root->fs_info,
+ "block group %llu len %llu mismatch with chunk %llu len %llu",
+ found_key.objectid, found_key.offset,
+ em->start, em->len);
+ ret = -EUCLEAN;
+ } else {
+ read_extent_buffer(leaf, &bg,
+ btrfs_item_ptr_offset(leaf, slot),
+ sizeof(bg));
+ flags = btrfs_block_group_flags(&bg) &
+ BTRFS_BLOCK_GROUP_TYPE_MASK;
+
+ if (flags != (em->map_lookup->type &
+ BTRFS_BLOCK_GROUP_TYPE_MASK)) {
+ btrfs_err(root->fs_info,
+"block group %llu len %llu type flags 0x%llx mismatch with chunk type flags 0x%llx",
+ found_key.objectid,
+ found_key.offset, flags,
+ (BTRFS_BLOCK_GROUP_TYPE_MASK &
+ em->map_lookup->type));
+ ret = -EUCLEAN;
+ } else {
+ ret = 0;
+ }
+ }
+ free_extent_map(em);
goto out;
}
path->slots[0]++;
@@ -9479,6 +9569,7 @@ void btrfs_put_block_group_cache(struct btrfs_fs_info *info)
block_group = btrfs_lookup_first_block_group(info, last);
while (block_group) {
+ wait_block_group_cache_done(block_group);
spin_lock(&block_group->lock);
if (block_group->iref)
break;
@@ -9674,6 +9765,62 @@ btrfs_create_block_group_cache(struct btrfs_root *root, u64 start, u64 size)
return cache;
}
+
+/*
+ * Iterate all chunks and verify that each of them has the corresponding block
+ * group
+ */
+static int check_chunk_block_group_mappings(struct btrfs_fs_info *fs_info)
+{
+ struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
+ struct extent_map *em;
+ struct btrfs_block_group_cache *bg;
+ u64 start = 0;
+ int ret = 0;
+
+ while (1) {
+ read_lock(&map_tree->map_tree.lock);
+ /*
+ * lookup_extent_mapping will return the first extent map
+ * intersecting the range, so setting @len to 1 is enough to
+ * get the first chunk.
+ */
+ em = lookup_extent_mapping(&map_tree->map_tree, start, 1);
+ read_unlock(&map_tree->map_tree.lock);
+ if (!em)
+ break;
+
+ bg = btrfs_lookup_block_group(fs_info, em->start);
+ if (!bg) {
+ btrfs_err(fs_info,
+ "chunk start=%llu len=%llu doesn't have corresponding block group",
+ em->start, em->len);
+ ret = -EUCLEAN;
+ free_extent_map(em);
+ break;
+ }
+ if (bg->key.objectid != em->start ||
+ bg->key.offset != em->len ||
+ (bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK) !=
+ (em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
+ btrfs_err(fs_info,
+"chunk start=%llu len=%llu flags=0x%llx doesn't match block group start=%llu len=%llu flags=0x%llx",
+ em->start, em->len,
+ em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK,
+ bg->key.objectid, bg->key.offset,
+ bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK);
+ ret = -EUCLEAN;
+ free_extent_map(em);
+ btrfs_put_block_group(bg);
+ break;
+ }
+ start = em->start + em->len;
+ free_extent_map(em);
+ btrfs_put_block_group(bg);
+ }
+ return ret;
+}
+
int btrfs_read_block_groups(struct btrfs_root *root)
{
struct btrfs_path *path;
@@ -9686,6 +9833,11 @@ int btrfs_read_block_groups(struct btrfs_root *root)
struct extent_buffer *leaf;
int need_clear = 0;
u64 cache_gen;
+ u64 feature;
+ int mixed;
+
+ feature = btrfs_super_incompat_flags(info->super_copy);
+ mixed = !!(feature & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS);
root = info->extent_root;
key.objectid = 0;
@@ -9739,6 +9891,15 @@ int btrfs_read_block_groups(struct btrfs_root *root)
btrfs_item_ptr_offset(leaf, path->slots[0]),
sizeof(cache->item));
cache->flags = btrfs_block_group_flags(&cache->item);
+ if (!mixed &&
+ ((cache->flags & BTRFS_BLOCK_GROUP_METADATA) &&
+ (cache->flags & BTRFS_BLOCK_GROUP_DATA))) {
+ btrfs_err(info,
+"bg %llu is a mixed block group but filesystem hasn't enabled mixed block groups",
+ cache->key.objectid);
+ ret = -EINVAL;
+ goto error;
+ }
key.objectid = found_key.objectid + found_key.offset;
btrfs_release_path(path);
@@ -9846,7 +10007,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
}
init_global_block_rsv(info);
- ret = 0;
+ ret = check_chunk_block_group_mappings(info);
error:
btrfs_free_path(path);
return ret;
@@ -9855,7 +10016,7 @@ error:
void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
- struct btrfs_block_group_cache *block_group, *tmp;
+ struct btrfs_block_group_cache *block_group;
struct btrfs_root *extent_root = root->fs_info->extent_root;
struct btrfs_block_group_item item;
struct btrfs_key key;
@@ -9863,7 +10024,10 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans,
bool can_flush_pending_bgs = trans->can_flush_pending_bgs;
trans->can_flush_pending_bgs = false;
- list_for_each_entry_safe(block_group, tmp, &trans->new_bgs, bg_list) {
+ while (!list_empty(&trans->new_bgs)) {
+ block_group = list_first_entry(&trans->new_bgs,
+ struct btrfs_block_group_cache,
+ bg_list);
if (ret)
goto next;
@@ -10328,7 +10492,7 @@ btrfs_start_trans_remove_block_group(struct btrfs_fs_info *fs_info,
* more device items and remove one chunk item), but this is done at
* btrfs_remove_chunk() through a call to check_system_chunk().
*/
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
num_items = 3 + map->num_stripes;
free_extent_map(em);
@@ -10374,7 +10538,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
/* Don't want to race with allocators so take the groups_sem */
down_write(&space_info->groups_sem);
spin_lock(&block_group->lock);
- if (block_group->reserved ||
+ if (block_group->reserved || block_group->pinned ||
btrfs_block_group_used(&block_group->item) ||
block_group->ro ||
list_is_singular(&block_group->list)) {
@@ -10573,6 +10737,10 @@ static int btrfs_trim_free_extents(struct btrfs_device *device,
*trimmed = 0;
+ /* Discard not supported = nothing to do. */
+ if (!blk_queue_discard(bdev_get_queue(device->bdev)))
+ return 0;
+
/* Not writeable = nothing to do. */
if (!device->writeable)
return 0;
@@ -10644,17 +10812,9 @@ int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range)
u64 start;
u64 end;
u64 trimmed = 0;
- u64 total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
int ret = 0;
- /*
- * try to trim all FS space, our block group may start from non-zero.
- */
- if (range->len == total_bytes)
- cache = btrfs_lookup_first_block_group(fs_info, range->start);
- else
- cache = btrfs_lookup_block_group(fs_info, range->start);
-
+ cache = btrfs_lookup_first_block_group(fs_info, range->start);
while (cache) {
if (cache->key.objectid >= (range->start + range->len)) {
btrfs_put_block_group(cache);
@@ -10695,8 +10855,8 @@ int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range)
}
mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
- devices = &root->fs_info->fs_devices->alloc_list;
- list_for_each_entry(device, devices, dev_alloc_list) {
+ devices = &root->fs_info->fs_devices->devices;
+ list_for_each_entry(device, devices, dev_list) {
ret = btrfs_trim_free_extents(device, range->minlen,
&group_trimmed);
if (ret)