diff options
Diffstat (limited to 'fs/btrfs/disk-io.c')
-rw-r--r-- | fs/btrfs/disk-io.c | 135 |
1 files changed, 46 insertions, 89 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 85b207d19aa5..78722aaffecd 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -49,6 +49,7 @@ #include "raid56.h" #include "sysfs.h" #include "qgroup.h" +#include "tree-checker.h" #ifdef CONFIG_X86 #include <asm/cpufeature.h> @@ -445,9 +446,9 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root, int mirror_num = 0; int failed_mirror = 0; - clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags); io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree; while (1) { + clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags); ret = read_extent_buffer_pages(io_tree, eb, start, WAIT_COMPLETE, btree_get_extent, mirror_num); @@ -459,14 +460,6 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root, ret = -EIO; } - /* - * This buffer's crc is fine, but its contents are corrupted, so - * there is no reason to read the other copies, they won't be - * any less wrong. - */ - if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags)) - break; - num_copies = btrfs_num_copies(root->fs_info, eb->start, eb->len); if (num_copies == 1) @@ -530,72 +523,6 @@ static int check_tree_block_fsid(struct btrfs_fs_info *fs_info, return ret; } -#define CORRUPT(reason, eb, root, slot) \ - btrfs_crit(root->fs_info, "corrupt leaf, %s: block=%llu," \ - "root=%llu, slot=%d", reason, \ - btrfs_header_bytenr(eb), root->objectid, slot) - -static noinline int check_leaf(struct btrfs_root *root, - struct extent_buffer *leaf) -{ - struct btrfs_key key; - struct btrfs_key leaf_key; - u32 nritems = btrfs_header_nritems(leaf); - int slot; - - if (nritems == 0) - return 0; - - /* Check the 0 item */ - if (btrfs_item_offset_nr(leaf, 0) + btrfs_item_size_nr(leaf, 0) != - BTRFS_LEAF_DATA_SIZE(root)) { - CORRUPT("invalid item offset size pair", leaf, root, 0); - return -EIO; - } - - /* - * Check to make sure each items keys are in the correct order and their - * offsets make sense. We only have to loop through nritems-1 because - * we check the current slot against the next slot, which verifies the - * next slot's offset+size makes sense and that the current's slot - * offset is correct. - */ - for (slot = 0; slot < nritems - 1; slot++) { - btrfs_item_key_to_cpu(leaf, &leaf_key, slot); - btrfs_item_key_to_cpu(leaf, &key, slot + 1); - - /* Make sure the keys are in the right order */ - if (btrfs_comp_cpu_keys(&leaf_key, &key) >= 0) { - CORRUPT("bad key order", leaf, root, slot); - return -EIO; - } - - /* - * Make sure the offset and ends are right, remember that the - * item data starts at the end of the leaf and grows towards the - * front. - */ - if (btrfs_item_offset_nr(leaf, slot) != - btrfs_item_end_nr(leaf, slot + 1)) { - CORRUPT("slot offset bad", leaf, root, slot); - return -EIO; - } - - /* - * Check to make sure that we don't point outside of the leaf, - * just incase all the items are consistent to eachother, but - * all point outside of the leaf. - */ - if (btrfs_item_end_nr(leaf, slot) > - BTRFS_LEAF_DATA_SIZE(root)) { - CORRUPT("slot end outside of leaf", leaf, root, slot); - return -EIO; - } - } - - return 0; -} - static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, u64 phy_offset, struct page *page, u64 start, u64 end, int mirror) @@ -662,11 +589,14 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, * that we don't try and read the other copies of this block, just * return -EIO. */ - if (found_level == 0 && check_leaf(root, eb)) { + if (found_level == 0 && btrfs_check_leaf_full(root, eb)) { set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags); ret = -EIO; } + if (found_level > 0 && btrfs_check_node(root, eb)) + ret = -EIO; + if (!ret) set_extent_buffer_uptodate(eb); err: @@ -923,7 +853,7 @@ static int check_async_write(struct inode *inode, unsigned long bio_flags) if (bio_flags & EXTENT_BIO_TREE_LOG) return 0; #ifdef CONFIG_X86 - if (cpu_has_xmm4_2) + if (static_cpu_has(X86_FEATURE_XMM4_2)) return 0; #endif return 1; @@ -1011,8 +941,9 @@ static int btree_writepages(struct address_space *mapping, fs_info = BTRFS_I(mapping->host)->root->fs_info; /* this is a bit racy, but that's ok */ - ret = percpu_counter_compare(&fs_info->dirty_metadata_bytes, - BTRFS_DIRTY_METADATA_THRESH); + ret = __percpu_counter_compare(&fs_info->dirty_metadata_bytes, + BTRFS_DIRTY_METADATA_THRESH, + fs_info->dirty_metadata_batch); if (ret < 0) return 0; } @@ -1196,7 +1127,7 @@ static struct btrfs_subvolume_writers *btrfs_alloc_subvolume_writers(void) if (!writers) return ERR_PTR(-ENOMEM); - ret = percpu_counter_init(&writers->counter, 0, GFP_KERNEL); + ret = percpu_counter_init(&writers->counter, 0, GFP_NOFS); if (ret < 0) { kfree(writers); return ERR_PTR(ret); @@ -1607,8 +1538,8 @@ fail: return ret; } -static struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, - u64 root_id) +struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, + u64 root_id) { struct btrfs_root *root; @@ -3965,7 +3896,13 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) buf->len, root->fs_info->dirty_metadata_batch); #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY - if (btrfs_header_level(buf) == 0 && check_leaf(root, buf)) { + /* + * Since btrfs_mark_buffer_dirty() can be called with item pointer set + * but item data not updated. + * So here we should only check item pointers, not item data. + */ + if (btrfs_header_level(buf) == 0 && + btrfs_check_leaf_relaxed(root, buf)) { btrfs_print_leaf(root, buf); ASSERT(0); } @@ -3987,8 +3924,9 @@ static void __btrfs_btree_balance_dirty(struct btrfs_root *root, if (flush_delayed) btrfs_balance_delayed_items(root); - ret = percpu_counter_compare(&root->fs_info->dirty_metadata_bytes, - BTRFS_DIRTY_METADATA_THRESH); + ret = __percpu_counter_compare(&root->fs_info->dirty_metadata_bytes, + BTRFS_DIRTY_METADATA_THRESH, + root->fs_info->dirty_metadata_batch); if (ret > 0) { balance_dirty_pages_ratelimited( root->fs_info->btree_inode->i_mapping); @@ -4173,6 +4111,14 @@ static void btrfs_destroy_all_ordered_extents(struct btrfs_fs_info *fs_info) spin_lock(&fs_info->ordered_root_lock); } spin_unlock(&fs_info->ordered_root_lock); + + /* + * We need this here because if we've been flipped read-only we won't + * get sync() from the umount, so we need to make sure any ordered + * extents that haven't had their dirty pages IO start writeout yet + * actually get run and error out properly. + */ + btrfs_wait_ordered_roots(fs_info, -1); } static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, @@ -4331,6 +4277,7 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root, static int btrfs_destroy_pinned_extent(struct btrfs_root *root, struct extent_io_tree *pinned_extents) { + struct btrfs_fs_info *fs_info = root->fs_info; struct extent_io_tree *unpin; u64 start; u64 end; @@ -4340,21 +4287,31 @@ static int btrfs_destroy_pinned_extent(struct btrfs_root *root, unpin = pinned_extents; again: while (1) { + /* + * The btrfs_finish_extent_commit() may get the same range as + * ours between find_first_extent_bit and clear_extent_dirty. + * Hence, hold the unused_bg_unpin_mutex to avoid double unpin + * the same extent range. + */ + mutex_lock(&fs_info->unused_bg_unpin_mutex); ret = find_first_extent_bit(unpin, 0, &start, &end, EXTENT_DIRTY, NULL); - if (ret) + if (ret) { + mutex_unlock(&fs_info->unused_bg_unpin_mutex); break; + } clear_extent_dirty(unpin, start, end, GFP_NOFS); btrfs_error_unpin_extent_range(root, start, end); + mutex_unlock(&fs_info->unused_bg_unpin_mutex); cond_resched(); } if (loop) { - if (unpin == &root->fs_info->freed_extents[0]) - unpin = &root->fs_info->freed_extents[1]; + if (unpin == &fs_info->freed_extents[0]) + unpin = &fs_info->freed_extents[1]; else - unpin = &root->fs_info->freed_extents[0]; + unpin = &fs_info->freed_extents[0]; loop = false; goto again; } |