From 33879d4512c021ae65be9706608dacb36b4687b1 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 23 Nov 2013 22:33:32 -0800 Subject: block: submit_bio_wait() conversions It was being open coded in a few places. Signed-off-by: Kent Overstreet Cc: Jens Axboe Cc: Joern Engel Cc: Prasad Joshi Cc: Neil Brown Cc: Chris Mason Acked-by: NeilBrown --- fs/btrfs/extent_io.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) (limited to 'fs/btrfs/extent_io.c') diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 8e457fca0a0b..ff43802a7c88 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1952,11 +1952,6 @@ static int free_io_failure(struct inode *inode, struct io_failure_record *rec, return err; } -static void repair_io_failure_callback(struct bio *bio, int err) -{ - complete(bio->bi_private); -} - /* * this bypasses the standard btrfs submit functions deliberately, as * the standard behavior is to write all copies in a raid setup. here we only @@ -1973,7 +1968,6 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start, { struct bio *bio; struct btrfs_device *dev; - DECLARE_COMPLETION_ONSTACK(compl); u64 map_length = 0; u64 sector; struct btrfs_bio *bbio = NULL; @@ -1990,8 +1984,6 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start, bio = btrfs_io_bio_alloc(GFP_NOFS, 1); if (!bio) return -EIO; - bio->bi_private = &compl; - bio->bi_end_io = repair_io_failure_callback; bio->bi_size = 0; map_length = length; @@ -2012,10 +2004,8 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start, } bio->bi_bdev = dev->bdev; bio_add_page(bio, page, length, start - page_offset(page)); - btrfsic_submit_bio(WRITE_SYNC, bio); - wait_for_completion(&compl); - if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) { + if (btrfsic_submit_bio_wait(WRITE_SYNC, bio)) { /* try to remap that extent elsewhere? */ bio_put(bio); btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS); -- cgit v1.2.3 From 2c30c71bd653afcbed7f6754e8fe3d16e0e708a1 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 7 Nov 2013 12:20:26 -0800 Subject: block: Convert various code to bio_for_each_segment() With immutable biovecs we don't want code accessing bi_io_vec directly - the uses this patch changes weren't incorrect since they all own the bio, but it makes the code harder to audit for no good reason - also, this will help with multipage bvecs later. Signed-off-by: Kent Overstreet Cc: Jens Axboe Cc: Alexander Viro Cc: Chris Mason Cc: Jaegeuk Kim Cc: Joern Engel Cc: Prasad Joshi Cc: Trond Myklebust --- fs/btrfs/extent_io.c | 35 +++++++++++++---------------------- 1 file changed, 13 insertions(+), 22 deletions(-) (limited to 'fs/btrfs/extent_io.c') diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index ff43802a7c88..8b5f9e1d1f0e 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2332,12 +2332,13 @@ int end_extent_writepage(struct page *page, int err, u64 start, u64 end) */ static void end_bio_extent_writepage(struct bio *bio, int err) { - struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; + struct bio_vec *bvec; struct extent_io_tree *tree; u64 start; u64 end; + int i; - do { + bio_for_each_segment_all(bvec, bio, i) { struct page *page = bvec->bv_page; tree = &BTRFS_I(page->mapping->host)->io_tree; @@ -2355,14 +2356,11 @@ static void end_bio_extent_writepage(struct bio *bio, int err) start = page_offset(page); end = start + bvec->bv_offset + bvec->bv_len - 1; - if (--bvec >= bio->bi_io_vec) - prefetchw(&bvec->bv_page->flags); - if (end_extent_writepage(page, err, start, end)) continue; end_page_writeback(page); - } while (bvec >= bio->bi_io_vec); + } bio_put(bio); } @@ -2392,9 +2390,8 @@ endio_readpage_release_extent(struct extent_io_tree *tree, u64 start, u64 len, */ static void end_bio_extent_readpage(struct bio *bio, int err) { + struct bio_vec *bvec; int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); - struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1; - struct bio_vec *bvec = bio->bi_io_vec; struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); struct extent_io_tree *tree; u64 offset = 0; @@ -2405,11 +2402,12 @@ static void end_bio_extent_readpage(struct bio *bio, int err) u64 extent_len = 0; int mirror; int ret; + int i; if (err) uptodate = 0; - do { + bio_for_each_segment_all(bvec, bio, i) { struct page *page = bvec->bv_page; struct inode *inode = page->mapping->host; @@ -2433,9 +2431,6 @@ static void end_bio_extent_readpage(struct bio *bio, int err) end = start + bvec->bv_offset + bvec->bv_len - 1; len = bvec->bv_len; - if (++bvec <= bvec_end) - prefetchw(&bvec->bv_page->flags); - mirror = io_bio->mirror_num; if (likely(uptodate && tree->ops && tree->ops->readpage_end_io_hook)) { @@ -2516,7 +2511,7 @@ readpage_ok: extent_start = start; extent_len = end + 1 - start; } - } while (bvec <= bvec_end); + } if (extent_len) endio_readpage_release_extent(tree, extent_start, extent_len, @@ -2547,7 +2542,6 @@ btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, } if (bio) { - bio->bi_size = 0; bio->bi_bdev = bdev; bio->bi_sector = first_sector; btrfs_bio = btrfs_io_bio(bio); @@ -3410,20 +3404,18 @@ static void end_extent_buffer_writeback(struct extent_buffer *eb) static void end_bio_extent_buffer_writepage(struct bio *bio, int err) { - int uptodate = err == 0; - struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; + struct bio_vec *bvec; struct extent_buffer *eb; - int done; + int i, done; - do { + bio_for_each_segment_all(bvec, bio, i) { struct page *page = bvec->bv_page; - bvec--; eb = (struct extent_buffer *)page->private; BUG_ON(!eb); done = atomic_dec_and_test(&eb->io_pages); - if (!uptodate || test_bit(EXTENT_BUFFER_IOERR, &eb->bflags)) { + if (err || test_bit(EXTENT_BUFFER_IOERR, &eb->bflags)) { set_bit(EXTENT_BUFFER_IOERR, &eb->bflags); ClearPageUptodate(page); SetPageError(page); @@ -3435,10 +3427,9 @@ static void end_bio_extent_buffer_writepage(struct bio *bio, int err) continue; end_extent_buffer_writeback(eb); - } while (bvec >= bio->bi_io_vec); + } bio_put(bio); - } static int write_one_eb(struct extent_buffer *eb, -- cgit v1.2.3 From 4f024f3797c43cb4b73cd2c50cec728842d0e49e Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 11 Oct 2013 15:44:27 -0700 Subject: block: Abstract out bvec iterator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Immutable biovecs are going to require an explicit iterator. To implement immutable bvecs, a later patch is going to add a bi_bvec_done member to this struct; for now, this patch effectively just renames things. Signed-off-by: Kent Overstreet Cc: Jens Axboe Cc: Geert Uytterhoeven Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: "Ed L. Cashin" Cc: Nick Piggin Cc: Lars Ellenberg Cc: Jiri Kosina Cc: Matthew Wilcox Cc: Geoff Levand Cc: Yehuda Sadeh Cc: Sage Weil Cc: Alex Elder Cc: ceph-devel@vger.kernel.org Cc: Joshua Morris Cc: Philip Kelleher Cc: Rusty Russell Cc: "Michael S. Tsirkin" Cc: Konrad Rzeszutek Wilk Cc: Jeremy Fitzhardinge Cc: Neil Brown Cc: Alasdair Kergon Cc: Mike Snitzer Cc: dm-devel@redhat.com Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: linux390@de.ibm.com Cc: Boaz Harrosh Cc: Benny Halevy Cc: "James E.J. Bottomley" Cc: Greg Kroah-Hartman Cc: "Nicholas A. Bellinger" Cc: Alexander Viro Cc: Chris Mason Cc: "Theodore Ts'o" Cc: Andreas Dilger Cc: Jaegeuk Kim Cc: Steven Whitehouse Cc: Dave Kleikamp Cc: Joern Engel Cc: Prasad Joshi Cc: Trond Myklebust Cc: KONISHI Ryusuke Cc: Mark Fasheh Cc: Joel Becker Cc: Ben Myers Cc: xfs@oss.sgi.com Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Len Brown Cc: Pavel Machek Cc: "Rafael J. Wysocki" Cc: Herton Ronaldo Krzesinski Cc: Ben Hutchings Cc: Andrew Morton Cc: Guo Chao Cc: Tejun Heo Cc: Asai Thambi S P Cc: Selvan Mani Cc: Sam Bradshaw Cc: Wei Yongjun Cc: "Roger Pau Monné" Cc: Jan Beulich Cc: Stefano Stabellini Cc: Ian Campbell Cc: Sebastian Ott Cc: Christian Borntraeger Cc: Minchan Kim Cc: Jiang Liu Cc: Nitin Gupta Cc: Jerome Marchand Cc: Joe Perches Cc: Peng Tao Cc: Andy Adamson Cc: fanchaoting Cc: Jie Liu Cc: Sunil Mushran Cc: "Martin K. Petersen" Cc: Namjae Jeon Cc: Pankaj Kumar Cc: Dan Magenheimer Cc: Mel Gorman 6 --- fs/btrfs/extent_io.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'fs/btrfs/extent_io.c') diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 8b5f9e1d1f0e..bcb6f1b780d6 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1984,7 +1984,7 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start, bio = btrfs_io_bio_alloc(GFP_NOFS, 1); if (!bio) return -EIO; - bio->bi_size = 0; + bio->bi_iter.bi_size = 0; map_length = length; ret = btrfs_map_block(fs_info, WRITE, logical, @@ -1995,7 +1995,7 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start, } BUG_ON(mirror_num != bbio->mirror_num); sector = bbio->stripes[mirror_num-1].physical >> 9; - bio->bi_sector = sector; + bio->bi_iter.bi_sector = sector; dev = bbio->stripes[mirror_num-1].dev; kfree(bbio); if (!dev || !dev->bdev || !dev->writeable) { @@ -2268,9 +2268,9 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, return -EIO; } bio->bi_end_io = failed_bio->bi_end_io; - bio->bi_sector = failrec->logical >> 9; + bio->bi_iter.bi_sector = failrec->logical >> 9; bio->bi_bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; - bio->bi_size = 0; + bio->bi_iter.bi_size = 0; btrfs_failed_bio = btrfs_io_bio(failed_bio); if (btrfs_failed_bio->csum) { @@ -2412,7 +2412,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err) struct inode *inode = page->mapping->host; pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, " - "mirror=%lu\n", (u64)bio->bi_sector, err, + "mirror=%lu\n", (u64)bio->bi_iter.bi_sector, err, io_bio->mirror_num); tree = &BTRFS_I(inode)->io_tree; @@ -2543,7 +2543,7 @@ btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, if (bio) { bio->bi_bdev = bdev; - bio->bi_sector = first_sector; + bio->bi_iter.bi_sector = first_sector; btrfs_bio = btrfs_io_bio(bio); btrfs_bio->csum = NULL; btrfs_bio->csum_allocated = NULL; @@ -2637,7 +2637,7 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree, if (bio_ret && *bio_ret) { bio = *bio_ret; if (old_compressed) - contig = bio->bi_sector == sector; + contig = bio->bi_iter.bi_sector == sector; else contig = bio_end_sector(bio) == sector; -- cgit v1.2.3 From 50892bac3b93afa5cc8de6541a8013a21bef3f74 Mon Sep 17 00:00:00 2001 From: Valentina Giusti Date: Mon, 4 Nov 2013 22:34:25 +0100 Subject: btrfs: remove unused variables from extent_io.c Remove unused variables: * tree from end_bio_extent_writepage, * item from extent_fiemap. Signed-off-by: Valentina Giusti Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/extent_io.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'fs/btrfs/extent_io.c') diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index ff43802a7c88..d97250a4e8e6 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2333,13 +2333,11 @@ int end_extent_writepage(struct page *page, int err, u64 start, u64 end) static void end_bio_extent_writepage(struct bio *bio, int err) { struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; - struct extent_io_tree *tree; u64 start; u64 end; do { struct page *page = bvec->bv_page; - tree = &BTRFS_I(page->mapping->host)->io_tree; /* We always issue full-page reads, but if some block * in a page fails to read, blk_update_request() will @@ -4082,12 +4080,10 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, struct extent_map *em = NULL; struct extent_state *cached_state = NULL; struct btrfs_path *path; - struct btrfs_file_extent_item *item; int end = 0; u64 em_start = 0; u64 em_len = 0; u64 em_end = 0; - unsigned long emflags; if (len == 0) return -EINVAL; @@ -4112,8 +4108,6 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, } WARN_ON(!ret); path->slots[0]--; - item = btrfs_item_ptr(path->nodes[0], path->slots[0], - struct btrfs_file_extent_item); btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]); found_type = btrfs_key_type(&found_key); @@ -4181,7 +4175,6 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, offset_in_extent = em_start - em->start; em_end = extent_map_end(em); em_len = em_end - em_start; - emflags = em->flags; disko = 0; flags = 0; -- cgit v1.2.3 From 68ba990f7d161c31e9eddd98727ba8393089047f Mon Sep 17 00:00:00 2001 From: Filipe David Borba Manana Date: Mon, 25 Nov 2013 03:22:07 +0000 Subject: Btrfs: fix extent boundary check in bio_readpage_error Signed-off-by: Filipe David Borba Manana Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/extent_io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/extent_io.c') diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index d97250a4e8e6..3721820687d7 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2156,7 +2156,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, return -EIO; } - if (em->start > start || em->start + em->len < start) { + if (em->start > start || em->start + em->len <= start) { free_extent_map(em); em = NULL; } -- cgit v1.2.3 From c42ac0bc9530d51029b938e09b60b5ee86e5ee70 Mon Sep 17 00:00:00 2001 From: Filipe David Borba Manana Date: Tue, 26 Nov 2013 15:01:34 +0000 Subject: Btrfs: add missing extent state caching calls When we didn't find a matching extent state, we inserted a new one but didn't cache it in the **cached_state parameter, which makes a subsequent call do a tree lookup to get it. Signed-off-by: Filipe David Borba Manana Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/extent_io.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/extent_io.c') diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 3721820687d7..01a141245862 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -817,6 +817,7 @@ again: if (err) extent_io_tree_panic(tree, err); + cache_state(prealloc, cached_state); prealloc = NULL; goto out; } @@ -1040,9 +1041,10 @@ again: goto out; } err = insert_state(tree, prealloc, start, end, &bits); - prealloc = NULL; if (err) extent_io_tree_panic(tree, err); + cache_state(prealloc, cached_state); + prealloc = NULL; goto out; } state = rb_entry(node, struct extent_state, rb_node); -- cgit v1.2.3 From 12cfbad90e02793b7a71b7591ebd5c3f9228dc5d Mon Sep 17 00:00:00 2001 From: Filipe David Borba Manana Date: Tue, 26 Nov 2013 15:41:47 +0000 Subject: Btrfs: more efficient extent state insertions Currently we do 2 traversals of an inode's extent_io_tree before inserting an extent state structure: 1 to see if a matching extent state already exists and 1 to do the insertion if the fist traversal didn't found such extent state. This change just combines those tree traversals into a single one. While running sysbench tests (random writes) I captured the number of elements in extent_io_tree trees for a while (into a procfs file backed by a seq_list from seq_file module) and got this histogram: Count: 9310 Range: 51.000 - 21386.000; Mean: 11785.243; Median: 18743.500; Stddev: 8923.688 Percentiles: 90th: 20985.000; 95th: 21155.000; 99th: 21369.000 51.000 - 93.933: 693 ######## 93.933 - 172.314: 938 ########## 172.314 - 315.408: 856 ######### 315.408 - 576.646: 95 # 576.646 - 6415.830: 888 ########## 6415.830 - 11713.809: 1024 ########### 11713.809 - 21386.000: 4816 ##################################################### So traversing such trees can take some significant time that can easily be avoided. Ran the following sysbench tests, 5 times each, for sequential and random writes, and got the following results: sysbench --test=fileio --file-num=1 --file-total-size=2G \ --file-test-mode=seqwr --num-threads=16 --file-block-size=65536 \ --max-requests=0 --max-time=60 --file-io-mode=sync sysbench --test=fileio --file-num=1 --file-total-size=2G \ --file-test-mode=rndwr --num-threads=16 --file-block-size=65536 \ --max-requests=0 --max-time=60 --file-io-mode=sync Before this change: sequential writes: 69.28Mb/sec (average of 5 runs) random writes: 4.14Mb/sec (average of 5 runs) After this change: sequential writes: 69.91Mb/sec (average of 5 runs) random writes: 5.69Mb/sec (average of 5 runs) Signed-off-by: Filipe David Borba Manana Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/extent_io.c | 76 +++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 55 insertions(+), 21 deletions(-) (limited to 'fs/btrfs/extent_io.c') diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 01a141245862..5fc9481515f3 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -224,12 +224,20 @@ void free_extent_state(struct extent_state *state) } static struct rb_node *tree_insert(struct rb_root *root, u64 offset, - struct rb_node *node) + struct rb_node *node, + struct rb_node ***p_in, + struct rb_node **parent_in) { struct rb_node **p = &root->rb_node; struct rb_node *parent = NULL; struct tree_entry *entry; + if (p_in && parent_in) { + p = *p_in; + parent = *parent_in; + goto do_insert; + } + while (*p) { parent = *p; entry = rb_entry(parent, struct tree_entry, rb_node); @@ -242,35 +250,43 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 offset, return parent; } +do_insert: rb_link_node(node, parent, p); rb_insert_color(node, root); return NULL; } static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset, - struct rb_node **prev_ret, - struct rb_node **next_ret) + struct rb_node **prev_ret, + struct rb_node **next_ret, + struct rb_node ***p_ret, + struct rb_node **parent_ret) { struct rb_root *root = &tree->state; - struct rb_node *n = root->rb_node; + struct rb_node **n = &root->rb_node; struct rb_node *prev = NULL; struct rb_node *orig_prev = NULL; struct tree_entry *entry; struct tree_entry *prev_entry = NULL; - while (n) { - entry = rb_entry(n, struct tree_entry, rb_node); - prev = n; + while (*n) { + prev = *n; + entry = rb_entry(prev, struct tree_entry, rb_node); prev_entry = entry; if (offset < entry->start) - n = n->rb_left; + n = &(*n)->rb_left; else if (offset > entry->end) - n = n->rb_right; + n = &(*n)->rb_right; else - return n; + return *n; } + if (p_ret) + *p_ret = n; + if (parent_ret) + *parent_ret = prev; + if (prev_ret) { orig_prev = prev; while (prev && offset > prev_entry->end) { @@ -292,18 +308,27 @@ static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset, return NULL; } -static inline struct rb_node *tree_search(struct extent_io_tree *tree, - u64 offset) +static inline struct rb_node * +tree_search_for_insert(struct extent_io_tree *tree, + u64 offset, + struct rb_node ***p_ret, + struct rb_node **parent_ret) { struct rb_node *prev = NULL; struct rb_node *ret; - ret = __etree_search(tree, offset, &prev, NULL); + ret = __etree_search(tree, offset, &prev, NULL, p_ret, parent_ret); if (!ret) return prev; return ret; } +static inline struct rb_node *tree_search(struct extent_io_tree *tree, + u64 offset) +{ + return tree_search_for_insert(tree, offset, NULL, NULL); +} + static void merge_cb(struct extent_io_tree *tree, struct extent_state *new, struct extent_state *other) { @@ -385,6 +410,8 @@ static void set_state_bits(struct extent_io_tree *tree, */ static int insert_state(struct extent_io_tree *tree, struct extent_state *state, u64 start, u64 end, + struct rb_node ***p, + struct rb_node **parent, unsigned long *bits) { struct rb_node *node; @@ -397,7 +424,7 @@ static int insert_state(struct extent_io_tree *tree, set_state_bits(tree, state, bits); - node = tree_insert(&tree->state, end, &state->rb_node); + node = tree_insert(&tree->state, end, &state->rb_node, p, parent); if (node) { struct extent_state *found; found = rb_entry(node, struct extent_state, rb_node); @@ -444,7 +471,8 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig, prealloc->state = orig->state; orig->start = split; - node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node); + node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node, + NULL, NULL); if (node) { free_extent_state(prealloc); return -EEXIST; @@ -783,6 +811,8 @@ __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, struct extent_state *state; struct extent_state *prealloc = NULL; struct rb_node *node; + struct rb_node **p; + struct rb_node *parent; int err = 0; u64 last_start; u64 last_end; @@ -809,11 +839,12 @@ again: * this search will find all the extents that end after * our range starts. */ - node = tree_search(tree, start); + node = tree_search_for_insert(tree, start, &p, &parent); if (!node) { prealloc = alloc_extent_state_atomic(prealloc); BUG_ON(!prealloc); - err = insert_state(tree, prealloc, start, end, &bits); + err = insert_state(tree, prealloc, start, end, + &p, &parent, &bits); if (err) extent_io_tree_panic(tree, err); @@ -920,7 +951,7 @@ hit_next: * the later extent. */ err = insert_state(tree, prealloc, start, this_end, - &bits); + NULL, NULL, &bits); if (err) extent_io_tree_panic(tree, err); @@ -1006,6 +1037,8 @@ int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, struct extent_state *state; struct extent_state *prealloc = NULL; struct rb_node *node; + struct rb_node **p; + struct rb_node *parent; int err = 0; u64 last_start; u64 last_end; @@ -1033,14 +1066,15 @@ again: * this search will find all the extents that end after * our range starts. */ - node = tree_search(tree, start); + node = tree_search_for_insert(tree, start, &p, &parent); if (!node) { prealloc = alloc_extent_state_atomic(prealloc); if (!prealloc) { err = -ENOMEM; goto out; } - err = insert_state(tree, prealloc, start, end, &bits); + err = insert_state(tree, prealloc, start, end, + &p, &parent, &bits); if (err) extent_io_tree_panic(tree, err); cache_state(prealloc, cached_state); @@ -1137,7 +1171,7 @@ hit_next: * the later extent. */ err = insert_state(tree, prealloc, start, this_end, - &bits); + NULL, NULL, &bits); if (err) extent_io_tree_panic(tree, err); cache_state(prealloc, cached_state); -- cgit v1.2.3 From a5dee37d390f3713f06e286a33b262f0fdb2b0ff Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 13 Dec 2013 10:02:44 -0500 Subject: Btrfs: deal with io_tree->mapping being NULL I need to add infrastructure to allocate dummy extent buffers for running sanity tests, and to do this I need to not have to worry about having an address_mapping for an io_tree, so just fix up the places where we assume that all io_tree's have a non-NULL ->mapping. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/extent_io.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) (limited to 'fs/btrfs/extent_io.c') diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 5fc9481515f3..1d8244d39b66 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -77,13 +77,19 @@ void btrfs_leak_debug_check(void) } } -#define btrfs_debug_check_extent_io_range(inode, start, end) \ - __btrfs_debug_check_extent_io_range(__func__, (inode), (start), (end)) +#define btrfs_debug_check_extent_io_range(tree, start, end) \ + __btrfs_debug_check_extent_io_range(__func__, (tree), (start), (end)) static inline void __btrfs_debug_check_extent_io_range(const char *caller, - struct inode *inode, u64 start, u64 end) + struct extent_io_tree *tree, u64 start, u64 end) { - u64 isize = i_size_read(inode); + struct inode *inode; + u64 isize; + if (!tree->mapping) + return; + + inode = tree->mapping->host; + isize = i_size_read(inode); if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) { printk_ratelimited(KERN_DEBUG "btrfs: %s: ino %llu isize %llu odd range [%llu,%llu]\n", @@ -124,6 +130,8 @@ static noinline void flush_write_bio(void *data); static inline struct btrfs_fs_info * tree_fs_info(struct extent_io_tree *tree) { + if (!tree->mapping) + return NULL; return btrfs_sb(tree->mapping->host->i_sb); } @@ -570,7 +578,7 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int err; int clear = 0; - btrfs_debug_check_extent_io_range(tree->mapping->host, start, end); + btrfs_debug_check_extent_io_range(tree, start, end); if (bits & EXTENT_DELALLOC) bits |= EXTENT_NORESERVE; @@ -730,7 +738,7 @@ static void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, struct extent_state *state; struct rb_node *node; - btrfs_debug_check_extent_io_range(tree->mapping->host, start, end); + btrfs_debug_check_extent_io_range(tree, start, end); spin_lock(&tree->lock); again: @@ -817,7 +825,7 @@ __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, u64 last_start; u64 last_end; - btrfs_debug_check_extent_io_range(tree->mapping->host, start, end); + btrfs_debug_check_extent_io_range(tree, start, end); bits |= EXTENT_FIRST_DELALLOC; again: @@ -1043,7 +1051,7 @@ int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, u64 last_start; u64 last_end; - btrfs_debug_check_extent_io_range(tree->mapping->host, start, end); + btrfs_debug_check_extent_io_range(tree, start, end); again: if (!prealloc && (mask & __GFP_WAIT)) { -- cgit v1.2.3 From 34b41acec1ccc06373ec584de19618d48ceb09fc Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 13 Dec 2013 10:41:51 -0500 Subject: Btrfs: use a bit to track if we're in the radix tree For creating a dummy in-memory btree I need to be able to use the radix tree to keep track of the buffers like normal extent buffers. With dummy buffers we skip the radix tree step, and we still want to do that for the tree mod log dummy buffers but for my test buffers we need to be able to remove them from the radix tree like normal. This will give me a way to do that. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/extent_io.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/extent_io.c') diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 1d8244d39b66..92a3ad4656a6 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4617,6 +4617,7 @@ again: } /* add one reference for the tree */ check_buffer_tree_ref(eb); + set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags); /* * there is a race where release page may have @@ -4660,9 +4661,7 @@ static int release_extent_buffer(struct extent_buffer *eb) { WARN_ON(atomic_read(&eb->refs) == 0); if (atomic_dec_and_test(&eb->refs)) { - if (test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags)) { - spin_unlock(&eb->refs_lock); - } else { + if (test_and_clear_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags)) { struct extent_io_tree *tree = eb->tree; spin_unlock(&eb->refs_lock); @@ -4671,6 +4670,8 @@ static int release_extent_buffer(struct extent_buffer *eb) radix_tree_delete(&tree->buffer, eb->start >> PAGE_CACHE_SHIFT); spin_unlock(&tree->buffer_lock); + } else { + spin_unlock(&eb->refs_lock); } /* Should be safe to release our pages at this point */ -- cgit v1.2.3 From f28491e0a6c46d99cbbef0f8ef7e314afa2359c8 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 16 Dec 2013 13:24:27 -0500 Subject: Btrfs: move the extent buffer radix tree into the fs_info I need to create a fake tree to test qgroups and I don't want to have to setup a fake btree_inode. The fact is we only use the radix tree for the fs_info, so everybody else who allocates an extent_io_tree is just wasting the space anyway. This patch moves the radix tree and its lock into btrfs_fs_info so there is less stuff I have to fake to do qgroup sanity tests. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/extent_io.c | 47 +++++++++++++++++++++++------------------------ 1 file changed, 23 insertions(+), 24 deletions(-) (limited to 'fs/btrfs/extent_io.c') diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 92a3ad4656a6..2fa23b57d8df 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -194,11 +194,9 @@ void extent_io_tree_init(struct extent_io_tree *tree, struct address_space *mapping) { tree->state = RB_ROOT; - INIT_RADIX_TREE(&tree->buffer, GFP_ATOMIC); tree->ops = NULL; tree->dirty_bytes = 0; spin_lock_init(&tree->lock); - spin_lock_init(&tree->buffer_lock); tree->mapping = mapping; } @@ -3489,6 +3487,7 @@ static int write_one_eb(struct extent_buffer *eb, struct extent_page_data *epd) { struct block_device *bdev = fs_info->fs_devices->latest_bdev; + struct extent_io_tree *tree = &BTRFS_I(fs_info->btree_inode)->io_tree; u64 offset = eb->start; unsigned long i, num_pages; unsigned long bio_flags = 0; @@ -3506,7 +3505,7 @@ static int write_one_eb(struct extent_buffer *eb, clear_page_dirty_for_io(p); set_page_writeback(p); - ret = submit_extent_page(rw, eb->tree, p, offset >> 9, + ret = submit_extent_page(rw, tree, p, offset >> 9, PAGE_CACHE_SIZE, 0, bdev, &epd->bio, -1, end_bio_extent_buffer_writepage, 0, epd->bio_flags, bio_flags); @@ -4370,10 +4369,9 @@ static inline void btrfs_release_extent_buffer(struct extent_buffer *eb) __free_extent_buffer(eb); } -static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, - u64 start, - unsigned long len, - gfp_t mask) +static struct extent_buffer * +__alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start, + unsigned long len, gfp_t mask) { struct extent_buffer *eb = NULL; @@ -4382,7 +4380,7 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, return NULL; eb->start = start; eb->len = len; - eb->tree = tree; + eb->fs_info = fs_info; eb->bflags = 0; rwlock_init(&eb->lock); atomic_set(&eb->write_locks, 0); @@ -4514,13 +4512,14 @@ static void mark_extent_buffer_accessed(struct extent_buffer *eb) } } -struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, - u64 start) +struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info, + u64 start) { struct extent_buffer *eb; rcu_read_lock(); - eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT); + eb = radix_tree_lookup(&fs_info->buffer_radix, + start >> PAGE_CACHE_SHIFT); if (eb && atomic_inc_not_zero(&eb->refs)) { rcu_read_unlock(); mark_extent_buffer_accessed(eb); @@ -4531,7 +4530,7 @@ struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, return NULL; } -struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, +struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start, unsigned long len) { unsigned long num_pages = num_extent_pages(start, len); @@ -4540,16 +4539,15 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, struct extent_buffer *eb; struct extent_buffer *exists = NULL; struct page *p; - struct address_space *mapping = tree->mapping; + struct address_space *mapping = fs_info->btree_inode->i_mapping; int uptodate = 1; int ret; - - eb = find_extent_buffer(tree, start); + eb = find_extent_buffer(fs_info, start); if (eb) return eb; - eb = __alloc_extent_buffer(tree, start, len, GFP_NOFS); + eb = __alloc_extent_buffer(fs_info, start, len, GFP_NOFS); if (!eb) return NULL; @@ -4604,12 +4602,13 @@ again: if (ret) goto free_eb; - spin_lock(&tree->buffer_lock); - ret = radix_tree_insert(&tree->buffer, start >> PAGE_CACHE_SHIFT, eb); - spin_unlock(&tree->buffer_lock); + spin_lock(&fs_info->buffer_lock); + ret = radix_tree_insert(&fs_info->buffer_radix, + start >> PAGE_CACHE_SHIFT, eb); + spin_unlock(&fs_info->buffer_lock); radix_tree_preload_end(); if (ret == -EEXIST) { - exists = find_extent_buffer(tree, start); + exists = find_extent_buffer(fs_info, start); if (exists) goto free_eb; else @@ -4662,14 +4661,14 @@ static int release_extent_buffer(struct extent_buffer *eb) WARN_ON(atomic_read(&eb->refs) == 0); if (atomic_dec_and_test(&eb->refs)) { if (test_and_clear_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags)) { - struct extent_io_tree *tree = eb->tree; + struct btrfs_fs_info *fs_info = eb->fs_info; spin_unlock(&eb->refs_lock); - spin_lock(&tree->buffer_lock); - radix_tree_delete(&tree->buffer, + spin_lock(&fs_info->buffer_lock); + radix_tree_delete(&fs_info->buffer_radix, eb->start >> PAGE_CACHE_SHIFT); - spin_unlock(&tree->buffer_lock); + spin_unlock(&fs_info->buffer_lock); } else { spin_unlock(&eb->refs_lock); } -- cgit v1.2.3 From efe120a067c8674a8ae21b194f0e68f098b61ee2 Mon Sep 17 00:00:00 2001 From: Frank Holton Date: Fri, 20 Dec 2013 11:37:06 -0500 Subject: Btrfs: convert printk to btrfs_ and fix BTRFS prefix Convert all applicable cases of printk and pr_* to the btrfs_* macros. Fix all uses of the BTRFS prefix. Signed-off-by: Frank Holton Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/extent_io.c | 61 +++++++++++++++++++++++++++++++--------------------- 1 file changed, 37 insertions(+), 24 deletions(-) (limited to 'fs/btrfs/extent_io.c') diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 2fa23b57d8df..fbe501d3bd01 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -59,7 +59,7 @@ void btrfs_leak_debug_check(void) while (!list_empty(&states)) { state = list_entry(states.next, struct extent_state, leak_list); - printk(KERN_ERR "btrfs state leak: start %llu end %llu " + printk(KERN_ERR "BTRFS: state leak: start %llu end %llu " "state %lu in tree %p refs %d\n", state->start, state->end, state->state, state->tree, atomic_read(&state->refs)); @@ -69,7 +69,7 @@ void btrfs_leak_debug_check(void) while (!list_empty(&buffers)) { eb = list_entry(buffers.next, struct extent_buffer, leak_list); - printk(KERN_ERR "btrfs buffer leak start %llu len %lu " + printk(KERN_ERR "BTRFS: buffer leak start %llu len %lu " "refs %d\n", eb->start, eb->len, atomic_read(&eb->refs)); list_del(&eb->leak_list); @@ -92,7 +92,7 @@ static inline void __btrfs_debug_check_extent_io_range(const char *caller, isize = i_size_read(inode); if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) { printk_ratelimited(KERN_DEBUG - "btrfs: %s: ino %llu isize %llu odd range [%llu,%llu]\n", + "BTRFS: %s: ino %llu isize %llu odd range [%llu,%llu]\n", caller, btrfs_ino(inode), isize, start, end); } } @@ -423,7 +423,7 @@ static int insert_state(struct extent_io_tree *tree, struct rb_node *node; if (end < start) - WARN(1, KERN_ERR "btrfs end < start %llu %llu\n", + WARN(1, KERN_ERR "BTRFS: end < start %llu %llu\n", end, start); state->start = start; state->end = end; @@ -434,7 +434,7 @@ static int insert_state(struct extent_io_tree *tree, if (node) { struct extent_state *found; found = rb_entry(node, struct extent_state, rb_node); - printk(KERN_ERR "btrfs found node %llu %llu on insert of " + printk(KERN_ERR "BTRFS: found node %llu %llu on insert of " "%llu %llu\n", found->start, found->end, start, end); return -EEXIST; @@ -2054,9 +2054,10 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start, return -EIO; } - printk_ratelimited_in_rcu(KERN_INFO "btrfs read error corrected: ino %lu off %llu " - "(dev %s sector %llu)\n", page->mapping->host->i_ino, - start, rcu_str_deref(dev->name), sector); + printk_ratelimited_in_rcu(KERN_INFO + "BTRFS: read error corrected: ino %lu off %llu " + "(dev %s sector %llu)\n", page->mapping->host->i_ino, + start, rcu_str_deref(dev->name), sector); bio_put(bio); return 0; @@ -2386,11 +2387,17 @@ static void end_bio_extent_writepage(struct bio *bio, int err) * advance bv_offset and adjust bv_len to compensate. * Print a warning for nonzero offsets, and an error * if they don't add up to a full page. */ - if (bvec->bv_offset || bvec->bv_len != PAGE_CACHE_SIZE) - printk("%s page write in btrfs with offset %u and length %u\n", - bvec->bv_offset + bvec->bv_len != PAGE_CACHE_SIZE - ? KERN_ERR "partial" : KERN_INFO "incomplete", - bvec->bv_offset, bvec->bv_len); + if (bvec->bv_offset || bvec->bv_len != PAGE_CACHE_SIZE) { + if (bvec->bv_offset + bvec->bv_len != PAGE_CACHE_SIZE) + btrfs_err(BTRFS_I(page->mapping->host)->root->fs_info, + "partial page write in btrfs with offset %u and length %u", + bvec->bv_offset, bvec->bv_len); + else + btrfs_info(BTRFS_I(page->mapping->host)->root->fs_info, + "incomplete page write in btrfs with offset %u and " + "length %u", + bvec->bv_offset, bvec->bv_len); + } start = page_offset(page); end = start + bvec->bv_offset + bvec->bv_len - 1; @@ -2463,11 +2470,17 @@ static void end_bio_extent_readpage(struct bio *bio, int err) * advance bv_offset and adjust bv_len to compensate. * Print a warning for nonzero offsets, and an error * if they don't add up to a full page. */ - if (bvec->bv_offset || bvec->bv_len != PAGE_CACHE_SIZE) - printk("%s page read in btrfs with offset %u and length %u\n", - bvec->bv_offset + bvec->bv_len != PAGE_CACHE_SIZE - ? KERN_ERR "partial" : KERN_INFO "incomplete", - bvec->bv_offset, bvec->bv_len); + if (bvec->bv_offset || bvec->bv_len != PAGE_CACHE_SIZE) { + if (bvec->bv_offset + bvec->bv_len != PAGE_CACHE_SIZE) + btrfs_err(BTRFS_I(page->mapping->host)->root->fs_info, + "partial page read in btrfs with offset %u and length %u", + bvec->bv_offset, bvec->bv_len); + else + btrfs_info(BTRFS_I(page->mapping->host)->root->fs_info, + "incomplete page read in btrfs with offset %u and " + "length %u", + bvec->bv_offset, bvec->bv_len); + } start = page_offset(page); end = start + bvec->bv_offset + bvec->bv_len - 1; @@ -3327,8 +3340,8 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, set_range_writeback(tree, cur, cur + iosize - 1); if (!PageWriteback(page)) { - printk(KERN_ERR "btrfs warning page %lu not " - "writeback, cur %llu end %llu\n", + btrfs_err(BTRFS_I(inode)->root->fs_info, + "page %lu not writeback, cur %llu end %llu", page->index, cur, end); } @@ -5149,12 +5162,12 @@ void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, unsigned long src_i; if (src_offset + len > dst->len) { - printk(KERN_ERR "btrfs memmove bogus src_offset %lu move " + printk(KERN_ERR "BTRFS: memmove bogus src_offset %lu move " "len %lu dst len %lu\n", src_offset, len, dst->len); BUG_ON(1); } if (dst_offset + len > dst->len) { - printk(KERN_ERR "btrfs memmove bogus dst_offset %lu move " + printk(KERN_ERR "BTRFS: memmove bogus dst_offset %lu move " "len %lu dst len %lu\n", dst_offset, len, dst->len); BUG_ON(1); } @@ -5196,12 +5209,12 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, unsigned long src_i; if (src_offset + len > dst->len) { - printk(KERN_ERR "btrfs memmove bogus src_offset %lu move " + printk(KERN_ERR "BTRFS: memmove bogus src_offset %lu move " "len %lu len %lu\n", src_offset, len, dst->len); BUG_ON(1); } if (dst_offset + len > dst->len) { - printk(KERN_ERR "btrfs memmove bogus dst_offset %lu move " + printk(KERN_ERR "BTRFS: memmove bogus dst_offset %lu move " "len %lu len %lu\n", dst_offset, len, dst->len); BUG_ON(1); } -- cgit v1.2.3