diff options
Diffstat (limited to 'fs/btrfs/extent_io.c')
-rw-r--r-- | fs/btrfs/extent_io.c | 68 |
1 files changed, 46 insertions, 22 deletions
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 315f21191643..42c745dbccfa 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3995,6 +3995,10 @@ retry: if (!ret) { free_extent_buffer(eb); continue; + } else if (ret < 0) { + done = 1; + free_extent_buffer(eb); + break; } ret = write_one_eb(eb, fs_info, wbc, &epd); @@ -4153,6 +4157,14 @@ retry: */ scanned = 1; index = 0; + + /* + * If we're looping we could run into a page that is locked by a + * writer and that writer could be waiting on writeback for a + * page in our current bio, and thus deadlock, so flush the + * write bio here. + */ + flush_write_bio(data); goto retry; } btrfs_add_delayed_iput(inode); @@ -4418,6 +4430,8 @@ int try_release_extent_mapping(struct extent_map_tree *map, /* once for us */ free_extent_map(em); + + cond_resched(); /* Allow large-extent preemption. */ } } return try_release_extent_state(map, tree, page, mask); @@ -4842,25 +4856,28 @@ err: static void check_buffer_tree_ref(struct extent_buffer *eb) { int refs; - /* the ref bit is tricky. We have to make sure it is set - * if we have the buffer dirty. Otherwise the - * code to free a buffer can end up dropping a dirty - * page + /* + * The TREE_REF bit is first set when the extent_buffer is added + * to the radix tree. It is also reset, if unset, when a new reference + * is created by find_extent_buffer. * - * Once the ref bit is set, it won't go away while the - * buffer is dirty or in writeback, and it also won't - * go away while we have the reference count on the - * eb bumped. + * It is only cleared in two cases: freeing the last non-tree + * reference to the extent_buffer when its STALE bit is set or + * calling releasepage when the tree reference is the only reference. * - * We can't just set the ref bit without bumping the - * ref on the eb because free_extent_buffer might - * see the ref bit and try to clear it. If this happens - * free_extent_buffer might end up dropping our original - * ref by mistake and freeing the page before we are able - * to add one more ref. + * In both cases, care is taken to ensure that the extent_buffer's + * pages are not under io. However, releasepage can be concurrently + * called with creating new references, which is prone to race + * conditions between the calls to check_buffer_tree_ref in those + * codepaths and clearing TREE_REF in try_release_extent_buffer. * - * So bump the ref count first, then set the bit. If someone - * beat us to it, drop the ref we added. + * The actual lifetime of the extent_buffer in the radix tree is + * adequately protected by the refcount, but the TREE_REF bit and + * its corresponding reference are not. To protect against this + * class of races, we call check_buffer_tree_ref from the codepaths + * which trigger io after they set eb->io_pages. Note that once io is + * initiated, TREE_REF can no longer be cleared, so that is the + * moment at which any such race is best fixed. */ refs = atomic_read(&eb->refs); if (refs >= 2 && test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) @@ -4937,12 +4954,14 @@ struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info, return eb; eb = alloc_dummy_extent_buffer(fs_info, start); if (!eb) - return NULL; + return ERR_PTR(-ENOMEM); eb->fs_info = fs_info; again: ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM); - if (ret) + if (ret) { + exists = ERR_PTR(ret); goto free_eb; + } spin_lock(&fs_info->buffer_lock); ret = radix_tree_insert(&fs_info->buffer_radix, start >> PAGE_CACHE_SHIFT, eb); @@ -5325,6 +5344,11 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, clear_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags); eb->read_mirror = 0; atomic_set(&eb->io_pages, num_reads); + /* + * It is possible for releasepage to clear the TREE_REF bit before we + * set io_pages. See check_buffer_tree_ref for a more detailed comment. + */ + check_buffer_tree_ref(eb); for (i = start_i; i < num_pages; i++) { page = eb->pages[i]; if (!PageUptodate(page)) { @@ -5400,9 +5424,9 @@ void read_extent_buffer(const struct extent_buffer *eb, void *dstv, } } -int read_extent_buffer_to_user(const struct extent_buffer *eb, - void __user *dstv, - unsigned long start, unsigned long len) +int read_extent_buffer_to_user_nofault(const struct extent_buffer *eb, + void __user *dstv, + unsigned long start, unsigned long len) { size_t cur; size_t offset; @@ -5423,7 +5447,7 @@ int read_extent_buffer_to_user(const struct extent_buffer *eb, cur = min(len, (PAGE_CACHE_SIZE - offset)); kaddr = page_address(page); - if (copy_to_user(dst, kaddr + offset, cur)) { + if (probe_user_write(dst, kaddr + offset, cur)) { ret = -EFAULT; break; } |