diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-11-06 17:17:13 -0800 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-11-06 17:17:13 -0800 |
| commit | 27eb427bdc0960ad64b72da03e3596c801e7a9e9 (patch) | |
| tree | 4170a265e99d455ca53d26a19e59330e3277fccd /fs/btrfs/ordered-data.c | |
| parent | 713009809681e5a7871e96e6992692c805b4480b (diff) | |
| parent | 2959a32a858a2c44bbbce83d19c158d54cc5998a (diff) | |
Merge branch 'for-linus-4.4' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Pull btrfs updates from Chris Mason:
"We have a lot of subvolume quota improvements in here, along with big
piles of cleanups from Dave Sterba and Anand Jain and others.
Josef pitched in a batch of allocator fixes based on production use
here at FB. We found that mount -o ssd_spread greatly improved our
performance on hardware raid5/6, but it exposed some CPU bottlenecks
in the allocator. These patches make a huge difference"
* 'for-linus-4.4' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (100 commits)
Btrfs: fix hole punching when using the no-holes feature
Btrfs: find_free_extent: Do not erroneously skip LOOP_CACHING_WAIT state
btrfs: Fix a data space underflow warning
btrfs: qgroup: Fix a rebase bug which will cause qgroup double free
btrfs: qgroup: Fix a race in delayed_ref which leads to abort trans
btrfs: clear PF_NOFREEZE in cleaner_kthread()
btrfs: qgroup: Don't copy extent buffer to do qgroup rescan
btrfs: add balance filters limits, stripes and usage to supported mask
btrfs: extend balance filter usage to take minimum and maximum
btrfs: add balance filter for stripes
btrfs: extend balance filter limit to take minimum and maximum
btrfs: fix use after free iterating extrefs
btrfs: check unsupported filters in balance arguments
Btrfs: fix regression running delayed references when using qgroups
Btrfs: fix regression when running delayed references
Btrfs: don't do extra bitmap search in one bit case
Btrfs: keep track of largest extent in bitmaps
Btrfs: don't keep trying to build clusters if we are fragmented
Btrfs: cut down on loops through the allocator
Btrfs: don't continue setting up space cache when enospc
...
Diffstat (limited to 'fs/btrfs/ordered-data.c')
| -rw-r--r-- | fs/btrfs/ordered-data.c | 70 |
1 file changed, 56 insertions, 14 deletions
```diff
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 52170cf1757e..8c27292ea9ea 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -345,6 +345,9 @@ int btrfs_dec_test_first_ordered_pending(struct inode *inode,
 
 	if (entry->bytes_left == 0) {
 		ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
+		/*
+		 * Implicit memory barrier after test_and_set_bit
+		 */
 		if (waitqueue_active(&entry->wait))
 			wake_up(&entry->wait);
 	} else {
@@ -409,6 +412,9 @@ have_entry:
 
 	if (entry->bytes_left == 0) {
 		ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
+		/*
+		 * Implicit memory barrier after test_and_set_bit
+		 */
 		if (waitqueue_active(&entry->wait))
 			wake_up(&entry->wait);
 	} else {
@@ -484,15 +490,16 @@ void btrfs_wait_logged_extents(struct btrfs_trans_handle *trans,
 
 	spin_lock_irq(&log->log_extents_lock[index]);
 	while (!list_empty(&log->logged_list[index])) {
+		struct inode *inode;
 		ordered = list_first_entry(&log->logged_list[index],
 					   struct btrfs_ordered_extent,
 					   log_list);
 		list_del_init(&ordered->log_list);
+		inode = ordered->inode;
 		spin_unlock_irq(&log->log_extents_lock[index]);
 
 		if (!test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags) &&
 		    !test_bit(BTRFS_ORDERED_DIRECT, &ordered->flags)) {
-			struct inode *inode = ordered->inode;
 			u64 start = ordered->file_offset;
 			u64 end = ordered->file_offset + ordered->len - 1;
 
@@ -503,20 +510,25 @@ void btrfs_wait_logged_extents(struct btrfs_trans_handle *trans,
 						   &ordered->flags));
 
 		/*
-		 * If our ordered extent completed it means it updated the
-		 * fs/subvol and csum trees already, so no need to make the
-		 * current transaction's commit wait for it, as we end up
-		 * holding memory unnecessarily and delaying the inode's iput
-		 * until the transaction commit (we schedule an iput for the
-		 * inode when the ordered extent's refcount drops to 0), which
-		 * prevents it from being evictable until the transaction
-		 * commits.
+		 * In order to keep us from losing our ordered extent
+		 * information when committing the transaction we have to make
+		 * sure that any logged extents are completed when we go to
+		 * commit the transaction. To do this we simply increase the
+		 * current transactions pending_ordered counter and decrement it
+		 * when the ordered extent completes.
 		 */
-		if (test_bit(BTRFS_ORDERED_COMPLETE, &ordered->flags))
-			btrfs_put_ordered_extent(ordered);
-		else
-			list_add_tail(&ordered->trans_list, &trans->ordered);
-
+		if (!test_bit(BTRFS_ORDERED_COMPLETE, &ordered->flags)) {
+			struct btrfs_ordered_inode_tree *tree;
+
+			tree = &BTRFS_I(inode)->ordered_tree;
+			spin_lock_irq(&tree->lock);
+			if (!test_bit(BTRFS_ORDERED_COMPLETE, &ordered->flags)) {
+				set_bit(BTRFS_ORDERED_PENDING, &ordered->flags);
+				atomic_inc(&trans->transaction->pending_ordered);
+			}
+			spin_unlock_irq(&tree->lock);
+		}
+		btrfs_put_ordered_extent(ordered);
 		spin_lock_irq(&log->log_extents_lock[index]);
 	}
 	spin_unlock_irq(&log->log_extents_lock[index]);
@@ -578,6 +590,7 @@ void btrfs_remove_ordered_extent(struct inode *inode,
 	struct btrfs_ordered_inode_tree *tree;
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct rb_node *node;
+	bool dec_pending_ordered = false;
 
 	tree = &BTRFS_I(inode)->ordered_tree;
 	spin_lock_irq(&tree->lock);
@@ -587,8 +600,37 @@ void btrfs_remove_ordered_extent(struct inode *inode,
 	if (tree->last == node)
 		tree->last = NULL;
 	set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags);
+	if (test_and_clear_bit(BTRFS_ORDERED_PENDING, &entry->flags))
+		dec_pending_ordered = true;
 	spin_unlock_irq(&tree->lock);
 
+	/*
+	 * The current running transaction is waiting on us, we need to let it
+	 * know that we're complete and wake it up.
+	 */
+	if (dec_pending_ordered) {
+		struct btrfs_transaction *trans;
+
+		/*
+		 * The checks for trans are just a formality, it should be set,
+		 * but if it isn't we don't want to deref/assert under the spin
+		 * lock, so be nice and check if trans is set, but ASSERT() so
+		 * if it isn't set a developer will notice.
+		 */
+		spin_lock(&root->fs_info->trans_lock);
+		trans = root->fs_info->running_transaction;
+		if (trans)
+			atomic_inc(&trans->use_count);
+		spin_unlock(&root->fs_info->trans_lock);
+
+		ASSERT(trans);
+		if (trans) {
+			if (atomic_dec_and_test(&trans->pending_ordered))
+				wake_up(&trans->pending_wait);
+			btrfs_put_transaction(trans);
+		}
+	}
+
 	spin_lock(&root->ordered_extent_lock);
 	list_del_init(&entry->root_extent_list);
 	root->nr_ordered_extents--;
```
