summaryrefslogtreecommitdiff
path: root/fs/btrfs/tree-log.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/tree-log.c')
-rw-r--r--fs/btrfs/tree-log.c63
1 files changed, 53 insertions, 10 deletions
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 6ba022ed4a52..c7190f322576 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -2223,8 +2223,10 @@ again:
nritems = btrfs_header_nritems(path->nodes[0]);
if (path->slots[0] >= nritems) {
ret = btrfs_next_leaf(root, path);
- if (ret)
+ if (ret == 1)
break;
+ else if (ret < 0)
+ goto out;
}
btrfs_item_key_to_cpu(path->nodes[0], &found_key,
path->slots[0]);
@@ -2959,8 +2961,11 @@ out_wake_log_root:
mutex_unlock(&log_root_tree->log_mutex);
/*
- * The barrier before waitqueue_active is implied by mutex_unlock
+ * The barrier before waitqueue_active is needed so all the updates
+ * above are seen by the woken threads. It might not be necessary, but
+ * proving that seems to be hard.
*/
+ smp_mb();
if (waitqueue_active(&log_root_tree->log_commit_wait[index2]))
wake_up(&log_root_tree->log_commit_wait[index2]);
out:
@@ -2971,8 +2976,11 @@ out:
mutex_unlock(&root->log_mutex);
/*
- * The barrier before waitqueue_active is implied by mutex_unlock
+ * The barrier before waitqueue_active is needed so all the updates
+ * above are seen by the woken threads. It might not be necessary, but
+ * proving that seems to be hard.
*/
+ smp_mb();
if (waitqueue_active(&root->log_commit_wait[index1]))
wake_up(&root->log_commit_wait[index1]);
return ret;
@@ -3313,9 +3321,16 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
}
btrfs_release_path(path);
- /* find the first key from this transaction again */
+ /*
+ * Find the first key from this transaction again. See the note for
+ * log_new_dir_dentries, if we're logging a directory recursively we
+ * won't be holding its i_mutex, which means we can modify the directory
+ * while we're logging it. If we remove an entry between our first
+ * search and this search we'll not find the key again and can just
+ * bail.
+ */
ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0);
- if (WARN_ON(ret != 0))
+ if (ret != 0)
goto done;
/*
@@ -3378,8 +3393,11 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
* from this directory and from this transaction
*/
ret = btrfs_next_leaf(root, path);
- if (ret == 1) {
- last_offset = (u64)-1;
+ if (ret) {
+ if (ret == 1)
+ last_offset = (u64)-1;
+ else
+ err = ret;
goto done;
}
btrfs_item_key_to_cpu(path->nodes[0], &tmp, path->slots[0]);
@@ -3830,6 +3848,7 @@ fill_holes:
ASSERT(ret == 0);
src = src_path->nodes[0];
i = 0;
+ need_find_last_extent = true;
}
btrfs_item_key_to_cpu(src, &key, i);
@@ -5228,9 +5247,33 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans,
dir_inode = btrfs_iget(root->fs_info->sb, &inode_key,
root, NULL);
- /* If parent inode was deleted, skip it. */
- if (IS_ERR(dir_inode))
- continue;
+ /*
+ * If the parent inode was deleted, return an error to
+ * fallback to a transaction commit. This is to prevent
+ * getting an inode that was moved from one parent A to
+ * a parent B, got its former parent A deleted and then
+ * it got fsync'ed, from existing at both parents after
+ * a log replay (and the old parent still existing).
+ * Example:
+ *
+ * mkdir /mnt/A
+ * mkdir /mnt/B
+ * touch /mnt/B/bar
+ * sync
+ * mv /mnt/B/bar /mnt/A/bar
+ * mv -T /mnt/A /mnt/B
+ * fsync /mnt/B/bar
+ * <power fail>
+ *
+ * If we ignore the old parent B which got deleted,
+ * after a log replay we would have file bar linked
+ * at both parents and the old parent B would still
+ * exist.
+ */
+ if (IS_ERR(dir_inode)) {
+ ret = PTR_ERR(dir_inode);
+ goto out;
+ }
ret = btrfs_log_inode(trans, root, dir_inode,
LOG_INODE_ALL, 0, LLONG_MAX, ctx);