From 16e7549f045d33b0c5b0ebf19d08439e9221d40c Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 22 Oct 2013 12:18:51 -0400 Subject: Btrfs: incompatible format change to remove hole extents Btrfs has always had these filler extent data items for holes in inodes. This has made somethings very easy, like logging hole punches and sending hole punches. However for large holey files these extent data items are pure overhead. So add an incompatible feature to no longer add hole extents to reduce the amount of metadata used by these sort of files. This has a few changes for logging and send obviously since they will need to detect holes and log/send the holes if there are any. I've tested this thoroughly with xfstests and it doesn't cause any issues with and without the incompat format set. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/send.c | 158 ++++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 149 insertions(+), 9 deletions(-) (limited to 'fs/btrfs/send.c') diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 945d1db98f26..29803b4129fc 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -111,6 +111,7 @@ struct send_ctx { int cur_inode_deleted; u64 cur_inode_size; u64 cur_inode_mode; + u64 cur_inode_last_extent; u64 send_progress; @@ -145,6 +146,13 @@ struct name_cache_entry { char name[]; }; +static int need_send_hole(struct send_ctx *sctx) +{ + return (sctx->parent_root && !sctx->cur_inode_new && + !sctx->cur_inode_new_gen && !sctx->cur_inode_deleted && + S_ISREG(sctx->cur_inode_mode)); +} + static void fs_path_reset(struct fs_path *p) { if (p->reversed) { @@ -3752,6 +3760,39 @@ out: return ret; } +static int send_hole(struct send_ctx *sctx, u64 end) +{ + struct fs_path *p = NULL; + u64 offset = sctx->cur_inode_last_extent; + u64 len; + int ret = 0; + + p = fs_path_alloc(); + if (!p) + return -ENOMEM; + memset(sctx->read_buf, 0, BTRFS_SEND_READ_SIZE); + while (offset < end) { + len = min_t(u64, end - offset, 
BTRFS_SEND_READ_SIZE); + + ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE); + if (ret < 0) + break; + ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); + if (ret < 0) + break; + TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); + TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset); + TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, len); + ret = send_cmd(sctx); + if (ret < 0) + break; + offset += len; + } +tlv_put_failure: + fs_path_free(p); + return ret; +} + static int send_write_or_clone(struct send_ctx *sctx, struct btrfs_path *path, struct btrfs_key *key, @@ -3979,6 +4020,84 @@ out: return ret; } +static int get_last_extent(struct send_ctx *sctx, u64 offset) +{ + struct btrfs_path *path; + struct btrfs_root *root = sctx->send_root; + struct btrfs_file_extent_item *fi; + struct btrfs_key key; + u64 extent_end; + u8 type; + int ret; + + path = alloc_path_for_send(); + if (!path) + return -ENOMEM; + + sctx->cur_inode_last_extent = 0; + + key.objectid = sctx->cur_ino; + key.type = BTRFS_EXTENT_DATA_KEY; + key.offset = offset; + ret = btrfs_search_slot_for_read(root, &key, path, 0, 1); + if (ret < 0) + goto out; + ret = 0; + btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); + if (key.objectid != sctx->cur_ino || key.type != BTRFS_EXTENT_DATA_KEY) + goto out; + + fi = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_file_extent_item); + type = btrfs_file_extent_type(path->nodes[0], fi); + if (type == BTRFS_FILE_EXTENT_INLINE) { + u64 size = btrfs_file_extent_inline_len(path->nodes[0], fi); + extent_end = ALIGN(key.offset + size, + sctx->send_root->sectorsize); + } else { + extent_end = key.offset + + btrfs_file_extent_num_bytes(path->nodes[0], fi); + } + sctx->cur_inode_last_extent = extent_end; +out: + btrfs_free_path(path); + return ret; +} + +static int maybe_send_hole(struct send_ctx *sctx, struct btrfs_path *path, + struct btrfs_key *key) +{ + struct btrfs_file_extent_item *fi; + u64 extent_end; + u8 type; + int ret = 0; + + if 
(sctx->cur_ino != key->objectid || !need_send_hole(sctx)) + return 0; + + if (sctx->cur_inode_last_extent == (u64)-1) { + ret = get_last_extent(sctx, key->offset - 1); + if (ret) + return ret; + } + + fi = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_file_extent_item); + type = btrfs_file_extent_type(path->nodes[0], fi); + if (type == BTRFS_FILE_EXTENT_INLINE) { + u64 size = btrfs_file_extent_inline_len(path->nodes[0], fi); + extent_end = ALIGN(key->offset + size, + sctx->send_root->sectorsize); + } else { + extent_end = key->offset + + btrfs_file_extent_num_bytes(path->nodes[0], fi); + } + if (sctx->cur_inode_last_extent < key->offset) + ret = send_hole(sctx, key->offset); + sctx->cur_inode_last_extent = extent_end; + return ret; +} + static int process_extent(struct send_ctx *sctx, struct btrfs_path *path, struct btrfs_key *key) @@ -3995,7 +4114,7 @@ static int process_extent(struct send_ctx *sctx, goto out; if (ret) { ret = 0; - goto out; + goto out_hole; } } else { struct btrfs_file_extent_item *ei; @@ -4031,7 +4150,10 @@ static int process_extent(struct send_ctx *sctx, goto out; ret = send_write_or_clone(sctx, path, key, found_clone); - + if (ret) + goto out; +out_hole: + ret = maybe_send_hole(sctx, path, key); out: return ret; } @@ -4157,6 +4279,19 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end) } if (S_ISREG(sctx->cur_inode_mode)) { + if (need_send_hole(sctx)) { + if (sctx->cur_inode_last_extent == (u64)-1) { + ret = get_last_extent(sctx, (u64)-1); + if (ret) + goto out; + } + if (sctx->cur_inode_last_extent < + sctx->cur_inode_size) { + ret = send_hole(sctx, sctx->cur_inode_size); + if (ret) + goto out; + } + } ret = send_truncate(sctx, sctx->cur_ino, sctx->cur_inode_gen, sctx->cur_inode_size); if (ret < 0) @@ -4200,6 +4335,7 @@ static int changed_inode(struct send_ctx *sctx, sctx->cur_ino = key->objectid; sctx->cur_inode_new_gen = 0; + sctx->cur_inode_last_extent = (u64)-1; /* * Set send_progress to current inode. 
This will tell all get_cur_xxx @@ -4480,14 +4616,18 @@ static int changed_cb(struct btrfs_root *left_root, struct send_ctx *sctx = ctx; if (result == BTRFS_COMPARE_TREE_SAME) { - if (key->type != BTRFS_INODE_REF_KEY && - key->type != BTRFS_INODE_EXTREF_KEY) - return 0; - ret = compare_refs(sctx, left_path, key); - if (!ret) + if (key->type == BTRFS_INODE_REF_KEY || + key->type == BTRFS_INODE_EXTREF_KEY) { + ret = compare_refs(sctx, left_path, key); + if (!ret) + return 0; + if (ret < 0) + return ret; + } else if (key->type == BTRFS_EXTENT_DATA_KEY) { + return maybe_send_hole(sctx, left_path, key); + } else { return 0; - if (ret < 0) - return ret; + } result = BTRFS_COMPARE_TREE_CHANGED; ret = 0; } -- cgit v1.2.3 From 5a0f4e2c2b47a755e37dbbb6f691e6504e3147b3 Mon Sep 17 00:00:00 2001 From: Filipe David Borba Manana Date: Tue, 3 Dec 2013 15:55:48 +0000 Subject: Btrfs: fix pass of transid with wrong endianness in send.c fs/btrfs/send.c:2190:9: warning: incorrect type in argument 3 (different base types) fs/btrfs/send.c:2190:9: expected unsigned long long [unsigned] [usertype] value fs/btrfs/send.c:2190:9: got restricted __le64 [usertype] ctransid fs/btrfs/send.c:2195:17: warning: incorrect type in argument 3 (different base types) fs/btrfs/send.c:2195:17: expected unsigned long long [unsigned] [usertype] value fs/btrfs/send.c:2195:17: got restricted __le64 [usertype] ctransid fs/btrfs/send.c:3716:9: warning: incorrect type in argument 3 (different base types) fs/btrfs/send.c:3716:9: expected unsigned long long [unsigned] [usertype] value fs/btrfs/send.c:3716:9: got restricted __le64 [usertype] ctransid Signed-off-by: Filipe David Borba Manana Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/send.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/send.c') diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 29803b4129fc..1896e394d59f 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -2188,12 +2188,12 @@ static int 
send_subvol_begin(struct send_ctx *sctx) TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID, sctx->send_root->root_item.uuid); TLV_PUT_U64(sctx, BTRFS_SEND_A_CTRANSID, - sctx->send_root->root_item.ctransid); + le64_to_cpu(sctx->send_root->root_item.ctransid)); if (parent_root) { TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID, sctx->parent_root->root_item.uuid); TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID, - sctx->parent_root->root_item.ctransid); + le64_to_cpu(sctx->parent_root->root_item.ctransid)); } ret = send_cmd(sctx); @@ -3714,7 +3714,7 @@ verbose_printk("btrfs: send_clone offset=%llu, len=%d, clone_root=%llu, " TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID, clone_root->root->root_item.uuid); TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID, - clone_root->root->root_item.ctransid); + le64_to_cpu(clone_root->root->root_item.ctransid)); TLV_PUT_PATH(sctx, BTRFS_SEND_A_CLONE_PATH, p); TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_OFFSET, clone_root->offset); -- cgit v1.2.3 From 95bc79d50d0ec20c0cdb071629dc3f276a053782 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Mon, 16 Dec 2013 17:34:10 +0100 Subject: btrfs: send: clean up dead code Remove ifdefed code: - tlv_put for 8, 16 and 32, add a generic template if needed in the future - tlv_put_timespec - the btrfs_timespec fields are used - fs_path_remove obsoleted long ago Signed-off-by: David Sterba Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/send.c | 58 ++++++++-------------------------------------------------- 1 file changed, 8 insertions(+), 50 deletions(-) (limited to 'fs/btrfs/send.c') diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 1896e394d59f..8230d11f2cca 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -344,16 +344,6 @@ out: return ret; } -#if 0 -static void fs_path_remove(struct fs_path *p) -{ - BUG_ON(p->reversed); - while (p->start != p->end && *p->end != '/') - p->end--; - *p->end = 0; -} -#endif - static int fs_path_copy(struct fs_path *p, struct fs_path *from) { int ret; @@ -444,30 +434,15 @@ 
static int tlv_put(struct send_ctx *sctx, u16 attr, const void *data, int len) return 0; } -#if 0 -static int tlv_put_u8(struct send_ctx *sctx, u16 attr, u8 value) -{ - return tlv_put(sctx, attr, &value, sizeof(value)); -} - -static int tlv_put_u16(struct send_ctx *sctx, u16 attr, u16 value) -{ - __le16 tmp = cpu_to_le16(value); - return tlv_put(sctx, attr, &tmp, sizeof(tmp)); -} - -static int tlv_put_u32(struct send_ctx *sctx, u16 attr, u32 value) -{ - __le32 tmp = cpu_to_le32(value); - return tlv_put(sctx, attr, &tmp, sizeof(tmp)); -} -#endif +#define TLV_PUT_DEFINE_INT(bits) \ + static int tlv_put_u##bits(struct send_ctx *sctx, \ + u##bits attr, u##bits value) \ + { \ + __le##bits __tmp = cpu_to_le##bits(value); \ + return tlv_put(sctx, attr, &__tmp, sizeof(__tmp)); \ + } -static int tlv_put_u64(struct send_ctx *sctx, u16 attr, u64 value) -{ - __le64 tmp = cpu_to_le64(value); - return tlv_put(sctx, attr, &tmp, sizeof(tmp)); -} +TLV_PUT_DEFINE_INT(64) static int tlv_put_string(struct send_ctx *sctx, u16 attr, const char *str, int len) @@ -483,17 +458,6 @@ static int tlv_put_uuid(struct send_ctx *sctx, u16 attr, return tlv_put(sctx, attr, uuid, BTRFS_UUID_SIZE); } -#if 0 -static int tlv_put_timespec(struct send_ctx *sctx, u16 attr, - struct timespec *ts) -{ - struct btrfs_timespec bts; - bts.sec = cpu_to_le64(ts->tv_sec); - bts.nsec = cpu_to_le32(ts->tv_nsec); - return tlv_put(sctx, attr, &bts, sizeof(bts)); -} -#endif - static int tlv_put_btrfs_timespec(struct send_ctx *sctx, u16 attr, struct extent_buffer *eb, struct btrfs_timespec *ts) @@ -541,12 +505,6 @@ static int tlv_put_btrfs_timespec(struct send_ctx *sctx, u16 attr, if (ret < 0) \ goto tlv_put_failure; \ } while (0) -#define TLV_PUT_TIMESPEC(sctx, attrtype, ts) \ - do { \ - ret = tlv_put_timespec(sctx, attrtype, ts); \ - if (ret < 0) \ - goto tlv_put_failure; \ - } while (0) #define TLV_PUT_BTRFS_TIMESPEC(sctx, attrtype, eb, ts) \ do { \ ret = tlv_put_btrfs_timespec(sctx, attrtype, eb, ts); \ -- cgit 
v1.2.3 From a8d89f5ba0e17622cde8f5ac48ef745a9fb1e13b Mon Sep 17 00:00:00 2001 From: David Sterba Date: Mon, 16 Dec 2013 17:34:14 +0100 Subject: btrfs: remove unused mnt from send_ctx Unused since ed2590953bd06b892f0411fc94e19175d32f197a "Btrfs: stop using vfs_read in send". Signed-off-by: David Sterba Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/send.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'fs/btrfs/send.c') diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 8230d11f2cca..e98c9bc003c8 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -88,8 +88,6 @@ struct send_ctx { u64 cmd_send_size[BTRFS_SEND_C_MAX + 1]; u64 flags; /* 'flags' member of btrfs_ioctl_send_args is u64 */ - struct vfsmount *mnt; - struct btrfs_root *send_root; struct btrfs_root *parent_root; struct clone_root *clone_roots; @@ -4851,8 +4849,6 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) goto out; } - sctx->mnt = mnt_file->f_path.mnt; - sctx->send_root = send_root; sctx->clone_roots_cnt = arg->clone_sources_count; -- cgit v1.2.3 From 2c68653787f91c62f8891209dc1f617088c822e4 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Mon, 16 Dec 2013 17:34:17 +0100 Subject: btrfs: Check read-only status of roots during send All the subvolumes that are involved in send must be read-only during the whole operation. The ioctl SUBVOL_SETFLAGS could be used to change the status to read-write and the result of the send stream is undefined if the data changes unexpectedly. Fix that by adding a refcount for all involved roots and verify that there's no send in progress during a SUBVOL_SETFLAGS ioctl call that does a read-only -> read-write transition. We need refcounts because there are no restrictions on the number of parallel send operations currently running on a single subvolume, be it source, parent or one of the multiple clone sources. Kernel is silent when the RO checks fail and returns EPERM. The same set of checks is done already in userspace before send starts. 
Signed-off-by: David Sterba Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/send.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) (limited to 'fs/btrfs/send.c') diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index e98c9bc003c8..572e8c758712 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -4769,6 +4769,7 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) struct send_ctx *sctx = NULL; u32 i; u64 *clone_sources_tmp = NULL; + int clone_sources_to_rollback = 0; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -4776,6 +4777,14 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) send_root = BTRFS_I(file_inode(mnt_file))->root; fs_info = send_root->fs_info; + /* + * The subvolume must remain read-only during send, protect against + * making it RW. + */ + spin_lock(&send_root->root_item_lock); + send_root->send_in_progress++; + spin_unlock(&send_root->root_item_lock); + /* * This is done when we lookup the root, it should already be complete * by the time we get here. @@ -4811,6 +4820,15 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) up_read(&send_root->fs_info->extent_commit_sem); } + /* + * Userspace tools do the checks and warn the user if it's + * not RO. 
+ */ + if (!btrfs_root_readonly(send_root)) { + ret = -EPERM; + goto out; + } + arg = memdup_user(arg_, sizeof(*arg)); if (IS_ERR(arg)) { ret = PTR_ERR(arg); @@ -4897,6 +4915,15 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) ret = PTR_ERR(clone_root); goto out; } + clone_sources_to_rollback = i + 1; + spin_lock(&clone_root->root_item_lock); + clone_root->send_in_progress++; + if (!btrfs_root_readonly(clone_root)) { + spin_unlock(&clone_root->root_item_lock); + ret = -EPERM; + goto out; + } + spin_unlock(&clone_root->root_item_lock); sctx->clone_roots[i].root = clone_root; } vfree(clone_sources_tmp); @@ -4912,6 +4939,14 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) ret = PTR_ERR(sctx->parent_root); goto out; } + spin_lock(&sctx->parent_root->root_item_lock); + sctx->parent_root->send_in_progress++; + if (!btrfs_root_readonly(sctx->parent_root)) { + spin_unlock(&sctx->parent_root->root_item_lock); + ret = -EPERM; + goto out; + } + spin_unlock(&sctx->parent_root->root_item_lock); } /* @@ -4940,6 +4975,25 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) } out: + for (i = 0; sctx && i < clone_sources_to_rollback; i++) { + struct btrfs_root *r = sctx->clone_roots[i].root; + + spin_lock(&r->root_item_lock); + r->send_in_progress--; + spin_unlock(&r->root_item_lock); + } + if (sctx && !IS_ERR_OR_NULL(sctx->parent_root)) { + struct btrfs_root *r = sctx->parent_root; + + spin_lock(&r->root_item_lock); + r->send_in_progress--; + spin_unlock(&r->root_item_lock); + } + + spin_lock(&send_root->root_item_lock); + send_root->send_in_progress--; + spin_unlock(&send_root->root_item_lock); + kfree(arg); vfree(clone_sources_tmp); -- cgit v1.2.3 From 41ce9970a8a6a362ae8df145f7a03d789e9ef9d2 Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Tue, 17 Dec 2013 19:57:21 +0800 Subject: Btrfs: remove transaction from btrfs send Since David did the work that forces us to use read-only snapshots, we can safely remove transaction protection 
from btrfs send. Signed-off-by: Wang Shilong Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/send.c | 33 --------------------------------- 1 file changed, 33 deletions(-) (limited to 'fs/btrfs/send.c') diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 572e8c758712..78a43b2e5c8e 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -4618,7 +4618,6 @@ out: static int full_send_tree(struct send_ctx *sctx) { int ret; - struct btrfs_trans_handle *trans = NULL; struct btrfs_root *send_root = sctx->send_root; struct btrfs_key key; struct btrfs_key found_key; @@ -4640,19 +4639,6 @@ static int full_send_tree(struct send_ctx *sctx) key.type = BTRFS_INODE_ITEM_KEY; key.offset = 0; -join_trans: - /* - * We need to make sure the transaction does not get committed - * while we do anything on commit roots. Join a transaction to prevent - * this. - */ - trans = btrfs_join_transaction(send_root); - if (IS_ERR(trans)) { - ret = PTR_ERR(trans); - trans = NULL; - goto out; - } - /* * Make sure the tree has not changed after re-joining. We detect this * by comparing start_ctransid and ctransid. They should always match. @@ -4676,19 +4662,6 @@ join_trans: goto out_finish; while (1) { - /* - * When someone want to commit while we iterate, end the - * joined transaction and rejoin. 
- */ - if (btrfs_should_end_transaction(trans, send_root)) { - ret = btrfs_end_transaction(trans, send_root); - trans = NULL; - if (ret < 0) - goto out; - btrfs_release_path(path); - goto join_trans; - } - eb = path->nodes[0]; slot = path->slots[0]; btrfs_item_key_to_cpu(eb, &found_key, slot); @@ -4716,12 +4689,6 @@ out_finish: out: btrfs_free_path(path); - if (trans) { - if (!ret) - ret = btrfs_end_transaction(trans, send_root); - else - btrfs_end_transaction(trans, send_root); - } return ret; } -- cgit v1.2.3 From 66ef7d65c3fc6e5300b9359f1c6537efb23781bb Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 17 Dec 2013 15:07:20 +0100 Subject: btrfs: check balance of send_in_progress Warn if the balance goes below zero, which appears to be unlikely though. Otherwise cleans up the code a bit. Signed-off-by: David Sterba Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/send.c | 38 ++++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 18 deletions(-) (limited to 'fs/btrfs/send.c') diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 78a43b2e5c8e..8877adc45394 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -4725,6 +4725,21 @@ out: return ret; } +static void btrfs_root_dec_send_in_progress(struct btrfs_root* root) +{ + spin_lock(&root->root_item_lock); + root->send_in_progress--; + /* + * Not much left to do, we don't know why it's unbalanced and + * can't blindly reset it to 0. 
+ */ + if (root->send_in_progress < 0) + btrfs_err(root->fs_info, + "send_in_progres unbalanced %d root %llu\n", + root->send_in_progress, root->root_key.objectid); + spin_unlock(&root->root_item_lock); +} + long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) { int ret = 0; @@ -4942,24 +4957,11 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) } out: - for (i = 0; sctx && i < clone_sources_to_rollback; i++) { - struct btrfs_root *r = sctx->clone_roots[i].root; - - spin_lock(&r->root_item_lock); - r->send_in_progress--; - spin_unlock(&r->root_item_lock); - } - if (sctx && !IS_ERR_OR_NULL(sctx->parent_root)) { - struct btrfs_root *r = sctx->parent_root; - - spin_lock(&r->root_item_lock); - r->send_in_progress--; - spin_unlock(&r->root_item_lock); - } - - spin_lock(&send_root->root_item_lock); - send_root->send_in_progress--; - spin_unlock(&send_root->root_item_lock); + for (i = 0; sctx && i < clone_sources_to_rollback; i++) + btrfs_root_dec_send_in_progress(sctx->clone_roots[i].root); + if (sctx && !IS_ERR_OR_NULL(sctx->parent_root)) + btrfs_root_dec_send_in_progress(sctx->parent_root); + btrfs_root_dec_send_in_progress(send_root); kfree(arg); vfree(clone_sources_tmp); -- cgit v1.2.3 From efe120a067c8674a8ae21b194f0e68f098b61ee2 Mon Sep 17 00:00:00 2001 From: Frank Holton Date: Fri, 20 Dec 2013 11:37:06 -0500 Subject: Btrfs: convert printk to btrfs_ and fix BTRFS prefix Convert all applicable cases of printk and pr_* to the btrfs_* macros. Fix all uses of the BTRFS prefix. Signed-off-by: Frank Holton Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/send.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/send.c') diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 8877adc45394..bff0b1ac3be7 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -1234,7 +1234,7 @@ static int find_extent_clone(struct send_ctx *sctx, if (!backref_ctx->found_itself) { /* found a bug in backref code? 
*/ ret = -EIO; - printk(KERN_ERR "btrfs: ERROR did not find backref in " + btrfs_err(sctx->send_root->fs_info, "did not find backref in " "send_root. inode=%llu, offset=%llu, " "disk_byte=%llu found extent=%llu\n", ino, data_offset, disk_byte, found_key.objectid); @@ -4648,7 +4648,7 @@ static int full_send_tree(struct send_ctx *sctx) spin_unlock(&send_root->root_item_lock); if (ctransid != start_ctransid) { - WARN(1, KERN_WARNING "btrfs: the root that you're trying to " + WARN(1, KERN_WARNING "BTRFS: the root that you're trying to " "send was modified in between. This is " "probably a bug.\n"); ret = -EIO; -- cgit v1.2.3 From 896c14f97f700aec6565154f2451605d7c5ce3ed Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Tue, 7 Jan 2014 17:25:18 +0800 Subject: Btrfs: fix wrong send_in_progress accounting Steps to reproduce: # mkfs.btrfs -f /dev/sda8 # mount /dev/sda8 /mnt # btrfs sub snapshot -r /mnt /mnt/snap1 # btrfs sub snapshot -r /mnt /mnt/snap2 # btrfs send /mnt/snap1 -p /mnt/snap2 -f /mnt/1 # dmesg The problem is that we will sort clone roots(include @send_root), it might push @send_root before thus @send_root's @send_in_progress will be decreased twice. 
Cc: David Sterba Signed-off-by: Wang Shilong Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/send.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/send.c') diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index bff0b1ac3be7..5b6978516461 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -4752,6 +4752,7 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) u32 i; u64 *clone_sources_tmp = NULL; int clone_sources_to_rollback = 0; + int sort_clone_roots = 0; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -4942,6 +4943,7 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) sort(sctx->clone_roots, sctx->clone_roots_cnt, sizeof(*sctx->clone_roots), __clone_root_cmp_sort, NULL); + sort_clone_roots = 1; ret = send_subvol(sctx); if (ret < 0) @@ -4957,11 +4959,19 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) } out: - for (i = 0; sctx && i < clone_sources_to_rollback; i++) - btrfs_root_dec_send_in_progress(sctx->clone_roots[i].root); + if (sort_clone_roots) { + for (i = 0; i < sctx->clone_roots_cnt; i++) + btrfs_root_dec_send_in_progress( + sctx->clone_roots[i].root); + } else { + for (i = 0; sctx && i < clone_sources_to_rollback; i++) + btrfs_root_dec_send_in_progress( + sctx->clone_roots[i].root); + + btrfs_root_dec_send_in_progress(send_root); + } if (sctx && !IS_ERR_OR_NULL(sctx->parent_root)) btrfs_root_dec_send_in_progress(sctx->parent_root); - btrfs_root_dec_send_in_progress(send_root); kfree(arg); vfree(clone_sources_tmp); -- cgit v1.2.3 From 18f687d538449373c37cbe52b03f5f3d42b7c7ed Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Tue, 7 Jan 2014 17:25:19 +0800 Subject: Btrfs: fix protection between send and root deletion We should guarantee that parent and clone roots cannot be destroyed during send; for this we have two ideas. 1. By holding @subvol_sem; this might be a nightmare, because it will block all subvolume deletion for a long time. 
2. Miao pointed out we can reuse @send_in_progress, which means we will skip snapshot deletion if a send of that root is in progress. Here we adopt the second approach since it won't block deletion of other subvolumes for a long time. Besides, in btrfs_clean_one_deleted_snapshot() we only check the first root; if this root is involved in a send, we return directly rather than continuing to check. There are several reasons for this: 1. this case seldom happens. 2. after sending, the cleaner thread can continue to drop that root. 3. it keeps the code simple. Cc: David Sterba Signed-off-by: Wang Shilong Reviewed-by: Miao Xie Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/send.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'fs/btrfs/send.c') diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 5b6978516461..4e2461b857f3 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -4753,6 +4753,7 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) u64 *clone_sources_tmp = NULL; int clone_sources_to_rollback = 0; int sort_clone_roots = 0; + int index; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -4893,8 +4894,12 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) key.objectid = clone_sources_tmp[i]; key.type = BTRFS_ROOT_ITEM_KEY; key.offset = (u64)-1; + + index = srcu_read_lock(&fs_info->subvol_srcu); + clone_root = btrfs_read_fs_root_no_name(fs_info, &key); if (IS_ERR(clone_root)) { + srcu_read_unlock(&fs_info->subvol_srcu, index); ret = PTR_ERR(clone_root); goto out; } @@ -4903,10 +4908,13 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) clone_root->send_in_progress++; if (!btrfs_root_readonly(clone_root)) { spin_unlock(&clone_root->root_item_lock); + srcu_read_unlock(&fs_info->subvol_srcu, index); ret = -EPERM; goto out; } spin_unlock(&clone_root->root_item_lock); + srcu_read_unlock(&fs_info->subvol_srcu, index); + sctx->clone_roots[i].root = clone_root; } vfree(clone_sources_tmp); @@ -4917,19 +4925,27 @@ long 
btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) key.objectid = arg->parent_root; key.type = BTRFS_ROOT_ITEM_KEY; key.offset = (u64)-1; + + index = srcu_read_lock(&fs_info->subvol_srcu); + sctx->parent_root = btrfs_read_fs_root_no_name(fs_info, &key); if (IS_ERR(sctx->parent_root)) { + srcu_read_unlock(&fs_info->subvol_srcu, index); ret = PTR_ERR(sctx->parent_root); goto out; } + spin_lock(&sctx->parent_root->root_item_lock); sctx->parent_root->send_in_progress++; if (!btrfs_root_readonly(sctx->parent_root)) { spin_unlock(&sctx->parent_root->root_item_lock); + srcu_read_unlock(&fs_info->subvol_srcu, index); ret = -EPERM; goto out; } spin_unlock(&sctx->parent_root->root_item_lock); + + srcu_read_unlock(&fs_info->subvol_srcu, index); } /* -- cgit v1.2.3 From 8e56338d7d0ee38ecae86d35dae43020356acca1 Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Tue, 7 Jan 2014 17:26:57 +0800 Subject: Btrfs: remove unnecessary transaction commit before send We will finish orphan cleanups during snapshot, so we don't have to commit transaction here. Signed-off-by: Wang Shilong Reviewed-by: Miao Xie Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/send.c | 29 ----------------------------- 1 file changed, 29 deletions(-) (limited to 'fs/btrfs/send.c') diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 4e2461b857f3..591063dac0e6 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -4775,35 +4775,6 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) */ WARN_ON(send_root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE); - /* - * If we just created this root we need to make sure that the orphan - * cleanup has been done and committed since we search the commit root, - * so check its commit root transid with our otransid and if they match - * commit the transaction to make sure everything is updated. 
- */ - down_read(&send_root->fs_info->extent_commit_sem); - if (btrfs_header_generation(send_root->commit_root) == - btrfs_root_otransid(&send_root->root_item)) { - struct btrfs_trans_handle *trans; - - up_read(&send_root->fs_info->extent_commit_sem); - - trans = btrfs_attach_transaction_barrier(send_root); - if (IS_ERR(trans)) { - if (PTR_ERR(trans) != -ENOENT) { - ret = PTR_ERR(trans); - goto out; - } - /* ENOENT means theres no transaction */ - } else { - ret = btrfs_commit_transaction(trans, send_root); - if (ret) - goto out; - } - } else { - up_read(&send_root->fs_info->extent_commit_sem); - } - /* * Userspace tools do the checks and warn the user if it's * not RO. -- cgit v1.2.3 From 28e5dd8f35202ff56b2eb1725ac77f0d0fcb4758 Mon Sep 17 00:00:00 2001 From: Filipe David Borba Manana Date: Sun, 12 Jan 2014 02:26:28 +0000 Subject: Btrfs: fix send to not send non-aligned clone operations It is possible for the send feature to send clone operations that request a cloning range (offset + length) that is not aligned with the block size. This makes the btrfs receive command send issue a clone ioctl call that will fail, as the ioctl will return an -EINVAL error because of the unaligned range. Fix this by not sending clone operations for non block aligned ranges, and instead send regular write operation for these (less common) cases. The following xfstest reproduces this issue, which fails on the second btrfs receive command without this change: seq=`basename $0` seqres=$RESULT_DIR/$seq echo "QA output created by $seq" tmp=`mktemp -d` status=1 # failure is the default! trap "_cleanup; exit \$status" 0 1 2 3 15 _cleanup() { rm -fr $tmp } # get standard environment, filters and checks . ./common/rc . 
./common/filter # real QA test starts here _supported_fs btrfs _supported_os Linux _require_scratch _need_to_be_root rm -f $seqres.full _scratch_mkfs >/dev/null 2>&1 _scratch_mount $XFS_IO_PROG -f -c "truncate 819200" $SCRATCH_MNT/foo | _filter_xfs_io $BTRFS_UTIL_PROG filesystem sync $SCRATCH_MNT | _filter_scratch $XFS_IO_PROG -c "falloc -k 819200 667648" $SCRATCH_MNT/foo | _filter_xfs_io $BTRFS_UTIL_PROG filesystem sync $SCRATCH_MNT | _filter_scratch $XFS_IO_PROG -f -c "pwrite 1482752 2978" $SCRATCH_MNT/foo | _filter_xfs_io $BTRFS_UTIL_PROG filesystem sync $SCRATCH_MNT | _filter_scratch $BTRFS_UTIL_PROG subvol snapshot -r $SCRATCH_MNT $SCRATCH_MNT/mysnap1 | \ _filter_scratch $XFS_IO_PROG -f -c "truncate 883305" $SCRATCH_MNT/foo | _filter_xfs_io $BTRFS_UTIL_PROG filesystem sync $SCRATCH_MNT | _filter_scratch $BTRFS_UTIL_PROG subvol snapshot -r $SCRATCH_MNT $SCRATCH_MNT/mysnap2 | \ _filter_scratch $BTRFS_UTIL_PROG send $SCRATCH_MNT/mysnap1 -f $tmp/1.snap 2>&1 | _filter_scratch $BTRFS_UTIL_PROG send -p $SCRATCH_MNT/mysnap1 $SCRATCH_MNT/mysnap2 \ -f $tmp/2.snap 2>&1 | _filter_scratch md5sum $SCRATCH_MNT/foo | _filter_scratch md5sum $SCRATCH_MNT/mysnap1/foo | _filter_scratch md5sum $SCRATCH_MNT/mysnap2/foo | _filter_scratch _scratch_unmount _check_btrfs_filesystem $SCRATCH_DEV _scratch_mkfs >/dev/null 2>&1 _scratch_mount $BTRFS_UTIL_PROG receive $SCRATCH_MNT -f $tmp/1.snap md5sum $SCRATCH_MNT/mysnap1/foo | _filter_scratch $BTRFS_UTIL_PROG receive $SCRATCH_MNT -f $tmp/2.snap md5sum $SCRATCH_MNT/mysnap2/foo | _filter_scratch _scratch_unmount _check_btrfs_filesystem $SCRATCH_DEV status=0 exit The tests expected output is: QA output created by 025 FSSync 'SCRATCH_MNT' FSSync 'SCRATCH_MNT' wrote 2978/2978 bytes at offset 1482752 XXX Bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) FSSync 'SCRATCH_MNT' Create a readonly snapshot of 'SCRATCH_MNT' in 'SCRATCH_MNT/mysnap1' FSSync 'SCRATCH_MNT' Create a readonly snapshot of 'SCRATCH_MNT' in 'SCRATCH_MNT/mysnap2' At subvol 
SCRATCH_MNT/mysnap1 At subvol SCRATCH_MNT/mysnap2 129b8eaee8d3c2bcad49bec596591cb3 SCRATCH_MNT/foo 42b6369eae2a8725c1aacc0440e597aa SCRATCH_MNT/mysnap1/foo 129b8eaee8d3c2bcad49bec596591cb3 SCRATCH_MNT/mysnap2/foo At subvol mysnap1 42b6369eae2a8725c1aacc0440e597aa SCRATCH_MNT/mysnap1/foo At snapshot mysnap2 129b8eaee8d3c2bcad49bec596591cb3 SCRATCH_MNT/mysnap2/foo Signed-off-by: Filipe David Borba Manana Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/send.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/send.c') diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 591063dac0e6..84aed2f30aa2 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -3761,6 +3761,7 @@ static int send_write_or_clone(struct send_ctx *sctx, u64 len; u32 l; u8 type; + u64 bs = sctx->send_root->fs_info->sb->s_blocksize; ei = btrfs_item_ptr(path->nodes[0], path->slots[0], struct btrfs_file_extent_item); @@ -3784,7 +3785,7 @@ static int send_write_or_clone(struct send_ctx *sctx, goto out; } - if (clone_root) { + if (clone_root && IS_ALIGNED(offset + len, bs)) { ret = send_clone(sctx, offset, len, clone_root); } else if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA) { ret = send_update_extent(sctx, offset, len); -- cgit v1.2.3 From ffcfaf81795471be3c07d6e3143bff31edca5d5a Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Wed, 15 Jan 2014 00:26:43 +0800 Subject: Btrfs: fix wrong search path initialization before searching tree root To search tree root without transaction protection, we should neither search commit root nor skip locking here, fix it. 
Signed-off-by: Wang Shilong Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/send.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/send.c') diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 84aed2f30aa2..aa60cbe7066c 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -2095,7 +2095,7 @@ static int send_subvol_begin(struct send_ctx *sctx) char *name = NULL; int namelen; - path = alloc_path_for_send(); + path = btrfs_alloc_path(); if (!path) return -ENOMEM; -- cgit v1.2.3 From f74b86d85533a98ef7f573487af38f9dd514becb Mon Sep 17 00:00:00 2001 From: Filipe David Borba Manana Date: Tue, 21 Jan 2014 23:36:38 +0000 Subject: Btrfs: fix snprintf usage by send's gen_unique_name The buffer size argument passed to snprintf must account for the trailing null byte added by snprintf, and it returns a value >= than sizeof(buffer) when the string can't fit in the buffer. Since our buffer has a size of 64 characters, and the maximum orphan name we can generate is 63 characters wide, we must pass 64 as the buffer size to snprintf, and not 63.
Signed-off-by: Filipe David Borba Manana Reviewed-by: David Sterba Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/send.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/send.c') diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index aa60cbe7066c..fc1f0abb8fe4 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -1336,7 +1336,7 @@ static int gen_unique_name(struct send_ctx *sctx, return -ENOMEM; while (1) { - len = snprintf(tmp, sizeof(tmp) - 1, "o%llu-%llu-%llu", + len = snprintf(tmp, sizeof(tmp), "o%llu-%llu-%llu", ino, gen, idx); if (len >= sizeof(tmp)) { /* should really not happen */ -- cgit v1.2.3 From 9f03740a956d7ac6a1b8f8c455da6fa5cae11c22 Mon Sep 17 00:00:00 2001 From: Filipe David Borba Manana Date: Wed, 22 Jan 2014 10:00:53 +0000 Subject: Btrfs: fix infinite path build loops in incremental send The send operation processes inodes by their ascending number, and assumes that any rename/move operation can be successfully performed (sent to the caller) once all previous inodes (those with a smaller inode number than the one we're currently processing) were processed. This is not true when an incremental send had to process a hierarchical change between 2 snapshots where the parent-children relationship between directory inodes was reversed - that is, parents became children and children became parents.
This situation made the path building code go into an infinite loop, which kept allocating more and more memory that eventually lead to a krealloc warning being displayed in dmesg: WARNING: CPU: 1 PID: 5705 at mm/page_alloc.c:2477 __alloc_pages_nodemask+0x365/0xad0() Modules linked in: btrfs raid6_pq xor pci_stub vboxpci(O) vboxnetadp(O) vboxnetflt(O) vboxdrv(O) snd_hda_codec_hdmi snd_hda_codec_realtek joydev radeon snd_hda_intel snd_hda_codec snd_hwdep snd_seq_midi snd_pcm psmouse i915 snd_rawmidi serio_raw snd_seq_midi_event lpc_ich snd_seq snd_timer ttm snd_seq_device rfcomm drm_kms_helper parport_pc bnep bluetooth drm ppdev snd soundcore i2c_algo_bit snd_page_alloc binfmt_misc video lp parport r8169 mii hid_generic usbhid hid CPU: 1 PID: 5705 Comm: btrfs Tainted: G O 3.13.0-rc7-fdm-btrfs-next-18+ #3 Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./Z77 Pro4, BIOS P1.50 09/04/2012 [ 5381.660441] 00000000000009ad ffff8806f6f2f4e8 ffffffff81777434 0000000000000007 [ 5381.660447] 0000000000000000 ffff8806f6f2f528 ffffffff8104a9ec ffff8807038f36f0 [ 5381.660452] 0000000000000000 0000000000000206 ffff8807038f2490 ffff8807038f36f0 [ 5381.660457] Call Trace: [ 5381.660464] [] dump_stack+0x4e/0x68 [ 5381.660471] [] warn_slowpath_common+0x8c/0xc0 [ 5381.660476] [] warn_slowpath_null+0x1a/0x20 [ 5381.660480] [] __alloc_pages_nodemask+0x365/0xad0 [ 5381.660487] [] ? local_clock+0x4f/0x60 [ 5381.660491] [] ? free_one_page+0x98/0x440 [ 5381.660495] [] ? local_clock+0x4f/0x60 [ 5381.660502] [] ? __get_free_pages+0x14/0x50 [ 5381.660508] [] ? trace_hardirqs_off_caller+0x28/0xd0 [ 5381.660515] [] alloc_pages_current+0x10f/0x1f0 [ 5381.660520] [] ? __get_free_pages+0x14/0x50 [ 5381.660524] [] __get_free_pages+0x14/0x50 [ 5381.660530] [] kmalloc_order_trace+0x3e/0x100 [ 5381.660536] [] __kmalloc_track_caller+0x220/0x230 [ 5381.660560] [] ? fs_path_ensure_buf.part.12+0x6b/0x200 [btrfs] [ 5381.660564] [] ? 
retint_restore_args+0xe/0xe [ 5381.660569] [] krealloc+0x6f/0xb0 [ 5381.660586] [] fs_path_ensure_buf.part.12+0x6b/0x200 [btrfs] [ 5381.660601] [] fs_path_prepare_for_add+0x98/0xb0 [btrfs] [ 5381.660615] [] fs_path_add_path+0x2c/0x60 [btrfs] [ 5381.660628] [] get_cur_path+0x7c/0x1c0 [btrfs] Even without this loop, the incremental send couldn't succeed, because it would attempt to send a rename/move operation for the lower inode before the inode with the higher number was renamed/moved. This issue is easy to trigger with the following steps: $ mkfs.btrfs -f /dev/sdb3 $ mount /dev/sdb3 /mnt/btrfs $ mkdir -p /mnt/btrfs/a/b/c/d $ mkdir /mnt/btrfs/a/b/c2 $ btrfs subvol snapshot -r /mnt/btrfs /mnt/btrfs/snap1 $ mv /mnt/btrfs/a/b/c/d /mnt/btrfs/a/b/c2/d2 $ mv /mnt/btrfs/a/b/c /mnt/btrfs/a/b/c2/d2/cc $ btrfs subvol snapshot -r /mnt/btrfs /mnt/btrfs/snap2 $ btrfs send -p /mnt/btrfs/snap1 /mnt/btrfs/snap2 > /tmp/incremental.send The structure of the filesystem when the first snapshot is taken is: . (ino 256) |-- a (ino 257) |-- b (ino 258) |-- c (ino 259) | |-- d (ino 260) | |-- c2 (ino 261) And its structure when the second snapshot is taken is: . (ino 256) |-- a (ino 257) |-- b (ino 258) |-- c2 (ino 261) |-- d2 (ino 260) |-- cc (ino 259) Before the move/rename operation is performed for the inode 259, the move/rename for inode 260 must be performed, since 259 is now a child of 260. A test case for xfstests, with a more complex scenario, will follow soon.
Signed-off-by: Filipe David Borba Manana Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/send.c | 539 +++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 518 insertions(+), 21 deletions(-) (limited to 'fs/btrfs/send.c') diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index fc1f0abb8fe4..c96e879bcb16 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -121,6 +121,74 @@ struct send_ctx { int name_cache_size; char *read_buf; + + /* + * We process inodes by their increasing order, so if before an + * incremental send we reverse the parent/child relationship of + * directories such that a directory with a lower inode number was + * the parent of a directory with a higher inode number, and the one + * becoming the new parent got renamed too, we can't rename/move the + * directory with lower inode number when we finish processing it - we + * must process the directory with higher inode number first, then + * rename/move it and then rename/move the directory with lower inode + * number. Example follows. + * + * Tree state when the first send was performed: + * + * . + * |-- a (ino 257) + * |-- b (ino 258) + * | + * | + * |-- c (ino 259) + * | |-- d (ino 260) + * | + * |-- c2 (ino 261) + * + * Tree state when the second (incremental) send is performed: + * + * . + * |-- a (ino 257) + * |-- b (ino 258) + * |-- c2 (ino 261) + * |-- d2 (ino 260) + * |-- cc (ino 259) + * + * The sequence of steps that lead to the second state was: + * + * mv /a/b/c/d /a/b/c2/d2 + * mv /a/b/c /a/b/c2/d2/cc + * + * "c" has lower inode number, but we can't move it (2nd mv operation) + * before we move "d", which has higher inode number. + * + * So we just memorize which move/rename operations must be performed + * later when their respective parent is processed and moved/renamed. + */ + + /* Indexed by parent directory inode number. 
*/ + struct rb_root pending_dir_moves; + + /* + * Reverse index, indexed by the inode number of a directory that + * is waiting for the move/rename of its immediate parent before its + * own move/rename can be performed. + */ + struct rb_root waiting_dir_moves; +}; + +struct pending_dir_move { + struct rb_node node; + struct list_head list; + u64 parent_ino; + u64 ino; + u64 gen; + struct list_head update_refs; +}; + +struct waiting_dir_move { + struct rb_node node; + u64 ino; }; struct name_cache_entry { @@ -144,6 +212,8 @@ struct name_cache_entry { char name[]; }; +static int is_waiting_for_move(struct send_ctx *sctx, u64 ino); + static int need_send_hole(struct send_ctx *sctx) { return (sctx->parent_root && !sctx->cur_inode_new && @@ -1897,6 +1967,7 @@ static void name_cache_free(struct send_ctx *sctx) */ static int __get_cur_name_and_parent(struct send_ctx *sctx, u64 ino, u64 gen, + int skip_name_cache, u64 *parent_ino, u64 *parent_gen, struct fs_path *dest) @@ -1906,6 +1977,8 @@ static int __get_cur_name_and_parent(struct send_ctx *sctx, struct btrfs_path *path = NULL; struct name_cache_entry *nce = NULL; + if (skip_name_cache) + goto get_ref; /* * First check if we already did a call to this function with the same * ino/gen. If yes, check if the cache entry is still up-to-date. If yes @@ -1950,11 +2023,12 @@ static int __get_cur_name_and_parent(struct send_ctx *sctx, goto out_cache; } +get_ref: /* * Depending on whether the inode was already processed or not, use * send_root or parent_root for ref lookup. 
*/ - if (ino < sctx->send_progress) + if (ino < sctx->send_progress && !skip_name_cache) ret = get_first_ref(sctx->send_root, ino, parent_ino, parent_gen, dest); else @@ -1978,6 +2052,8 @@ static int __get_cur_name_and_parent(struct send_ctx *sctx, goto out; ret = 1; } + if (skip_name_cache) + goto out; out_cache: /* @@ -2045,6 +2121,9 @@ static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen, u64 parent_inode = 0; u64 parent_gen = 0; int stop = 0; + u64 start_ino = ino; + u64 start_gen = gen; + int skip_name_cache = 0; name = fs_path_alloc(); if (!name) { @@ -2052,19 +2131,32 @@ static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen, goto out; } + if (is_waiting_for_move(sctx, ino)) + skip_name_cache = 1; + +again: dest->reversed = 1; fs_path_reset(dest); while (!stop && ino != BTRFS_FIRST_FREE_OBJECTID) { fs_path_reset(name); - ret = __get_cur_name_and_parent(sctx, ino, gen, + ret = __get_cur_name_and_parent(sctx, ino, gen, skip_name_cache, &parent_inode, &parent_gen, name); if (ret < 0) goto out; if (ret) stop = 1; + if (!skip_name_cache && + is_waiting_for_move(sctx, parent_inode)) { + ino = start_ino; + gen = start_gen; + stop = 0; + skip_name_cache = 1; + goto again; + } + ret = fs_path_add_path(dest, name); if (ret < 0) goto out; @@ -2636,10 +2728,349 @@ out: return ret; } +static int is_waiting_for_move(struct send_ctx *sctx, u64 ino) +{ + struct rb_node *n = sctx->waiting_dir_moves.rb_node; + struct waiting_dir_move *entry; + + while (n) { + entry = rb_entry(n, struct waiting_dir_move, node); + if (ino < entry->ino) + n = n->rb_left; + else if (ino > entry->ino) + n = n->rb_right; + else + return 1; + } + return 0; +} + +static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino) +{ + struct rb_node **p = &sctx->waiting_dir_moves.rb_node; + struct rb_node *parent = NULL; + struct waiting_dir_move *entry, *dm; + + dm = kmalloc(sizeof(*dm), GFP_NOFS); + if (!dm) + return -ENOMEM; + dm->ino = ino; + + while (*p) { + parent = *p; + entry 
= rb_entry(parent, struct waiting_dir_move, node); + if (ino < entry->ino) { + p = &(*p)->rb_left; + } else if (ino > entry->ino) { + p = &(*p)->rb_right; + } else { + kfree(dm); + return -EEXIST; + } + } + + rb_link_node(&dm->node, parent, p); + rb_insert_color(&dm->node, &sctx->waiting_dir_moves); + return 0; +} + +#ifdef CONFIG_BTRFS_ASSERT + +static int del_waiting_dir_move(struct send_ctx *sctx, u64 ino) +{ + struct rb_node *n = sctx->waiting_dir_moves.rb_node; + struct waiting_dir_move *entry; + + while (n) { + entry = rb_entry(n, struct waiting_dir_move, node); + if (ino < entry->ino) { + n = n->rb_left; + } else if (ino > entry->ino) { + n = n->rb_right; + } else { + rb_erase(&entry->node, &sctx->waiting_dir_moves); + kfree(entry); + return 0; + } + } + return -ENOENT; +} + +#endif + +static int add_pending_dir_move(struct send_ctx *sctx, u64 parent_ino) +{ + struct rb_node **p = &sctx->pending_dir_moves.rb_node; + struct rb_node *parent = NULL; + struct pending_dir_move *entry, *pm; + struct recorded_ref *cur; + int exists = 0; + int ret; + + pm = kmalloc(sizeof(*pm), GFP_NOFS); + if (!pm) + return -ENOMEM; + pm->parent_ino = parent_ino; + pm->ino = sctx->cur_ino; + pm->gen = sctx->cur_inode_gen; + INIT_LIST_HEAD(&pm->list); + INIT_LIST_HEAD(&pm->update_refs); + RB_CLEAR_NODE(&pm->node); + + while (*p) { + parent = *p; + entry = rb_entry(parent, struct pending_dir_move, node); + if (parent_ino < entry->parent_ino) { + p = &(*p)->rb_left; + } else if (parent_ino > entry->parent_ino) { + p = &(*p)->rb_right; + } else { + exists = 1; + break; + } + } + + list_for_each_entry(cur, &sctx->deleted_refs, list) { + ret = dup_ref(cur, &pm->update_refs); + if (ret < 0) + goto out; + } + list_for_each_entry(cur, &sctx->new_refs, list) { + ret = dup_ref(cur, &pm->update_refs); + if (ret < 0) + goto out; + } + + ret = add_waiting_dir_move(sctx, pm->ino); + if (ret) + goto out; + + if (exists) { + list_add_tail(&pm->list, &entry->list); + } else { + 
rb_link_node(&pm->node, parent, p); + rb_insert_color(&pm->node, &sctx->pending_dir_moves); + } + ret = 0; +out: + if (ret) { + __free_recorded_refs(&pm->update_refs); + kfree(pm); + } + return ret; +} + +static struct pending_dir_move *get_pending_dir_moves(struct send_ctx *sctx, + u64 parent_ino) +{ + struct rb_node *n = sctx->pending_dir_moves.rb_node; + struct pending_dir_move *entry; + + while (n) { + entry = rb_entry(n, struct pending_dir_move, node); + if (parent_ino < entry->parent_ino) + n = n->rb_left; + else if (parent_ino > entry->parent_ino) + n = n->rb_right; + else + return entry; + } + return NULL; +} + +static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) +{ + struct fs_path *from_path = NULL; + struct fs_path *to_path = NULL; + u64 orig_progress = sctx->send_progress; + struct recorded_ref *cur; + int ret; + + from_path = fs_path_alloc(); + if (!from_path) + return -ENOMEM; + + sctx->send_progress = pm->ino; + ret = get_cur_path(sctx, pm->ino, pm->gen, from_path); + if (ret < 0) + goto out; + + to_path = fs_path_alloc(); + if (!to_path) { + ret = -ENOMEM; + goto out; + } + + sctx->send_progress = sctx->cur_ino + 1; + ASSERT(del_waiting_dir_move(sctx, pm->ino) == 0); + ret = get_cur_path(sctx, pm->ino, pm->gen, to_path); + if (ret < 0) + goto out; + + ret = send_rename(sctx, from_path, to_path); + if (ret < 0) + goto out; + + ret = send_utimes(sctx, pm->ino, pm->gen); + if (ret < 0) + goto out; + + /* + * After rename/move, need to update the utimes of both new parent(s) + * and old parent(s). 
+ */ + list_for_each_entry(cur, &pm->update_refs, list) { + ret = send_utimes(sctx, cur->dir, cur->dir_gen); + if (ret < 0) + goto out; + } + +out: + fs_path_free(from_path); + fs_path_free(to_path); + sctx->send_progress = orig_progress; + + return ret; +} + +static void free_pending_move(struct send_ctx *sctx, struct pending_dir_move *m) +{ + if (!list_empty(&m->list)) + list_del(&m->list); + if (!RB_EMPTY_NODE(&m->node)) + rb_erase(&m->node, &sctx->pending_dir_moves); + __free_recorded_refs(&m->update_refs); + kfree(m); +} + +static void tail_append_pending_moves(struct pending_dir_move *moves, + struct list_head *stack) +{ + if (list_empty(&moves->list)) { + list_add_tail(&moves->list, stack); + } else { + LIST_HEAD(list); + list_splice_init(&moves->list, &list); + list_add_tail(&moves->list, stack); + list_splice_tail(&list, stack); + } +} + +static int apply_children_dir_moves(struct send_ctx *sctx) +{ + struct pending_dir_move *pm; + struct list_head stack; + u64 parent_ino = sctx->cur_ino; + int ret = 0; + + pm = get_pending_dir_moves(sctx, parent_ino); + if (!pm) + return 0; + + INIT_LIST_HEAD(&stack); + tail_append_pending_moves(pm, &stack); + + while (!list_empty(&stack)) { + pm = list_first_entry(&stack, struct pending_dir_move, list); + parent_ino = pm->ino; + ret = apply_dir_move(sctx, pm); + free_pending_move(sctx, pm); + if (ret) + goto out; + pm = get_pending_dir_moves(sctx, parent_ino); + if (pm) + tail_append_pending_moves(pm, &stack); + } + return 0; + +out: + while (!list_empty(&stack)) { + pm = list_first_entry(&stack, struct pending_dir_move, list); + free_pending_move(sctx, pm); + } + return ret; +} + +static int wait_for_parent_move(struct send_ctx *sctx, + struct recorded_ref *parent_ref) +{ + int ret; + u64 ino = parent_ref->dir; + u64 parent_ino_before, parent_ino_after; + u64 new_gen, old_gen; + struct fs_path *path_before = NULL; + struct fs_path *path_after = NULL; + int len1, len2; + + if (parent_ref->dir <= sctx->cur_ino) + return 
0; + + if (is_waiting_for_move(sctx, ino)) + return 1; + + ret = get_inode_info(sctx->parent_root, ino, NULL, &old_gen, + NULL, NULL, NULL, NULL); + if (ret == -ENOENT) + return 0; + else if (ret < 0) + return ret; + + ret = get_inode_info(sctx->send_root, ino, NULL, &new_gen, + NULL, NULL, NULL, NULL); + if (ret < 0) + return ret; + + if (new_gen != old_gen) + return 0; + + path_before = fs_path_alloc(); + if (!path_before) + return -ENOMEM; + + ret = get_first_ref(sctx->parent_root, ino, &parent_ino_before, + NULL, path_before); + if (ret == -ENOENT) { + ret = 0; + goto out; + } else if (ret < 0) { + goto out; + } + + path_after = fs_path_alloc(); + if (!path_after) { + ret = -ENOMEM; + goto out; + } + + ret = get_first_ref(sctx->send_root, ino, &parent_ino_after, + NULL, path_after); + if (ret == -ENOENT) { + ret = 0; + goto out; + } else if (ret < 0) { + goto out; + } + + len1 = fs_path_len(path_before); + len2 = fs_path_len(path_after); + if ((parent_ino_before != parent_ino_after) && (len1 != len2 || + memcmp(path_before->start, path_after->start, len1))) { + ret = 1; + goto out; + } + ret = 0; + +out: + fs_path_free(path_before); + fs_path_free(path_after); + + return ret; +} + /* * This does all the move/link/unlink/rmdir magic. */ -static int process_recorded_refs(struct send_ctx *sctx) +static int process_recorded_refs(struct send_ctx *sctx, int *pending_move) { int ret = 0; struct recorded_ref *cur; @@ -2788,11 +3219,17 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); * dirs, we always have one new and one deleted * ref. The deleted ref is ignored later. 
*/ - ret = send_rename(sctx, valid_path, - cur->full_path); - if (ret < 0) - goto out; - ret = fs_path_copy(valid_path, cur->full_path); + if (wait_for_parent_move(sctx, cur)) { + ret = add_pending_dir_move(sctx, + cur->dir); + *pending_move = 1; + } else { + ret = send_rename(sctx, valid_path, + cur->full_path); + if (!ret) + ret = fs_path_copy(valid_path, + cur->full_path); + } if (ret < 0) goto out; } else { @@ -3161,6 +3598,7 @@ static int process_all_refs(struct send_ctx *sctx, struct extent_buffer *eb; int slot; iterate_inode_ref_t cb; + int pending_move = 0; path = alloc_path_for_send(); if (!path) @@ -3204,7 +3642,9 @@ static int process_all_refs(struct send_ctx *sctx, } btrfs_release_path(path); - ret = process_recorded_refs(sctx); + ret = process_recorded_refs(sctx, &pending_move); + /* Only applicable to an incremental send. */ + ASSERT(pending_move == 0); out: btrfs_free_path(path); @@ -4165,7 +4605,9 @@ out: return ret; } -static int process_recorded_refs_if_needed(struct send_ctx *sctx, int at_end) +static int process_recorded_refs_if_needed(struct send_ctx *sctx, int at_end, + int *pending_move, + int *refs_processed) { int ret = 0; @@ -4177,17 +4619,11 @@ static int process_recorded_refs_if_needed(struct send_ctx *sctx, int at_end) if (list_empty(&sctx->new_refs) && list_empty(&sctx->deleted_refs)) goto out; - ret = process_recorded_refs(sctx); + ret = process_recorded_refs(sctx, pending_move); if (ret < 0) goto out; - /* - * We have processed the refs and thus need to advance send_progress. - * Now, calls to get_cur_xxx will take the updated refs of the current - * inode into account. 
- */ - sctx->send_progress = sctx->cur_ino + 1; - + *refs_processed = 1; out: return ret; } @@ -4203,11 +4639,29 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end) u64 right_gid; int need_chmod = 0; int need_chown = 0; + int pending_move = 0; + int refs_processed = 0; - ret = process_recorded_refs_if_needed(sctx, at_end); + ret = process_recorded_refs_if_needed(sctx, at_end, &pending_move, + &refs_processed); if (ret < 0) goto out; + /* + * We have processed the refs and thus need to advance send_progress. + * Now, calls to get_cur_xxx will take the updated refs of the current + * inode into account. + * + * On the other hand, if our current inode is a directory and couldn't + * be moved/renamed because its parent was renamed/moved too and it has + * a higher inode number, we can only move/rename our current inode + * after we moved/renamed its parent. Therefore in this case operate on + * the old path (pre move/rename) of our current inode, and the + * move/rename will be performed later. + */ + if (refs_processed && !pending_move) + sctx->send_progress = sctx->cur_ino + 1; + if (sctx->cur_ino == 0 || sctx->cur_inode_deleted) goto out; if (!at_end && sctx->cmp_key->objectid == sctx->cur_ino) @@ -4269,9 +4723,21 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end) } /* - * Need to send that every time, no matter if it actually changed - * between the two trees as we have done changes to the inode before. + * If other directory inodes depended on our current directory + * inode's move/rename, now do their move/rename operations. */ + if (!is_waiting_for_move(sctx, sctx->cur_ino)) { + ret = apply_children_dir_moves(sctx); + if (ret) + goto out; + } + + /* + * Need to send that every time, no matter if it actually + * changed between the two trees as we have done changes to + * the inode before. 
+ */ + sctx->send_progress = sctx->cur_ino + 1; ret = send_utimes(sctx, sctx->cur_ino, sctx->cur_inode_gen); if (ret < 0) goto out; @@ -4839,6 +5305,9 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) goto out; } + sctx->pending_dir_moves = RB_ROOT; + sctx->waiting_dir_moves = RB_ROOT; + sctx->clone_roots = vzalloc(sizeof(struct clone_root) * (arg->clone_sources_count + 1)); if (!sctx->clone_roots) { @@ -4947,6 +5416,34 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) } out: + WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->pending_dir_moves)); + while (sctx && !RB_EMPTY_ROOT(&sctx->pending_dir_moves)) { + struct rb_node *n; + struct pending_dir_move *pm; + + n = rb_first(&sctx->pending_dir_moves); + pm = rb_entry(n, struct pending_dir_move, node); + while (!list_empty(&pm->list)) { + struct pending_dir_move *pm2; + + pm2 = list_first_entry(&pm->list, + struct pending_dir_move, list); + free_pending_move(sctx, pm2); + } + free_pending_move(sctx, pm); + } + + WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->waiting_dir_moves)); + while (sctx && !RB_EMPTY_ROOT(&sctx->waiting_dir_moves)) { + struct rb_node *n; + struct waiting_dir_move *dm; + + n = rb_first(&sctx->waiting_dir_moves); + dm = rb_entry(n, struct waiting_dir_move, node); + rb_erase(&dm->node, &sctx->waiting_dir_moves); + kfree(dm); + } + if (sort_clone_roots) { for (i = 0; i < sctx->clone_roots_cnt; i++) btrfs_root_dec_send_in_progress( -- cgit v1.2.3 From 7fdd29d02e0ab595a857fe9c7b71e752ff665372 Mon Sep 17 00:00:00 2001 From: Filipe David Borba Manana Date: Fri, 24 Jan 2014 17:42:09 +0000 Subject: Btrfs: make send's file extent item search more efficient Instead of looking for a file extent item, process it, release the path and do a btree search for the next file extent item, just process all file extent items in a leaf without intermediate btree searches. 
This way we save cpu and we're not blocking other tasks or affecting concurrency on the btree, because send's paths use the commit root and skip btree node/leaf locking. Signed-off-by: Filipe David Borba Manana Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/send.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) (limited to 'fs/btrfs/send.c') diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index c96e879bcb16..4d31f72bdf41 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -4573,17 +4573,25 @@ static int process_all_extents(struct send_ctx *sctx) key.objectid = sctx->cmp_key->objectid; key.type = BTRFS_EXTENT_DATA_KEY; key.offset = 0; - while (1) { - ret = btrfs_search_slot_for_read(root, &key, path, 1, 0); - if (ret < 0) - goto out; - if (ret) { - ret = 0; - goto out; - } + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + goto out; + while (1) { eb = path->nodes[0]; slot = path->slots[0]; + + if (slot >= btrfs_header_nritems(eb)) { + ret = btrfs_next_leaf(root, path); + if (ret < 0) { + goto out; + } else if (ret > 0) { + ret = 0; + break; + } + continue; + } + btrfs_item_key_to_cpu(eb, &found_key, slot); if (found_key.objectid != key.objectid || @@ -4596,8 +4604,7 @@ static int process_all_extents(struct send_ctx *sctx) if (ret < 0) goto out; - btrfs_release_path(path); - key.offset = found_key.offset + 1; + path->slots[0]++; } out: -- cgit v1.2.3 From bf54f412f0624786ac8a115110b5203430a9eebb Mon Sep 17 00:00:00 2001 From: Filipe David Borba Manana Date: Tue, 28 Jan 2014 01:38:06 +0000 Subject: Btrfs: fix send file hole detection leading to data corruption There was a case where file hole detection was incorrect and it would cause an incremental send to override a section of a file with zeroes. 
This happened in the case where between the last leaf we processed which contained a file extent item for our current inode and the leaf we're currently at (and which has a file extent item for our current inode) there are only leafs containing exclusively file extent items for our current inode, and none of them was updated since the previous send operation. The file hole detection code would incorrectly consider the file range covered by these leafs as a hole. A test case for xfstests follows soon. Signed-off-by: Filipe David Borba Manana Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/send.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'fs/btrfs/send.c') diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 4d31f72bdf41..85259cba784a 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -4489,6 +4489,21 @@ static int maybe_send_hole(struct send_ctx *sctx, struct btrfs_path *path, extent_end = key->offset + btrfs_file_extent_num_bytes(path->nodes[0], fi); } + + if (path->slots[0] == 0 && + sctx->cur_inode_last_extent < key->offset) { + /* + * We might have skipped entire leafs that contained only + * file extent items for our current inode. These leafs have + * a generation number smaller (older) than the one in the + * current leaf and the leaf our last extent came from, and + * are located between these 2 leafs. + */ + ret = get_last_extent(sctx, key->offset - 1); + if (ret) + return ret; + } + if (sctx->cur_inode_last_extent < key->offset) ret = send_hole(sctx, key->offset); sctx->cur_inode_last_extent = extent_end; -- cgit v1.2.3 From 514ac8ad8793a097c0c9d89202c642479d6dfa34 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 3 Jan 2014 21:07:00 -0800 Subject: Btrfs: don't use ram_bytes for uncompressed inline items If we truncate an uncompressed inline item, ram_bytes isn't updated to reflect the new size.
The fix uses the size directly from the item header when reading uncompressed inlines, and also fixes truncate to update the size as it goes. Reported-by: Jens Axboe Signed-off-by: Chris Mason CC: stable@vger.kernel.org --- fs/btrfs/send.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/send.c') diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 85259cba784a..730dce395858 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -1377,7 +1377,7 @@ static int read_symlink(struct btrfs_root *root, BUG_ON(compression); off = btrfs_file_extent_inline_start(ei); - len = btrfs_file_extent_inline_len(path->nodes[0], ei); + len = btrfs_file_extent_inline_len(path->nodes[0], path->slots[0], ei); ret = fs_path_add_from_extent_buffer(dest, path->nodes[0], off, len); @@ -4207,7 +4207,8 @@ static int send_write_or_clone(struct send_ctx *sctx, struct btrfs_file_extent_item); type = btrfs_file_extent_type(path->nodes[0], ei); if (type == BTRFS_FILE_EXTENT_INLINE) { - len = btrfs_file_extent_inline_len(path->nodes[0], ei); + len = btrfs_file_extent_inline_len(path->nodes[0], + path->slots[0], ei); /* * it is possible the inline item won't cover the whole page, * but there may be items after this page.
Make @@ -4448,7 +4449,8 @@ static int get_last_extent(struct send_ctx *sctx, u64 offset) struct btrfs_file_extent_item); type = btrfs_file_extent_type(path->nodes[0], fi); if (type == BTRFS_FILE_EXTENT_INLINE) { - u64 size = btrfs_file_extent_inline_len(path->nodes[0], fi); + u64 size = btrfs_file_extent_inline_len(path->nodes[0], + path->slots[0], fi); extent_end = ALIGN(key.offset + size, sctx->send_root->sectorsize); } else { @@ -4482,7 +4484,8 @@ static int maybe_send_hole(struct send_ctx *sctx, struct btrfs_path *path, struct btrfs_file_extent_item); type = btrfs_file_extent_type(path->nodes[0], fi); if (type == BTRFS_FILE_EXTENT_INLINE) { - u64 size = btrfs_file_extent_inline_len(path->nodes[0], fi); + u64 size = btrfs_file_extent_inline_len(path->nodes[0], + path->slots[0], fi); extent_end = ALIGN(key->offset + size, sctx->send_root->sectorsize); } else { -- cgit v1.2.3