From 1c1c8747cd0528fe1d225badf25bf5346d799ea3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 11 Dec 2013 23:07:51 -0500 Subject: btrfs: sanitize BTRFS_IOC_FILE_EXTENT_SAME * don't assume that ->dest_count won't change between copy_from_user() and memdup_user() * use fdget instead of fget * don't bother comparing superblocks when we'd already compared vfsmounts * get rid of excessive goto * use file_inode() instead of open-coding the sucker Signed-off-by: Al Viro --- fs/btrfs/ioctl.c | 70 +++++++++++++++++++------------------------------------- 1 file changed, 24 insertions(+), 46 deletions(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 21da5762b0b1..ad27dcea319c 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2686,14 +2686,11 @@ out_unlock: #define BTRFS_MAX_DEDUPE_LEN (16 * 1024 * 1024) static long btrfs_ioctl_file_extent_same(struct file *file, - void __user *argp) + struct btrfs_ioctl_same_args __user *argp) { - struct btrfs_ioctl_same_args tmp; struct btrfs_ioctl_same_args *same; struct btrfs_ioctl_same_extent_info *info; - struct inode *src = file->f_dentry->d_inode; - struct file *dst_file = NULL; - struct inode *dst; + struct inode *src = file_inode(file); u64 off; u64 len; int i; @@ -2701,6 +2698,7 @@ static long btrfs_ioctl_file_extent_same(struct file *file, unsigned long size; u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize; bool is_admin = capable(CAP_SYS_ADMIN); + u16 count; if (!(file->f_mode & FMODE_READ)) return -EINVAL; @@ -2709,17 +2707,14 @@ static long btrfs_ioctl_file_extent_same(struct file *file, if (ret) return ret; - if (copy_from_user(&tmp, - (struct btrfs_ioctl_same_args __user *)argp, - sizeof(tmp))) { + if (get_user(count, &argp->dest_count)) { ret = -EFAULT; goto out; } - size = sizeof(tmp) + - tmp.dest_count * sizeof(struct btrfs_ioctl_same_extent_info); + size = offsetof(struct btrfs_ioctl_same_args __user, info[count]); - same = memdup_user((struct btrfs_ioctl_same_args __user *)argp, size); + same = memdup_user(argp, size); if (IS_ERR(same)) { ret = PTR_ERR(same); @@ -2756,52 +2751,35 @@ static long btrfs_ioctl_file_extent_same(struct file *file, goto out; /* pre-format output fields to sane values */ - for (i = 0; i < same->dest_count; i++) { + for (i = 0; i < count; i++) { same->info[i].bytes_deduped = 0ULL; same->info[i].status = 0; } - ret = 0; - for (i = 0; i < same->dest_count; i++) { - info = &same->info[i]; - - dst_file = fget(info->fd); - if (!dst_file) { + for (i = 0, info = same->info; i < count; i++, info++) { + struct inode *dst; + struct fd dst_file = fdget(info->fd); + if (!dst_file.file) { info->status = -EBADF; - goto next; + continue; } + dst = file_inode(dst_file.file); - if (!(is_admin || (dst_file->f_mode & FMODE_WRITE))) { + if (!(is_admin || (dst_file.file->f_mode & FMODE_WRITE))) { info->status = -EINVAL; - goto next; - } - - info->status = -EXDEV; - if (file->f_path.mnt != dst_file->f_path.mnt) - goto next; - - dst = dst_file->f_dentry->d_inode; - if (src->i_sb != dst->i_sb) - goto next; - - if (S_ISDIR(dst->i_mode)) { + } else if (file->f_path.mnt != dst_file.file->f_path.mnt) { + info->status = -EXDEV; + } else if (S_ISDIR(dst->i_mode)) { info->status = -EISDIR; - goto next; - } - - if (!S_ISREG(dst->i_mode)) { + } else if (!S_ISREG(dst->i_mode)) { info->status = -EACCES; - goto next; + } else { + info->status = btrfs_extent_same(src, off, len, dst, + info->logical_offset); + if (info->status == 0) + info->bytes_deduped += len; } - - info->status = btrfs_extent_same(src, off, len, dst, - info->logical_offset); - if (info->status == 0) - info->bytes_deduped += len; - -next: - if (dst_file) - fput(dst_file); + fdput(dst_file); } ret = copy_to_user(argp, same, size); -- cgit v1.2.3 From 2eaa055fab4e3127c9f572fda1b710cbb2acdf1c Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Fri, 15 Nov 2013 15:33:55 -0500 Subject: btrfs: add ioctls to query/change feature bits online There are some feature bits that require no offline setup and can be enabled online. I've only reviewed extended irefs, but there will probably be more. We introduce three new ioctls: - BTRFS_IOC_GET_SUPPORTED_FEATURES: query the kernel for supported features. - BTRFS_IOC_GET_FEATURES: query the kernel for enabled features on a per-fs basis, as well as querying for which features are changeable with mounted. - BTRFS_IOC_SET_FEATURES: change features on a per-fs basis. We introduce two new masks per feature set (_SAFE_SET and _SAFE_CLEAR) that allow us to define which features are safe to change at runtime. The failure modes for BTRFS_IOC_SET_FEATURES are as follows: - Enabling a completely unsupported feature: warns and returns -ENOTSUPP - Enabling a feature that can only be done offline: warns and returns -EPERM Signed-off-by: Jeff Mahoney Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 142 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 142 insertions(+) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 21da5762b0b1..d4e105b54091 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -4480,6 +4480,142 @@ out_unlock: return ret; } +#define INIT_FEATURE_FLAGS(suffix) \ + { .compat_flags = BTRFS_FEATURE_COMPAT_##suffix, \ + .compat_ro_flags = BTRFS_FEATURE_COMPAT_RO_##suffix, \ + .incompat_flags = BTRFS_FEATURE_INCOMPAT_##suffix } + +static int btrfs_ioctl_get_supported_features(struct file *file, + void __user *arg) +{ + static struct btrfs_ioctl_feature_flags features[3] = { + INIT_FEATURE_FLAGS(SUPP), + INIT_FEATURE_FLAGS(SAFE_SET), + INIT_FEATURE_FLAGS(SAFE_CLEAR) + }; + + if (copy_to_user(arg, &features, sizeof(features))) + return -EFAULT; + + return 0; +} + +static int btrfs_ioctl_get_features(struct file *file, void __user *arg) +{ + struct btrfs_root *root = BTRFS_I(file_inode(file))->root; + struct btrfs_super_block *super_block = root->fs_info->super_copy; + struct btrfs_ioctl_feature_flags features; + + features.compat_flags = btrfs_super_compat_flags(super_block); + features.compat_ro_flags = btrfs_super_compat_ro_flags(super_block); + features.incompat_flags = btrfs_super_incompat_flags(super_block); + + if (copy_to_user(arg, &features, sizeof(features))) + return -EFAULT; + + return 0; +} + +static int check_feature_bits(struct btrfs_root *root, const char *type, + u64 change_mask, u64 flags, u64 supported_flags, + u64 safe_set, u64 safe_clear) +{ + u64 disallowed, unsupported; + u64 set_mask = flags & change_mask; + u64 clear_mask = ~flags & change_mask; + + unsupported = set_mask & ~supported_flags; + if (unsupported) { + btrfs_warn(root->fs_info, + "this kernel does not support %s bits 0x%llx", + type, unsupported); + return -EOPNOTSUPP; + } + + disallowed = set_mask & ~safe_set; + if (disallowed) { + btrfs_warn(root->fs_info, + "can't set %s bits 0x%llx while mounted", + type, disallowed); + return -EPERM; + } + + disallowed = clear_mask & ~safe_clear; + if (disallowed) { + btrfs_warn(root->fs_info, + "can't clear %s bits 0x%llx while mounted", + type, disallowed); + return -EPERM; + } + + return 0; +} + +#define check_feature(root, change_mask, flags, mask_base) \ +check_feature_bits(root, # mask_base, change_mask, flags, \ + BTRFS_FEATURE_ ## mask_base ## _SUPP, \ + BTRFS_FEATURE_ ## mask_base ## _SAFE_SET, \ + BTRFS_FEATURE_ ## mask_base ## _SAFE_CLEAR) + +static int btrfs_ioctl_set_features(struct file *file, void __user *arg) +{ + struct btrfs_root *root = BTRFS_I(file_inode(file))->root; + struct btrfs_super_block *super_block = root->fs_info->super_copy; + struct btrfs_ioctl_feature_flags flags[2]; + struct btrfs_trans_handle *trans; + u64 newflags; + int ret; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (copy_from_user(flags, arg, sizeof(flags))) + return -EFAULT; + + /* Nothing to do */ + if (!flags[0].compat_flags && !flags[0].compat_ro_flags && + !flags[0].incompat_flags) + return 0; + + ret = check_feature(root, flags[0].compat_flags, + flags[1].compat_flags, COMPAT); + if (ret) + return ret; + + ret = check_feature(root, flags[0].compat_ro_flags, + flags[1].compat_ro_flags, COMPAT_RO); + if (ret) + return ret; + + ret = check_feature(root, flags[0].incompat_flags, + flags[1].incompat_flags, INCOMPAT); + if (ret) + return ret; + + trans = btrfs_start_transaction(root, 1); + if (IS_ERR(trans)) + return PTR_ERR(trans); + + spin_lock(&root->fs_info->super_lock); + newflags = btrfs_super_compat_flags(super_block); + newflags |= flags[0].compat_flags & flags[1].compat_flags; + newflags &= ~(flags[0].compat_flags & ~flags[1].compat_flags); + btrfs_set_super_compat_flags(super_block, newflags); + + newflags = btrfs_super_compat_ro_flags(super_block); + newflags |= flags[0].compat_ro_flags & flags[1].compat_ro_flags; + newflags &= ~(flags[0].compat_ro_flags & ~flags[1].compat_ro_flags); + btrfs_set_super_compat_ro_flags(super_block, newflags); + + newflags = btrfs_super_incompat_flags(super_block); + newflags |= flags[0].incompat_flags & flags[1].incompat_flags; + newflags &= ~(flags[0].incompat_flags & ~flags[1].incompat_flags); + btrfs_set_super_incompat_flags(super_block, newflags); + spin_unlock(&root->fs_info->super_lock); + + return btrfs_end_transaction(trans, root); +} + long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -4598,6 +4734,12 @@ long btrfs_ioctl(struct file *file, unsigned int return btrfs_ioctl_set_fslabel(file, argp); case BTRFS_IOC_FILE_EXTENT_SAME: return btrfs_ioctl_file_extent_same(file, argp); + case BTRFS_IOC_GET_SUPPORTED_FEATURES: + return btrfs_ioctl_get_supported_features(file, argp); + case BTRFS_IOC_GET_FEATURES: + return btrfs_ioctl_get_features(file, argp); + case BTRFS_IOC_SET_FEATURES: + return btrfs_ioctl_set_features(file, argp); } return -ENOTTY; -- cgit v1.2.3 From 3b02a68a636400590dd6831a5fc046f0a7909a77 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Fri, 1 Nov 2013 13:07:02 -0400 Subject: btrfs: use feature attribute names to print better error messages Now that we have the feature name strings available in the kernel via the sysfs attributes, we can use them for printing better failure messages from the ioctl path. Signed-off-by: Jeff Mahoney Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 35 ++++++++++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 5 deletions(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index d4e105b54091..93b01fe6c731 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -56,6 +56,7 @@ #include "rcu-string.h" #include "send.h" #include "dev-replace.h" +#include "sysfs.h" static int btrfs_clone(struct inode *src, struct inode *inode, u64 off, u64 olen, u64 olen_aligned, u64 destoff); @@ -4516,17 +4517,27 @@ static int btrfs_ioctl_get_features(struct file *file, void __user *arg) return 0; } -static int check_feature_bits(struct btrfs_root *root, const char *type, +static int check_feature_bits(struct btrfs_root *root, + enum btrfs_feature_set set, u64 change_mask, u64 flags, u64 supported_flags, u64 safe_set, u64 safe_clear) { + const char *type = btrfs_feature_set_names[set]; + char *names; u64 disallowed, unsupported; u64 set_mask = flags & change_mask; u64 clear_mask = ~flags & change_mask; unsupported = set_mask & ~supported_flags; if (unsupported) { - btrfs_warn(root->fs_info, + names = btrfs_printable_features(set, unsupported); + if (names) { + btrfs_warn(root->fs_info, + "this kernel does not support the %s feature bit%s", + names, strchr(names, ',') ? "s" : ""); + kfree(names); + } else + btrfs_warn(root->fs_info, "this kernel does not support %s bits 0x%llx", type, unsupported); return -EOPNOTSUPP; @@ -4534,7 +4545,14 @@ static int check_feature_bits(struct btrfs_root *root, const char *type, disallowed = set_mask & ~safe_set; if (disallowed) { - btrfs_warn(root->fs_info, + names = btrfs_printable_features(set, disallowed); + if (names) { + btrfs_warn(root->fs_info, + "can't set the %s feature bit%s while mounted", + names, strchr(names, ',') ? "s" : ""); + kfree(names); + } else + btrfs_warn(root->fs_info, "can't set %s bits 0x%llx while mounted", type, disallowed); return -EPERM; @@ -4542,7 +4560,14 @@ static int check_feature_bits(struct btrfs_root *root, const char *type, disallowed = clear_mask & ~safe_clear; if (disallowed) { - btrfs_warn(root->fs_info, + names = btrfs_printable_features(set, disallowed); + if (names) { + btrfs_warn(root->fs_info, + "can't clear the %s feature bit%s while mounted", + names, strchr(names, ',') ? "s" : ""); + kfree(names); + } else + btrfs_warn(root->fs_info, "can't clear %s bits 0x%llx while mounted", type, disallowed); return -EPERM; @@ -4552,7 +4577,7 @@ static int check_feature_bits(struct btrfs_root *root, const char *type, } #define check_feature(root, change_mask, flags, mask_base) \ -check_feature_bits(root, # mask_base, change_mask, flags, \ +check_feature_bits(root, FEAT_##mask_base, change_mask, flags, \ BTRFS_FEATURE_ ## mask_base ## _SUPP, \ BTRFS_FEATURE_ ## mask_base ## _SAFE_SET, \ BTRFS_FEATURE_ ## mask_base ## _SAFE_CLEAR) -- cgit v1.2.3 From 01e219e8069516cdb98594d417b8bb8d906ed30d Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Fri, 1 Nov 2013 13:07:03 -0400 Subject: btrfs: add ioctl to export size of global metadata reservation btrfs filesystem df output will show the size of the metadata space and how much of it is used, and the user assumes that the difference is all usable space. Since that's not actually the case due to the global metadata reservation, we should provide the full picture to the user. This patch adds an ioctl that exports the size of the global metadata reservation so that btrfs filesystem df can report it. Signed-off-by: Jeff Mahoney Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 93b01fe6c731..60d3d37ef9a2 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3498,6 +3498,20 @@ out: return ret; } +static long btrfs_ioctl_global_rsv(struct btrfs_root *root, void __user *arg) +{ + struct btrfs_block_rsv *block_rsv = &root->fs_info->global_block_rsv; + u64 reserved; + + spin_lock(&block_rsv->lock); + reserved = block_rsv->reserved; + spin_unlock(&block_rsv->lock); + + if (arg && copy_to_user(arg, &reserved, sizeof(reserved))) + return -EFAULT; + return 0; +} + /* * there are many ways the trans_start and trans_end ioctls can lead * to deadlocks. They should only be used by applications that @@ -4706,6 +4720,8 @@ long btrfs_ioctl(struct file *file, unsigned int return btrfs_ioctl_logical_to_ino(root, argp); case BTRFS_IOC_SPACE_INFO: return btrfs_ioctl_space_info(root, argp); + case BTRFS_IOC_GLOBAL_RSV: + return btrfs_ioctl_global_rsv(root, argp); case BTRFS_IOC_SYNC: { int ret; -- cgit v1.2.3 From 5662344b3c0d9ddd9afd48716d795166f982d5e2 Mon Sep 17 00:00:00 2001 From: Tsutomu Itoh Date: Fri, 13 Dec 2013 09:51:42 +0900 Subject: Btrfs: fix error check of btrfs_lookup_dentry() Clean up btrfs_lookup_dentry() to never return NULL, but PTR_ERR(-ENOENT) instead. This keeps the return value convention consistent. Callers who use btrfs_lookup_dentry() require a trivial update. create_snapshot() in particular looks like it can also lose a BUG_ON(!inode) which is not really needed - there seems less harm in returning ENOENT to userspace at that point in the stack than there is to crash the machine. Signed-off-by: Tsutomu Itoh Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 60d3d37ef9a2..89c2f6169b92 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -393,6 +393,7 @@ static noinline int create_subvol(struct inode *dir, struct btrfs_root *new_root; struct btrfs_block_rsv block_rsv; struct timespec cur_time = CURRENT_TIME; + struct inode *inode; int ret; int err; u64 objectid; @@ -554,8 +555,14 @@ fail: if (err && !ret) ret = err; - if (!ret) - d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry)); + if (!ret) { + inode = btrfs_lookup_dentry(dir, dentry); + if (IS_ERR(inode)) { + ret = PTR_ERR(inode); + goto out; + } + d_instantiate(dentry, inode); + } out: btrfs_subvolume_release_metadata(root, &block_rsv, qgroup_reserved); return ret; @@ -643,7 +650,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, ret = PTR_ERR(inode); goto fail; } - BUG_ON(!inode); + d_instantiate(dentry, inode); ret = 0; fail: -- cgit v1.2.3 From 2c68653787f91c62f8891209dc1f617088c822e4 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Mon, 16 Dec 2013 17:34:17 +0100 Subject: btrfs: Check read-only status of roots during send All the subvolues that are involved in send must be read-only during the whole operation. The ioctl SUBVOL_SETFLAGS could be used to change the status to read-write and the result of send stream is undefined if the data change unexpectedly. Fix that by adding a refcount for all involved roots and verify that there's no send in progress during SUBVOL_SETFLAGS ioctl call that does read-only -> read-write transition. We need refcounts because there are no restrictions on number of send parallel operations currently run on a single subvolume, be it source, parent or one of the multiple clone sources. Kernel is silent when the RO checks fail and returns EPERM. The same set of checks is done already in userspace before send starts. Signed-off-by: David Sterba Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 89c2f6169b92..c0dc05467ce8 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1706,12 +1706,28 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file, goto out_drop_sem; root_flags = btrfs_root_flags(&root->root_item); - if (flags & BTRFS_SUBVOL_RDONLY) + if (flags & BTRFS_SUBVOL_RDONLY) { btrfs_set_root_flags(&root->root_item, root_flags | BTRFS_ROOT_SUBVOL_RDONLY); - else - btrfs_set_root_flags(&root->root_item, + } else { + /* + * Block RO -> RW transition if this subvolume is involved in + * send + */ + spin_lock(&root->root_item_lock); + if (root->send_in_progress == 0) { + btrfs_set_root_flags(&root->root_item, root_flags & ~BTRFS_ROOT_SUBVOL_RDONLY); + spin_unlock(&root->root_item_lock); + } else { + spin_unlock(&root->root_item_lock); + btrfs_warn(root->fs_info, + "Attempt to set subvolume %llu read-write during send", + root->root_key.objectid); + ret = -EPERM; + goto out_drop_sem; + } + } trans = btrfs_start_transaction(root, 1); if (IS_ERR(trans)) { -- cgit v1.2.3 From efe120a067c8674a8ae21b194f0e68f098b61ee2 Mon Sep 17 00:00:00 2001 From: Frank Holton Date: Fri, 20 Dec 2013 11:37:06 -0500 Subject: Btrfs: convert printk to btrfs_ and fix BTRFS prefix Convert all applicable cases of printk and pr_* to the btrfs_* macros. Fix all uses of the BTRFS prefix. Signed-off-by: Frank Holton Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index c0dc05467ce8..edf5f0093f22 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1262,7 +1262,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, break; if (btrfs_defrag_cancelled(root->fs_info)) { - printk(KERN_DEBUG "btrfs: defrag_file cancelled\n"); + printk(KERN_DEBUG "BTRFS: defrag_file cancelled\n"); ret = -EAGAIN; break; } @@ -1424,20 +1424,20 @@ static noinline int btrfs_ioctl_resize(struct file *file, ret = -EINVAL; goto out_free; } - printk(KERN_INFO "btrfs: resizing devid %llu\n", devid); + btrfs_info(root->fs_info, "resizing devid %llu", devid); } device = btrfs_find_device(root->fs_info, devid, NULL, NULL); if (!device) { - printk(KERN_INFO "btrfs: resizer unable to find device %llu\n", + btrfs_info(root->fs_info, "resizer unable to find device %llu", devid); ret = -ENODEV; goto out_free; } if (!device->writeable) { - printk(KERN_INFO "btrfs: resizer unable to apply on " - "readonly device %llu\n", + btrfs_info(root->fs_info, + "resizer unable to apply on readonly device %llu", devid); ret = -EPERM; goto out_free; @@ -1489,7 +1489,7 @@ static noinline int btrfs_ioctl_resize(struct file *file, do_div(new_size, root->sectorsize); new_size *= root->sectorsize; - printk_in_rcu(KERN_INFO "btrfs: new size for %s is %llu\n", + printk_in_rcu(KERN_INFO "BTRFS: new size for %s is %llu\n", rcu_str_deref(device->name), new_size); if (new_size > old_size) { @@ -1550,8 +1550,8 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file, src_inode = file_inode(src.file); if (src_inode->i_sb != file_inode(file)->i_sb) { - printk(KERN_INFO "btrfs: Snapshot src from " - "another FS\n"); + btrfs_info(BTRFS_I(src_inode)->root->fs_info, + "Snapshot src from another FS"); ret = -EINVAL; } else { ret = btrfs_mksubvol(&file->f_path, name, namelen, @@ -1934,7 +1934,7 @@ static noinline int search_ioctl(struct inode *inode, key.offset = (u64)-1; root = btrfs_read_fs_root_no_name(info, &key); if (IS_ERR(root)) { - printk(KERN_ERR "could not find root %llu\n", + printk(KERN_ERR "BTRFS: could not find root %llu\n", sk->tree_id); btrfs_free_path(path); return -ENOENT; @@ -2024,7 +2024,7 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info, key.offset = (u64)-1; root = btrfs_read_fs_root_no_name(info, &key); if (IS_ERR(root)) { - printk(KERN_ERR "could not find root %llu\n", tree_id); + printk(KERN_ERR "BTRFS: could not find root %llu\n", tree_id); ret = -ENOENT; goto out; } @@ -3367,8 +3367,8 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) if (IS_ERR_OR_NULL(di)) { btrfs_free_path(path); btrfs_end_transaction(trans, root); - printk(KERN_ERR "Umm, you don't have the default dir item, " - "this isn't going to work\n"); + btrfs_err(new_root->fs_info, "Umm, you don't have the default dir" + "item, this isn't going to work"); ret = -ENOENT; goto out; } @@ -4469,8 +4469,8 @@ static int btrfs_ioctl_get_fslabel(struct file *file, void __user *arg) len = strnlen(label, BTRFS_LABEL_SIZE); if (len == BTRFS_LABEL_SIZE) { - pr_warn("btrfs: label is too long, return the first %zu bytes\n", - --len); + btrfs_warn(root->fs_info, + "label is too long, return the first %zu bytes", --len); } ret = copy_to_user(arg, label, len); @@ -4493,7 +4493,7 @@ static int btrfs_ioctl_set_fslabel(struct file *file, void __user *arg) return -EFAULT; if (strnlen(label, BTRFS_LABEL_SIZE) == BTRFS_LABEL_SIZE) { - pr_err("btrfs: unable to set label with more than %d bytes\n", + btrfs_err(root->fs_info, "unable to set label with more than %d bytes", BTRFS_LABEL_SIZE - 1); return -EINVAL; } -- cgit v1.2.3 From eb8052e015f2c015926db45943f8ee724ace97e5 Mon Sep 17 00:00:00 2001 From: Wenliang Fan Date: Fri, 20 Dec 2013 15:28:56 +0800 Subject: fs/btrfs: Integer overflow in btrfs_ioctl_resize() The local variable 'new_size' comes from userspace. If a large number was passed, there would be an integer overflow in the following line: new_size = old_size + new_size; Signed-off-by: Wenliang Fan Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index edf5f0093f22..ed3edc283255 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1474,6 +1474,10 @@ static noinline int btrfs_ioctl_resize(struct file *file, } new_size = old_size - new_size; } else if (mod > 0) { + if (new_size > ULLONG_MAX - old_size) { + ret = -EINVAL; + goto out_free; + } new_size = old_size + new_size; } -- cgit v1.2.3 From 63541927c8d11d2686778b1e8ec71c14b4fd53e4 Mon Sep 17 00:00:00 2001 From: Filipe David Borba Manana Date: Tue, 7 Jan 2014 11:47:46 +0000 Subject: Btrfs: add support for inode properties This change adds infrastructure to allow for generic properties for inodes. Properties are name/value pairs that can be associated with inodes for different purposes. They are stored as xattrs with the prefix "btrfs." Properties can be inherited - this means when a directory inode has inheritable properties set, these are added to new inodes created under that directory. Further, subvolumes can also have properties associated with them, and they can be inherited from their parent subvolume. Naturally, directory properties have priority over subvolume properties (in practice a subvolume property is just a regular property associated with the root inode, objectid 256, of the subvolume's fs tree). This change also adds one specific property implementation, named "compression", whose values can be "lzo" or "zlib" and it's an inheritable property. The corresponding changes to btrfs-progs were also implemented. A patch with xfstests for this feature will follow once there's agreement on this change/feature. Further, the script at the bottom of this commit message was used to do some benchmarks to measure any performance penalties of this feature. Basically the tests correspond to: Test 1 - create a filesystem and mount it with compress-force=lzo, then sequentially create N files of 64Kb each, measure how long it took to create the files, unmount the filesystem, mount the filesystem and perform an 'ls -lha' against the test directory holding the N files, and report the time the command took. Test 2 - create a filesystem and don't use any compression option when mounting it - instead set the compression property of the subvolume's root to 'lzo'. Then create N files of 64Kb, and report the time it took. The unmount the filesystem, mount it again and perform an 'ls -lha' like in the former test. This means every single file ends up with a property (xattr) associated to it. Test 3 - same as test 2, but uses 4 properties - 3 are duplicates of the compression property, have no real effect other than adding more work when inheriting properties and taking more btree leaf space. Test 4 - same as test 3 but with 10 properties per file. Results (in seconds, and averages of 5 runs each), for different N numbers of files follow. * Without properties (test 1) file creation time ls -lha time 10 000 files 3.49 0.76 100 000 files 47.19 8.37 1 000 000 files 518.51 107.06 * With 1 property (compression property set to lzo - test 2) file creation time ls -lha time 10 000 files 3.63 0.93 100 000 files 48.56 9.74 1 000 000 files 537.72 125.11 * With 4 properties (test 3) file creation time ls -lha time 10 000 files 3.94 1.20 100 000 files 52.14 11.48 1 000 000 files 572.70 142.13 * With 10 properties (test 4) file creation time ls -lha time 10 000 files 4.61 1.35 100 000 files 58.86 13.83 1 000 000 files 656.01 177.61 The increased latencies with properties are essencialy because of: *) When creating an inode, we now synchronously write 1 more item (an xattr item) for each property inherited from the parent dir (or subvolume). This could be done in an asynchronous way such as we do for dir intex items (delayed-inode.c), which could help reduce the file creation latency; *) With properties, we now have larger fs trees. For this particular test each xattr item uses 75 bytes of leaf space in the fs tree. This could be less by using a new item for xattr items, instead of the current btrfs_dir_item, since we could cut the 'location' and 'type' fields (saving 18 bytes) and maybe 'transid' too (saving a total of 26 bytes per xattr item) from the btrfs_dir_item type. Also tried batching the xattr insertions (ignoring proper hash collision handling, since it didn't exist) when creating files that inherit properties from their parent inode/subvolume, but the end results were (surprisingly) essentially the same. Test script: $ cat test.pl #!/usr/bin/perl -w use strict; use Time::HiRes qw(time); use constant NUM_FILES => 10_000; use constant FILE_SIZES => (64 * 1024); use constant DEV => '/dev/sdb4'; use constant MNT_POINT => '/home/fdmanana/btrfs-tests/dev'; use constant TEST_DIR => (MNT_POINT . '/testdir'); system("mkfs.btrfs", "-l", "16384", "-f", DEV) == 0 or die "mkfs.btrfs failed!"; # following line for testing without properties #system("mount", "-o", "compress-force=lzo", DEV, MNT_POINT) == 0 or die "mount failed!"; # following 2 lines for testing with properties system("mount", DEV, MNT_POINT) == 0 or die "mount failed!"; system("btrfs", "prop", "set", MNT_POINT, "compression", "lzo") == 0 or die "set prop failed!"; system("mkdir", TEST_DIR) == 0 or die "mkdir failed!"; my ($t1, $t2); $t1 = time(); for (my $i = 1; $i <= NUM_FILES; $i++) { my $p = TEST_DIR . '/file_' . $i; open(my $f, '>', $p) or die "Error opening file!"; $f->autoflush(1); for (my $j = 0; $j < FILE_SIZES; $j += 4096) { print $f ('A' x 4096) or die "Error writing to file!"; } close($f); } $t2 = time(); print "Time to create " . NUM_FILES . ": " . ($t2 - $t1) . " seconds.\n"; system("umount", DEV) == 0 or die "umount failed!"; system("mount", DEV, MNT_POINT) == 0 or die "mount failed!"; $t1 = time(); system("bash -c 'ls -lha " . TEST_DIR . " > /dev/null'") == 0 or die "ls failed!"; $t2 = time(); print "Time to ls -lha all files: " . ($t2 - $t1) . " seconds.\n"; system("umount", DEV) == 0 or die "umount failed!"; Signed-off-by: Filipe David Borba Manana Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index ed3edc283255..3970f32b2b80 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -56,6 +56,7 @@ #include "rcu-string.h" #include "send.h" #include "dev-replace.h" +#include "props.h" #include "sysfs.h" static int btrfs_clone(struct inode *src, struct inode *inode, @@ -281,9 +282,25 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) if (flags & FS_NOCOMP_FL) { ip->flags &= ~BTRFS_INODE_COMPRESS; ip->flags |= BTRFS_INODE_NOCOMPRESS; + + ret = btrfs_set_prop(inode, "btrfs.compression", NULL, 0, 0); + if (ret && ret != -ENODATA) + goto out_drop; } else if (flags & FS_COMPR_FL) { + const char *comp; + ip->flags |= BTRFS_INODE_COMPRESS; ip->flags &= ~BTRFS_INODE_NOCOMPRESS; + + if (root->fs_info->compress_type == BTRFS_COMPRESS_LZO) + comp = "lzo"; + else + comp = "zlib"; + ret = btrfs_set_prop(inode, "btrfs.compression", + comp, strlen(comp), 0); + if (ret) + goto out_drop; + } else { ip->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS); } @@ -502,7 +519,7 @@ static noinline int create_subvol(struct inode *dir, btrfs_record_root_in_trans(trans, new_root); - ret = btrfs_create_subvol_root(trans, new_root, new_dirid); + ret = btrfs_create_subvol_root(trans, new_root, root, new_dirid); if (ret) { /* We potentially lose an unused inode item here */ btrfs_abort_transaction(trans, root, ret); -- cgit v1.2.3 From de6e8200669f9b60694ca87eadf0a0a99cbdb6aa Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Thu, 9 Jan 2014 14:57:06 +0800 Subject: Btrfs: release subvolume's block_rsv before transaction commit We don't have to keep subvolume's block_rsv during transaction commit, and within transaction commit, we may also need the free space reclaimed from this block_rsv to process delayed refs. Signed-off-by: Liu Bo Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 3970f32b2b80..332b624e25db 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -436,7 +436,9 @@ static noinline int create_subvol(struct inode *dir, trans = btrfs_start_transaction(root, 0); if (IS_ERR(trans)) { ret = PTR_ERR(trans); - goto out; + btrfs_subvolume_release_metadata(root, &block_rsv, + qgroup_reserved); + return ret; } trans->block_rsv = &block_rsv; trans->bytes_reserved = block_rsv.size; @@ -561,6 +563,8 @@ static noinline int create_subvol(struct inode *dir, fail: trans->block_rsv = NULL; trans->bytes_reserved = 0; + btrfs_subvolume_release_metadata(root, &block_rsv, qgroup_reserved); + if (async_transid) { *async_transid = trans->transid; err = btrfs_commit_transaction_async(trans, root, 1); @@ -574,14 +578,10 @@ fail: if (!ret) { inode = btrfs_lookup_dentry(dir, dentry); - if (IS_ERR(inode)) { - ret = PTR_ERR(inode); - goto out; - } + if (IS_ERR(inode)) + return PTR_ERR(inode); d_instantiate(dentry, inode); } -out: - btrfs_subvolume_release_metadata(root, &block_rsv, qgroup_reserved); return ret; } -- cgit v1.2.3 From c57c2b3ed248b3f1712e4172eb85b361199582f2 Mon Sep 17 00:00:00 2001 From: Filipe David Borba Manana Date: Sat, 11 Jan 2014 21:31:25 +0000 Subject: Btrfs: unlock inodes in correct order in clone ioctl In the clone ioctl, when the source and target inodes are different, we can acquire their mutexes in 2 possible different orders. After we're done cloning, we were releasing the mutexes always in the same order - the most correct way of doing it is to release them by the reverse order they were acquired. Signed-off-by: Filipe David Borba Manana Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 332b624e25db..5ed5bd001084 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3263,9 +3263,17 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, unlock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1); out_unlock: - mutex_unlock(&src->i_mutex); - if (!same_inode) - mutex_unlock(&inode->i_mutex); + if (!same_inode) { + if (inode < src) { + mutex_unlock(&src->i_mutex); + mutex_unlock(&inode->i_mutex); + } else { + mutex_unlock(&inode->i_mutex); + mutex_unlock(&src->i_mutex); + } + } else { + mutex_unlock(&src->i_mutex); + } out_fput: fdput(src_file); out_drop_write: -- cgit v1.2.3 From e4355f34ef9fc75a93875fd075137ef2ea378883 Mon Sep 17 00:00:00 2001 From: Filipe David Borba Manana Date: Mon, 13 Jan 2014 19:35:01 +0000 Subject: Btrfs: faster file extent item search in clone ioctl When we are looking for file extent items that intersect the cloning range, for each one that falls completely outside the range, don't release the path and do another full tree search - just move on to the next slot and copy the file extent item into our buffer only if the item intersects the cloning range. Signed-off-by: Filipe David Borba Manana Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 5ed5bd001084..829999dafd93 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2905,12 +2905,14 @@ static int btrfs_clone(struct inode *src, struct inode *inode, * note the key will change type as we walk through the * tree. */ + path->leave_spinning = 1; ret = btrfs_search_slot(NULL, BTRFS_I(src)->root, &key, path, 0, 0); if (ret < 0) goto out; nritems = btrfs_header_nritems(path->nodes[0]); +process_slot: if (path->slots[0] >= nritems) { ret = btrfs_next_leaf(BTRFS_I(src)->root, path); if (ret < 0) @@ -2937,11 +2939,6 @@ static int btrfs_clone(struct inode *src, struct inode *inode, u8 comp; u64 endoff; - size = btrfs_item_size_nr(leaf, slot); - read_extent_buffer(leaf, buf, - btrfs_item_ptr_offset(leaf, slot), - size); - extent = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); comp = btrfs_file_extent_compression(leaf, extent); @@ -2960,11 +2957,20 @@ static int btrfs_clone(struct inode *src, struct inode *inode, datal = btrfs_file_extent_ram_bytes(leaf, extent); } - btrfs_release_path(path); if (key.offset + datal <= off || - key.offset >= off + len - 1) - goto next; + key.offset >= off + len - 1) { + path->slots[0]++; + goto process_slot; + } + + size = btrfs_item_size_nr(leaf, slot); + read_extent_buffer(leaf, buf, + btrfs_item_ptr_offset(leaf, slot), + size); + + btrfs_release_path(path); + path->leave_spinning = 0; memcpy(&new_key, &key, sizeof(new_key)); new_key.objectid = btrfs_ino(inode); @@ -3135,7 +3141,6 @@ static int btrfs_clone(struct inode *src, struct inode *inode, } ret = btrfs_end_transaction(trans, root); } -next: btrfs_release_path(path); key.offset++; } -- cgit v1.2.3 From d024206133ce21936b3d5780359afc00247655b7 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 15 Jan 2014 18:15:52 +0100 Subject: btrfs: restrict snapshotting to own subvolumes Currently, any user can snapshot any subvolume if the path is accessible and thus indirectly create and keep files he does not own under his direcotries. This is not possible with traditional directories. In security context, a user can snapshot root filesystem and pin any potentially buggy binaries, even if the updates are applied. All the snapshots are visible to the administrator, so it's possible to verify if there are suspicious snapshots. Another more practical problem is that any user can pin the space used by eg. root and cause ENOSPC. Original report: https://bugs.launchpad.net/ubuntu/+source/apparmor/+bug/484786 CC: stable@vger.kernel.org Signed-off-by: David Sterba Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 829999dafd93..8d4457f329f5 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1574,6 +1574,12 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file, btrfs_info(BTRFS_I(src_inode)->root->fs_info, "Snapshot src from another FS"); ret = -EINVAL; + } else if (!inode_owner_or_capable(src_inode)) { + /* + * Subvolume creation is not restricted, but snapshots + * are limited to own subvolumes only + */ + ret = -EPERM; } else { ret = btrfs_mksubvol(&file->f_path, name, namelen, BTRFS_I(src_inode)->root, -- cgit v1.2.3 From bd60ea0fe947029df4b7b7aa9d6557baf2a5a138 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Thu, 16 Jan 2014 15:50:22 +0100 Subject: btrfs: call permission checks earlier in ioctls and return EPERM The owner and capability checks in IOC_SUBVOL_SETFLAGS and SET_RECEIVED_SUBVOL should be called before any other checks are done. Also unify the error code to EPERM. Signed-off-by: David Sterba Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 8d4457f329f5..62c62b4fa55b 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -192,6 +192,9 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) unsigned int i_oldflags; umode_t mode; + if (!inode_owner_or_capable(inode)) + return -EPERM; + if (btrfs_root_readonly(root)) return -EROFS; @@ -202,9 +205,6 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) if (ret) return ret; - if (!inode_owner_or_capable(inode)) - return -EACCES; - ret = mnt_want_write_file(file); if (ret) return ret; @@ -1697,6 +1697,9 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file, u64 flags; int ret = 0; + if (!inode_owner_or_capable(inode)) + return -EPERM; + ret = mnt_want_write_file(file); if (ret) goto out; @@ -1721,11 +1724,6 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file, goto out_drop_write; } - if (!inode_owner_or_capable(inode)) { - ret = -EACCES; - goto out_drop_write; - } - down_write(&root->fs_info->subvol_sem); /* nothing to do */ @@ -4403,6 +4401,9 @@ static long btrfs_ioctl_set_received_subvol(struct file *file, int ret = 0; int received_uuid_changed; + if (!inode_owner_or_capable(inode)) + return -EPERM; + ret = mnt_want_write_file(file); if (ret < 0) return ret; @@ -4419,11 +4420,6 @@ static long btrfs_ioctl_set_received_subvol(struct file *file, goto out; } - if (!inode_owner_or_capable(inode)) { - ret = -EACCES; - goto out; - } - sa = memdup_user(arg, sizeof(*sa)); if (IS_ERR(sa)) { ret = PTR_ERR(sa); -- cgit v1.2.3 From c41570c9d29764f797fa35490d72b7395a0105c3 Mon Sep 17 00:00:00 2001 From: Justin Maggard Date: Tue, 21 Jan 2014 11:18:29 -0800 Subject: btrfs: fix defrag 32-bit integer overflow When defragging a very large file, the cluster variable can wrap its 32-bit signed int type and become negative, which eventually gets passed to btrfs_force_ra() as a very large unsigned long value. On 32-bit platforms, this eventually results in an Oops from the SLAB allocator. Change the cluster and max_cluster signed int variables to unsigned long to match the readahead functions. This also allows the min() comparison in btrfs_defrag_file() to work as intended. Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 62c62b4fa55b..34772cbcc7aa 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1036,7 +1036,7 @@ out: static int cluster_pages_for_defrag(struct inode *inode, struct page **pages, unsigned long start_index, - int num_pages) + unsigned long num_pages) { unsigned long file_end; u64 isize = i_size_read(inode); @@ -1194,8 +1194,8 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, int defrag_count = 0; int compress_type = BTRFS_COMPRESS_ZLIB; int extent_thresh = range->extent_thresh; - int max_cluster = (256 * 1024) >> PAGE_CACHE_SHIFT; - int cluster = max_cluster; + unsigned long max_cluster = (256 * 1024) >> PAGE_CACHE_SHIFT; + unsigned long cluster = max_cluster; u64 new_align = ~((u64)128 * 1024 - 1); struct page **pages = NULL; -- cgit v1.2.3