summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/backref.c18
-rw-r--r--fs/btrfs/extent-tree.c25
-rw-r--r--fs/btrfs/file.c12
-rw-r--r--fs/btrfs/sysfs.c7
-rw-r--r--fs/btrfs/tree-log.c8
-rw-r--r--fs/ceph/super.c7
-rw-r--r--fs/char_dev.c6
-rw-r--r--fs/cifs/file.c4
-rw-r--r--fs/cifs/smb2ops.c14
-rw-r--r--fs/ext4/extents.c17
-rw-r--r--fs/ext4/inode.c2
-rw-r--r--fs/ext4/ioctl.c2
-rw-r--r--fs/ext4/super.c2
-rw-r--r--fs/fs-writeback.c51
-rw-r--r--fs/fuse/file.c13
-rw-r--r--fs/gfs2/glock.c22
-rw-r--r--fs/gfs2/lock_dlm.c9
-rw-r--r--fs/hugetlbfs/inode.c8
-rw-r--r--fs/nfs/nfs4state.c4
-rw-r--r--fs/ocfs2/export.c30
-rw-r--r--fs/open.c18
-rw-r--r--fs/read_write.c5
-rw-r--r--fs/ufs/util.h2
-rw-r--r--fs/userfaultfd.c41
24 files changed, 257 insertions, 70 deletions
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index e2f659dc5745..81c5d07a2af1 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -1685,13 +1685,19 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
extent_item_objectid);
if (!search_commit_root) {
- trans = btrfs_join_transaction(fs_info->extent_root);
- if (IS_ERR(trans))
- return PTR_ERR(trans);
+ trans = btrfs_attach_transaction(fs_info->extent_root);
+ if (IS_ERR(trans)) {
+ if (PTR_ERR(trans) != -ENOENT &&
+ PTR_ERR(trans) != -EROFS)
+ return PTR_ERR(trans);
+ trans = NULL;
+ }
+ }
+
+ if (trans)
btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
- } else {
+ else
down_read(&fs_info->commit_root_sem);
- }
ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid,
tree_mod_seq_elem.seq, &refs,
@@ -1721,7 +1727,7 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
free_leaf_list(refs);
out:
- if (!search_commit_root) {
+ if (trans) {
btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
btrfs_end_transaction(trans, fs_info->extent_root);
} else {
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 978bbfed5a2c..df2bb4b61a00 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -10730,9 +10730,9 @@ int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
* transaction.
*/
static int btrfs_trim_free_extents(struct btrfs_device *device,
- u64 minlen, u64 *trimmed)
+ struct fstrim_range *range, u64 *trimmed)
{
- u64 start = 0, len = 0;
+ u64 start = range->start, len = 0;
int ret;
*trimmed = 0;
@@ -10768,8 +10768,8 @@ static int btrfs_trim_free_extents(struct btrfs_device *device,
atomic_inc(&trans->use_count);
spin_unlock(&fs_info->trans_lock);
- ret = find_free_dev_extent_start(trans, device, minlen, start,
- &start, &len);
+ ret = find_free_dev_extent_start(trans, device, range->minlen,
+ start, &start, &len);
if (trans)
btrfs_put_transaction(trans);
@@ -10781,6 +10781,16 @@ static int btrfs_trim_free_extents(struct btrfs_device *device,
break;
}
+ /* If we are out of the passed range break */
+ if (start > range->start + range->len - 1) {
+ mutex_unlock(&fs_info->chunk_mutex);
+ ret = 0;
+ break;
+ }
+
+ start = max(range->start, start);
+ len = min(range->len, len);
+
ret = btrfs_issue_discard(device->bdev, start, len, &bytes);
up_read(&fs_info->commit_root_sem);
mutex_unlock(&fs_info->chunk_mutex);
@@ -10791,6 +10801,10 @@ static int btrfs_trim_free_extents(struct btrfs_device *device,
start += len;
*trimmed += bytes;
+ /* We've trimmed enough */
+ if (*trimmed >= range->len)
+ break;
+
if (fatal_signal_pending(current)) {
ret = -ERESTARTSYS;
break;
@@ -10857,8 +10871,7 @@ int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range)
mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
devices = &root->fs_info->fs_devices->devices;
list_for_each_entry(device, devices, dev_list) {
- ret = btrfs_trim_free_extents(device, range->minlen,
- &group_trimmed);
+ ret = btrfs_trim_free_extents(device, range, &group_trimmed);
if (ret)
break;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 052973620595..d056060529f8 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1901,6 +1901,18 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
u64 len;
/*
+ * If the inode needs a full sync, make sure we use a full range to
+ * avoid log tree corruption, due to hole detection racing with ordered
+ * extent completion for adjacent ranges, and assertion failures during
+ * hole detection.
+ */
+ if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
+ &BTRFS_I(inode)->runtime_flags)) {
+ start = 0;
+ end = LLONG_MAX;
+ }
+
+ /*
* The range length can be represented by u64, we have to do the typecasts
* to avoid signed overflow if it's [0, LLONG_MAX] eg. from fsync()
*/
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index e0ac85949067..e6aaa15505c5 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -733,7 +733,12 @@ int btrfs_sysfs_add_fsid(struct btrfs_fs_devices *fs_devs,
fs_devs->fsid_kobj.kset = btrfs_kset;
error = kobject_init_and_add(&fs_devs->fsid_kobj,
&btrfs_ktype, parent, "%pU", fs_devs->fsid);
- return error;
+ if (error) {
+ kobject_put(&fs_devs->fsid_kobj);
+ return error;
+ }
+
+ return 0;
}
int btrfs_sysfs_add_mounted(struct btrfs_fs_info *fs_info)
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index c7190f322576..57a46093656a 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -2809,6 +2809,12 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
log->log_transid = root->log_transid;
root->log_start_pid = 0;
/*
+ * Update or create log root item under the root's log_mutex to prevent
+ * races with concurrent log syncs that can lead to failure to update
+ * log root item because it was not created yet.
+ */
+ ret = update_log_root(trans, log);
+ /*
* IO has been started, blocks of the log tree have WRITTEN flag set
* in their headers. new modifications of the log will be written to
* new positions. so it's safe to allow log writers to go in.
@@ -2827,8 +2833,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
mutex_unlock(&log_root_tree->log_mutex);
- ret = update_log_root(trans, log);
-
mutex_lock(&log_root_tree->log_mutex);
if (atomic_dec_and_test(&log_root_tree->log_writers)) {
/*
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index f446afada328..ab8a8c9c74f2 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -712,6 +712,12 @@ static void ceph_umount_begin(struct super_block *sb)
return;
}
+static int ceph_remount(struct super_block *sb, int *flags, char *data)
+{
+ sync_filesystem(sb);
+ return 0;
+}
+
static const struct super_operations ceph_super_ops = {
.alloc_inode = ceph_alloc_inode,
.destroy_inode = ceph_destroy_inode,
@@ -719,6 +725,7 @@ static const struct super_operations ceph_super_ops = {
.drop_inode = ceph_drop_inode,
.sync_fs = ceph_sync_fs,
.put_super = ceph_put_super,
+ .remount_fs = ceph_remount,
.show_options = ceph_show_options,
.statfs = ceph_statfs,
.umount_begin = ceph_umount_begin,
diff --git a/fs/char_dev.c b/fs/char_dev.c
index 24b142569ca9..d0655ca89481 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -130,6 +130,12 @@ __register_chrdev_region(unsigned int major, unsigned int baseminor,
ret = -EBUSY;
goto out;
}
+
+ if (new_min < old_min && new_max > old_max) {
+ ret = -EBUSY;
+ goto out;
+ }
+
}
cd->next = *cp;
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 23a8374fa97f..309c134fb66f 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2829,7 +2829,9 @@ cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
}
if (rc) {
- for (i = 0; i < nr_pages; i++) {
+ unsigned int nr_page_failed = i;
+
+ for (i = 0; i < nr_page_failed; i++) {
put_page(rdata->pages[i]);
rdata->pages[i] = NULL;
}
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index eae3cdffaf7f..591c93de8c20 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -1329,26 +1329,28 @@ smb21_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock,
unsigned int epoch, bool *purge_cache)
{
char message[5] = {0};
+ unsigned int new_oplock = 0;
oplock &= 0xFF;
if (oplock == SMB2_OPLOCK_LEVEL_NOCHANGE)
return;
- cinode->oplock = 0;
if (oplock & SMB2_LEASE_READ_CACHING_HE) {
- cinode->oplock |= CIFS_CACHE_READ_FLG;
+ new_oplock |= CIFS_CACHE_READ_FLG;
strcat(message, "R");
}
if (oplock & SMB2_LEASE_HANDLE_CACHING_HE) {
- cinode->oplock |= CIFS_CACHE_HANDLE_FLG;
+ new_oplock |= CIFS_CACHE_HANDLE_FLG;
strcat(message, "H");
}
if (oplock & SMB2_LEASE_WRITE_CACHING_HE) {
- cinode->oplock |= CIFS_CACHE_WRITE_FLG;
+ new_oplock |= CIFS_CACHE_WRITE_FLG;
strcat(message, "W");
}
- if (!cinode->oplock)
- strcat(message, "None");
+ if (!new_oplock)
+ strncpy(message, "None", sizeof(message));
+
+ cinode->oplock = new_oplock;
cifs_dbg(FYI, "%s Lease granted on inode %p\n", message,
&cinode->vfs_inode);
}
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 1708597659a1..8d98c9ac9205 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -1049,6 +1049,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
__le32 border;
ext4_fsblk_t *ablocks = NULL; /* array of allocated blocks */
int err = 0;
+ size_t ext_size = 0;
/* make decision: where to split? */
/* FIXME: now decision is simplest: at current extent */
@@ -1140,6 +1141,10 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
le16_add_cpu(&neh->eh_entries, m);
}
+ /* zero out unused area in the extent block */
+ ext_size = sizeof(struct ext4_extent_header) +
+ sizeof(struct ext4_extent) * le16_to_cpu(neh->eh_entries);
+ memset(bh->b_data + ext_size, 0, inode->i_sb->s_blocksize - ext_size);
ext4_extent_block_csum_set(inode, neh);
set_buffer_uptodate(bh);
unlock_buffer(bh);
@@ -1219,6 +1224,11 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
sizeof(struct ext4_extent_idx) * m);
le16_add_cpu(&neh->eh_entries, m);
}
+ /* zero out unused area in the extent block */
+ ext_size = sizeof(struct ext4_extent_header) +
+ (sizeof(struct ext4_extent) * le16_to_cpu(neh->eh_entries));
+ memset(bh->b_data + ext_size, 0,
+ inode->i_sb->s_blocksize - ext_size);
ext4_extent_block_csum_set(inode, neh);
set_buffer_uptodate(bh);
unlock_buffer(bh);
@@ -1284,6 +1294,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
ext4_fsblk_t newblock, goal = 0;
struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
int err = 0;
+ size_t ext_size = 0;
/* Try to prepend new index to old one */
if (ext_depth(inode))
@@ -1309,9 +1320,11 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
goto out;
}
+ ext_size = sizeof(EXT4_I(inode)->i_data);
/* move top-level index/leaf into new block */
- memmove(bh->b_data, EXT4_I(inode)->i_data,
- sizeof(EXT4_I(inode)->i_data));
+ memmove(bh->b_data, EXT4_I(inode)->i_data, ext_size);
+ /* zero out unused area in the extent block */
+ memset(bh->b_data + ext_size, 0, inode->i_sb->s_blocksize - ext_size);
/* set size of new block */
neh = ext_block_hdr(bh);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index b06109979e94..f72505cdf6a2 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -5001,7 +5001,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
up_write(&EXT4_I(inode)->i_data_sem);
ext4_journal_stop(handle);
if (error) {
- if (orphan)
+ if (orphan && inode->i_nlink)
ext4_orphan_del(NULL, inode);
goto err_out;
}
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 49857101bb57..06ca0e647a97 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -577,7 +577,7 @@ group_add_out:
if (err == 0)
err = err2;
mnt_drop_write_file(filp);
- if (!err && (o_group > EXT4_SB(sb)->s_groups_count) &&
+ if (!err && (o_group < EXT4_SB(sb)->s_groups_count) &&
ext4_has_group_desc_csum(sb) &&
test_opt(sb, INIT_INODE_TABLE))
err = ext4_register_li_request(sb, o_group);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 0ec3689a8990..9a652931eef8 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3866,7 +3866,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
"data=, fs mounted w/o journal");
goto failed_mount_wq;
}
- sbi->s_def_mount_opt &= EXT4_MOUNT_JOURNAL_CHECKSUM;
+ sbi->s_def_mount_opt &= ~EXT4_MOUNT_JOURNAL_CHECKSUM;
clear_opt(sb, JOURNAL_CHECKSUM);
clear_opt(sb, DATA_FLAGS);
sbi->s_journal = NULL;
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 0fe11984db09..de1138e4b049 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -331,11 +331,22 @@ struct inode_switch_wbs_context {
struct work_struct work;
};
+static void bdi_down_write_wb_switch_rwsem(struct backing_dev_info *bdi)
+{
+ down_write(&bdi->wb_switch_rwsem);
+}
+
+static void bdi_up_write_wb_switch_rwsem(struct backing_dev_info *bdi)
+{
+ up_write(&bdi->wb_switch_rwsem);
+}
+
static void inode_switch_wbs_work_fn(struct work_struct *work)
{
struct inode_switch_wbs_context *isw =
container_of(work, struct inode_switch_wbs_context, work);
struct inode *inode = isw->inode;
+ struct backing_dev_info *bdi = inode_to_bdi(inode);
struct address_space *mapping = inode->i_mapping;
struct bdi_writeback *old_wb = inode->i_wb;
struct bdi_writeback *new_wb = isw->new_wb;
@@ -344,6 +355,12 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
void **slot;
/*
+ * If @inode switches cgwb membership while sync_inodes_sb() is
+ * being issued, sync_inodes_sb() might miss it. Synchronize.
+ */
+ down_read(&bdi->wb_switch_rwsem);
+
+ /*
* By the time control reaches here, RCU grace period has passed
* since I_WB_SWITCH assertion and all wb stat update transactions
* between unlocked_inode_to_wb_begin/end() are guaranteed to be
@@ -435,6 +452,8 @@ skip_switch:
spin_unlock(&new_wb->list_lock);
spin_unlock(&old_wb->list_lock);
+ up_read(&bdi->wb_switch_rwsem);
+
if (switched) {
wb_wakeup(new_wb);
wb_put(old_wb);
@@ -475,9 +494,18 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
if (inode->i_state & I_WB_SWITCH)
return;
+ /*
+ * Avoid starting new switches while sync_inodes_sb() is in
+ * progress. Otherwise, if the down_write protected issue path
+ * blocks heavily, we might end up starting a large number of
+ * switches which will block on the rwsem.
+ */
+ if (!down_read_trylock(&bdi->wb_switch_rwsem))
+ return;
+
isw = kzalloc(sizeof(*isw), GFP_ATOMIC);
if (!isw)
- return;
+ goto out_unlock;
/* find and pin the new wb */
rcu_read_lock();
@@ -502,8 +530,6 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
ihold(inode);
isw->inode = inode;
- atomic_inc(&isw_nr_in_flight);
-
/*
* In addition to synchronizing among switchers, I_WB_SWITCH tells
* the RCU protected stat update paths to grab the mapping's
@@ -511,12 +537,17 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
* Let's continue after I_WB_SWITCH is guaranteed to be visible.
*/
call_rcu(&isw->rcu_head, inode_switch_wbs_rcu_fn);
- return;
+
+ atomic_inc(&isw_nr_in_flight);
+
+ goto out_unlock;
out_free:
if (isw->new_wb)
wb_put(isw->new_wb);
kfree(isw);
+out_unlock:
+ up_read(&bdi->wb_switch_rwsem);
}
/**
@@ -880,7 +911,11 @@ restart:
void cgroup_writeback_umount(void)
{
if (atomic_read(&isw_nr_in_flight)) {
- synchronize_rcu();
+ /*
+ * Use rcu_barrier() to wait for all pending callbacks to
+ * ensure that all in-flight wb switches are in the workqueue.
+ */
+ rcu_barrier();
flush_workqueue(isw_wq);
}
}
@@ -896,6 +931,9 @@ fs_initcall(cgroup_writeback_init);
#else /* CONFIG_CGROUP_WRITEBACK */
+static void bdi_down_write_wb_switch_rwsem(struct backing_dev_info *bdi) { }
+static void bdi_up_write_wb_switch_rwsem(struct backing_dev_info *bdi) { }
+
static struct bdi_writeback *
locked_inode_to_wb_and_lock_list(struct inode *inode)
__releases(&inode->i_lock)
@@ -2341,8 +2379,11 @@ void sync_inodes_sb(struct super_block *sb)
return;
WARN_ON(!rwsem_is_locked(&sb->s_umount));
+ /* protect against inode wb switch, see inode_switch_wbs_work_fn() */
+ bdi_down_write_wb_switch_rwsem(bdi);
bdi_split_work_to_wbs(bdi, &work, false);
wb_wait_for_completion(bdi, &done);
+ bdi_up_write_wb_switch_rwsem(bdi);
wait_sb_inodes(sb);
}
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index d40c2451487c..ab93c4591f8c 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -178,7 +178,9 @@ void fuse_finish_open(struct inode *inode, struct file *file)
file->f_op = &fuse_direct_io_file_operations;
if (!(ff->open_flags & FOPEN_KEEP_CACHE))
invalidate_inode_pages2(inode->i_mapping);
- if (ff->open_flags & FOPEN_NONSEEKABLE)
+ if (ff->open_flags & FOPEN_STREAM)
+ stream_open(inode, file);
+ else if (ff->open_flags & FOPEN_NONSEEKABLE)
nonseekable_open(inode, file);
if (fc->atomic_o_trunc && (file->f_flags & O_TRUNC)) {
struct fuse_inode *fi = get_fuse_inode(inode);
@@ -1533,7 +1535,7 @@ __acquires(fc->lock)
{
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_inode *fi = get_fuse_inode(inode);
- size_t crop = i_size_read(inode);
+ loff_t crop = i_size_read(inode);
struct fuse_req *req;
while (fi->writectr >= 0 && !list_empty(&fi->queued_writes)) {
@@ -2947,6 +2949,13 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
}
}
+ if (!(mode & FALLOC_FL_KEEP_SIZE) &&
+ offset + length > i_size_read(inode)) {
+ err = inode_newsize_ok(inode, offset + length);
+ if (err)
+ goto out;
+ }
+
if (!(mode & FALLOC_FL_KEEP_SIZE))
set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 09a0cf5f3dd8..1eb737c466dd 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -136,22 +136,26 @@ static int demote_ok(const struct gfs2_glock *gl)
void gfs2_glock_add_to_lru(struct gfs2_glock *gl)
{
+ if (!(gl->gl_ops->go_flags & GLOF_LRU))
+ return;
+
spin_lock(&lru_lock);
- if (!list_empty(&gl->gl_lru))
- list_del_init(&gl->gl_lru);
- else
+ list_del(&gl->gl_lru);
+ list_add_tail(&gl->gl_lru, &lru_list);
+
+ if (!test_bit(GLF_LRU, &gl->gl_flags)) {
+ set_bit(GLF_LRU, &gl->gl_flags);
atomic_inc(&lru_count);
+ }
- list_add_tail(&gl->gl_lru, &lru_list);
- set_bit(GLF_LRU, &gl->gl_flags);
spin_unlock(&lru_lock);
}
static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl)
{
spin_lock(&lru_lock);
- if (!list_empty(&gl->gl_lru)) {
+ if (test_bit(GLF_LRU, &gl->gl_flags)) {
list_del_init(&gl->gl_lru);
atomic_dec(&lru_count);
clear_bit(GLF_LRU, &gl->gl_flags);
@@ -1040,8 +1044,7 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
!test_bit(GLF_DEMOTE, &gl->gl_flags))
fast_path = 1;
}
- if (!test_bit(GLF_LFLUSH, &gl->gl_flags) && demote_ok(gl) &&
- (glops->go_flags & GLOF_LRU))
+ if (!test_bit(GLF_LFLUSH, &gl->gl_flags) && demote_ok(gl))
gfs2_glock_add_to_lru(gl);
trace_gfs2_glock_queue(gh, 0);
@@ -1341,6 +1344,7 @@ __acquires(&lru_lock)
if (!spin_trylock(&gl->gl_lockref.lock)) {
add_back_to_lru:
list_add(&gl->gl_lru, &lru_list);
+ set_bit(GLF_LRU, &gl->gl_flags);
atomic_inc(&lru_count);
continue;
}
@@ -1348,7 +1352,6 @@ add_back_to_lru:
spin_unlock(&gl->gl_lockref.lock);
goto add_back_to_lru;
}
- clear_bit(GLF_LRU, &gl->gl_flags);
gl->gl_lockref.count++;
if (demote_ok(gl))
handle_callback(gl, LM_ST_UNLOCKED, 0, false);
@@ -1384,6 +1387,7 @@ static long gfs2_scan_glock_lru(int nr)
if (!test_bit(GLF_LOCK, &gl->gl_flags)) {
list_move(&gl->gl_lru, &dispose);
atomic_dec(&lru_count);
+ clear_bit(GLF_LRU, &gl->gl_flags);
freed++;
continue;
}
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index 8b907c5cc913..3c3d037df824 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -32,9 +32,10 @@ extern struct workqueue_struct *gfs2_control_wq;
* @delta is the difference between the current rtt sample and the
* running average srtt. We add 1/8 of that to the srtt in order to
* update the current srtt estimate. The variance estimate is a bit
- * more complicated. We subtract the abs value of the @delta from
- * the current variance estimate and add 1/4 of that to the running
- * total.
+ * more complicated. We subtract the current variance estimate from
+ * the abs value of the @delta and add 1/4 of that to the running
+ * total. That's equivalent to 3/4 of the current variance
+ * estimate plus 1/4 of the abs of @delta.
*
* Note that the index points at the array entry containing the smoothed
* mean value, and the variance is always in the following entry
@@ -50,7 +51,7 @@ static inline void gfs2_update_stats(struct gfs2_lkstats *s, unsigned index,
s64 delta = sample - s->stats[index];
s->stats[index] += (delta >> 3);
index++;
- s->stats[index] += ((abs(delta) - s->stats[index]) >> 2);
+ s->stats[index] += (s64)(abs(delta) - s->stats[index]) >> 2;
}
/**
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 27c4e2ac39a9..937c6ee1786f 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -414,9 +414,7 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
if (next >= end)
break;
- hash = hugetlb_fault_mutex_hash(h, current->mm,
- &pseudo_vma,
- mapping, next, 0);
+ hash = hugetlb_fault_mutex_hash(h, mapping, next, 0);
mutex_lock(&hugetlb_fault_mutex_table[hash]);
lock_page(page);
@@ -569,7 +567,6 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
struct address_space *mapping = inode->i_mapping;
struct hstate *h = hstate_inode(inode);
struct vm_area_struct pseudo_vma;
- struct mm_struct *mm = current->mm;
loff_t hpage_size = huge_page_size(h);
unsigned long hpage_shift = huge_page_shift(h);
pgoff_t start, index, end;
@@ -633,8 +630,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
addr = index * hpage_size;
/* mutex taken here, fault path and hole punch */
- hash = hugetlb_fault_mutex_hash(h, mm, &pseudo_vma, mapping,
- index, addr);
+ hash = hugetlb_fault_mutex_hash(h, mapping, index, addr);
mutex_lock(&hugetlb_fault_mutex_table[hash]);
/* See if already present in mapping to avoid alloc/free */
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 44f5cea49699..5be61affeefd 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -140,6 +140,10 @@ int nfs40_discover_server_trunking(struct nfs_client *clp,
/* Sustain the lease, even if it's empty. If the clientid4
* goes stale it's of no use for trunking discovery. */
nfs4_schedule_state_renewal(*result);
+
+ /* If the client state need to recover, do it. */
+ if (clp->cl_state)
+ nfs4_schedule_state_manager(clp);
}
out:
return status;
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
index 3494e220b510..bed15dec3c16 100644
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -148,16 +148,24 @@ static struct dentry *ocfs2_get_parent(struct dentry *child)
u64 blkno;
struct dentry *parent;
struct inode *dir = d_inode(child);
+ int set;
trace_ocfs2_get_parent(child, child->d_name.len, child->d_name.name,
(unsigned long long)OCFS2_I(dir)->ip_blkno);
+ status = ocfs2_nfs_sync_lock(OCFS2_SB(dir->i_sb), 1);
+ if (status < 0) {
+ mlog(ML_ERROR, "getting nfs sync lock(EX) failed %d\n", status);
+ parent = ERR_PTR(status);
+ goto bail;
+ }
+
status = ocfs2_inode_lock(dir, NULL, 0);
if (status < 0) {
if (status != -ENOENT)
mlog_errno(status);
parent = ERR_PTR(status);
- goto bail;
+ goto unlock_nfs_sync;
}
status = ocfs2_lookup_ino_from_name(dir, "..", 2, &blkno);
@@ -166,11 +174,31 @@ static struct dentry *ocfs2_get_parent(struct dentry *child)
goto bail_unlock;
}
+ status = ocfs2_test_inode_bit(OCFS2_SB(dir->i_sb), blkno, &set);
+ if (status < 0) {
+ if (status == -EINVAL) {
+ status = -ESTALE;
+ } else
+ mlog(ML_ERROR, "test inode bit failed %d\n", status);
+ parent = ERR_PTR(status);
+ goto bail_unlock;
+ }
+
+ trace_ocfs2_get_dentry_test_bit(status, set);
+ if (!set) {
+ status = -ESTALE;
+ parent = ERR_PTR(status);
+ goto bail_unlock;
+ }
+
parent = d_obtain_alias(ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0, 0));
bail_unlock:
ocfs2_inode_unlock(dir, 0);
+unlock_nfs_sync:
+ ocfs2_nfs_sync_unlock(OCFS2_SB(dir->i_sb), 1);
+
bail:
trace_ocfs2_get_parent_end(parent);
diff --git a/fs/open.c b/fs/open.c
index 1fd96c5d3895..e6a55dd45cba 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -1165,3 +1165,21 @@ int nonseekable_open(struct inode *inode, struct file *filp)
}
EXPORT_SYMBOL(nonseekable_open);
+
+/*
+ * stream_open is used by subsystems that want stream-like file descriptors.
+ * Such file descriptors are not seekable and don't have notion of position
+ * (file.f_pos is always 0). Contrary to file descriptors of other regular
+ * files, .read() and .write() can run simultaneously.
+ *
+ * stream_open never fails and is marked to return int so that it could be
+ * directly used as file_operations.open .
+ */
+int stream_open(struct inode *inode, struct file *filp)
+{
+ filp->f_mode &= ~(FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE | FMODE_ATOMIC_POS);
+ filp->f_mode |= FMODE_STREAM;
+ return 0;
+}
+
+EXPORT_SYMBOL(stream_open);
diff --git a/fs/read_write.c b/fs/read_write.c
index 16e554ba885d..7b175b9134ec 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -553,12 +553,13 @@ EXPORT_SYMBOL(vfs_write);
static inline loff_t file_pos_read(struct file *file)
{
- return file->f_pos;
+ return file->f_mode & FMODE_STREAM ? 0 : file->f_pos;
}
static inline void file_pos_write(struct file *file, loff_t pos)
{
- file->f_pos = pos;
+ if ((file->f_mode & FMODE_STREAM) == 0)
+ file->f_pos = pos;
}
SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
diff --git a/fs/ufs/util.h b/fs/ufs/util.h
index 3f9463f8cf2f..f877d5cadd98 100644
--- a/fs/ufs/util.h
+++ b/fs/ufs/util.h
@@ -228,7 +228,7 @@ ufs_get_inode_gid(struct super_block *sb, struct ufs_inode *inode)
case UFS_UID_44BSD:
return fs32_to_cpu(sb, inode->ui_u3.ui_44.ui_gid);
case UFS_UID_EFT:
- if (inode->ui_u1.oldids.ui_suid == 0xFFFF)
+ if (inode->ui_u1.oldids.ui_sgid == 0xFFFF)
return fs32_to_cpu(sb, inode->ui_u3.ui_sun.ui_gid);
/* Fall through */
default:
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index d859d8bd1f96..188021d74d8d 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -137,7 +137,7 @@ static void userfaultfd_ctx_put(struct userfaultfd_ctx *ctx)
VM_BUG_ON(waitqueue_active(&ctx->fault_wqh));
VM_BUG_ON(spin_is_locked(&ctx->fd_wqh.lock));
VM_BUG_ON(waitqueue_active(&ctx->fd_wqh));
- mmput(ctx->mm);
+ mmdrop(ctx->mm);
kmem_cache_free(userfaultfd_ctx_cachep, ctx);
}
}
@@ -434,6 +434,9 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
ACCESS_ONCE(ctx->released) = true;
+ if (!mmget_not_zero(mm))
+ goto wakeup;
+
/*
* Flush page faults out of all CPUs. NOTE: all page faults
* must be retried without returning VM_FAULT_SIGBUS if
@@ -467,7 +470,8 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
}
up_write(&mm->mmap_sem);
-
+ mmput(mm);
+wakeup:
/*
* After no new page faults can wait on this fault_*wqh, flush
* the last page faults that may have been already waiting on
@@ -761,10 +765,12 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
start = uffdio_register.range.start;
end = start + uffdio_register.range.len;
+ ret = -ENOMEM;
+ if (!mmget_not_zero(mm))
+ goto out;
+
down_write(&mm->mmap_sem);
vma = find_vma_prev(mm, start, &prev);
-
- ret = -ENOMEM;
if (!vma)
goto out_unlock;
@@ -866,6 +872,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
} while (vma && vma->vm_start < end);
out_unlock:
up_write(&mm->mmap_sem);
+ mmput(mm);
if (!ret) {
/*
* Now that we scanned all vmas we can already tell
@@ -904,10 +911,12 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
start = uffdio_unregister.start;
end = start + uffdio_unregister.len;
+ ret = -ENOMEM;
+ if (!mmget_not_zero(mm))
+ goto out;
+
down_write(&mm->mmap_sem);
vma = find_vma_prev(mm, start, &prev);
-
- ret = -ENOMEM;
if (!vma)
goto out_unlock;
@@ -1001,6 +1010,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
} while (vma && vma->vm_start < end);
out_unlock:
up_write(&mm->mmap_sem);
+ mmput(mm);
out:
return ret;
}
@@ -1070,9 +1080,11 @@ static int userfaultfd_copy(struct userfaultfd_ctx *ctx,
goto out;
if (uffdio_copy.mode & ~UFFDIO_COPY_MODE_DONTWAKE)
goto out;
-
- ret = mcopy_atomic(ctx->mm, uffdio_copy.dst, uffdio_copy.src,
- uffdio_copy.len);
+ if (mmget_not_zero(ctx->mm)) {
+ ret = mcopy_atomic(ctx->mm, uffdio_copy.dst, uffdio_copy.src,
+ uffdio_copy.len);
+ mmput(ctx->mm);
+ }
if (unlikely(put_user(ret, &user_uffdio_copy->copy)))
return -EFAULT;
if (ret < 0)
@@ -1113,8 +1125,11 @@ static int userfaultfd_zeropage(struct userfaultfd_ctx *ctx,
if (uffdio_zeropage.mode & ~UFFDIO_ZEROPAGE_MODE_DONTWAKE)
goto out;
- ret = mfill_zeropage(ctx->mm, uffdio_zeropage.range.start,
- uffdio_zeropage.range.len);
+ if (mmget_not_zero(ctx->mm)) {
+ ret = mfill_zeropage(ctx->mm, uffdio_zeropage.range.start,
+ uffdio_zeropage.range.len);
+ mmput(ctx->mm);
+ }
if (unlikely(put_user(ret, &user_uffdio_zeropage->zeropage)))
return -EFAULT;
if (ret < 0)
@@ -1292,12 +1307,12 @@ static struct file *userfaultfd_file_create(int flags)
ctx->released = false;
ctx->mm = current->mm;
/* prevent the mm struct to be freed */
- atomic_inc(&ctx->mm->mm_users);
+ atomic_inc(&ctx->mm->mm_count);
file = anon_inode_getfile("[userfaultfd]", &userfaultfd_fops, ctx,
O_RDWR | (flags & UFFD_SHARED_FCNTL_FLAGS));
if (IS_ERR(file)) {
- mmput(ctx->mm);
+ mmdrop(ctx->mm);
kmem_cache_free(userfaultfd_ctx_cachep, ctx);
}
out: