diff options
Diffstat (limited to 'fs')
| -rw-r--r-- | fs/btrfs/backref.c | 18 | ||||
| -rw-r--r-- | fs/btrfs/extent-tree.c | 25 | ||||
| -rw-r--r-- | fs/btrfs/file.c | 12 | ||||
| -rw-r--r-- | fs/btrfs/sysfs.c | 7 | ||||
| -rw-r--r-- | fs/btrfs/tree-log.c | 8 | ||||
| -rw-r--r-- | fs/ceph/super.c | 7 | ||||
| -rw-r--r-- | fs/char_dev.c | 6 | ||||
| -rw-r--r-- | fs/cifs/file.c | 4 | ||||
| -rw-r--r-- | fs/cifs/smb2ops.c | 14 | ||||
| -rw-r--r-- | fs/ext4/extents.c | 17 | ||||
| -rw-r--r-- | fs/ext4/inode.c | 2 | ||||
| -rw-r--r-- | fs/ext4/ioctl.c | 2 | ||||
| -rw-r--r-- | fs/ext4/super.c | 2 | ||||
| -rw-r--r-- | fs/fs-writeback.c | 51 | ||||
| -rw-r--r-- | fs/fuse/file.c | 13 | ||||
| -rw-r--r-- | fs/gfs2/glock.c | 22 | ||||
| -rw-r--r-- | fs/gfs2/lock_dlm.c | 9 | ||||
| -rw-r--r-- | fs/hugetlbfs/inode.c | 8 | ||||
| -rw-r--r-- | fs/nfs/nfs4state.c | 4 | ||||
| -rw-r--r-- | fs/ocfs2/export.c | 30 | ||||
| -rw-r--r-- | fs/open.c | 18 | ||||
| -rw-r--r-- | fs/read_write.c | 5 | ||||
| -rw-r--r-- | fs/ufs/util.h | 2 | ||||
| -rw-r--r-- | fs/userfaultfd.c | 41 |
24 files changed, 257 insertions, 70 deletions
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index e2f659dc5745..81c5d07a2af1 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -1685,13 +1685,19 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info, extent_item_objectid); if (!search_commit_root) { - trans = btrfs_join_transaction(fs_info->extent_root); - if (IS_ERR(trans)) - return PTR_ERR(trans); + trans = btrfs_attach_transaction(fs_info->extent_root); + if (IS_ERR(trans)) { + if (PTR_ERR(trans) != -ENOENT && + PTR_ERR(trans) != -EROFS) + return PTR_ERR(trans); + trans = NULL; + } + } + + if (trans) btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem); - } else { + else down_read(&fs_info->commit_root_sem); - } ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid, tree_mod_seq_elem.seq, &refs, @@ -1721,7 +1727,7 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info, free_leaf_list(refs); out: - if (!search_commit_root) { + if (trans) { btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem); btrfs_end_transaction(trans, fs_info->extent_root); } else { diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 978bbfed5a2c..df2bb4b61a00 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -10730,9 +10730,9 @@ int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) * transaction. */ static int btrfs_trim_free_extents(struct btrfs_device *device, - u64 minlen, u64 *trimmed) + struct fstrim_range *range, u64 *trimmed) { - u64 start = 0, len = 0; + u64 start = range->start, len = 0; int ret; *trimmed = 0; @@ -10768,8 +10768,8 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, atomic_inc(&trans->use_count); spin_unlock(&fs_info->trans_lock); - ret = find_free_dev_extent_start(trans, device, minlen, start, - &start, &len); + ret = find_free_dev_extent_start(trans, device, range->minlen, + start, &start, &len); if (trans) btrfs_put_transaction(trans); @@ -10781,6 +10781,16 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, break; } + /* If we are out of the passed range break */ + if (start > range->start + range->len - 1) { + mutex_unlock(&fs_info->chunk_mutex); + ret = 0; + break; + } + + start = max(range->start, start); + len = min(range->len, len); + ret = btrfs_issue_discard(device->bdev, start, len, &bytes); up_read(&fs_info->commit_root_sem); mutex_unlock(&fs_info->chunk_mutex); @@ -10791,6 +10801,10 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, start += len; *trimmed += bytes; + /* We've trimmed enough */ + if (*trimmed >= range->len) + break; + if (fatal_signal_pending(current)) { ret = -ERESTARTSYS; break; @@ -10857,8 +10871,7 @@ int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range) mutex_lock(&root->fs_info->fs_devices->device_list_mutex); devices = &root->fs_info->fs_devices->devices; list_for_each_entry(device, devices, dev_list) { - ret = btrfs_trim_free_extents(device, range->minlen, - &group_trimmed); + ret = btrfs_trim_free_extents(device, range, &group_trimmed); if (ret) break; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 052973620595..d056060529f8 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1901,6 +1901,18 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) u64 len; /* + * If the inode needs a full sync, make sure we use a full range to + * avoid log tree corruption, due to hole detection racing with ordered + * extent completion for adjacent ranges, and assertion failures during + * hole detection. + */ + if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, + &BTRFS_I(inode)->runtime_flags)) { + start = 0; + end = LLONG_MAX; + } + + /* * The range length can be represented by u64, we have to do the typecasts * to avoid signed overflow if it's [0, LLONG_MAX] eg. from fsync() */ diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index e0ac85949067..e6aaa15505c5 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -733,7 +733,12 @@ int btrfs_sysfs_add_fsid(struct btrfs_fs_devices *fs_devs, fs_devs->fsid_kobj.kset = btrfs_kset; error = kobject_init_and_add(&fs_devs->fsid_kobj, &btrfs_ktype, parent, "%pU", fs_devs->fsid); - return error; + if (error) { + kobject_put(&fs_devs->fsid_kobj); + return error; + } + + return 0; } int btrfs_sysfs_add_mounted(struct btrfs_fs_info *fs_info) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index c7190f322576..57a46093656a 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -2809,6 +2809,12 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, log->log_transid = root->log_transid; root->log_start_pid = 0; /* + * Update or create log root item under the root's log_mutex to prevent + * races with concurrent log syncs that can lead to failure to update + * log root item because it was not created yet. + */ + ret = update_log_root(trans, log); + /* * IO has been started, blocks of the log tree have WRITTEN flag set * in their headers. new modifications of the log will be written to * new positions. so it's safe to allow log writers to go in. @@ -2827,8 +2833,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, mutex_unlock(&log_root_tree->log_mutex); - ret = update_log_root(trans, log); - mutex_lock(&log_root_tree->log_mutex); if (atomic_dec_and_test(&log_root_tree->log_writers)) { /* diff --git a/fs/ceph/super.c b/fs/ceph/super.c index f446afada328..ab8a8c9c74f2 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -712,6 +712,12 @@ static void ceph_umount_begin(struct super_block *sb) return; } +static int ceph_remount(struct super_block *sb, int *flags, char *data) +{ + sync_filesystem(sb); + return 0; +} + static const struct super_operations ceph_super_ops = { .alloc_inode = ceph_alloc_inode, .destroy_inode = ceph_destroy_inode, @@ -719,6 +725,7 @@ static const struct super_operations ceph_super_ops = { .drop_inode = ceph_drop_inode, .sync_fs = ceph_sync_fs, .put_super = ceph_put_super, + .remount_fs = ceph_remount, .show_options = ceph_show_options, .statfs = ceph_statfs, .umount_begin = ceph_umount_begin, diff --git a/fs/char_dev.c b/fs/char_dev.c index 24b142569ca9..d0655ca89481 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c @@ -130,6 +130,12 @@ __register_chrdev_region(unsigned int major, unsigned int baseminor, ret = -EBUSY; goto out; } + + if (new_min < old_min && new_max > old_max) { + ret = -EBUSY; + goto out; + } + } cd->next = *cp; diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 23a8374fa97f..309c134fb66f 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -2829,7 +2829,9 @@ cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages) } if (rc) { - for (i = 0; i < nr_pages; i++) { + unsigned int nr_page_failed = i; + + for (i = 0; i < nr_page_failed; i++) { put_page(rdata->pages[i]); rdata->pages[i] = NULL; } diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index eae3cdffaf7f..591c93de8c20 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -1329,26 +1329,28 @@ smb21_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock, unsigned int epoch, bool *purge_cache) { char message[5] = {0}; + unsigned int new_oplock = 0; oplock &= 0xFF; if (oplock == SMB2_OPLOCK_LEVEL_NOCHANGE) return; - cinode->oplock = 0; if (oplock & SMB2_LEASE_READ_CACHING_HE) { - cinode->oplock |= CIFS_CACHE_READ_FLG; + new_oplock |= CIFS_CACHE_READ_FLG; strcat(message, "R"); } if (oplock & SMB2_LEASE_HANDLE_CACHING_HE) { - cinode->oplock |= CIFS_CACHE_HANDLE_FLG; + new_oplock |= CIFS_CACHE_HANDLE_FLG; strcat(message, "H"); } if (oplock & SMB2_LEASE_WRITE_CACHING_HE) { - cinode->oplock |= CIFS_CACHE_WRITE_FLG; + new_oplock |= CIFS_CACHE_WRITE_FLG; strcat(message, "W"); } - if (!cinode->oplock) - strcat(message, "None"); + if (!new_oplock) + strncpy(message, "None", sizeof(message)); + + cinode->oplock = new_oplock; cifs_dbg(FYI, "%s Lease granted on inode %p\n", message, &cinode->vfs_inode); } diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 1708597659a1..8d98c9ac9205 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -1049,6 +1049,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, __le32 border; ext4_fsblk_t *ablocks = NULL; /* array of allocated blocks */ int err = 0; + size_t ext_size = 0; /* make decision: where to split? */ /* FIXME: now decision is simplest: at current extent */ @@ -1140,6 +1141,10 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, le16_add_cpu(&neh->eh_entries, m); } + /* zero out unused area in the extent block */ + ext_size = sizeof(struct ext4_extent_header) + + sizeof(struct ext4_extent) * le16_to_cpu(neh->eh_entries); + memset(bh->b_data + ext_size, 0, inode->i_sb->s_blocksize - ext_size); ext4_extent_block_csum_set(inode, neh); set_buffer_uptodate(bh); unlock_buffer(bh); @@ -1219,6 +1224,11 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, sizeof(struct ext4_extent_idx) * m); le16_add_cpu(&neh->eh_entries, m); } + /* zero out unused area in the extent block */ + ext_size = sizeof(struct ext4_extent_header) + + (sizeof(struct ext4_extent) * le16_to_cpu(neh->eh_entries)); + memset(bh->b_data + ext_size, 0, + inode->i_sb->s_blocksize - ext_size); ext4_extent_block_csum_set(inode, neh); set_buffer_uptodate(bh); unlock_buffer(bh); @@ -1284,6 +1294,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, ext4_fsblk_t newblock, goal = 0; struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; int err = 0; + size_t ext_size = 0; /* Try to prepend new index to old one */ if (ext_depth(inode)) @@ -1309,9 +1320,11 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, goto out; } + ext_size = sizeof(EXT4_I(inode)->i_data); /* move top-level index/leaf into new block */ - memmove(bh->b_data, EXT4_I(inode)->i_data, - sizeof(EXT4_I(inode)->i_data)); + memmove(bh->b_data, EXT4_I(inode)->i_data, ext_size); + /* zero out unused area in the extent block */ + memset(bh->b_data + ext_size, 0, inode->i_sb->s_blocksize - ext_size); /* set size of new block */ neh = ext_block_hdr(bh); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index b06109979e94..f72505cdf6a2 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5001,7 +5001,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) up_write(&EXT4_I(inode)->i_data_sem); ext4_journal_stop(handle); if (error) { - if (orphan) + if (orphan && inode->i_nlink) ext4_orphan_del(NULL, inode); goto err_out; } diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 49857101bb57..06ca0e647a97 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -577,7 +577,7 @@ group_add_out: if (err == 0) err = err2; mnt_drop_write_file(filp); - if (!err && (o_group > EXT4_SB(sb)->s_groups_count) && + if (!err && (o_group < EXT4_SB(sb)->s_groups_count) && ext4_has_group_desc_csum(sb) && test_opt(sb, INIT_INODE_TABLE)) err = ext4_register_li_request(sb, o_group); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 0ec3689a8990..9a652931eef8 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3866,7 +3866,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) "data=, fs mounted w/o journal"); goto failed_mount_wq; } - sbi->s_def_mount_opt &= EXT4_MOUNT_JOURNAL_CHECKSUM; + sbi->s_def_mount_opt &= ~EXT4_MOUNT_JOURNAL_CHECKSUM; clear_opt(sb, JOURNAL_CHECKSUM); clear_opt(sb, DATA_FLAGS); sbi->s_journal = NULL; diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 0fe11984db09..de1138e4b049 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -331,11 +331,22 @@ struct inode_switch_wbs_context { struct work_struct work; }; +static void bdi_down_write_wb_switch_rwsem(struct backing_dev_info *bdi) +{ + down_write(&bdi->wb_switch_rwsem); +} + +static void bdi_up_write_wb_switch_rwsem(struct backing_dev_info *bdi) +{ + up_write(&bdi->wb_switch_rwsem); +} + static void inode_switch_wbs_work_fn(struct work_struct *work) { struct inode_switch_wbs_context *isw = container_of(work, struct inode_switch_wbs_context, work); struct inode *inode = isw->inode; + struct backing_dev_info *bdi = inode_to_bdi(inode); struct address_space *mapping = inode->i_mapping; struct bdi_writeback *old_wb = inode->i_wb; struct bdi_writeback *new_wb = isw->new_wb; @@ -344,6 +355,12 @@ static void inode_switch_wbs_work_fn(struct work_struct *work) void **slot; /* + * If @inode switches cgwb membership while sync_inodes_sb() is + * being issued, sync_inodes_sb() might miss it. Synchronize. + */ + down_read(&bdi->wb_switch_rwsem); + + /* * By the time control reaches here, RCU grace period has passed * since I_WB_SWITCH assertion and all wb stat update transactions * between unlocked_inode_to_wb_begin/end() are guaranteed to be @@ -435,6 +452,8 @@ skip_switch: spin_unlock(&new_wb->list_lock); spin_unlock(&old_wb->list_lock); + up_read(&bdi->wb_switch_rwsem); + if (switched) { wb_wakeup(new_wb); wb_put(old_wb); @@ -475,9 +494,18 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id) if (inode->i_state & I_WB_SWITCH) return; + /* + * Avoid starting new switches while sync_inodes_sb() is in + * progress. Otherwise, if the down_write protected issue path + * blocks heavily, we might end up starting a large number of + * switches which will block on the rwsem. + */ + if (!down_read_trylock(&bdi->wb_switch_rwsem)) + return; + isw = kzalloc(sizeof(*isw), GFP_ATOMIC); if (!isw) - return; + goto out_unlock; /* find and pin the new wb */ rcu_read_lock(); @@ -502,8 +530,6 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id) ihold(inode); isw->inode = inode; - atomic_inc(&isw_nr_in_flight); - /* * In addition to synchronizing among switchers, I_WB_SWITCH tells * the RCU protected stat update paths to grab the mapping's @@ -511,12 +537,17 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id) * Let's continue after I_WB_SWITCH is guaranteed to be visible. */ call_rcu(&isw->rcu_head, inode_switch_wbs_rcu_fn); - return; + + atomic_inc(&isw_nr_in_flight); + + goto out_unlock; out_free: if (isw->new_wb) wb_put(isw->new_wb); kfree(isw); +out_unlock: + up_read(&bdi->wb_switch_rwsem); } /** @@ -880,7 +911,11 @@ restart: void cgroup_writeback_umount(void) { if (atomic_read(&isw_nr_in_flight)) { - synchronize_rcu(); + /* + * Use rcu_barrier() to wait for all pending callbacks to + * ensure that all in-flight wb switches are in the workqueue. + */ + rcu_barrier(); flush_workqueue(isw_wq); } } @@ -896,6 +931,9 @@ fs_initcall(cgroup_writeback_init); #else /* CONFIG_CGROUP_WRITEBACK */ +static void bdi_down_write_wb_switch_rwsem(struct backing_dev_info *bdi) { } +static void bdi_up_write_wb_switch_rwsem(struct backing_dev_info *bdi) { } + static struct bdi_writeback * locked_inode_to_wb_and_lock_list(struct inode *inode) __releases(&inode->i_lock) @@ -2341,8 +2379,11 @@ void sync_inodes_sb(struct super_block *sb) return; WARN_ON(!rwsem_is_locked(&sb->s_umount)); + /* protect against inode wb switch, see inode_switch_wbs_work_fn() */ + bdi_down_write_wb_switch_rwsem(bdi); bdi_split_work_to_wbs(bdi, &work, false); wb_wait_for_completion(bdi, &done); + bdi_up_write_wb_switch_rwsem(bdi); wait_sb_inodes(sb); } diff --git a/fs/fuse/file.c b/fs/fuse/file.c index d40c2451487c..ab93c4591f8c 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -178,7 +178,9 @@ void fuse_finish_open(struct inode *inode, struct file *file) file->f_op = &fuse_direct_io_file_operations; if (!(ff->open_flags & FOPEN_KEEP_CACHE)) invalidate_inode_pages2(inode->i_mapping); - if (ff->open_flags & FOPEN_NONSEEKABLE) + if (ff->open_flags & FOPEN_STREAM) + stream_open(inode, file); + else if (ff->open_flags & FOPEN_NONSEEKABLE) nonseekable_open(inode, file); if (fc->atomic_o_trunc && (file->f_flags & O_TRUNC)) { struct fuse_inode *fi = get_fuse_inode(inode); @@ -1533,7 +1535,7 @@ __acquires(fc->lock) { struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); - size_t crop = i_size_read(inode); + loff_t crop = i_size_read(inode); struct fuse_req *req; while (fi->writectr >= 0 && !list_empty(&fi->queued_writes)) { @@ -2947,6 +2949,13 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, } } + if (!(mode & FALLOC_FL_KEEP_SIZE) && + offset + length > i_size_read(inode)) { + err = inode_newsize_ok(inode, offset + length); + if (err) + goto out; + } + if (!(mode & FALLOC_FL_KEEP_SIZE)) set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 09a0cf5f3dd8..1eb737c466dd 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -136,22 +136,26 @@ static int demote_ok(const struct gfs2_glock *gl) void gfs2_glock_add_to_lru(struct gfs2_glock *gl) { + if (!(gl->gl_ops->go_flags & GLOF_LRU)) + return; + spin_lock(&lru_lock); - if (!list_empty(&gl->gl_lru)) - list_del_init(&gl->gl_lru); - else + list_del(&gl->gl_lru); + list_add_tail(&gl->gl_lru, &lru_list); + + if (!test_bit(GLF_LRU, &gl->gl_flags)) { + set_bit(GLF_LRU, &gl->gl_flags); atomic_inc(&lru_count); + } - list_add_tail(&gl->gl_lru, &lru_list); - set_bit(GLF_LRU, &gl->gl_flags); spin_unlock(&lru_lock); } static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl) { spin_lock(&lru_lock); - if (!list_empty(&gl->gl_lru)) { + if (test_bit(GLF_LRU, &gl->gl_flags)) { list_del_init(&gl->gl_lru); atomic_dec(&lru_count); clear_bit(GLF_LRU, &gl->gl_flags); @@ -1040,8 +1044,7 @@ void gfs2_glock_dq(struct gfs2_holder *gh) !test_bit(GLF_DEMOTE, &gl->gl_flags)) fast_path = 1; } - if (!test_bit(GLF_LFLUSH, &gl->gl_flags) && demote_ok(gl) && - (glops->go_flags & GLOF_LRU)) + if (!test_bit(GLF_LFLUSH, &gl->gl_flags) && demote_ok(gl)) gfs2_glock_add_to_lru(gl); trace_gfs2_glock_queue(gh, 0); @@ -1341,6 +1344,7 @@ __acquires(&lru_lock) if (!spin_trylock(&gl->gl_lockref.lock)) { add_back_to_lru: list_add(&gl->gl_lru, &lru_list); + set_bit(GLF_LRU, &gl->gl_flags); atomic_inc(&lru_count); continue; } @@ -1348,7 +1352,6 @@ add_back_to_lru: spin_unlock(&gl->gl_lockref.lock); goto add_back_to_lru; } - clear_bit(GLF_LRU, &gl->gl_flags); gl->gl_lockref.count++; if (demote_ok(gl)) handle_callback(gl, LM_ST_UNLOCKED, 0, false); @@ -1384,6 +1387,7 @@ static long gfs2_scan_glock_lru(int nr) if (!test_bit(GLF_LOCK, &gl->gl_flags)) { list_move(&gl->gl_lru, &dispose); atomic_dec(&lru_count); + clear_bit(GLF_LRU, &gl->gl_flags); freed++; continue; } diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index 8b907c5cc913..3c3d037df824 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -32,9 +32,10 @@ extern struct workqueue_struct *gfs2_control_wq; * @delta is the difference between the current rtt sample and the * running average srtt. We add 1/8 of that to the srtt in order to * update the current srtt estimate. The variance estimate is a bit - * more complicated. We subtract the abs value of the @delta from - * the current variance estimate and add 1/4 of that to the running - * total. + * more complicated. We subtract the current variance estimate from + * the abs value of the @delta and add 1/4 of that to the running + * total. That's equivalent to 3/4 of the current variance + * estimate plus 1/4 of the abs of @delta. * * Note that the index points at the array entry containing the smoothed * mean value, and the variance is always in the following entry @@ -50,7 +51,7 @@ static inline void gfs2_update_stats(struct gfs2_lkstats *s, unsigned index, s64 delta = sample - s->stats[index]; s->stats[index] += (delta >> 3); index++; - s->stats[index] += ((abs(delta) - s->stats[index]) >> 2); + s->stats[index] += (s64)(abs(delta) - s->stats[index]) >> 2; } /** diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 27c4e2ac39a9..937c6ee1786f 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -414,9 +414,7 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart, if (next >= end) break; - hash = hugetlb_fault_mutex_hash(h, current->mm, - &pseudo_vma, - mapping, next, 0); + hash = hugetlb_fault_mutex_hash(h, mapping, next, 0); mutex_lock(&hugetlb_fault_mutex_table[hash]); lock_page(page); @@ -569,7 +567,6 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset, struct address_space *mapping = inode->i_mapping; struct hstate *h = hstate_inode(inode); struct vm_area_struct pseudo_vma; - struct mm_struct *mm = current->mm; loff_t hpage_size = huge_page_size(h); unsigned long hpage_shift = huge_page_shift(h); pgoff_t start, index, end; @@ -633,8 +630,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset, addr = index * hpage_size; /* mutex taken here, fault path and hole punch */ - hash = hugetlb_fault_mutex_hash(h, mm, &pseudo_vma, mapping, - index, addr); + hash = hugetlb_fault_mutex_hash(h, mapping, index, addr); mutex_lock(&hugetlb_fault_mutex_table[hash]); /* See if already present in mapping to avoid alloc/free */ diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 44f5cea49699..5be61affeefd 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -140,6 +140,10 @@ int nfs40_discover_server_trunking(struct nfs_client *clp, /* Sustain the lease, even if it's empty. If the clientid4 * goes stale it's of no use for trunking discovery. */ nfs4_schedule_state_renewal(*result); + + /* If the client state need to recover, do it. */ + if (clp->cl_state) + nfs4_schedule_state_manager(clp); } out: return status; diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c index 3494e220b510..bed15dec3c16 100644 --- a/fs/ocfs2/export.c +++ b/fs/ocfs2/export.c @@ -148,16 +148,24 @@ static struct dentry *ocfs2_get_parent(struct dentry *child) u64 blkno; struct dentry *parent; struct inode *dir = d_inode(child); + int set; trace_ocfs2_get_parent(child, child->d_name.len, child->d_name.name, (unsigned long long)OCFS2_I(dir)->ip_blkno); + status = ocfs2_nfs_sync_lock(OCFS2_SB(dir->i_sb), 1); + if (status < 0) { + mlog(ML_ERROR, "getting nfs sync lock(EX) failed %d\n", status); + parent = ERR_PTR(status); + goto bail; + } + status = ocfs2_inode_lock(dir, NULL, 0); if (status < 0) { if (status != -ENOENT) mlog_errno(status); parent = ERR_PTR(status); - goto bail; + goto unlock_nfs_sync; } status = ocfs2_lookup_ino_from_name(dir, "..", 2, &blkno); @@ -166,11 +174,31 @@ static struct dentry *ocfs2_get_parent(struct dentry *child) goto bail_unlock; } + status = ocfs2_test_inode_bit(OCFS2_SB(dir->i_sb), blkno, &set); + if (status < 0) { + if (status == -EINVAL) { + status = -ESTALE; + } else + mlog(ML_ERROR, "test inode bit failed %d\n", status); + parent = ERR_PTR(status); + goto bail_unlock; + } + + trace_ocfs2_get_dentry_test_bit(status, set); + if (!set) { + status = -ESTALE; + parent = ERR_PTR(status); + goto bail_unlock; + } + parent = d_obtain_alias(ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0, 0)); bail_unlock: ocfs2_inode_unlock(dir, 0); +unlock_nfs_sync: + ocfs2_nfs_sync_unlock(OCFS2_SB(dir->i_sb), 1); + bail: trace_ocfs2_get_parent_end(parent); diff --git a/fs/open.c b/fs/open.c index 1fd96c5d3895..e6a55dd45cba 100644 --- a/fs/open.c +++ b/fs/open.c @@ -1165,3 +1165,21 @@ int nonseekable_open(struct inode *inode, struct file *filp) } EXPORT_SYMBOL(nonseekable_open); + +/* + * stream_open is used by subsystems that want stream-like file descriptors. + * Such file descriptors are not seekable and don't have notion of position + * (file.f_pos is always 0). Contrary to file descriptors of other regular + * files, .read() and .write() can run simultaneously. + * + * stream_open never fails and is marked to return int so that it could be + * directly used as file_operations.open . + */ +int stream_open(struct inode *inode, struct file *filp) +{ + filp->f_mode &= ~(FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE | FMODE_ATOMIC_POS); + filp->f_mode |= FMODE_STREAM; + return 0; +} + +EXPORT_SYMBOL(stream_open); diff --git a/fs/read_write.c b/fs/read_write.c index 16e554ba885d..7b175b9134ec 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -553,12 +553,13 @@ EXPORT_SYMBOL(vfs_write); static inline loff_t file_pos_read(struct file *file) { - return file->f_pos; + return file->f_mode & FMODE_STREAM ? 0 : file->f_pos; } static inline void file_pos_write(struct file *file, loff_t pos) { - file->f_pos = pos; + if ((file->f_mode & FMODE_STREAM) == 0) + file->f_pos = pos; } SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count) diff --git a/fs/ufs/util.h b/fs/ufs/util.h index 3f9463f8cf2f..f877d5cadd98 100644 --- a/fs/ufs/util.h +++ b/fs/ufs/util.h @@ -228,7 +228,7 @@ ufs_get_inode_gid(struct super_block *sb, struct ufs_inode *inode) case UFS_UID_44BSD: return fs32_to_cpu(sb, inode->ui_u3.ui_44.ui_gid); case UFS_UID_EFT: - if (inode->ui_u1.oldids.ui_suid == 0xFFFF) + if (inode->ui_u1.oldids.ui_sgid == 0xFFFF) return fs32_to_cpu(sb, inode->ui_u3.ui_sun.ui_gid); /* Fall through */ default: diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index d859d8bd1f96..188021d74d8d 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -137,7 +137,7 @@ static void userfaultfd_ctx_put(struct userfaultfd_ctx *ctx) VM_BUG_ON(waitqueue_active(&ctx->fault_wqh)); VM_BUG_ON(spin_is_locked(&ctx->fd_wqh.lock)); VM_BUG_ON(waitqueue_active(&ctx->fd_wqh)); - mmput(ctx->mm); + mmdrop(ctx->mm); kmem_cache_free(userfaultfd_ctx_cachep, ctx); } } @@ -434,6 +434,9 @@ static int userfaultfd_release(struct inode *inode, struct file *file) ACCESS_ONCE(ctx->released) = true; + if (!mmget_not_zero(mm)) + goto wakeup; + /* * Flush page faults out of all CPUs. NOTE: all page faults * must be retried without returning VM_FAULT_SIGBUS if @@ -467,7 +470,8 @@ static int userfaultfd_release(struct inode *inode, struct file *file) vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX; } up_write(&mm->mmap_sem); - + mmput(mm); +wakeup: /* * After no new page faults can wait on this fault_*wqh, flush * the last page faults that may have been already waiting on @@ -761,10 +765,12 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, start = uffdio_register.range.start; end = start + uffdio_register.range.len; + ret = -ENOMEM; + if (!mmget_not_zero(mm)) + goto out; + down_write(&mm->mmap_sem); vma = find_vma_prev(mm, start, &prev); - - ret = -ENOMEM; if (!vma) goto out_unlock; @@ -866,6 +872,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, } while (vma && vma->vm_start < end); out_unlock: up_write(&mm->mmap_sem); + mmput(mm); if (!ret) { /* * Now that we scanned all vmas we can already tell @@ -904,10 +911,12 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx, start = uffdio_unregister.start; end = start + uffdio_unregister.len; + ret = -ENOMEM; + if (!mmget_not_zero(mm)) + goto out; + down_write(&mm->mmap_sem); vma = find_vma_prev(mm, start, &prev); - - ret = -ENOMEM; if (!vma) goto out_unlock; @@ -1001,6 +1010,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx, } while (vma && vma->vm_start < end); out_unlock: up_write(&mm->mmap_sem); + mmput(mm); out: return ret; } @@ -1070,9 +1080,11 @@ static int userfaultfd_copy(struct userfaultfd_ctx *ctx, goto out; if (uffdio_copy.mode & ~UFFDIO_COPY_MODE_DONTWAKE) goto out; - - ret = mcopy_atomic(ctx->mm, uffdio_copy.dst, uffdio_copy.src, - uffdio_copy.len); + if (mmget_not_zero(ctx->mm)) { + ret = mcopy_atomic(ctx->mm, uffdio_copy.dst, uffdio_copy.src, + uffdio_copy.len); + mmput(ctx->mm); + } if (unlikely(put_user(ret, &user_uffdio_copy->copy))) return -EFAULT; if (ret < 0) @@ -1113,8 +1125,11 @@ static int userfaultfd_zeropage(struct userfaultfd_ctx *ctx, if (uffdio_zeropage.mode & ~UFFDIO_ZEROPAGE_MODE_DONTWAKE) goto out; - ret = mfill_zeropage(ctx->mm, uffdio_zeropage.range.start, - uffdio_zeropage.range.len); + if (mmget_not_zero(ctx->mm)) { + ret = mfill_zeropage(ctx->mm, uffdio_zeropage.range.start, + uffdio_zeropage.range.len); + mmput(ctx->mm); + } if (unlikely(put_user(ret, &user_uffdio_zeropage->zeropage))) return -EFAULT; if (ret < 0) @@ -1292,12 +1307,12 @@ static struct file *userfaultfd_file_create(int flags) ctx->released = false; ctx->mm = current->mm; /* prevent the mm struct to be freed */ - atomic_inc(&ctx->mm->mm_users); + atomic_inc(&ctx->mm->mm_count); file = anon_inode_getfile("[userfaultfd]", &userfaultfd_fops, ctx, O_RDWR | (flags & UFFD_SHARED_FCNTL_FLAGS)); if (IS_ERR(file)) { - mmput(ctx->mm); + mmdrop(ctx->mm); kmem_cache_free(userfaultfd_ctx_cachep, ctx); } out: |
