diff options
Diffstat (limited to 'fs/ext4/inode.c')
-rw-r--r-- | fs/ext4/inode.c | 109 |
1 files changed, 94 insertions, 15 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index ea433a7f4bca..e31d762eedce 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -657,6 +657,34 @@ has_zeroout: return retval; } +/* + * Update EXT4_MAP_FLAGS in bh->b_state. For buffer heads attached to pages + * we have to be careful as someone else may be manipulating b_state as well. + */ +static void ext4_update_bh_state(struct buffer_head *bh, unsigned long flags) +{ + unsigned long old_state; + unsigned long new_state; + + flags &= EXT4_MAP_FLAGS; + + /* Dummy buffer_head? Set non-atomically. */ + if (!bh->b_page) { + bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | flags; + return; + } + /* + * Someone else may be modifying b_state. Be careful! This is ugly but + * once we get rid of using bh as a container for mapping information + * to pass to / from get_block functions, this can go away. + */ + do { + old_state = READ_ONCE(bh->b_state); + new_state = (old_state & ~EXT4_MAP_FLAGS) | flags; + } while (unlikely( + cmpxchg(&bh->b_state, old_state, new_state) != old_state)); +} + /* Maximum number of blocks we map for direct IO at once. */ #define DIO_MAX_BLOCKS 4096 @@ -693,7 +721,7 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock, ext4_io_end_t *io_end = ext4_inode_aio(inode); map_bh(bh, inode->i_sb, map.m_pblk); - bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags; + ext4_update_bh_state(bh, map.m_flags); if (IS_DAX(inode) && buffer_unwritten(bh)) { /* * dgc: I suspect unwritten conversion on ext4+DAX is @@ -1669,7 +1697,7 @@ int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, return ret; map_bh(bh, inode->i_sb, map.m_pblk); - bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags; + ext4_update_bh_state(bh, map.m_flags); if (buffer_unwritten(bh)) { /* A delayed write to unwritten bh should be marked @@ -3559,6 +3587,35 @@ int ext4_can_truncate(struct inode *inode) } /* + * We have to make sure i_disksize gets properly updated before we truncate + * page cache due to hole punching or zero range. Otherwise i_disksize update + * can get lost as it may have been postponed to submission of writeback but + * that will never happen after we truncate page cache. + */ +int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset, + loff_t len) +{ + handle_t *handle; + loff_t size = i_size_read(inode); + + WARN_ON(!mutex_is_locked(&inode->i_mutex)); + if (offset > size || offset + len < size) + return 0; + + if (EXT4_I(inode)->i_disksize >= size) + return 0; + + handle = ext4_journal_start(inode, EXT4_HT_MISC, 1); + if (IS_ERR(handle)) + return PTR_ERR(handle); + ext4_update_i_disksize(inode, size); + ext4_mark_inode_dirty(handle, inode); + ext4_journal_stop(handle); + + return 0; +} + +/* * ext4_punch_hole: punches a hole in a file by releaseing the blocks * associated with the given offset and length * @@ -3623,17 +3680,26 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) } + /* Wait all existing dio workers, newcomers will block on i_mutex */ + ext4_inode_block_unlocked_dio(inode); + inode_dio_wait(inode); + + /* + * Prevent page faults from reinstantiating pages we have released from + * page cache. + */ + down_write(&EXT4_I(inode)->i_mmap_sem); first_block_offset = round_up(offset, sb->s_blocksize); last_block_offset = round_down((offset + length), sb->s_blocksize) - 1; /* Now release the pages and zero block aligned part of pages*/ - if (last_block_offset > first_block_offset) + if (last_block_offset > first_block_offset) { + ret = ext4_update_disksize_before_punch(inode, offset, length); + if (ret) + goto out_dio; truncate_pagecache_range(inode, first_block_offset, last_block_offset); - - /* Wait all existing dio workers, newcomers will block on i_mutex */ - ext4_inode_block_unlocked_dio(inode); - inode_dio_wait(inode); + } if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) credits = ext4_writepage_trans_blocks(inode); @@ -3680,16 +3746,12 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) if (IS_SYNC(inode)) ext4_handle_sync(handle); - /* Now release the pages again to reduce race window */ - if (last_block_offset > first_block_offset) - truncate_pagecache_range(inode, first_block_offset, - last_block_offset); - inode->i_mtime = inode->i_ctime = ext4_current_time(inode); ext4_mark_inode_dirty(handle, inode); out_stop: ext4_journal_stop(handle); out_dio: + up_write(&EXT4_I(inode)->i_mmap_sem); ext4_inode_resume_unlocked_dio(inode); out_mutex: mutex_unlock(&inode->i_mutex); @@ -4823,6 +4885,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) } else ext4_wait_for_tail_page_commit(inode); } + down_write(&EXT4_I(inode)->i_mmap_sem); /* * Truncate pagecache after we've waited for commit * in data=journal mode to make pages freeable. @@ -4830,6 +4893,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) truncate_pagecache(inode, inode->i_size); if (shrink) ext4_truncate(inode); + up_write(&EXT4_I(inode)->i_mmap_sem); } if (!rc) { @@ -5081,6 +5145,8 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) might_sleep(); trace_ext4_mark_inode_dirty(inode, _RET_IP_); err = ext4_reserve_inode_write(handle, inode, &iloc); + if (err) + return err; if (ext4_handle_valid(handle) && EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize && !ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND)) { @@ -5111,9 +5177,7 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) } } } - if (!err) - err = ext4_mark_iloc_dirty(handle, inode, &iloc); - return err; + return ext4_mark_iloc_dirty(handle, inode, &iloc); } /* @@ -5278,6 +5342,8 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) sb_start_pagefault(inode->i_sb); file_update_time(vma->vm_file); + + down_read(&EXT4_I(inode)->i_mmap_sem); /* Delalloc case is easy... */ if (test_opt(inode->i_sb, DELALLOC) && !ext4_should_journal_data(inode) && @@ -5347,6 +5413,19 @@ retry_alloc: out_ret: ret = block_page_mkwrite_return(ret); out: + up_read(&EXT4_I(inode)->i_mmap_sem); sb_end_pagefault(inode->i_sb); return ret; } + +int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + struct inode *inode = file_inode(vma->vm_file); + int err; + + down_read(&EXT4_I(inode)->i_mmap_sem); + err = filemap_fault(vma, vmf); + up_read(&EXT4_I(inode)->i_mmap_sem); + + return err; +} |