diff options
Diffstat (limited to 'fs/fs-writeback.c')
-rw-r--r-- | fs/fs-writeback.c | 120 |
1 files changed, 55 insertions, 65 deletions
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 67fbe6837fdb..0ce7ff7a2ce8 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -45,7 +45,6 @@ struct wb_completion { struct wb_writeback_work { long nr_pages; struct super_block *sb; - unsigned long *older_than_this; enum writeback_sync_modes sync_mode; unsigned int tagged_writepages:1; unsigned int for_kupdate:1; @@ -160,7 +159,9 @@ static void inode_io_list_del_locked(struct inode *inode, struct bdi_writeback *wb) { assert_spin_locked(&wb->list_lock); + assert_spin_locked(&inode->i_lock); + inode->i_state &= ~I_SYNC_QUEUED; list_del_init(&inode->i_io_list); wb_io_lists_depopulated(wb); } @@ -269,6 +270,7 @@ void __inode_attach_wb(struct inode *inode, struct page *page) if (unlikely(cmpxchg(&inode->i_wb, NULL, wb))) wb_put(wb); } +EXPORT_SYMBOL_GPL(__inode_attach_wb); /** * locked_inode_to_wb_and_lock_list - determine a locked inode's wb and lock it @@ -510,9 +512,14 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id) /* find and pin the new wb */ rcu_read_lock(); memcg_css = css_from_id(new_wb_id, &memory_cgrp_subsys); - if (memcg_css) - isw->new_wb = wb_get_create(bdi, memcg_css, GFP_ATOMIC); + if (memcg_css && !css_tryget(memcg_css)) + memcg_css = NULL; rcu_read_unlock(); + if (!memcg_css) + goto out_free; + + isw->new_wb = wb_get_create(bdi, memcg_css, GFP_ATOMIC); + css_put(memcg_css); if (!isw->new_wb) goto out_free; @@ -1033,7 +1040,9 @@ void inode_io_list_del(struct inode *inode) struct bdi_writeback *wb; wb = inode_to_wb_and_lock_list(inode); + spin_lock(&inode->i_lock); inode_io_list_del_locked(inode, wb); + spin_unlock(&inode->i_lock); spin_unlock(&wb->list_lock); } @@ -1046,8 +1055,10 @@ void inode_io_list_del(struct inode *inode) * the case then the inode must have been redirtied while it was being written * out and we don't reset its dirtied_when. */ -static void redirty_tail(struct inode *inode, struct bdi_writeback *wb) +static void redirty_tail_locked(struct inode *inode, struct bdi_writeback *wb) { + assert_spin_locked(&inode->i_lock); + if (!list_empty(&wb->b_dirty)) { struct inode *tail; @@ -1056,6 +1067,14 @@ static void redirty_tail(struct inode *inode, struct bdi_writeback *wb) inode->dirtied_when = jiffies; } inode_io_list_move_locked(inode, wb, &wb->b_dirty); + inode->i_state &= ~I_SYNC_QUEUED; +} + +static void redirty_tail(struct inode *inode, struct bdi_writeback *wb) +{ + spin_lock(&inode->i_lock); + redirty_tail_locked(inode, wb); + spin_unlock(&inode->i_lock); } /* @@ -1094,16 +1113,13 @@ static bool inode_dirtied_after(struct inode *inode, unsigned long t) #define EXPIRE_DIRTY_ATIME 0x0001 /* - * Move expired (dirtied before work->older_than_this) dirty inodes from + * Move expired (dirtied before dirtied_before) dirty inodes from * @delaying_queue to @dispatch_queue. */ static int move_expired_inodes(struct list_head *delaying_queue, struct list_head *dispatch_queue, - int flags, - struct wb_writeback_work *work) + int flags, unsigned long dirtied_before) { - unsigned long *older_than_this = NULL; - unsigned long expire_time; LIST_HEAD(tmp); struct list_head *pos, *node; struct super_block *sb = NULL; @@ -1111,21 +1127,17 @@ static int move_expired_inodes(struct list_head *delaying_queue, int do_sb_sort = 0; int moved = 0; - if ((flags & EXPIRE_DIRTY_ATIME) == 0) - older_than_this = work->older_than_this; - else if (!work->for_sync) { - expire_time = jiffies - (dirtytime_expire_interval * HZ); - older_than_this = &expire_time; - } while (!list_empty(delaying_queue)) { inode = wb_inode(delaying_queue->prev); - if (older_than_this && - inode_dirtied_after(inode, *older_than_this)) + if (inode_dirtied_after(inode, dirtied_before)) break; list_move(&inode->i_io_list, &tmp); moved++; + spin_lock(&inode->i_lock); if (flags & EXPIRE_DIRTY_ATIME) - set_bit(__I_DIRTY_TIME_EXPIRED, &inode->i_state); + inode->i_state |= I_DIRTY_TIME_EXPIRED; + inode->i_state |= I_SYNC_QUEUED; + spin_unlock(&inode->i_lock); if (sb_is_blkdev_sb(inode->i_sb)) continue; if (sb && sb != inode->i_sb) @@ -1163,18 +1175,22 @@ out: * | * +--> dequeue for IO */ -static void queue_io(struct bdi_writeback *wb, struct wb_writeback_work *work) +static void queue_io(struct bdi_writeback *wb, struct wb_writeback_work *work, + unsigned long dirtied_before) { int moved; + unsigned long time_expire_jif = dirtied_before; assert_spin_locked(&wb->list_lock); list_splice_init(&wb->b_more_io, &wb->b_io); - moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, 0, work); + moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, 0, dirtied_before); + if (!work->for_sync) + time_expire_jif = jiffies - dirtytime_expire_interval * HZ; moved += move_expired_inodes(&wb->b_dirty_time, &wb->b_io, - EXPIRE_DIRTY_ATIME, work); + EXPIRE_DIRTY_ATIME, time_expire_jif); if (moved) wb_io_lists_populated(wb); - trace_writeback_queue_io(wb, work, moved); + trace_writeback_queue_io(wb, work, dirtied_before, moved); } static int write_inode(struct inode *inode, struct writeback_control *wbc) @@ -1268,7 +1284,7 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb, * writeback is not making progress due to locked * buffers. Skip this inode for now. */ - redirty_tail(inode, wb); + redirty_tail_locked(inode, wb); return; } @@ -1288,7 +1304,7 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb, * retrying writeback of the dirty page/inode * that cannot be performed immediately. */ - redirty_tail(inode, wb); + redirty_tail_locked(inode, wb); } } else if (inode->i_state & I_DIRTY) { /* @@ -1296,10 +1312,11 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb, * such as delayed allocation during submission or metadata * updates after data IO completion. */ - redirty_tail(inode, wb); + redirty_tail_locked(inode, wb); } else if (inode->i_state & I_DIRTY_TIME) { inode->dirtied_when = jiffies; inode_io_list_move_locked(inode, wb, &wb->b_dirty_time); + inode->i_state &= ~I_SYNC_QUEUED; } else { /* The inode is clean. Remove from writeback lists. */ inode_io_list_del_locked(inode, wb); @@ -1542,8 +1559,8 @@ static long writeback_sb_inodes(struct super_block *sb, */ spin_lock(&inode->i_lock); if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) { + redirty_tail_locked(inode, wb); spin_unlock(&inode->i_lock); - redirty_tail(inode, wb); continue; } if ((inode->i_state & I_SYNC) && wbc.sync_mode != WB_SYNC_ALL) { @@ -1684,7 +1701,7 @@ static long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages, blk_start_plug(&plug); spin_lock(&wb->list_lock); if (list_empty(&wb->b_io)) - queue_io(wb, &work); + queue_io(wb, &work, jiffies); __writeback_inodes_wb(wb, &work); spin_unlock(&wb->list_lock); blk_finish_plug(&plug); @@ -1704,7 +1721,7 @@ static long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages, * takes longer than a dirty_writeback_interval interval, then leave a * one-second gap. * - * older_than_this takes precedence over nr_to_write. So we'll only write back + * dirtied_before takes precedence over nr_to_write. So we'll only write back * all dirty pages if they are all attached to "old" mappings. */ static long wb_writeback(struct bdi_writeback *wb, @@ -1712,14 +1729,11 @@ static long wb_writeback(struct bdi_writeback *wb, { unsigned long wb_start = jiffies; long nr_pages = work->nr_pages; - unsigned long oldest_jif; + unsigned long dirtied_before = jiffies; struct inode *inode; long progress; struct blk_plug plug; - oldest_jif = jiffies; - work->older_than_this = &oldest_jif; - blk_start_plug(&plug); spin_lock(&wb->list_lock); for (;;) { @@ -1753,14 +1767,14 @@ static long wb_writeback(struct bdi_writeback *wb, * safe. */ if (work->for_kupdate) { - oldest_jif = jiffies - + dirtied_before = jiffies - msecs_to_jiffies(dirty_expire_interval * 10); } else if (work->for_background) - oldest_jif = jiffies; + dirtied_before = jiffies; trace_writeback_start(wb, work); if (list_empty(&wb->b_io)) - queue_io(wb, work); + queue_io(wb, work, dirtied_before); if (work->sb) progress = writeback_sb_inodes(work->sb, wb, work); else @@ -1920,7 +1934,7 @@ void wb_workfn(struct work_struct *work) struct bdi_writeback, dwork); long pages_written; - set_worker_desc("flush-%s", dev_name(wb->bdi->dev)); + set_worker_desc("flush-%s", bdi_dev_name(wb->bdi)); current->flags |= PF_SWAPWRITE; if (likely(!current_is_workqueue_rescuer() || @@ -2031,28 +2045,6 @@ int dirtytime_interval_handler(struct ctl_table *table, int write, return ret; } -static noinline void block_dump___mark_inode_dirty(struct inode *inode) -{ - if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) { - struct dentry *dentry; - const char *name = "?"; - - dentry = d_find_alias(inode); - if (dentry) { - spin_lock(&dentry->d_lock); - name = (const char *) dentry->d_name.name; - } - printk(KERN_DEBUG - "%s(%d): dirtied inode %lu (%s) on %s\n", - current->comm, task_pid_nr(current), inode->i_ino, - name, inode->i_sb->s_id); - if (dentry) { - spin_unlock(&dentry->d_lock); - dput(dentry); - } - } -} - /** * __mark_inode_dirty - internal function * @inode: inode to mark @@ -2111,9 +2103,6 @@ void __mark_inode_dirty(struct inode *inode, int flags) (dirtytime && (inode->i_state & I_DIRTY_INODE))) return; - if (unlikely(block_dump > 1)) - block_dump___mark_inode_dirty(inode); - spin_lock(&inode->i_lock); if (dirtytime && (inode->i_state & I_DIRTY_INODE)) goto out_unlock_inode; @@ -2127,11 +2116,12 @@ void __mark_inode_dirty(struct inode *inode, int flags) inode->i_state |= flags; /* - * If the inode is being synced, just update its dirty state. - * The unlocker will place the inode on the appropriate - * superblock list, based upon its state. + * If the inode is queued for writeback by flush worker, just + * update its dirty state. Once the flush worker is done with + * the inode it will place it on the appropriate superblock + * list, based upon its state. */ - if (inode->i_state & I_SYNC) + if (inode->i_state & I_SYNC_QUEUED) goto out_unlock_inode; /* |