From d8a8559cd7a9ccac98d5f6f13297a2ff68a43627 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 2 Sep 2009 12:34:32 +0200 Subject: writeback: get rid of generic_sync_sb_inodes() export This adds two new exported functions: - writeback_inodes_sb(), which only attempts to writeback dirty inodes on this super_block, for WB_SYNC_NONE writeout. - sync_inodes_sb(), which writes out all dirty inodes on this super_block and also waits for the IO to complete. Acked-by: Jan Kara Signed-off-by: Jens Axboe --- include/linux/writeback.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux/writeback.h') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 3224820c8514..07039299603d 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -78,7 +78,8 @@ struct writeback_control { */ void writeback_inodes(struct writeback_control *wbc); int inode_wait(void *); -void sync_inodes_sb(struct super_block *, int wait); +long writeback_inodes_sb(struct super_block *); +long sync_inodes_sb(struct super_block *); /* writeback.h requires fs.h; it, too, is not included from here. */ static inline void wait_on_inode(struct inode *inode) -- cgit v1.2.3 From 03ba3782e8dcc5b0e1efe440d33084f066e38cae Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 9 Sep 2009 09:08:54 +0200 Subject: writeback: switch to per-bdi threads for flushing data This gets rid of pdflush for bdi writeout and kupdated style cleaning. pdflush writeout suffers from lack of locality and also requires more threads to handle the same workload, since it has to work in a non-blocking fashion against each queue. This also introduces lumpy behaviour and potential request starvation, since pdflush can be starved for queue access if others are accessing it. A sample ffsb workload that does random writes to files is about 8% faster here on a simple SATA drive during the benchmark phase. File layout also seems a LOT more smooth in vmstat: r b swpd free buff cache si so bi bo in cs us sy id wa 0 1 0 608848 2652 375372 0 0 0 71024 604 24 1 10 48 42 0 1 0 549644 2712 433736 0 0 0 60692 505 27 1 8 48 44 1 0 0 476928 2784 505192 0 0 4 29540 553 24 0 9 53 37 0 1 0 457972 2808 524008 0 0 0 54876 331 16 0 4 38 58 0 1 0 366128 2928 614284 0 0 4 92168 710 58 0 13 53 34 0 1 0 295092 3000 684140 0 0 0 62924 572 23 0 9 53 37 0 1 0 236592 3064 741704 0 0 4 58256 523 17 0 8 48 44 0 1 0 165608 3132 811464 0 0 0 57460 560 21 0 8 54 38 0 1 0 102952 3200 873164 0 0 4 74748 540 29 1 10 48 41 0 1 0 48604 3252 926472 0 0 0 53248 469 29 0 7 47 45 where vanilla tends to fluctuate a lot in the creation phase: r b swpd free buff cache si so bi bo in cs us sy id wa 1 1 0 678716 5792 303380 0 0 0 74064 565 50 1 11 52 36 1 0 0 662488 5864 319396 0 0 4 352 302 329 0 2 47 51 0 1 0 599312 5924 381468 0 0 0 78164 516 55 0 9 51 40 0 1 0 519952 6008 459516 0 0 4 78156 622 56 1 11 52 37 1 1 0 436640 6092 541632 0 0 0 82244 622 54 0 11 48 41 0 1 0 436640 6092 541660 0 0 0 8 152 39 0 0 51 49 0 1 0 332224 6200 644252 0 0 4 102800 728 46 1 13 49 36 1 0 0 274492 6260 701056 0 0 4 12328 459 49 0 7 50 43 0 1 0 211220 6324 763356 0 0 0 106940 515 37 1 10 51 39 1 0 0 160412 6376 813468 0 0 0 8224 415 43 0 6 49 45 1 1 0 85980 6452 886556 0 0 4 113516 575 39 1 11 54 34 0 2 0 85968 6452 886620 0 0 0 1640 158 211 0 0 46 54 A 10 disk test with btrfs performs 26% faster with per-bdi flushing. A SSD based writeback test on XFS performs over 20% better as well, with the throughput being very stable around 1GB/sec, where pdflush only manages 750MB/sec and fluctuates wildly while doing so. Random buffered writes to many files behave a lot better as well, as does random mmap'ed writes. A separate thread is added to sync the super blocks. In the long term, adding sync_supers_bdi() functionality could get rid of this thread again. Signed-off-by: Jens Axboe --- include/linux/writeback.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux/writeback.h') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 07039299603d..cef75527a14c 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -40,6 +40,8 @@ enum writeback_sync_modes { struct writeback_control { struct backing_dev_info *bdi; /* If !NULL, only write back this queue */ + struct super_block *sb; /* if !NULL, only write inodes from + this super_block */ enum writeback_sync_modes sync_mode; unsigned long *older_than_this; /* If !NULL, only write back inodes older than this */ @@ -76,10 +78,13 @@ struct writeback_control { /* * fs/fs-writeback.c */ -void writeback_inodes(struct writeback_control *wbc); +struct bdi_writeback; int inode_wait(void *); long writeback_inodes_sb(struct super_block *); long sync_inodes_sb(struct super_block *); +void writeback_inodes_wbc(struct writeback_control *wbc); +long wb_do_writeback(struct bdi_writeback *wb, int force_wait); +void wakeup_flusher_threads(long nr_pages); /* writeback.h requires fs.h; it, too, is not included from here. */ static inline void wait_on_inode(struct inode *inode) @@ -99,7 +104,6 @@ static inline void inode_sync_wait(struct inode *inode) /* * mm/page-writeback.c */ -int wakeup_pdflush(long nr_pages); void laptop_io_completion(void); void laptop_sync_completion(void); void throttle_vm_writeout(gfp_t gfp_mask); -- cgit v1.2.3 From d0bceac747b547c0b4769b91fec7d3c15600153f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 18 May 2009 08:20:32 +0200 Subject: writeback: get rid of pdflush completely It is now unused, so kill it off. Signed-off-by: Jens Axboe --- include/linux/writeback.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/linux/writeback.h') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index cef75527a14c..78b1e4684cc9 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -13,17 +13,6 @@ extern spinlock_t inode_lock; extern struct list_head inode_in_use; extern struct list_head inode_unused; -/* - * Yes, writeback.h requires sched.h - * No, sched.h is not included from here. - */ -static inline int task_is_pdflush(struct task_struct *task) -{ - return task->flags & PF_FLUSHER; -} - -#define current_is_pdflush() task_is_pdflush(current) - /* * fs/fs-writeback.c */ @@ -155,7 +144,6 @@ balance_dirty_pages_ratelimited(struct address_space *mapping) typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc, void *data); -int pdflush_operation(void (*fn)(unsigned long), unsigned long arg0); int generic_writepages(struct address_space *mapping, struct writeback_control *wbc); int write_cache_pages(struct address_space *mapping, -- cgit v1.2.3 From 18f2ee705d98034b0f229a3202d827468d4bffd9 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 18 Aug 2009 18:43:15 +0200 Subject: vfs: Remove generic_osync_inode() and sync_page_range{_nolock}() Remove these three functions since nobody uses them anymore. Signed-off-by: Jan Kara --- include/linux/writeback.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux/writeback.h') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 78b1e4684cc9..d347632f1861 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -150,10 +150,6 @@ int write_cache_pages(struct address_space *mapping, struct writeback_control *wbc, writepage_t writepage, void *data); int do_writepages(struct address_space *mapping, struct writeback_control *wbc); -int sync_page_range(struct inode *inode, struct address_space *mapping, - loff_t pos, loff_t count); -int sync_page_range_nolock(struct inode *inode, struct address_space *mapping, - loff_t pos, loff_t count); void set_page_dirty_balance(struct page *page, int page_mkwrite); void writeback_set_ratelimit(void); -- cgit v1.2.3 From 1fe06ad89255c211fe100d7f690d10b161398df8 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 15 Sep 2009 15:10:20 +0200 Subject: writeback: get rid of wbc->for_writepages It's only set, it's never checked. Kill it. Acked-by: Jan Kara Signed-off-by: Jens Axboe --- include/linux/writeback.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux/writeback.h') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index d347632f1861..48a054e2b716 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -50,7 +50,6 @@ struct writeback_control { unsigned encountered_congestion:1; /* An output: a queue is full */ unsigned for_kupdate:1; /* A kupdate writeback */ unsigned for_reclaim:1; /* Invoked from the page allocator */ - unsigned for_writepages:1; /* This is a writepages() call */ unsigned range_cyclic:1; /* range_start is cyclic */ unsigned more_io:1; /* more io to be dispatched */ /* -- cgit v1.2.3 From b6e51316daede0633e9274e1e30391cfa4747877 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 16 Sep 2009 15:13:54 +0200 Subject: writeback: separate starting of sync vs opportunistic writeback bdi_start_writeback() is currently split into two paths, one for WB_SYNC_NONE and one for WB_SYNC_ALL. Add bdi_sync_writeback() for WB_SYNC_ALL writeback and let bdi_start_writeback() handle only WB_SYNC_NONE. Push down the writeback_control allocation and only accept the parameters that make sense for each function. This cleans up the API considerably. Signed-off-by: Jens Axboe --- include/linux/writeback.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux/writeback.h') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 48a054e2b716..75cf58666ff9 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -68,8 +68,8 @@ struct writeback_control { */ struct bdi_writeback; int inode_wait(void *); -long writeback_inodes_sb(struct super_block *); -long sync_inodes_sb(struct super_block *); +void writeback_inodes_sb(struct super_block *); +void sync_inodes_sb(struct super_block *); void writeback_inodes_wbc(struct writeback_control *wbc); long wb_do_writeback(struct bdi_writeback *wb, int force_wait); void wakeup_flusher_threads(long nr_pages); -- cgit v1.2.3 From 8d65af789f3e2cf4cfbdbf71a0f7a61ebcd41d38 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 23 Sep 2009 15:57:19 -0700 Subject: sysctl: remove "struct file *" argument of ->proc_handler It's unused. It isn't needed -- read or write flag is already passed and sysctl shouldn't care about the rest. It _was_ used in two places at arch/frv for some reason. Signed-off-by: Alexey Dobriyan Cc: David Howells Cc: "Eric W. Biederman" Cc: Al Viro Cc: Ralf Baechle Cc: Martin Schwidefsky Cc: Ingo Molnar Cc: "David S. Miller" Cc: James Morris Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/writeback.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'include/linux/writeback.h') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 75cf58666ff9..66ebddcff664 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -110,21 +110,20 @@ extern int laptop_mode; extern unsigned long determine_dirtyable_memory(void); extern int dirty_background_ratio_handler(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos); extern int dirty_background_bytes_handler(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos); extern int dirty_ratio_handler(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos); extern int dirty_bytes_handler(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos); struct ctl_table; -struct file; -int dirty_writeback_centisecs_handler(struct ctl_table *, int, struct file *, +int dirty_writeback_centisecs_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); void get_dirty_limits(unsigned long *pbackground, unsigned long *pdirty, -- cgit v1.2.3 From b17621fed6aa039387e35f9b4d34d98f213e5673 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Thu, 3 Dec 2009 13:54:25 +0100 Subject: writeback: introduce wbc.for_background It will lower the flush priority for NFS, and maybe more in future. Signed-off-by: Wu Fengguang Cc: Trond Myklebust Cc: Jens Axboe Cc: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/writeback.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/writeback.h') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 66ebddcff664..705f01fe413a 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -49,6 +49,7 @@ struct writeback_control { unsigned nonblocking:1; /* Don't get stuck on request queues */ unsigned encountered_congestion:1; /* An output: a queue is full */ unsigned for_kupdate:1; /* A kupdate writeback */ + unsigned for_background:1; /* A background writeback */ unsigned for_reclaim:1; /* Invoked from the page allocator */ unsigned range_cyclic:1; /* range_start is cyclic */ unsigned more_io:1; /* more io to be dispatched */ -- cgit v1.2.3 From eaff8079d4f1016a12e34ab323737314f24127dd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 17 Dec 2009 14:25:01 +0100 Subject: kill I_LOCK After I_SYNC was split from I_LOCK the leftover is always used together with I_NEW and thus superflous. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- include/linux/writeback.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux/writeback.h') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 705f01fe413a..c18c008f4bbf 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -79,8 +79,7 @@ void wakeup_flusher_threads(long nr_pages); static inline void wait_on_inode(struct inode *inode) { might_sleep(); - wait_on_bit(&inode->i_state, __I_LOCK, inode_wait, - TASK_UNINTERRUPTIBLE); + wait_on_bit(&inode->i_state, __I_NEW, inode_wait, TASK_UNINTERRUPTIBLE); } static inline void inode_sync_wait(struct inode *inode) { -- cgit v1.2.3 From 17bd55d037a02b04d9119511cfd1a4b985d20f63 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Wed, 23 Dec 2009 07:57:07 -0500 Subject: fs-writeback: Add helper function to start writeback if idle ext4, at least, would like to start pushing on writeback if it starts to get close to ENOSPC when reserving worst-case blocks for delalloc writes. Writing out delalloc data will convert those worst-case predictions into usually smaller actual usage, freeing up space before we hit ENOSPC based on this speculation. Thanks to Jens for the suggestion for the helper function, & the naming help. I've made the helper return status on whether writeback was started even though I don't plan to use it in the ext4 patch; it seems like it would be potentially useful to test this in some cases. Signed-off-by: Eric Sandeen Acked-by: Jan Kara --- include/linux/writeback.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/writeback.h') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index c18c008f4bbf..76e8903cd204 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -70,6 +70,7 @@ struct writeback_control { struct bdi_writeback; int inode_wait(void *); void writeback_inodes_sb(struct super_block *); +int writeback_inodes_sb_if_idle(struct super_block *); void sync_inodes_sb(struct super_block *); void writeback_inodes_wbc(struct writeback_control *wbc); long wb_do_writeback(struct bdi_writeback *wb, int force_wait); -- cgit v1.2.3 From f11c9c5c259cb2c3d698548dc3936f773ab1f5b9 Mon Sep 17 00:00:00 2001 From: Edward Shishkin Date: Thu, 11 Mar 2010 14:09:47 -0800 Subject: vfs: improve writeback_inodes_wb() Do not pin/unpin superblock for every inode in writeback_inodes_wb(), pin it for the whole group of inodes which belong to the same superblock and call writeback_sb_inodes() handler for them. Signed-off-by: Edward Shishkin Cc: Jens Axboe Cc: Wu Fengguang Signed-off-by: Andrew Morton Signed-off-by: Jens Axboe --- include/linux/writeback.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux/writeback.h') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 76e8903cd204..36520ded3e06 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -34,6 +34,9 @@ struct writeback_control { enum writeback_sync_modes sync_mode; unsigned long *older_than_this; /* If !NULL, only write back inodes older than this */ + unsigned long wb_start; /* Time writeback_inodes_wb was + called. This is needed to avoid + extra jobs and livelock */ long nr_to_write; /* Write this many pages, and decrement this for each page written */ long pages_skipped; /* Pages which were not written */ -- cgit v1.2.3 From 31373d09da5b7fe21fe6f781e92bd534a3495f00 Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Tue, 6 Apr 2010 14:25:14 +0200 Subject: laptop-mode: Make flushes per-device One of the features of laptop-mode is that it forces a writeout of dirty pages if something else triggers a physical read or write from a device. The current implementation flushes pages on all devices, rather than only the one that triggered the flush. This patch alters the behaviour so that only the recently accessed block device is flushed, preventing other disks being spun up for no terribly good reason. Signed-off-by: Matthew Garrett Signed-off-by: Jens Axboe --- include/linux/writeback.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux/writeback.h') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 36520ded3e06..eb38a2c645f6 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -96,8 +96,10 @@ static inline void inode_sync_wait(struct inode *inode) /* * mm/page-writeback.c */ -void laptop_io_completion(void); +void laptop_io_completion(struct backing_dev_info *info); void laptop_sync_completion(void); +void laptop_mode_sync(struct work_struct *work); +void laptop_mode_timer_fn(unsigned long data); void throttle_vm_writeout(gfp_t gfp_mask); /* These are exported to sysctl. */ -- cgit v1.2.3 From e913fc825dc685a444cb4c1d0f9d32f372f59861 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 17 May 2010 12:55:07 +0200 Subject: writeback: fix WB_SYNC_NONE writeback from umount When umount calls sync_filesystem(), we first do a WB_SYNC_NONE writeback to kick off writeback of pending dirty inodes, then follow that up with a WB_SYNC_ALL to wait for it. Since umount already holds the sb s_umount mutex, WB_SYNC_NONE ends up doing nothing and all writeback happens as WB_SYNC_ALL. This can greatly slow down umount, since WB_SYNC_ALL writeback is a data integrity operation and thus a bigger hammer than simple WB_SYNC_NONE. For barrier aware file systems it's a lot slower. Signed-off-by: Jens Axboe --- include/linux/writeback.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux/writeback.h') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index eb38a2c645f6..47e1c686cb02 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -65,6 +65,15 @@ struct writeback_control { * so we use a single control to update them */ unsigned no_nrwrite_index_update:1; + + /* + * For WB_SYNC_ALL, the sb must always be pinned. For WB_SYNC_NONE, + * the writeback code will pin the sb for the caller. However, + * for eg umount, the caller does WB_SYNC_NONE but already has + * the sb pinned. If the below is set, caller already has the + * sb pinned. + */ + unsigned sb_pinned:1; }; /* @@ -73,6 +82,7 @@ struct writeback_control { struct bdi_writeback; int inode_wait(void *); void writeback_inodes_sb(struct super_block *); +void writeback_inodes_sb_locked(struct super_block *); int writeback_inodes_sb_if_idle(struct super_block *); void sync_inodes_sb(struct super_block *); void writeback_inodes_wbc(struct writeback_control *wbc); -- cgit v1.2.3 From c2c4986eddaa7dc3d036cb2bfa5c8c5f1f2492a0 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 20 May 2010 09:18:47 +0200 Subject: writeback: fix problem with !CONFIG_BLOCK compilation When CONFIG_BLOCK isn't enabled: mm/page-writeback.c: In function 'laptop_mode_timer_fn': mm/page-writeback.c:708: error: dereferencing pointer to incomplete type mm/page-writeback.c:709: error: dereferencing pointer to incomplete type Fix this by essentially eliminating the laptop sync handlers when CONFIG_BLOCK isn't set, as most are only used from the block layer code. The exception is laptop_sync_completion() which is used from sys_sync(), make that an empty declaration in that case. Reported-by: Randy Dunlap Signed-off-by: Jens Axboe --- include/linux/writeback.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux/writeback.h') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 47e1c686cb02..cc97d6caf2b3 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -106,10 +106,14 @@ static inline void inode_sync_wait(struct inode *inode) /* * mm/page-writeback.c */ +#ifdef CONFIG_BLOCK void laptop_io_completion(struct backing_dev_info *info); void laptop_sync_completion(void); void laptop_mode_sync(struct work_struct *work); void laptop_mode_timer_fn(unsigned long data); +#else +static inline void laptop_sync_completion(void) { } +#endif void throttle_vm_writeout(gfp_t gfp_mask); /* These are exported to sysctl. */ -- cgit v1.2.3 From 0e3c9a2284f5417f196e327c254d0b84c9ee8929 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 1 Jun 2010 11:08:43 +0200 Subject: Revert "writeback: fix WB_SYNC_NONE writeback from umount" This reverts commit e913fc825dc685a444cb4c1d0f9d32f372f59861. We are investigating a hang associated with the WB_SYNC_NONE changes, so revert them for now. Conflicts: fs/fs-writeback.c mm/page-writeback.c Signed-off-by: Jens Axboe --- include/linux/writeback.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/linux/writeback.h') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index cc97d6caf2b3..f64134653a8c 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -65,15 +65,6 @@ struct writeback_control { * so we use a single control to update them */ unsigned no_nrwrite_index_update:1; - - /* - * For WB_SYNC_ALL, the sb must always be pinned. For WB_SYNC_NONE, - * the writeback code will pin the sb for the caller. However, - * for eg umount, the caller does WB_SYNC_NONE but already has - * the sb pinned. If the below is set, caller already has the - * sb pinned. - */ - unsigned sb_pinned:1; }; /* @@ -82,7 +73,6 @@ struct writeback_control { struct bdi_writeback; int inode_wait(void *); void writeback_inodes_sb(struct super_block *); -void writeback_inodes_sb_locked(struct super_block *); int writeback_inodes_sb_if_idle(struct super_block *); void sync_inodes_sb(struct super_block *); void writeback_inodes_wbc(struct writeback_control *wbc); -- cgit v1.2.3 From 0b5649278e39a068aaf91399941bab1b4a4a3cc2 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 9 Jun 2010 10:37:18 +1000 Subject: writeback: pay attention to wbc->nr_to_write in write_cache_pages If a filesystem writes more than one page in ->writepage, write_cache_pages fails to notice this and continues to attempt writeback when wbc->nr_to_write has gone negative - this trace was captured from XFS: wbc_writeback_start: towrt=1024 wbc_writepage: towrt=1024 wbc_writepage: towrt=0 wbc_writepage: towrt=-1 wbc_writepage: towrt=-5 wbc_writepage: towrt=-21 wbc_writepage: towrt=-85 This has adverse effects on filesystem writeback behaviour. write_cache_pages() needs to terminate after a certain number of pages are written, not after a certain number of calls to ->writepage are made. This is a regression introduced by 17bc6c30cf6bfffd816bdc53682dd46fc34a2cf4 ("vfs: Add no_nrwrite_index_update writeback control flag"), but cannot be reverted directly due to subsequent bug fixes that have gone in on top of it. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Linus Torvalds --- include/linux/writeback.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/linux/writeback.h') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index f64134653a8c..d63ef8f9609f 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -56,15 +56,6 @@ struct writeback_control { unsigned for_reclaim:1; /* Invoked from the page allocator */ unsigned range_cyclic:1; /* range_start is cyclic */ unsigned more_io:1; /* more io to be dispatched */ - /* - * write_cache_pages() won't update wbc->nr_to_write and - * mapping->writeback_index if no_nrwrite_index_update - * is set. write_cache_pages() may write more than we - * requested and we want to make sure nr_to_write and - * writeback_index are updated in a consistent manner - * so we use a single control to update them - */ - unsigned no_nrwrite_index_update:1; }; /* -- cgit v1.2.3 From 9c3a8ee8a1d72c5c0d7fbdf426d80e270ddfa54c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 10 Jun 2010 12:07:27 +0200 Subject: writeback: remove writeback_inodes_wbc This was just an odd wrapper around writeback_inodes_wb. Removing this also allows to get rid of the bdi member of struct writeback_control which was rather out of place there. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/writeback.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux/writeback.h') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index d63ef8f9609f..f6756f6a610c 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -27,8 +27,6 @@ enum writeback_sync_modes { * in a manner such that unspecified fields are set to zero. */ struct writeback_control { - struct backing_dev_info *bdi; /* If !NULL, only write back this - queue */ struct super_block *sb; /* if !NULL, only write inodes from this super_block */ enum writeback_sync_modes sync_mode; @@ -66,7 +64,8 @@ int inode_wait(void *); void writeback_inodes_sb(struct super_block *); int writeback_inodes_sb_if_idle(struct super_block *); void sync_inodes_sb(struct super_block *); -void writeback_inodes_wbc(struct writeback_control *wbc); +void writeback_inodes_wb(struct bdi_writeback *wb, + struct writeback_control *wbc); long wb_do_writeback(struct bdi_writeback *wb, int force_wait); void wakeup_flusher_threads(long nr_pages); -- cgit v1.2.3 From edadfb10ba35da7253541e4155aa92eff758ebe6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 10 Jun 2010 12:07:54 +0200 Subject: writeback: split writeback_inodes_wb The case where we have a superblock doesn't require a loop here as we scan over all inodes in writeback_sb_inodes. Split it out into a separate helper to make the code simpler. This also allows to get rid of the sb member in struct writeback_control, which was rather out of place there. Also update the comments in writeback_sb_inodes that explain the handling of inodes from wrong superblocks. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/writeback.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux/writeback.h') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index f6756f6a610c..c24eca71e80c 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -27,8 +27,6 @@ enum writeback_sync_modes { * in a manner such that unspecified fields are set to zero. */ struct writeback_control { - struct super_block *sb; /* if !NULL, only write inodes from - this super_block */ enum writeback_sync_modes sync_mode; unsigned long *older_than_this; /* If !NULL, only write back inodes older than this */ -- cgit v1.2.3 From 16c4042f08919f447d6b2a55679546c9b97c7264 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Wed, 11 Aug 2010 14:17:39 -0700 Subject: writeback: avoid unnecessary calculation of bdi dirty thresholds Split get_dirty_limits() into global_dirty_limits()+bdi_dirty_limit(), so that the latter can be avoided when under global dirty background threshold (which is the normal state for most systems). Signed-off-by: Wu Fengguang Cc: Peter Zijlstra Cc: Christoph Hellwig Cc: Dave Chinner Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/writeback.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux/writeback.h') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index c24eca71e80c..72a5d647a5f2 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -124,8 +124,9 @@ struct ctl_table; int dirty_writeback_centisecs_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); -void get_dirty_limits(unsigned long *pbackground, unsigned long *pdirty, - unsigned long *pbdi_dirty, struct backing_dev_info *bdi); +void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty); +unsigned long bdi_dirty_limit(struct backing_dev_info *bdi, + unsigned long dirty); void page_writeback_init(void); void balance_dirty_pages_ratelimited_nr(struct address_space *mapping, -- cgit v1.2.3 From 9e38d86ff2d8a8db99570e982230861046df32b5 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Sat, 23 Oct 2010 06:55:17 -0400 Subject: fs: Implement lazy LRU updates for inodes Convert the inode LRU to use lazy updates to reduce lock and cacheline traffic. We avoid moving inodes around in the LRU list during iget/iput operations so these frequent operations don't need to access the LRUs. Instead, we defer the refcount checks to reclaim-time and use a per-inode state flag, I_REFERENCED, to tell reclaim that iget has touched the inode in the past. This means that only reclaim should be touching the LRU with any frequency, hence significantly reducing lock acquisitions and the amount contention on LRU updates. This also removes the inode_in_use list, which means we now only have one list for tracking the inode LRU status. This makes it much simpler to split out the LRU list operations under it's own lock. Signed-off-by: Nick Piggin Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Al Viro --- include/linux/writeback.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux/writeback.h') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 72a5d647a5f2..242b6f812ba6 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -10,8 +10,6 @@ struct backing_dev_info; extern spinlock_t inode_lock; -extern struct list_head inode_in_use; -extern struct list_head inode_unused; /* * fs/fs-writeback.c -- cgit v1.2.3 From 92c09c041f15fc88b35f8628e07639f52e1fbb38 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 26 Oct 2010 14:22:03 -0700 Subject: mm: declare some external symbols Declare 'bdi_pending_list' and 'tag_pages_for_writeback()' to remove following sparse warnings: mm/backing-dev.c:46:1: warning: symbol 'bdi_pending_list' was not declared. Should it be static? mm/page-writeback.c:825:6: warning: symbol 'tag_pages_for_writeback' was not declared. Should it be static? Signed-off-by: Namhyung Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/writeback.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux/writeback.h') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 72a5d647a5f2..c7299d2ace6b 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -149,6 +149,8 @@ int write_cache_pages(struct address_space *mapping, int do_writepages(struct address_space *mapping, struct writeback_control *wbc); void set_page_dirty_balance(struct page *page, int page_mkwrite); void writeback_set_ratelimit(void); +void tag_pages_for_writeback(struct address_space *mapping, + pgoff_t start, pgoff_t end); /* pdflush.c */ extern int nr_pdflush_threads; /* Global so it can be exported to sysctl -- cgit v1.2.3 From 5b41d92437f1ae19b3f3ffa3b16589fd5df50ac0 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Wed, 27 Oct 2010 21:30:13 -0400 Subject: ext4: implement writeback livelock avoidance using page tagging This is analogous to Jan Kara's commit, f446daaea9d4a420d16c606f755f3689dcb2d0ce mm: implement writeback livelock avoidance using page tagging but since we forked write_cache_pages, we need to reimplement it there (and in ext4_da_writepages, since range_cyclic handling was moved to there) If you start a large buffered IO to a file, and then set fsync after it, you'll find that fsync does not complete until the other IO stops. If you continue re-dirtying the file (say, putting dd with conv=notrunc in a loop), when fsync finally completes (after all IO is done), it reports via tracing that it has written many more pages than the file contains; in other words it has synced and re-synced pages in the file multiple times. This then leads to problems with our writeback_index update, since it advances it by pages written, and essentially sets writeback_index off the end of the file... With the following patch, we only sync as much as was dirty at the time of the sync. Signed-off-by: Eric Sandeen Signed-off-by: "Theodore Ts'o" --- include/linux/writeback.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux/writeback.h') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 72a5d647a5f2..3d132bfb4f3d 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -143,6 +143,8 @@ typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc, int generic_writepages(struct address_space *mapping, struct writeback_control *wbc); +void tag_pages_for_writeback(struct address_space *mapping, + pgoff_t start, pgoff_t end); int write_cache_pages(struct address_space *mapping, struct writeback_control *wbc, writepage_t writepage, void *data); -- cgit v1.2.3 From 3259f8bed2f0f57c2fdcdac1b510c3fa319ef97e Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 29 Oct 2010 11:16:17 -0400 Subject: Add new functions for triggering inode writeback When btrfs is running low on metadata space, it needs to force delayed allocation pages to disk. It currently does this with a suboptimal walk of a private list of inodes with delayed allocation, and it would be much better if we used the generic flusher threads. writeback_inodes_sb_if_idle would be ideal, but it waits for the flusher thread to start IO on all the dirty pages in the FS before it returns. This adds variants of writeback_inodes_sb* that allow the caller to control how many pages get sent down. Signed-off-by: Chris Mason --- include/linux/writeback.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux/writeback.h') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 72a5d647a5f2..a4cf84511e79 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -60,7 +60,9 @@ struct writeback_control { struct bdi_writeback; int inode_wait(void *); void writeback_inodes_sb(struct super_block *); +void writeback_inodes_sb_nr(struct super_block *, unsigned long nr); int writeback_inodes_sb_if_idle(struct super_block *); +int writeback_inodes_sb_nr_if_idle(struct super_block *, unsigned long nr); void sync_inodes_sb(struct super_block *); void writeback_inodes_wb(struct bdi_writeback *wb, struct writeback_control *wbc); -- cgit v1.2.3