From d645ded91de8f29ec5bedbddaf8cb908cea428cf Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Wed, 25 Sep 2019 16:46:16 -0700 Subject: include/trace/events/writeback.h: fix -Wstringop-truncation warnings [ Upstream commit d1a445d3b86c9341ce7a0954c23be0edb5c9bec5 ] There are many of those warnings. In file included from ./arch/powerpc/include/asm/paca.h:15, from ./arch/powerpc/include/asm/current.h:13, from ./include/linux/thread_info.h:21, from ./include/asm-generic/preempt.h:5, from ./arch/powerpc/include/generated/asm/preempt.h:1, from ./include/linux/preempt.h:78, from ./include/linux/spinlock.h:51, from fs/fs-writeback.c:19: In function 'strncpy', inlined from 'perf_trace_writeback_page_template' at ./include/trace/events/writeback.h:56:1: ./include/linux/string.h:260:9: warning: '__builtin_strncpy' specified bound 32 equals destination size [-Wstringop-truncation] return __builtin_strncpy(p, q, size); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Fix it by using the new strscpy_pad() which was introduced in "lib/string: Add strscpy_pad() function" and will always be NUL-terminated instead of strncpy(). Also, change strlcpy() to use strscpy_pad() in this file for consistency. Link: http://lkml.kernel.org/r/1564075099-27750-1-git-send-email-cai@lca.pw Fixes: 455b2864686d ("writeback: Initial tracing support") Fixes: 028c2dd184c0 ("writeback: Add tracing to balance_dirty_pages") Fixes: e84d0a4f8e39 ("writeback: trace event writeback_queue_io") Fixes: b48c104d2211 ("writeback: trace event bdi_dirty_ratelimit") Fixes: cc1676d917f3 ("writeback: Move requeueing when I_SYNC set to writeback_sb_inodes()") Fixes: 9fb0a7da0c52 ("writeback: add more tracepoints") Signed-off-by: Qian Cai Reviewed-by: Jan Kara Cc: Tobin C. Harding Cc: Steven Rostedt (VMware) Cc: Ingo Molnar Cc: Tejun Heo Cc: Dave Chinner Cc: Fengguang Wu Cc: Jens Axboe Cc: Joe Perches Cc: Kees Cook Cc: Jann Horn Cc: Jonathan Corbet Cc: Nitin Gote Cc: Rasmus Villemoes Cc: Stephen Kitt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- include/trace/events/writeback.h | 38 ++++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 18 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index 2609b1c3549e..07a912af1dd7 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -65,8 +65,9 @@ TRACE_EVENT(writeback_dirty_page, ), TP_fast_assign( - strncpy(__entry->name, - mapping ? dev_name(inode_to_bdi(mapping->host)->dev) : "(unknown)", 32); + strscpy_pad(__entry->name, + mapping ? dev_name(inode_to_bdi(mapping->host)->dev) : "(unknown)", + 32); __entry->ino = mapping ? mapping->host->i_ino : 0; __entry->index = page->index; ), @@ -95,8 +96,8 @@ DECLARE_EVENT_CLASS(writeback_dirty_inode_template, struct backing_dev_info *bdi = inode_to_bdi(inode); /* may be called for files on pseudo FSes w/ unregistered bdi */ - strncpy(__entry->name, - bdi->dev ? dev_name(bdi->dev) : "(unknown)", 32); + strscpy_pad(__entry->name, + bdi->dev ? dev_name(bdi->dev) : "(unknown)", 32); __entry->ino = inode->i_ino; __entry->state = inode->i_state; __entry->flags = flags; @@ -205,8 +206,8 @@ DECLARE_EVENT_CLASS(writeback_write_inode_template, ), TP_fast_assign( - strncpy(__entry->name, - dev_name(inode_to_bdi(inode)->dev), 32); + strscpy_pad(__entry->name, + dev_name(inode_to_bdi(inode)->dev), 32); __entry->ino = inode->i_ino; __entry->sync_mode = wbc->sync_mode; __trace_wbc_assign_cgroup(__get_str(cgroup), wbc); @@ -249,8 +250,9 @@ DECLARE_EVENT_CLASS(writeback_work_class, __dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb)) ), TP_fast_assign( - strncpy(__entry->name, - wb->bdi->dev ? dev_name(wb->bdi->dev) : "(unknown)", 32); + strscpy_pad(__entry->name, + wb->bdi->dev ? dev_name(wb->bdi->dev) : + "(unknown)", 32); __entry->nr_pages = work->nr_pages; __entry->sb_dev = work->sb ? work->sb->s_dev : 0; __entry->sync_mode = work->sync_mode; @@ -303,7 +305,7 @@ DECLARE_EVENT_CLASS(writeback_class, __dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb)) ), TP_fast_assign( - strncpy(__entry->name, dev_name(wb->bdi->dev), 32); + strscpy_pad(__entry->name, dev_name(wb->bdi->dev), 32); __trace_wb_assign_cgroup(__get_str(cgroup), wb); ), TP_printk("bdi %s: cgroup=%s", @@ -326,7 +328,7 @@ TRACE_EVENT(writeback_bdi_register, __array(char, name, 32) ), TP_fast_assign( - strncpy(__entry->name, dev_name(bdi->dev), 32); + strscpy_pad(__entry->name, dev_name(bdi->dev), 32); ), TP_printk("bdi %s", __entry->name @@ -351,7 +353,7 @@ DECLARE_EVENT_CLASS(wbc_class, ), TP_fast_assign( - strncpy(__entry->name, dev_name(bdi->dev), 32); + strscpy_pad(__entry->name, dev_name(bdi->dev), 32); __entry->nr_to_write = wbc->nr_to_write; __entry->pages_skipped = wbc->pages_skipped; __entry->sync_mode = wbc->sync_mode; @@ -402,7 +404,7 @@ TRACE_EVENT(writeback_queue_io, __dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb)) ), TP_fast_assign( - strncpy(__entry->name, dev_name(wb->bdi->dev), 32); + strncpy_pad(__entry->name, dev_name(wb->bdi->dev), 32); __entry->older = dirtied_before; __entry->age = (jiffies - dirtied_before) * 1000 / HZ; __entry->moved = moved; @@ -487,7 +489,7 @@ TRACE_EVENT(bdi_dirty_ratelimit, ), TP_fast_assign( - strlcpy(__entry->bdi, dev_name(wb->bdi->dev), 32); + strscpy_pad(__entry->bdi, dev_name(wb->bdi->dev), 32); __entry->write_bw = KBps(wb->write_bandwidth); __entry->avg_write_bw = KBps(wb->avg_write_bandwidth); __entry->dirty_rate = KBps(dirty_rate); @@ -552,7 +554,7 @@ TRACE_EVENT(balance_dirty_pages, TP_fast_assign( unsigned long freerun = (thresh + bg_thresh) / 2; - strlcpy(__entry->bdi, dev_name(wb->bdi->dev), 32); + strscpy_pad(__entry->bdi, dev_name(wb->bdi->dev), 32); __entry->limit = global_wb_domain.dirty_limit; __entry->setpoint = (global_wb_domain.dirty_limit + @@ -613,8 +615,8 @@ TRACE_EVENT(writeback_sb_inodes_requeue, ), TP_fast_assign( - strncpy(__entry->name, - dev_name(inode_to_bdi(inode)->dev), 32); + strscpy_pad(__entry->name, + dev_name(inode_to_bdi(inode)->dev), 32); __entry->ino = inode->i_ino; __entry->state = inode->i_state; __entry->dirtied_when = inode->dirtied_when; @@ -687,8 +689,8 @@ DECLARE_EVENT_CLASS(writeback_single_inode_template, ), TP_fast_assign( - strncpy(__entry->name, - dev_name(inode_to_bdi(inode)->dev), 32); + strscpy_pad(__entry->name, + dev_name(inode_to_bdi(inode)->dev), 32); __entry->ino = inode->i_ino; __entry->state = inode->i_state; __entry->dirtied_when = inode->dirtied_when; -- cgit v1.2.3 From f7fbca3741244070099a4f8a673b80202ffca8e4 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 30 Jan 2020 22:11:04 -0800 Subject: memcg: fix a crash in wb_workfn when a device disappears [ Upstream commit 68f23b89067fdf187763e75a56087550624fdbee ] Without memcg, there is a one-to-one mapping between the bdi and bdi_writeback structures. In this world, things are fairly straightforward; the first thing bdi_unregister() does is to shutdown the bdi_writeback structure (or wb), and part of that writeback ensures that no other work queued against the wb, and that the wb is fully drained. With memcg, however, there is a one-to-many relationship between the bdi and bdi_writeback structures; that is, there are multiple wb objects which can all point to a single bdi. There is a refcount which prevents the bdi object from being released (and hence, unregistered). So in theory, the bdi_unregister() *should* only get called once its refcount goes to zero (bdi_put will drop the refcount, and when it is zero, release_bdi gets called, which calls bdi_unregister). Unfortunately, del_gendisk() in block/gen_hd.c never got the memo about the Brave New memcg World, and calls bdi_unregister directly. It does this without informing the file system, or the memcg code, or anything else. This causes the root wb associated with the bdi to be unregistered, but none of the memcg-specific wb's are shutdown. So when one of these wb's are woken up to do delayed work, they try to dereference their wb->bdi->dev to fetch the device name, but unfortunately bdi->dev is now NULL, thanks to the bdi_unregister() called by del_gendisk(). As a result, *boom*. Fortunately, it looks like the rest of the writeback path is perfectly happy with bdi->dev and bdi->owner being NULL, so the simplest fix is to create a bdi_dev_name() function which can handle bdi->dev being NULL. This also allows us to bulletproof the writeback tracepoints to prevent them from dereferencing a NULL pointer and crashing the kernel if one is tracing with memcg's enabled, and an iSCSI device dies or a USB storage stick is pulled. The most common way of triggering this will be hotremoval of a device while writeback with memcg enabled is going on. It was triggering several times a day in a heavily loaded production environment. Google Bug Id: 145475544 Link: https://lore.kernel.org/r/20191227194829.150110-1-tytso@mit.edu Link: http://lkml.kernel.org/r/20191228005211.163952-1-tytso@mit.edu Signed-off-by: Theodore Ts'o Cc: Chris Mason Cc: Tejun Heo Cc: Jens Axboe Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- include/trace/events/writeback.h | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index 07a912af1dd7..d01217407d6d 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -66,8 +66,8 @@ TRACE_EVENT(writeback_dirty_page, TP_fast_assign( strscpy_pad(__entry->name, - mapping ? dev_name(inode_to_bdi(mapping->host)->dev) : "(unknown)", - 32); + bdi_dev_name(mapping ? inode_to_bdi(mapping->host) : + NULL), 32); __entry->ino = mapping ? mapping->host->i_ino : 0; __entry->index = page->index; ), @@ -96,8 +96,7 @@ DECLARE_EVENT_CLASS(writeback_dirty_inode_template, struct backing_dev_info *bdi = inode_to_bdi(inode); /* may be called for files on pseudo FSes w/ unregistered bdi */ - strscpy_pad(__entry->name, - bdi->dev ? dev_name(bdi->dev) : "(unknown)", 32); + strscpy_pad(__entry->name, bdi_dev_name(bdi), 32); __entry->ino = inode->i_ino; __entry->state = inode->i_state; __entry->flags = flags; @@ -207,7 +206,7 @@ DECLARE_EVENT_CLASS(writeback_write_inode_template, TP_fast_assign( strscpy_pad(__entry->name, - dev_name(inode_to_bdi(inode)->dev), 32); + bdi_dev_name(inode_to_bdi(inode)), 32); __entry->ino = inode->i_ino; __entry->sync_mode = wbc->sync_mode; __trace_wbc_assign_cgroup(__get_str(cgroup), wbc); @@ -250,9 +249,7 @@ DECLARE_EVENT_CLASS(writeback_work_class, __dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb)) ), TP_fast_assign( - strscpy_pad(__entry->name, - wb->bdi->dev ? dev_name(wb->bdi->dev) : - "(unknown)", 32); + strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32); __entry->nr_pages = work->nr_pages; __entry->sb_dev = work->sb ? work->sb->s_dev : 0; __entry->sync_mode = work->sync_mode; @@ -305,7 +302,7 @@ DECLARE_EVENT_CLASS(writeback_class, __dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb)) ), TP_fast_assign( - strscpy_pad(__entry->name, dev_name(wb->bdi->dev), 32); + strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32); __trace_wb_assign_cgroup(__get_str(cgroup), wb); ), TP_printk("bdi %s: cgroup=%s", @@ -328,7 +325,7 @@ TRACE_EVENT(writeback_bdi_register, __array(char, name, 32) ), TP_fast_assign( - strscpy_pad(__entry->name, dev_name(bdi->dev), 32); + strscpy_pad(__entry->name, bdi_dev_name(bdi), 32); ), TP_printk("bdi %s", __entry->name @@ -353,7 +350,7 @@ DECLARE_EVENT_CLASS(wbc_class, ), TP_fast_assign( - strscpy_pad(__entry->name, dev_name(bdi->dev), 32); + strscpy_pad(__entry->name, bdi_dev_name(bdi), 32); __entry->nr_to_write = wbc->nr_to_write; __entry->pages_skipped = wbc->pages_skipped; __entry->sync_mode = wbc->sync_mode; @@ -404,7 +401,7 @@ TRACE_EVENT(writeback_queue_io, __dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb)) ), TP_fast_assign( - strncpy_pad(__entry->name, dev_name(wb->bdi->dev), 32); + strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32); __entry->older = dirtied_before; __entry->age = (jiffies - dirtied_before) * 1000 / HZ; __entry->moved = moved; @@ -489,7 +486,7 @@ TRACE_EVENT(bdi_dirty_ratelimit, ), TP_fast_assign( - strscpy_pad(__entry->bdi, dev_name(wb->bdi->dev), 32); + strscpy_pad(__entry->bdi, bdi_dev_name(wb->bdi), 32); __entry->write_bw = KBps(wb->write_bandwidth); __entry->avg_write_bw = KBps(wb->avg_write_bandwidth); __entry->dirty_rate = KBps(dirty_rate); @@ -554,7 +551,7 @@ TRACE_EVENT(balance_dirty_pages, TP_fast_assign( unsigned long freerun = (thresh + bg_thresh) / 2; - strscpy_pad(__entry->bdi, dev_name(wb->bdi->dev), 32); + strscpy_pad(__entry->bdi, bdi_dev_name(wb->bdi), 32); __entry->limit = global_wb_domain.dirty_limit; __entry->setpoint = (global_wb_domain.dirty_limit + @@ -616,7 +613,7 @@ TRACE_EVENT(writeback_sb_inodes_requeue, TP_fast_assign( strscpy_pad(__entry->name, - dev_name(inode_to_bdi(inode)->dev), 32); + bdi_dev_name(inode_to_bdi(inode)), 32); __entry->ino = inode->i_ino; __entry->state = inode->i_state; __entry->dirtied_when = inode->dirtied_when; @@ -690,7 +687,7 @@ DECLARE_EVENT_CLASS(writeback_single_inode_template, TP_fast_assign( strscpy_pad(__entry->name, - dev_name(inode_to_bdi(inode)->dev), 32); + bdi_dev_name(inode_to_bdi(inode)), 32); __entry->ino = inode->i_ino; __entry->state = inode->i_state; __entry->dirtied_when = inode->dirtied_when; -- cgit v1.2.3