From 18741986a4b1dc4b1f171634c4191abc3b0fa023 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 10 Feb 2014 09:29:00 -0700 Subject: blk-mq: rework flush sequencing logic Witch to using a preallocated flush_rq for blk-mq similar to what's done with the old request path. This allows us to set up the request properly with a tag from the actually allowed range and ->rq_disk as needed by some drivers. To make life easier we also switch to dynamic allocation of ->flush_rq for the old path. This effectively reverts most of "blk-mq: fix for flush deadlock" and "blk-mq: Don't reserve a tag for flush request" Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 0375654adb28..b2d25ecbcbc1 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -101,7 +101,7 @@ struct request { }; union { struct call_single_data csd; - struct work_struct mq_flush_data; + struct work_struct mq_flush_work; }; struct request_queue *q; @@ -451,13 +451,8 @@ struct request_queue { unsigned long flush_pending_since; struct list_head flush_queue[2]; struct list_head flush_data_in_flight; - union { - struct request flush_rq; - struct { - spinlock_t mq_flush_lock; - struct work_struct mq_flush_work; - }; - }; + struct request *flush_rq; + spinlock_t mq_flush_lock; struct mutex sysfs_lock; -- cgit v1.2.3 From 8b4922d3173d2eee7b43be8e5caec3ab7d30feff Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 24 Feb 2014 16:39:52 +0100 Subject: block: Stop abusing csd.list for fifo_time Block layer currently abuses rq->csd.list.next for storing fifo_time. That is a terrible hack and completely unnecessary as well. Union achieves the same space saving in a cleaner way. Signed-off-by: Jan Kara Cc: Andrew Morton Cc: Christoph Hellwig Cc: Ingo Molnar Cc: Jens Axboe Signed-off-by: Frederic Weisbecker Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 4afa4f8f6090..1e1fa3f93d5f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -99,6 +99,7 @@ struct request { union { struct call_single_data csd; struct work_struct mq_flush_work; + unsigned long fifo_time; }; struct request_queue *q; -- cgit v1.2.3 From 86d564c84c38b1ec06d9f2120d6a7373dcaeff0c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 8 Feb 2014 20:42:52 -0500 Subject: constify blk_rq_map_user_iov() and friends sg_iovec array passed to it can be const Signed-off-by: Al Viro --- include/linux/blkdev.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 4afa4f8f6090..a639fd8a6d7b 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -823,8 +823,8 @@ extern int blk_rq_map_user(struct request_queue *, struct request *, extern int blk_rq_unmap_user(struct bio *); extern int blk_rq_map_kern(struct request_queue *, struct request *, void *, unsigned int, gfp_t); extern int blk_rq_map_user_iov(struct request_queue *, struct request *, - struct rq_map_data *, struct sg_iovec *, int, - unsigned int, gfp_t); + struct rq_map_data *, const struct sg_iovec *, + int, unsigned int, gfp_t); extern int blk_execute_rq(struct request_queue *, struct gendisk *, struct request *, int); extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, -- cgit v1.2.3 From 360f92c2443073143467a0088daffec96a17910b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 9 Apr 2014 20:27:01 -0600 Subject: block: fix regression with block enabled tagging Martin reported that his test system would not boot with current git, it oopsed with this: BUG: unable to handle kernel paging request at ffff88046c6c9e80 IP: [] blk_queue_start_tag+0x90/0x150 PGD 1ddf067 PUD 1de2067 PMD 47fc7d067 PTE 800000046c6c9060 Oops: 0002 [#1] SMP DEBUG_PAGEALLOC Modules linked in: sd_mod lpfc(+) scsi_transport_fc scsi_tgt oracleasm rpcsec_gss_krb5 ipv6 igb dca i2c_algo_bit i2c_core hwmon CPU: 3 PID: 87 Comm: kworker/u17:1 Not tainted 3.14.0+ #246 Hardware name: Supermicro X9DRX+-F/X9DRX+-F, BIOS 3.00 07/09/2013 Workqueue: events_unbound async_run_entry_fn task: ffff8802743c2150 ti: ffff880273d02000 task.ti: ffff880273d02000 RIP: 0010:[] [] blk_queue_start_tag+0x90/0x150 RSP: 0018:ffff880273d03a58 EFLAGS: 00010092 RAX: ffff88046c6c9e78 RBX: ffff880077208e78 RCX: 00000000fffc8da6 RDX: 00000000fffc186d RSI: 0000000000000009 RDI: 00000000fffc8d9d RBP: ffff880273d03a88 R08: 0000000000000001 R09: ffff8800021c2410 R10: 0000000000000005 R11: 0000000000015b30 R12: ffff88046c5bb8a0 R13: ffff88046c5c0890 R14: 000000000000001e R15: 000000000000001e FS: 0000000000000000(0000) GS:ffff880277b00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffff88046c6c9e80 CR3: 00000000018f6000 CR4: 00000000000407e0 Stack: ffff880273d03a98 ffff880474b18800 0000000000000000 ffff880474157000 ffff88046c5c0890 ffff880077208e78 ffff880273d03ae8 ffffffff813b9e62 ffff880200000010 ffff880474b18968 ffff880474b18848 ffff88046c5c0cd8 Call Trace: [] scsi_request_fn+0xf2/0x510 [] __blk_run_queue+0x37/0x50 [] blk_execute_rq_nowait+0xb3/0x130 [] blk_execute_rq+0x64/0xf0 [] ? bit_waitqueue+0xd0/0xd0 [] scsi_execute+0xe5/0x180 [] scsi_execute_req_flags+0x9a/0x110 [] sd_spinup_disk+0x94/0x460 [sd_mod] [] ? __unmap_hugepage_range+0x200/0x2f0 [] sd_revalidate_disk+0xaa/0x3f0 [sd_mod] [] sd_probe_async+0xd8/0x200 [sd_mod] [] async_run_entry_fn+0x3f/0x140 [] process_one_work+0x175/0x410 [] worker_thread+0x123/0x400 [] ? manage_workers+0x160/0x160 [] kthread+0xce/0xf0 [] ? kthread_freezable_should_stop+0x70/0x70 [] ret_from_fork+0x7c/0xb0 [] ? kthread_freezable_should_stop+0x70/0x70 Code: 48 0f ab 11 72 db 48 81 4b 40 00 00 10 00 89 83 08 01 00 00 48 89 df 49 8b 04 24 48 89 1c d0 e8 f7 a8 ff ff 49 8b 85 28 05 00 00 <48> 89 58 08 48 89 03 49 8d 85 28 05 00 00 48 89 43 08 49 89 9d RIP [] blk_queue_start_tag+0x90/0x150 RSP CR2: ffff88046c6c9e80 Martin bisected and found this to be the problem patch; commit 6d113398dcf4dfcd9787a4ead738b186f7b7ff0f Author: Jan Kara Date: Mon Feb 24 16:39:54 2014 +0100 block: Stop abusing rq->csd.list in blk-softirq and the problem was immediately apparent. The patch states that it is safe to reuse queuelist at completion time, since it is no longer used. However, that is not true if a device is using block enabled tagging. If that is the case, then the queuelist is reused to keep track of busy tags. If a device also ended up using softirq completions, we'd reuse ->queuelist for the IPI handling while block tagging was still using it. Boom. Fix this by adding a new ipi_list list head, and share the memory used with the request hash table. The hash table is never used after the request is moved to the dispatch list, which happens long before any potential completion of the request. Add a new request bit for this, so we don't have cases that check rq->hash while it could potentially have been reused for the IPI completion. Reported-by: Martin K. Petersen Tested-by: Benjamin Herrenschmidt Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 1e1fa3f93d5f..99617cf7dd1a 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -118,7 +118,18 @@ struct request { struct bio *bio; struct bio *biotail; - struct hlist_node hash; /* merge hash */ + /* + * The hash is used inside the scheduler, and killed once the + * request reaches the dispatch list. The ipi_list is only used + * to queue the request for softirq completion, which is long + * after the request has been unhashed (and even removed from + * the dispatch list). + */ + union { + struct hlist_node hash; /* merge hash */ + struct list_head ipi_list; + }; + /* * The rb_node is only used inside the io scheduler, requests * are pruned when moved to the dispatch queue. So let the -- cgit v1.2.3