From aa387cc895672b00f807ad7c734a2defaf677712 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Sun, 31 Jul 2011 22:05:09 +0200 Subject: block: add bsg helper library This moves the FC classes bsg code to the block layer and makes it a lib so that other classes like iscsi and SAS can use it. It is helpful because working with the request queue, bios, creating scatterlists, etc are a pain that the LLD does not have to worry about with normal IOs and should not have to worry about for bsg requests. Signed-off-by: Mike Christie Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 0e67c45b3bc9..847928546076 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -30,6 +30,7 @@ struct request_pm_state; struct blk_trace; struct request; struct sg_io_hdr; +struct bsg_job; #define BLKDEV_MIN_RQ 4 #define BLKDEV_MAX_RQ 128 /* Default maximum */ @@ -209,6 +210,7 @@ typedef int (merge_bvec_fn) (struct request_queue *, struct bvec_merge_data *, typedef void (softirq_done_fn)(struct request *); typedef int (dma_drain_needed_fn)(struct request *); typedef int (lld_busy_fn) (struct request_queue *q); +typedef int (bsg_job_fn) (struct bsg_job *); enum blk_eh_timer_return { BLK_EH_NOT_HANDLED, @@ -375,6 +377,8 @@ struct request_queue { struct mutex sysfs_lock; #if defined(CONFIG_BLK_DEV_BSG) + bsg_job_fn *bsg_job_fn; + int bsg_job_size; struct bsg_class_device bsg_dev; #endif -- cgit v1.2.3 From 4853abaae7e4a2af938115ce9071ef8684fb7af4 Mon Sep 17 00:00:00 2001 From: Jeff Moyer Date: Mon, 15 Aug 2011 21:37:25 +0200 Subject: block: fix flush machinery for stacking drivers with differring flush flags Commit ae1b1539622fb46e51b4d13b3f9e5f4c713f86ae, block: reimplement FLUSH/FUA to support merge, introduced a performance regression when running any sort of fsyncing workload using dm-multipath and certain storage (in our case, an HP EVA). The test I ran was fs_mark, and it dropped from ~800 files/sec on ext4 to ~100 files/sec. It turns out that dm-multipath always advertised flush+fua support, and passed commands on down the stack, where those flags used to get stripped off. The above commit changed that behavior: static inline struct request *__elv_next_request(struct request_queue *q) { struct request *rq; while (1) { - while (!list_empty(&q->queue_head)) { + if (!list_empty(&q->queue_head)) { rq = list_entry_rq(q->queue_head.next); - if (!(rq->cmd_flags & (REQ_FLUSH | REQ_FUA)) || - (rq->cmd_flags & REQ_FLUSH_SEQ)) - return rq; - rq = blk_do_flush(q, rq); - if (rq) - return rq; + return rq; } Note that previously, a command would come in here, have REQ_FLUSH|REQ_FUA set, and then get handed off to blk_do_flush: struct request *blk_do_flush(struct request_queue *q, struct request *rq) { unsigned int fflags = q->flush_flags; /* may change, cache it */ bool has_flush = fflags & REQ_FLUSH, has_fua = fflags & REQ_FUA; bool do_preflush = has_flush && (rq->cmd_flags & REQ_FLUSH); bool do_postflush = has_flush && !has_fua && (rq->cmd_flags & REQ_FUA); unsigned skip = 0; ... if (blk_rq_sectors(rq) && !do_preflush && !do_postflush) { rq->cmd_flags &= ~REQ_FLUSH; if (!has_fua) rq->cmd_flags &= ~REQ_FUA; return rq; } So, the flush machinery was bypassed in such cases (q->flush_flags == 0 && rq->cmd_flags & (REQ_FLUSH|REQ_FUA)). Now, however, we don't get into the flush machinery at all. Instead, __elv_next_request just hands a request with flush and fua bits set to the scsi_request_fn, even if the underlying request_queue does not support flush or fua. The agreed upon approach is to fix the flush machinery to allow stacking. While this isn't used in practice (since there is only one request-based dm target, and that target will now reflect the flush flags of the underlying device), it does future-proof the solution, and make it function as designed. In order to make this work, I had to add a field to the struct request, inside the flush structure (to store the original req->end_io). Shaohua had suggested overloading the union with rb_node and completion_data, but the completion data is used by device mapper and can also be used by other drivers. So, I didn't see a way around the additional field. I tested this patch on an HP EVA with both ext4 and xfs, and it recovers the lost performance. Comments and other testers, as always, are appreciated. Cheers, Jeff Signed-off-by: Jeff Moyer Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 847928546076..84b15d54f8c2 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -118,6 +118,7 @@ struct request { struct { unsigned int seq; struct list_head list; + rq_end_io_fn *saved_end_io; } flush; }; -- cgit v1.2.3 From 56ebdaf2fa3c5276be201c5d1aff1490b682ecf2 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Wed, 24 Aug 2011 16:04:34 +0200 Subject: block: simplify force plug flush code a little bit Cleaning up the code a little bit. attempt_plug_merge() traverses the plug list anyway, we can do the request counting there, so stack size is reduced a little bit. The motivation here is I suspect if we should count the requests for each queue (task could handle multiple disks in the meantime), but my test doesn't show it's worthy doing. If somebody proves we should do it, below change will make that more easier. Signed-off-by: Shaohua Li Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 84b15d54f8c2..7fbaa9103344 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -873,7 +873,6 @@ struct blk_plug { struct list_head list; struct list_head cb_list; unsigned int should_sort; - unsigned int count; }; #define BLK_MAX_REQUEST_COUNT 16 -- cgit v1.2.3 From 166e1f901b01872e8b70733a3f2e2c6980389cf8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 12 Sep 2011 12:08:27 +0200 Subject: block: export __make_request Avoid the hacks need for request based device mappers currently by simply exporting the symbol instead of trying to get it through the back door. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 0e67c45b3bc9..e9c3d9b07630 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -675,6 +675,8 @@ extern int scsi_cmd_ioctl(struct request_queue *, struct gendisk *, fmode_t, extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t, struct scsi_ioctl_command __user *); +extern int __make_request(struct request_queue *q, struct bio *bio); + /* * A queue has just exitted congestion. Note this in the global counter of * congested queues, and wake up anyone who was waiting for requests to be -- cgit v1.2.3 From c20e8de27fef9f59869c81c288ad6cf28200e00c Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 12 Sep 2011 12:03:37 +0200 Subject: block: rename __make_request() to blk_queue_bio() Now that it's exported, lets put it in a more sane namespace. Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e9c3d9b07630..085f95414c7f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -675,7 +675,7 @@ extern int scsi_cmd_ioctl(struct request_queue *, struct gendisk *, fmode_t, extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t, struct scsi_ioctl_command __user *); -extern int __make_request(struct request_queue *q, struct bio *bio); +extern int blk_queue_bio(struct request_queue *q, struct bio *bio); /* * A queue has just exitted congestion. Note this in the global counter of -- cgit v1.2.3 From 5a7bbad27a410350e64a2d7f5ec18fc73836c14f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 12 Sep 2011 12:12:01 +0200 Subject: block: remove support for bio remapping from ->make_request There is very little benefit in allowing to let a ->make_request instance update the bios device and sector and loop around it in __generic_make_request when we can archive the same through calling generic_make_request from the driver and letting the loop in generic_make_request handle it. Note that various drivers got the return value from ->make_request and returned non-zero values for errors. Signed-off-by: Christoph Hellwig Acked-by: NeilBrown Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 085f95414c7f..c712efdafc3f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -193,7 +193,7 @@ struct request_pm_state #include typedef void (request_fn_proc) (struct request_queue *q); -typedef int (make_request_fn) (struct request_queue *q, struct bio *bio); +typedef void (make_request_fn) (struct request_queue *q, struct bio *bio); typedef int (prep_rq_fn) (struct request_queue *, struct request *); typedef void (unprep_rq_fn) (struct request_queue *, struct request *); @@ -675,7 +675,7 @@ extern int scsi_cmd_ioctl(struct request_queue *, struct gendisk *, fmode_t, extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t, struct scsi_ioctl_command __user *); -extern int blk_queue_bio(struct request_queue *q, struct bio *bio); +extern void blk_queue_bio(struct request_queue *q, struct bio *bio); /* * A queue has just exitted congestion. Note this in the global counter of -- cgit v1.2.3 From 75df713627f28f88b901b329c8857747545fd4ab Mon Sep 17 00:00:00 2001 From: Suresh Jayaraman Date: Wed, 21 Sep 2011 10:00:16 +0200 Subject: block: document blk-plug Thus spake Andrew Morton: "And I have the usual maintainability whine. If someone comes up to vmscan.c and sees it calling blk_start_plug(), how are they supposed to work out why that call is there? They go look at the blk_start_plug() definition and it is undocumented. I think we can do better than this?" Adapted from the LWN article - http://lwn.net/Articles/438256/ by Jens Axboe and from an earlier attempt by Shaohua Li to document blk-plug. [akpm@linux-foundation.org: grammatical and spelling tweaks] Signed-off-by: Suresh Jayaraman Cc: Shaohua Li Cc: Jonathan Corbet Signed-off-by: Andrew Morton Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c712efdafc3f..1978655faa3b 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -860,17 +860,23 @@ struct request_queue *blk_alloc_queue_node(gfp_t, int); extern void blk_put_queue(struct request_queue *); /* - * Note: Code in between changing the blk_plug list/cb_list or element of such - * lists is preemptable, but such code can't do sleep (or be very careful), - * otherwise data is corrupted. For details, please check schedule() where - * blk_schedule_flush_plug() is called. + * blk_plug permits building a queue of related requests by holding the I/O + * fragments for a short period. This allows merging of sequential requests + * into single larger request. As the requests are moved from a per-task list to + * the device's request_queue in a batch, this results in improved scalability + * as the lock contention for request_queue lock is reduced. + * + * It is ok not to disable preemption when adding the request to the plug list + * or when attempting a merge, because blk_schedule_flush_list() will only flush + * the plug list when the task sleeps by itself. For details, please see + * schedule() where blk_schedule_flush_plug() is called. */ struct blk_plug { - unsigned long magic; - struct list_head list; - struct list_head cb_list; - unsigned int should_sort; - unsigned int count; + unsigned long magic; /* detect uninitialized use-cases */ + struct list_head list; /* requests */ + struct list_head cb_list; /* md requires an unplug callback */ + unsigned int should_sort; /* list to be sorted before flushing? */ + unsigned int count; /* number of queued requests */ }; #define BLK_MAX_REQUEST_COUNT 16 -- cgit v1.2.3 From bc9fcbf9cb8ec76d340da16fbf48a9a316e14c52 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 19 Oct 2011 14:31:18 +0200 Subject: block: move blk_throtl prototypes to block/blk.h blk_throtl interface is block internal and there's no reason to have them in linux/blkdev.h. Move them to block/blk.h. This patch doesn't introduce any functional change. Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 0b68044e7abb..5267cd2f20dc 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1197,20 +1197,6 @@ static inline uint64_t rq_io_start_time_ns(struct request *req) } #endif -#ifdef CONFIG_BLK_DEV_THROTTLING -extern int blk_throtl_init(struct request_queue *q); -extern void blk_throtl_exit(struct request_queue *q); -extern int blk_throtl_bio(struct request_queue *q, struct bio **bio); -#else /* CONFIG_BLK_DEV_THROTTLING */ -static inline int blk_throtl_bio(struct request_queue *q, struct bio **bio) -{ - return 0; -} - -static inline int blk_throtl_init(struct request_queue *q) { return 0; } -static inline int blk_throtl_exit(struct request_queue *q) { return 0; } -#endif /* CONFIG_BLK_DEV_THROTTLING */ - #define MODULE_ALIAS_BLOCKDEV(major,minor) \ MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor)) #define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \ -- cgit v1.2.3 From de47725421ad5627a5c905f4e40bb844ebc06d29 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Thu, 26 May 2011 13:46:22 -0400 Subject: include: replace linux/module.h with "struct module" wherever possible The pretty much brings in the kitchen sink along with it, so it should be avoided wherever reasonably possible in terms of being included from other commonly used files, as it results in a measureable increase on compile times. The worst culprit was probably device.h since it is used everywhere. This file also had an implicit dependency/usage of mutex.h which was masked by module.h, and is also fixed here at the same time. There are over a dozen other headers that simply declare the struct instead of pulling in the whole file, so follow their lead and simply make it a few more. Most of the implicit dependencies on module.h being present by these headers pulling it in have been now weeded out, so we can finally make this change with hopefully minimal breakage. Signed-off-by: Paul Gortmaker --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 7fbaa9103344..d750a3a79299 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include @@ -22,6 +21,7 @@ #include +struct module; struct scsi_ioctl_command; struct request_queue; -- cgit v1.2.3