diff options
Diffstat (limited to 'kernel/trace/blktrace.c')
-rw-r--r-- | kernel/trace/blktrace.c | 215 |
1 files changed, 167 insertions, 48 deletions
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index c39fc68c4778..8a9ff85df78f 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -15,6 +15,9 @@ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/kernel.h> #include <linux/blkdev.h> #include <linux/blktrace_api.h> @@ -319,11 +322,12 @@ static void put_probe_ref(void) static void blk_trace_cleanup(struct blk_trace *bt) { + synchronize_rcu(); blk_trace_free(bt); put_probe_ref(); } -int blk_trace_remove(struct request_queue *q) +static int __blk_trace_remove(struct request_queue *q) { struct blk_trace *bt; @@ -336,6 +340,17 @@ int blk_trace_remove(struct request_queue *q) return 0; } + +int blk_trace_remove(struct request_queue *q) +{ + int ret; + + mutex_lock(&q->blk_trace_mutex); + ret = __blk_trace_remove(q); + mutex_unlock(&q->blk_trace_mutex); + + return ret; +} EXPORT_SYMBOL_GPL(blk_trace_remove); static ssize_t blk_dropped_read(struct file *filp, char __user *buffer, @@ -469,6 +484,16 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, */ strreplace(buts->name, '/', '_'); + /* + * bdev can be NULL, as with scsi-generic, this is a helpful as + * we can be. + */ + if (q->blk_trace) { + pr_warn("Concurrent blktraces are not allowed on %s\n", + buts->name); + return -EBUSY; + } + bt = kzalloc(sizeof(*bt), GFP_KERNEL); if (!bt) return -ENOMEM; @@ -546,9 +571,8 @@ err: return ret; } -int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, - struct block_device *bdev, - char __user *arg) +static int __blk_trace_setup(struct request_queue *q, char *name, dev_t dev, + struct block_device *bdev, char __user *arg) { struct blk_user_trace_setup buts; int ret; @@ -562,11 +586,24 @@ int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, return ret; if (copy_to_user(arg, &buts, sizeof(buts))) { - blk_trace_remove(q); + __blk_trace_remove(q); return -EFAULT; } return 0; } + +int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, + struct block_device *bdev, + char __user *arg) +{ + int ret; + + mutex_lock(&q->blk_trace_mutex); + ret = __blk_trace_setup(q, name, dev, bdev, arg); + mutex_unlock(&q->blk_trace_mutex); + + return ret; +} EXPORT_SYMBOL_GPL(blk_trace_setup); #if defined(CONFIG_COMPAT) && defined(CONFIG_X86_64) @@ -595,7 +632,7 @@ static int compat_blk_trace_setup(struct request_queue *q, char *name, return ret; if (copy_to_user(arg, &buts.name, ARRAY_SIZE(buts.name))) { - blk_trace_remove(q); + __blk_trace_remove(q); return -EFAULT; } @@ -603,11 +640,13 @@ static int compat_blk_trace_setup(struct request_queue *q, char *name, } #endif -int blk_trace_startstop(struct request_queue *q, int start) +static int __blk_trace_startstop(struct request_queue *q, int start) { int ret; - struct blk_trace *bt = q->blk_trace; + struct blk_trace *bt; + bt = rcu_dereference_protected(q->blk_trace, + lockdep_is_held(&q->blk_trace_mutex)); if (bt == NULL) return -EINVAL; @@ -642,8 +681,25 @@ int blk_trace_startstop(struct request_queue *q, int start) return ret; } + +int blk_trace_startstop(struct request_queue *q, int start) +{ + int ret; + + mutex_lock(&q->blk_trace_mutex); + ret = __blk_trace_startstop(q, start); + mutex_unlock(&q->blk_trace_mutex); + + return ret; +} EXPORT_SYMBOL_GPL(blk_trace_startstop); +/* + * When reading or writing the blktrace sysfs files, the references to the + * opened sysfs or device files should prevent the underlying block device + * from being removed. So no further delete protection is really needed. + */ + /** * blk_trace_ioctl: - handle the ioctls associated with tracing * @bdev: the block device @@ -661,12 +717,12 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg) if (!q) return -ENXIO; - mutex_lock(&bdev->bd_mutex); + mutex_lock(&q->blk_trace_mutex); switch (cmd) { case BLKTRACESETUP: bdevname(bdev, b); - ret = blk_trace_setup(q, b, bdev->bd_dev, bdev, arg); + ret = __blk_trace_setup(q, b, bdev->bd_dev, bdev, arg); break; #if defined(CONFIG_COMPAT) && defined(CONFIG_X86_64) case BLKTRACESETUP32: @@ -677,17 +733,17 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg) case BLKTRACESTART: start = 1; case BLKTRACESTOP: - ret = blk_trace_startstop(q, start); + ret = __blk_trace_startstop(q, start); break; case BLKTRACETEARDOWN: - ret = blk_trace_remove(q); + ret = __blk_trace_remove(q); break; default: ret = -ENOTTY; break; } - mutex_unlock(&bdev->bd_mutex); + mutex_unlock(&q->blk_trace_mutex); return ret; } @@ -698,10 +754,14 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg) **/ void blk_trace_shutdown(struct request_queue *q) { - if (q->blk_trace) { - blk_trace_startstop(q, 0); - blk_trace_remove(q); + mutex_lock(&q->blk_trace_mutex); + if (rcu_dereference_protected(q->blk_trace, + lockdep_is_held(&q->blk_trace_mutex))) { + __blk_trace_startstop(q, 0); + __blk_trace_remove(q); } + + mutex_unlock(&q->blk_trace_mutex); } /* @@ -722,11 +782,15 @@ void blk_trace_shutdown(struct request_queue *q) static void blk_add_trace_rq(struct request_queue *q, struct request *rq, unsigned int nr_bytes, u32 what) { - struct blk_trace *bt = q->blk_trace; + struct blk_trace *bt; struct task_struct *tsk = current; - if (likely(!bt)) + rcu_read_lock(); + bt = rcu_dereference(q->blk_trace); + if (likely(!bt)) { + rcu_read_unlock(); return; + } /* * Use the bio context for all events except ISSUE and @@ -751,6 +815,7 @@ static void blk_add_trace_rq(struct request_queue *q, struct request *rq, __blk_add_trace(bt, blk_rq_pos(rq), nr_bytes, rq->cmd_flags, what, rq->errors, 0, NULL, tsk); } + rcu_read_unlock(); } static void blk_add_trace_rq_abort(void *ignore, @@ -800,11 +865,15 @@ static void blk_add_trace_rq_complete(void *ignore, static void blk_add_trace_bio(struct request_queue *q, struct bio *bio, u32 what, int error) { - struct blk_trace *bt = q->blk_trace; + struct blk_trace *bt; struct task_struct *tsk = current; - if (likely(!bt)) + rcu_read_lock(); + bt = rcu_dereference(q->blk_trace); + if (likely(!bt)) { + rcu_read_unlock(); return; + } /* * Not all the pages in the bio are dirtied by the same task but @@ -817,6 +886,7 @@ static void blk_add_trace_bio(struct request_queue *q, struct bio *bio, __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size, bio->bi_rw, what, error, 0, NULL, tsk); + rcu_read_unlock(); } static void blk_add_trace_bio_bounce(void *ignore, @@ -861,11 +931,14 @@ static void blk_add_trace_getrq(void *ignore, if (bio) blk_add_trace_bio(q, bio, BLK_TA_GETRQ, 0); else { - struct blk_trace *bt = q->blk_trace; + struct blk_trace *bt; + rcu_read_lock(); + bt = rcu_dereference(q->blk_trace); if (bt) __blk_add_trace(bt, 0, 0, rw, BLK_TA_GETRQ, 0, 0, NULL, current); + rcu_read_unlock(); } } @@ -877,28 +950,36 @@ static void blk_add_trace_sleeprq(void *ignore, if (bio) blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ, 0); else { - struct blk_trace *bt = q->blk_trace; + struct blk_trace *bt; + rcu_read_lock(); + bt = rcu_dereference(q->blk_trace); if (bt) __blk_add_trace(bt, 0, 0, rw, BLK_TA_SLEEPRQ, 0, 0, NULL, current); + rcu_read_unlock(); } } static void blk_add_trace_plug(void *ignore, struct request_queue *q) { - struct blk_trace *bt = q->blk_trace; + struct blk_trace *bt; + rcu_read_lock(); + bt = rcu_dereference(q->blk_trace); if (bt) __blk_add_trace(bt, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL, current); + rcu_read_unlock(); } static void blk_add_trace_unplug(void *ignore, struct request_queue *q, unsigned int depth, bool explicit) { - struct blk_trace *bt = q->blk_trace; + struct blk_trace *bt; + rcu_read_lock(); + bt = rcu_dereference(q->blk_trace); if (bt) { __be64 rpdu = cpu_to_be64(depth); u32 what; @@ -911,15 +992,18 @@ static void blk_add_trace_unplug(void *ignore, struct request_queue *q, __blk_add_trace(bt, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu, current); } + rcu_read_unlock(); } static void blk_add_trace_split(void *ignore, struct request_queue *q, struct bio *bio, unsigned int pdu) { - struct blk_trace *bt = q->blk_trace; + struct blk_trace *bt; struct task_struct *tsk = current; + rcu_read_lock(); + bt = rcu_dereference(q->blk_trace); if (bt) { __be64 rpdu = cpu_to_be64(pdu); @@ -932,6 +1016,7 @@ static void blk_add_trace_split(void *ignore, bio->bi_iter.bi_size, bio->bi_rw, BLK_TA_SPLIT, bio->bi_error, sizeof(rpdu), &rpdu, tsk); } + rcu_read_unlock(); } /** @@ -951,12 +1036,16 @@ static void blk_add_trace_bio_remap(void *ignore, struct request_queue *q, struct bio *bio, dev_t dev, sector_t from) { - struct blk_trace *bt = q->blk_trace; + struct blk_trace *bt; struct blk_io_trace_remap r; struct task_struct *tsk = current; - if (likely(!bt)) + rcu_read_lock(); + bt = rcu_dereference(q->blk_trace); + if (likely(!bt)) { + rcu_read_unlock(); return; + } r.device_from = cpu_to_be32(dev); r.device_to = cpu_to_be32(bio->bi_bdev->bd_dev); @@ -970,6 +1059,7 @@ static void blk_add_trace_bio_remap(void *ignore, __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size, bio->bi_rw, BLK_TA_REMAP, bio->bi_error, sizeof(r), &r, tsk); + rcu_read_unlock(); } /** @@ -990,12 +1080,16 @@ static void blk_add_trace_rq_remap(void *ignore, struct request *rq, dev_t dev, sector_t from) { - struct blk_trace *bt = q->blk_trace; + struct blk_trace *bt; struct blk_io_trace_remap r; struct task_struct *tsk = current; - if (likely(!bt)) + rcu_read_lock(); + bt = rcu_dereference(q->blk_trace); + if (likely(!bt)) { + rcu_read_unlock(); return; + } r.device_from = cpu_to_be32(dev); r.device_to = cpu_to_be32(disk_devt(rq->rq_disk)); @@ -1009,6 +1103,7 @@ static void blk_add_trace_rq_remap(void *ignore, __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), rq_data_dir(rq), BLK_TA_REMAP, !!rq->errors, sizeof(r), &r, tsk); + rcu_read_unlock(); } /** @@ -1026,11 +1121,15 @@ void blk_add_driver_data(struct request_queue *q, struct request *rq, void *data, size_t len) { - struct blk_trace *bt = q->blk_trace; + struct blk_trace *bt; struct task_struct *tsk = current; - if (likely(!bt)) + rcu_read_lock(); + bt = rcu_dereference(q->blk_trace); + if (likely(!bt)) { + rcu_read_unlock(); return; + } if (bio_has_data(rq->bio) && rq->bio->bi_io_vec && rq->bio->bi_io_vec->bv_page && @@ -1043,6 +1142,7 @@ void blk_add_driver_data(struct request_queue *q, else __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), 0, BLK_TA_DRV_DATA, rq->errors, len, data, tsk); + rcu_read_unlock(); } EXPORT_SYMBOL_GPL(blk_add_driver_data); @@ -1466,7 +1566,8 @@ blk_trace_event_print_binary(struct trace_iterator *iter, int flags, static enum print_line_t blk_tracer_print_line(struct trace_iterator *iter) { - if (!(blk_tracer_flags.val & TRACE_BLK_OPT_CLASSIC)) + if ((iter->ent->type != TRACE_BLK) || + !(blk_tracer_flags.val & TRACE_BLK_OPT_CLASSIC)) return TRACE_TYPE_UNHANDLED; return print_one_line(iter, true); @@ -1533,7 +1634,16 @@ static int blk_trace_remove_queue(struct request_queue *q) if (bt == NULL) return -EINVAL; + if (bt->trace_state == Blktrace_running) { + bt->trace_state = Blktrace_stopped; + spin_lock_irq(&running_trace_lock); + list_del_init(&bt->running_list); + spin_unlock_irq(&running_trace_lock); + relay_flush(bt->rchan); + } + put_probe_ref(); + synchronize_rcu(); blk_trace_free(bt); return 0; } @@ -1694,6 +1804,7 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev, struct hd_struct *p = dev_to_part(dev); struct request_queue *q; struct block_device *bdev; + struct blk_trace *bt; ssize_t ret = -ENXIO; bdev = bdget(part_devt(p)); @@ -1704,26 +1815,28 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev, if (q == NULL) goto out_bdput; - mutex_lock(&bdev->bd_mutex); + mutex_lock(&q->blk_trace_mutex); + bt = rcu_dereference_protected(q->blk_trace, + lockdep_is_held(&q->blk_trace_mutex)); if (attr == &dev_attr_enable) { - ret = sprintf(buf, "%u\n", !!q->blk_trace); + ret = sprintf(buf, "%u\n", !!bt); goto out_unlock_bdev; } - if (q->blk_trace == NULL) + if (bt == NULL) ret = sprintf(buf, "disabled\n"); else if (attr == &dev_attr_act_mask) - ret = blk_trace_mask2str(buf, q->blk_trace->act_mask); + ret = blk_trace_mask2str(buf, bt->act_mask); else if (attr == &dev_attr_pid) - ret = sprintf(buf, "%u\n", q->blk_trace->pid); + ret = sprintf(buf, "%u\n", bt->pid); else if (attr == &dev_attr_start_lba) - ret = sprintf(buf, "%llu\n", q->blk_trace->start_lba); + ret = sprintf(buf, "%llu\n", bt->start_lba); else if (attr == &dev_attr_end_lba) - ret = sprintf(buf, "%llu\n", q->blk_trace->end_lba); + ret = sprintf(buf, "%llu\n", bt->end_lba); out_unlock_bdev: - mutex_unlock(&bdev->bd_mutex); + mutex_unlock(&q->blk_trace_mutex); out_bdput: bdput(bdev); out: @@ -1737,6 +1850,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, struct block_device *bdev; struct request_queue *q; struct hd_struct *p; + struct blk_trace *bt; u64 value; ssize_t ret = -EINVAL; @@ -1765,10 +1879,12 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, if (q == NULL) goto out_bdput; - mutex_lock(&bdev->bd_mutex); + mutex_lock(&q->blk_trace_mutex); + bt = rcu_dereference_protected(q->blk_trace, + lockdep_is_held(&q->blk_trace_mutex)); if (attr == &dev_attr_enable) { - if (!!value == !!q->blk_trace) { + if (!!value == !!bt) { ret = 0; goto out_unlock_bdev; } @@ -1780,22 +1896,25 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, } ret = 0; - if (q->blk_trace == NULL) + if (bt == NULL) { ret = blk_trace_setup_queue(q, bdev); + bt = rcu_dereference_protected(q->blk_trace, + lockdep_is_held(&q->blk_trace_mutex)); + } if (ret == 0) { if (attr == &dev_attr_act_mask) - q->blk_trace->act_mask = value; + bt->act_mask = value; else if (attr == &dev_attr_pid) - q->blk_trace->pid = value; + bt->pid = value; else if (attr == &dev_attr_start_lba) - q->blk_trace->start_lba = value; + bt->start_lba = value; else if (attr == &dev_attr_end_lba) - q->blk_trace->end_lba = value; + bt->end_lba = value; } out_unlock_bdev: - mutex_unlock(&bdev->bd_mutex); + mutex_unlock(&q->blk_trace_mutex); out_bdput: bdput(bdev); out: |