From 5f3ea37c7716db4e894a480e0c18b24399595b6b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 30 Oct 2008 08:34:33 +0100 Subject: blktrace: port to tracepoints This was a forward port of work done by Mathieu Desnoyers, I changed it to encode the 'what' parameter on the tracepoint name, so that one can register interest in specific events and not on classes of events to then check the 'what' parameter. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Jens Axboe Signed-off-by: Ingo Molnar --- block/Kconfig | 1 + block/blk-core.c | 33 +++--- block/blktrace.c | 332 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- block/elevator.c | 7 +- 4 files changed, 348 insertions(+), 25 deletions(-) (limited to 'block') diff --git a/block/Kconfig b/block/Kconfig index 1ab7c15c8d7..290b219fad9 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -47,6 +47,7 @@ config BLK_DEV_IO_TRACE depends on SYSFS select RELAY select DEBUG_FS + select TRACEPOINTS help Say Y here if you want to be able to trace the block layer actions on a given queue. Tracing allows you to see any traffic happening diff --git a/block/blk-core.c b/block/blk-core.c index 10e8a64a5a5..04267d66a2b 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -28,6 +28,7 @@ #include #include #include +#include #include "blk.h" @@ -205,7 +206,7 @@ void blk_plug_device(struct request_queue *q) if (!queue_flag_test_and_set(QUEUE_FLAG_PLUGGED, q)) { mod_timer(&q->unplug_timer, jiffies + q->unplug_delay); - blk_add_trace_generic(q, NULL, 0, BLK_TA_PLUG); + trace_block_plug(q); } } EXPORT_SYMBOL(blk_plug_device); @@ -292,9 +293,7 @@ void blk_unplug_work(struct work_struct *work) struct request_queue *q = container_of(work, struct request_queue, unplug_work); - blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL, - q->rq.count[READ] + q->rq.count[WRITE]); - + trace_block_unplug_io(q); q->unplug_fn(q); } @@ -302,9 +301,7 @@ void blk_unplug_timeout(unsigned long data) { struct request_queue *q = (struct request_queue *)data; - blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_TIMER, NULL, - q->rq.count[READ] + q->rq.count[WRITE]); - + trace_block_unplug_timer(q); kblockd_schedule_work(q, &q->unplug_work); } @@ -314,9 +311,7 @@ void blk_unplug(struct request_queue *q) * devices don't necessarily have an ->unplug_fn defined */ if (q->unplug_fn) { - blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL, - q->rq.count[READ] + q->rq.count[WRITE]); - + trace_block_unplug_io(q); q->unplug_fn(q); } } @@ -822,7 +817,7 @@ rq_starved: if (ioc_batching(q, ioc)) ioc->nr_batch_requests--; - blk_add_trace_generic(q, bio, rw, BLK_TA_GETRQ); + trace_block_getrq(q, bio, rw); out: return rq; } @@ -848,7 +843,7 @@ static struct request *get_request_wait(struct request_queue *q, int rw_flags, prepare_to_wait_exclusive(&rl->wait[rw], &wait, TASK_UNINTERRUPTIBLE); - blk_add_trace_generic(q, bio, rw, BLK_TA_SLEEPRQ); + trace_block_sleeprq(q, bio, rw); __generic_unplug_device(q); spin_unlock_irq(q->queue_lock); @@ -928,7 +923,7 @@ void blk_requeue_request(struct request_queue *q, struct request *rq) { blk_delete_timer(rq); blk_clear_rq_complete(rq); - blk_add_trace_rq(q, rq, BLK_TA_REQUEUE); + trace_block_rq_requeue(q, rq); if (blk_rq_tagged(rq)) blk_queue_end_tag(q, rq); @@ -1167,7 +1162,7 @@ static int __make_request(struct request_queue *q, struct bio *bio) if (!ll_back_merge_fn(q, req, bio)) break; - blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE); + trace_block_bio_backmerge(q, bio); req->biotail->bi_next = bio; req->biotail = bio; @@ -1186,7 +1181,7 @@ static int __make_request(struct request_queue *q, struct bio *bio) if (!ll_front_merge_fn(q, req, bio)) break; - blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE); + trace_block_bio_frontmerge(q, bio); bio->bi_next = req->bio; req->bio = bio; @@ -1269,7 +1264,7 @@ static inline void blk_partition_remap(struct bio *bio) bio->bi_sector += p->start_sect; bio->bi_bdev = bdev->bd_contains; - blk_add_trace_remap(bdev_get_queue(bio->bi_bdev), bio, + trace_block_remap(bdev_get_queue(bio->bi_bdev), bio, bdev->bd_dev, bio->bi_sector, bio->bi_sector - p->start_sect); } @@ -1441,10 +1436,10 @@ end_io: goto end_io; if (old_sector != -1) - blk_add_trace_remap(q, bio, old_dev, bio->bi_sector, + trace_block_remap(q, bio, old_dev, bio->bi_sector, old_sector); - blk_add_trace_bio(q, bio, BLK_TA_QUEUE); + trace_block_bio_queue(q, bio); old_sector = bio->bi_sector; old_dev = bio->bi_bdev->bd_dev; @@ -1656,7 +1651,7 @@ static int __end_that_request_first(struct request *req, int error, int total_bytes, bio_nbytes, next_idx = 0; struct bio *bio; - blk_add_trace_rq(req->q, req, BLK_TA_COMPLETE); + trace_block_rq_complete(req->q, req); /* * for a REQ_TYPE_BLOCK_PC request, we want to carry any eventual diff --git a/block/blktrace.c b/block/blktrace.c index 85049a7e7a1..b0a2cae886d 100644 --- a/block/blktrace.c +++ b/block/blktrace.c @@ -23,10 +23,18 @@ #include #include #include +#include #include static unsigned int blktrace_seq __read_mostly = 1; +/* Global reference count of probes */ +static DEFINE_MUTEX(blk_probe_mutex); +static atomic_t blk_probes_ref = ATOMIC_INIT(0); + +static int blk_register_tracepoints(void); +static void blk_unregister_tracepoints(void); + /* * Send out a notify message. */ @@ -119,7 +127,7 @@ static u32 ddir_act[2] __read_mostly = { BLK_TC_ACT(BLK_TC_READ), BLK_TC_ACT(BLK * The worker for the various blk_add_trace*() types. Fills out a * blk_io_trace structure and places it in a per-cpu subbuffer. */ -void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, +static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, int rw, u32 what, int error, int pdu_len, void *pdu_data) { struct task_struct *tsk = current; @@ -177,8 +185,6 @@ void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, local_irq_restore(flags); } -EXPORT_SYMBOL_GPL(__blk_add_trace); - static struct dentry *blk_tree_root; static DEFINE_MUTEX(blk_tree_mutex); static unsigned int root_users; @@ -237,6 +243,10 @@ static void blk_trace_cleanup(struct blk_trace *bt) free_percpu(bt->sequence); free_percpu(bt->msg_data); kfree(bt); + mutex_lock(&blk_probe_mutex); + if (atomic_dec_and_test(&blk_probes_ref)) + blk_unregister_tracepoints(); + mutex_unlock(&blk_probe_mutex); } int blk_trace_remove(struct request_queue *q) @@ -428,6 +438,14 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, bt->pid = buts->pid; bt->trace_state = Blktrace_setup; + mutex_lock(&blk_probe_mutex); + if (atomic_add_return(1, &blk_probes_ref) == 1) { + ret = blk_register_tracepoints(); + if (ret) + goto probe_err; + } + mutex_unlock(&blk_probe_mutex); + ret = -EBUSY; old_bt = xchg(&q->blk_trace, bt); if (old_bt) { @@ -436,6 +454,9 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, } return 0; +probe_err: + atomic_dec(&blk_probes_ref); + mutex_unlock(&blk_probe_mutex); err: if (dir) blk_remove_tree(dir); @@ -562,3 +583,308 @@ void blk_trace_shutdown(struct request_queue *q) blk_trace_remove(q); } } + +/* + * blktrace probes + */ + +/** + * blk_add_trace_rq - Add a trace for a request oriented action + * @q: queue the io is for + * @rq: the source request + * @what: the action + * + * Description: + * Records an action against a request. Will log the bio offset + size. + * + **/ +static void blk_add_trace_rq(struct request_queue *q, struct request *rq, + u32 what) +{ + struct blk_trace *bt = q->blk_trace; + int rw = rq->cmd_flags & 0x03; + + if (likely(!bt)) + return; + + if (blk_discard_rq(rq)) + rw |= (1 << BIO_RW_DISCARD); + + if (blk_pc_request(rq)) { + what |= BLK_TC_ACT(BLK_TC_PC); + __blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors, + sizeof(rq->cmd), rq->cmd); + } else { + what |= BLK_TC_ACT(BLK_TC_FS); + __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9, + rw, what, rq->errors, 0, NULL); + } +} + +static void blk_add_trace_rq_abort(struct request_queue *q, struct request *rq) +{ + blk_add_trace_rq(q, rq, BLK_TA_ABORT); +} + +static void blk_add_trace_rq_insert(struct request_queue *q, struct request *rq) +{ + blk_add_trace_rq(q, rq, BLK_TA_INSERT); +} + +static void blk_add_trace_rq_issue(struct request_queue *q, struct request *rq) +{ + blk_add_trace_rq(q, rq, BLK_TA_ISSUE); +} + +static void blk_add_trace_rq_requeue(struct request_queue *q, struct request *rq) +{ + blk_add_trace_rq(q, rq, BLK_TA_REQUEUE); +} + +static void blk_add_trace_rq_complete(struct request_queue *q, struct request *rq) +{ + blk_add_trace_rq(q, rq, BLK_TA_COMPLETE); +} + +/** + * blk_add_trace_bio - Add a trace for a bio oriented action + * @q: queue the io is for + * @bio: the source bio + * @what: the action + * + * Description: + * Records an action against a bio. Will log the bio offset + size. + * + **/ +static void blk_add_trace_bio(struct request_queue *q, struct bio *bio, + u32 what) +{ + struct blk_trace *bt = q->blk_trace; + + if (likely(!bt)) + return; + + __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what, + !bio_flagged(bio, BIO_UPTODATE), 0, NULL); +} + +static void blk_add_trace_bio_bounce(struct request_queue *q, struct bio *bio) +{ + blk_add_trace_bio(q, bio, BLK_TA_BOUNCE); +} + +static void blk_add_trace_bio_complete(struct request_queue *q, struct bio *bio) +{ + blk_add_trace_bio(q, bio, BLK_TA_COMPLETE); +} + +static void blk_add_trace_bio_backmerge(struct request_queue *q, struct bio *bio) +{ + blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE); +} + +static void blk_add_trace_bio_frontmerge(struct request_queue *q, struct bio *bio) +{ + blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE); +} + +static void blk_add_trace_bio_queue(struct request_queue *q, struct bio *bio) +{ + blk_add_trace_bio(q, bio, BLK_TA_QUEUE); +} + +static void blk_add_trace_getrq(struct request_queue *q, struct bio *bio, int rw) +{ + if (bio) + blk_add_trace_bio(q, bio, BLK_TA_GETRQ); + else { + struct blk_trace *bt = q->blk_trace; + + if (bt) + __blk_add_trace(bt, 0, 0, rw, BLK_TA_GETRQ, 0, 0, NULL); + } +} + + +static void blk_add_trace_sleeprq(struct request_queue *q, struct bio *bio, int rw) +{ + if (bio) + blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ); + else { + struct blk_trace *bt = q->blk_trace; + + if (bt) + __blk_add_trace(bt, 0, 0, rw, BLK_TA_SLEEPRQ, 0, 0, NULL); + } +} + +static void blk_add_trace_plug(struct request_queue *q) +{ + struct blk_trace *bt = q->blk_trace; + + if (bt) + __blk_add_trace(bt, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL); +} + +static void blk_add_trace_unplug_io(struct request_queue *q) +{ + struct blk_trace *bt = q->blk_trace; + + if (bt) { + unsigned int pdu = q->rq.count[READ] + q->rq.count[WRITE]; + __be64 rpdu = cpu_to_be64(pdu); + + __blk_add_trace(bt, 0, 0, 0, BLK_TA_UNPLUG_IO, 0, + sizeof(rpdu), &rpdu); + } +} + +static void blk_add_trace_unplug_timer(struct request_queue *q) +{ + struct blk_trace *bt = q->blk_trace; + + if (bt) { + unsigned int pdu = q->rq.count[READ] + q->rq.count[WRITE]; + __be64 rpdu = cpu_to_be64(pdu); + + __blk_add_trace(bt, 0, 0, 0, BLK_TA_UNPLUG_TIMER, 0, + sizeof(rpdu), &rpdu); + } +} + +static void blk_add_trace_split(struct request_queue *q, struct bio *bio, + unsigned int pdu) +{ + struct blk_trace *bt = q->blk_trace; + + if (bt) { + __be64 rpdu = cpu_to_be64(pdu); + + __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, + BLK_TA_SPLIT, !bio_flagged(bio, BIO_UPTODATE), + sizeof(rpdu), &rpdu); + } +} + +/** + * blk_add_trace_remap - Add a trace for a remap operation + * @q: queue the io is for + * @bio: the source bio + * @dev: target device + * @from: source sector + * @to: target sector + * + * Description: + * Device mapper or raid target sometimes need to split a bio because + * it spans a stripe (or similar). Add a trace for that action. + * + **/ +static void blk_add_trace_remap(struct request_queue *q, struct bio *bio, + dev_t dev, sector_t from, sector_t to) +{ + struct blk_trace *bt = q->blk_trace; + struct blk_io_trace_remap r; + + if (likely(!bt)) + return; + + r.device = cpu_to_be32(dev); + r.device_from = cpu_to_be32(bio->bi_bdev->bd_dev); + r.sector = cpu_to_be64(to); + + __blk_add_trace(bt, from, bio->bi_size, bio->bi_rw, BLK_TA_REMAP, + !bio_flagged(bio, BIO_UPTODATE), sizeof(r), &r); +} + +/** + * blk_add_driver_data - Add binary message with driver-specific data + * @q: queue the io is for + * @rq: io request + * @data: driver-specific data + * @len: length of driver-specific data + * + * Description: + * Some drivers might want to write driver-specific data per request. + * + **/ +void blk_add_driver_data(struct request_queue *q, + struct request *rq, + void *data, size_t len) +{ + struct blk_trace *bt = q->blk_trace; + + if (likely(!bt)) + return; + + if (blk_pc_request(rq)) + __blk_add_trace(bt, 0, rq->data_len, 0, BLK_TA_DRV_DATA, + rq->errors, len, data); + else + __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9, + 0, BLK_TA_DRV_DATA, rq->errors, len, data); +} +EXPORT_SYMBOL_GPL(blk_add_driver_data); + +static int blk_register_tracepoints(void) +{ + int ret; + + ret = register_trace_block_rq_abort(blk_add_trace_rq_abort); + WARN_ON(ret); + ret = register_trace_block_rq_insert(blk_add_trace_rq_insert); + WARN_ON(ret); + ret = register_trace_block_rq_issue(blk_add_trace_rq_issue); + WARN_ON(ret); + ret = register_trace_block_rq_requeue(blk_add_trace_rq_requeue); + WARN_ON(ret); + ret = register_trace_block_rq_complete(blk_add_trace_rq_complete); + WARN_ON(ret); + ret = register_trace_block_bio_bounce(blk_add_trace_bio_bounce); + WARN_ON(ret); + ret = register_trace_block_bio_complete(blk_add_trace_bio_complete); + WARN_ON(ret); + ret = register_trace_block_bio_backmerge(blk_add_trace_bio_backmerge); + WARN_ON(ret); + ret = register_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge); + WARN_ON(ret); + ret = register_trace_block_bio_queue(blk_add_trace_bio_queue); + WARN_ON(ret); + ret = register_trace_block_getrq(blk_add_trace_getrq); + WARN_ON(ret); + ret = register_trace_block_sleeprq(blk_add_trace_sleeprq); + WARN_ON(ret); + ret = register_trace_block_plug(blk_add_trace_plug); + WARN_ON(ret); + ret = register_trace_block_unplug_timer(blk_add_trace_unplug_timer); + WARN_ON(ret); + ret = register_trace_block_unplug_io(blk_add_trace_unplug_io); + WARN_ON(ret); + ret = register_trace_block_split(blk_add_trace_split); + WARN_ON(ret); + ret = register_trace_block_remap(blk_add_trace_remap); + WARN_ON(ret); + return 0; +} + +static void blk_unregister_tracepoints(void) +{ + unregister_trace_block_remap(blk_add_trace_remap); + unregister_trace_block_split(blk_add_trace_split); + unregister_trace_block_unplug_io(blk_add_trace_unplug_io); + unregister_trace_block_unplug_timer(blk_add_trace_unplug_timer); + unregister_trace_block_plug(blk_add_trace_plug); + unregister_trace_block_sleeprq(blk_add_trace_sleeprq); + unregister_trace_block_getrq(blk_add_trace_getrq); + unregister_trace_block_bio_queue(blk_add_trace_bio_queue); + unregister_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge); + unregister_trace_block_bio_backmerge(blk_add_trace_bio_backmerge); + unregister_trace_block_bio_complete(blk_add_trace_bio_complete); + unregister_trace_block_bio_bounce(blk_add_trace_bio_bounce); + unregister_trace_block_rq_complete(blk_add_trace_rq_complete); + unregister_trace_block_rq_requeue(blk_add_trace_rq_requeue); + unregister_trace_block_rq_issue(blk_add_trace_rq_issue); + unregister_trace_block_rq_insert(blk_add_trace_rq_insert); + unregister_trace_block_rq_abort(blk_add_trace_rq_abort); + + tracepoint_synchronize_unregister(); +} diff --git a/block/elevator.c b/block/elevator.c index 9ac82dde99d..530fcfe2ef0 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include @@ -586,7 +587,7 @@ void elv_insert(struct request_queue *q, struct request *rq, int where) unsigned ordseq; int unplug_it = 1; - blk_add_trace_rq(q, rq, BLK_TA_INSERT); + trace_block_rq_insert(q, rq); rq->q = q; @@ -772,7 +773,7 @@ struct request *elv_next_request(struct request_queue *q) * not be passed by new incoming requests */ rq->cmd_flags |= REQ_STARTED; - blk_add_trace_rq(q, rq, BLK_TA_ISSUE); + trace_block_rq_issue(q, rq); } if (!q->boundary_rq || q->boundary_rq == rq) { @@ -921,7 +922,7 @@ void elv_abort_queue(struct request_queue *q) while (!list_empty(&q->queue_head)) { rq = list_entry_rq(q->queue_head.next); rq->cmd_flags |= REQ_QUIET; - blk_add_trace_rq(q, rq, BLK_TA_ABORT); + trace_block_rq_abort(q, rq); __blk_end_request(rq, -EIO, blk_rq_bytes(rq)); } } -- cgit v1.2.3 From 0bfc24559d7945506184d86739fe365a181f06b7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 26 Nov 2008 11:59:56 +0100 Subject: blktrace: port to tracepoints, update Port to the new tracepoints API: split DEFINE_TRACE() and DECLARE_TRACE() sites. Spread them out to the usage sites, as suggested by Mathieu Desnoyers. Signed-off-by: Ingo Molnar Acked-by: Mathieu Desnoyers --- block/blk-core.c | 13 +++++++++++++ block/elevator.c | 5 +++++ 2 files changed, 18 insertions(+) (limited to 'block') diff --git a/block/blk-core.c b/block/blk-core.c index 04267d66a2b..0c06cf5aaaf 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -32,6 +32,19 @@ #include "blk.h" +DEFINE_TRACE(block_plug); +DEFINE_TRACE(block_unplug_io); +DEFINE_TRACE(block_unplug_timer); +DEFINE_TRACE(block_getrq); +DEFINE_TRACE(block_sleeprq); +DEFINE_TRACE(block_rq_requeue); +DEFINE_TRACE(block_bio_backmerge); +DEFINE_TRACE(block_bio_frontmerge); +DEFINE_TRACE(block_bio_queue); +DEFINE_TRACE(block_rq_complete); +DEFINE_TRACE(block_remap); /* Also used in drivers/md/dm.c */ +EXPORT_TRACEPOINT_SYMBOL_GPL(block_remap); + static int __make_request(struct request_queue *q, struct bio *bio); /* diff --git a/block/elevator.c b/block/elevator.c index 530fcfe2ef0..e5677fe4f41 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -42,6 +42,8 @@ static DEFINE_SPINLOCK(elv_list_lock); static LIST_HEAD(elv_list); +DEFINE_TRACE(block_rq_abort); + /* * Merge hash stuff. */ @@ -53,6 +55,9 @@ static const int elv_hash_shift = 6; #define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors) #define ELV_ON_HASH(rq) (!hlist_unhashed(&(rq)->hash)) +DEFINE_TRACE(block_rq_insert); +DEFINE_TRACE(block_rq_issue); + /* * Query io scheduler to see if the current process issuing bio may be * merged with rq. -- cgit v1.2.3