kernel-2.6.18-194.11.1.el5.src.rpm

From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sat, 9 May 2009 03:26:01 -0300
Subject: [trace] blk tracepoints
Message-id: 20090509062601.GB4103@ghostprotocols.net
O-Subject: [RHEL 5-U4 Patch V9] bz493454 blk tracepoints
Bugzilla: 493454
RH-Acked-by: Jeff Moyer <jmoyer@redhat.com>
RH-Acked-by: Jason Baron <jbaron@redhat.com>
RH-Acked-by: Larry Woodman <lwoodman@redhat.com>
RH-Acked-by: Dave Anderson <anderson@redhat.com>

commit 5a6a5c5775e6d1edbd6cb757bf5440652b33b9c8
Author: Arnaldo de Melo <acme@hs20-bc2-1.build.redhat.com>
Date:   Sat May 9 02:00:28 2009 -0400

    Backport the blktrace tracepoints conversion

    Upstream cset: 5f3ea37c7716db4e894a480e0c18b24399595b6b

    Had to be modified to keep __blk_add_trace exported, since it is on our kABI
    whitelist. The blk_add_trace_{rq,bio,generic,pdu_int} functions were uninlined
    because they redirect to multiple tracepoints; blk_add_trace_remap remains
    inlined because it redirects to a single tracepoint.
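
    Condensed, the pattern for each kept wrapper looks like this (sketch only;
    the full versions, including the missing-tracepoint warning, are in the
    block/blktrace.c hunks below):

        void blk_add_trace_rq(struct request_queue *q, struct request *rq, u32 what)
        {
                switch (what) {
                case BLK_TA_INSERT:     trace_block_rq_insert(q, rq);   break;
                case BLK_TA_ISSUE:      trace_block_rq_issue(q, rq);    break;
                case BLK_TA_REQUEUE:    trace_block_rq_requeue(q, rq);  break;
                case BLK_TA_COMPLETE:   trace_block_rq_complete(q, rq); break;
                default:
                        /* action without a tracepoint: keep the old direct path */
                        __blk_add_trace_rq(q, rq, what);
                        break;
                }
        }
        EXPORT_SYMBOL_GPL(blk_add_trace_rq);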

    This way 3rd party drivers can keep using __blk_add_trace, so the userspace
    blktrace utility continues to work with them, and as soon as those drivers are
    recompiled against the new codebase they also become usable with the
    tracepoint probes in systemtap.
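
    For instance, a hypothetical out-of-tree probe module could attach to one of
    the new tracepoints directly; the register/unregister helpers and the probe
    signature below are the ones generated by DEFINE_TRACE() in
    include/trace/block.h (the module itself is only an illustration):

        #include <linux/module.h>
        #include <trace/block.h>

        /* must match TPPROTO(struct request_queue *q, struct request *rq) */
        static void probe_block_rq_insert(struct request_queue *q, struct request *rq)
        {
                printk(KERN_INFO "blk probe: request inserted, sector %llu\n",
                       (unsigned long long)rq->hard_sector);
        }

        static int __init blk_probe_init(void)
        {
                return register_trace_block_rq_insert(probe_block_rq_insert);
        }

        static void __exit blk_probe_exit(void)
        {
                unregister_trace_block_rq_insert(probe_block_rq_insert);
        }

        module_init(blk_probe_init);
        module_exit(blk_probe_exit);
        MODULE_LICENSE("GPL");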

    It also includes a fix to emit the DM remap information in the right order,
    found in upstream commit c7149d6bce2561aeaa48caaa1700aa8b3b22008f.

    It also requires that these patches be applied first:

    linux-2.6-s390-add-fcp-performance-data-collection.patch
    linux-2.6-s390-blktrace-add-ioctls-to-scsi-generic-devices.patch
    linux-2.6-md-dm-fix-oops-in-mempool_free-when-device-removed.patch
    linux-2.6-md-dm-raid45-corrupt-data-and-premature-end-of-synch.patch

    They are required because they add infrastructure that this patch touches
    (mainly BLK_TA_DRV_DATA) or change blk_add_trace_ calls.

    Additionally, blk_register_tracepoints now unwinds the successful tracepoint
    registrations if one of them fails and returns the error value to
    blk_trace_setup, which bails out when not all tracepoints could be registered,
    as suggested by Jason Baron.
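
    Open-coded for just two tracepoints, that unwinding looks roughly like the
    sketch below; the actual patch generates the full register/unregister chain
    with the register_trace_block()/fail_trace_block() macros and computed gotos
    in blk_register_tracepoints():

        static int blk_register_tracepoints(void)
        {
                int ret;

                ret = register_trace_block_rq_insert(blk_add_trace_rq_insert);
                if (ret)
                        goto err;
                ret = register_trace_block_rq_issue(blk_add_trace_rq_issue);
                if (ret)
                        goto err_unregister_rq_insert;
                /* ... the remaining tracepoints follow the same pattern ... */
                return 0;

        err_unregister_rq_insert:
                unregister_trace_block_rq_insert(blk_add_trace_rq_insert);
        err:
                return ret;
        }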

    Acked-by: Jeff Moyer <jmoyer@redhat.com>
    Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/block/blktrace.c b/block/blktrace.c
index a2cc2a8..6f190d2 100644
--- a/block/blktrace.c
+++ b/block/blktrace.c
@@ -22,11 +22,19 @@
 #include <linux/init.h>
 #include <linux/mutex.h>
 #include <linux/debugfs.h>
+#include <trace/block.h>
 #include <asm/uaccess.h>
 
 static DEFINE_PER_CPU(unsigned long long, blk_trace_cpu_offset) = { 0, };
 static unsigned int blktrace_seq __read_mostly = 1;
 
+/* Global reference count of probes */
+static DEFINE_MUTEX(blk_probe_mutex);
+static atomic_t blk_probes_ref = ATOMIC_INIT(0);
+
+static int blk_register_tracepoints(void);
+static void blk_unregister_tracepoints(void);
+
 /*
  * Send out a notify for this process, if we haven't done so since a trace
  * started
@@ -198,6 +206,10 @@ static void blk_trace_cleanup(struct blk_trace *bt)
 	blk_remove_tree(bt->dir);
 	free_percpu(bt->sequence);
 	kfree(bt);
+	mutex_lock(&blk_probe_mutex);
+	if (atomic_dec_and_test(&blk_probes_ref))
+		blk_unregister_tracepoints();
+	mutex_unlock(&blk_probe_mutex);
 }
 
 int blk_trace_remove(request_queue_t *q)
@@ -349,14 +361,28 @@ int blk_trace_setup(request_queue_t *q, char *name, dev_t dev,
 	bt->pid = buts.pid;
 	bt->trace_state = Blktrace_setup;
 
+	ret = 0;
+	mutex_lock(&blk_probe_mutex);
+	if (atomic_add_return(1, &blk_probes_ref) == 1)
+		ret = blk_register_tracepoints();
+	mutex_unlock(&blk_probe_mutex);
+	if (ret != 0)
+		goto err;
+
 	ret = -EBUSY;
 	old_bt = xchg(&q->blk_trace, bt);
 	if (old_bt) {
 		(void) xchg(&q->blk_trace, old_bt);
-		goto err;
+		goto err_unregister_tracepoints;
 	}
 
 	return 0;
+
+err_unregister_tracepoints:
+	mutex_lock(&blk_probe_mutex);
+	if (atomic_dec_and_test(&blk_probes_ref))
+		blk_unregister_tracepoints();
+	mutex_unlock(&blk_probe_mutex);
 err:
 	if (dir)
 		blk_remove_tree(dir);
@@ -542,3 +568,449 @@ static __init int blk_trace_init(void)
 
 module_init(blk_trace_init);
 
+/*
+ * blktrace probes
+ */
+
+/**
+ * __blk_add_trace_rq - Add a trace for a request oriented action
+ * @q:		queue the io is for
+ * @rq:		the source request
+ * @what:	the action
+ *
+ * Description:
+ *     Records an action against a request. Will log the bio offset + size.
+ *
+ **/
+static void __blk_add_trace_rq(struct request_queue *q, struct request *rq,
+			       u32 what)
+{
+	struct blk_trace *bt = q->blk_trace;
+	int rw = rq->flags & 0x03;
+
+	if (likely(!bt))
+		return;
+
+	if (blk_pc_request(rq)) {
+		what |= BLK_TC_ACT(BLK_TC_PC);
+		__blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors,
+				sizeof(rq->cmd), rq->cmd);
+	} else  {
+		what |= BLK_TC_ACT(BLK_TC_FS);
+		__blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9,
+				rw, what, rq->errors, 0, NULL);
+	}
+}
+
+static void blk_add_trace_rq_insert(struct request_queue *q, struct request *rq)
+{
+	__blk_add_trace_rq(q, rq, BLK_TA_INSERT);
+}
+
+static void blk_add_trace_rq_issue(struct request_queue *q, struct request *rq)
+{
+	__blk_add_trace_rq(q, rq, BLK_TA_ISSUE);
+}
+
+static void blk_add_trace_rq_requeue(struct request_queue *q, struct request *rq)
+{
+	__blk_add_trace_rq(q, rq, BLK_TA_REQUEUE);
+}
+
+static void blk_add_trace_rq_complete(struct request_queue *q, struct request *rq)
+{
+	__blk_add_trace_rq(q, rq, BLK_TA_COMPLETE);
+}
+
+#define WARN_MISSING_TRACEPOINT(what) do { \
+	static char warnings_about_missing_tracepoints; \
+	if (warnings_about_missing_tracepoints < 10) { \
+		printk(KERN_WARNING "%s: no tracepoint for %d", \
+		       __func__, what); \
+	       ++warnings_about_missing_tracepoints; \
+	} \
+} while (0);
+
+void blk_add_trace_rq(struct request_queue *q, struct request *rq, u32 what)
+{
+	/*
+	 * We redirect here to the upstream tracepoints so that we can
+	 * catch these events while keeping the blk_add_trace_rq
+	 * function for 3rd party drivers. Warn about newer actions
+	 * but push them into the relay channel for the benefit of 
+	 * blktrace(8) users.
+	 */
+	switch (what) {
+	case BLK_TA_INSERT:	trace_block_rq_insert(q, rq);	break;
+	case BLK_TA_ISSUE:	trace_block_rq_issue(q, rq);	break;
+	case BLK_TA_REQUEUE:	trace_block_rq_requeue(q, rq);	break;
+	case BLK_TA_COMPLETE:	trace_block_rq_complete(q, rq);	break;
+	default: {
+		struct blk_trace *bt = q->blk_trace;
+
+		WARN_MISSING_TRACEPOINT(what);
+
+		if (likely(!bt))
+			return;
+
+		__blk_add_trace_rq(q, rq, what);
+	}
+		break;
+	}
+}
+
+EXPORT_SYMBOL_GPL(blk_add_trace_rq);
+
+/**
+ * __blk_add_trace_bio - Add a trace for a bio oriented action
+ * @q:		queue the io is for
+ * @bio:	the source bio
+ * @what:	the action
+ *
+ * Description:
+ *     Records an action against a bio. Will log the bio offset + size.
+ *
+ **/
+static void __blk_add_trace_bio(struct request_queue *q, struct bio *bio, u32 what)
+{
+	struct blk_trace *bt = q->blk_trace;
+
+	if (likely(!bt))
+		return;
+
+	__blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what,
+			!bio_flagged(bio, BIO_UPTODATE), 0, NULL);
+}
+
+static void blk_add_trace_bio_bounce(struct request_queue *q, struct bio *bio)
+{
+	__blk_add_trace_bio(q, bio, BLK_TA_BOUNCE);
+}
+
+static void blk_add_trace_bio_complete(struct request_queue *q, struct bio *bio)
+{
+	__blk_add_trace_bio(q, bio, BLK_TA_COMPLETE);
+}
+
+static void blk_add_trace_bio_backmerge(struct request_queue *q, struct bio *bio)
+{
+	__blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE);
+}
+
+static void blk_add_trace_bio_frontmerge(struct request_queue *q, struct bio *bio)
+{
+	__blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE);
+}
+
+static void blk_add_trace_bio_queue(struct request_queue *q, struct bio *bio)
+{
+	__blk_add_trace_bio(q, bio, BLK_TA_QUEUE);
+}
+
+static void blk_add_trace_getrq(struct request_queue *q, struct bio *bio, int rw)
+{
+	if (bio)
+		__blk_add_trace_bio(q, bio, BLK_TA_GETRQ);
+	else {
+		struct blk_trace *bt = q->blk_trace;
+
+		if (bt)
+			__blk_add_trace(bt, 0, 0, rw, BLK_TA_GETRQ, 0, 0, NULL);
+	}
+}
+
+
+static void blk_add_trace_sleeprq(struct request_queue *q, struct bio *bio, int rw)
+{
+	if (bio)
+		__blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ);
+	else {
+		struct blk_trace *bt = q->blk_trace;
+
+		if (bt)
+			__blk_add_trace(bt, 0, 0, rw, BLK_TA_SLEEPRQ, 0, 0, NULL);
+	}
+}
+
+static void blk_add_trace_plug(struct request_queue *q)
+{
+	struct blk_trace *bt = q->blk_trace;
+
+	if (bt)
+		__blk_add_trace(bt, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL);
+}
+
+static void blk_add_trace_unplug_io(struct request_queue *q)
+{
+	struct blk_trace *bt = q->blk_trace;
+
+	if (bt) {
+		unsigned int pdu = q->rq.count[READ] + q->rq.count[WRITE];
+		__be64 rpdu = cpu_to_be64(pdu);
+
+		__blk_add_trace(bt, 0, 0, 0, BLK_TA_UNPLUG_IO, 0,
+				sizeof(rpdu), &rpdu);
+	}
+}
+
+static void blk_add_trace_unplug_timer(struct request_queue *q)
+{
+	struct blk_trace *bt = q->blk_trace;
+
+	if (bt) {
+		unsigned int pdu = q->rq.count[READ] + q->rq.count[WRITE];
+		__be64 rpdu = cpu_to_be64(pdu);
+
+		__blk_add_trace(bt, 0, 0, 0, BLK_TA_UNPLUG_TIMER, 0,
+				sizeof(rpdu), &rpdu);
+	}
+}
+
+static void blk_add_trace_split(struct request_queue *q, struct bio *bio,
+				unsigned int pdu)
+{
+	struct blk_trace *bt = q->blk_trace;
+
+	if (bt) {
+		__be64 rpdu = cpu_to_be64(pdu);
+
+		__blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw,
+				BLK_TA_SPLIT, !bio_flagged(bio, BIO_UPTODATE),
+				sizeof(rpdu), &rpdu);
+	}
+}
+
+/**
+ * __blk_add_trace_remap - Add a trace for a remap operation
+ * @q:		queue the io is for
+ * @bio:	the source bio
+ * @dev:	target device
+ * @from:	source sector
+ * @to:		target sector
+ *
+ * Description:
+ *     Device mapper or raid target sometimes need to split a bio because
+ *     it spans a stripe (or similar). Add a trace for that action.
+ *
+ **/
+static void __blk_add_trace_remap(struct request_queue *q, struct bio *bio,
+				  dev_t dev, sector_t from, sector_t to)
+{
+	struct blk_trace *bt = q->blk_trace;
+	struct blk_io_trace_remap r;
+
+	if (likely(!bt))
+		return;
+
+	r.device = cpu_to_be32(dev);
+	r.sector = cpu_to_be64(to);
+
+	__blk_add_trace(bt, from, bio->bi_size, bio->bi_rw, BLK_TA_REMAP,
+			!bio_flagged(bio, BIO_UPTODATE), sizeof(r), &r);
+}
+
+void blk_add_trace_bio(struct request_queue *q, struct bio *bio, u32 what)
+{
+	/*
+	 * We redirect here to the upstream tracepoints so that we can
+	 * catch these events while keeping the blk_add_trace_bio
+	 * function for 3rd party drivers. Warn about newer actions
+	 * but push them into the relay channel for the benefit of 
+	 * blktrace(8) users.
+	 */
+	switch (what) {
+	case BLK_TA_BACKMERGE:	trace_block_bio_backmerge(q, bio);  break;
+	case BLK_TA_FRONTMERGE:	trace_block_bio_frontmerge(q, bio); break;
+	case BLK_TA_QUEUE:	trace_block_bio_queue(q, bio);	    break;
+	case BLK_TA_COMPLETE:	trace_block_bio_complete(q, bio);   break;
+	case BLK_TA_BOUNCE:	trace_block_bio_bounce(q, bio);	    break;
+	default: {
+		struct blk_trace *bt = q->blk_trace;
+
+		WARN_MISSING_TRACEPOINT(what);
+
+		if (likely(!bt))
+			return;
+
+		__blk_add_trace_bio(q, bio, what);
+	}
+		break;
+	}
+}
+
+EXPORT_SYMBOL_GPL(blk_add_trace_bio);
+
+/**
+ * blk_add_trace_generic - Add a trace for a generic action
+ * @q:		queue the io is for
+ * @bio:	the source bio
+ * @rw:		the data direction
+ * @what:	the action
+ *
+ * Description:
+ *     Records a simple trace
+ *
+ **/
+void blk_add_trace_generic(struct request_queue *q, struct bio *bio,
+			   int rw, u32 what)
+{
+	/*
+	 * We redirect here to the upstream tracepoints so that we can
+	 * catch these events while keeping the blk_add_trace_generic
+	 * function for 3rd party drivers. Warn about newer actions
+	 * but push them into the relay channel for the benefit of 
+	 * blktrace(8) users.
+	 */
+	switch (what) {
+	case BLK_TA_PLUG:    trace_block_plug(q);	      break;
+	case BLK_TA_GETRQ:   trace_block_getrq(q, bio, rw);   break;
+	case BLK_TA_SLEEPRQ: trace_block_sleeprq(q, bio, rw); break;
+	default: {
+		struct blk_trace *bt = q->blk_trace;
+
+		WARN_MISSING_TRACEPOINT(what);
+
+		if (likely(!bt))
+			return;
+
+		if (bio)
+			__blk_add_trace_bio(q, bio, what);
+		else
+			__blk_add_trace(bt, 0, 0, rw, what, 0, 0, NULL);
+	}
+		break;
+	}
+}
+
+EXPORT_SYMBOL_GPL(blk_add_trace_generic);
+
+/**
+ * blk_add_trace_pdu_int - Add a trace for a bio with an integer payload
+ * @q:		queue the io is for
+ * @what:	the action
+ * @bio:	the source bio
+ * @pdu:	the integer payload
+ *
+ * Description:
+ *     Adds a trace with some integer payload. This might be an unplug
+ *     option given as the action, with the depth at unplug time given
+ *     as the payload
+ *
+ **/
+void blk_add_trace_pdu_int(struct request_queue *q, u32 what,
+			   struct bio *bio, unsigned int pdu)
+{
+	/*
+	 * We redirect here to the upstream tracepoints so that we can
+	 * catch these events while keeping the blk_add_trace_pdu_int
+	 * function for 3rd party drivers. Warn about newer actions
+	 * but push them into the relay channel for the benefit of 
+	 * blktrace(8) users.
+	 */
+	switch (what) {
+	case BLK_TA_UNPLUG_IO:	  trace_block_unplug_io(q);	  break;
+	case BLK_TA_UNPLUG_TIMER: trace_block_unplug_timer(q);	  break;
+	case BLK_TA_SPLIT:	  trace_block_split(q, bio, pdu); break;
+	default: {
+		struct blk_trace *bt = q->blk_trace;
+		__be64 rpdu = cpu_to_be64(pdu);
+
+		WARN_MISSING_TRACEPOINT(what);
+
+		if (likely(!bt))
+			return;
+
+		if (bio)
+			__blk_add_trace(bt, bio->bi_sector, bio->bi_size,
+					bio->bi_rw, what,
+					!bio_flagged(bio, BIO_UPTODATE),
+					sizeof(rpdu), &rpdu);
+		else
+			__blk_add_trace(bt, 0, 0, 0, what, 0, sizeof(rpdu),
+					&rpdu);
+	}
+		break;
+	}
+}
+
+EXPORT_SYMBOL_GPL(blk_add_trace_pdu_int);
+
+#define last_register_trace_block(tpoint)				\
+	ret = register_trace_block_##tpoint(blk_add_trace_##tpoint);	\
+	if (ret) {							\
+               pr_info("blktrace: register_trace_block_%s failed\n",	\
+			#tpoint);					\
+               goto *exit_point;					\
+	}
+
+#define register_trace_block(tpoint)					\
+	last_register_trace_block(tpoint) 				\
+	else exit_point = &&fail_unregister_probe_##tpoint;
+
+#define fail_trace_block(tpoint)					\
+	fail_unregister_probe_##tpoint:					\
+               unregister_trace_block_##tpoint(blk_add_trace_##tpoint)
+
+
+static int blk_register_tracepoints(void)
+{
+	int ret;
+	void *exit_point = &&error;
+
+	register_trace_block(bio_bounce);
+	register_trace_block(bio_complete);
+	register_trace_block(bio_backmerge);
+	register_trace_block(bio_frontmerge);
+	register_trace_block(bio_queue);
+	register_trace_block(rq_insert);
+	register_trace_block(rq_issue);
+	register_trace_block(rq_requeue);
+	register_trace_block(rq_complete);
+	register_trace_block(getrq);
+	register_trace_block(sleeprq);
+	register_trace_block(plug);
+	register_trace_block(unplug_timer);
+	register_trace_block(unplug_io);
+	register_trace_block(split);
+	last_register_trace_block(remap);
+
+	return 0;
+
+	fail_trace_block(split);
+	fail_trace_block(unplug_io);
+	fail_trace_block(unplug_timer);
+	fail_trace_block(plug);
+	fail_trace_block(sleeprq);
+	fail_trace_block(getrq);
+	fail_trace_block(rq_complete);
+	fail_trace_block(rq_requeue);
+	fail_trace_block(rq_issue);
+	fail_trace_block(rq_insert);
+	fail_trace_block(bio_queue);
+	fail_trace_block(bio_frontmerge);
+	fail_trace_block(bio_backmerge);
+	fail_trace_block(bio_complete);
+	fail_trace_block(bio_bounce);
+error:
+	return ret;
+}
+
+static void blk_unregister_tracepoints(void)
+{
+	unregister_trace_block_remap(__blk_add_trace_remap);
+	unregister_trace_block_split(blk_add_trace_split);
+	unregister_trace_block_unplug_io(blk_add_trace_unplug_io);
+	unregister_trace_block_unplug_timer(blk_add_trace_unplug_timer);
+	unregister_trace_block_plug(blk_add_trace_plug);
+	unregister_trace_block_sleeprq(blk_add_trace_sleeprq);
+	unregister_trace_block_getrq(blk_add_trace_getrq);
+	unregister_trace_block_bio_queue(blk_add_trace_bio_queue);
+	unregister_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge);
+	unregister_trace_block_bio_backmerge(blk_add_trace_bio_backmerge);
+	unregister_trace_block_bio_complete(blk_add_trace_bio_complete);
+	unregister_trace_block_bio_bounce(blk_add_trace_bio_bounce);
+	unregister_trace_block_rq_complete(blk_add_trace_rq_complete);
+	unregister_trace_block_rq_requeue(blk_add_trace_rq_requeue);
+	unregister_trace_block_rq_issue(blk_add_trace_rq_issue);
+	unregister_trace_block_rq_insert(blk_add_trace_rq_insert);
+}
diff --git a/block/elevator.c b/block/elevator.c
index 159af39..d56ceae 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -33,6 +33,7 @@
 #include <linux/compiler.h>
 #include <linux/delay.h>
 #include <linux/blktrace_api.h>
+#include <trace/block.h>
 
 #include <asm/uaccess.h>
 
@@ -338,7 +339,7 @@ void elv_insert(request_queue_t *q, struct request *rq, int where)
 	unsigned ordseq;
 	int unplug_it = 1;
 
-	blk_add_trace_rq(q, rq, BLK_TA_INSERT);
+	trace_block_rq_insert(q, rq);
 
 	rq->q = q;
 
@@ -512,7 +513,7 @@ struct request *elv_next_request(request_queue_t *q)
 			 * not be passed by new incoming requests
 			 */
 			rq->flags |= REQ_STARTED;
-			blk_add_trace_rq(q, rq, BLK_TA_ISSUE);
+			trace_block_rq_issue(q, rq);
 		}
 
 		if (!q->boundary_rq || q->boundary_rq == rq) {
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index 6abdc04..c9c227a 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -29,6 +29,7 @@
 #include <linux/interrupt.h>
 #include <linux/cpu.h>
 #include <linux/blktrace_api.h>
+#include <trace/block.h>
 
 /*
  * for max sense size
@@ -1663,7 +1664,7 @@ void blk_plug_device(request_queue_t *q)
 
 	if (!test_and_set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) {
 		mod_timer(&q->unplug_timer, jiffies + q->unplug_delay);
-		blk_add_trace_generic(q, NULL, 0, BLK_TA_PLUG);
+		trace_block_plug(q);
 	}
 }
 
@@ -1731,9 +1732,7 @@ static void blk_backing_dev_unplug(struct backing_dev_info *bdi,
 	 * devices don't necessarily have an ->unplug_fn defined
 	 */
 	if (q->unplug_fn) {
-		blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL,
-					q->rq.count[READ] + q->rq.count[WRITE]);
-
+		trace_block_unplug_io(q);
 		q->unplug_fn(q);
 	}
 }
@@ -1742,9 +1741,7 @@ static void blk_unplug_work(void *data)
 {
 	request_queue_t *q = data;
 
-	blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL,
-				q->rq.count[READ] + q->rq.count[WRITE]);
-
+	trace_block_unplug_io(q);
 	q->unplug_fn(q);
 }
 
@@ -1752,9 +1749,7 @@ static void blk_unplug_timeout(unsigned long data)
 {
 	request_queue_t *q = (request_queue_t *)data;
 
-	blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_TIMER, NULL,
-				q->rq.count[READ] + q->rq.count[WRITE]);
-
+	trace_block_unplug_timer(q);
 	kblockd_schedule_work(&q->unplug_work);
 }
 
@@ -2272,7 +2267,7 @@ rq_starved:
 	rq_init(q, rq);
 	rq->rl = rl;
 
-	blk_add_trace_generic(q, bio, rw, BLK_TA_GETRQ);
+	trace_block_getrq(q, bio, rw);
 out:
 	return rq;
 }
@@ -2301,7 +2296,7 @@ static struct request *get_request_wait(request_queue_t *q, int rw,
 		if (!rq) {
 			struct io_context *ioc;
 
-			blk_add_trace_generic(q, bio, rw, BLK_TA_SLEEPRQ);
+			trace_block_sleeprq(q, bio, rw);
 
 			__generic_unplug_device(q);
 			spin_unlock_irq(q->queue_lock);
@@ -2356,7 +2351,7 @@ EXPORT_SYMBOL(blk_get_request);
  */
 void blk_requeue_request(request_queue_t *q, struct request *rq)
 {
-	blk_add_trace_rq(q, rq, BLK_TA_REQUEUE);
+	trace_block_rq_requeue(q, rq);
 
 	if (blk_rq_tagged(rq))
 		blk_queue_end_tag(q, rq);
@@ -3041,7 +3036,7 @@ static int __make_request(request_queue_t *q, struct bio *bio)
 			if (!q->back_merge_fn(q, req, bio))
 				break;
 
-			blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE);
+			trace_block_bio_backmerge(q, bio);
 
 			req->biotail->bi_next = bio;
 			req->biotail = bio;
@@ -3058,7 +3053,7 @@ static int __make_request(request_queue_t *q, struct bio *bio)
 			if (!q->front_merge_fn(q, req, bio))
 				break;
 
-			blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE);
+			trace_block_bio_frontmerge(q, bio);
 
 			bio->bi_next = req->bio;
 			req->bio = bio;
@@ -3127,6 +3122,9 @@ static inline void blk_partition_remap(struct bio *bio)
 
 		bio->bi_sector += p->start_sect;
 		bio->bi_bdev = bdev->bd_contains;
+		trace_block_remap(bdev_get_queue(bdev), bio,
+				  bdev->bd_dev, bio->bi_sector,
+				  bio->bi_sector - p->start_sect);
 	}
 }
 
@@ -3236,10 +3234,10 @@ end_io:
 		blk_partition_remap(bio);
 
 		if (old_sector != -1)
-			blk_add_trace_remap(q, bio, old_dev, bio->bi_sector, 
+			trace_block_remap(q, bio, old_dev, bio->bi_sector,
 					    old_sector);
 
-		blk_add_trace_bio(q, bio, BLK_TA_QUEUE);
+		trace_block_bio_queue(q, bio);
 
 		old_sector = bio->bi_sector;
 		old_dev = bio->bi_bdev->bd_dev;
@@ -3417,7 +3415,7 @@ static int __end_that_request_first(struct request *req, int uptodate,
 	int total_bytes, bio_nbytes, error, next_idx = 0;
 	struct bio *bio;
 
-	blk_add_trace_rq(req->q, req, BLK_TA_COMPLETE);
+	trace_block_rq_complete(req->q, req);
 
 	/*
 	 * extend uptodate bool to allow < 0 value to be direct io error
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 9b1a175..19aaf54 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -39,6 +39,7 @@
 #include <linux/spinlock.h>
 #include <linux/compat.h>
 #include <linux/blktrace_api.h>
+#include <trace/block.h>
 #include <asm/uaccess.h>
 #include <asm/io.h>
 
@@ -2828,7 +2829,7 @@ after_error_processing:
 
 	cmd->rq->data_len = 0;
 	cmd->rq->completion_data = cmd;
-	blk_add_trace_rq(cmd->rq->q, cmd->rq, BLK_TA_COMPLETE);
+	trace_block_rq_complete(cmd->rq->q, cmd->rq);
 	blk_complete_request(cmd->rq);
 }
 
diff --git a/drivers/md/dm-raid45.c b/drivers/md/dm-raid45.c
index c7cf642..bd32c7a 100644
--- a/drivers/md/dm-raid45.c
+++ b/drivers/md/dm-raid45.c
@@ -48,6 +48,7 @@ static const char *version = "v0.2594b";
 
 #include <linux/kernel.h>
 #include <linux/vmalloc.h>
+#include <trace/block.h>
 
 #include <linux/dm-io.h>
 #include <linux/dm-dirty-log.h>
@@ -3277,8 +3278,10 @@ static void do_unplug(struct raid_set *rs)
 			request_queue_t *q = bdev_get_queue(dev->dev->bdev);
 
 			if (likely(q)) {
-				if (q->unplug_fn)
+				if (q->unplug_fn) {
+					trace_block_unplug_io(q);
 					q->unplug_fn(q);
+				}
 			}
 		}
 	}
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 94dc422..461e686 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -22,6 +22,7 @@
 #include <linux/hdreg.h>
 #include <linux/blktrace_api.h>
 #include <linux/smp_lock.h>
+#include <trace/block.h>
 
 #define DM_MSG_PREFIX "core"
 
@@ -508,9 +509,7 @@ static void dec_pending(struct dm_io *io, int error)
 		free_io(md, io);
 
 		if (io_error != DM_ENDIO_REQUEUE) {
-			blk_add_trace_bio(md->queue, bio,
-					  BLK_TA_COMPLETE);
-
+			trace_block_bio_complete(md->queue, bio);
 			bio_endio(bio, bio->bi_size, io_error);
 		}
 	}
@@ -604,9 +603,9 @@ static void __map_bio(struct dm_target *ti, struct bio *clone,
 	if (r == DM_MAPIO_REMAPPED) {
 		/* the bio has been remapped so dispatch it */
 
-		blk_add_trace_remap(bdev_get_queue(clone->bi_bdev), clone,
-				    tio->io->bio->bi_bdev->bd_dev, sector,
-				    clone->bi_sector);
+		trace_block_remap(bdev_get_queue(clone->bi_bdev), clone,
+				    tio->io->bio->bi_bdev->bd_dev,
+				    clone->bi_sector, sector);
 
 		generic_make_request(clone);
 	} else if (r < 0 || r == DM_MAPIO_REQUEUE) {
diff --git a/fs/bio.c b/fs/bio.c
index 6a0b9ad..f20ccac 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -27,6 +27,7 @@
 #include <linux/workqueue.h>
 #include <linux/blktrace_api.h>
 #include <scsi/sg.h>		/* for struct sg_iovec */
+#include <trace/block.h>
 
 #define BIO_POOL_SIZE 256
 
@@ -1095,7 +1096,7 @@ struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors)
 	if (!bp)
 		return bp;
 
-	blk_add_trace_pdu_int(bdev_get_queue(bi->bi_bdev), BLK_TA_SPLIT, bi,
+	trace_block_split(bdev_get_queue(bi->bi_bdev), bi,
 				bi->bi_sector + first_sectors);
 
 	BUG_ON(bi->bi_vcnt != 1);
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index d419568..399c8cc 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -134,138 +134,23 @@ struct blk_user_trace_setup {
 };
 
 #if defined(CONFIG_BLK_DEV_IO_TRACE)
+#include <trace/block.h>
+
 extern int blk_trace_ioctl(struct block_device *, unsigned, char __user *);
 extern void blk_trace_shutdown(request_queue_t *);
 extern void __blk_add_trace(struct blk_trace *, sector_t, int, int, u32, int, int, void *);
-
-/**
- * blk_add_trace_rq - Add a trace for a request oriented action
- * @q:		queue the io is for
- * @rq:		the source request
- * @what:	the action
- *
- * Description:
- *     Records an action against a request. Will log the bio offset + size.
- *
- **/
-static inline void blk_add_trace_rq(struct request_queue *q, struct request *rq,
-				    u32 what)
-{
-	struct blk_trace *bt = q->blk_trace;
-	int rw = rq->flags & 0x03;
-
-	if (likely(!bt))
-		return;
-
-	if (blk_pc_request(rq)) {
-		what |= BLK_TC_ACT(BLK_TC_PC);
-		__blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors, sizeof(rq->cmd), rq->cmd);
-	} else  {
-		what |= BLK_TC_ACT(BLK_TC_FS);
-		__blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9, rw, what, rq->errors, 0, NULL);
-	}
-}
-
-/**
- * blk_add_trace_bio - Add a trace for a bio oriented action
- * @q:		queue the io is for
- * @bio:	the source bio
- * @what:	the action
- *
- * Description:
- *     Records an action against a bio. Will log the bio offset + size.
- *
- **/
-static inline void blk_add_trace_bio(struct request_queue *q, struct bio *bio,
-				     u32 what)
-{
-	struct blk_trace *bt = q->blk_trace;
-
-	if (likely(!bt))
-		return;
-
-	__blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what, !bio_flagged(bio, BIO_UPTODATE), 0, NULL);
-}
-
-/**
- * blk_add_trace_generic - Add a trace for a generic action
- * @q:		queue the io is for
- * @bio:	the source bio
- * @rw:		the data direction
- * @what:	the action
- *
- * Description:
- *     Records a simple trace
- *
- **/
-static inline void blk_add_trace_generic(struct request_queue *q,
-					 struct bio *bio, int rw, u32 what)
-{
-	struct blk_trace *bt = q->blk_trace;
-
-	if (likely(!bt))
-		return;
-
-	if (bio)
-		blk_add_trace_bio(q, bio, what);
-	else
-		__blk_add_trace(bt, 0, 0, rw, what, 0, 0, NULL);
-}
-
-/**
- * blk_add_trace_pdu_int - Add a trace for a bio with an integer payload
- * @q:		queue the io is for
- * @what:	the action
- * @bio:	the source bio
- * @pdu:	the integer payload
- *
- * Description:
- *     Adds a trace with some integer payload. This might be an unplug
- *     option given as the action, with the depth at unplug time given
- *     as the payload
- *
- **/
-static inline void blk_add_trace_pdu_int(struct request_queue *q, u32 what,
-					 struct bio *bio, unsigned int pdu)
-{
-	struct blk_trace *bt = q->blk_trace;
-	__be64 rpdu = cpu_to_be64(pdu);
-
-	if (likely(!bt))
-		return;
-
-	if (bio)
-		__blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what, !bio_flagged(bio, BIO_UPTODATE), sizeof(rpdu), &rpdu);
-	else
-		__blk_add_trace(bt, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu);
-}
-
-/**
- * blk_add_trace_remap - Add a trace for a remap operation
- * @q:		queue the io is for
- * @bio:	the source bio
- * @dev:	target device
- * @from:	source sector
- * @to:		target sector
- *
- * Description:
- *     Device mapper or raid target sometimes need to split a bio because
- *     it spans a stripe (or similar). Add a trace for that action.
- *
- **/
+extern void blk_add_trace_rq(struct request_queue *q, struct request *rq,
+			     u32 what);
+extern void blk_add_trace_bio(struct request_queue *q, struct bio *bio,
+			      u32 what);
+extern void blk_add_trace_generic(struct request_queue *q,
+				  struct bio *bio, int rw, u32 what);
+extern void blk_add_trace_pdu_int(struct request_queue *q, u32 what,
+				  struct bio *bio, unsigned int pdu);
 static inline void blk_add_trace_remap(struct request_queue *q, struct bio *bio,
-				       dev_t dev, sector_t from, sector_t to)
+                                      dev_t dev, sector_t from, sector_t to)
 {
-	struct blk_trace *bt = q->blk_trace;
-	struct blk_io_trace_remap r;
-
-	if (likely(!bt))
-		return;
-
-	r.device = cpu_to_be32(dev);
-	r.sector = cpu_to_be64(to);
-
-	__blk_add_trace(bt, from, bio->bi_size, bio->bi_rw, BLK_TA_REMAP, !bio_flagged(bio, BIO_UPTODATE), sizeof(r), &r);
+	trace_block_remap(q, bio, dev, from, to);
 }
 
 /**
diff --git a/include/trace/block.h b/include/trace/block.h
new file mode 100644
index 0000000..09d6339
--- /dev/null
+++ b/include/trace/block.h
@@ -0,0 +1,57 @@
+#ifndef _TRACE_BLOCK_H
+#define _TRACE_BLOCK_H
+
+#include <linux/blkdev.h>
+#include <linux/tracepoint.h>
+
+DEFINE_TRACE(block_rq_insert,
+	TPPROTO(struct request_queue *q, struct request *rq),
+	TPARGS(q, rq));
+DEFINE_TRACE(block_rq_issue,
+	TPPROTO(struct request_queue *q, struct request *rq),
+	TPARGS(q, rq));
+DEFINE_TRACE(block_rq_requeue,
+	TPPROTO(struct request_queue *q, struct request *rq),
+	TPARGS(q, rq));
+DEFINE_TRACE(block_rq_complete,
+	TPPROTO(struct request_queue *q, struct request *rq),
+	TPARGS(q, rq));
+DEFINE_TRACE(block_bio_bounce,
+	TPPROTO(struct request_queue *q, struct bio *bio),
+	TPARGS(q, bio));
+DEFINE_TRACE(block_bio_complete,
+	TPPROTO(struct request_queue *q, struct bio *bio),
+	TPARGS(q, bio));
+DEFINE_TRACE(block_bio_backmerge,
+	TPPROTO(struct request_queue *q, struct bio *bio),
+	TPARGS(q, bio));
+DEFINE_TRACE(block_bio_frontmerge,
+	TPPROTO(struct request_queue *q, struct bio *bio),
+	TPARGS(q, bio));
+DEFINE_TRACE(block_bio_queue,
+	TPPROTO(struct request_queue *q, struct bio *bio),
+	TPARGS(q, bio));
+DEFINE_TRACE(block_getrq,
+	TPPROTO(struct request_queue *q, struct bio *bio, int rw),
+	TPARGS(q, bio, rw));
+DEFINE_TRACE(block_sleeprq,
+	TPPROTO(struct request_queue *q, struct bio *bio, int rw),
+	TPARGS(q, bio, rw));
+DEFINE_TRACE(block_plug,
+	TPPROTO(struct request_queue *q),
+	TPARGS(q));
+DEFINE_TRACE(block_unplug_timer,
+	TPPROTO(struct request_queue *q),
+	TPARGS(q));
+DEFINE_TRACE(block_unplug_io,
+	TPPROTO(struct request_queue *q),
+	TPARGS(q));
+DEFINE_TRACE(block_split,
+	TPPROTO(struct request_queue *q, struct bio *bio, unsigned int pdu),
+	TPARGS(q, bio, pdu));
+DEFINE_TRACE(block_remap,
+	TPPROTO(struct request_queue *q, struct bio *bio, dev_t dev,
+		sector_t from, sector_t to),
+	TPARGS(q, bio, dev, from, to));
+
+#endif
diff --git a/mm/highmem.c b/mm/highmem.c
index f7c57b3..547afb4 100644
--- a/mm/highmem.c
+++ b/mm/highmem.c
@@ -28,6 +28,7 @@
 #include <linux/highmem.h>
 #include <linux/blktrace_api.h>
 #include <asm/tlbflush.h>
+#include <trace/block.h>
 
 static mempool_t *page_pool, *isa_page_pool;
 
@@ -427,6 +428,8 @@ static void __blk_queue_bounce(request_queue_t *q, struct bio **bio_orig,
 	if (!bio)
 		return;
 
+	trace_block_bio_bounce(q, *bio_orig);
+
 	/*
 	 * at least one page was bounced, fill in possible non-highmem
 	 * pages
@@ -481,8 +484,6 @@ void blk_queue_bounce(request_queue_t *q, struct bio **bio_orig)
 		pool = isa_page_pool;
 	}
 
-	blk_add_trace_bio(q, *bio_orig, BLK_TA_BOUNCE);
-
 	/*
 	 * slow path
 	 */