kernel-2.6.18-238.el5.src.rpm

From: Jerome Marchand <jmarchan@redhat.com>
Date: Mon, 4 May 2009 15:41:08 +0200
Subject: [block] disable iostat collection in gendisk
Message-id: 49FEF074.1070104@redhat.com
O-Subject: [Patch RHEL5.4 2/2 v2] BZ484158: Disable iostat collection in gendisk
Bugzilla: 484158
RH-Acked-by: Anton Arapov <aarapov@redhat.com>
RH-Acked-by: Larry Woodman <lwoodman@redhat.com>
RH-Acked-by: Jeff Moyer <jmoyer@redhat.com>

Bugzilla:
https://bugzilla.redhat.com/show_bug.cgi?id=484158

Description:
The accounting of partition I/O stats has caused a performance
regression with some workloads, so add a feature that allows turning
I/O accounting off through a sysfs file.
This new version of the patch includes commit
42dad7647aec49b3ad20dd0cb832b232a6ae514f, which simplifies the code.
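
For reference, a minimal user-space sketch of toggling the new knob
(the sysfs attribute is named "iostats", as added below; "sda" is only
an example device):

	#include <stdio.h>
	#include <stdlib.h>

	int main(void)
	{
		FILE *f = fopen("/sys/block/sda/queue/iostats", "w");

		if (!f) {
			perror("fopen");
			return EXIT_FAILURE;
		}
		/* "0" disables per-queue I/O accounting, "1" re-enables it */
		fputs("0\n", f);
		fclose(f);
		return EXIT_SUCCESS;
	}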

Upstream status:
The feature is upstream.
commit bc58ba9468d94d62c56ab9b47173583ec140b165 (main patch)
commit fb8ec18c316d869271137c97320dbfd2def56569 (fix)
commit 26308eab69aa193f7b3fb50764a64ae14544a39b (fix)
commit 42dad7647aec49b3ad20dd0cb832b232a6ae514f (simplification)

Brew:
https://brewweb.devel.redhat.com/taskinfo?taskID=1782554

Test status:
Built on all architectures and successfully tested by me on i686.

Regards,
Jerome

diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index 5d1da32..6abdc04 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -1285,6 +1285,7 @@ static const char * const rq_flags[] = {
 	"REQ_FAILFAST_DEV",
 	"REQ_FAILFAST_TRANSPORT",
 	"REQ_FAILFAST_DRIVER",
+	"REQ_IO_STAT",
 };
 
 void blk_dump_rq_flags(struct request *rq, char *msg)
@@ -2031,7 +2032,7 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
 	q->merge_requests_fn	= ll_merge_requests_fn;
 	q->prep_rq_fn		= NULL;
 	q->unplug_fn		= generic_unplug_device;
-	q->queue_flags		= (1 << QUEUE_FLAG_CLUSTER);
+	q->queue_flags		= QUEUE_FLAG_DEFAULT;
 	q->queue_lock		= lock;
 
 	blk_queue_segment_boundary(q, BLK_SEG_BOUNDARY_MASK);
@@ -2077,7 +2078,7 @@ static inline void blk_free_request(request_queue_t *q, struct request *rq)
 }
 
 static inline struct request *
-blk_alloc_request(request_queue_t *q, int rw, struct bio *bio,
+blk_alloc_request(request_queue_t *q, int flags, struct bio *bio,
 		  int priv, gfp_t gfp_mask)
 {
 	struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
@@ -2089,7 +2090,7 @@ blk_alloc_request(request_queue_t *q, int rw, struct bio *bio,
 	 * first three bits are identical in rq->flags and bio->bi_rw,
 	 * see bio.h and blkdev.h
 	 */
-	rq->flags = rw;
+	rq->flags = flags;
 
 	if (priv) {
 		if (unlikely(elv_set_request(q, rq, bio, gfp_mask))) {
@@ -2229,6 +2230,8 @@ static struct request *get_request(request_queue_t *q, int rw, struct bio *bio,
 	if (priv)
 		rl->elvpriv++;
 
+	if (blk_queue_io_stat(q))
+		rw |= REQ_IO_STAT;
 	spin_unlock_irq(q->queue_lock);
 
 	rq = blk_alloc_request(q, rw, bio, priv, gfp_mask);
@@ -2685,7 +2688,7 @@ static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io)
 {
 	int rw = rq_data_dir(rq);
 
-	if (!blk_fs_request(rq) || !rq->rq_disk)
+	if (!blk_fs_request(rq) || !blk_do_io_stat(rq))
 		return;
 
 	if (!new_io) {
@@ -2863,6 +2866,20 @@ long blk_congestion_wait(int rw, long timeout)
 
 EXPORT_SYMBOL(blk_congestion_wait);
 
+static void blk_account_io_merge(struct request *req)
+{
+	if (blk_do_io_stat(req)) {
+		struct hd_struct *part
+			= get_part(req->rq_disk, req->sector);
+		disk_round_stats(req->rq_disk);
+		req->rq_disk->in_flight--;
+		if (part) {
+			part_round_stats(part);
+			get_partstats(part)->in_flight--;
+		}
+	}
+}
+
 /*
  * Has to be called with the request spinlock acquired
  */
@@ -2908,16 +2925,10 @@ static int attempt_merge(request_queue_t *q, struct request *req,
 
 	elv_merge_requests(q, req, next);
 
-	if (req->rq_disk) {
-		struct hd_struct *part
-			= get_part(req->rq_disk, req->sector);
-		disk_round_stats(req->rq_disk);
-		req->rq_disk->in_flight--;
-		if (part) {
-			part_round_stats(part);
-			get_partstats(part)->in_flight--;
-		}
-	}
+	/*
+	 * 'next' is going away, so update stats accordingly
+	 */
+	blk_account_io_merge(next);
 
 	req->ioprio = ioprio_best(req->ioprio, next->ioprio);
 
@@ -3363,6 +3374,43 @@ static void blk_recalc_rq_sectors(struct request *rq, int nsect)
 	}
 }
 
+static void blk_account_io_completion(struct request *req, unsigned int bytes)
+{
+	if (blk_fs_request(req) && blk_do_io_stat(req)) {
+		const int rw = rq_data_dir(req);
+		all_stat_add(req->rq_disk, sectors[rw],
+			     bytes >> 9, req->sector);
+
+	}
+}
+
+static void blk_account_io_done(struct request *req)
+{
+	if (!blk_do_io_stat(req))
+		return;
+
+	/*
+	 * Account IO completion.  bar_rq isn't accounted as a normal
+	 * IO on queueing nor completion.  Accounting the containing
+	 * request is enough.
+	 */
+	if (blk_fs_request(req) && req != &req->q->bar_rq) {
+		unsigned long duration = jiffies - req->start_time;
+		const int rw = rq_data_dir(req);
+		struct gendisk *disk = req->rq_disk;
+		struct hd_struct *part = get_part(disk, req->sector);
+
+		__all_stat_inc(disk, ios[rw], req->sector);
+		__all_stat_add(disk, ticks[rw], duration, req->sector);
+		disk_round_stats(disk);
+		disk->in_flight--;
+		if (part) {
+			part_round_stats(part);
+			get_partstats(part)->in_flight--;
+		}
+	}
+}
+
 static int __end_that_request_first(struct request *req, int uptodate,
 				    int nr_bytes)
 {
@@ -3392,12 +3440,7 @@ static int __end_that_request_first(struct request *req, int uptodate,
 				(unsigned long long)req->sector);
 	}
 
-	if (blk_fs_request(req) && req->rq_disk) {
-		const int rw = rq_data_dir(req);
-
-		all_stat_add(req->rq_disk, sectors[rw],
-			     nr_bytes >> 9, req->sector);
-	}
+	blk_account_io_completion(req, nr_bytes);
 
 	total_bytes = bio_nbytes = 0;
 	while ((bio = req->bio) != NULL) {
@@ -3602,7 +3645,6 @@ EXPORT_SYMBOL(blk_complete_request);
  */
 void end_that_request_last(struct request *req, int uptodate)
 {
-	struct gendisk *disk = req->rq_disk;
 	int error;
 
 	/*
@@ -3615,25 +3657,8 @@ void end_that_request_last(struct request *req, int uptodate)
 	if (unlikely(laptop_mode) && blk_fs_request(req))
 		laptop_io_completion();
 
-	/*
-	 * Account IO completion.  bar_rq isn't accounted as a normal
-	 * IO on queueing nor completion.  Accounting the containing
-	 * request is enough.
-	 */
-	if (disk && blk_fs_request(req) && req != &req->q->bar_rq) {
-		unsigned long duration = jiffies - req->start_time;
-		const int rw = rq_data_dir(req);
-		struct hd_struct *part = get_part(disk, req->sector);
+	blk_account_io_done(req);
 
-		__all_stat_inc(disk, ios[rw], req->sector);
-		__all_stat_add(disk, ticks[rw], duration, req->sector);
-		disk_round_stats(disk);
-		disk->in_flight--;
-		if (part) {
-			part_round_stats(part);
-			get_partstats(part)->in_flight--;
-		}
-	}
 	if (req->end_io)
 		req->end_io(req, error);
 	else
@@ -3976,6 +4001,28 @@ static ssize_t queue_max_hw_sectors_show(struct request_queue *q, char *page)
 	return queue_var_show(max_hw_sectors_kb, (page));
 }
 
+static ssize_t queue_iostats_show(struct request_queue *q, char *page)
+{
+	return queue_var_show(blk_queue_io_stat(q), page);
+}
+
+static ssize_t queue_iostats_store(struct request_queue *q, const char *page,
+				   size_t count)
+{
+	unsigned long stats;
+	ssize_t ret = queue_var_store(&stats, page, count);
+
+	spin_lock_irq(q->queue_lock);
+	if (stats)
+		set_bit(QUEUE_FLAG_IO_STAT, &q->queue_flags);
+	else
+		clear_bit(QUEUE_FLAG_IO_STAT, &q->queue_flags);
+	spin_unlock_irq(q->queue_lock);
+
+	return ret;
+}
+
+
 
 static struct queue_sysfs_entry queue_requests_entry = {
 	.attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR },
@@ -3983,6 +4030,12 @@ static struct queue_sysfs_entry queue_requests_entry = {
 	.store = queue_requests_store,
 };
 
+static struct queue_sysfs_entry queue_iostats_entry = {
+	.attr = {.name = "iostats", .mode = S_IRUGO | S_IWUSR },
+	.show = queue_iostats_show,
+	.store = queue_iostats_store,
+};
+
 static struct queue_sysfs_entry queue_ra_entry = {
 	.attr = {.name = "read_ahead_kb", .mode = S_IRUGO | S_IWUSR },
 	.show = queue_ra_show,
@@ -4012,6 +4065,7 @@ static struct attribute *default_attrs[] = {
 	&queue_max_hw_sectors_entry.attr,
 	&queue_max_sectors_entry.attr,
 	&queue_iosched_entry.attr,
+	&queue_iostats_entry.attr,
 	NULL,
 };
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index fcfa093..748ff68 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -239,6 +239,7 @@ enum rq_flag_bits {
 	__REQ_FAILFAST_DEV,	/* no driver retries of device errors */
 	__REQ_FAILFAST_TRANSPORT, /* no driver retries of transport errors */
 	__REQ_FAILFAST_DRIVER,	/* no driver retries of driver errors */
+ 	__REQ_IO_STAT,		/* account I/O stat */
 	__REQ_NR_BITS,		/* stops here */
 };
 
@@ -272,6 +273,7 @@ enum rq_flag_bits {
 #define REQ_PM_SHUTDOWN	(1 << __REQ_PM_SHUTDOWN)
 #define REQ_ORDERED_COLOR	(1 << __REQ_ORDERED_COLOR)
 #define REQ_RW_SYNC	(1 << __REQ_RW_SYNC)
+#define REQ_IO_STAT	(1 << __REQ_IO_STAT)
 
 /*
  * State information carried for REQ_PM_SUSPEND and REQ_PM_RESUME
@@ -450,6 +452,10 @@ struct request_queue
 #define QUEUE_FLAG_REENTER	6	/* Re-entrancy avoidance */
 #define QUEUE_FLAG_PLUGGED	7	/* queue is plugged */
 #define QUEUE_FLAG_ELVSWITCH	8	/* don't use elevator, just do FIFO */
+#define QUEUE_FLAG_IO_STAT     15	/* do IO stats */
+
+#define QUEUE_FLAG_DEFAULT     ((1 << QUEUE_FLAG_IO_STAT) |	\
+				(1 << QUEUE_FLAG_CLUSTER))
 
 enum {
 	/*
@@ -495,10 +501,17 @@ enum {
 #define blk_queue_tagged(q)	test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags)
 #define blk_queue_stopped(q)	test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
 #define blk_queue_flushing(q)	((q)->ordseq)
+#define blk_queue_io_stat(q)	test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags)
 
 #define blk_fs_request(rq)	((rq)->flags & REQ_CMD)
 #define blk_pc_request(rq)	((rq)->flags & REQ_BLOCK_PC)
 #define blk_rq_started(rq)	((rq)->flags & REQ_STARTED)
+#define blk_rq_io_stat(rq)	((rq)->flags & REQ_IO_STAT)
+
+static inline int blk_do_io_stat(struct request *rq)
+{
+	return rq->rq_disk && blk_rq_io_stat(rq);
+}
 
 #define blk_noretry_request(rq)	((rq)->flags & REQ_FAILFAST)
 #define blk_failfast_dev(rq)	((rq)->flags & REQ_FAILFAST_DEV)