From: Jeff Moyer <jmoyer@redhat.com>
Date: Wed, 25 Nov 2009 20:13:35 -0500
Subject: [block] cfq-iosched: get rid of cfqq hash
Message-id: <x49skc2s6q8.fsf@segfault.boston.devel.redhat.com>
Patchwork-id: 21503
O-Subject: [rhel5 patch] cfq-iosched: get rid of cfqq hash
Bugzilla: 427709 448130 456181
RH-Acked-by: Vivek Goyal <vgoyal@redhat.com>
RH-Acked-by: Jerome Marchand <jmarchan@redhat.com>

Hi,

This is a follow-on to the cfq patch set I sent to resolve the
interleaved I/O performance regression.  It is a backport of the
following upstream commit:

commit 91fac317a34859986a2359a5a5c0e37dc17a9c3d
Author: Vasily Tarasov <vtaras@openvz.org>
Date:   Wed Apr 25 12:29:51 2007 +0200

    cfq-iosched: get rid of cfqq hash

    The cfq hash is no longer necessary; we can always get the cfqq from
    the io context.  The cfq_get_io_context_noalloc() function is
    introduced because we don't want to allocate a cic when merging or
    checking may_queue.  Sync queues used to be identified by their hash
    key (anything other than CFQ_KEY_ASYNC); since the hash is eliminated,
    a per-queue sync flag is added instead.  Everywhere we dig into the
    rb_tree we are in the current process context, so no additional
    locking is required.

    Advantages of this patch: no additional memory for the hash, no hash
    lookups, and cleaner code.  The cic must now be looked up in the
    per-ioc rbtree instead, but that lookup is fast:
    - most processes work with only a few devices
    - most systems have only a few block devices
    - it is an rb-tree

    Signed-off-by: Vasily Tarasov <vtaras@openvz.org>

    Changes by [Jens]:

    - Merge into CFQ devel branch
    - Get rid of cfq_get_io_context_noalloc()
    - Fix various bugs with dereferencing cic->cfqq[] with offset other
      than 0 or 1.
    - Fix a bug in cfqq setup; the is_sync condition was reversed.
    - Fix a bug where only bio_sync() was checked; we need to check for a
      READ too.
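
For reference, the per-ioc lookup the commit relies on is a plain rbtree
walk keyed by the cfqd pointer.  Here is a condensed sketch (the real
cfq_cic_rb_lookup() also copes with cics whose queue has already exited,
which is omitted here):

	/*
	 * Find the cfq_io_context for this device (cfqd) in the rbtree
	 * hanging off the process's io_context.  O(log n) in the number
	 * of devices the process has touched -- usually a handful.
	 */
	static struct cfq_io_context *
	cic_lookup_sketch(struct io_context *ioc, struct cfq_data *cfqd)
	{
		struct rb_node *n = ioc->cic_root.rb_node;
		void *key = cfqd;

		while (n) {
			struct cfq_io_context *cic;

			cic = rb_entry(n, struct cfq_io_context, rb_node);
			if (key < cic->key)
				n = n->rb_left;
			else if (key > cic->key)
				n = n->rb_right;
			else
				return cic;	/* cic for this device */
		}
		return NULL;
	}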

The reason this is necessary is that I've broken the 1:1 relationship
between process and cfq_queue, so looking up the cfqq by PID no longer
works.  This affects the front-merge code: a front-merge candidate may
be missed, which could hurt performance.  It can also have implications
for PID reuse, but that is such a remote possibility that I won't even
go into the details.
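
Concretely, in cfq_find_rq_fmerge() (see the hunk below) the lookup
changes from the pid-keyed hash:

	pid_t key = cfq_queue_pid(tsk, bio_data_dir(bio));
	cfqq = cfq_find_cfq_hash(cfqd, key, tsk->ioprio);

to the io-context path:

	cic = cfq_cic_rb_lookup(cfqd, tsk->io_context);
	if (!cic)
		goto out;
	cfqq = cic_to_cfqq(cic, cfq_bio_sync(bio));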

I've run this through some rather vigorous testing and was unable to
break it.  I also have a brew build that I am running through the kernel
tier1 tests.

This is related to:
bug 427709
bug 448130
bug 456181

Comments, as always, are appreciated.

Cheers,
Jeff


diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index cfe37b4..8ead467 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -36,18 +36,9 @@ static int cfq_slice_idle = HZ / 125;
 #define CFQ_IDLE_GRACE		(HZ / 10)
 #define CFQ_SLICE_SCALE		(5)
 
-#define CFQ_KEY_ASYNC		(0)
-
 static DEFINE_SPINLOCK(cfq_exit_lock);
 
 /*
- * for the hash of cfqq inside the cfqd
- */
-#define CFQ_QHASH_SHIFT		6
-#define CFQ_QHASH_ENTRIES	(1 << CFQ_QHASH_SHIFT)
-#define list_entry_qhash(entry)	hlist_entry((entry), struct cfq_queue, cfq_hash)
-
-/*
  * for the hash of crq inside the cfqq
  */
 #define CFQ_MHASH_SHIFT		6
@@ -86,11 +77,6 @@ static struct completion *ioc_gone;
 #define cfq_cfqq_dispatched(cfqq)	\
 	((cfqq)->on_dispatch[ASYNC] + (cfqq)->on_dispatch[SYNC])
 
-#define cfq_cfqq_class_sync(cfqq)	((cfqq)->key != CFQ_KEY_ASYNC)
-
-#define cfq_cfqq_sync(cfqq)		\
-	(cfq_cfqq_class_sync(cfqq) || (cfqq)->on_dispatch[SYNC])
-
 #define sample_valid(samples)	((samples) > 80)
 
 /*
@@ -122,11 +108,6 @@ struct cfq_data {
 	struct list_head empty_list;
 
 	/*
-	 * cfqq lookup hash
-	 */
-	struct hlist_head *cfq_hash;
-
-	/*
 	 * global crq hash for all queues
 	 */
 	struct hlist_head *crq_hash;
@@ -180,10 +161,6 @@ struct cfq_queue {
 	atomic_t ref;
 	/* parent cfq_data */
 	struct cfq_data *cfqd;
-	/* cfqq lookup hash */
-	struct hlist_node cfq_hash;
-	/* hash key */
-	unsigned int key;
 	/* on either rr or empty list of cfqd */
 	struct list_head cfq_list;
 	/* prio tree member */
@@ -246,6 +223,7 @@ enum cfqq_state_flags {
 	CFQ_CFQQ_FLAG_fifo_expire,
 	CFQ_CFQQ_FLAG_idle_window,
 	CFQ_CFQQ_FLAG_prio_changed,
+	CFQ_CFQQ_FLAG_sync,
 	CFQ_CFQQ_FLAG_coop,
 };
 
@@ -271,6 +249,7 @@ CFQ_CFQQ_FNS(must_dispatch);
 CFQ_CFQQ_FNS(fifo_expire);
 CFQ_CFQQ_FNS(idle_window);
 CFQ_CFQQ_FNS(prio_changed);
+CFQ_CFQQ_FNS(sync);
 CFQ_CFQQ_FNS(coop);
 #undef CFQ_CFQQ_FNS
 
@@ -295,9 +274,34 @@ static inline int cfq_crq_##name(const struct cfq_rq *crq)		\
 CFQ_CRQ_FNS(is_sync);
 #undef CFQ_CRQ_FNS
 
-static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *, unsigned int, unsigned short);
 static void cfq_dispatch_insert(request_queue_t *, struct cfq_rq *);
-static struct cfq_queue *cfq_get_queue(struct cfq_data *cfqd, unsigned int key, struct task_struct *tsk, gfp_t gfp_mask);
+static struct cfq_queue *cfq_get_queue(struct cfq_data *, int,
+				       struct task_struct *, gfp_t);
+static struct cfq_io_context *cfq_cic_rb_lookup(struct cfq_data *,
+						struct io_context *);
+
+static inline struct cfq_queue *cic_to_cfqq(struct cfq_io_context *cic,
+					    int is_sync)
+{
+	return cic->cfqq[!!is_sync];
+}
+
+static inline void cic_set_cfqq(struct cfq_io_context *cic,
+				struct cfq_queue *cfqq, int is_sync)
+{
+	cic->cfqq[!!is_sync] = cfqq;
+}
+
+/*
+ * We regard a request as SYNC, if it's either a read or has the SYNC bit
+ * set (in which case it could also be direct WRITE).
+ */
+static inline int cfq_bio_sync(struct bio *bio)
+{
+	if (bio_data_dir(bio) == READ || bio_sync(bio))
+		return 1;
+	return 0;
+}
 
 /*
  * lots of deadline iosched dupes, can be abstracted later...
@@ -352,14 +356,6 @@ static int cfq_queue_empty(request_queue_t *q)
 	return !cfqd->busy_queues;
 }
 
-static inline pid_t cfq_queue_pid(struct task_struct *task, int rw)
-{
-	if (rw == READ || rw == WRITE_SYNC)
-		return task->pid;
-
-	return CFQ_KEY_ASYNC;
-}
-
 /*
  * Lifted from AS - choose which of crq1 and crq2 that is best served now.
  * We choose the request that is closest to the head right now. Distance
@@ -724,12 +720,16 @@ static struct request *
 cfq_find_rq_fmerge(struct cfq_data *cfqd, struct bio *bio)
 {
 	struct task_struct *tsk = current;
-	pid_t key = cfq_queue_pid(tsk, bio_data_dir(bio));
+	struct cfq_io_context *cic;
 	struct cfq_queue *cfqq;
 	struct rb_node *n;
 	sector_t sector;
 
-	cfqq = cfq_find_cfq_hash(cfqd, key, tsk->ioprio);
+	cic = cfq_cic_rb_lookup(cfqd, tsk->io_context);
+	if (!cic)
+		goto out;
+
+	cfqq = cic_to_cfqq(cic, cfq_bio_sync(bio));
 	if (!cfqq)
 		goto out;
 
@@ -1182,7 +1182,7 @@ static inline struct cfq_rq *cfq_check_fifo(struct cfq_queue *cfqq)
 		return NULL;
 
 	if (!list_empty(&cfqq->fifo)) {
-		int fifo = cfq_cfqq_class_sync(cfqq);
+		int fifo = cfq_cfqq_sync(cfqq);
 
 		crq = RQ_DATA(list_entry_fifo(cfqq->fifo.next));
 		rq = crq->request;
@@ -1309,7 +1309,7 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
 	} else if (cfq_cfqq_dispatched(cfqq)) {
 		cfqq = NULL;
 		goto keep_queue;
-	} else if (cfq_cfqq_class_sync(cfqq)) {
+	} else if (cfq_cfqq_sync(cfqq)) {
 		if (cfq_arm_slice_timer(cfqd, cfqq))
 			return NULL;
 	}
@@ -1484,37 +1484,12 @@ static void cfq_put_queue(struct cfq_queue *cfqq)
 		__cfq_slice_expired(cfqd, cfqq, 0);
 
 	/*
-	 * it's on the empty list and still hashed
+	 * it's on the empty list
 	 */
 	list_del(&cfqq->cfq_list);
-	hlist_del(&cfqq->cfq_hash);
 	kmem_cache_free(cfq_pool, cfqq);
 }
 
-static inline struct cfq_queue *
-__cfq_find_cfq_hash(struct cfq_data *cfqd, unsigned int key, unsigned int prio,
-		    const int hashval)
-{
-	struct hlist_head *hash_list = &cfqd->cfq_hash[hashval];
-	struct hlist_node *entry;
-	struct cfq_queue *__cfqq;
-
-	hlist_for_each_entry(__cfqq, entry, hash_list, cfq_hash) {
-		const unsigned short __p = IOPRIO_PRIO_VALUE(__cfqq->org_ioprio_class, __cfqq->org_ioprio);
-
-		if (__cfqq->key == key && (__p == prio || !prio))
-			return __cfqq;
-	}
-
-	return NULL;
-}
-
-static struct cfq_queue *
-cfq_find_cfq_hash(struct cfq_data *cfqd, unsigned int key, unsigned short prio)
-{
-	return __cfq_find_cfq_hash(cfqd, key, prio, hash_long(key, CFQ_QHASH_SHIFT));
-}
-
 static void cfq_free_io_context(struct io_context *ioc)
 {
 	struct cfq_io_context *__cic;
@@ -1697,7 +1672,7 @@ static inline void changed_ioprio(struct cfq_io_context *cic)
 	cfqq = cic->cfqq[ASYNC];
 	if (cfqq) {
 		struct cfq_queue *new_cfqq;
-		new_cfqq = cfq_get_queue(cfqd, CFQ_KEY_ASYNC, cic->ioc->task,
+		new_cfqq = cfq_get_queue(cfqd, ASYNC, cic->ioc->task,
 					 GFP_ATOMIC);
 		if (new_cfqq) {
 			cic->cfqq[ASYNC] = new_cfqq;
@@ -1736,16 +1711,16 @@ static int cfq_ioc_set_ioprio(struct io_context *ioc, unsigned int ioprio)
 }
 
 static struct cfq_queue *
-cfq_get_queue(struct cfq_data *cfqd, unsigned int key, struct task_struct *tsk,
+cfq_get_queue(struct cfq_data *cfqd, int is_sync, struct task_struct *tsk,
 	      gfp_t gfp_mask)
 {
-	const int hashval = hash_long(key, CFQ_QHASH_SHIFT);
 	struct cfq_queue *cfqq, *new_cfqq = NULL;
-	unsigned short ioprio;
+	struct cfq_io_context *cic;
 
 retry:
-	ioprio = tsk->ioprio;
-	cfqq = __cfq_find_cfq_hash(cfqd, key, ioprio, hashval);
+	cic = cfq_cic_rb_lookup(cfqd, tsk->io_context);
+	/* cic always exists here */
+	cfqq = cic_to_cfqq(cic, is_sync);
 
 	if (!cfqq) {
 		if (new_cfqq) {
@@ -1764,16 +1739,15 @@ retry:
 
 		memset(cfqq, 0, sizeof(*cfqq));
 
-		INIT_HLIST_NODE(&cfqq->cfq_hash);
 		INIT_LIST_HEAD(&cfqq->cfq_list);
 		INIT_LIST_HEAD(&cfqq->fifo);
 		RB_CLEAR_NODE(&cfqq->p_node);
 
-		cfqq->key = key;
-		hlist_add_head(&cfqq->cfq_hash, &cfqd->cfq_hash[hashval]);
 		atomic_set(&cfqq->ref, 0);
 		cfqq->cfqd = cfqd;
 		cfqq->service_last = 0;
+		if (is_sync)
+			cfq_mark_cfqq_sync(cfqq);
 		/*
 		 * set ->slice_left to allow preemption for a new process
 		 */
@@ -1810,6 +1784,9 @@ cfq_cic_rb_lookup(struct cfq_data *cfqd, struct io_context *ioc)
 	struct cfq_io_context *cic;
 	void *k, *key = cfqd;
 
+	if (unlikely(!ioc))
+		return NULL;
+
 restart:
 	n = ioc->cic_root.rb_node;
 	while (n) {
@@ -2270,6 +2247,7 @@ static int cfq_may_queue(request_queue_t *q, int rw, struct bio *bio)
 {
 	struct cfq_data *cfqd = q->elevator->elevator_data;
 	struct task_struct *tsk = current;
+	struct cfq_io_context *cic;
 	struct cfq_queue *cfqq;
 
 	/*
@@ -2278,7 +2256,11 @@ static int cfq_may_queue(request_queue_t *q, int rw, struct bio *bio)
 	 * so just lookup a possibly existing queue, or return 'may queue'
 	 * if that fails
 	 */
-	cfqq = cfq_find_cfq_hash(cfqd, cfq_queue_pid(tsk, rw), tsk->ioprio);
+	cic = cfq_cic_rb_lookup(cfqd, tsk->io_context);
+	if (!cic)
+		return ELV_MQUEUE_MAY;
+
+	cfqq = cic_to_cfqq(cic, rw & REQ_RW_SYNC);
 	if (cfqq) {
 		cfq_init_prio_data(cfqq);
 		cfq_prio_boost(cfqq);
@@ -2376,11 +2358,10 @@ cfq_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
 	struct task_struct *tsk = current;
 	struct cfq_io_context *cic;
 	const int rw = rq_data_dir(rq);
-	pid_t key = cfq_queue_pid(tsk, rw);
+	const int is_sync = (rw == READ || rw == WRITE_SYNC);
 	struct cfq_queue *cfqq;
 	struct cfq_rq *crq;
 	unsigned long flags;
-	int is_sync = key != CFQ_KEY_ASYNC;
 
 	might_sleep_if(gfp_mask & __GFP_WAIT);
 
@@ -2392,17 +2373,17 @@ new_queue:
 	if (!cic)
 		goto queue_fail;
 
-	if (!cic->cfqq[is_sync]) {
-		cfqq = cfq_get_queue(cfqd, key, tsk, gfp_mask);
+	cfqq = cic_to_cfqq(cic, is_sync);
+	if (!cfqq) {
+		cfqq = cfq_get_queue(cfqd, is_sync, tsk, gfp_mask);
 		if (!cfqq)
 			goto queue_fail;
 
-		cic->cfqq[is_sync] = cfqq;
+		cic_set_cfqq(cic, cfqq, is_sync);
 	} else {
 		/*
 		 * If the queue was seeky for too long, break it apart.
 		 */
-		cfqq = cic->cfqq[is_sync];
 		if (cfq_cfqq_coop(cfqq) && should_split_cfqq(cfqq)) {
 			cfqq = split_cfqq(cic, cfqq);
 			if (!cfqq)
@@ -2589,7 +2570,6 @@ static void cfq_exit_queue(elevator_t *e)
 
 	mempool_destroy(cfqd->crq_pool);
 	kfree(cfqd->crq_hash);
-	kfree(cfqd->cfq_hash);
 	kfree(cfqd);
 }
 
@@ -2617,18 +2597,12 @@ static void *cfq_init_queue(request_queue_t *q, elevator_t *e)
 	if (!cfqd->crq_hash)
 		goto out_crqhash;
 
-	cfqd->cfq_hash = kmalloc(sizeof(struct hlist_head) * CFQ_QHASH_ENTRIES, GFP_KERNEL);
-	if (!cfqd->cfq_hash)
-		goto out_cfqhash;
-
 	cfqd->crq_pool = mempool_create_slab_pool(BLKDEV_MIN_RQ, crq_pool);
 	if (!cfqd->crq_pool)
 		goto out_crqpool;
 
 	for (i = 0; i < CFQ_MHASH_ENTRIES; i++)
 		INIT_HLIST_HEAD(&cfqd->crq_hash[i]);
-	for (i = 0; i < CFQ_QHASH_ENTRIES; i++)
-		INIT_HLIST_HEAD(&cfqd->cfq_hash[i]);
 
 	cfqd->queue = q;
 
@@ -2655,8 +2629,6 @@ static void *cfq_init_queue(request_queue_t *q, elevator_t *e)
 
 	return cfqd;
 out_crqpool:
-	kfree(cfqd->cfq_hash);
-out_cfqhash:
 	kfree(cfqd->crq_hash);
 out_crqhash:
 	kfree(cfqd);