Sophie: kernel-2.6.18-238.el5 src

kernel-2.6.18-238.el5.src.rpm

From: Jesse Larrew <jlarrew@redhat.com>
Date: Fri, 15 May 2009 08:50:58 -0400
Subject: [md] retry immediate in 2 seconds
Message-id: 20090515125058.GB13612@squad5-lp1.lab.bos.redhat.com
O-Subject: Re: [PATCH RHEL5.4 11/11 BZ489582] Retry immediate in 2 seconds
Bugzilla: 489582
RH-Acked-by: Mike Christie <mchristi@redhat.com>

Reposting with Mike Christie's suggestions included in the patch.

RHBZ#:
======
https://bugzilla.redhat.com/show_bug.cgi?id=489582

Description:
===========
This is a bug fix for all archs.

SCSI device handlers return SCSI_DH_IMM_RETRY if we could retry
immediately and SCSI_DH_RETRY in cases where it is better to retry
after some delay.

Currently, we retry immediately regardless of whether SCSI_DH_IMM_RETRY
or SCSI_DH_RETRY was returned. This patch adds a user-configurable
attribute, pg_init_delay_secs, which specifies the number of seconds to
delay before retrying scsi_dh_activate when SCSI_DH_RETRY is returned.

The default for this attribute is set to 2 seconds.

RHEL Version Found:
================
RHEL 5.3

kABI Status:
============
No symbols were harmed.

Brew:
=====
Built on all platforms.
http://brewweb.devel.redhat.com/brew/taskinfo?taskID=1800889

Upstream Status:
================
This has been submitted upstream, but is not yet accepted:
http://patchwork.kernel.org/patch/8067/
https://www.redhat.com/archives/dm-devel/2009-May/msg00119.html

Test Status:
============
This patch has been tested by Babu Moger at LSI.

Testing methodology:

Use two Linux hosts (A and B) that both access the same storage
array. Host A has about 100 LUNs and host B has only a couple of LUNs.
Run the failover tests. Both hosts will compete to send mode select
commands, but they cannot send a mode select simultaneously: one host
will succeed and the other will fail. For example, suppose host A
succeeds and host B fails. With delayed retry, host B will resubmit the
mode select after a 2 second delay. This repeats up to pg_init_retries
times, which gives about 2 * pg_init_retries seconds for the mode select
to succeed. Eventually host B will succeed.

Without this patch, host B retries the mode select immediately and may
exhaust all of its pg_init_retries right away, which leads to an
I/O error.

Babu has tested with pg_init_retries set to 50 (max).

===============================================================

Jesse Larrew
IBM Onsite Partner
978-392-3183
jlarrew@redhat.com

Proposed Patch:
===============
This patch is based on 2.6.18-136.el5.

diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index ef42277..5d875dd 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -25,6 +25,7 @@
 
 #define DM_MSG_PREFIX "multipath"
 #define MESG_STR(x) x, sizeof(x)
+#define DM_PG_INIT_RETRY_DELAY 2
 
 /* Path properties */
 struct pgpath {
@@ -69,6 +70,7 @@ struct multipath {
 	struct list_head priority_groups;
 	unsigned pg_init_required;	/* pg_init needs calling? */
 	unsigned pg_init_in_progress;	/* Only one pg_init allowed at once */
+	unsigned pg_init_delay;		/* To delay or not to delay */
 
 	unsigned nr_valid_paths;	/* Total number of usable paths */
 	struct pgpath *current_pgpath;
@@ -81,6 +83,7 @@ struct multipath {
 	unsigned saved_queue_if_no_path;/* Saved state during suspension */
 	unsigned pg_init_retries;	/* Number of times to retry pg_init */
 	unsigned pg_init_count;		/* Number of times pg_init called */
+	unsigned pg_init_delay_secs;	/* Delay in seconds before retry */
 
 	struct work_struct process_queued_ios;
 	struct bio_list queued_ios;
@@ -188,6 +191,7 @@ static struct multipath *alloc_multipath(void)
 		INIT_LIST_HEAD(&m->priority_groups);
 		spin_lock_init(&m->lock);
 		m->queue_io = 1;
+		m->pg_init_delay_secs = DM_PG_INIT_RETRY_DELAY;
 		INIT_WORK(&m->process_queued_ios, process_queued_ios, m);
 		INIT_WORK(&m->trigger_event, trigger_event, m);
 		m->mpio_pool = mempool_create_slab_pool(MIN_IOS, _mpio_cache);
@@ -438,10 +442,13 @@ static void process_queued_ios(void *data)
 			init_required = 1;
 		} else {
 	 		list_for_each_entry(tmp, &pgpath->pg->pgpaths, list) {
- 				queue_work(kmpath_handlerd, &tmp->activate_path);
+				queue_delayed_work(kmpath_handlerd,
+				    &tmp->activate_path, m->pg_init_delay ?
+					m->pg_init_delay_secs * HZ : 0);
  				m->pg_init_in_progress++;
  			}
 		}
+		m->pg_init_delay = 0;
 	}
 
 out:
@@ -738,8 +745,9 @@ static int parse_features(struct arg_set *as, struct multipath *m,
 	const char *param_name;
 
 	static struct param _params[] = {
-		{0, 3, "invalid number of feature args"},
+		{0, 5, "invalid number of feature args"},
 		{1, 50, "pg_init_retries must be between 1 and 50"},
+		{1, 50, "pg_init_delay_secs must be between 1 and 50"},
 	};
 
 	r = read_param(_params, shift(as), &argc, &ti->error);
@@ -766,6 +774,14 @@ static int parse_features(struct arg_set *as, struct multipath *m,
 			continue;
 		}
 
+		if (!strnicmp(param_name, MESG_STR("pg_init_delay_secs")) &&
+		    (argc >= 1)) {
+			r = read_param(_params + 1, shift(as),
+				       &m->pg_init_delay_secs, &ti->error);
+			argc--;
+			continue;
+		}
+
 		ti->error = "Unrecognised multipath feature request";
 		r = -EINVAL;
 	} while (argc && !r);
@@ -1123,6 +1139,7 @@ static void pg_init_done(struct path *path, int errors)
 	struct priority_group *pg = pgpath->pg;
 	struct multipath *m = pg->m;
 	unsigned long flags;
+	unsigned int delay = 0;
 
 	/* device or driver problems */
 	switch (errors) {
@@ -1147,8 +1164,11 @@ static void pg_init_done(struct path *path, int errors)
 		 */
 		bypass_pg(m, pg, 1);
 		break;
-	/* TODO: For SCSI_DH_RETRY we should wait a couple seconds */
+	/*
+	 * For SCSI_DH_RETRY we wait before retrying.
+	 */
 	case SCSI_DH_RETRY:
+		delay = 1;
 	case SCSI_DH_IMM_RETRY:
 	case SCSI_DH_RES_TEMP_UNAVAIL:
 		if (pg_init_limit_reached(m, pgpath))
@@ -1177,8 +1197,10 @@ static void pg_init_done(struct path *path, int errors)
 	}
 
 	m->pg_init_in_progress--;
-	if (!m->pg_init_in_progress)
+	if (!m->pg_init_in_progress) {
+		m->pg_init_delay = delay;
 		queue_work(kmultipathd, &m->process_queued_ios);
+	}
 	spin_unlock_irqrestore(&m->lock, flags);
 }
 
@@ -1336,11 +1358,15 @@ static int multipath_status(struct dm_target *ti, status_type_t type,
 		DMEMIT("2 %u %u ", m->queue_size, m->pg_init_count);
 	else {
 		DMEMIT("%u ", m->queue_if_no_path +
-			      (m->pg_init_retries > 0) * 2);
+			      (m->pg_init_retries > 0) * 2 +
+			      (m->pg_init_delay_secs !=
+					DM_PG_INIT_RETRY_DELAY) * 2);
 		if (m->queue_if_no_path)
 			DMEMIT("queue_if_no_path ");
 		if (m->pg_init_retries)
 			DMEMIT("pg_init_retries %u ", m->pg_init_retries);
+		if (m->pg_init_delay_secs != DM_PG_INIT_RETRY_DELAY)
+			DMEMIT("pg_init_delay_secs %u ", m->pg_init_delay_secs);
 	}
 
 	if (hwh->type && hwh->type->status)