Sophie

Sophie

distrib > Scientific%20Linux > 5x > x86_64 > by-pkgid > 89877e42827f16fa5f86b1df0c2860b1 > files > 1942

kernel-2.6.18-128.1.10.el5.src.rpm

From: Jan Glauber <jglauber@redhat.com>
Subject: [RHEL5 PATCH s390] common i/o layer fixes from upstream
Date: Thu, 30 Nov 2006 10:42:56 +0100
Bugzilla: 217799
Message-Id: <1164879776.5610.21.camel@bender>
Changelog: s390: common i/o layer fixes


BZ 217799. 

There are some cio patches that missed 2.6.18 that should go into RHEL5.
Without these patches, setting paths offline/online won't work, and
device qualification for RHEL5 would therefore fail.

All patches are upstream (2.6.19) and tested by IBM.

Changelog:
- css_probe_device() must be called enabled
- 0 is a valid chpid
- incorrect device operational notification
- inaccessible device after CHPID deactivation
- always query all paths on path verification
- subchannel evaluation function must operate with lock
- subchannels remain in no-path state although channel paths reappeared
- add timeout for internal operations
- update path groups on logical CHPID changes
- start path verification after I/O finished during grace period

Jan
-- 
jglauber@redhat.com
jang@de.ibm.com

 chsc.c          |   69 +++++++++++++------
 cio.c           |    7 -
 css.c           |  202 ++++++++++++++++++++++++++++----------------------------
 css.h           |    2 
 device.c        |    3 
 device_fsm.c    |   65 +++++++++++++-----
 device_id.c     |   12 ++-
 device_ops.c    |   27 ++++++-
 device_pgid.c   |  125 ++++++++++++++++++++++------------
 device_status.c |    3 
 10 files changed, 324 insertions(+), 191 deletions(-)

diff -urNp linux-2.6.18.s390x/drivers/s390/cio/chsc.c linux-2.6.18.s390x.cio/drivers/s390/cio/chsc.c
--- linux-2.6.18.s390x/drivers/s390/cio/chsc.c	2006-11-29 16:40:29.000000000 +0100
+++ linux-2.6.18.s390x.cio/drivers/s390/cio/chsc.c	2006-11-29 18:05:46.000000000 +0100
@@ -200,11 +200,13 @@ css_get_ssd_info(struct subchannel *sch)
 	spin_unlock_irq(&sch->lock);
 	free_page((unsigned long)page);
 	if (!ret) {
-		int j, chpid;
+		int j, chpid, mask;
 		/* Allocate channel path structures, if needed. */
 		for (j = 0; j < 8; j++) {
+			mask = 0x80 >> j;
 			chpid = sch->ssd_info.chpid[j];
-			if (chpid && (get_chp_status(chpid) < 0))
+			if ((sch->schib.pmcw.pim & mask) &&
+			    (get_chp_status(chpid) < 0))
 			    new_channel_path(chpid);
 		}
 	}
@@ -222,13 +224,15 @@ s390_subchannel_remove_chpid(struct devi
 
 	sch = to_subchannel(dev);
 	chpid = data;
-	for (j = 0; j < 8; j++)
-		if (sch->schib.pmcw.chpid[j] == chpid->id)
+	for (j = 0; j < 8; j++) {
+		mask = 0x80 >> j;
+		if ((sch->schib.pmcw.pim & mask) &&
+		    (sch->schib.pmcw.chpid[j] == chpid->id))
 			break;
+	}
 	if (j >= 8)
 		return 0;
 
-	mask = 0x80 >> j;
 	spin_lock_irq(&sch->lock);
 
 	stsch(sch->schid, &schib);
@@ -247,6 +251,8 @@ s390_subchannel_remove_chpid(struct devi
 		cc = cio_clear(sch);
 		if (cc == -ENODEV)
 			goto out_unreg;
+		/* Request retry of internal operation. */
+		device_set_intretry(sch);
 		/* Call handler. */
 		if (sch->driver && sch->driver->termination)
 			sch->driver->termination(&sch->dev);
@@ -256,7 +262,7 @@ s390_subchannel_remove_chpid(struct devi
 	/* trigger path verification. */
 	if (sch->driver && sch->driver->verify)
 		sch->driver->verify(&sch->dev);
-	else if (sch->vpm == mask)
+	else if (sch->lpm == mask)
 		goto out_unreg;
 out_unlock:
 	spin_unlock_irq(&sch->lock);
@@ -378,6 +384,7 @@ __s390_process_res_acc(struct subchannel
 
 	if (chp_mask == 0) {
 		spin_unlock_irq(&sch->lock);
+		put_device(&sch->dev);
 		return 0;
 	}
 	old_lpm = sch->lpm;
@@ -392,7 +399,7 @@ __s390_process_res_acc(struct subchannel
 
 	spin_unlock_irq(&sch->lock);
 	put_device(&sch->dev);
-	return (res_data->fla_mask == 0xffff) ? -ENODEV : 0;
+	return 0;
 }
 

@@ -619,7 +626,7 @@ __chp_add_new_sch(struct subchannel_id s
 static int
 __chp_add(struct subchannel_id schid, void *data)
 {
-	int i;
+	int i, mask;
 	struct channel_path *chp;
 	struct subchannel *sch;
 
@@ -629,8 +636,10 @@ __chp_add(struct subchannel_id schid, vo
 		/* Check if the subchannel is now available. */
 		return __chp_add_new_sch(schid);
 	spin_lock_irq(&sch->lock);
-	for (i=0; i<8; i++)
-		if (sch->schib.pmcw.chpid[i] == chp->id) {
+	for (i=0; i<8; i++) {
+		mask = 0x80 >> i;
+		if ((sch->schib.pmcw.pim & mask) &&
+		    (sch->schib.pmcw.chpid[i] == chp->id)) {
 			if (stsch(sch->schid, &sch->schib) != 0) {
 				/* Endgame. */
 				spin_unlock_irq(&sch->lock);
@@ -638,6 +647,7 @@ __chp_add(struct subchannel_id schid, vo
 			}
 			break;
 		}
+	}
 	if (i==8) {
 		spin_unlock_irq(&sch->lock);
 		return 0;
@@ -645,7 +655,7 @@ __chp_add(struct subchannel_id schid, vo
 	sch->lpm = ((sch->schib.pmcw.pim &
 		     sch->schib.pmcw.pam &
 		     sch->schib.pmcw.pom)
-		    | 0x80 >> i) & sch->opm;
+		    | mask) & sch->opm;
 
 	if (sch->driver && sch->driver->verify)
 		sch->driver->verify(&sch->dev);
@@ -700,23 +710,37 @@ chp_process_crw(int chpid, int on)
 }
 
 static inline int
-__check_for_io_and_kill(struct subchannel *sch, int index)
+check_for_io_on_path(struct subchannel *sch, int index)
 {
 	int cc;
 
-	if (!device_is_online(sch))
-		/* cio could be doing I/O. */
-		return 0;
 	cc = stsch(sch->schid, &sch->schib);
 	if (cc)
 		return 0;
-	if (sch->schib.scsw.actl && sch->schib.pmcw.lpum == (0x80 >> index)) {
-		device_set_waiting(sch);
+	if (sch->schib.scsw.actl && sch->schib.pmcw.lpum == (0x80 >> index))
 		return 1;
-	}
 	return 0;
 }
 
+static void
+terminate_internal_io(struct subchannel *sch)
+{
+	if (cio_clear(sch)) {
+		/* Recheck device in case clear failed */
+		sch->lpm = 0;
+		if (css_enqueue_subchannel_slow(sch->schid)) {
+			css_clear_subchannel_slow_list();
+			need_rescan = 1;
+		}
+		return;
+	}
+	/* Request retry of internal operation. */
+	device_set_intretry(sch);
+	/* Call handler. */
+	if (sch->driver && sch->driver->termination)
+		sch->driver->termination(&sch->dev);
+}
+
 static inline void
 __s390_subchannel_vary_chpid(struct subchannel *sch, __u8 chpid, int on)
 {
@@ -747,7 +771,14 @@ __s390_subchannel_vary_chpid(struct subc
 			 * can successfully terminate, even using the
 			 * just varied off path. Then kill it.
 			 */
-			if (!__check_for_io_and_kill(sch, chp) && !sch->lpm) {
+			if (check_for_io_on_path(sch, chp)) {
+				if (device_is_online(sch))
+					/* Wait for I/O to finish */
+					device_set_waiting(sch);
+				else
+					/* Kill and retry internal I/O */
+					terminate_internal_io(sch);
+			} else if (!sch->lpm) {
 				if (css_enqueue_subchannel_slow(sch->schid)) {
 					css_clear_subchannel_slow_list();
 					need_rescan = 1;
diff -urNp linux-2.6.18.s390x/drivers/s390/cio/cio.c linux-2.6.18.s390x.cio/drivers/s390/cio/cio.c
--- linux-2.6.18.s390x/drivers/s390/cio/cio.c	2006-11-29 16:40:29.000000000 +0100
+++ linux-2.6.18.s390x.cio/drivers/s390/cio/cio.c	2006-11-29 18:05:46.000000000 +0100
@@ -192,7 +192,7 @@ cio_start_key (struct subchannel *sch,	/
 	sch->orb.pfch = sch->options.prefetch == 0;
 	sch->orb.spnd = sch->options.suspend;
 	sch->orb.ssic = sch->options.suspend && sch->options.inter;
-	sch->orb.lpm = (lpm != 0) ? (lpm & sch->opm) : sch->lpm;
+	sch->orb.lpm = (lpm != 0) ? lpm : sch->lpm;
 #ifdef CONFIG_64BIT
 	/*
 	 * for 64 bit we always support 64 bit IDAWs with 4k page size only
@@ -570,10 +570,7 @@ cio_validate_subchannel (struct subchann
 	sch->opm = 0xff;
 	if (!cio_is_console(sch->schid))
 		chsc_validate_chpids(sch);
-	sch->lpm = sch->schib.pmcw.pim &
-		sch->schib.pmcw.pam &
-		sch->schib.pmcw.pom &
-		sch->opm;
+	sch->lpm = sch->schib.pmcw.pam & sch->opm;
 
 	CIO_DEBUG(KERN_INFO, 0,
 		  "Detected device %04x on subchannel 0.%x.%04X"
diff -urNp linux-2.6.18.s390x/drivers/s390/cio/css.c linux-2.6.18.s390x.cio/drivers/s390/cio/css.c
--- linux-2.6.18.s390x/drivers/s390/cio/css.c	2006-11-29 16:40:29.000000000 +0100
+++ linux-2.6.18.s390x.cio/drivers/s390/cio/css.c	2006-11-29 18:05:46.000000000 +0100
@@ -182,136 +182,140 @@ get_subchannel_by_schid(struct subchanne
 	return dev ? to_subchannel(dev) : NULL;
 }
 
-
-static inline int
-css_get_subchannel_status(struct subchannel *sch, struct subchannel_id schid)
+static inline int css_get_subchannel_status(struct subchannel *sch)
 {
 	struct schib schib;
-	int cc;
 
-	cc = stsch(schid, &schib);
-	if (cc)
-		return CIO_GONE;
-	if (!schib.pmcw.dnv)
+	if (stsch(sch->schid, &schib) || !schib.pmcw.dnv)
 		return CIO_GONE;
-	if (sch && sch->schib.pmcw.dnv &&
-	    (schib.pmcw.dev != sch->schib.pmcw.dev))
+	if (sch->schib.pmcw.dnv && (schib.pmcw.dev != sch->schib.pmcw.dev))
 		return CIO_REVALIDATE;
-	if (sch && !sch->lpm)
+	if (!sch->lpm)
 		return CIO_NO_PATH;
 	return CIO_OPER;
 }
-	
-static int
-css_evaluate_subchannel(struct subchannel_id schid, int slow)
+
+static int css_evaluate_known_subchannel(struct subchannel *sch, int slow)
 {
 	int event, ret, disc;
-	struct subchannel *sch;
 	unsigned long flags;
+	enum { NONE, UNREGISTER, UNREGISTER_PROBE, REPROBE } action;
 
-	sch = get_subchannel_by_schid(schid);
-	disc = sch ? device_is_disconnected(sch) : 0;
+	spin_lock_irqsave(&sch->lock, flags);
+	disc = device_is_disconnected(sch);
 	if (disc && slow) {
-		if (sch)
-			put_device(&sch->dev);
-		return 0; /* Already processed. */
+		/* Disconnected devices are evaluated directly only.*/
+		spin_unlock_irqrestore(&sch->lock, flags);
+		return 0;
 	}
-	/*
-	 * We've got a machine check, so running I/O won't get an interrupt.
-	 * Kill any pending timers.
-	 */
-	if (sch)
-		device_kill_pending_timer(sch);
+	/* No interrupt after machine check - kill pending timers. */
+	device_kill_pending_timer(sch);
 	if (!disc && !slow) {
-		if (sch)
-			put_device(&sch->dev);
-		return -EAGAIN; /* Will be done on the slow path. */
+		/* Non-disconnected devices are evaluated on the slow path. */
+		spin_unlock_irqrestore(&sch->lock, flags);
+		return -EAGAIN;
 	}
-	event = css_get_subchannel_status(sch, schid);
+	event = css_get_subchannel_status(sch);
 	CIO_MSG_EVENT(4, "Evaluating schid 0.%x.%04x, event %d, %s, %s path.\n",
-		      schid.ssid, schid.sch_no, event,
-		      sch?(disc?"disconnected":"normal"):"unknown",
-		      slow?"slow":"fast");
+		      sch->schid.ssid, sch->schid.sch_no, event,
+		      disc ? "disconnected" : "normal",
+		      slow ? "slow" : "fast");
+	/* Analyze subchannel status. */
+	action = NONE;
 	switch (event) {
 	case CIO_NO_PATH:
-	case CIO_GONE:
-		if (!sch) {
-			/* Never used this subchannel. Ignore. */
-			ret = 0;
+		if (disc) {
+			/* Check if paths have become available. */
+			action = REPROBE;
 			break;
 		}
-		if (disc && (event == CIO_NO_PATH)) {
-			/*
-			 * Uargh, hack again. Because we don't get a machine
-			 * check on configure on, our path bookkeeping can
-			 * be out of date here (it's fine while we only do
-			 * logical varying or get chsc machine checks). We
-			 * need to force reprobing or we might miss devices
-			 * coming operational again. It won't do harm in real
-			 * no path situations.
-			 */
-			spin_lock_irqsave(&sch->lock, flags);
-			device_trigger_reprobe(sch);
+		/* fall through */
+	case CIO_GONE:
+		/* Prevent unwanted effects when opening lock. */
+		cio_disable_subchannel(sch);
+		device_set_disconnected(sch);
+		/* Ask driver what to do with device. */
+		action = UNREGISTER;
+		if (sch->driver && sch->driver->notify) {
 			spin_unlock_irqrestore(&sch->lock, flags);
-			ret = 0;
-			break;
-		}
-		if (sch->driver && sch->driver->notify &&
-		    sch->driver->notify(&sch->dev, event)) {
-			cio_disable_subchannel(sch);
-			device_set_disconnected(sch);
-			ret = 0;
-			break;
+			ret = sch->driver->notify(&sch->dev, event);
+			spin_lock_irqsave(&sch->lock, flags);
+			if (ret)
+				action = NONE;
 		}
-		/*
-		 * Unregister subchannel.
-		 * The device will be killed automatically.
-		 */
-		cio_disable_subchannel(sch);
-		css_sch_device_unregister(sch);
-		/* Reset intparm to zeroes. */
-		sch->schib.pmcw.intparm = 0;
-		cio_modify(sch);
-		put_device(&sch->dev);
-		ret = 0;
 		break;
 	case CIO_REVALIDATE:
-		/* 
-		 * Revalidation machine check. Sick.
-		 * We don't notify the driver since we have to throw the device
-		 * away in any case.
-		 */
-		if (!disc) {
-			css_sch_device_unregister(sch);
-			/* Reset intparm to zeroes. */
-			sch->schib.pmcw.intparm = 0;
-			cio_modify(sch);
-			put_device(&sch->dev);
-			ret = css_probe_device(schid);
-		} else {
-			/*
-			 * We can't immediately deregister the disconnected
-			 * device since it might block.
-			 */
-			spin_lock_irqsave(&sch->lock, flags);
-			device_trigger_reprobe(sch);
-			spin_unlock_irqrestore(&sch->lock, flags);
-			ret = 0;
-		}
+		/* Device will be removed, so no notify necessary. */
+		if (disc)
+			/* Reprobe because immediate unregister might block. */
+			action = REPROBE;
+		else
+			action = UNREGISTER_PROBE;
 		break;
 	case CIO_OPER:
-		if (disc) {
-			spin_lock_irqsave(&sch->lock, flags);
+		if (disc)
 			/* Get device operational again. */
-			device_trigger_reprobe(sch);
-			spin_unlock_irqrestore(&sch->lock, flags);
-		}
-		ret = sch ? 0 : css_probe_device(schid);
+			action = REPROBE;
+		break;
+	}
+	/* Perform action. */
+	ret = 0;
+	switch (action) {
+	case UNREGISTER:
+	case UNREGISTER_PROBE:
+		/* Unregister device (will use subchannel lock). */
+		spin_unlock_irqrestore(&sch->lock, flags);
+		css_sch_device_unregister(sch);
+		spin_lock_irqsave(&sch->lock, flags);
+
+		/* Reset intparm to zeroes. */
+		sch->schib.pmcw.intparm = 0;
+		cio_modify(sch);
+		break;
+	case REPROBE:
+		device_trigger_reprobe(sch);
 		break;
 	default:
-		BUG();
-		ret = 0;
+		break;
 	}
+	spin_unlock_irqrestore(&sch->lock, flags);
+	/* Probe if necessary. */
+	if (action == UNREGISTER_PROBE)
+		ret = css_probe_device(sch->schid);
+
+	return ret;
+}
+
+static int css_evaluate_new_subchannel(struct subchannel_id schid, int slow)
+{
+	struct schib schib;
+
+	if (!slow) {
+		/* Will be done on the slow path. */
+		return -EAGAIN;
+	}
+	if (stsch(schid, &schib) || !schib.pmcw.dnv) {
+		/* Unusable - ignore. */
+		return 0;
+	}
+	CIO_MSG_EVENT(4, "Evaluating schid 0.%x.%04x, event %d, unknown, "
+			 "slow path.\n", schid.ssid, schid.sch_no, CIO_OPER);
+
+	return css_probe_device(schid);
+}
+
+static int css_evaluate_subchannel(struct subchannel_id schid, int slow)
+{
+	struct subchannel *sch;
+	int ret;
+
+	sch = get_subchannel_by_schid(schid);
+	if (sch) {
+		ret = css_evaluate_known_subchannel(sch, slow);
+		put_device(&sch->dev);
+	} else
+		ret = css_evaluate_new_subchannel(schid, slow);
+
 	return ret;
 }
 
diff -urNp linux-2.6.18.s390x/drivers/s390/cio/css.h linux-2.6.18.s390x.cio/drivers/s390/cio/css.h
--- linux-2.6.18.s390x/drivers/s390/cio/css.h	2006-11-29 16:40:27.000000000 +0100
+++ linux-2.6.18.s390x.cio/drivers/s390/cio/css.h	2006-11-29 18:05:46.000000000 +0100
@@ -95,6 +95,7 @@ struct ccw_device_private {
 		unsigned int donotify:1;    /* call notify function */
 		unsigned int recog_done:1;  /* dev. recog. complete */
 		unsigned int fake_irb:1;    /* deliver faked irb */
+		unsigned int intretry:1;    /* retry internal operation */
 	} __attribute__((packed)) flags;
 	unsigned long intparm;	/* user interruption parameter */
 	struct qdio_irq *qdio_data;
@@ -172,6 +173,7 @@ void device_trigger_reprobe(struct subch
 /* Helper functions for vary on/off. */
 int device_is_online(struct subchannel *);
 void device_set_waiting(struct subchannel *);
+void device_set_intretry(struct subchannel *sch);
 
 /* Machine check helper function. */
 void device_kill_pending_timer(struct subchannel *);
diff -urNp linux-2.6.18.s390x/drivers/s390/cio/device.c linux-2.6.18.s390x.cio/drivers/s390/cio/device.c
--- linux-2.6.18.s390x/drivers/s390/cio/device.c	2006-11-29 18:00:57.000000000 +0100
+++ linux-2.6.18.s390x.cio/drivers/s390/cio/device.c	2006-11-29 18:05:46.000000000 +0100
@@ -951,6 +951,9 @@ io_subchannel_ioterm(struct device *dev)
 	cdev = dev->driver_data;
 	if (!cdev)
 		return;
+	/* Internal I/O will be retried by the interrupt handler */
+	if (cdev->private->flags.intretry)
+		return;
 	cdev->private->state = DEV_STATE_CLEAR_VERIFY;
 	if (cdev->handler)
 		cdev->handler(cdev, cdev->private->intparm,
diff -urNp linux-2.6.18.s390x/drivers/s390/cio/device_fsm.c linux-2.6.18.s390x.cio/drivers/s390/cio/device_fsm.c
--- linux-2.6.18.s390x/drivers/s390/cio/device_fsm.c	2006-11-29 16:40:28.000000000 +0100
+++ linux-2.6.18.s390x.cio/drivers/s390/cio/device_fsm.c	2006-11-29 18:05:46.000000000 +0100
@@ -68,9 +68,21 @@ device_set_waiting(struct subchannel *sc
 		return;
 	cdev = sch->dev.driver_data;
 	ccw_device_set_timeout(cdev, 10*HZ);
+	cdev->private->flags.doverify = 1;
 	cdev->private->state = DEV_STATE_WAIT4IO;
 }
 
+void
+device_set_intretry(struct subchannel *sch)
+{
+	struct ccw_device *cdev;
+
+	cdev = sch->dev.driver_data;
+	if (!cdev)
+		return;
+	cdev->private->flags.intretry = 1;
+}
+
 /*
  * Timeout function. It just triggers a DEV_EVENT_TIMEOUT.
  */
@@ -232,10 +244,7 @@ ccw_device_recog_done(struct ccw_device 
 	 */
 	old_lpm = sch->lpm;
 	stsch(sch->schid, &sch->schib);
-	sch->lpm = sch->schib.pmcw.pim &
-		sch->schib.pmcw.pam &
-		sch->schib.pmcw.pom &
-		sch->opm;
+	sch->lpm = sch->schib.pmcw.pam & sch->opm;
 	/* Check since device may again have become not operational. */
 	if (!sch->schib.pmcw.dnv)
 		state = DEV_STATE_NOT_OPER;
@@ -351,6 +360,8 @@ ccw_device_done(struct ccw_device *cdev,
 
 	sch = to_subchannel(cdev->dev.parent);
 
+	ccw_device_set_timeout(cdev, 0);
+
 	if (state != DEV_STATE_ONLINE)
 		cio_disable_subchannel(sch);
 
@@ -454,8 +465,8 @@ ccw_device_sense_pgid_done(struct ccw_de
 		return;
 	}
 	/* Start Path Group verification. */
-	sch->vpm = 0;	/* Start with no path groups set. */
 	cdev->private->state = DEV_STATE_VERIFY;
+	cdev->private->flags.doverify = 0;
 	ccw_device_verify_start(cdev);
 }
 
@@ -555,7 +566,19 @@ ccw_device_nopath_notify(void *data)
 void
 ccw_device_verify_done(struct ccw_device *cdev, int err)
 {
-	cdev->private->flags.doverify = 0;
+	struct subchannel *sch;
+
+	sch = to_subchannel(cdev->dev.parent);
+	/* Update schib - pom may have changed. */
+	stsch(sch->schid, &sch->schib);
+	/* Update lpm with verified path mask. */
+	sch->lpm = sch->vpm;
+	/* Repeat path verification? */
+	if (cdev->private->flags.doverify) {
+		cdev->private->flags.doverify = 0;
+		ccw_device_verify_start(cdev);
+		return;
+	}
 	switch (err) {
 	case -EOPNOTSUPP: /* path grouping not supported, just set online. */
 		cdev->private->options.pgroup = 0;
@@ -576,9 +599,13 @@ ccw_device_verify_done(struct ccw_device
 		}
 		break;
 	case -ETIME:
+		/* Reset oper notify indication after verify error. */
+		cdev->private->flags.donotify = 0;
 		ccw_device_done(cdev, DEV_STATE_BOXED);
 		break;
 	default:
+		/* Reset oper notify indication after verify error. */
+		cdev->private->flags.donotify = 0;
 		PREPARE_WORK(&cdev->private->kick_work,
 			     ccw_device_nopath_notify, (void *)cdev);
 		queue_work(ccw_device_notify_work, &cdev->private->kick_work);
@@ -613,6 +640,7 @@ ccw_device_online(struct ccw_device *cde
 	if (!cdev->private->options.pgroup) {
 		/* Start initial path verification. */
 		cdev->private->state = DEV_STATE_VERIFY;
+		cdev->private->flags.doverify = 0;
 		ccw_device_verify_start(cdev);
 		return 0;
 	}
@@ -659,7 +687,6 @@ ccw_device_offline(struct ccw_device *cd
 	/* Are we doing path grouping? */
 	if (!cdev->private->options.pgroup) {
 		/* No, set state offline immediately. */
-		sch->vpm = 0;
 		ccw_device_done(cdev, DEV_STATE_OFFLINE);
 		return 0;
 	}
@@ -780,6 +807,7 @@ ccw_device_online_verify(struct ccw_devi
 	}
 	/* Device is idle, we can do the path verification. */
 	cdev->private->state = DEV_STATE_VERIFY;
+	cdev->private->flags.doverify = 0;
 	ccw_device_verify_start(cdev);
 }
 
@@ -885,6 +913,12 @@ ccw_device_w4sense(struct ccw_device *cd
 	 * had killed the original request.
 	 */
 	if (irb->scsw.fctl & (SCSW_FCTL_CLEAR_FUNC | SCSW_FCTL_HALT_FUNC)) {
+		/* Retry Basic Sense if requested. */
+		if (cdev->private->flags.intretry) {
+			cdev->private->flags.intretry = 0;
+			ccw_device_do_sense(cdev, irb);
+			return;
+		}
 		cdev->private->flags.dosense = 0;
 		memset(&cdev->private->irb, 0, sizeof(struct irb));
 		ccw_device_accumulate_irb(cdev, irb);
@@ -1001,7 +1035,7 @@ ccw_device_wait4io_irq(struct ccw_device
 		PREPARE_WORK(&cdev->private->kick_work,
 			     ccw_device_nopath_notify, (void *)cdev);
 		queue_work(ccw_device_notify_work, &cdev->private->kick_work);
-	} else if (cdev->private->flags.doverify)
+	} else
 		ccw_device_online_verify(cdev, 0);
 }
 
@@ -1042,9 +1076,9 @@ ccw_device_wait4io_timeout(struct ccw_de
 }
 
 static void
-ccw_device_wait4io_verify(struct ccw_device *cdev, enum dev_event dev_event)
+ccw_device_delay_verify(struct ccw_device *cdev, enum dev_event dev_event)
 {
-	/* When the I/O has terminated, we have to start verification. */
+	/* Start verification after current task finished. */
 	cdev->private->flags.doverify = 1;
 }
 
@@ -1110,10 +1144,7 @@ device_trigger_reprobe(struct subchannel
 	 * The pim, pam, pom values may not be accurate, but they are the best
 	 * we have before performing device selection :/
 	 */
-	sch->lpm = sch->schib.pmcw.pim &
-		sch->schib.pmcw.pam &
-		sch->schib.pmcw.pom &
-		sch->opm;
+	sch->lpm = sch->schib.pmcw.pam & sch->opm;
 	/* Re-set some bits in the pmcw that were lost. */
 	sch->schib.pmcw.isc = 3;
 	sch->schib.pmcw.csense = 1;
@@ -1237,7 +1268,7 @@ fsm_func_t *dev_jumptable[NR_DEV_STATES]
 		[DEV_EVENT_NOTOPER]	= ccw_device_online_notoper,
 		[DEV_EVENT_INTERRUPT]	= ccw_device_verify_irq,
 		[DEV_EVENT_TIMEOUT]	= ccw_device_onoff_timeout,
-		[DEV_EVENT_VERIFY]	= ccw_device_nop,
+		[DEV_EVENT_VERIFY]	= ccw_device_delay_verify,
 	},
 	[DEV_STATE_ONLINE] = {
 		[DEV_EVENT_NOTOPER]	= ccw_device_online_notoper,
@@ -1280,7 +1311,7 @@ fsm_func_t *dev_jumptable[NR_DEV_STATES]
 		[DEV_EVENT_NOTOPER]	= ccw_device_online_notoper,
 		[DEV_EVENT_INTERRUPT]	= ccw_device_wait4io_irq,
 		[DEV_EVENT_TIMEOUT]	= ccw_device_wait4io_timeout,
-		[DEV_EVENT_VERIFY]	= ccw_device_wait4io_verify,
+		[DEV_EVENT_VERIFY]	= ccw_device_delay_verify,
 	},
 	[DEV_STATE_QUIESCE] = {
 		[DEV_EVENT_NOTOPER]	= ccw_device_quiesce_done,
@@ -1293,7 +1324,7 @@ fsm_func_t *dev_jumptable[NR_DEV_STATES]
 		[DEV_EVENT_NOTOPER]	= ccw_device_nop,
 		[DEV_EVENT_INTERRUPT]	= ccw_device_start_id,
 		[DEV_EVENT_TIMEOUT]	= ccw_device_bug,
-		[DEV_EVENT_VERIFY]	= ccw_device_nop,
+		[DEV_EVENT_VERIFY]	= ccw_device_start_id,
 	},
 	[DEV_STATE_DISCONNECTED_SENSE_ID] = {
 		[DEV_EVENT_NOTOPER]	= ccw_device_recog_notoper,
diff -urNp linux-2.6.18.s390x/drivers/s390/cio/device_id.c linux-2.6.18.s390x.cio/drivers/s390/cio/device_id.c
--- linux-2.6.18.s390x/drivers/s390/cio/device_id.c	2006-11-29 16:40:27.000000000 +0100
+++ linux-2.6.18.s390x.cio/drivers/s390/cio/device_id.c	2006-11-29 18:05:46.000000000 +0100
@@ -197,6 +197,8 @@ __ccw_device_sense_id_start(struct ccw_d
 		if ((sch->opm & cdev->private->imask) != 0 &&
 		    cdev->private->iretry > 0) {
 			cdev->private->iretry--;
+			/* Reset internal retry indication. */
+			cdev->private->flags.intretry = 0;
 			ret = cio_start (sch, cdev->private->iccws,
 					 cdev->private->imask);
 			/* ret is 0, -EBUSY, -EACCES or -ENODEV */
@@ -243,8 +245,14 @@ ccw_device_check_sense_id(struct ccw_dev
 		return 0; /* Success */
 	}
 	/* Check the error cases. */
-	if (irb->scsw.fctl & (SCSW_FCTL_HALT_FUNC | SCSW_FCTL_CLEAR_FUNC))
-		return -ETIME;
+	if (irb->scsw.fctl & (SCSW_FCTL_HALT_FUNC | SCSW_FCTL_CLEAR_FUNC)) {
+		/* Retry Sense ID if requested. */
+		if (cdev->private->flags.intretry) {
+			cdev->private->flags.intretry = 0;
+			return -EAGAIN;
+		} else
+			return -ETIME;
+	}
 	if (irb->esw.esw0.erw.cons && (irb->ecw[0] & SNS0_CMD_REJECT)) {
 		/*
 		 * if the device doesn't support the SenseID
diff -urNp linux-2.6.18.s390x/drivers/s390/cio/device_ops.c linux-2.6.18.s390x.cio/drivers/s390/cio/device_ops.c
--- linux-2.6.18.s390x/drivers/s390/cio/device_ops.c	2006-11-29 16:40:29.000000000 +0100
+++ linux-2.6.18.s390x.cio/drivers/s390/cio/device_ops.c	2006-11-29 18:05:46.000000000 +0100
@@ -96,6 +96,12 @@ ccw_device_start_key(struct ccw_device *
 	ret = cio_set_options (sch, flags);
 	if (ret)
 		return ret;
+	/* Adjust requested path mask to exclude varied-off paths. */
+	if (lpm) {
+		lpm &= sch->opm;
+		if (lpm == 0)
+			return -EACCES;
+	}
 	ret = cio_start_key (sch, cpa, lpm, key);
 	if (ret == 0)
 		cdev->private->intparm = intparm;
@@ -210,6 +216,9 @@ ccw_device_call_handler(struct ccw_devic
 	      (stctl & SCSW_STCTL_PRIM_STATUS)))
 		return 0;
 
+	/* Clear pending timers for device driver initiated I/O. */
+	if (ending_status)
+		ccw_device_set_timeout(cdev, 0);
 	/*
 	 * Now we are ready to call the device driver interrupt handler.
 	 */
@@ -250,7 +259,7 @@ ccw_device_get_path_mask(struct ccw_devi
 	if (!sch)
 		return 0;
 	else
-		return sch->vpm;
+		return sch->lpm;
 }
 
 static void
@@ -276,10 +285,10 @@ ccw_device_wake_up(struct ccw_device *cd
 		 * or intervention required. Also check for long busy
 		 * conditions.
 		 */
-		 if (cdev->private->flags.doverify ||
+		if (cdev->private->flags.doverify ||
 			 cdev->private->state == DEV_STATE_VERIFY)
 			 cdev->private->intparm = -EAGAIN;
-		 if ((irb->scsw.dstat & DEV_STAT_UNIT_CHECK) &&
+		else if ((irb->scsw.dstat & DEV_STAT_UNIT_CHECK) &&
 		     !(irb->ecw[0] &
 		       (SNS0_CMD_REJECT | SNS0_INTERVENTION_REQ)))
 			 cdev->private->intparm = -EAGAIN;
@@ -303,8 +312,11 @@ __ccw_device_retry_loop(struct ccw_devic
 
 	sch = to_subchannel(cdev->dev.parent);
 	do {
+		ccw_device_set_timeout(cdev, 60 * HZ);
 		ret = cio_start (sch, ccw, lpm);
-		if ((ret == -EBUSY) || (ret == -EACCES)) {
+		if (ret != 0)
+			ccw_device_set_timeout(cdev, 0);
+		if (ret == -EBUSY) {
 			/* Try again later. */
 			spin_unlock_irq(&sch->lock);
 			msleep(10);
@@ -433,6 +445,13 @@ read_conf_data_lpm (struct ccw_device *c
 	if (!ciw || ciw->cmd == 0)
 		return -EOPNOTSUPP;
 
+	/* Adjust requested path mask to exclude varied-off paths. */
+	if (lpm) {
+		lpm &= sch->opm;
+		if (lpm == 0)
+			return -EACCES;
+	}
+
 	rcd_ccw = kzalloc(sizeof(struct ccw1), GFP_KERNEL | GFP_DMA);
 	if (!rcd_ccw)
 		return -ENOMEM;
diff -urNp linux-2.6.18.s390x/drivers/s390/cio/device_pgid.c linux-2.6.18.s390x.cio/drivers/s390/cio/device_pgid.c
--- linux-2.6.18.s390x/drivers/s390/cio/device_pgid.c	2006-11-29 16:40:28.000000000 +0100
+++ linux-2.6.18.s390x.cio/drivers/s390/cio/device_pgid.c	2006-11-29 18:05:46.000000000 +0100
@@ -71,6 +71,8 @@ __ccw_device_sense_pgid_start(struct ccw
 		ccw->cda = (__u32) __pa (&cdev->private->pgid[i]);
 		if (cdev->private->iretry > 0) {
 			cdev->private->iretry--;
+			/* Reset internal retry indication. */
+			cdev->private->flags.intretry = 0;
 			ret = cio_start (sch, cdev->private->iccws, 
 					 cdev->private->imask);
 			/* ret is 0, -EBUSY, -EACCES or -ENODEV */
@@ -96,6 +98,9 @@ ccw_device_sense_pgid_start(struct ccw_d
 {
 	int ret;
 
+	/* Set a timeout of 60s */
+	ccw_device_set_timeout(cdev, 60*HZ);
+
 	cdev->private->state = DEV_STATE_SENSE_PGID;
 	cdev->private->imask = 0x80;
 	cdev->private->iretry = 5;
@@ -118,8 +123,14 @@ __ccw_device_check_sense_pgid(struct ccw
 
 	sch = to_subchannel(cdev->dev.parent);
 	irb = &cdev->private->irb;
-	if (irb->scsw.fctl & (SCSW_FCTL_HALT_FUNC | SCSW_FCTL_CLEAR_FUNC))
-		return -ETIME;
+	if (irb->scsw.fctl & (SCSW_FCTL_HALT_FUNC | SCSW_FCTL_CLEAR_FUNC)) {
+		/* Retry Sense PGID if requested. */
+		if (cdev->private->flags.intretry) {
+			cdev->private->flags.intretry = 0;
+			return -EAGAIN;
+		} else
+			return -ETIME;
+	}
 	if (irb->esw.esw0.erw.cons &&
 	    (irb->ecw[0]&(SNS0_CMD_REJECT|SNS0_INTERVENTION_REQ))) {
 		/*
@@ -245,18 +256,19 @@ __ccw_device_do_pgid(struct ccw_device *
 	memset(&cdev->private->irb, 0, sizeof(struct irb));
 
 	/* Try multiple times. */
-	ret = -ENODEV;
+	ret = -EACCES;
 	if (cdev->private->iretry > 0) {
 		cdev->private->iretry--;
+		/* Reset internal retry indication. */
+		cdev->private->flags.intretry = 0;
 		ret = cio_start (sch, cdev->private->iccws,
 				 cdev->private->imask);
-		/* ret is 0, -EBUSY, -EACCES or -ENODEV */
-		if ((ret != -EACCES) && (ret != -ENODEV))
+		/* We expect an interrupt in case of success or busy
+		 * indication. */
+		if ((ret == 0) || (ret == -EBUSY))
 			return ret;
 	}
-	/* PGID command failed on this path. Switch it off. */
-	sch->lpm &= ~cdev->private->imask;
-	sch->vpm &= ~cdev->private->imask;
+	/* PGID command failed on this path. */
 	CIO_MSG_EVENT(2, "SPID - Device %04x on Subchannel "
 		      "0.%x.%04x, lpm %02X, became 'not operational'\n",
 		      cdev->private->devno, sch->schid.ssid,
@@ -286,18 +298,19 @@ static int __ccw_device_do_nop(struct cc
 	memset(&cdev->private->irb, 0, sizeof(struct irb));
 
 	/* Try multiple times. */
-	ret = -ENODEV;
+	ret = -EACCES;
 	if (cdev->private->iretry > 0) {
 		cdev->private->iretry--;
+		/* Reset internal retry indication. */
+		cdev->private->flags.intretry = 0;
 		ret = cio_start (sch, cdev->private->iccws,
 				 cdev->private->imask);
-		/* ret is 0, -EBUSY, -EACCES or -ENODEV */
-		if ((ret != -EACCES) && (ret != -ENODEV))
+		/* We expect an interrupt in case of success or busy
+		 * indication. */
+		if ((ret == 0) || (ret == -EBUSY))
 			return ret;
 	}
-	/* nop command failed on this path. Switch it off. */
-	sch->lpm &= ~cdev->private->imask;
-	sch->vpm &= ~cdev->private->imask;
+	/* nop command failed on this path. */
 	CIO_MSG_EVENT(2, "NOP - Device %04x on Subchannel "
 		      "0.%x.%04x, lpm %02X, became 'not operational'\n",
 		      cdev->private->devno, sch->schid.ssid,
@@ -318,8 +331,14 @@ __ccw_device_check_pgid(struct ccw_devic
 
 	sch = to_subchannel(cdev->dev.parent);
 	irb = &cdev->private->irb;
-	if (irb->scsw.fctl & (SCSW_FCTL_HALT_FUNC | SCSW_FCTL_CLEAR_FUNC))
-		return -ETIME;
+	if (irb->scsw.fctl & (SCSW_FCTL_HALT_FUNC | SCSW_FCTL_CLEAR_FUNC)) {
+		/* Retry Set Path Group ID if requested. */
+		if (cdev->private->flags.intretry) {
+			cdev->private->flags.intretry = 0;
+			return -EAGAIN;
+		} else
+			return -ETIME;
+	}
 	if (irb->esw.esw0.erw.cons) {
 		if (irb->ecw[0] & SNS0_CMD_REJECT)
 			return -EOPNOTSUPP;
@@ -356,8 +375,14 @@ static int __ccw_device_check_nop(struct
 
 	sch = to_subchannel(cdev->dev.parent);
 	irb = &cdev->private->irb;
-	if (irb->scsw.fctl & (SCSW_FCTL_HALT_FUNC | SCSW_FCTL_CLEAR_FUNC))
-		return -ETIME;
+	if (irb->scsw.fctl & (SCSW_FCTL_HALT_FUNC | SCSW_FCTL_CLEAR_FUNC)) {
+		/* Retry NOP if requested. */
+		if (cdev->private->flags.intretry) {
+			cdev->private->flags.intretry = 0;
+			return -EAGAIN;
+		} else
+			return -ETIME;
+	}
 	if (irb->scsw.cc == 3) {
 		CIO_MSG_EVENT(2, "NOP - Device %04x on Subchannel 0.%x.%04x,"
 			      " lpm %02X, became 'not operational'\n",
@@ -372,27 +397,32 @@ static void
 __ccw_device_verify_start(struct ccw_device *cdev)
 {
 	struct subchannel *sch;
-	__u8 imask, func;
+	__u8 func;
 	int ret;
 
 	sch = to_subchannel(cdev->dev.parent);
-	while (sch->vpm != sch->lpm) {
-		/* Find first unequal bit in vpm vs. lpm */
-		for (imask = 0x80; imask != 0; imask >>= 1)
-			if ((sch->vpm & imask) != (sch->lpm & imask))
-				break;
-		cdev->private->imask = imask;
+	/* Repeat for all paths. */
+	for (; cdev->private->imask; cdev->private->imask >>= 1,
+				     cdev->private->iretry = 5) {
+		if ((cdev->private->imask & sch->schib.pmcw.pam) == 0)
+			/* Path not available, try next. */
+			continue;
 		if (cdev->private->options.pgroup) {
-			func = (sch->vpm & imask) ?
-				SPID_FUNC_RESIGN : SPID_FUNC_ESTABLISH;
+			if (sch->opm & cdev->private->imask)
+				func = SPID_FUNC_ESTABLISH;
+			else
+				func = SPID_FUNC_RESIGN;
 			ret = __ccw_device_do_pgid(cdev, func);
 		} else
 			ret = __ccw_device_do_nop(cdev);
+		/* We expect an interrupt in case of success or busy
+		 * indication. */
 		if (ret == 0 || ret == -EBUSY)
 			return;
-		cdev->private->iretry = 5;
+		/* Permanent path failure, try next. */
 	}
-	ccw_device_verify_done(cdev, (sch->lpm != 0) ? 0 : -ENODEV);
+	/* Done with all paths. */
+	ccw_device_verify_done(cdev, (sch->vpm != 0) ? 0 : -ENODEV);
 }
 		
 /*
@@ -421,14 +451,14 @@ ccw_device_verify_irq(struct ccw_device 
 	else
 		ret = __ccw_device_check_nop(cdev);
 	memset(&cdev->private->irb, 0, sizeof(struct irb));
+
 	switch (ret) {
 	/* 0, -ETIME, -EAGAIN, -EOPNOTSUPP or -EACCES */
 	case 0:
-		/* Establish or Resign Path Group done. Update vpm. */
-		if ((sch->lpm & cdev->private->imask) != 0)
-			sch->vpm |= cdev->private->imask;
-		else
-			sch->vpm &= ~cdev->private->imask;
+		/* Path verification ccw finished successfully, update vpm. */
+		sch->vpm |= sch->opm & cdev->private->imask;
+		/* Go on with next path. */
+		cdev->private->imask >>= 1;
 		cdev->private->iretry = 5;
 		__ccw_device_verify_start(cdev);
 		break;
@@ -441,6 +471,10 @@ ccw_device_verify_irq(struct ccw_device 
 			cdev->private->options.pgroup = 0;
 		else
 			cdev->private->flags.pgid_single = 1;
+		/* Retry */
+		sch->vpm = 0;
+		cdev->private->imask = 0x80;
+		cdev->private->iretry = 5;
 		/* fall through. */
 	case -EAGAIN:		/* Try again. */
 		__ccw_device_verify_start(cdev);
@@ -449,8 +483,7 @@ ccw_device_verify_irq(struct ccw_device 
 		ccw_device_verify_done(cdev, -ETIME);
 		break;
 	case -EACCES:		/* channel is not operational. */
-		sch->lpm &= ~cdev->private->imask;
-		sch->vpm &= ~cdev->private->imask;
+		cdev->private->imask >>= 1;
 		cdev->private->iretry = 5;
 		__ccw_device_verify_start(cdev);
 		break;
@@ -463,19 +496,19 @@ ccw_device_verify_start(struct ccw_devic
 	struct subchannel *sch = to_subchannel(cdev->dev.parent);
 
 	cdev->private->flags.pgid_single = 0;
+	cdev->private->imask = 0x80;
 	cdev->private->iretry = 5;
-	/*
-	 * Update sch->lpm with current values to catch paths becoming
-	 * available again.
-	 */
+
+	/* Start with empty vpm. */
+	sch->vpm = 0;
+
+	/* Get current pam. */
 	if (stsch(sch->schid, &sch->schib)) {
 		ccw_device_verify_done(cdev, -ENODEV);
 		return;
 	}
-	sch->lpm = sch->schib.pmcw.pim &
-		sch->schib.pmcw.pam &
-		sch->schib.pmcw.pom &
-		sch->opm;
+	/* After 60s path verification is considered to have failed. */
+	ccw_device_set_timeout(cdev, 60*HZ);
 	__ccw_device_verify_start(cdev);
 }
 
@@ -524,7 +557,6 @@ ccw_device_disband_irq(struct ccw_device
 	switch (ret) {
 	/* 0, -ETIME, -EAGAIN, -EOPNOTSUPP or -EACCES */
 	case 0:			/* disband successful. */
-		sch->vpm = 0;
 		ccw_device_disband_done(cdev, ret);
 		break;
 	case -EOPNOTSUPP:
@@ -551,6 +583,9 @@ ccw_device_disband_irq(struct ccw_device
 void
 ccw_device_disband_start(struct ccw_device *cdev)
 {
+	/* After 60s disbanding is considered to have failed. */
+	ccw_device_set_timeout(cdev, 60*HZ);
+
 	cdev->private->flags.pgid_single = 0;
 	cdev->private->iretry = 5;
 	cdev->private->imask = 0x80;
diff -urNp linux-2.6.18.s390x/drivers/s390/cio/device_status.c linux-2.6.18.s390x.cio/drivers/s390/cio/device_status.c
--- linux-2.6.18.s390x/drivers/s390/cio/device_status.c	2006-11-29 16:40:27.000000000 +0100
+++ linux-2.6.18.s390x.cio/drivers/s390/cio/device_status.c	2006-11-29 18:05:46.000000000 +0100
@@ -320,6 +320,9 @@ ccw_device_do_sense(struct ccw_device *c
 	sch->sense_ccw.count = SENSE_MAX_COUNT;
 	sch->sense_ccw.flags = CCW_FLAG_SLI;
 
+	/* Reset internal retry indication. */
+	cdev->private->flags.intretry = 0;
+
 	return cio_start (sch, &sch->sense_ccw, 0xff);
 }