kernel-2.6.18-194.11.1.el5.src.rpm

From: Hans-Joachim Picht <hpicht@redhat.com>
Date: Wed, 22 Oct 2008 14:16:57 +0200
Subject: [s390] cio: reduce cpu utilization during device scan
Message-id: 20081022121657.GA26181@redhat.com
O-Subject: [RHEL5 U4 PATCH 1/5] s390 - cio: reduce cpu utilization during device scan
Bugzilla: 459793
RH-Acked-by: Pete Zaitcev <zaitcev@redhat.com>

Description
============

After a FICON cable is plugged into a z10, the kmcheck/kslowcrw kernel
threads seize a logical CPU and the whole system (15 LPARs) slows down.
The problem is caused by an inefficient algorithm that is used whenever
all devices must be scanned; it is fixed by an improved algorithm for
scanning all devices.
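
The idea behind the fix, roughly: instead of allocating a list entry for
every "slow" subchannel and falling back to a full rescan of all possible
subchannels whenever that allocation fails, the new code keeps a fixed
bitmap (the idset) of subchannel IDs that still need evaluation.
Registered subchannels are walked first and removed from the set, so the
expensive probing only touches the IDs that remain. The following
user-space sketch is illustrative only; it is not code from the patch,
it ignores the SSID dimension, and names such as probe_unknown() and the
registered[] array are made up:

#include <limits.h>
#include <stdio.h>
#include <string.h>

#define MAX_SCHID     65536   /* mirrors __MAX_SUBCHANNEL + 1, SSIDs ignored */
#define BITS_PER_LONG (CHAR_BIT * (int)sizeof(unsigned long))
#define BITMAP_WORDS  ((MAX_SCHID + BITS_PER_LONG - 1) / BITS_PER_LONG)

static unsigned long schid_set[BITMAP_WORDS];

static void idset_add(int id)
{
        schid_set[id / BITS_PER_LONG] |= 1UL << (id % BITS_PER_LONG);
}

static void idset_del(int id)
{
        schid_set[id / BITS_PER_LONG] &= ~(1UL << (id % BITS_PER_LONG));
}

static int idset_contains(int id)
{
        return (schid_set[id / BITS_PER_LONG] >> (id % BITS_PER_LONG)) & 1;
}

/* Stand-in for the expensive stsch()/css_probe_device() work. */
static void probe_unknown(int id)
{
        (void)id;
}

int main(void)
{
        int registered[] = { 0x0100, 0x0101, 0x4711 }; /* pretend these are known */
        int id, probed = 0;
        size_t i;

        memset(schid_set, 0xff, sizeof(schid_set));    /* start with "evaluate everything" */

        /* Stage 1: walk the already-registered subchannels, drop them from the set. */
        for (i = 0; i < sizeof(registered) / sizeof(registered[0]); i++)
                idset_del(registered[i]);

        /* A later event (think css_schedule_eval()) just re-adds a single ID. */
        idset_add(0x0100);

        /* Stage 2: only IDs still in the set get the expensive probe. */
        for (id = 0; id < MAX_SCHID; id++) {
                if (idset_contains(id)) {
                        probe_unknown(id);
                        probed++;
                }
        }

        printf("probed %d subchannel IDs\n", probed);
        return 0;
}

In the patch itself this corresponds to the idset_*() helpers added in
drivers/s390/cio/idset.c, for_each_subchannel_staged() in css.c, and
css_schedule_eval(), which adds an ID to the set and kicks the slow-path
work queue.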

Bugzilla
=========

BZ 459793
https://bugzilla.redhat.com/show_bug.cgi?id=459793

Upstream status of the patch:
=============================

The patch is upstream as of kernel version 2.6.25:

http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=e82a1567e4b22eb035da2499d20ddd573c9acf75

Test status:
============

The patch has been tested and fixes the problem.
The fix has been verified by the IBM test department.

Please ACK.

With best regards,

	--Hans

diff --git a/drivers/s390/cio/Makefile b/drivers/s390/cio/Makefile
index fe7b3ff..cfaf77b 100644
--- a/drivers/s390/cio/Makefile
+++ b/drivers/s390/cio/Makefile
@@ -2,7 +2,7 @@
 # Makefile for the S/390 common i/o drivers
 #
 
-obj-y += airq.o blacklist.o chsc.o cio.o css.o chp.o
+obj-y += airq.o blacklist.o chsc.o cio.o css.o chp.o idset.o
 ccw_device-objs += device.o device_fsm.o device_ops.o
 ccw_device-objs += device_id.o device_pgid.o device_status.o
 obj-y += ccw_device.o cmf.o
diff --git a/drivers/s390/cio/chp.c b/drivers/s390/cio/chp.c
index b56e9d8..d1d7268 100644
--- a/drivers/s390/cio/chp.c
+++ b/drivers/s390/cio/chp.c
@@ -493,7 +493,7 @@ void *chp_get_chp_desc(struct chp_id chpid)
  * Handle channel-report-words indicating that the status of a channel-path
  * has changed.
  */
-int chp_process_crw(int id, int status)
+void chp_process_crw(int id, int status)
 {
 	struct chp_id chpid;
 
@@ -502,11 +502,9 @@ int chp_process_crw(int id, int status)
 	if (status) {
 		if (!chp_is_registered(chpid))
 			chp_new(chpid);
-		return chsc_chp_online(chpid);
-	} else {
+		chsc_chp_online(chpid);
+	} else
 		chsc_chp_offline(chpid);
-		return 0;
-	}
 }
 
 static inline int info_bit_num(struct chp_id id)
diff --git a/drivers/s390/cio/chp.h b/drivers/s390/cio/chp.h
index 862af69..6528656 100644
--- a/drivers/s390/cio/chp.h
+++ b/drivers/s390/cio/chp.h
@@ -42,7 +42,7 @@ int chp_get_status(struct chp_id chpid);
 u8 chp_get_sch_opm(struct subchannel *sch);
 int chp_is_registered(struct chp_id chpid);
 void *chp_get_chp_desc(struct chp_id chpid);
-int chp_process_crw(int id, int available);
+void chp_process_crw(int id, int available);
 void chp_remove_cmg_attr(struct channel_path *chp);
 int chp_add_cmg_attr(struct channel_path *chp);
 int chp_new(struct chp_id chpid);
diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c
index 2c87998..9eb78a1 100644
--- a/drivers/s390/cio/chsc.c
+++ b/drivers/s390/cio/chsc.c
@@ -200,10 +200,8 @@ terminate_internal_io(struct subchannel *sch)
 	if (cio_clear(sch)) {
 		/* Recheck device in case clear failed */
 		sch->lpm = 0;
-		if (css_enqueue_subchannel_slow(sch->schid)) {
-			css_clear_subchannel_slow_list();
-			need_rescan = 1;
-		}
+		if (device_trigger_verify(sch) != 0)
+			css_schedule_eval(sch->schid);
 		return;
 	}
 	/* Request retry of internal operation. */
@@ -214,17 +212,13 @@ terminate_internal_io(struct subchannel *sch)
 		sch->driver->termination(&sch->dev);
 }
 
-static int
-s390_subchannel_remove_chpid(struct device *dev, void *data)
+static int s390_subchannel_remove_chpid(struct subchannel *sch, void *data)
 {
 	int j;
 	int mask;
-	struct subchannel *sch;
-	struct chp_id *chpid;
+	struct chp_id *chpid = data;
 	struct schib schib;
 
-	sch = to_subchannel(dev);
-	chpid = data;
 	for (j = 0; j < 8; j++) {
 		mask = 0x80 >> j;
 		if ((sch->schib.pmcw.pim & mask) &&
@@ -266,11 +260,8 @@ s390_subchannel_remove_chpid(struct device *dev, void *data)
 
 out_unreg:
 	sch->lpm = 0;
-	if (css_enqueue_subchannel_slow(sch->schid)) {
-		css_clear_subchannel_slow_list();
-		need_rescan = 1;
-	}
 	spin_unlock_irq(&sch->lock);
+	css_schedule_eval(sch->schid);
 	return 0;
 }
 
@@ -285,11 +276,7 @@ void chsc_chp_offline(struct chp_id chpid)
 		return;
 	/* Wait until previous actions have settled. */
 	css_wait_for_slow_path();
-	bus_for_each_dev(&css_bus_type, NULL, &chpid,
-			 s390_subchannel_remove_chpid);
-
-	if (need_rescan || css_slow_subchannels_exist())
-		queue_work(slow_path_wq, &slow_path_work);
+	for_each_subchannel_staged(s390_subchannel_remove_chpid, NULL, &chpid);
 }
 
 struct res_acc_data {
@@ -333,11 +320,9 @@ static int s390_process_res_acc_sch(struct res_acc_data *res_data,
 	return 0x80 >> chp;
 }
 
-static inline int
-s390_process_res_acc_new_sch(struct subchannel_id schid)
+static int s390_process_res_acc_new_sch(struct subchannel_id schid, void *data)
 {
 	struct schib schib;
-	int ret;
 	/*
 	 * We don't know the device yet, but since a path
 	 * may be available now to the device we'll have
@@ -348,38 +333,23 @@ s390_process_res_acc_new_sch(struct subchannel_id schid)
 	 */
 	if (stsch_err(schid, &schib))
 		/* We're through */
-		return need_rescan ? -EAGAIN : -ENXIO;
+		return -ENXIO;
 
 	/* Put it on the slow path. */
-	ret = css_enqueue_subchannel_slow(schid);
-	if (ret) {
-		css_clear_subchannel_slow_list();
-		need_rescan = 1;
-		return -EAGAIN;
-	}
+	css_schedule_eval(schid);
 	return 0;
 }
 
-static int
-__s390_process_res_acc(struct subchannel_id schid, void *data)
+static int __s390_process_res_acc(struct subchannel *sch, void *data)
 {
 	int chp_mask, old_lpm;
-	struct res_acc_data *res_data;
-	struct subchannel *sch;
-
-	res_data = (struct res_acc_data *)data;
-	sch = get_subchannel_by_schid(schid);
-	if (!sch)
-		/* Check if a subchannel is newly available. */
-		return s390_process_res_acc_new_sch(schid);
+	struct res_acc_data *res_data = data;
 
 	spin_lock_irq(&sch->lock);
-
 	chp_mask = s390_process_res_acc_sch(res_data, sch);
 
 	if (chp_mask == 0) {
 		spin_unlock_irq(&sch->lock);
-		put_device(&sch->dev);
 		return 0;
 	}
 	old_lpm = sch->lpm;
@@ -393,15 +363,13 @@ __s390_process_res_acc(struct subchannel_id schid, void *data)
 		sch->driver->verify(&sch->dev);
 
 	spin_unlock_irq(&sch->lock);
-	put_device(&sch->dev);
+
 	return 0;
 }
 
 
-static int
-s390_process_res_acc (struct res_acc_data *res_data)
+static void s390_process_res_acc(struct res_acc_data *res_data)
 {
-	int rc;
 	char dbf_txt[15];
 
 	sprintf(dbf_txt, "accpr%x.%02x", res_data->chpid.cssid,
@@ -420,12 +388,8 @@ s390_process_res_acc (struct res_acc_data *res_data)
 	 * The more information we have (info), the less scanning
 	 * will we have to do.
 	 */
-	rc = for_each_subchannel(__s390_process_res_acc, res_data);
-	if (css_slow_subchannels_exist())
-		rc = -EAGAIN;
-	else if (rc != -EAGAIN)
-		rc = 0;
-	return rc;
+	for_each_subchannel_staged(__s390_process_res_acc,
+				   s390_process_res_acc_new_sch, res_data);
 }
 
 static int
@@ -483,7 +447,7 @@ struct chsc_sei_area {
 	/* ccdf has to be big enough for a link-incident record */
 };
 
-static int chsc_process_sei_chp_config(struct chsc_sei_area *sei_area)
+static void chsc_process_sei_chp_config(struct chsc_sei_area *sei_area)
 {
 	struct chp_config_data *data;
 	struct chp_id chpid;
@@ -491,7 +455,7 @@ static int chsc_process_sei_chp_config(struct chsc_sei_area *sei_area)
 
 	CIO_CRW_EVENT(4, "chsc: channel-path-configuration notification\n");
 	if (sei_area->rs != 0)
-		return 0;
+		return;
 	data = (struct chp_config_data *) &(sei_area->ccdf);
 	chp_id_init(&chpid);
 	for (num = 0; num <= __MAX_CHPID; num++) {
@@ -512,20 +476,17 @@ static int chsc_process_sei_chp_config(struct chsc_sei_area *sei_area)
 			break;
 		}
 	}
-
-	return 0;
 }
 
-int
-chsc_process_crw(void)
+void chsc_process_crw(void)
 {
 	struct chp_id chpid;
-	int id, ret;
+	int id;
 	struct res_acc_data res_data;
 	struct chsc_sei_area *sei_area;
 
 	if (!sei_page)
-		return 0;
+		return;
 	/*
 	 * build the chsc request block for store event information
 	 * and do the call
@@ -535,7 +496,6 @@ chsc_process_crw(void)
 	sei_area = sei_page;
 
 	CIO_TRACE_EVENT( 2, "prcss");
-	ret = 0;
 	do {
 		int ccode, status;
 		memset(sei_area, 0, sizeof(*sei_area));
@@ -545,7 +505,7 @@ chsc_process_crw(void)
 
 		ccode = chsc(sei_area);
 		if (ccode > 0)
-			return 0;
+			return;
 
 		switch (sei_area->response.code) {
 			/* for debug purposes, check for problems */
@@ -556,19 +516,19 @@ chsc_process_crw(void)
 		case 0x0002:
 			CIO_CRW_EVENT(2,
 				      "chsc_process_crw: invalid command!\n");
-			return 0;
+			return;
 		case 0x0003:
 			CIO_CRW_EVENT(2, "chsc_process_crw: error in chsc "
 				      "request block!\n");
-			return 0;
+			return;
 		case 0x0005:
 			CIO_CRW_EVENT(2, "chsc_process_crw: no event "
 				      "information stored\n");
-			return 0;
+			return;
 		default:
 			CIO_CRW_EVENT(2, "chsc_process_crw: chsc response %d\n",
 				      sei_area->response.code);
-			return 0;
+			return;
 		}
 
 		/* Check if we might have lost some information. */
@@ -634,11 +594,11 @@ chsc_process_crw(void)
 					res_data.fla_mask = 0xff00;
 				}
 			}
-			ret = s390_process_res_acc(&res_data);
+			s390_process_res_acc(&res_data);
 			pr_debug("\n\n");
 			break;
 		case 8: /* channel-path-configuration notification */
-			ret = chsc_process_sei_chp_config(sei_area);
+			chsc_process_sei_chp_config(sei_area);
 			break;
 		default: /* other stuff */
 			CIO_CRW_EVENT(4, "chsc_process_crw: event %d\n",
@@ -646,59 +606,43 @@ chsc_process_crw(void)
 			break;
 		}
 	} while (sei_area->flags & 0x80);
-	return ret;
 }
 
-static inline int
-__chp_add_new_sch(struct subchannel_id schid)
+static int __chp_add_new_sch(struct subchannel_id schid, void *data)
 {
 	struct schib schib;
-	int ret;
 
 	if (stsch_err(schid, &schib))
 		/* We're through */
-		return need_rescan ? -EAGAIN : -ENXIO;
+		return -ENXIO;
 
 	/* Put it on the slow path. */
-	ret = css_enqueue_subchannel_slow(schid);
-	if (ret) {
-		css_clear_subchannel_slow_list();
-		need_rescan = 1;
-		return -EAGAIN;
-	}
+	css_schedule_eval(schid);
 	return 0;
 }
 
 
-static int
-__chp_add(struct subchannel_id schid, void *data)
+static int __chp_add(struct subchannel *sch, void *data)
 {
 	int i, mask;
-	struct chp_id *chpid;
-	struct subchannel *sch;
-
-	chpid = data;
-	sch = get_subchannel_by_schid(schid);
-	if (!sch)
-		/* Check if the subchannel is now available. */
-		return __chp_add_new_sch(schid);
+	struct chp_id *chpid = data;
+
 	spin_lock_irq(&sch->lock);
 	for (i=0; i<8; i++) {
 		mask = 0x80 >> i;
 		if ((sch->schib.pmcw.pim & mask) &&
-		    (sch->schib.pmcw.chpid[i] == chpid->id)) {
-			if (stsch(sch->schid, &sch->schib) != 0) {
-				/* Endgame. */
-				spin_unlock_irq(&sch->lock);
-				return -ENXIO;
-			}
+		    (sch->schib.pmcw.chpid[i] == chpid->id))
 			break;
-		}
 	}
 	if (i==8) {
 		spin_unlock_irq(&sch->lock);
 		return 0;
 	}
+	if (stsch(sch->schid, &sch->schib)) {
+		spin_unlock_irq(&sch->lock);
+		css_schedule_eval(sch->schid);
+		return 0;
+	}
 	sch->lpm = ((sch->schib.pmcw.pim &
 		     sch->schib.pmcw.pam &
 		     sch->schib.pmcw.pom)
@@ -708,28 +652,23 @@ __chp_add(struct subchannel_id schid, void *data)
 		sch->driver->verify(&sch->dev);
 
 	spin_unlock_irq(&sch->lock);
-	put_device(&sch->dev);
+
 	return 0;
 }
 
-int chsc_chp_online(struct chp_id chpid)
+void chsc_chp_online(struct chp_id chpid)
 {
-	int rc;
 	char dbf_txt[15];
 
 	sprintf(dbf_txt, "cadd%x.%02x", chpid.cssid, chpid.id);
 	CIO_TRACE_EVENT(2, dbf_txt);
 
-	if (chp_get_status(chpid) == 0)
-		return 0;
 	/* Wait until previous actions have settled. */
 	css_wait_for_slow_path();
-	rc = for_each_subchannel(__chp_add, &chpid);
-	if (css_slow_subchannels_exist())
-		rc = -EAGAIN;
-	if (rc != -EAGAIN)
-		rc = 0;
-	return rc;
+
+	if (chp_get_status(chpid) != 0)
+		for_each_subchannel_staged(__chp_add, __chp_add_new_sch,
+					   &chpid);
 }
 
 static void __s390_subchannel_vary_chpid(struct subchannel *sch,
@@ -777,12 +716,8 @@ static void __s390_subchannel_vary_chpid(struct subchannel *sch,
 					sch->driver->verify(&sch->dev);
 			}
 		} else if (!sch->lpm) {
-			if (device_trigger_verify(sch) != 0) {
-				if (css_enqueue_subchannel_slow(sch->schid)) {
-					css_clear_subchannel_slow_list();
-					need_rescan = 1;
-				}
-			}
+			if (device_trigger_verify(sch) != 0)
+				css_schedule_eval(sch->schid);
 		} else if (sch->driver && sch->driver->verify)
 			sch->driver->verify(&sch->dev);
 		break;
@@ -790,25 +725,17 @@ static void __s390_subchannel_vary_chpid(struct subchannel *sch,
 	spin_unlock_irqrestore(&sch->lock, flags);
 }
 
-static int s390_subchannel_vary_chpid_off(struct device *dev, void *data)
+static int s390_subchannel_vary_chpid_off(struct subchannel *sch, void *data)
 {
-	struct subchannel *sch;
-	struct chp_id *chpid;
-
-	sch = to_subchannel(dev);
-	chpid = data;
+	struct chp_id *chpid = data;
 
 	__s390_subchannel_vary_chpid(sch, *chpid, 0);
 	return 0;
 }
 
-static int s390_subchannel_vary_chpid_on(struct device *dev, void *data)
+static int s390_subchannel_vary_chpid_on(struct subchannel *sch, void *data)
 {
-	struct subchannel *sch;
-	struct chp_id *chpid;
-
-	sch = to_subchannel(dev);
-	chpid = data;
+	struct chp_id *chpid = data;
 
 	__s390_subchannel_vary_chpid(sch, *chpid, 1);
 	return 0;
@@ -818,22 +745,12 @@ static int
 __s390_vary_chpid_on(struct subchannel_id schid, void *data)
 {
 	struct schib schib;
-	struct subchannel *sch;
 
-	sch = get_subchannel_by_schid(schid);
-	if (sch) {
-		put_device(&sch->dev);
-		return 0;
-	}
 	if (stsch_err(schid, &schib))
 		/* We're through */
 		return -ENXIO;
 	/* Put it on the slow path. */
-	if (css_enqueue_subchannel_slow(schid)) {
-		css_clear_subchannel_slow_list();
-		need_rescan = 1;
-		return -EAGAIN;
-	}
+	css_schedule_eval(schid);
 	return 0;
 }
 
@@ -850,14 +767,12 @@ int chsc_chp_vary(struct chp_id chpid, int on)
 	 * Redo PathVerification on the devices the chpid connects to
 	 */
 
-	bus_for_each_dev(&css_bus_type, NULL, &chpid, on ?
-			 s390_subchannel_vary_chpid_on :
-			 s390_subchannel_vary_chpid_off);
 	if (on)
-		/* Scan for new devices on varied on path. */
-		for_each_subchannel(__s390_vary_chpid_on, NULL);
-	if (need_rescan || css_slow_subchannels_exist())
-		queue_work(slow_path_wq, &slow_path_work);
+		for_each_subchannel_staged(s390_subchannel_vary_chpid_on,
+					   __s390_vary_chpid_on, &chpid);
+	else
+		for_each_subchannel_staged(s390_subchannel_vary_chpid_off,
+					   NULL, &chpid);
 	return 0;
 }
 
diff --git a/drivers/s390/cio/chsc.h b/drivers/s390/cio/chsc.h
index 662e16d..6e55620 100644
--- a/drivers/s390/cio/chsc.h
+++ b/drivers/s390/cio/chsc.h
@@ -78,7 +78,7 @@ extern int chsc_secm(struct channel_subsystem *, int);
 int chsc_chp_vary(struct chp_id chpid, int on);
 int chsc_determine_channel_path_description(struct chp_id chpid,
 					    struct channel_path_desc *desc);
-int chsc_chp_online(struct chp_id chpid);
+void chsc_chp_online(struct chp_id chpid);
 void chsc_chp_offline(struct chp_id chpid);
 int chsc_get_channel_measurement_chars(struct channel_path *chp);
 
diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c
index 78a8b34..acb393d 100644
--- a/drivers/s390/cio/cio.c
+++ b/drivers/s390/cio/cio.c
@@ -522,6 +522,7 @@ cio_validate_subchannel (struct subchannel *sch, struct subchannel_id schid)
 
 	spin_lock_init(&sch->lock);
 	mutex_init(&sch->reg_mutex);
+	INIT_WORK(&sch->kick_work, NULL, NULL);
 
 	/* Set a name for the subchannel */
 	snprintf (sch->dev.bus_id, BUS_ID_SIZE, "0.%x.%04x", schid.ssid,
diff --git a/drivers/s390/cio/cio.h b/drivers/s390/cio/cio.h
index adb1dce..f8c7f50 100644
--- a/drivers/s390/cio/cio.h
+++ b/drivers/s390/cio/cio.h
@@ -112,6 +112,7 @@ struct subchannel {
 	struct ssd_info ssd_info;	/* subchannel description */
 	struct device dev;	/* entry in device tree */
 	struct css_driver *driver;
+	struct work_struct kick_work;
 } __attribute__ ((aligned(8)));
 
 #define IO_INTERRUPT_TYPE	   0 /* I/O interrupt type */
diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c
index 51ec6ff..436494e 100644
--- a/drivers/s390/cio/css.c
+++ b/drivers/s390/cio/css.c
@@ -21,8 +21,8 @@
 #include "ioasm.h"
 #include "chsc.h"
 #include "device.h"
+#include "idset.h"
 
-int need_rescan = 0;
 int css_init_done = 0;
 static int need_reprobe = 0;
 static int max_ssid = 0;
@@ -31,7 +31,7 @@ struct channel_subsystem *css[__MAX_CSSID + 1];
 
 int css_characteristics_avail = 0;
 
-inline int
+int
 for_each_subchannel(int(*fn)(struct subchannel_id, void *), void *data)
 {
 	struct subchannel_id schid;
@@ -50,6 +50,62 @@ for_each_subchannel(int(*fn)(struct subchannel_id, void *), void *data)
 	return ret;
 }
 
+struct cb_data {
+	void *data;
+	struct idset *set;
+	int (*fn_known_sch)(struct subchannel *, void *);
+	int (*fn_unknown_sch)(struct subchannel_id, void *);
+};
+
+static int call_fn_known_sch(struct device *dev, void *data)
+{
+	struct subchannel *sch = to_subchannel(dev);
+	struct cb_data *cb = data;
+	int rc = 0;
+
+	idset_sch_del(cb->set, sch->schid);
+	if (cb->fn_known_sch)
+		rc = cb->fn_known_sch(sch, cb->data);
+	return rc;
+}
+
+static int call_fn_unknown_sch(struct subchannel_id schid, void *data)
+{
+	struct cb_data *cb = data;
+	int rc = 0;
+
+	if (idset_sch_contains(cb->set, schid))
+		rc = cb->fn_unknown_sch(schid, cb->data);
+	return rc;
+}
+
+int for_each_subchannel_staged(int (*fn_known)(struct subchannel *, void *),
+			       int (*fn_unknown)(struct subchannel_id,
+			       void *), void *data)
+{
+	struct cb_data cb;
+	int rc;
+
+	cb.set = idset_sch_new();
+	if (!cb.set)
+		return -ENOMEM;
+	idset_fill(cb.set);
+	cb.data = data;
+	cb.fn_known_sch = fn_known;
+	cb.fn_unknown_sch = fn_unknown;
+	/* Process registered subchannels. */
+	rc = bus_for_each_dev(&css_bus_type, NULL, &cb, call_fn_known_sch);
+	if (rc)
+		goto out;
+	/* Process unregistered subchannels. */
+	if (fn_unknown)
+		rc = for_each_subchannel(call_fn_unknown_sch, &cb);
+out:
+	idset_free(cb.set);
+
+	return rc;
+}
+
 static struct subchannel *
 css_alloc_subchannel(struct subchannel_id schid)
 {
@@ -109,7 +165,6 @@ css_subchannel_release(struct device *dev)
 
 extern int css_get_ssd_info(struct subchannel *sch);
 
-
 int css_sch_device_register(struct subchannel *sch)
 {
 	int ret;
@@ -128,6 +183,15 @@ void css_sch_device_unregister(struct subchannel *sch)
 	mutex_unlock(&sch->reg_mutex);
 }
 
+void css_sch_device_unregister_wq(void *data)
+{
+	struct subchannel *sch = data;
+
+	css_sch_device_unregister(sch);
+	/* Release reference for work queue processing */
+	put_device(&sch->dev);
+}
+
 static int
 css_register_subchannel(struct subchannel *sch)
 {
@@ -137,7 +201,7 @@ css_register_subchannel(struct subchannel *sch)
 	sch->dev.parent = &css[0]->device;
 	sch->dev.bus = &css_bus_type;
 	sch->dev.release = &css_subchannel_release;
-	
+
 	/* make it known to the system */
 	ret = css_sch_device_register(sch);
 	if (ret)
@@ -197,6 +261,8 @@ static inline int css_get_subchannel_status(struct subchannel *sch)
 	return CIO_OPER;
 }
 
+typedef void (*workfunc)(void *);
+
 static int css_evaluate_known_subchannel(struct subchannel *sch, int slow)
 {
 	int event, ret, disc;
@@ -267,11 +333,14 @@ static int css_evaluate_known_subchannel(struct subchannel *sch, int slow)
 	switch (action) {
 	case UNREGISTER:
 	case UNREGISTER_PROBE:
-		/* Unregister device (will use subchannel lock). */
-		spin_unlock_irqrestore(&sch->lock, flags);
-		css_sch_device_unregister(sch);
-		spin_lock_irqsave(&sch->lock, flags);
-
+		/* Unregister device */
+		/* Get reference for work queue processing */
+		if (get_device(&sch->dev)) {
+			PREPARE_WORK(&sch->kick_work,
+				     (workfunc)css_sch_device_unregister_wq,
+				     (void *)sch);
+			queue_work(slow_path_wq, &sch->kick_work);
+		}
 		/* Reset intparm to zeroes. */
 		sch->schib.pmcw.intparm = 0;
 		cio_modify(sch);
@@ -308,7 +377,7 @@ static int css_evaluate_new_subchannel(struct subchannel_id schid, int slow)
 	return css_probe_device(schid);
 }
 
-static int css_evaluate_subchannel(struct subchannel_id schid, int slow)
+static void css_evaluate_subchannel(struct subchannel_id schid, int slow)
 {
 	struct subchannel *sch;
 	int ret;
@@ -319,52 +388,80 @@ static int css_evaluate_subchannel(struct subchannel_id schid, int slow)
 		put_device(&sch->dev);
 	} else
 		ret = css_evaluate_new_subchannel(schid, slow);
-
-	return ret;
+	if (ret == -EAGAIN)
+		css_schedule_eval(schid);
 }
 
-static int
-css_rescan_devices(struct subchannel_id schid, void *data)
+static struct idset *slow_subchannel_set;
+static spinlock_t slow_subchannel_lock;
+
+static int __init slow_subchannel_init(void)
 {
-	return css_evaluate_subchannel(schid, 1);
+	spin_lock_init(&slow_subchannel_lock);
+	slow_subchannel_set = idset_sch_new();
+	if (!slow_subchannel_set) {
+		printk(KERN_WARNING "cio: could not allocate slow subchannel "
+		       "set\n");
+		return -ENOMEM;
+	}
+	return 0;
 }
 
-struct slow_subchannel {
-	struct list_head slow_list;
-	struct subchannel_id schid;
-};
+subsys_initcall(slow_subchannel_init);
 
-static LIST_HEAD(slow_subchannels_head);
-static DEFINE_SPINLOCK(slow_subchannel_lock);
-
-static void
-css_trigger_slow_path(void)
+static int slow_eval_known_fn(struct subchannel *sch, void *data)
 {
-	CIO_TRACE_EVENT(4, "slowpath");
+	int eval;
+	int rc;
 
-	if (need_rescan) {
-		need_rescan = 0;
-		for_each_subchannel(css_rescan_devices, NULL);
-		return;
+	spin_lock_irq(&slow_subchannel_lock);
+	eval = idset_sch_contains(slow_subchannel_set, sch->schid);
+	idset_sch_del(slow_subchannel_set, sch->schid);
+	spin_unlock_irq(&slow_subchannel_lock);
+	if (eval) {
+		rc = css_evaluate_known_subchannel(sch, 1);
+		if (rc == -EAGAIN)
+			css_schedule_eval(sch->schid);
 	}
+	return 0;
+}
+
+static int slow_eval_unknown_fn(struct subchannel_id schid, void *data)
+{
+	int eval;
+	int rc = 0;
 
 	spin_lock_irq(&slow_subchannel_lock);
-	while (!list_empty(&slow_subchannels_head)) {
-		struct slow_subchannel *slow_sch =
-			list_entry(slow_subchannels_head.next,
-				   struct slow_subchannel, slow_list);
-
-		list_del_init(slow_subchannels_head.next);
-		spin_unlock_irq(&slow_subchannel_lock);
-		css_evaluate_subchannel(slow_sch->schid, 1);
-		spin_lock_irq(&slow_subchannel_lock);
-		kfree(slow_sch);
-	}
+	eval = idset_sch_contains(slow_subchannel_set, schid);
+	idset_sch_del(slow_subchannel_set, schid);
 	spin_unlock_irq(&slow_subchannel_lock);
+	if (eval) {
+		rc = css_evaluate_new_subchannel(schid, 1);
+		switch (rc) {
+		case -EAGAIN:
+			css_schedule_eval(schid);
+			rc = 0;
+			break;
+		case -ENXIO:
+		case -ENOMEM:
+		case -EIO:
+			/* These should abort looping */
+			break;
+		default:
+			rc = 0;
+		}
+	}
+	return rc;
 }
 
-typedef void (*workfunc)(void *);
-DECLARE_WORK(slow_path_work, (workfunc)css_trigger_slow_path, NULL);
+static void css_slow_path_func(struct work_struct *unused)
+{
+	CIO_TRACE_EVENT(4, "slowpath");
+	for_each_subchannel_staged(slow_eval_known_fn, slow_eval_unknown_fn,
+				   NULL);
+}
+
+DECLARE_WORK(slow_path_work, (workfunc)css_slow_path_func, NULL);
 struct workqueue_struct *slow_path_wq;
 
 static int css_end_grace_period(struct device *dev, void *data)
@@ -394,10 +491,29 @@ void css_wait_for_slow_path(void)
 	flush_workqueue(slow_path_wq);
 }
 
+void css_schedule_eval(struct subchannel_id schid)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&slow_subchannel_lock, flags);
+	idset_sch_add(slow_subchannel_set, schid);
+	queue_work(slow_path_wq, &slow_path_work);
+	spin_unlock_irqrestore(&slow_subchannel_lock, flags);
+}
+
+void css_schedule_eval_all(void)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&slow_subchannel_lock, flags);
+	idset_fill(slow_subchannel_set);
+	queue_work(slow_path_wq, &slow_path_work);
+	spin_unlock_irqrestore(&slow_subchannel_lock, flags);
+}
+
 /* Reprobe subchannel if unregistered. */
 static int reprobe_subchannel(struct subchannel_id schid, void *data)
 {
-	struct subchannel *sch;
 	int ret;
 
 	CIO_DEBUG(KERN_INFO, 6, "cio: reprobe 0.%x.%04x\n",
@@ -405,13 +521,6 @@ static int reprobe_subchannel(struct subchannel_id schid, void *data)
 	if (need_reprobe)
 		return -EAGAIN;
 
-	sch = get_subchannel_by_schid(schid);
-	if (sch) {
-		/* Already known. */
-		put_device(&sch->dev);
-		return 0;
-	}
-
 	ret = css_probe_device(schid);
 	switch (ret) {
 	case 0:
@@ -429,7 +538,7 @@ static int reprobe_subchannel(struct subchannel_id schid, void *data)
 }
 
 /* Work function used to reprobe all unregistered subchannels. */
-static void reprobe_all(void *data)
+static void reprobe_all(struct work_struct *unused)
 {
 	int ret;
 
@@ -439,13 +548,13 @@ static void reprobe_all(void *data)
 	/* Make sure initial subchannel scan is done. */
 	wait_event(ccw_device_init_wq,
 		   atomic_read(&ccw_device_init_count) == 0);
-	ret = for_each_subchannel(reprobe_subchannel, NULL);
+	ret = for_each_subchannel_staged(NULL, reprobe_subchannel, NULL);
 
 	CIO_MSG_EVENT(2, "reprobe done (rc=%d, need_reprobe=%d)\n", ret,
 		      need_reprobe);
 }
 
-DECLARE_WORK(css_reprobe_work, reprobe_all, NULL);
+DECLARE_WORK(css_reprobe_work, (workfunc)reprobe_all, NULL);
 
 /* Schedule reprobing of all unregistered subchannels. */
 void css_schedule_reprobe(void)
@@ -457,33 +566,14 @@ void css_schedule_reprobe(void)
 EXPORT_SYMBOL_GPL(css_schedule_reprobe);
 
 /*
- * Rescan for new devices. FIXME: This is slow.
- * This function is called when we have lost CRWs due to overflows and we have
- * to do subchannel housekeeping.
- */
-void
-css_reiterate_subchannels(void)
-{
-	css_clear_subchannel_slow_list();
-	need_rescan = 1;
-}
-
-/*
  * Called from the machine check handler for subchannel report words.
  */
-int
-css_process_crw(int rsid1, int rsid2)
+void css_process_crw(int rsid1, int rsid2)
 {
-	int ret;
 	struct subchannel_id mchk_schid;
 
 	CIO_CRW_EVENT(2, "source is subchannel %04X, subsystem id %x\n",
 		      rsid1, rsid2);
-
-	if (need_rescan)
-		/* We need to iterate all subchannels anyway. */
-		return -EAGAIN;
-
 	init_subchannel_id(&mchk_schid);
 	mchk_schid.sch_no = rsid1;
 	if (rsid2 != 0)
@@ -494,14 +584,7 @@ css_process_crw(int rsid1, int rsid2)
 	 * use stsch() to find out if the subchannel in question has come
 	 * or gone.
 	 */
-	ret = css_evaluate_subchannel(mchk_schid, 0);
-	if (ret == -EAGAIN) {
-		if (css_enqueue_subchannel_slow(mchk_schid)) {
-			css_clear_subchannel_slow_list();
-			need_rescan = 1;
-		}
-	}
-	return ret;
+	css_evaluate_subchannel(mchk_schid, 0);
 }
 
 static int __init
@@ -779,47 +862,6 @@ struct bus_type css_bus_type = {
 
 subsys_initcall(init_channel_subsystem);
 
-int
-css_enqueue_subchannel_slow(struct subchannel_id schid)
-{
-	struct slow_subchannel *new_slow_sch;
-	unsigned long flags;
-
-	new_slow_sch = kzalloc(sizeof(struct slow_subchannel), GFP_ATOMIC);
-	if (!new_slow_sch)
-		return -ENOMEM;
-	new_slow_sch->schid = schid;
-	spin_lock_irqsave(&slow_subchannel_lock, flags);
-	list_add_tail(&new_slow_sch->slow_list, &slow_subchannels_head);
-	spin_unlock_irqrestore(&slow_subchannel_lock, flags);
-	return 0;
-}
-
-void
-css_clear_subchannel_slow_list(void)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&slow_subchannel_lock, flags);
-	while (!list_empty(&slow_subchannels_head)) {
-		struct slow_subchannel *slow_sch =
-			list_entry(slow_subchannels_head.next,
-				   struct slow_subchannel, slow_list);
-
-		list_del_init(slow_subchannels_head.next);
-		kfree(slow_sch);
-	}
-	spin_unlock_irqrestore(&slow_subchannel_lock, flags);
-}
-
-
-
-int
-css_slow_subchannels_exist(void)
-{
-	return (!list_empty(&slow_subchannels_head));
-}
-
 MODULE_LICENSE("GPL");
 EXPORT_SYMBOL(css_bus_type);
 EXPORT_SYMBOL_GPL(css_characteristics_avail);
diff --git a/drivers/s390/cio/css.h b/drivers/s390/cio/css.h
index 2f55aaf..87a39a3 100644
--- a/drivers/s390/cio/css.h
+++ b/drivers/s390/cio/css.h
@@ -144,7 +144,11 @@ extern int css_sch_device_register(struct subchannel *);
 extern void css_sch_device_unregister(struct subchannel *);
 extern struct subchannel * get_subchannel_by_schid(struct subchannel_id);
 extern int css_init_done;
+int for_each_subchannel_staged(int (*fn_known)(struct subchannel *, void *),
+			       int (*fn_unknown)(struct subchannel_id,
+			       void *), void *data);
 extern int for_each_subchannel(int(*fn)(struct subchannel_id, void *), void *);
+extern void css_process_crw(int, int);
 
 #define __MAX_SUBCHANNEL 65535
 #define __MAX_SSID 3
@@ -185,13 +189,9 @@ void device_wake_up_wait_q(struct subchannel *sch);
 void device_kill_pending_timer(struct subchannel *);
 
 /* Helper functions to build lists for the slow path. */
-extern int css_enqueue_subchannel_slow(struct subchannel_id schid);
-void css_walk_subchannel_slow_list(void (*fn)(unsigned long));
-void css_clear_subchannel_slow_list(void);
-int css_slow_subchannels_exist(void);
-extern int need_rescan;
+void css_schedule_eval(struct subchannel_id schid);
+void css_schedule_eval_all(void);
 
 extern struct workqueue_struct *slow_path_wq;
-extern struct work_struct slow_path_work;
 void css_wait_for_slow_path(void);
 #endif
diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c
index 1b896df..b2f590e 100644
--- a/drivers/s390/cio/device_fsm.c
+++ b/drivers/s390/cio/device_fsm.c
@@ -302,10 +302,8 @@ __recover_lost_chpids(struct subchannel *sch, int old_lpm)
 		if (old_lpm & mask)
 			continue;
 		chpid.id = sch->schib.pmcw.chpid[i];
-		if (!chp_is_registered(chpid)) {
-			need_rescan = 1;
-			queue_work(slow_path_wq, &slow_path_work);
-		}
+		if (!chp_is_registered(chpid))
+			css_schedule_eval_all();
 	}
 }
 
@@ -806,11 +804,7 @@ static void ccw_device_generic_notoper(struct ccw_device *cdev,
 
 	cdev->private->state = DEV_STATE_NOT_OPER;
 	sch = to_subchannel(cdev->dev.parent);
-	if (css_enqueue_subchannel_slow(sch->schid)) {
-		css_clear_subchannel_slow_list();
-		need_rescan = 1;
-	}
-	queue_work(slow_path_wq, &slow_path_work);
+	css_schedule_eval(sch->schid);
 }
 
 /*
diff --git a/drivers/s390/cio/idset.c b/drivers/s390/cio/idset.c
new file mode 100644
index 0000000..cf8f24a
--- /dev/null
+++ b/drivers/s390/cio/idset.c
@@ -0,0 +1,112 @@
+/*
+ *  drivers/s390/cio/idset.c
+ *
+ *    Copyright IBM Corp. 2007
+ *    Author(s): Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
+ */
+
+#include <linux/vmalloc.h>
+#include <linux/bitops.h>
+#include "idset.h"
+#include "css.h"
+
+struct idset {
+	int num_ssid;
+	int num_id;
+	unsigned long bitmap[0];
+};
+
+static inline unsigned long bitmap_size(int num_ssid, int num_id)
+{
+	return __BITOPS_WORDS(num_ssid * num_id) * sizeof(unsigned long);
+}
+
+static struct idset *idset_new(int num_ssid, int num_id)
+{
+	struct idset *set;
+
+	set = vmalloc(sizeof(struct idset) + bitmap_size(num_ssid, num_id));
+	if (set) {
+		set->num_ssid = num_ssid;
+		set->num_id = num_id;
+		memset(set->bitmap, 0, bitmap_size(num_ssid, num_id));
+	}
+	return set;
+}
+
+void idset_free(struct idset *set)
+{
+	vfree(set);
+}
+
+void idset_clear(struct idset *set)
+{
+	memset(set->bitmap, 0, bitmap_size(set->num_ssid, set->num_id));
+}
+
+void idset_fill(struct idset *set)
+{
+	memset(set->bitmap, 0xff, bitmap_size(set->num_ssid, set->num_id));
+}
+
+static inline void idset_add(struct idset *set, int ssid, int id)
+{
+	set_bit(ssid * set->num_id + id, set->bitmap);
+}
+
+static inline void idset_del(struct idset *set, int ssid, int id)
+{
+	clear_bit(ssid * set->num_id + id, set->bitmap);
+}
+
+static inline int idset_contains(struct idset *set, int ssid, int id)
+{
+	return test_bit(ssid * set->num_id + id, set->bitmap);
+}
+
+static inline int idset_get_first(struct idset *set, int *ssid, int *id)
+{
+	int bitnum;
+
+	bitnum = find_first_bit(set->bitmap, set->num_ssid * set->num_id);
+	if (bitnum >= set->num_ssid * set->num_id)
+		return 0;
+	*ssid = bitnum / set->num_id;
+	*id = bitnum % set->num_id;
+	return 1;
+}
+
+struct idset *idset_sch_new(void)
+{
+	return idset_new(__MAX_SSID + 1, __MAX_SUBCHANNEL + 1);
+}
+
+void idset_sch_add(struct idset *set, struct subchannel_id schid)
+{
+	idset_add(set, schid.ssid, schid.sch_no);
+}
+
+void idset_sch_del(struct idset *set, struct subchannel_id schid)
+{
+	idset_del(set, schid.ssid, schid.sch_no);
+}
+
+int idset_sch_contains(struct idset *set, struct subchannel_id schid)
+{
+	return idset_contains(set, schid.ssid, schid.sch_no);
+}
+
+int idset_sch_get_first(struct idset *set, struct subchannel_id *schid)
+{
+	int ssid = 0;
+	int id = 0;
+	int rc;
+
+	rc = idset_get_first(set, &ssid, &id);
+	if (rc) {
+		init_subchannel_id(schid);
+		schid->ssid = ssid;
+		schid->sch_no = id;
+	}
+	return rc;
+}
diff --git a/drivers/s390/cio/idset.h b/drivers/s390/cio/idset.h
new file mode 100644
index 0000000..144466a
--- /dev/null
+++ b/drivers/s390/cio/idset.h
@@ -0,0 +1,25 @@
+/*
+ *  drivers/s390/cio/idset.h
+ *
+ *    Copyright IBM Corp. 2007
+ *    Author(s): Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
+ */
+
+#ifndef S390_IDSET_H
+#define S390_IDSET_H S390_IDSET_H
+
+#include "schid.h"
+
+struct idset;
+
+void idset_free(struct idset *set);
+void idset_clear(struct idset *set);
+void idset_fill(struct idset *set);
+
+struct idset *idset_sch_new(void);
+void idset_sch_add(struct idset *set, struct subchannel_id id);
+void idset_sch_del(struct idset *set, struct subchannel_id id);
+int idset_sch_contains(struct idset *set, struct subchannel_id id);
+int idset_sch_get_first(struct idset *set, struct subchannel_id *id);
+
+#endif /* S390_IDSET_H */
diff --git a/drivers/s390/s390mach.c b/drivers/s390/s390mach.c
index 5399c5d..7c176af 100644
--- a/drivers/s390/s390mach.c
+++ b/drivers/s390/s390mach.c
@@ -16,7 +16,7 @@
 #include <linux/kthread.h>
 
 #include <asm/lowcore.h>
-
+#include "cio/css.h"
 #include "s390mach.h"
 
 #define DBG printk
@@ -24,7 +24,6 @@
 
 static struct semaphore m_sem;
 
-extern int css_process_crw(int, int);
 extern int chsc_process_crw(void);
 extern int chp_process_crw(int, int);
 extern void css_reiterate_subchannels(void);
@@ -51,14 +50,13 @@ static int
 s390_collect_crw_info(void *param)
 {
 	struct crw crw[2];
-	int ccode, ret, slow;
+	int ccode;
 	struct semaphore *sem;
 	unsigned int chain;
 
 	sem = (struct semaphore *)param;
 repeat:
 	down_interruptible(sem);
-	slow = 0;
 	chain = 0;
 	while (1) {
 		if (unlikely(chain > 1)) {
@@ -91,9 +89,8 @@ repeat:
 		/* Check for overflows. */
 		if (crw[chain].oflw) {
 			pr_debug("%s: crw overflow detected!\n", __FUNCTION__);
-			css_reiterate_subchannels();
+			css_schedule_eval_all();
 			chain = 0;
-			slow = 1;
 			continue;
 		}
 		switch (crw[chain].rsc) {
@@ -101,10 +98,7 @@ repeat:
 			if (crw[0].chn && !chain)
 				break;
 			pr_debug("source is subchannel %04X\n", crw[0].rsid);
-			ret = css_process_crw (crw[0].rsid,
-					       chain ? crw[1].rsid : 0);
-			if (ret == -EAGAIN)
-				slow = 1;
+			css_process_crw(crw[0].rsid, chain ? crw[1].rsid : 0);
 			break;
 		case CRW_RSC_MONITOR:
 			pr_debug("source is monitoring facility\n");
@@ -123,28 +117,23 @@ repeat:
 			}
 			switch (crw[0].erc) {
 			case CRW_ERC_IPARM: /* Path has come. */
-				ret = chp_process_crw(crw[0].rsid, 1);
+				chp_process_crw(crw[0].rsid, 1);
 				break;
 			case CRW_ERC_PERRI: /* Path has gone. */
 			case CRW_ERC_PERRN:
-				ret = chp_process_crw(crw[0].rsid, 0);
+				chp_process_crw(crw[0].rsid, 0);
 				break;
 			default:
 				pr_debug("Don't know how to handle erc=%x\n",
 					 crw[0].erc);
-				ret = 0;
 			}
-			if (ret == -EAGAIN)
-				slow = 1;
 			break;
 		case CRW_RSC_CONFIG:
 			pr_debug("source is configuration-alert facility\n");
 			break;
 		case CRW_RSC_CSS:
 			pr_debug("source is channel subsystem\n");
-			ret = chsc_process_crw();
-			if (ret == -EAGAIN)
-				slow = 1;
+			chsc_process_crw();
 			break;
 		default:
 			pr_debug("unknown source\n");
@@ -153,8 +142,6 @@ repeat:
 		/* chain is always 0 or 1 here. */
 		chain = crw[chain].chn ? chain + 1 : 0;
 	}
-	if (slow)
-		queue_work(slow_path_wq, &slow_path_work);
 	goto repeat;
 	return 0;
 }