Sophie

Sophie

distrib > Scientific%20Linux > 5x > x86_64 > by-pkgid > fc11cd6e1c513a17304da94a5390f3cd > files > 3073

kernel-2.6.18-194.11.1.el5.src.rpm

From: Hans-Joachim Picht <hpicht@redhat.com>
Date: Fri, 25 Apr 2008 15:08:26 +0200
Subject: [s390] cio: avoid machine check vs. not operational race
Message-id: 20080425130826.GE22728@redhat.com
O-Subject: [RHEL5 U3 PATCH 5/5] s390 - cio: Avoid machine check vs. not operational races.
Bugzilla: 444082
RH-Acked-by: Pete Zaitcev <zaitcev@redhat.com>

Description
============

There was the possibilty that an action like ccw_device_set_offline()
triggered by a device gone machine check might trigger a not oper
event. Unfortunately, this could lead to the situation that we tried
to unregister a subchannel twice: Once from the slow path evaluation,
and once via the not oper event.

Fix this by always using the same mechanism
(css_enqueue_subchannel_slow()) for triggering the unregister. This
makes sure that unregistration will only be done once. As an added
bonus, it also simplyfies the code.

Bugzilla
=========

BZ 444082
https://bugzilla.redhat.com/show_bug.cgi?id=444082

Upstream status of the patch:
=============================

Patch is contained in linux-2.6 as git commit
3f4cf6e72f9f6a0b046b32881acc4f829f3aaa46

Test status:
============

The patch has been tested and fixes the problem.
The fix has been verified by the IBM test department.

Please ACK.

With best regards,

Hans

diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c
index 287aa30..ce01b63 100644
--- a/drivers/s390/cio/device.c
+++ b/drivers/s390/cio/device.c
@@ -726,8 +726,7 @@ out:
 		wake_up(&ccw_device_init_wq);
 }
 
-void
-ccw_device_call_sch_unregister(void *data)
+static void ccw_device_call_sch_unregister(void *data)
 {
 	struct ccw_device *cdev = data;
 	struct subchannel *sch;
diff --git a/drivers/s390/cio/device.h b/drivers/s390/cio/device.h
index 4f936b2..bd6c8f3 100644
--- a/drivers/s390/cio/device.h
+++ b/drivers/s390/cio/device.h
@@ -81,7 +81,6 @@ int ccw_device_cancel_halt_clear(struct ccw_device *);
 
 int ccw_device_register(struct ccw_device *);
 void ccw_device_do_unreg_rereg(void *);
-void ccw_device_call_sch_unregister(void *);
 void ccw_device_schedule_verify(struct ccw_device *cdev, unsigned long delay);
 
 int ccw_device_recognition(struct ccw_device *);
diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c
index 2cfe4e3..c2c326b 100644
--- a/drivers/s390/cio/device_fsm.c
+++ b/drivers/s390/cio/device_fsm.c
@@ -589,46 +589,6 @@ ccw_device_recog_timeout(struct ccw_device *cdev, enum dev_event dev_event)
 }
 
 
-static void
-ccw_device_nopath_notify(void *data)
-{
-	unsigned long flags;
-	struct ccw_device *cdev;
-	struct subchannel *sch;
-	int ret;
-
-	cdev = (struct ccw_device *)data;
-	sch = to_subchannel(cdev->dev.parent);
-	/* Extra sanity. */
-	if (sch->lpm)
-		return;
-	ret = (sch->driver && sch->driver->notify) ?
-		sch->driver->notify(&sch->dev, CIO_NO_PATH) : 0;
-	if (!ret) {
-		if (get_device(&sch->dev)) {
-			/* Driver doesn't want to keep device. */
-			cio_disable_subchannel(sch);
-			if (get_device(&cdev->dev)) {
-				PREPARE_WORK(&cdev->private->kick_work,
-					     ccw_device_call_sch_unregister,
-					     (void *)cdev);
-				queue_work(ccw_device_work,
-					   &cdev->private->kick_work);
-			} else
-				put_device(&sch->dev);
-		}
-	} else {
-		spin_lock_irqsave(cdev->ccwlock, flags);
-		cio_disable_subchannel(sch);
-		ccw_device_set_timeout(cdev, 0);
-		cdev->private->flags.fake_irb = 0;
-		cdev->private->state = DEV_STATE_DISCONNECTED;
-		ccw_device_schedule_verify(cdev, 3*HZ);
-		spin_unlock_irqrestore(cdev->ccwlock, flags);
-		wake_up(&cdev->private->wait_q);
-	}
-}
-
 void
 ccw_device_verify_done(struct ccw_device *cdev, int err)
 {
@@ -672,9 +632,7 @@ ccw_device_verify_done(struct ccw_device *cdev, int err)
 	default:
 		/* Reset oper notify indication after verify error. */
 		cdev->private->flags.donotify = 0;
-		PREPARE_WORK(&cdev->private->kick_work,
-			     ccw_device_nopath_notify, (void *)cdev);
-		queue_work(ccw_device_notify_work, &cdev->private->kick_work);
+		dev_fsm_event(cdev, DEV_EVENT_NOTOPER);
 		ccw_device_done(cdev, DEV_STATE_NOT_OPER);
 		break;
 	}
@@ -793,53 +751,19 @@ ccw_device_recog_notoper(struct ccw_device *cdev, enum dev_event dev_event)
 }
 
 /*
- * Handle not operational event while offline.
+ * Handle not operational event in non-special state.
  */
-static void
-ccw_device_offline_notoper(struct ccw_device *cdev, enum dev_event dev_event)
+static void ccw_device_generic_notoper(struct ccw_device *cdev,
+				       enum dev_event dev_event)
 {
 	struct subchannel *sch;
 
 	cdev->private->state = DEV_STATE_NOT_OPER;
 	sch = to_subchannel(cdev->dev.parent);
-	if (get_device(&cdev->dev)) {
-		PREPARE_WORK(&cdev->private->kick_work,
-			     ccw_device_call_sch_unregister, (void *)cdev);
-		queue_work(ccw_device_work, &cdev->private->kick_work);
+	if (css_enqueue_subchannel_slow(sch->schid)) {
+		css_clear_subchannel_slow_list();
+		need_rescan = 1;
 	}
-	wake_up(&cdev->private->wait_q);
-}
-
-/*
- * Handle not operational event while online.
- */
-static void
-ccw_device_online_notoper(struct ccw_device *cdev, enum dev_event dev_event)
-{
-	struct subchannel *sch;
-
-	sch = to_subchannel(cdev->dev.parent);
-	if (sch->driver->notify &&
-	    sch->driver->notify(&sch->dev, sch->lpm ? CIO_GONE : CIO_NO_PATH)) {
-			ccw_device_set_timeout(cdev, 0);
-			cdev->private->flags.fake_irb = 0;
-			cdev->private->state = DEV_STATE_DISCONNECTED;
-			ccw_device_schedule_verify(cdev, 3*HZ);
-			wake_up(&cdev->private->wait_q);
-			return;
-	}
-	cdev->private->state = DEV_STATE_NOT_OPER;
-	cio_disable_subchannel(sch);
-	if (sch->schib.scsw.actl != 0) {
-		// FIXME: not-oper indication to device driver ?
-		ccw_device_call_handler(cdev);
-	}
-	if (get_device(&cdev->dev)) {
-		PREPARE_WORK(&cdev->private->kick_work,
-			     ccw_device_call_sch_unregister, (void *)cdev);
-		queue_work(ccw_device_work, &cdev->private->kick_work);
-	}
-	wake_up(&cdev->private->wait_q);
 }
 
 /*
@@ -935,18 +859,9 @@ ccw_device_online_timeout(struct ccw_device *cdev, enum dev_event dev_event)
 		cdev->private->state = DEV_STATE_TIMEOUT_KILL;
 		return;
 	}
-	if (ret == -ENODEV) {
-		struct subchannel *sch;
-
-		sch = to_subchannel(cdev->dev.parent);
-		if (!sch->lpm) {
-			PREPARE_WORK(&cdev->private->kick_work,
-				     ccw_device_nopath_notify, (void *)cdev);
-			queue_work(ccw_device_notify_work,
-				   &cdev->private->kick_work);
-		} else
-			dev_fsm_event(cdev, DEV_EVENT_NOTOPER);
-	} else if (cdev->handler)
+	if (ret == -ENODEV)
+		dev_fsm_event(cdev, DEV_EVENT_NOTOPER);
+	else if (cdev->handler)
 		cdev->handler(cdev, cdev->private->intparm,
 			      ERR_PTR(-ETIMEDOUT));
 }
@@ -1033,11 +948,9 @@ ccw_device_killing_irq(struct ccw_device *cdev, enum dev_event dev_event)
 	if (cdev->handler)
 		cdev->handler(cdev, cdev->private->intparm,
 			      ERR_PTR(-ETIMEDOUT));
-	if (!sch->lpm) {
-		PREPARE_WORK(&cdev->private->kick_work,
-			     ccw_device_nopath_notify, (void *)cdev);
-		queue_work(ccw_device_notify_work, &cdev->private->kick_work);
-	} else if (cdev->private->flags.doverify)
+	if (!sch->lpm)
+		dev_fsm_event(cdev, DEV_EVENT_NOTOPER);
+	else if (cdev->private->flags.doverify)
 		/* Start delayed path verification. */
 		ccw_device_online_verify(cdev, 0);
 }
@@ -1053,16 +966,7 @@ ccw_device_killing_timeout(struct ccw_device *cdev, enum dev_event dev_event)
 		return;
 	}
 	if (ret == -ENODEV) {
-		struct subchannel *sch;
-
-		sch = to_subchannel(cdev->dev.parent);
-		if (!sch->lpm) {
-			PREPARE_WORK(&cdev->private->kick_work,
-				     ccw_device_nopath_notify, (void *)cdev);
-			queue_work(ccw_device_notify_work,
-				   &cdev->private->kick_work);
-		} else
-			dev_fsm_event(cdev, DEV_EVENT_NOTOPER);
+		dev_fsm_event(cdev, DEV_EVENT_NOTOPER);
 		return;
 	}
 	//FIXME: Can we get here?
@@ -1084,22 +988,14 @@ void device_kill_io(struct subchannel *sch)
 		return;
 	}
 	if (ret == -ENODEV) {
-		if (!sch->lpm) {
-			PREPARE_WORK(&cdev->private->kick_work,
-				ccw_device_nopath_notify, cdev);
-			queue_work(ccw_device_notify_work,
-				&cdev->private->kick_work);
-		} else
-			dev_fsm_event(cdev, DEV_EVENT_NOTOPER);
+		dev_fsm_event(cdev, DEV_EVENT_NOTOPER);
 		return;
 	}
 	if (cdev->handler)
 		cdev->handler(cdev, cdev->private->intparm, ERR_PTR(-EIO));
-	if (!sch->lpm) {
-		PREPARE_WORK(&cdev->private->kick_work,
-			ccw_device_nopath_notify, cdev);
-		queue_work(ccw_device_notify_work, &cdev->private->kick_work);
-	} else
+	if (!sch->lpm)
+		dev_fsm_event(cdev, DEV_EVENT_NOTOPER);
+	else
 		/* Start delayed path verification. */
 		ccw_device_online_verify(cdev, 0);
 }
@@ -1130,11 +1026,9 @@ ccw_device_wait4io_irq(struct ccw_device *cdev, enum dev_event dev_event)
 			ccw_device_set_timeout(cdev, 0);
 	/* Call the handler. */
 	ccw_device_call_handler(cdev);
-	if (!sch->lpm) {
-		PREPARE_WORK(&cdev->private->kick_work,
-			     ccw_device_nopath_notify, (void *)cdev);
-		queue_work(ccw_device_notify_work, &cdev->private->kick_work);
-	} else
+	if (!sch->lpm)
+		dev_fsm_event(cdev, DEV_EVENT_NOTOPER);
+	else
 		ccw_device_online_verify(cdev, 0);
 }
 
@@ -1153,23 +1047,15 @@ ccw_device_wait4io_timeout(struct ccw_device *cdev, enum dev_event dev_event)
 		return;
 	}
 	if (ret == -ENODEV) {
-		if (!sch->lpm) {
-			PREPARE_WORK(&cdev->private->kick_work,
-				     ccw_device_nopath_notify, (void *)cdev);
-			queue_work(ccw_device_notify_work,
-				   &cdev->private->kick_work);
-		} else
-			dev_fsm_event(cdev, DEV_EVENT_NOTOPER);
+		dev_fsm_event(cdev, DEV_EVENT_NOTOPER);
 		return;
 	}
 	if (cdev->handler)
 		cdev->handler(cdev, cdev->private->intparm,
 			      ERR_PTR(-ETIMEDOUT));
-	if (!sch->lpm) {
-		PREPARE_WORK(&cdev->private->kick_work,
-			     ccw_device_nopath_notify, (void *)cdev);
-		queue_work(ccw_device_notify_work, &cdev->private->kick_work);
-	} else if (cdev->private->flags.doverify)
+	if (!sch->lpm)
+		dev_fsm_event(cdev, DEV_EVENT_NOTOPER);
+	else if (cdev->private->flags.doverify)
 		/* Start delayed path verification. */
 		ccw_device_online_verify(cdev, 0);
 }
@@ -1352,7 +1238,7 @@ fsm_func_t *dev_jumptable[NR_DEV_STATES][NR_DEV_EVENTS] = {
 		[DEV_EVENT_VERIFY]	= ccw_device_nop,
 	},
 	[DEV_STATE_SENSE_PGID] = {
-		[DEV_EVENT_NOTOPER]	= ccw_device_online_notoper,
+		[DEV_EVENT_NOTOPER]	= ccw_device_generic_notoper,
 		[DEV_EVENT_INTERRUPT]	= ccw_device_sense_pgid_irq,
 		[DEV_EVENT_TIMEOUT]	= ccw_device_onoff_timeout,
 		[DEV_EVENT_VERIFY]	= ccw_device_nop,
@@ -1364,56 +1250,56 @@ fsm_func_t *dev_jumptable[NR_DEV_STATES][NR_DEV_EVENTS] = {
 		[DEV_EVENT_VERIFY]	= ccw_device_nop,
 	},
 	[DEV_STATE_OFFLINE] = {
-		[DEV_EVENT_NOTOPER]	= ccw_device_offline_notoper,
+		[DEV_EVENT_NOTOPER]	= ccw_device_generic_notoper,
 		[DEV_EVENT_INTERRUPT]	= ccw_device_offline_irq,
 		[DEV_EVENT_TIMEOUT]	= ccw_device_nop,
 		[DEV_EVENT_VERIFY]	= ccw_device_nop,
 	},
 	[DEV_STATE_VERIFY] = {
-		[DEV_EVENT_NOTOPER]	= ccw_device_online_notoper,
+		[DEV_EVENT_NOTOPER]	= ccw_device_generic_notoper,
 		[DEV_EVENT_INTERRUPT]	= ccw_device_verify_irq,
 		[DEV_EVENT_TIMEOUT]	= ccw_device_onoff_timeout,
 		[DEV_EVENT_VERIFY]	= ccw_device_delay_verify,
 	},
 	[DEV_STATE_ONLINE] = {
-		[DEV_EVENT_NOTOPER]	= ccw_device_online_notoper,
+		[DEV_EVENT_NOTOPER]	= ccw_device_generic_notoper,
 		[DEV_EVENT_INTERRUPT]	= ccw_device_irq,
 		[DEV_EVENT_TIMEOUT]	= ccw_device_online_timeout,
 		[DEV_EVENT_VERIFY]	= ccw_device_online_verify,
 	},
 	[DEV_STATE_W4SENSE] = {
-		[DEV_EVENT_NOTOPER]	= ccw_device_online_notoper,
+		[DEV_EVENT_NOTOPER]	= ccw_device_generic_notoper,
 		[DEV_EVENT_INTERRUPT]	= ccw_device_w4sense,
 		[DEV_EVENT_TIMEOUT]	= ccw_device_nop,
 		[DEV_EVENT_VERIFY]	= ccw_device_online_verify,
 	},
 	[DEV_STATE_DISBAND_PGID] = {
-		[DEV_EVENT_NOTOPER]	= ccw_device_online_notoper,
+		[DEV_EVENT_NOTOPER]	= ccw_device_generic_notoper,
 		[DEV_EVENT_INTERRUPT]	= ccw_device_disband_irq,
 		[DEV_EVENT_TIMEOUT]	= ccw_device_onoff_timeout,
 		[DEV_EVENT_VERIFY]	= ccw_device_nop,
 	},
 	[DEV_STATE_BOXED] = {
-		[DEV_EVENT_NOTOPER]	= ccw_device_offline_notoper,
+		[DEV_EVENT_NOTOPER]	= ccw_device_generic_notoper,
 		[DEV_EVENT_INTERRUPT]	= ccw_device_stlck_done,
 		[DEV_EVENT_TIMEOUT]	= ccw_device_stlck_done,
 		[DEV_EVENT_VERIFY]	= ccw_device_nop,
 	},
 	/* states to wait for i/o completion before doing something */
 	[DEV_STATE_CLEAR_VERIFY] = {
-		[DEV_EVENT_NOTOPER]     = ccw_device_online_notoper,
+		[DEV_EVENT_NOTOPER]     = ccw_device_generic_notoper,
 		[DEV_EVENT_INTERRUPT]   = ccw_device_clear_verify,
 		[DEV_EVENT_TIMEOUT]	= ccw_device_nop,
 		[DEV_EVENT_VERIFY]	= ccw_device_nop,
 	},
 	[DEV_STATE_TIMEOUT_KILL] = {
-		[DEV_EVENT_NOTOPER]	= ccw_device_online_notoper,
+		[DEV_EVENT_NOTOPER]	= ccw_device_generic_notoper,
 		[DEV_EVENT_INTERRUPT]	= ccw_device_killing_irq,
 		[DEV_EVENT_TIMEOUT]	= ccw_device_killing_timeout,
 		[DEV_EVENT_VERIFY]	= ccw_device_nop, //FIXME
 	},
 	[DEV_STATE_WAIT4IO] = {
-		[DEV_EVENT_NOTOPER]	= ccw_device_online_notoper,
+		[DEV_EVENT_NOTOPER]	= ccw_device_generic_notoper,
 		[DEV_EVENT_INTERRUPT]	= ccw_device_wait4io_irq,
 		[DEV_EVENT_TIMEOUT]	= ccw_device_wait4io_timeout,
 		[DEV_EVENT_VERIFY]	= ccw_device_delay_verify,