From: Hans-Joachim Picht <hpicht@redhat.com> Date: Wed, 22 Oct 2008 14:17:35 +0200 Subject: [s390] qdio: repair timeout handling for qdio_shutdown Message-id: 20081022121735.GC26181@redhat.com O-Subject: [RHEL5 U4 PATCH 3/5] s390 - qdio: Repair timeout handling for qdio_shutdown Bugzilla: 463164 RH-Acked-by: Pete Zaitcev <zaitcev@redhat.com> Description ============ If qdio shutdown runs in parallel with a channel error, the qdio_timeout_handler might not be triggered. In this case neither state INACTIVE nor state ERR is reached and the following wait_event hangs forever. Solution: do not make use of ccw_device_set_timeout(), but add a timeout to the following wait_event. And make sure, wake_up is called in case of an i/o error on the qdio-device. Bugzilla ========= BZ 463164 https://bugzilla.redhat.com/show_bug.cgi?id=463164 Upstream status of the patch: ============================= The problem has been fixed upstream as part of the qdio-driver rewrite. See: http://lkml.org/lkml/2008/7/1/240 Test status: ============ The patch has been tested and fixes the problem. The fix has been verified by the IBM test department. Please ACK. With best regards, --Hans diff --git a/drivers/s390/cio/qdio.c b/drivers/s390/cio/qdio.c index df55883..c10667e 100644 --- a/drivers/s390/cio/qdio.c +++ b/drivers/s390/cio/qdio.c @@ -2089,7 +2089,6 @@ qdio_timeout_handler(struct ccw_device *cdev) default: BUG(); } - ccw_device_set_timeout(cdev, 0); wake_up(&cdev->private->wait_q); } @@ -2128,6 +2127,8 @@ qdio_handler(struct ccw_device *cdev, unsigned long intparm, struct irb *irb) case -EIO: QDIO_PRINT_ERR("i/o error on device %s\n", cdev->dev.bus_id); + qdio_set_state(irq_ptr, QDIO_IRQ_STATE_ERR); + wake_up(&cdev->private->wait_q); return; case -ETIMEDOUT: qdio_timeout_handler(cdev); @@ -2674,12 +2675,12 @@ qdio_shutdown(struct ccw_device *cdev, int how) spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags); } else if (rc == 0) { qdio_set_state(irq_ptr, QDIO_IRQ_STATE_CLEANUP); - ccw_device_set_timeout(cdev, timeout); spin_unlock_irqrestore(get_ccwdev_lock(cdev),flags); - wait_event(cdev->private->wait_q, - irq_ptr->state == QDIO_IRQ_STATE_INACTIVE || - irq_ptr->state == QDIO_IRQ_STATE_ERR); + wait_event_interruptible_timeout(cdev->private->wait_q, + irq_ptr->state == QDIO_IRQ_STATE_INACTIVE || + irq_ptr->state == QDIO_IRQ_STATE_ERR, + timeout); } else { QDIO_PRINT_INFO("ccw_device_{halt,clear} returned %d for " "device %s\n", result, cdev->dev.bus_id); @@ -2699,7 +2700,6 @@ qdio_shutdown(struct ccw_device *cdev, int how) /* Ignore errors. */ qdio_set_state(irq_ptr, QDIO_IRQ_STATE_INACTIVE); - ccw_device_set_timeout(cdev, 0); out: up(&irq_ptr->setting_up_sema); return result; @@ -2913,13 +2913,10 @@ qdio_establish_handle_irq(struct ccw_device *cdev, int cstat, int dstat) QDIO_DBF_TEXT0(0,setup,dbf_text); QDIO_DBF_TEXT0(0,trace,dbf_text); - if (qdio_establish_irq_check_for_errors(cdev, cstat, dstat)) { - ccw_device_set_timeout(cdev, 0); + if (qdio_establish_irq_check_for_errors(cdev, cstat, dstat)) return; - } qdio_set_state(irq_ptr,QDIO_IRQ_STATE_ESTABLISHED); - ccw_device_set_timeout(cdev, 0); } int @@ -3195,8 +3192,6 @@ qdio_establish(struct qdio_initialize *init_data) irq_ptr->schid.ssid, irq_ptr->schid.sch_no, result, result2); result=result2; - if (result) - ccw_device_set_timeout(cdev, 0); } spin_unlock_irqrestore(get_ccwdev_lock(cdev),saveflags); @@ -3279,7 +3274,6 @@ qdio_activate(struct ccw_device *cdev, int flags) spin_lock_irqsave(get_ccwdev_lock(cdev),saveflags); - ccw_device_set_timeout(cdev, 0); ccw_device_set_options(cdev, CCWDEV_REPORT_ALL); result=ccw_device_start(cdev,&irq_ptr->ccw,QDIO_DOING_ACTIVATE, 0, DOIO_DENY_PREFETCH);