Sophie

Sophie

distrib > Scientific%20Linux > 5x > x86_64 > by-pkgid > fc11cd6e1c513a17304da94a5390f3cd > files > 4231

kernel-2.6.18-194.11.1.el5.src.rpm

From: Markus Armbruster <armbru@redhat.com>
Date: Mon, 4 Aug 2008 12:18:53 -0400
Subject: [xen] event channel lock and barrier
Message-id: m34p603g9e.fsf@crossbow.pond.sub.org
O-Subject: [PATCH RHEL-5.3] Event channel lock and barrier
Bugzilla: 457086
RH-Acked-by: Chris Lalancette <clalance@redhat.com>
RH-Acked-by: Don Dutile <ddutile@redhat.com>

The evtchn driver is the kernel part of Xen's event channel
communication mechanism.

The version in RHEL-5 lacks locks and memory barriers.  This has led
to xenstore hangs on IA-64, but there's really no telling what else
the bug could break under load.

Fix backported from upstream, see patch for details.

I asked reporter (Fujitsu) to test the fix on IA-64.  I'll follow up
with results.

Bug 457086

Please ACK.

Trivially derived from

# HG changeset patch
# User kfraser@localhost.localdomain
# Date 1175086323 -3600
# Node ID 77b210daefee957faa220e94501ace6bf50e3b97
# Parent  7e6ef2b914aa739c51f15b0700aa54de41b4707d
linux evtchn: Read function in evtchn driver requires locking.
Signed-off-by: Keir Fraser <keir@xensource.com>

# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1216724382 -3600
# Node ID 8a3dc4fdb4785447398e983c22241c38f128663b
# Parent  905f275ed4d8a7bca99dba273e9bc838f605e8b9
linux/evtchn: Add memory barriers to evtchn ring accesses.

Xenstore infrequently hangs up on IA64.
Actually the xenstored is still alive but no response from
xenstore-XXX commands.

After tracking down, I've found that evtchn_read() infrequently
returns a wrong evtchn port number and evtchn_write() never
unmask the exact port.

Signed-off-by: Kouya Shimura <kouya@jp.fujitsu.com>

Yes, updates of the ring_prod and ring_cons are separately protected
by different locks/mutexes, but the data communication between
producer and consumer is lock-free. Barriers are needed.

Acked-by: Keir Fraser <keir.fraser@citrix.com>

diff --git a/drivers/xen/evtchn/evtchn.c b/drivers/xen/evtchn/evtchn.c
index 4323b58..d9b95c3 100644
--- a/drivers/xen/evtchn/evtchn.c
+++ b/drivers/xen/evtchn/evtchn.c
@@ -47,6 +47,7 @@
 #include <linux/irq.h>
 #include <linux/init.h>
 #include <linux/gfp.h>
+#include <linux/mutex.h>
 #include <xen/evtchn.h>
 #include <xen/public/evtchn.h>
 
@@ -56,6 +57,7 @@ struct per_user_data {
 #define EVTCHN_RING_MASK(_i) ((_i)&(EVTCHN_RING_SIZE-1))
 	evtchn_port_t *ring;
 	unsigned int ring_cons, ring_prod, ring_overflow;
+	struct mutex ring_cons_mutex; /* protect against concurrent readers */
 
 	/* Processes wait on this queue when ring is empty. */
 	wait_queue_head_t evtchn_wait;
@@ -78,6 +80,7 @@ void evtchn_device_upcall(int port)
 	if ((u = port_user[port]) != NULL) {
 		if ((u->ring_prod - u->ring_cons) < EVTCHN_RING_SIZE) {
 			u->ring[EVTCHN_RING_MASK(u->ring_prod)] = port;
+			wmb(); /* Ensure ring contents visible */
 			if (u->ring_cons == u->ring_prod++) {
 				wake_up_interruptible(&u->evtchn_wait);
 				kill_fasync(&u->evtchn_async_queue,
@@ -108,12 +111,17 @@ static ssize_t evtchn_read(struct file *file, char __user *buf,
 		count = PAGE_SIZE;
 
 	for (;;) {
+		mutex_lock(&u->ring_cons_mutex);
+
+		rc = -EFBIG;
 		if (u->ring_overflow)
-			return -EFBIG;
+			goto unlock_out;
 
 		if ((c = u->ring_cons) != (p = u->ring_prod))
 			break;
 
+		mutex_unlock(&u->ring_cons_mutex);
+
 		if (file->f_flags & O_NONBLOCK)
 			return -EAGAIN;
 
@@ -141,20 +149,25 @@ static ssize_t evtchn_read(struct file *file, char __user *buf,
 		bytes2 = count - bytes1;
 	}
 
+	rc = -EFAULT;
+	rmb(); /* Ensure that we see the port before we copy it. */
 	if (copy_to_user(buf, &u->ring[EVTCHN_RING_MASK(c)], bytes1) ||
 	    ((bytes2 != 0) &&
 	     copy_to_user(&buf[bytes1], &u->ring[0], bytes2)))
-		return -EFAULT;
+		goto unlock_out;
 
 	u->ring_cons += (bytes1 + bytes2) / sizeof(evtchn_port_t);
+	rc = bytes1 + bytes2;
 
-	return bytes1 + bytes2;
+ unlock_out:
+	mutex_unlock(&u->ring_cons_mutex);
+	return rc;
 }
 
 static ssize_t evtchn_write(struct file *file, const char __user *buf,
 			    size_t count, loff_t *ppos)
 {
-	int  rc, i;
+	int rc, i;
 	evtchn_port_t *kbuf = (evtchn_port_t *)__get_free_page(GFP_KERNEL);
 	struct per_user_data *u = file->private_data;
 
@@ -164,18 +177,16 @@ static ssize_t evtchn_write(struct file *file, const char __user *buf,
 	/* Whole number of ports. */
 	count &= ~(sizeof(evtchn_port_t)-1);
 
-	if (count == 0) {
-		rc = 0;
+	rc = 0;
+	if (count == 0)
 		goto out;
-	}
 
 	if (count > PAGE_SIZE)
 		count = PAGE_SIZE;
 
-	if (copy_from_user(kbuf, buf, count) != 0) {
-		rc = -EFAULT;
+	rc = -EFAULT;
+	if (copy_from_user(kbuf, buf, count) != 0)
 		goto out;
-	}
 
 	spin_lock_irq(&port_user_lock);
 	for (i = 0; i < (count/sizeof(evtchn_port_t)); i++)
@@ -321,9 +332,11 @@ static int evtchn_ioctl(struct inode *inode, struct file *file,
 
 	case IOCTL_EVTCHN_RESET: {
 		/* Initialise the ring to empty. Clear errors. */
+		mutex_lock(&u->ring_cons_mutex);
 		spin_lock_irq(&port_user_lock);
 		u->ring_cons = u->ring_prod = u->ring_overflow = 0;
 		spin_unlock_irq(&port_user_lock);
+		mutex_unlock(&u->ring_cons_mutex);
 		rc = 0;
 		break;
 	}
@@ -371,6 +384,8 @@ static int evtchn_open(struct inode *inode, struct file *filp)
 		return -ENOMEM;
 	}
 
+	mutex_init(&u->ring_cons_mutex);
+
 	filp->private_data = u;
 
 	return 0;