
kernel-2.6.18-128.1.10.el5.src.rpm

From: AMEET M. PARANJAPE <aparanja@redhat.com>
Date: Mon, 27 Oct 2008 14:19:58 -0500
Subject: [openib] race in ipoib_cm_post_receive_nonsrq
Message-id: 4906145E.6030706@REDHAT.COM
O-Subject: Re: [Fwd: Re: [PATCH RHEL5.3 BZ463485] Race in ipoib_cm_post_receive_nonsrq()]
Bugzilla: 463485
RH-Acked-by: Doug Ledford <dledford@redhat.com>
RH-Acked-by: David Howells <dhowells@redhat.com>

RHBZ#
======
https://bugzilla.redhat.com/show_bug.cgi?id=463485

Description:
===========
If a customer is running with ehca and has enabled connected mode (CM), they
will see random kernel panics because of a race condition.

In the non-SRQ receive path, the receive Work Request/Scatter-Gather List
(WR/SGL) kept in the per-device private data is filled in and posted without
any locking, so concurrent callers of ipoib_cm_post_receive_nonsrq() can
overwrite each other's scatter addresses.  This patch eliminates the race by
having ipoib_cm_nonsrq_init_rx() allocate and pass its own WR/SGL to
ipoib_cm_post_receive_nonsrq() instead of using the shared copy.
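
For illustration only, here is a minimal userspace sketch of why one shared
WR/SGL is unsafe.  It is not kernel code: NUM_FRAGS, struct fake_sge and
post_receives() are made-up stand-ins for the real ib_sge array and
ib_post_recv() call; the scheduling-dependent interleaving is the point.

/*
 * Illustrative only: stand-in types, not the real ib_recv_wr/ib_sge.
 * Two "connections" fill the one shared scatter list before "posting"
 * it, so each can end up posting the other's buffer addresses.
 * Build with: gcc -pthread race-sketch.c
 */
#include <pthread.h>
#include <stdio.h>

#define NUM_FRAGS 4

struct fake_sge {
	unsigned long addr;
};

/* One shared scatter list per device, analogous to priv->cm.rx_sge. */
static struct fake_sge shared_sge[NUM_FRAGS];

static void *post_receives(void *arg)
{
	unsigned long base = (unsigned long)arg;
	int i;

	/* Racy: both threads write the same shared_sge[] entries. */
	for (i = 0; i < NUM_FRAGS; ++i)
		shared_sge[i].addr = base + i;

	/* "Post" the receive: the list may now mix both connections. */
	for (i = 0; i < NUM_FRAGS; ++i)
		if ((shared_sge[i].addr & 0xf000) != base)
			printf("posted a fragment from the other connection\n");

	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, post_receives, (void *)0x1000UL);
	pthread_create(&b, NULL, post_receives, (void *)0x2000UL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	return 0;
}

Whether the message prints depends on scheduling, which is why the original
code only fails intermittently; the patch below removes the sharing by giving
ipoib_cm_nonsrq_init_rx() a private WR/SGL.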

RHEL Version Found:
================
RHEL 5.2

kABI Status:
============
No symbols were harmed.

Brew:
=====
Built on all platforms.
http://brewweb.devel.redhat.com/brew/taskinfo?taskID=1500347

Upstream Status:
================
Here is the community posting:
http://lists.openfabrics.org/pipermail/general/2008-June/052233.html

Test Status:
============
This patch was tested on two separate systems running multiple netperf
streams with ib_ipoib set to connected mode.  The test ran for 24 hours
without error.  Without the patch the test fails in less than 2 hours every
time.

===============================================================

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 5eba8af..5f87d20 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -145,18 +145,20 @@ static int ipoib_cm_post_receive_srq(struct net_device *dev, int id, int pi)
 }
 
 static int ipoib_cm_post_receive_nonsrq(struct net_device *dev,
-					struct ipoib_cm_rx *rx, int id)
+					struct ipoib_cm_rx *rx,
+					struct ib_recv_wr *wr,
+					struct ib_sge *sge, int id)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct ib_recv_wr *bad_wr;
 	int i, ret;
 
-	priv->cm.rx_wr.wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV;
+	wr->wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV;
 
 	for (i = 0; i < IPOIB_CM_RX_SG; ++i)
-		priv->cm.rx_sge[i].addr = rx->rx_ring[id].mapping[i];
+		sge[i].addr = rx->rx_ring[id].mapping[i];
 
-	ret = ib_post_recv(rx->qp, &priv->cm.rx_wr, &bad_wr);
+	ret = ib_post_recv(rx->qp, wr, &bad_wr);
 	if (unlikely(ret)) {
 		ipoib_warn(priv, "post recv failed for buf %d (%d)\n", id, ret);
 		ipoib_cm_dma_unmap_rx(priv, IPOIB_CM_RX_SG - 1,
@@ -357,10 +359,35 @@ static int ipoib_cm_modify_rx_qp(struct net_device *dev,
 	return 0;
 }
 
+static void ipoib_cm_init_rx_wr(struct net_device *dev,
+				struct ib_recv_wr *wr,
+				struct ib_sge *sge)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	int i;
+
+	for (i = 0; i < priv->cm.num_frags; ++i)
+		sge[i].lkey = priv->mr->lkey;
+
+	sge[0].length = IPOIB_CM_HEAD_SIZE;
+	for (i = 1; i < priv->cm.num_frags; ++i)
+		sge[i].length = PAGE_SIZE;
+
+	wr->next    = NULL;
+	wr->sg_list = sge;
+	wr->num_sge = priv->cm.num_frags;
+}
+
 static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_id,
 				   struct ipoib_cm_rx *rx)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct {
+		struct ib_recv_wr wr;
+		struct ib_sge sge[IPOIB_CM_RX_SG];
+	} *t;
+
 	int ret;
 	int i;
 
@@ -370,6 +397,13 @@ static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_i
 		"try using a lower value of recv_queue_size.");
 		return -ENOMEM;
 	}
+	t = kmalloc(sizeof *t, GFP_KERNEL);
+	if (!t) {
+		ret = -ENOMEM;
+		goto err_free_ring;
+	}
+
+	ipoib_cm_init_rx_wr(dev, &t->wr, t->sge);
 
 	spin_lock_irq(&priv->lock);
 
@@ -389,8 +423,8 @@ static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_i
 			ipoib_warn(priv, "failed to allocate receive buffer %d\n", i);
 				ret = -ENOMEM;
 				goto err_count;
-			}
-		ret = ipoib_cm_post_receive_nonsrq(dev, rx, i);
+		}
+		ret = ipoib_cm_post_receive_nonsrq(dev, rx, &t->wr, t->sge, i);
 		if (ret) {
 			ipoib_warn(priv, "ipoib_cm_post_receive_nonsrq "
 				   "failed for buf %d\n", i);
@@ -401,6 +435,8 @@ static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_i
 
 	rx->recv_count = ipoib_recvq_size;
 
+	kfree(t);
+
 	return 0;
 
 err_count:
@@ -409,6 +445,8 @@ err_count:
 	spin_unlock_irq(&priv->lock);
 
 err_free:
+	kfree(t);
+err_free_ring:
 	ipoib_cm_free_rx_ring(dev, rx->rx_ring);
 
 	return ret;
@@ -677,7 +715,10 @@ repost:
 			ipoib_warn(priv, "ipoib_cm_post_receive_srq failed "
 				   "for buf %d\n", wr_id);
 	} else {
-		if (unlikely(ipoib_cm_post_receive_nonsrq(dev, p, wr_id))) {
+		if (unlikely(ipoib_cm_post_receive_nonsrq(dev, p,
+							&priv->cm.rx_wr,
+							priv->cm.rx_sge,
+							wr_id))) {
 			--p->recv_count;
 			ipoib_warn(priv, "ipoib_cm_post_receive_nonsrq failed "
 				   "for buf %d\n", wr_id);
@@ -1522,7 +1563,7 @@ destory_srq:
 int ipoib_cm_dev_init(struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
-	int i, ret, j;
+	int i, ret;
 	struct ib_device_attr attr;
 
 	INIT_LIST_HEAD(&priv->cm.passive_ids);
@@ -1560,30 +1601,7 @@ int ipoib_cm_dev_init(struct net_device *dev)
 		priv->cm.num_frags  = IPOIB_CM_RX_SG;
 	}
 
-	for (i = 0; i < priv->cm.num_frags; ++i)
-		priv->cm.rx_sge[i].lkey	= priv->mr->lkey;
-
-	if (ipoib_cm_has_srq(dev)) {
-		for (j = 0; j < ipoib_recvq_size; ++j) {
-			for (i = 0; i < priv->cm.num_frags; ++i)
-				priv->cm.rx_wr_arr[j].rx_sge[i].lkey = priv->mr->lkey;
-
-			priv->cm.rx_wr_arr[j].rx_sge[0].length = IPOIB_CM_HEAD_SIZE;
-			for (i = 1; i < priv->cm.num_frags; ++i)
-				priv->cm.rx_wr_arr[j].rx_sge[i].length = PAGE_SIZE;
-
-			priv->cm.rx_wr_arr[j].wr.sg_list = priv->cm.rx_wr_arr[j].rx_sge;
-			priv->cm.rx_wr_arr[j].wr.num_sge = priv->cm.num_frags;
-		}
-        	priv->cm.head = &priv->cm.rx_wr_arr[0];
-	}
-
-	priv->cm.rx_sge[0].length = IPOIB_CM_HEAD_SIZE;
-	for (i = 1; i < priv->cm.num_frags; ++i)
-		priv->cm.rx_sge[i].length = PAGE_SIZE;
-	priv->cm.rx_wr.next = NULL;
-	priv->cm.rx_wr.sg_list = priv->cm.rx_sge;
-	priv->cm.rx_wr.num_sge = priv->cm.num_frags;
+	ipoib_cm_init_rx_wr(dev, &priv->cm.rx_wr, priv->cm.rx_sge);
 
 	if (ipoib_cm_has_srq(dev)) {
 		for (i = 0; i < ipoib_recvq_size; ++i) {