Sophie

Sophie

distrib > Mageia > 3 > i586 > media > core-release-src > by-pkgid > 0ffedb546e146cfbef3c4cdabc46b7c3 > files > 1

librdmacm-1.0.15-5.mga3.src.rpm

From e92afcbe5284095d55aad937b8c23333368d4d62 Mon Sep 17 00:00:00 2001
From: Sean Hefty <sean.hefty@intel.com>
Date: Fri, 16 Sep 2011 12:06:40 -0700
Subject: [PATCH 1/9] rdma/verbs: Fix race polling for completions

To avoid hanging in rdma_get_send_comp/rdma_get_recv_comp, we need to
rearm the CQ inside the polling loop.  If the CQ is armed,
the HCA will write an entry to the CQ, then generate a CQ
event.  However, a caller could poll the CQ, find the entry,
then attempt to rearm the CQ before the HCA generates the CQ
event.  In this case, the rearm call (ibv_req_notify_cq) will
act as a no-op, since the HCA hasn't finished generating the
event for the previous completion.  At this point, the event
will be queued.

A call to ibv_get_cq_event will then return that queued event,
but the subsequent poll will not find a CQ entry.  The CQ is now
not armed, so the next call to ibv_get_cq_event will block waiting
for an event that will never occur.

Problem was found in an rdma_cm example test under development.
The test can ping-pong messages between two applications.

Signed-off-by: Sean Hefty <sean.hefty@intel.com>
---
 include/rdma/rdma_verbs.h |   44 ++++++++++++++++++++++++++------------------
 1 files changed, 26 insertions(+), 18 deletions(-)

diff --git a/include/rdma/rdma_verbs.h b/include/rdma/rdma_verbs.h
index eca2c7a..2b1a961 100644
--- a/include/rdma/rdma_verbs.h
+++ b/include/rdma/rdma_verbs.h
@@ -254,23 +254,27 @@ rdma_get_send_comp(struct rdma_cm_id *id, struct ibv_wc *wc)
 	void *context;
 	int ret;
 
-	ret = ibv_poll_cq(id->send_cq, 1, wc);
-	if (ret)
-		goto out;
+	do {
+		ret = ibv_poll_cq(id->send_cq, 1, wc);
+		if (ret)
+			break;
 
-	ret = ibv_req_notify_cq(id->send_cq, 0);
-	if (ret)
-		return rdma_seterrno(ret);
+		ret = ibv_req_notify_cq(id->send_cq, 0);
+		if (ret)
+			return rdma_seterrno(ret);
+
+		ret = ibv_poll_cq(id->send_cq, 1, wc);
+		if (ret)
+			break;
 
-	while (!(ret = ibv_poll_cq(id->send_cq, 1, wc))) {
 		ret = ibv_get_cq_event(id->send_cq_channel, &cq, &context);
 		if (ret)
 			return rdma_seterrno(ret);
 
 		assert(cq == id->send_cq && context == id);
 		ibv_ack_cq_events(id->send_cq, 1);
-	}
-out:
+	} while (1);
+
 	return (ret < 0) ? rdma_seterrno(ret) : ret;
 }
 
@@ -281,23 +285,27 @@ rdma_get_recv_comp(struct rdma_cm_id *id, struct ibv_wc *wc)
 	void *context;
 	int ret;
 
-	ret = ibv_poll_cq(id->recv_cq, 1, wc);
-	if (ret)
-		goto out;
+	do {
+		ret = ibv_poll_cq(id->recv_cq, 1, wc);
+		if (ret)
+			break;
 
-	ret = ibv_req_notify_cq(id->recv_cq, 0);
-	if (ret)
-		return rdma_seterrno(ret);
+		ret = ibv_req_notify_cq(id->recv_cq, 0);
+		if (ret)
+			return rdma_seterrno(ret);
+
+		ret = ibv_poll_cq(id->recv_cq, 1, wc);
+		if (ret)
+			break;
 
-	while (!(ret = ibv_poll_cq(id->recv_cq, 1, wc))) {
 		ret = ibv_get_cq_event(id->recv_cq_channel, &cq, &context);
 		if (ret)
 			return rdma_seterrno(ret);
 
 		assert(cq == id->recv_cq && context == id);
 		ibv_ack_cq_events(id->recv_cq, 1);
-	}
-out:
+	} while (1);
+
 	return (ret < 0) ? rdma_seterrno(ret) : ret;
 }
 
-- 
1.7.6.4