
kernel-2.6.18-194.11.1.el5.src.rpm

From: AMEET M. PARANJAPE <aparanja@redhat.com>
Date: Mon, 4 May 2009 10:40:26 -0400
Subject: [openib] ehca: fix performance during creation of QPs
Message-id: 20090504143743.23800.31697.sendpatchset@squad5-lp1.lab.bos.redhat.com
O-Subject: [PATCH RHEL5.4 BZ498527] ehca performance impact during creation of queue pairs
Bugzilla: 498527
RH-Acked-by: Doug Ledford <dledford@redhat.com>

RHBZ#:
======
https://bugzilla.redhat.com/show_bug.cgi?id=498527

Description:
===========
This patch contains performance improvements for the ehca driver.
It skips code that is not necessary for userspace queue pairs
and replaces vmalloc() calls with kmalloc(), falling back to
vmalloc() only if kmalloc() fails.
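
As a minimal sketch (not part of the patch itself), the allocation pattern
introduced in ipz_queue_ctor()/ipz_queue_dtor() works like this: try
kmalloc() first and fall back to vmalloc() only if a physically contiguous
allocation fails, then free with an is_vmalloc_addr() check so the matching
deallocator is used. The helper names alloc_page_list()/free_page_list()
below are illustrative only and do not exist in the driver.

#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>		/* is_vmalloc_addr() */

/* Allocate the queue page-pointer array, preferring kmalloc(). */
static void *alloc_page_list(unsigned long nr_of_pages)
{
        void *pages = kmalloc(nr_of_pages * sizeof(void *), GFP_KERNEL);

        if (!pages)
                /* large arrays may not fit contiguously; fall back */
                pages = vmalloc(nr_of_pages * sizeof(void *));

        return pages;
}

/* Free with the allocator that was actually used. */
static void free_page_list(void *pages)
{
        if (is_vmalloc_addr(pages))
                vfree(pages);
        else
                kfree(pages);
}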

RHEL Version Found:
================
RHEL 5.3

kABI Status:
============
No symbols were harmed.

Brew:
=====
Built on all platforms.
http://brewweb.devel.redhat.com/brew/taskinfo?taskID=1784974

Upstream Status:
================
The patch has already been applied upstream in linux-2.6.31, as shown below:
http://lkml.org/lkml/2009/4/21/290
http://lkml.org/lkml/2009/4/21/292
http://lkml.org/lkml/2009/4/21/293

Test Status:
============
If a userspace application tries to allocate a large number of queue pairs, the
performance of the creation process degrades rapidly and results in soft lockup
errors. The backtrace below shows the CPU spinning in _write_lock() called from
__get_vm_area_node(), i.e. contention on the global lock that vmalloc() takes
for every allocation:

BUG: soft lockup - CPU#10 stuck for 10s! [mpi_lapi_gen_64:21687]
REGS: c000001bc72a7340 TRAP: 0901   Tainted: G       (2.6.18-128.el5)
TASK = c000001e4ad98d40[21687] 'mpi_lapi_gen_64' THREAD: c000001bc72a4000 CPU: 10
NIP [C0000000003C8E3C] ._write_lock+0x44/0x80
LR [C0000000000DB550] .__get_vm_area_node+0xd0/0x1f8
Call Trace:

After applying this patch, the problem is no longer seen.
===============================================================
Ameet Paranjape 978-392-3903 ext 23903
IBM on-site partner

Proposed Patch:
===============

diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c
index c57e9f6..88f9bb9 100644
--- a/drivers/infiniband/hw/ehca/ehca_main.c
+++ b/drivers/infiniband/hw/ehca/ehca_main.c
@@ -50,7 +50,7 @@
 #include "ehca_tools.h"
 #include "hcp_if.h"
 
-#define HCAD_VERSION "SVNEHCA_0026"
+#define HCAD_VERSION "SVNEHCA_0027"
 
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_AUTHOR("Christoph Raisch <raisch@de.ibm.com>");
diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c
index e3537e2..9eb7605 100644
--- a/drivers/infiniband/hw/ehca/ehca_qp.c
+++ b/drivers/infiniband/hw/ehca/ehca_qp.c
@@ -461,7 +461,7 @@ static struct ehca_qp *internal_create_qp(
 					      ib_device);
 	struct ib_ucontext *context = NULL;
 	u64 h_ret;
-	int is_llqp = 0, has_srq = 0;
+	int is_llqp = 0, has_srq = 0, is_user = 0;
 	int qp_type, max_send_sge, max_recv_sge, ret;
 
 	/* h_call's out parameters */
@@ -603,9 +603,6 @@ static struct ehca_qp *internal_create_qp(
 		}
 	}
 
-	if (pd->uobject && udata)
-		context = pd->uobject->context;
-
 	my_qp = kmem_cache_zalloc(qp_cache, GFP_KERNEL);
 	if (!my_qp) {
 		ehca_err(pd->device, "pd=%p not enough memory to alloc qp", pd);
@@ -613,6 +610,11 @@ static struct ehca_qp *internal_create_qp(
 		return ERR_PTR(-ENOMEM);
 	}
 
+	if (pd->uobject && udata) {
+		is_user = 1;
+		context = pd->uobject->context;
+	}
+
 	atomic_set(&my_qp->nr_events, 0);
 	init_waitqueue_head(&my_qp->wait_completion);
 	spin_lock_init(&my_qp->spinlock_s);
@@ -701,7 +703,7 @@ static struct ehca_qp *internal_create_qp(
 			(parms.squeue.is_small || parms.rqueue.is_small);
 	}
 
-	h_ret = hipz_h_alloc_resource_qp(shca->ipz_hca_handle, &parms);
+	h_ret = hipz_h_alloc_resource_qp(shca->ipz_hca_handle, &parms, is_user);
 	if (h_ret != H_SUCCESS) {
 		ehca_err(pd->device, "h_alloc_resource_qp() failed h_ret=%li",
 			 h_ret);
@@ -763,18 +765,20 @@ static struct ehca_qp *internal_create_qp(
 			goto create_qp_exit2;
 		}
 
-		my_qp->sq_map.entries = my_qp->ipz_squeue.queue_length /
-			 my_qp->ipz_squeue.qe_size;
-		my_qp->sq_map.map = vmalloc(my_qp->sq_map.entries *
+		if (!is_user) {
+			my_qp->sq_map.entries = my_qp->ipz_squeue.queue_length /
+				my_qp->ipz_squeue.qe_size;
+			my_qp->sq_map.map = vmalloc(my_qp->sq_map.entries *
 					sizeof(struct ehca_qmap_entry));
-		if (!my_qp->sq_map.map) {
-			ehca_err(pd->device, "Couldn't allocate squeue "
-				 "map ret=%i", ret);
-			goto create_qp_exit3;
+			if (!my_qp->sq_map.map) {
+				ehca_err(pd->device, "Couldn't allocate squeue "
+					 "map ret=%i", ret);
+				goto create_qp_exit3;
+			}
+			INIT_LIST_HEAD(&my_qp->sq_err_node);
+			/* to avoid the generation of bogus flush CQEs */
+			reset_queue_map(&my_qp->sq_map);
 		}
-		INIT_LIST_HEAD(&my_qp->sq_err_node);
-		/* to avoid the generation of bogus flush CQEs */
-		reset_queue_map(&my_qp->sq_map);
 	}
 
 	if (HAS_RQ(my_qp)) {
@@ -786,20 +790,21 @@ static struct ehca_qp *internal_create_qp(
 				 "and pages ret=%i", ret);
 			goto create_qp_exit4;
 		}
-
-		my_qp->rq_map.entries = my_qp->ipz_rqueue.queue_length /
-			my_qp->ipz_rqueue.qe_size;
-		my_qp->rq_map.map = vmalloc(my_qp->rq_map.entries *
+		if (!is_user) {
+			my_qp->rq_map.entries = my_qp->ipz_rqueue.queue_length /
+				my_qp->ipz_rqueue.qe_size;
+			my_qp->rq_map.map = vmalloc(my_qp->rq_map.entries *
 				sizeof(struct ehca_qmap_entry));
-		if (!my_qp->rq_map.map) {
-			ehca_err(pd->device, "Couldn't allocate squeue "
+			if (!my_qp->rq_map.map) {
+				ehca_err(pd->device, "Couldn't allocate squeue "
 					"map ret=%i", ret);
-			goto create_qp_exit5;
+				goto create_qp_exit5;
+			}
+			INIT_LIST_HEAD(&my_qp->rq_err_node);
+			/* to avoid the generation of bogus flush CQEs */
+			reset_queue_map(&my_qp->rq_map);
 		}
-		INIT_LIST_HEAD(&my_qp->rq_err_node);
-		/* to avoid the generation of bogus flush CQEs */
-		reset_queue_map(&my_qp->rq_map);
-	} else if (init_attr->srq) {
+	} else if (init_attr->srq && !is_user) {
 		/* this is a base QP, use the queue map of the SRQ */
 		my_qp->rq_map = my_srq->rq_map;
 		INIT_LIST_HEAD(&my_qp->rq_err_node);
@@ -912,7 +917,7 @@ create_qp_exit7:
 	kfree(my_qp->mod_qp_parm);
 
 create_qp_exit6:
-	if (HAS_RQ(my_qp))
+	if (HAS_RQ(my_qp) && !is_user)
 		vfree(my_qp->rq_map.map);
 
 create_qp_exit5:
@@ -920,7 +925,7 @@ create_qp_exit5:
 		ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue);
 
 create_qp_exit4:
-	if (HAS_SQ(my_qp))
+	if (HAS_SQ(my_qp) && !is_user)
 		vfree(my_qp->sq_map.map);
 
 create_qp_exit3:
@@ -1238,6 +1243,7 @@ static int internal_modify_qp(struct ib_qp *ibqp,
 	u64 update_mask;
 	u64 h_ret;
 	int bad_wqe_cnt = 0;
+	int is_user = 0;
 	int squeue_locked = 0;
 	unsigned long flags = 0;
 
@@ -1260,6 +1266,8 @@ static int internal_modify_qp(struct ib_qp *ibqp,
 		ret = ehca2ib_return_code(h_ret);
 		goto modify_qp_exit1;
 	}
+	if (ibqp->uobject)
+		is_user = 1;
 
 	qp_cur_state = ehca2ib_qp_state(mqpcb->qp_state);
 
@@ -1722,7 +1730,8 @@ static int internal_modify_qp(struct ib_qp *ibqp,
 			goto modify_qp_exit2;
 		}
 	}
-	if ((qp_new_state == IB_QPS_ERR) && (qp_cur_state != IB_QPS_ERR)) {
+	if ((qp_new_state == IB_QPS_ERR) && (qp_cur_state != IB_QPS_ERR)
+	    && !is_user) {
 		ret = check_for_left_cqes(my_qp, shca);
 		if (ret)
 			goto modify_qp_exit2;
@@ -1732,16 +1741,17 @@ static int internal_modify_qp(struct ib_qp *ibqp,
 		ipz_qeit_reset(&my_qp->ipz_rqueue);
 		ipz_qeit_reset(&my_qp->ipz_squeue);
 
-		if (qp_cur_state == IB_QPS_ERR) {
+		if (qp_cur_state == IB_QPS_ERR && !is_user) {
 			del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node);
 
 			if (HAS_RQ(my_qp))
 				del_from_err_list(my_qp->recv_cq,
 						  &my_qp->rq_err_node);
 		}
-		reset_queue_map(&my_qp->sq_map);
+		if (!is_user)
+			reset_queue_map(&my_qp->sq_map);
 
-		if (HAS_RQ(my_qp))
+		if (HAS_RQ(my_qp) && !is_user)
 			reset_queue_map(&my_qp->rq_map);
 	}
 
@@ -2132,10 +2142,12 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
 	int ret;
 	u64 h_ret;
 	u8 port_num;
+	int is_user = 0;
 	enum ib_qp_type	qp_type;
 	unsigned long flags;
 
 	if (uobject) {
+		is_user = 1;
 		if (my_qp->mm_count_galpa ||
 		    my_qp->mm_count_rqueue || my_qp->mm_count_squeue) {
 			ehca_err(dev, "Resources still referenced in "
@@ -2162,10 +2174,10 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
 	 * SRQs will never get into an error list and do not have a recv_cq,
 	 * so we need to skip them here.
 	 */
-	if (HAS_RQ(my_qp) && !IS_SRQ(my_qp))
+	if (HAS_RQ(my_qp) && !IS_SRQ(my_qp) && !is_user)
 		del_from_err_list(my_qp->recv_cq, &my_qp->rq_err_node);
 
-	if (HAS_SQ(my_qp))
+	if (HAS_SQ(my_qp) && !is_user)
 		del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node);
 
 	/* now wait until all pending events have completed */
@@ -2204,12 +2216,14 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
 	if (HAS_RQ(my_qp)) {
 		ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue);
 
-		vfree(my_qp->rq_map.map);
+		if (!is_user)
+			vfree(my_qp->rq_map.map);
 	}
 	if (HAS_SQ(my_qp)) {
 		ipz_queue_dtor(my_pd, &my_qp->ipz_squeue);
 
-		vfree(my_qp->sq_map.map);
+		if (!is_user)
+			vfree(my_qp->sq_map.map);
 	}
 	kmem_cache_free(qp_cache, my_qp);
 	atomic_dec(&shca->num_qps);
diff --git a/drivers/infiniband/hw/ehca/hcp_if.c b/drivers/infiniband/hw/ehca/hcp_if.c
index 415d3a4..c7cc5d7 100644
--- a/drivers/infiniband/hw/ehca/hcp_if.c
+++ b/drivers/infiniband/hw/ehca/hcp_if.c
@@ -284,7 +284,7 @@ u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle,
 	param->act_pages = (u32)outs[4];
 
 	if (ret == H_SUCCESS)
-		hcp_galpas_ctor(&cq->galpas, outs[5], outs[6]);
+		hcp_galpas_ctor(&cq->galpas, 0, outs[5], outs[6]);
 
 	if (ret == H_NOT_ENOUGH_RESOURCES)
 		ehca_gen_err("Not enough resources. ret=%li", ret);
@@ -293,7 +293,7 @@ u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle,
 }
 
 u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle,
-			     struct ehca_alloc_qp_parms *parms)
+			     struct ehca_alloc_qp_parms *parms, int is_user)
 {
 	u64 ret;
 	u64 allocate_controls, max_r10_reg, r11, r12;
@@ -359,7 +359,7 @@ u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle,
 		(u32)EHCA_BMASK_GET(H_ALL_RES_QP_RQUEUE_SIZE_PAGES, outs[4]);
 
 	if (ret == H_SUCCESS)
-		hcp_galpas_ctor(&parms->galpas, outs[6], outs[6]);
+		hcp_galpas_ctor(&parms->galpas, is_user, outs[6], outs[6]);
 
 	if (ret == H_NOT_ENOUGH_RESOURCES)
 		ehca_gen_err("Not enough resources. ret=%li", ret);
diff --git a/drivers/infiniband/hw/ehca/hcp_if.h b/drivers/infiniband/hw/ehca/hcp_if.h
index 2c3c6e0..39c1c36 100644
--- a/drivers/infiniband/hw/ehca/hcp_if.h
+++ b/drivers/infiniband/hw/ehca/hcp_if.h
@@ -78,7 +78,7 @@ u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle,
  * initialize resources, create empty QPPTs (2 rings).
  */
 u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle,
-			     struct ehca_alloc_qp_parms *parms);
+			     struct ehca_alloc_qp_parms *parms, int is_user);
 
 u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle,
 		      const u8 port_id,
diff --git a/drivers/infiniband/hw/ehca/hcp_phyp.c b/drivers/infiniband/hw/ehca/hcp_phyp.c
index 2148210..fc3a245 100644
--- a/drivers/infiniband/hw/ehca/hcp_phyp.c
+++ b/drivers/infiniband/hw/ehca/hcp_phyp.c
@@ -54,12 +54,15 @@ int hcall_unmap_page(u64 mapaddr)
 	return 0;
 }
 
-int hcp_galpas_ctor(struct h_galpas *galpas,
+int hcp_galpas_ctor(struct h_galpas *galpas, int is_user,
 		    u64 paddr_kernel, u64 paddr_user)
 {
-	int ret = hcall_map_page(paddr_kernel, &galpas->kernel.fw_handle);
-	if (ret)
-		return ret;
+	if (!is_user) {
+		int ret = hcall_map_page(paddr_kernel, &galpas->kernel.fw_handle);
+		if (ret)
+			return ret;
+	} else
+		galpas->kernel.fw_handle = NULL;
 
 	galpas->user.fw_handle = paddr_user;
 
diff --git a/drivers/infiniband/hw/ehca/hcp_phyp.h b/drivers/infiniband/hw/ehca/hcp_phyp.h
index 5305c2a..204227d 100644
--- a/drivers/infiniband/hw/ehca/hcp_phyp.h
+++ b/drivers/infiniband/hw/ehca/hcp_phyp.h
@@ -78,7 +78,7 @@ static inline void hipz_galpa_store(struct h_galpa galpa, u32 offset, u64 value)
 	*(volatile u64 __force *)addr = value;
 }
 
-int hcp_galpas_ctor(struct h_galpas *galpas,
+int hcp_galpas_ctor(struct h_galpas *galpas, int is_user,
 		    u64 paddr_kernel, u64 paddr_user);
 
 int hcp_galpas_dtor(struct h_galpas *galpas);
diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.c b/drivers/infiniband/hw/ehca/ipz_pt_fn.c
index c3a3284..1227c59 100644
--- a/drivers/infiniband/hw/ehca/ipz_pt_fn.c
+++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.c
@@ -220,10 +220,13 @@ int ipz_queue_ctor(struct ehca_pd *pd, struct ipz_queue *queue,
 	queue->small_page = NULL;
 
 	/* allocate queue page pointers */
-	queue->queue_pages = vmalloc(nr_of_pages * sizeof(void *));
+	queue->queue_pages = kmalloc(nr_of_pages * sizeof(void *), GFP_KERNEL);
 	if (!queue->queue_pages) {
-		ehca_gen_err("Couldn't allocate queue page list");
-		return 0;
+		queue->queue_pages = vmalloc(nr_of_pages * sizeof(void *));
+		if (!queue->queue_pages) {
+			ehca_gen_err("Couldn't allocate queue page list");
+			return 0;
+		}
 	}
 	memset(queue->queue_pages, 0, nr_of_pages * sizeof(void *));
 
@@ -240,7 +243,10 @@ int ipz_queue_ctor(struct ehca_pd *pd, struct ipz_queue *queue,
 ipz_queue_ctor_exit0:
 	ehca_gen_err("Couldn't alloc pages queue=%p "
 		 "nr_of_pages=%x",  queue, nr_of_pages);
-	vfree(queue->queue_pages);
+	if (is_vmalloc_addr(queue->queue_pages))
+		vfree(queue->queue_pages);
+	else
+		kfree(queue->queue_pages);
 
 	return 0;
 }
@@ -262,7 +268,10 @@ int ipz_queue_dtor(struct ehca_pd *pd, struct ipz_queue *queue)
 			free_page((unsigned long)queue->queue_pages[i]);
 	}
 
-	vfree(queue->queue_pages);
+	if (is_vmalloc_addr(queue->queue_pages))
+		vfree(queue->queue_pages);
+	else
+		kfree(queue->queue_pages);
 
 	return 1;
 }