Sophie

Sophie

distrib > Scientific%20Linux > 5x > x86_64 > by-pkgid > fc11cd6e1c513a17304da94a5390f3cd > files > 2327

kernel-2.6.18-194.11.1.el5.src.rpm

From: Doug Ledford <dledford@redhat.com>
Date: Sat, 22 Aug 2009 09:06:21 -0400
Subject: [net] mlx4_core: fails to load on large systems
Message-id: 7EBC3D49-5D75-486E-9E6B-EB71C74CD405@redhat.com
O-Subject: [Patch RHEL5] mlx4_core fails to load on large systems
Bugzilla: 514147

This is for bugzilla 514147.  On systems with large numbers of CPUs
(32+), mlx4_core fails to load because the single page it allocates
for the EQ array is too small.  This patch updates the driver to
allocate enough memory to hold the EQ array based upon what is
needed.  Built and tested (on less than 32 cores) by me.  Tested on
more than 32 cores upstream.  Accepted into Roland's tree with two
minor touchups, neither of which are functional issues and one of
which wouldn't even work in our tree due to differences between 2.6.18
and 2.6.31.  Brew build completed:

https://brewweb.devel.redhat.com/taskinfo?taskID=1940489

commit 36ab134e0b0b74da36457f66109f17c993f16ea3
Author: Doug Ledford <dledford@redhat.com>
Date:   Fri Aug 21 18:24:01 2009 -0400

    [mlx4] Map sufficient memory for large EQ arrays needed when > 32 cores

    Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/net/mlx4/eq.c b/drivers/net/mlx4/eq.c
index 5306659..99aa04c 100644
--- a/drivers/net/mlx4/eq.c
+++ b/drivers/net/mlx4/eq.c
@@ -519,29 +519,35 @@ int mlx4_map_eq_icm(struct mlx4_dev *dev, u64 icm_virt)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
 	int ret;
+	int host_pages, icm_pages;
+	int i;
+
+	host_pages = ALIGN(min_t(int, dev->caps.num_eqs, num_possible_cpus() + MLX4_EQ_COMP_CPU0) *
+			   dev->caps.eqc_entry_size, PAGE_SIZE) >> PAGE_SHIFT;
+	priv->eq_table.order = ilog2(roundup_pow_of_two(host_pages));
 
-	/*
-	 * We assume that mapping one page is enough for the whole EQ
-	 * context table.  This is fine with all current HCAs, because
-	 * we only use 32 EQs and each EQ uses 64 bytes of context
-	 * memory, or 1 KB total.
-	 */
 	priv->eq_table.icm_virt = icm_virt;
-	priv->eq_table.icm_page = alloc_page(GFP_HIGHUSER);
+	priv->eq_table.icm_page = alloc_pages(GFP_HIGHUSER, priv->eq_table.order);
 	if (!priv->eq_table.icm_page)
 		return -ENOMEM;
 	priv->eq_table.icm_dma  = pci_map_page(dev->pdev, priv->eq_table.icm_page, 0,
-					       PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+					       PAGE_SIZE << priv->eq_table.order,
+					       PCI_DMA_BIDIRECTIONAL);
 	if (pci_dma_mapping_error(priv->eq_table.icm_dma)) {
-		__free_page(priv->eq_table.icm_page);
+		__free_pages(priv->eq_table.icm_page, priv->eq_table.order);
 		return -ENOMEM;
 	}
 
-	ret = mlx4_MAP_ICM_page(dev, priv->eq_table.icm_dma, icm_virt);
-	if (ret) {
-		pci_unmap_page(dev->pdev, priv->eq_table.icm_dma, PAGE_SIZE,
-			       PCI_DMA_BIDIRECTIONAL);
-		__free_page(priv->eq_table.icm_page);
+	icm_pages = (PAGE_SIZE / MLX4_ICM_PAGE_SIZE) * (1 << priv->eq_table.order);
+	for (i = 0; i < icm_pages; ++i) {
+		ret = mlx4_MAP_ICM_page(dev, priv->eq_table.icm_dma, icm_virt + i * MLX4_ICM_PAGE_SIZE);
+		if (ret) {
+			mlx4_UNMAP_ICM(dev, priv->eq_table.icm_virt, i);
+			pci_unmap_page(dev->pdev, priv->eq_table.icm_dma, PAGE_SIZE,
+				       PCI_DMA_BIDIRECTIONAL);
+			__free_pages(priv->eq_table.icm_page, priv->eq_table.order);
+			break;
+		}
 	}
 
 	return ret;
@@ -550,11 +556,12 @@ int mlx4_map_eq_icm(struct mlx4_dev *dev, u64 icm_virt)
 void mlx4_unmap_eq_icm(struct mlx4_dev *dev)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
+	int icm_pages = (PAGE_SIZE / MLX4_ICM_PAGE_SIZE) * (1 << priv->eq_table.order);
 
-	mlx4_UNMAP_ICM(dev, priv->eq_table.icm_virt, 1);
-	pci_unmap_page(dev->pdev, priv->eq_table.icm_dma, PAGE_SIZE,
-		       PCI_DMA_BIDIRECTIONAL);
-	__free_page(priv->eq_table.icm_page);
+	mlx4_UNMAP_ICM(dev, priv->eq_table.icm_virt, icm_pages);
+	pci_unmap_page(dev->pdev, priv->eq_table.icm_dma,
+		       PAGE_SIZE << priv->eq_table.order, PCI_DMA_BIDIRECTIONAL);
+	__free_pages(priv->eq_table.icm_page, priv->eq_table.order);
 }
 
 int mlx4_init_eq_table(struct mlx4_dev *dev)
diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c
index 9358528..c0dc474 100644
--- a/drivers/net/mlx4/main.c
+++ b/drivers/net/mlx4/main.c
@@ -282,6 +282,7 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 	dev->caps.max_cqes	     = dev_cap->max_cq_sz - 1;
 	dev->caps.reserved_cqs	     = dev_cap->reserved_cqs;
 	dev->caps.reserved_eqs	     = dev_cap->reserved_eqs;
+	dev->caps.eqc_entry_size     = dev_cap->eqc_entry_sz;
 	dev->caps.mtts_per_seg	     = 1 << log_mtts_per_seg;
 	dev->caps.reserved_mtts	     = DIV_ROUND_UP(dev_cap->reserved_mtts,
 						    dev->caps.mtts_per_seg);
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index f2ce940..2cd8b78 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -223,6 +223,7 @@ struct mlx4_eq_table {
 	struct mlx4_icm_table	cmpt_table;
 	int			have_irq;
 	u8			inta_pin;
+	int			order;
 };
 
 struct mlx4_srq_table {
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index b3ed1cb..39bf8bd 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -208,6 +208,7 @@ struct mlx4_caps {
 	int			max_cqes;
 	int			reserved_cqs;
 	int			num_eqs;
+	int			eqc_entry_size;
 	int			reserved_eqs;
 	int			num_comp_vectors;
 	int			num_mpts;