Sophie

Sophie

distrib > Scientific%20Linux > 5x > x86_64 > by-pkgid > fc11cd6e1c513a17304da94a5390f3cd > files > 2783

kernel-2.6.18-194.11.1.el5.src.rpm

From: Michal Schmidt <mschmidt@redhat.com>
Date: Thu, 31 Jul 2008 14:04:18 +0200
Subject: [pci] fix problems with msi interrupt management
Message-id: 20080731140418.1e4081b8@hammerfall
O-Subject: [RHEL5.3 PATCH] pci: fix problems with msi interrupt management
Bugzilla: 428696
RH-Acked-by: Andy Gospodarek <gospo@redhat.com>

BZ: https://bugzilla.redhat.com/show_bug.cgi?id=428696

This patch was previously submitted by Andy Gospodarek. Andy and I were
going to combine it with my patches for another MSI bug (432451), but
later we found the other bug cannot be fixed properly. So we should
only apply the patches for the first bug.

Andy's original description:
> Our customers first started noticing this when setting ethtool
> parameters on forcedeth devices.  Whenever setting ethtool parameters
> for link speed, the device would quit getting interrupts.  Since the
> forcedeth driver would call disable_irq and then enable_irq when
> setting ethtool parameters it seemed that one of those calls (it's
> tough to tell which one) wasn't worked as expected.  Of course the
> latest upstream worked just fine, so after looking around I noticed
> that there were significant changes to the MSI code between 2.6.18
> and now.
>
> I decided that I would do a git bisect (while constantly backporting
> the latest forcedeth driver to the bisecting kernel), and see if I
> could trick git into telling me when the problem was fixed.  This
> patch turned out to be the first one that made my test case start
> working:
>
> commit 7bd007e480672c99d8656c7b7b12ef0549432c37
> Author: Eric W. Biederman <ebiederm@xmission.com>
> Date:   Wed Oct 4 02:16:31 2006 -0700
>
>     [PATCH] genirq: msi: simplify msi enable and disable
>
> I knew that all patches up to that point would be good ones to try
> (since git bisect is designed to find regressions not fixes I could
> not guarantee anything less than this patch would fix the issue).  I
> packed up the 4 fixes between this and 2.6.18 (since this patch first
> appeared in 2.6.19) and got feedback that this resolved customer
> problems.  The issue however is that this patch looks to be part of
> an 8-patch set that touches pci/msi.c and also appears to be part of
> a 25-patch set overall?
>
> I've decided I'd like to take all the fixes upto the one that works
> (the four listed below).

This combines 4 upstream patches:

commit dd159eeca971d594fa30176733b66d37acda82a3:
    [PATCH] genirq: msi: make the msi boolean tests return either 0 or 1

    This allows the output of the msi tests to be stored directly in a
    bit field. If you don't do this a value greater than one will be
    truncated and become 0. Changing true to false with bizare consequences.

commit 7bd007e480672c99d8656c7b7b12ef0549432c37:
    [PATCH] genirq: msi: simplify msi enable and disable

    The problem.  Because the disable routines leave the msi interrupts in all
    sorts of half enabled states the enable routines become impossible to
    implement correctly, and almost impossible to understand.

    Simplifing this allows me to simply kill the buggy reroute_msix_table, and
    generally makes the code more maintainable.

commit 571817849c76aabf34d534c905b5e604f2e824c5:
    [PATCH] msi: use kmem_cache_zalloc()

    Simpler, cleaner.

commit 24334a12533e9ac70dcb467ccd629f190afc5361:
    MSI: Factorize common code in pci_msi_supported()

    pci_enable_msi() and pci_enable_msix() use the same code to detect
    whether MSI might be enabled on this device. Factorize this code in
    pci_msi_supported(). And improve the documentation about the fact
    that only the root chipset must support MSI, but it is hard to
    find the root bus so we check all parent busses MSI flags.

Brew scratch build:
http://brewweb.devel.redhat.com/brew/taskinfo?taskID=1415867

Andy tested the four patches before and found they fixed the bug.

diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 7922259..2a63a74 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -45,16 +45,10 @@ msi_register(struct msi_ops *ops)
 	return 0;
 }
 
-static void msi_cache_ctor(void *p, kmem_cache_t *cache, unsigned long flags)
-{
-	memset(p, 0, sizeof(struct msi_desc));
-}
-
 static int msi_cache_init(void)
 {
-	msi_cachep = kmem_cache_create("msi_cache",
-			sizeof(struct msi_desc),
-		       	0, SLAB_HWCACHE_ALIGN, msi_cache_ctor, NULL);
+	msi_cachep = kmem_cache_create("msi_cache", sizeof(struct msi_desc),
+					0, SLAB_HWCACHE_ALIGN, NULL, NULL);
 	if (!msi_cachep)
 		return -ENOMEM;
 
@@ -404,11 +398,10 @@ static struct msi_desc* alloc_msi_entry(void)
 {
 	struct msi_desc *entry;
 
-	entry = kmem_cache_alloc(msi_cachep, SLAB_KERNEL);
+	entry = kmem_cache_zalloc(msi_cachep, GFP_KERNEL);
 	if (!entry)
 		return NULL;
 
-	memset(entry, 0, sizeof(struct msi_desc));
 	entry->link.tail = entry->link.head = 0;	/* single message */
 	entry->dev = NULL;
 
@@ -903,6 +896,33 @@ static int msix_capability_init(struct pci_dev *dev,
 }
 
 /**
+ * pci_msi_supported - check whether MSI may be enabled on device
+ * @dev: pointer to the pci_dev data structure of MSI device function
+ *
+ * MSI must be globally enabled and supported by the device and its root
+ * bus. But, the root bus is not easy to find since some architectures
+ * have virtual busses on top of the PCI hierarchy (for instance the
+ * hypertransport bus), while the actual bus where MSI must be supported
+ * is below. So we test the MSI flag on all parent busses and assume
+ * that no quirk will ever set the NO_MSI flag on a non-root bus.
+ **/
+static
+int pci_msi_supported(struct pci_dev * dev)
+{
+	struct pci_bus *bus;
+
+	if (!pci_msi_enable || !dev || dev->no_msi)
+		return -EINVAL;
+
+	/* check MSI flags of all parent busses */
+	for (bus = dev->bus; bus; bus = bus->parent)
+		if (bus->bus_flags & PCI_BUS_FLAGS_NO_MSI)
+			return -EINVAL;
+
+	return 0;
+}
+
+/**
  * pci_enable_msi - configure device's MSI capability structure
  * @dev: pointer to the pci_dev data structure of MSI device function
  *
@@ -914,19 +934,10 @@ static int msix_capability_init(struct pci_dev *dev,
  **/
 int pci_enable_msi(struct pci_dev* dev)
 {
-	struct pci_bus *bus;
-	int pos, temp, status = -EINVAL;
-	u16 control;
-
-	if (!pci_msi_enable || !dev)
- 		return status;
-
-	if (dev->no_msi)
-		return status;
+	int pos, temp, status;
 
-	for (bus = dev->bus; bus; bus = bus->parent)
-		if (bus->bus_flags & PCI_BUS_FLAGS_NO_MSI)
-			return -EINVAL;
+	if (pci_msi_supported(dev) < 0)
+		return -EINVAL;
 
 	temp = dev->irq;
 
@@ -938,27 +949,8 @@ int pci_enable_msi(struct pci_dev* dev)
 	if (!pos)
 		return -EINVAL;
 
-	if (!msi_lookup_vector(dev, PCI_CAP_ID_MSI)) {
-		/* Lookup Sucess */
-		unsigned long flags;
+	WARN_ON(!msi_lookup_vector(dev, PCI_CAP_ID_MSI));
 
-		pci_read_config_word(dev, msi_control_reg(pos), &control);
-		if (control & PCI_MSI_FLAGS_ENABLE)
-			return 0;	/* Already in MSI mode */
-		spin_lock_irqsave(&msi_lock, flags);
-		if (!vector_irq[dev->irq]) {
-			msi_desc[dev->irq]->msi_attrib.state = 0;
-			vector_irq[dev->irq] = -1;
-			nr_released_vectors--;
-			spin_unlock_irqrestore(&msi_lock, flags);
-			status = msi_register_init(dev, msi_desc[dev->irq]);
-			if (status == 0)
-				enable_msi_mode(dev, pos, PCI_CAP_ID_MSI);
-			return status;
-		}
-		spin_unlock_irqrestore(&msi_lock, flags);
-		dev->irq = temp;
-	}
 	/* Check whether driver already requested for MSI-X vectors */
 	pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
 	if (pos > 0 && !msi_lookup_vector(dev, PCI_CAP_ID_MSIX)) {
@@ -1000,6 +992,8 @@ void pci_disable_msi(struct pci_dev* dev)
 	if (!(control & PCI_MSI_FLAGS_ENABLE))
 		return;
 
+	disable_msi_mode(dev, pos, PCI_CAP_ID_MSI);
+
 	spin_lock_irqsave(&msi_lock, flags);
 	entry = msi_desc[dev->irq];
 	if (!entry || !entry->dev || entry->msi_attrib.type != PCI_CAP_ID_MSI) {
@@ -1013,14 +1007,12 @@ void pci_disable_msi(struct pci_dev* dev)
 		       pci_name(dev), dev->irq);
 		BUG_ON(entry->msi_attrib.state > 0);
 	} else {
-		vector_irq[dev->irq] = 0; /* free it */
-		nr_released_vectors++;
 		default_vector = entry->msi_attrib.default_vector;
 		spin_unlock_irqrestore(&msi_lock, flags);
+		msi_free_vector(dev, dev->irq, 0);
+
 		/* Restore dev->irq to its default pin-assertion vector */
 		dev->irq = default_vector;
-		disable_msi_mode(dev, pci_find_capability(dev, PCI_CAP_ID_MSI),
-					PCI_CAP_ID_MSI);
 	}
 }
 
@@ -1068,57 +1060,6 @@ static int msi_free_vector(struct pci_dev* dev, int vector, int reassign)
 	return 0;
 }
 
-static int reroute_msix_table(int head, struct msix_entry *entries, int *nvec)
-{
-	int vector = head, tail = 0;
-	int i, j = 0, nr_entries = 0;
-	void __iomem *base;
-	unsigned long flags;
-
-	spin_lock_irqsave(&msi_lock, flags);
-	while (head != tail) {
-		nr_entries++;
-		tail = msi_desc[vector]->link.tail;
-		if (entries[0].entry == msi_desc[vector]->msi_attrib.entry_nr)
-			j = vector;
-		vector = tail;
-	}
-	if (*nvec > nr_entries) {
-		spin_unlock_irqrestore(&msi_lock, flags);
-		*nvec = nr_entries;
-		return -EINVAL;
-	}
-	vector = ((j > 0) ? j : head);
-	for (i = 0; i < *nvec; i++) {
-		j = msi_desc[vector]->msi_attrib.entry_nr;
-		msi_desc[vector]->msi_attrib.state = 0;	/* Mark it not active */
-		vector_irq[vector] = -1;		/* Mark it busy */
-		nr_released_vectors--;
-		entries[i].vector = vector;
-		if (j != (entries + i)->entry) {
-			base = msi_desc[vector]->mask_base;
-			msi_desc[vector]->msi_attrib.entry_nr =
-				(entries + i)->entry;
-			writel( readl(base + j * PCI_MSIX_ENTRY_SIZE +
-				PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET), base +
-				(entries + i)->entry * PCI_MSIX_ENTRY_SIZE +
-				PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
-			writel(	readl(base + j * PCI_MSIX_ENTRY_SIZE +
-				PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET), base +
-				(entries + i)->entry * PCI_MSIX_ENTRY_SIZE +
-				PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
-			writel( (readl(base + j * PCI_MSIX_ENTRY_SIZE +
-				PCI_MSIX_ENTRY_DATA_OFFSET) & 0xff00) | vector,
-				base + (entries+i)->entry*PCI_MSIX_ENTRY_SIZE +
-				PCI_MSIX_ENTRY_DATA_OFFSET);
-		}
-		vector = msi_desc[vector]->link.tail;
-	}
-	spin_unlock_irqrestore(&msi_lock, flags);
-
-	return 0;
-}
-
 /**
  * pci_enable_msix - configure device's MSI-X capability structure
  * @dev: pointer to the pci_dev data structure of MSI-X device function
@@ -1136,22 +1077,14 @@ static int reroute_msix_table(int head, struct msix_entry *entries, int *nvec)
  **/
 int pci_enable_msix(struct pci_dev* dev, struct msix_entry *entries, int nvec)
 {
-	struct pci_bus *bus;
 	int status, pos, nr_entries, free_vectors;
 	int i, j, temp;
 	u16 control;
 	unsigned long flags;
 
-	if (!pci_msi_enable || !dev || !entries)
+	if (!entries || pci_msi_supported(dev) < 0)
  		return -EINVAL;
 
-	if (dev->no_msi)
-		return -EINVAL;
-
-	for (bus = dev->bus; bus; bus = bus->parent)
-		if (bus->bus_flags & PCI_BUS_FLAGS_NO_MSI)
-			return -EINVAL;
-
 	status = msi_init();
 	if (status < 0)
 		return status;
@@ -1161,9 +1094,6 @@ int pci_enable_msix(struct pci_dev* dev, struct msix_entry *entries, int nvec)
  		return -EINVAL;
 
 	pci_read_config_word(dev, msi_control_reg(pos), &control);
-	if (control & PCI_MSIX_FLAGS_ENABLE)
-		return -EINVAL;			/* Already in MSI-X mode */
-
 	nr_entries = multi_msix_capable(control);
 	if (nvec > nr_entries)
 		return -EINVAL;
@@ -1178,19 +1108,8 @@ int pci_enable_msix(struct pci_dev* dev, struct msix_entry *entries, int nvec)
 		}
 	}
 	temp = dev->irq;
-	if (!msi_lookup_vector(dev, PCI_CAP_ID_MSIX)) {
-		/* Lookup Sucess */
-		nr_entries = nvec;
-		/* Reroute MSI-X table */
-		if (reroute_msix_table(dev->irq, entries, &nr_entries)) {
-			/* #requested > #previous-assigned */
-			dev->irq = temp;
-			return nr_entries;
-		}
-		dev->irq = temp;
-		enable_msi_mode(dev, pos, PCI_CAP_ID_MSIX);
-		return 0;
-	}
+	WARN_ON(!msi_lookup_vector(dev, PCI_CAP_ID_MSIX));
+
 	/* Check whether driver already requested for MSI vector */
    	if (pci_find_capability(dev, PCI_CAP_ID_MSI) > 0 &&
 		!msi_lookup_vector(dev, PCI_CAP_ID_MSI)) {
@@ -1249,37 +1168,32 @@ void pci_disable_msix(struct pci_dev* dev)
 	if (!(control & PCI_MSIX_FLAGS_ENABLE))
 		return;
 
+	disable_msi_mode(dev, pos, PCI_CAP_ID_MSIX);
+
 	temp = dev->irq;
 	if (!msi_lookup_vector(dev, PCI_CAP_ID_MSIX)) {
 		int state, vector, head, tail = 0, warning = 0;
 		unsigned long flags;
 
 		vector = head = dev->irq;
-		spin_lock_irqsave(&msi_lock, flags);
+		dev->irq = temp;			/* Restore pin IRQ */
 		while (head != tail) {
+			spin_lock_irqsave(&msi_lock, flags);
 			state = msi_desc[vector]->msi_attrib.state;
+			tail = msi_desc[vector]->link.tail;
+			spin_unlock_irqrestore(&msi_lock, flags);
 			if (state)
 				warning = 1;
-			else {
-				vector_irq[vector] = 0; /* free it */
-				nr_released_vectors++;
-			}
-			tail = msi_desc[vector]->link.tail;
+			else if (vector != head)	/* Release MSI-X vector */
+				msi_free_vector(dev, vector, 0);
 			vector = tail;
 		}
-		spin_unlock_irqrestore(&msi_lock, flags);
+		msi_free_vector(dev, vector, 0);
 		if (warning) {
-			dev->irq = temp;
 			printk(KERN_WARNING "PCI: %s: pci_disable_msix() called without "
 			       "free_irq() on all MSI-X vectors\n",
 			       pci_name(dev));
 			BUG_ON(warning > 0);
-		} else {
-			dev->irq = temp;
-			disable_msi_mode(dev,
-				pci_find_capability(dev, PCI_CAP_ID_MSIX),
-				PCI_CAP_ID_MSIX);
-
 		}
 	}
 }
diff --git a/drivers/pci/msi.h b/drivers/pci/msi.h
index 56951c3..9b31d4c 100644
--- a/drivers/pci/msi.h
+++ b/drivers/pci/msi.h
@@ -110,8 +110,8 @@ extern int pci_vector_resources(int last, int nr_released);
 	(1 << ((control & PCI_MSI_FLAGS_QMASK) >> 1))
 #define multi_msi_enable(control, num) \
 	control |= (((num >> 1) << 4) & PCI_MSI_FLAGS_QSIZE);
-#define is_64bit_address(control)	(control & PCI_MSI_FLAGS_64BIT)
-#define is_mask_bit_support(control)	(control & PCI_MSI_FLAGS_MASKBIT)
+#define is_64bit_address(control)	(!!(control & PCI_MSI_FLAGS_64BIT))
+#define is_mask_bit_support(control)	(!!(control & PCI_MSI_FLAGS_MASKBIT))
 #define msi_enable(control, num) multi_msi_enable(control, num); \
 	control |= PCI_MSI_FLAGS_ENABLE