Sophie

Sophie

distrib > Scientific%20Linux > 5x > x86_64 > by-pkgid > fc11cd6e1c513a17304da94a5390f3cd > files > 4563

kernel-2.6.18-194.11.1.el5.src.rpm

From: Don Dugger <ddugger@redhat.com>
Date: Thu, 8 Apr 2010 15:37:35 -0400
Subject: [xen] vtd: fix ioapic pin array
Message-id: <201004081537.o38FbZCJ017692@sobek.n0ano.com>
Patchwork-id: 24046
O-Subject: [RHEL 5.6 PATCH] BZ 563546: xen: ioapic pin array
Bugzilla: 563546

This patch fixes a problem where RHEL 5 was not booting on newer Intel
platforms when interrupt remapping was enabled.

Some BIOSes do not zero out the reserved fields in IOAPIC RTEs. When the
"format" bit is left set, the IOAPIC RTE is treated as being in remapped
format, so the interrupt remapping table index is computed from the index_15
and index_0_14 fields rather than a new index being assigned. This causes the
index check to fail and breaks interrupt remapping. This patch introduces an
array that records the index for each IOAPIC pin, so the "format" bit no
longer needs to be checked, which fixes the issue.

Upstream status: Accepted (CS 19415, 19602, 20178)

Brew build: http://brewweb.devel.redhat.com/brew/taskinfo?taskID=2369334

Testing: tested on both a Tylersburg-EP and a Westmere system; both boot
properly with and without interrupt remapping enabled.

Signed-off-by: Weidong Han <weidong.han@intel.com>
Signed-off-by: Don Dugger <donald.d.dugger@intel.com>
---
 drivers/passthrough/vtd/intremap.c |  145 ++++++++++++++++++++++--------------
 1 files changed, 88 insertions(+), 57 deletions(-)

Signed-off-by: Jarod Wilson <jarod@redhat.com>

diff --git a/drivers/passthrough/vtd/intremap.c b/drivers/passthrough/vtd/intremap.c
index a836ae7..23749e9 100644
--- a/drivers/passthrough/vtd/intremap.c
+++ b/drivers/passthrough/vtd/intremap.c
@@ -23,6 +23,7 @@
 #include <xen/iommu.h>
 #include <asm/hvm/iommu.h>
 #include <xen/time.h>
+#include <xen/list.h>
 #include <xen/pci.h>
 #include <xen/pci_regs.h>
 #include "iommu.h"
@@ -30,9 +31,70 @@
 #include "vtd.h"
 #include "extern.h"
 
-#ifndef dest_SMI
-#define dest_SMI -1
-#endif
+/* The maximum number of IOAPIC (or IOSAPIC) pins. Typical values are 24 or
+ * 48 on x86 and Itanium platforms. Here we use a bigger number, 256, which
+ * should be big enough. Currently IREMAP_ENTRY_NR is also 256.
+ */
+#define MAX_IOAPIC_PIN_NUM  256
+
+struct ioapicid_pin_intremap_index {
+	struct list_head list;
+	unsigned int ioapic_id;
+	unsigned int pin;
+	int intremap_index;
+};
+
+static struct list_head ioapic_pin_to_intremap_index[MAX_IOAPIC_PIN_NUM];
+
+static int init_ioapic_pin_intremap_index(void)
+{
+    static int initialized = 0;
+    int i;
+
+    if ( initialized == 1 )
+        return 0;
+
+    for ( i = 0; i < MAX_IOAPIC_PIN_NUM; i++ )
+        INIT_LIST_HEAD(&ioapic_pin_to_intremap_index[i]);
+
+    initialized = 1;
+    return 0;
+}
+
+static int get_ioapic_pin_intremap_index(unsigned int ioapic_id,
+                                         unsigned int pin)
+{
+    struct ioapicid_pin_intremap_index *entry;
+    struct list_head *pos, *tmp;
+
+    list_for_each_safe ( pos, tmp, &ioapic_pin_to_intremap_index[pin] )
+    {
+        entry = list_entry(pos, struct ioapicid_pin_intremap_index, list);
+        if ( entry->ioapic_id == ioapic_id )
+            return entry->intremap_index;
+    }
+
+    return -1;
+}
+
+static int set_ioapic_pin_intremap_index(unsigned int ioapic_id,
+                                         unsigned int pin,
+                                         int index)
+{
+    struct ioapicid_pin_intremap_index *entry;
+
+    entry = xmalloc(struct ioapicid_pin_intremap_index);
+    if ( !entry )
+        return -ENOMEM;
+
+    entry->ioapic_id = ioapic_id;
+    entry->pin = pin;
+    entry->intremap_index = index;
+
+    list_add_tail(&entry->list, &ioapic_pin_to_intremap_index[pin]);
+
+    return 0;
+}
 
 u16 apicid_to_bdf(int apic_id)
 {
@@ -48,11 +110,9 @@ u16 apicid_to_bdf(int apic_id)
 }
 
 static int remap_entry_to_ioapic_rte(
-    struct iommu *iommu, struct IO_xAPIC_route_entry *old_rte)
+    struct iommu *iommu, int index, struct IO_xAPIC_route_entry *old_rte)
 {
     struct iremap_entry *iremap_entry = NULL, *iremap_entries;
-    struct IO_APIC_route_remap_entry *remap_rte;
-    int index = 0;
     unsigned long flags;
     struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
 
@@ -63,9 +123,6 @@ static int remap_entry_to_ioapic_rte(
         return -EFAULT;
     }
 
-    remap_rte = (struct IO_APIC_route_remap_entry *) old_rte;
-    index = (remap_rte->index_15 << 15) | remap_rte->index_0_14;
-
     if ( index > ir_ctrl->iremap_index )
     {
         dprintk(XENLOG_ERR VTDPREFIX,
@@ -94,7 +151,7 @@ static int remap_entry_to_ioapic_rte(
 }
 
 static int ioapic_rte_to_remap_entry(struct iommu *iommu,
-    int apic_id, struct IO_xAPIC_route_entry *old_rte,
+    int apic_id, unsigned int ioapic_pin, struct IO_xAPIC_route_entry *old_rte,
     unsigned int rte_upper, unsigned int value)
 {
     struct iremap_entry *iremap_entry = NULL, *iremap_entries;
@@ -108,13 +165,13 @@ static int ioapic_rte_to_remap_entry(struct iommu *iommu,
     remap_rte = (struct IO_APIC_route_remap_entry *) old_rte;
     spin_lock_irqsave(&ir_ctrl->iremap_lock, flags);
 
-    if ( remap_rte->format == 0 )
+    index = get_ioapic_pin_intremap_index(apic_id, ioapic_pin);
+    if ( index < 0 )
     {
         ir_ctrl->iremap_index++;
         index = ir_ctrl->iremap_index;
+        set_ioapic_pin_intremap_index(apic_id, ioapic_pin, index);
     }
-    else
-        index = (remap_rte->index_15 << 15) | remap_rte->index_0_14;
 
     if ( index > IREMAP_ENTRY_NR - 1 )
     {
@@ -192,9 +249,18 @@ unsigned int io_apic_read_remap_rte(
     int rte_upper = (reg & 1) ? 1 : 0;
     struct iommu *iommu = ioapic_to_iommu(IO_APIC_ID(apic));
     struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
+    unsigned int ioapic_pin = (reg - 0x10) / 2;
+    int index;
 
     if ( !iommu || !ir_ctrl || ir_ctrl->iremap_maddr == 0 ||
-         ir_ctrl->iremap_index == -1 )
+        (ir_ctrl->iremap_index == -1) )
+    {
+        *IO_APIC_BASE(apic) = reg;
+        return *(IO_APIC_BASE(apic)+4);
+    }
+
+    index = get_ioapic_pin_intremap_index(IO_APIC_ID(apic), ioapic_pin);
+    if ( index < 0 )
     {
         *IO_APIC_BASE(apic) = reg;
         return *(IO_APIC_BASE(apic)+4);
@@ -211,13 +277,7 @@ unsigned int io_apic_read_remap_rte(
 
     remap_rte = (struct IO_APIC_route_remap_entry *) &old_rte;
 
-    if ( (remap_rte->format == 0) || (old_rte.delivery_mode == dest_SMI) )
-    {
-        *IO_APIC_BASE(apic) = rte_upper ? (reg + 1) : reg;
-        return *(IO_APIC_BASE(apic)+4);
-    }
-
-    if ( remap_entry_to_ioapic_rte(iommu, &old_rte) )
+    if ( remap_entry_to_ioapic_rte(iommu, index, &old_rte) )
     {
         *IO_APIC_BASE(apic) = rte_upper ? (reg + 1) : reg;
         return *(IO_APIC_BASE(apic)+4);
@@ -232,6 +292,7 @@ unsigned int io_apic_read_remap_rte(
 void io_apic_write_remap_rte(
     unsigned int apic, unsigned int reg, unsigned int value)
 {
+    unsigned int ioapic_pin = (reg - 0x10) / 2;
     struct IO_xAPIC_route_entry old_rte = { 0 };
     struct IO_APIC_route_remap_entry *remap_rte;
     unsigned int rte_upper = (reg & 1) ? 1 : 0;
@@ -257,31 +318,6 @@ void io_apic_write_remap_rte(
 
     remap_rte = (struct IO_APIC_route_remap_entry *) &old_rte;
 
-    if ( old_rte.delivery_mode == dest_SMI )
-    {
-        /* Some BIOS does not zero out reserve fields in IOAPIC
-         * RTE's.  clear_IO_APIC() zeroes out all RTE's except for RTE
-         * with MSI delivery type.  This is a problem when the host
-         * OS converts SMI delivery type to some other type but leaving
-         * the reserved field uninitialized.  This can cause interrupt
-         * remapping table out of bound error if "format" field is 1
-         * and the "index" field has a value that that is larger than 
-         * the maximum index of interrupt remapping table.
-         */
-        if ( remap_rte->format == 1 )
-        {
-            remap_rte->format = 0;
-            *IO_APIC_BASE(apic) = reg;
-            *(IO_APIC_BASE(apic)+4) = *(((u32 *)&old_rte)+0);
-            *IO_APIC_BASE(apic) = reg + 1;
-            *(IO_APIC_BASE(apic)+4) = *(((u32 *)&old_rte)+1);
-        }
-
-        *IO_APIC_BASE(apic) = rte_upper ? (reg + 1) : reg;
-        *(IO_APIC_BASE(apic)+4) = value;
-        return;
-    }
-
     /* mask the interrupt while we change the intremap table */
     saved_mask = remap_rte->mask;
     remap_rte->mask = 1;
@@ -289,7 +325,8 @@ void io_apic_write_remap_rte(
     *(IO_APIC_BASE(apic)+4) = *(((int *)&old_rte)+0);
     remap_rte->mask = saved_mask;
 
-    if ( ioapic_rte_to_remap_entry(iommu, IO_APIC_ID(apic),
+    ASSERT(ioapic_pin < MAX_IOAPIC_PIN_NUM);
+    if ( ioapic_rte_to_remap_entry(iommu, IO_APIC_ID(apic), ioapic_pin,
                                    &old_rte, rte_upper, value) )
     {
         *IO_APIC_BASE(apic) = rte_upper ? (reg + 1) : reg;
@@ -534,11 +571,7 @@ int intremap_setup(struct iommu *iommu)
     while ( !(dmar_readl(iommu->reg, DMAR_GSTS_REG) & DMA_GSTS_SIRTPS) )
     {
         if ( NOW() > (start_time + DMAR_OPERATION_TIMEOUT) )
-        {
-            dprintk(XENLOG_ERR VTDPREFIX,
-                    "Cannot set SIRTP field for interrupt remapping\n");
-            return -ENODEV;
-        }
+            panic("Cannot set SIRTP field for interrupt remapping\n");
         cpu_relax();
     }
 
@@ -550,11 +583,7 @@ int intremap_setup(struct iommu *iommu)
     while ( !(dmar_readl(iommu->reg, DMAR_GSTS_REG) & DMA_GSTS_CFIS) )
     {
         if ( NOW() > (start_time + DMAR_OPERATION_TIMEOUT) )
-        {
-            dprintk(XENLOG_ERR VTDPREFIX,
-                    "Cannot set CFI field for interrupt remapping\n");
-            return -ENODEV;
-        }
+            panic("Cannot set CFI field for interrupt remapping\n");
         cpu_relax();
     }
 
@@ -577,5 +606,7 @@ int intremap_setup(struct iommu *iommu)
     /* After set SIRTP, we should do globally invalidate the IEC */
     iommu_flush_iec_global(iommu);
 
+    init_ioapic_pin_intremap_index();
+
     return 0;
 }