kernel-2.6.18-194.11.1.el5.src.rpm

From: ddugger@redhat.com <ddugger@redhat.com>
Date: Mon, 23 Mar 2009 10:23:35 -0600
Subject: [xen] HVM MSI passthrough support
Message-id: 200903231623.n2NGNZmw022152@sobek.n0ano.com
O-Subject: [RHEL5.4 PATCH 20/21 V2] xen: HVM MSI passthrough support
Bugzilla: 484227
RH-Acked-by: Gerd Hoffmann <kraxel@redhat.com>

Add Xen hypervisor support for MSI passthrough to HVM guests: introduce
a vmsi concept similar to vioapic-delivered IRQs, add an MSI type to the
bind_pt_irq hypercall so the device model can bind a virtual MSI to an
MSI pirq, and add the code to inject the MSI into the HVM guest.

Upstream Status: Accepted (CS 17537)

BZ: 484227

Signed-off-by: Qing He <qing.he@intel.com>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Signed-off-by: Don Dugger <donald.d.dugger@intel.com>
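For illustration: the guest-side MSI attributes travel in the new gmsi.gvec and
gmsi.gflags fields that the device model binds through the MSI irq_type, and
vmsi_deliver() below decodes gflags using the VMSI_*_MASK / GFLAGS_SHIFT_*
constants. A minimal, hypothetical packing helper (not part of this patch; the
function name is invented, the bit layout simply mirrors those constants) could
look like this:

/*
 * Illustrative sketch only: pack guest MSI address/data attributes into
 * the gflags word consumed by vmsi_deliver().  Bit positions follow the
 * VMSI_*_MASK / GFLAGS_SHIFT_* definitions introduced in vmsi.c below.
 */
#include <stdint.h>

uint32_t vmsi_pack_gflags(uint8_t dest_id, int rh, int dm,
                          uint8_t delivery_mode, int trig_mode)
{
    uint32_t gflags = 0;

    gflags |= (uint32_t)dest_id << 0;                 /* dest APIC ID, bits 0-7 */
    gflags |= (rh ? 1u : 0u) << 8;                    /* redirection hint       */
    gflags |= (dm ? 1u : 0u) << 9;                    /* destination mode       */
    gflags |= ((uint32_t)delivery_mode & 0x7u) << 12; /* delivery mode          */
    gflags |= (trig_mode ? 1u : 0u) << 15;            /* trigger mode           */

    return gflags;
}

The device model would then pass such a value in u.msi.gflags (with the guest
vector in u.msi.gvec and the machine MSI pirq in machine_irq) when issuing a
bind_pt_irq of type PT_IRQ_TYPE_MSI, which pt_irq_create_bind_vtd() handles in
the io.c hunk below.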

diff --git a/arch/x86/hvm/Makefile b/arch/x86/hvm/Makefile
index d94fd08..7ef5e3e 100644
--- a/arch/x86/hvm/Makefile
+++ b/arch/x86/hvm/Makefile
@@ -16,3 +16,4 @@ obj-y += vioapic.o
 obj-y += vlapic.o
 obj-y += vpic.o
 obj-y += save.o
+obj-y += vmsi.o
diff --git a/arch/x86/hvm/vmsi.c b/arch/x86/hvm/vmsi.c
new file mode 100644
index 0000000..2ce5722
--- /dev/null
+++ b/arch/x86/hvm/vmsi.c
@@ -0,0 +1,196 @@
+/*
+ *  Copyright (C) 2001  MandrakeSoft S.A.
+ *
+ *    MandrakeSoft S.A.
+ *    43, rue d'Aboukir
+ *    75002 Paris - France
+ *    http://www.linux-mandrake.com/
+ *    http://www.mandrakesoft.com/
+ *
+ *  This library is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public
+ *  License as published by the Free Software Foundation; either
+ *  version 2 of the License, or (at your option) any later version.
+ *
+ *  This library is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public
+ *  License along with this library; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ *
+ * Support for virtual MSI logic
+ * Will be merged with the virtual IOAPIC logic, since most of it is the same
+*/
+
+#include <xen/config.h>
+#include <xen/types.h>
+#include <xen/mm.h>
+#include <xen/xmalloc.h>
+#include <xen/lib.h>
+#include <xen/errno.h>
+#include <xen/sched.h>
+#include <public/hvm/ioreq.h>
+#include <asm/hvm/io.h>
+#include <asm/hvm/vpic.h>
+#include <asm/hvm/vlapic.h>
+#include <asm/hvm/support.h>
+#include <asm/current.h>
+#include <asm/event.h>
+
+static uint32_t vmsi_get_delivery_bitmask(
+    struct domain *d, uint16_t dest, uint8_t dest_mode)
+{
+    uint32_t mask = 0;
+    struct vcpu *v;
+
+    HVM_DBG_LOG(DBG_LEVEL_IOAPIC, "ioapic_get_delivery_bitmask "
+                "dest %d dest_mode %d\n", dest, dest_mode);
+
+    if ( dest_mode == 0 ) /* Physical mode. */
+    {
+        if ( dest == 0xFF ) /* Broadcast. */
+        {
+            for_each_vcpu ( d, v )
+                mask |= 1 << v->vcpu_id;
+            goto out;
+        }
+
+        for_each_vcpu ( d, v )
+        {
+            if ( VLAPIC_ID(vcpu_vlapic(v)) == dest )
+            {
+                mask = 1 << v->vcpu_id;
+                break;
+            }
+        }
+    }
+    else if ( dest != 0 ) /* Logical mode, MDA non-zero. */
+    {
+        for_each_vcpu ( d, v )
+            if ( vlapic_match_logical_addr(vcpu_vlapic(v), dest) )
+                mask |= 1 << v->vcpu_id;
+    }
+
+ out:
+    HVM_DBG_LOG(DBG_LEVEL_IOAPIC, "ioapic_get_delivery_bitmask mask %x\n",
+                mask);
+    return mask;
+}
+
+static void vmsi_inj_irq(
+    struct domain *d,
+    struct vlapic *target,
+    uint8_t vector,
+    uint8_t trig_mode,
+    uint8_t delivery_mode)
+{
+    HVM_DBG_LOG(DBG_LEVEL_IOAPIC, "ioapic_inj_irq "
+                "irq %d trig %d delive mode %d\n",
+                vector, trig_mode, delivery_mode);
+
+    switch ( delivery_mode )
+    {
+    case dest_Fixed:
+    case dest_LowestPrio:
+        if ( vlapic_set_irq(target, vector, trig_mode) )
+            vcpu_kick(vlapic_vcpu(target));
+        break;
+    default:
+        gdprintk(XENLOG_WARNING, "error delivery mode %d\n", delivery_mode);
+        break;
+    }
+}
+
+#define VMSI_DEST_ID_MASK 0xff
+#define VMSI_RH_MASK      0x100
+#define VMSI_DM_MASK      0x200
+#define VMSI_DELIV_MASK   0x7000
+#define VMSI_TRIG_MODE    0x8000
+
+#define GFLAGS_SHIFT_DEST_ID        0
+#define GFLAGS_SHIFT_RH             8
+#define GFLAGS_SHIFT_DM             9
+#define GLFAGS_SHIFT_DELIV_MODE     12
+#define GLFAGS_SHIFT_TRG_MODE       15
+
+int vmsi_deliver(struct domain *d, int pirq)
+{
+    struct hvm_irq_dpci *hvm_irq_dpci = d->arch.hvm_domain.irq.dpci;
+    uint32_t flags = hvm_irq_dpci->mirq[pirq].gmsi.gflags;
+    int vector = hvm_irq_dpci->mirq[pirq].gmsi.gvec;
+    uint16_t dest = (flags & VMSI_DEST_ID_MASK) >> GFLAGS_SHIFT_DEST_ID;
+    uint8_t dest_mode = (flags & VMSI_DM_MASK) >> GFLAGS_SHIFT_DM;
+    uint8_t delivery_mode = (flags & VMSI_DELIV_MASK) >> GLFAGS_SHIFT_DELIV_MODE;
+    uint8_t trig_mode = (flags & VMSI_TRIG_MODE) >> GLFAGS_SHIFT_TRG_MODE;
+    uint32_t deliver_bitmask;
+    struct vlapic *target;
+    struct vcpu *v;
+
+    HVM_DBG_LOG(DBG_LEVEL_IOAPIC,
+                "msi: dest=%x dest_mode=%x delivery_mode=%x "
+                "vector=%x trig_mode=%x\n",
+                dest, dest_mode, delivery_mode, vector, trig_mode);
+
+    if ( !test_bit(_HVM_IRQ_DPCI_MSI, &hvm_irq_dpci->mirq[pirq].flags) )
+    {
+        gdprintk(XENLOG_WARNING, "pirq %x not msi \n", pirq);
+        return 0;
+    }
+
+    deliver_bitmask = vmsi_get_delivery_bitmask(d, dest, dest_mode);
+    if ( !deliver_bitmask )
+    {
+        HVM_DBG_LOG(DBG_LEVEL_IOAPIC, "ioapic deliver "
+                    "no target on destination\n");
+        return 0;
+    }
+
+    switch ( delivery_mode )
+    {
+    case dest_LowestPrio:
+    {
+        /* N.B. backport, from apic_lowest_prio, vector is not used */
+        target = apic_round_robin(d, 0, deliver_bitmask);
+        if ( target != NULL )
+            vmsi_inj_irq(d, target, vector, trig_mode, delivery_mode);
+        else
+            HVM_DBG_LOG(DBG_LEVEL_IOAPIC, "null round robin: "
+                        "mask=%x vector=%x delivery_mode=%x\n",
+                        deliver_bitmask, vector, dest_LowestPrio);
+        break;
+    }
+
+    case dest_Fixed:
+    case dest_ExtINT:
+    {
+        uint8_t bit;
+        for ( bit = 0; deliver_bitmask != 0; bit++ )
+        {
+            if ( !(deliver_bitmask & (1 << bit)) )
+                continue;
+            deliver_bitmask &= ~(1 << bit);
+            v = d->vcpu[bit];
+            if ( v != NULL )
+            {
+                target = vcpu_vlapic(v);
+                vmsi_inj_irq(d, target, vector, trig_mode, delivery_mode);
+            }
+        }
+        break;
+    }
+
+    case dest_SMI:
+    case dest_NMI:
+    case dest_INIT:
+    case dest__reserved_2:
+    default:
+        gdprintk(XENLOG_WARNING, "Unsupported delivery mode %d\n",
+                 delivery_mode);
+        break;
+    }
+    return 1;
+}
+
diff --git a/drivers/passthrough/io.c b/drivers/passthrough/io.c
index 1c96032..a627b99 100644
--- a/drivers/passthrough/io.c
+++ b/drivers/passthrough/io.c
@@ -88,65 +88,97 @@ int pt_irq_create_bind_vtd(
         return -EINVAL;
     }
 
-    machine_gsi = pt_irq_bind->machine_irq;
-    device = pt_irq_bind->u.pci.device;
-    intx = pt_irq_bind->u.pci.intx;
-    guest_gsi = hvm_pci_intx_gsi(device, intx);
-    link = hvm_pci_intx_link(device, intx);
-    hvm_irq_dpci->link_cnt[link]++;
-
-    digl = xmalloc(struct dev_intx_gsi_link);
-    if ( !digl )
+    if ( pt_irq_bind->irq_type == PT_IRQ_TYPE_MSI )
     {
-        spin_unlock(&d->event_lock);
-        return -ENOMEM;
-    }
-
-    digl->device = device;
-    digl->intx = intx;
-    digl->gsi = guest_gsi;
-    digl->link = link;
-    list_add_tail(&digl->list,
-                  &hvm_irq_dpci->mirq[machine_gsi].digl_list);
-
-    hvm_irq_dpci->girq[guest_gsi].valid = 1;
-    hvm_irq_dpci->girq[guest_gsi].device = device;
-    hvm_irq_dpci->girq[guest_gsi].intx = intx;
-    hvm_irq_dpci->girq[guest_gsi].machine_gsi = machine_gsi;
 
-    /* Bind the same mirq once in the same domain */
-    if ( !test_and_set_bit(machine_gsi, hvm_irq_dpci->mapping))
-    {
-        unsigned int vector = domain_irq_to_vector(d, machine_gsi);
-
-        hvm_irq_dpci->mirq[machine_gsi].dom = d;
+        if ( !test_and_set_bit(pirq, hvm_irq_dpci->mapping))
+        {
+            set_bit(_HVM_IRQ_DPCI_MSI, &hvm_irq_dpci->mirq[pirq].flags);
+            hvm_irq_dpci->mirq[pirq].gmsi.gvec = pt_irq_bind->u.msi.gvec;
+            hvm_irq_dpci->mirq[pirq].gmsi.gflags = pt_irq_bind->u.msi.gflags;
+            hvm_irq_dpci->msi_gvec_pirq[pt_irq_bind->u.msi.gvec] = pirq;
+            /* bind after hvm_irq_dpci is setup to avoid race with irq handler*/
+            rc = pirq_guest_bind(d->vcpu[0], pirq, 0);
+            if ( unlikely(rc) )
+            {
+                hvm_irq_dpci->msi_gvec_pirq[pt_irq_bind->u.msi.gvec] = 0;
+                hvm_irq_dpci->mirq[pirq].gmsi.gflags = 0;
+                hvm_irq_dpci->mirq[pirq].gmsi.gvec = 0;
+                clear_bit(_HVM_IRQ_DPCI_MSI, &hvm_irq_dpci->mirq[pirq].flags);
+                clear_bit(pirq, hvm_irq_dpci->mapping);
+                spin_unlock(&d->event_lock);
+                return rc;
+            }
+        }
+        else if (hvm_irq_dpci->mirq[pirq].gmsi.gvec != pt_irq_bind->u.msi.gvec
+                ||hvm_irq_dpci->msi_gvec_pirq[pt_irq_bind->u.msi.gvec] != pirq)
 
-        /* Init timer before binding */
-        init_timer(&hvm_irq_dpci->hvm_timer[vector],
-                   pt_irq_time_out, &hvm_irq_dpci->mirq[machine_gsi], 0);
-        /* Deal with gsi for legacy devices */
-        rc = pirq_guest_bind(d->vcpu[0], machine_gsi, BIND_PIRQ__WILL_SHARE);
-        if ( unlikely(rc) )
         {
-            kill_timer(&hvm_irq_dpci->hvm_timer[vector]);
-            hvm_irq_dpci->mirq[machine_gsi].dom = NULL;
-            clear_bit(machine_gsi, hvm_irq_dpci->mapping);
-            hvm_irq_dpci->girq[guest_gsi].machine_gsi = 0;
-            hvm_irq_dpci->girq[guest_gsi].intx = 0;
-            hvm_irq_dpci->girq[guest_gsi].device = 0;
-            hvm_irq_dpci->girq[guest_gsi].valid = 0;
-            list_del(&digl->list);
-            hvm_irq_dpci->link_cnt[link]--;
             spin_unlock(&d->event_lock);
-            xfree(digl);
-            return rc;
+            return -EBUSY;
         }
     }
+    else
+    {
+        machine_gsi = pt_irq_bind->machine_irq;
+        device = pt_irq_bind->u.pci.device;
+        intx = pt_irq_bind->u.pci.intx;
+        guest_gsi = hvm_pci_intx_gsi(device, intx);
+        link = hvm_pci_intx_link(device, intx);
+        hvm_irq_dpci->link_cnt[link]++;
+
+        digl = xmalloc(struct dev_intx_gsi_link);
+        if ( !digl )
+        {
+            spin_unlock(&d->event_lock);
+            return -ENOMEM;
+        }
 
-    gdprintk(XENLOG_INFO VTDPREFIX,
-             "VT-d irq bind: m_irq = %x device = %x intx = %x\n",
-             machine_gsi, device, intx);
+        digl->device = device;
+        digl->intx = intx;
+        digl->gsi = guest_gsi;
+        digl->link = link;
+        list_add_tail(&digl->list,
+                      &hvm_irq_dpci->mirq[machine_gsi].digl_list);
+
+        hvm_irq_dpci->girq[guest_gsi].valid = 1;
+        hvm_irq_dpci->girq[guest_gsi].device = device;
+        hvm_irq_dpci->girq[guest_gsi].intx = intx;
+        hvm_irq_dpci->girq[guest_gsi].machine_gsi = machine_gsi;
+
+        /* Bind the same mirq once in the same domain */
+        if ( !test_and_set_bit(machine_gsi, hvm_irq_dpci->mapping))
+        {
+            unsigned int vector = domain_irq_to_vector(d, machine_gsi);
 
+            hvm_irq_dpci->mirq[machine_gsi].dom = d;
+
+            /* Init timer before binding */
+            init_timer(&hvm_irq_dpci->hvm_timer[vector],
+                       pt_irq_time_out, &hvm_irq_dpci->mirq[machine_gsi], 0);
+            /* Deal with gsi for legacy devices */
+            rc = pirq_guest_bind(d->vcpu[0], machine_gsi, BIND_PIRQ__WILL_SHARE);
+            if ( unlikely(rc) )
+            {
+                kill_timer(&hvm_irq_dpci->hvm_timer[vector]);
+                hvm_irq_dpci->mirq[machine_gsi].dom = NULL;
+                clear_bit(machine_gsi, hvm_irq_dpci->mapping);
+                hvm_irq_dpci->girq[guest_gsi].machine_gsi = 0;
+                hvm_irq_dpci->girq[guest_gsi].intx = 0;
+                hvm_irq_dpci->girq[guest_gsi].device = 0;
+                hvm_irq_dpci->girq[guest_gsi].valid = 0;
+                list_del(&digl->list);
+                hvm_irq_dpci->link_cnt[link]--;
+                spin_unlock(&d->event_lock);
+                xfree(digl);
+                return rc;
+            }
+        }
+
+        gdprintk(XENLOG_INFO VTDPREFIX,
+                 "VT-d irq bind: m_irq = %x device = %x intx = %x\n",
+                 machine_gsi, device, intx);
+    }
     spin_unlock(&d->event_lock);
     return 0;
 }
@@ -243,12 +275,53 @@ int hvm_do_IRQ_dpci(struct domain *d, unsigned int mirq)
     return 1;
 }
 
+#ifdef SUPPORT_MSI_REMAPPING
+void hvm_dpci_msi_eoi(struct domain *d, int vector)
+{
+    struct hvm_irq_dpci *hvm_irq_dpci = d->arch.hvm_domain.irq.dpci;
+    irq_desc_t *desc;
+    int pirq;
+
+    if ( !iommu_enabled || (hvm_irq_dpci == NULL) )
+       return;
+
+    spin_lock(&d->event_lock);
+    pirq = hvm_irq_dpci->msi_gvec_pirq[vector];
+
+    if ( ( pirq >= 0 ) && (pirq < NR_IRQS) &&
+          test_bit(pirq, hvm_irq_dpci->mapping) &&
+         (test_bit(_HVM_IRQ_DPCI_MSI, &hvm_irq_dpci->mirq[pirq].flags)))
+     {
+         BUG_ON(!local_irq_is_enabled());
+         desc = domain_spin_lock_irq_desc(d, pirq, NULL);
+         if (!desc)
+         {
+            spin_unlock(&d->event_lock);
+            return;
+         }
+
+         desc->status &= ~IRQ_INPROGRESS;
+         spin_unlock_irq(&desc->lock);
+
+         pirq_guest_eoi(d, pirq);
+     }
+
+    spin_unlock(&d->event_lock);
+}
+
+extern int vmsi_deliver(struct domain *d, int pirq);
+static int hvm_pci_msi_assert(struct domain *d, int pirq)
+{
+    return vmsi_deliver(d, pirq);
+}
+#endif
+
 void hvm_dirq_assist(struct vcpu *v)
 {
     unsigned int irq;
     uint32_t device, intx;
     struct domain *d = v->domain;
-    struct hvm_irq_dpci *hvm_irq_dpci = domain_get_irq_dpci(d);
+    struct hvm_irq_dpci *hvm_irq_dpci = d->arch.hvm_domain.irq.dpci;
     struct dev_intx_gsi_link *digl;
 
     if ( !iommu_enabled || (v->vcpu_id != 0) || (hvm_irq_dpci == NULL) )
@@ -262,6 +335,14 @@ void hvm_dirq_assist(struct vcpu *v)
             continue;
 
         spin_lock(&d->event_lock);
+#ifdef SUPPORT_MSI_REMAPPING
+        if ( test_bit(_HVM_IRQ_DPCI_MSI, &hvm_irq_dpci->mirq[irq].flags) )
+        {
+            hvm_pci_msi_assert(d, irq);
+            spin_unlock(&d->event_lock);
+            continue;
+        }
+#endif
         stop_timer(&hvm_irq_dpci->hvm_timer[domain_irq_to_vector(d, irq)]);
 
         list_for_each_entry ( digl, &hvm_irq_dpci->mirq[irq].digl_list, list )
@@ -324,7 +405,8 @@ void hvm_dpci_eoi(struct domain *d, unsigned int guest_gsi,
              * No need to get vector lock for timer
              * since interrupt is still not EOIed
              */
-            stop_timer(&hvm_irq_dpci->hvm_timer[domain_irq_to_vector(d, machine_gsi)]);
+            stop_timer(&hvm_irq_dpci->hvm_timer[
+                domain_irq_to_vector(d, machine_gsi)]);
             pirq_guest_eoi(d, machine_gsi);
         }
     }
diff --git a/include/asm-x86/hvm/irq.h b/include/asm-x86/hvm/irq.h
index b6e78ba..781b845 100644
--- a/include/asm-x86/hvm/irq.h
+++ b/include/asm-x86/hvm/irq.h
@@ -125,4 +125,11 @@ int cpu_has_pending_irq(struct vcpu *v);
 int get_isa_irq_vector(struct vcpu *vcpu, int irq, int type);
 int is_isa_irq_masked(struct vcpu *v, int isa_irq);
 
+/*
+ * Currently IA64 Xen doesn't support MSI. So for x86, we define this macro
+ * to control the conditional compilation of some MSI-related functions.
+ * This macro will be removed once IA64 has MSI support.
+ */
+#define SUPPORT_MSI_REMAPPING 1
+
 #endif /* __ASM_X86_HVM_IRQ_H__ */