Sophie

Sophie

distrib > Scientific%20Linux > 5x > x86_64 > by-pkgid > fc11cd6e1c513a17304da94a5390f3cd > files > 4587

kernel-2.6.18-194.11.1.el5.src.rpm

From: ddugger@redhat.com <ddugger@redhat.com>
Date: Mon, 23 Mar 2009 10:22:59 -0600
Subject: [xen] x86: IRQ injection changes for VT-d
Message-id: 200903231622.n2NGMx1v022051@sobek.n0ano.com
O-Subject: [RHEL5.4 PATCH 5/21 V2] IRQ injection changes for VT-d
Bugzilla: 484227
RH-Acked-by: Gerd Hoffmann <kraxel@redhat.com>
RH-Acked-by: Chris Lalancette <clalance@redhat.com>

implement irq functions for VT-d, intercept interrupts for VT-d
in __do_IRQ_guest, and perform the EOI handling separately.

Upstream Status: Accepted (CS 15903, 16268)

BZ: 484227

Signed-off-by: Weidong Han <weidong.han@intel.com>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Signed-off-by: Don Dugger <donald.d.dugger@intel.com>

diff --git a/arch/x86/hvm/vioapic.c b/arch/x86/hvm/vioapic.c
index d755caa..3266c39 100644
--- a/arch/x86/hvm/vioapic.c
+++ b/arch/x86/hvm/vioapic.c
@@ -458,6 +458,14 @@ void vioapic_update_EOI(struct domain *d, int vector)
     ent = &vioapic->redirtbl[gsi];
 
     ent->fields.remote_irr = 0;
+
+    if ( iommu_enabled )
+    {
+        spin_unlock(&d->arch.hvm_domain.irq_lock);
+        hvm_dpci_eoi(current->domain, gsi, ent);
+        spin_lock(&d->arch.hvm_domain.irq_lock);
+    }
+
     if ( (ent->fields.trig_mode == VIOAPIC_LEVEL_TRIG) &&
          !ent->fields.mask &&
          hvm_irq->gsi_assert_count[gsi] )
diff --git a/arch/x86/hvm/vmx/intr.c b/arch/x86/hvm/vmx/intr.c
index cdea237..f4d1e15 100644
--- a/arch/x86/hvm/vmx/intr.c
+++ b/arch/x86/hvm/vmx/intr.c
@@ -114,6 +114,7 @@ asmlinkage void vmx_intr_assist(void)
     unsigned long inst_len;
 
     pt_update_irq(v);
+    hvm_dirq_assist(v);
 
     update_tpr_threshold(vcpu_vlapic(v));
 
diff --git a/arch/x86/hvm/vpic.c b/arch/x86/hvm/vpic.c
index 19ef5d0..0835698 100644
--- a/arch/x86/hvm/vpic.c
+++ b/arch/x86/hvm/vpic.c
@@ -182,8 +182,7 @@ static void vpic_ioport_write(
 
     vpic_lock(vpic);
 
-    addr &= 1;
-    if ( addr == 0 )
+    if ( (addr & 1) == 0 )
     {
         if ( val & 0x10 )
         {
@@ -250,7 +249,13 @@ static void vpic_ioport_write(
                 vpic->isr &= ~(1 << irq);
                 if ( cmd == 7 )
                     vpic->priority_add = (irq + 1) & 7;
-                break;
+                /* Release lock and EOI the physical interrupt (if any). */
+                vpic_update_int_output(vpic);
+                vpic_unlock(vpic);
+                hvm_dpci_eoi(current->domain,
+                             hvm_isa_irq_to_gsi((addr >> 7) ? (irq|8) : irq),
+                             NULL);
+                return; /* bail immediately */
             case 6: /* Set Priority                */
                 vpic->priority_add = (val + 1) & 7;
                 break;
diff --git a/arch/x86/irq.c b/arch/x86/irq.c
index b9d761c..423d451 100644
--- a/arch/x86/irq.c
+++ b/arch/x86/irq.c
@@ -236,7 +236,8 @@ static void __do_IRQ_guest(int vector)
         if ( (action->ack_type != ACKTYPE_NONE) &&
              !test_and_set_bit(irq, d->pirq_mask) )
             action->in_flight++;
-        send_guest_pirq(d, irq);
+        if ( !hvm_do_IRQ_dpci(d, irq) )
+            send_guest_pirq(d, irq);
     }
 }
 
diff --git a/drivers/passthrough/Makefile b/drivers/passthrough/Makefile
index 662f89c..259497f 100644
--- a/drivers/passthrough/Makefile
+++ b/drivers/passthrough/Makefile
@@ -3,3 +3,4 @@ subdir-$(x86_64) += vtd
 
 obj-y += iommu.o
 obj-y += pci.o
+obj-y += io.o
diff --git a/drivers/passthrough/io.c b/drivers/passthrough/io.c
new file mode 100644
index 0000000..0452e07
--- /dev/null
+++ b/drivers/passthrough/io.c
@@ -0,0 +1,332 @@
+/*
+ * Copyright (c) 2006, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Copyright (C) Allen Kay <allen.m.kay@intel.com>
+ * Copyright (C) Xiaohui Xin <xiaohui.xin@intel.com>
+ */
+
+#include <xen/event.h>
+#include <xen/iommu.h>
+#include <asm/hvm/irq.h>
+#include <asm/hvm/iommu.h>
+#include <xen/hvm/irq.h>
+
+static void pt_irq_time_out(void *data)
+{
+    struct hvm_mirq_dpci_mapping *irq_map = data;
+    unsigned int guest_gsi, machine_gsi = 0;
+    int vector;
+    struct hvm_irq_dpci *dpci = NULL;
+    struct dev_intx_gsi_link *digl;
+    uint32_t device, intx;
+
+    spin_lock(&irq_map->dom->evtchn_lock);
+
+    dpci = domain_get_irq_dpci(irq_map->dom);
+    ASSERT(dpci);
+    list_for_each_entry ( digl, &irq_map->digl_list, list )
+    {
+        guest_gsi = digl->gsi;
+        machine_gsi = dpci->girq[guest_gsi].machine_gsi;
+        device = digl->device;
+        intx = digl->intx;
+        hvm_pci_intx_deassert(irq_map->dom, device, intx);
+    }
+
+    clear_bit(machine_gsi, dpci->dirq_mask);
+    vector = irq_to_vector(machine_gsi);
+    dpci->mirq[machine_gsi].pending = 0;
+    spin_unlock(&irq_map->dom->evtchn_lock);
+    pirq_guest_eoi(irq_map->dom, machine_gsi);
+}
+
+int pt_irq_create_bind_vtd(
+    struct domain *d, xen_domctl_bind_pt_irq_t *pt_irq_bind)
+{
+    struct hvm_irq_dpci *hvm_irq_dpci = NULL;
+    uint32_t machine_gsi, guest_gsi;
+    uint32_t device, intx, link;
+    struct dev_intx_gsi_link *digl;
+    int rc, pirq = pt_irq_bind->machine_irq;
+
+    if ( pirq < 0 || pirq >= NR_IRQS )
+        return -EINVAL;
+
+    spin_lock(&d->evtchn_lock);
+
+    hvm_irq_dpci = domain_get_irq_dpci(d);
+    if ( hvm_irq_dpci == NULL )
+    {
+        hvm_irq_dpci = xmalloc(struct hvm_irq_dpci);
+        if ( hvm_irq_dpci == NULL )
+        {
+            spin_unlock(&d->evtchn_lock);
+            return -ENOMEM;
+        }
+        memset(hvm_irq_dpci, 0, sizeof(*hvm_irq_dpci));
+        for ( int i = 0; i < NR_IRQS; i++ )
+            INIT_LIST_HEAD(&hvm_irq_dpci->mirq[i].digl_list);
+    }
+
+    if ( domain_set_irq_dpci(d, hvm_irq_dpci) == 0 )
+    {
+        xfree(hvm_irq_dpci);
+        spin_unlock(&d->evtchn_lock);
+        return -EINVAL;
+    }
+
+    machine_gsi = pt_irq_bind->machine_irq;
+    device = pt_irq_bind->u.pci.device;
+    intx = pt_irq_bind->u.pci.intx;
+    guest_gsi = hvm_pci_intx_gsi(device, intx);
+    link = hvm_pci_intx_link(device, intx);
+    hvm_irq_dpci->link_cnt[link]++;
+
+    digl = xmalloc(struct dev_intx_gsi_link);
+    if ( !digl )
+    {
+        spin_unlock(&d->evtchn_lock);
+        return -ENOMEM;
+    }
+
+    digl->device = device;
+    digl->intx = intx;
+    digl->gsi = guest_gsi;
+    digl->link = link;
+    list_add_tail(&digl->list,
+                  &hvm_irq_dpci->mirq[machine_gsi].digl_list);
+
+    hvm_irq_dpci->girq[guest_gsi].valid = 1;
+    hvm_irq_dpci->girq[guest_gsi].device = device;
+    hvm_irq_dpci->girq[guest_gsi].intx = intx;
+    hvm_irq_dpci->girq[guest_gsi].machine_gsi = machine_gsi;
+
+    /* Bind the same mirq once in the same domain */
+    if ( !test_and_set_bit(machine_gsi, hvm_irq_dpci->mapping))
+    {
+        unsigned int vector = irq_to_vector(machine_gsi);
+
+        hvm_irq_dpci->mirq[machine_gsi].dom = d;
+
+        /* Init timer before binding */
+        init_timer(&hvm_irq_dpci->hvm_timer[vector],
+                   pt_irq_time_out, &hvm_irq_dpci->mirq[machine_gsi], 0);
+        /* Deal with gsi for legacy devices */
+        rc = pirq_guest_bind(d->vcpu[0], machine_gsi, BIND_PIRQ__WILL_SHARE);
+        if ( unlikely(rc) )
+        {
+            kill_timer(&hvm_irq_dpci->hvm_timer[vector]);
+            hvm_irq_dpci->mirq[machine_gsi].dom = NULL;
+            clear_bit(machine_gsi, hvm_irq_dpci->mapping);
+            hvm_irq_dpci->girq[guest_gsi].machine_gsi = 0;
+            hvm_irq_dpci->girq[guest_gsi].intx = 0;
+            hvm_irq_dpci->girq[guest_gsi].device = 0;
+            hvm_irq_dpci->girq[guest_gsi].valid = 0;
+            list_del(&digl->list);
+            hvm_irq_dpci->link_cnt[link]--;
+            spin_unlock(&d->evtchn_lock);
+            xfree(digl);
+            return rc;
+        }
+    }
+
+    gdprintk(XENLOG_INFO VTDPREFIX,
+             "VT-d irq bind: m_irq = %x device = %x intx = %x\n",
+             machine_gsi, device, intx);
+
+    spin_unlock(&d->evtchn_lock);
+    return 0;
+}
+
+int pt_irq_destroy_bind_vtd(
+    struct domain *d, xen_domctl_bind_pt_irq_t *pt_irq_bind)
+{
+    struct hvm_irq_dpci *hvm_irq_dpci = NULL;
+    uint32_t machine_gsi, guest_gsi;
+    uint32_t device, intx, link;
+    struct list_head *digl_list, *tmp;
+    struct dev_intx_gsi_link *digl;
+
+    machine_gsi = pt_irq_bind->machine_irq;
+    device = pt_irq_bind->u.pci.device;
+    intx = pt_irq_bind->u.pci.intx;
+    guest_gsi = hvm_pci_intx_gsi(device, intx);
+    link = hvm_pci_intx_link(device, intx);
+
+    gdprintk(XENLOG_INFO,
+             "pt_irq_destroy_bind_vtd: machine_gsi=%d "
+             "guest_gsi=%d, device=%d, intx=%d.\n",
+             machine_gsi, guest_gsi, device, intx);
+    spin_lock(&d->evtchn_lock);
+
+    hvm_irq_dpci = domain_get_irq_dpci(d);
+
+    if ( hvm_irq_dpci == NULL )
+    {
+        spin_unlock(&d->evtchn_lock);
+        return -EINVAL;
+    }
+
+    hvm_irq_dpci->link_cnt[link]--;
+    memset(&hvm_irq_dpci->girq[guest_gsi], 0,
+           sizeof(struct hvm_girq_dpci_mapping));
+
+    /* clear the mirq info */
+    if ( test_bit(machine_gsi, hvm_irq_dpci->mapping))
+    {
+        list_for_each_safe ( digl_list, tmp,
+                &hvm_irq_dpci->mirq[machine_gsi].digl_list )
+        {
+            digl = list_entry(digl_list,
+                    struct dev_intx_gsi_link, list);
+            if ( digl->device == device &&
+                 digl->intx   == intx &&
+                 digl->link   == link &&
+                 digl->gsi    == guest_gsi )
+            {
+                list_del(&digl->list);
+                xfree(digl);
+            }
+        }
+
+        if ( list_empty(&hvm_irq_dpci->mirq[machine_gsi].digl_list) )
+        {
+            pirq_guest_unbind(d, machine_gsi);
+            kill_timer(&hvm_irq_dpci->hvm_timer[irq_to_vector(machine_gsi)]);
+            hvm_irq_dpci->mirq[machine_gsi].dom   = NULL;
+            hvm_irq_dpci->mirq[machine_gsi].flags = 0;
+            clear_bit(machine_gsi, hvm_irq_dpci->mapping);
+        }
+    }
+    spin_unlock(&d->evtchn_lock);
+    gdprintk(XENLOG_INFO,
+             "XEN_DOMCTL_irq_unmapping: m_irq = %x device = %x intx = %x\n",
+             machine_gsi, device, intx);
+
+    return 0;
+}
+
+int hvm_do_IRQ_dpci(struct domain *d, unsigned int mirq)
+{
+    struct hvm_irq_dpci *dpci = domain_get_irq_dpci(d);
+
+    ASSERT(spin_is_locked(&irq_desc[irq_to_vector(mirq)].lock));
+    if ( !iommu_enabled || (d == dom0) || !dpci ||
+         !test_bit(mirq, dpci->mapping))
+        return 0;
+
+    /*
+     * Set a timer here to avoid situations where the IRQ line is shared, and
+     * the device belonging to the pass-through guest is not yet active. In
+     * this case the guest may not pick up the interrupt (e.g., masked at the
+     * PIC) and we need to detect that.
+     */
+    set_bit(mirq, dpci->dirq_mask);
+    if ( !test_bit(_HVM_IRQ_DPCI_MSI, &dpci->mirq[mirq].flags) )
+        set_timer(&dpci->hvm_timer[irq_to_vector(mirq)],
+                  NOW() + PT_IRQ_TIME_OUT);
+    vcpu_kick(d->vcpu[0]);
+
+    return 1;
+}
+
+void hvm_dirq_assist(struct vcpu *v)
+{
+    unsigned int irq;
+    uint32_t device, intx;
+    struct domain *d = v->domain;
+    struct hvm_irq_dpci *hvm_irq_dpci = domain_get_irq_dpci(d);
+    struct dev_intx_gsi_link *digl;
+
+    if ( !iommu_enabled || (v->vcpu_id != 0) || (hvm_irq_dpci == NULL) )
+        return;
+
+    for ( irq = find_first_bit(hvm_irq_dpci->dirq_mask, NR_IRQS);
+          irq < NR_IRQS;
+          irq = find_next_bit(hvm_irq_dpci->dirq_mask, NR_IRQS, irq + 1) )
+    {
+        if ( !test_and_clear_bit(irq, &hvm_irq_dpci->dirq_mask) )
+            continue;
+
+        spin_lock(&d->evtchn_lock);
+        stop_timer(&hvm_irq_dpci->hvm_timer[irq_to_vector(irq)]);
+
+        list_for_each_entry ( digl, &hvm_irq_dpci->mirq[irq].digl_list, list )
+        {
+            device = digl->device;
+            intx = digl->intx;
+            hvm_pci_intx_assert(d, device, intx);
+            hvm_irq_dpci->mirq[irq].pending++;
+        }
+
+        /*
+         * Set a timer to see if the guest can finish the interrupt or not. For
+         * example, the guest OS may unmask the PIC during boot, before the
+         * guest driver is loaded. hvm_pci_intx_assert() may succeed, but the
+         * guest will never deal with the irq, then the physical interrupt line
+         * will never be deasserted.
+         */
+        set_timer(&hvm_irq_dpci->hvm_timer[irq_to_vector(irq)],
+                  NOW() + PT_IRQ_TIME_OUT);
+        spin_unlock(&d->evtchn_lock);
+    }
+}
+
+void hvm_dpci_eoi(struct domain *d, unsigned int guest_gsi,
+                  union vioapic_redir_entry *ent)
+{
+    struct hvm_irq_dpci *hvm_irq_dpci = NULL;
+    uint32_t device, intx, machine_gsi;
+
+    if ( !iommu_enabled)
+        return;
+
+    if ( guest_gsi < NR_ISAIRQS )
+    {
+        hvm_dpci_isairq_eoi(d, guest_gsi);
+        return;
+    }
+
+    spin_lock(&d->evtchn_lock);
+    hvm_irq_dpci = domain_get_irq_dpci(d);
+
+    if((hvm_irq_dpci == NULL) ||
+         (guest_gsi >= NR_ISAIRQS &&
+          !hvm_irq_dpci->girq[guest_gsi].valid) )
+    {
+        spin_unlock(&d->evtchn_lock);
+        return;
+    }
+
+    device = hvm_irq_dpci->girq[guest_gsi].device;
+    intx = hvm_irq_dpci->girq[guest_gsi].intx;
+    hvm_pci_intx_deassert(d, device, intx);
+
+    machine_gsi = hvm_irq_dpci->girq[guest_gsi].machine_gsi;
+    if ( --hvm_irq_dpci->mirq[machine_gsi].pending == 0 )
+    {
+        if ( (ent == NULL) || !ent->fields.mask )
+        {
+            /*
+             * No need to get vector lock for timer
+             * since interrupt is still not EOIed
+             */
+            stop_timer(&hvm_irq_dpci->hvm_timer[irq_to_vector(machine_gsi)]);
+            pirq_guest_eoi(d, machine_gsi);
+        }
+    }
+    spin_unlock(&d->evtchn_lock);
+}
diff --git a/include/asm-x86/hvm/io.h b/include/asm-x86/hvm/io.h
index 15981ff..c3b4d76 100644
--- a/include/asm-x86/hvm/io.h
+++ b/include/asm-x86/hvm/io.h
@@ -144,6 +144,9 @@ void send_invalidate_req(void);
 extern void handle_mmio(unsigned long gpa);
 extern void hvm_interrupt_post(struct vcpu *v, int vector, int type);
 extern void hvm_io_assist(void);
+void hvm_dpci_eoi(struct domain *d, unsigned int guest_irq,
+                  union vioapic_redir_entry *ent);
+
 
 #endif /* __ASM_X86_HVM_IO_H__ */