Sophie

Sophie

distrib > Scientific%20Linux > 5x > x86_64 > by-pkgid > fc11cd6e1c513a17304da94a5390f3cd > files > 4569

kernel-2.6.18-194.11.1.el5.src.rpm

From: ddugger@redhat.com <ddugger@redhat.com>
Date: Mon, 23 Mar 2009 10:23:15 -0600
Subject: [xen] x86: emulate accesses to PCI window regs cf8/cfc
Message-id: 200903231623.n2NGNFWm022091@sobek.n0ano.com
O-Subject: [RHEL5.4 PATCH 11/21 V2] Emulate accesses to PCI window registers cf8/cfc to synchronise with accesses by the hypervisor itself. All users of cf8/cfc go through new access functions which take the appropriate spinlock.
Bugzilla: 484227
RH-Acked-by: Chris Lalancette <clalance@redhat.com>
RH-Acked-by: Gerd Hoffmann <kraxel@redhat.com>
RH-Acked-by: Chris Lalancette <clalance@redhat.com>

This is backported from xen-unstable (c/s 17441 and 17465). Dom0 will
access PCI window register cf8/cfc, now hypervisor also accesses PCI
configure space using PCI window register cf8/cfc for VT-d. So it's
possible that there is conflict, though the possibility is very small.
This patch emulates accesses to PCI window registers cf8/cfc to
synchronise with accesses by the hypervisor itself, then fixes the
issue.

BTW, there are still many changes on I/O port access between RHEL 5.4
and xen-unstable, the but they are not necessary for VT-d, we didn't
backport them, so you can find two difference between this patch and
xen-unstable: there are no check_guest_io_breakpoint() and
pv_post_outb_hook() in traps.c in RHEL 5.4.

Upstream Status: Accepted (CS 17441, 17465)

BZ: 484227

Signed-off-by: Gerd Hoffman <kraxel@redhat.com>
Signed-off-by: Weidong Han <weidong.han@intel.com>
Signed-off-by: Don Dugger <donald.d.dugger@intel.com>

diff --git a/arch/x86/cpu/amd.c b/arch/x86/cpu/amd.c
index 42947f0..f848afe 100644
--- a/arch/x86/cpu/amd.c
+++ b/arch/x86/cpu/amd.c
@@ -3,6 +3,7 @@
 #include <xen/bitops.h>
 #include <xen/mm.h>
 #include <xen/smp.h>
+#include <xen/pci.h>
 #include <asm/io.h>
 #include <asm/msr.h>
 #include <asm/processor.h>
@@ -66,19 +67,6 @@ static int c1_ramping_may_cause_clock_drift(struct cpuinfo_x86 *c)
 	return 1;
 }
 
-/* PCI access functions. Should be safe to use 0xcf8/0xcfc port accesses here. */
-static u8 pci_read_byte(u32 bus, u32 dev, u32 fn, u32 reg)
-{
-	outl((1U<<31) | (bus << 16) | (dev << 11) | (fn << 8) | (reg & ~3), 0xcf8);
-	return inb(0xcfc + (reg & 3));
-}
-
-static void pci_write_byte(u32 bus, u32 dev, u32 fn, u32 reg, u8 val)
-{
-	outl((1U<<31) | (bus << 16) | (dev << 11) | (fn << 8) | (reg & ~3), 0xcf8);
-	outb(val, 0xcfc + (reg & 3));
-}
-
 /*
  * Disable C1-Clock ramping if enabled in PMM7.CpuLowPwrEnh on 8th-generation
  * cores only. Assume BIOS has setup all Northbridges equivalently.
@@ -88,18 +76,18 @@ static void disable_c1_ramping(void)
 	u8 pmm7;
 	int node, nr_nodes;
 
-	/* Read the number of nodes from the first Northbridge. */
-	nr_nodes = ((pci_read_byte(0, 0x18, 0x0, 0x60)>>4)&0x07)+1;
-	for (node = 0; node < nr_nodes; node++) {
-		/* PMM7: bus=0, dev=0x18+node, function=0x3, register=0x87. */
-		pmm7 = pci_read_byte(0, 0x18+node, 0x3, 0x87);
-		/* Invalid read means we've updated every Northbridge. */
-		if (pmm7 == 0xFF)
-			break;
-		pmm7 &= 0xFC; /* clear pmm7[1:0] */
-		pci_write_byte(0, 0x18+node, 0x3, 0x87, pmm7);
-		printk ("AMD: Disabling C1 Clock Ramping Node #%x\n", node);
-	}
+    /* Read the number of nodes from the first Northbridge. */
+    nr_nodes = ((pci_conf_read32(0, 0x18, 0x0, 0x60)>>4)&0x07)+1;
+    for (node = 0; node < nr_nodes; node++) {
+        /* PMM7: bus=0, dev=0x18+node, function=0x3, register=0x87. */
+        pmm7 = pci_conf_read8(0, 0x18+node, 0x3, 0x87);
+        /* Invalid read means we've updated every Northbridge. */
+        if (pmm7 == 0xFF)
+            break;
+        pmm7 &= 0xFC; /* clear pmm7[1:0] */
+        pci_conf_write8(0, 0x18+node, 0x3, 0x87, pmm7);
+        printk ("AMD: Disabling C1 Clock Ramping Node #%x\n", node);
+    }
 }
 
 static void __init init_amd(struct cpuinfo_x86 *c)
diff --git a/arch/x86/domain_build.c b/arch/x86/domain_build.c
index 8dcf816..59141cc 100644
--- a/arch/x86/domain_build.c
+++ b/arch/x86/domain_build.c
@@ -985,6 +985,8 @@ int __init construct_dom0(
     rc |= ioports_deny_access(dom0, 0x40, 0x43);
     /* PIT Channel 2 / PC Speaker Control. */
     rc |= ioports_deny_access(dom0, 0x61, 0x61);
+    /* PCI configuration space (NB. 0xcf8 has special treatment). */
+    rc |= ioports_deny_access(dom0, 0xcfc, 0xcff);
     /* Command-line I/O ranges. */
     process_dom0_ioports_disable();
 
diff --git a/arch/x86/traps.c b/arch/x86/traps.c
index 911694a..b6bf936 100644
--- a/arch/x86/traps.c
+++ b/arch/x86/traps.c
@@ -1119,7 +1119,7 @@ static int read_descriptor(unsigned int sel,
 }
 
 /* Has the guest requested sufficient permission for this I/O access? */
-static inline int guest_io_okay(
+static int guest_io_okay(
     unsigned int port, unsigned int bytes,
     struct vcpu *v, struct cpu_user_regs *regs)
 {
@@ -1161,19 +1161,126 @@ static inline int guest_io_okay(
 }
 
 /* Has the administrator granted sufficient permission for this I/O access? */
-static inline int admin_io_okay(
+static int admin_io_okay(
     unsigned int port, unsigned int bytes,
     struct vcpu *v, struct cpu_user_regs *regs)
 {
+    /*
+     * Port 0xcf8 (CONFIG_ADDRESS) is only visible for DWORD accesses.
+     * We never permit direct access to that register.
+     */
+    if ( (port == 0xcf8) && (bytes == 4) )
+        return 0;
+
     return ioports_access_permitted(v->domain, port, port + bytes - 1);
 }
 
-#define guest_inb_okay(_p, _d, _r) admin_io_okay(_p, 1, _d, _r)
-#define guest_inw_okay(_p, _d, _r) admin_io_okay(_p, 2, _d, _r)
-#define guest_inl_okay(_p, _d, _r) admin_io_okay(_p, 4, _d, _r)
-#define guest_outb_okay(_p, _d, _r) admin_io_okay(_p, 1, _d, _r)
-#define guest_outw_okay(_p, _d, _r) admin_io_okay(_p, 2, _d, _r)
-#define guest_outl_okay(_p, _d, _r) admin_io_okay(_p, 4, _d, _r)
+static uint32_t guest_io_read(
+    unsigned int port, unsigned int bytes,
+    struct vcpu *v, struct cpu_user_regs *regs)
+{
+    extern uint32_t pci_conf_read(
+        uint32_t cf8, uint8_t offset, uint8_t bytes);
+
+    uint32_t data = 0;
+    unsigned int shift = 0;
+
+    if ( admin_io_okay(port, bytes, v, regs) )
+    {
+        switch ( bytes )
+        {
+        case 1: return inb(port);
+        case 2: return inw(port);
+        case 4: return inl(port);
+        }
+    }
+
+    while ( bytes != 0 )
+    {
+        unsigned int size = 1;
+        uint32_t sub_data = 0xff;
+
+        if ( (port == 0x42) || (port == 0x43) || (port == 0x61) )
+        {
+            sub_data = pv_pit_handler(port, 0, 0);
+        }
+        else if ( (port == 0xcf8) && (bytes == 4) )
+        {
+            size = 4;
+            sub_data = v->domain->arch.pci_cf8;
+        }
+        else if ( ((port & 0xfffc) == 0xcfc) && IS_PRIV(v->domain) )
+        {
+            size = min(bytes, 4 - (port & 3));
+            if ( size == 3 )
+                size = 2;
+            sub_data = pci_conf_read(v->domain->arch.pci_cf8, port & 3, size);
+        }
+
+        if ( size == 4 )
+            return sub_data;
+
+        data |= (sub_data & ((1u << (size * 8)) - 1)) << shift;
+        shift += size * 8;
+        port += size;
+        bytes -= size;
+    }
+
+    return data;
+}
+
+static void guest_io_write(
+    unsigned int port, unsigned int bytes, uint32_t data,
+    struct vcpu *v, struct cpu_user_regs *regs)
+{
+    extern void pci_conf_write(
+        uint32_t cf8, uint8_t offset, uint8_t bytes, uint32_t data);
+
+    if ( admin_io_okay(port, bytes, v, regs) )
+    {
+        switch ( bytes ) {
+        case 1:
+            outb((uint8_t)data, port);
+            break;
+        case 2:
+            outw((uint16_t)data, port);
+            break;
+        case 4:
+            outl(data, port);
+            break;
+        }
+        return;
+    }
+
+    while ( bytes != 0 )
+    {
+        unsigned int size = 1;
+
+        if ( (port == 0x42) || (port == 0x43) || (port == 0x61) )
+        {
+            pv_pit_handler(port, (uint8_t)data, 1);
+        }
+        else if ( (port == 0xcf8) && (bytes == 4) )
+        {
+            size = 4;
+            v->domain->arch.pci_cf8 = data;
+        }
+        else if ( ((port & 0xfffc) == 0xcfc) && IS_PRIV(v->domain) )
+        {
+            size = min(bytes, 4 - (port & 3));
+            if ( size == 3 )
+                size = 2;
+            pci_conf_write(v->domain->arch.pci_cf8, port & 3, size, data);
+        }
+
+        if ( size == 4 )
+            return;
+
+        port += size;
+        bytes -= size;
+        data >>= size * 8;
+    }
+}
 
 /* I/O emulation support. Helper routines for, and type of, the stack stub.*/
 void host_to_guest_gpr_switch(struct cpu_user_regs *)
@@ -1294,7 +1401,7 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
 
     /* REX prefix. */
     if ( rex & 8 ) /* REX.W */
-        op_bytes = 4; /* emulating only opcodes not supporting 64-bit operands */
+        op_bytes = 4; /* emulate only opcodes not supporting 64-bit operands */
     modrm_reg = (rex & 4) << 1;  /* REX.R */
     /* REX.X does not need to be decoded. */
     modrm_rm  = (rex & 1) << 3;  /* REX.B */
@@ -1323,7 +1430,8 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
         {
             if ( !read_descriptor(data_sel, v, regs,
                                   &data_base, &data_limit, &ar,
-                                  _SEGMENT_WR|_SEGMENT_S|_SEGMENT_DPL|_SEGMENT_P) )
+                                  _SEGMENT_WR|_SEGMENT_S|_SEGMENT_DPL|
+                                  _SEGMENT_P) )
                 goto fail;
             if ( !(ar & _SEGMENT_S) ||
                  !(ar & _SEGMENT_P) ||
@@ -1362,73 +1470,47 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
         }
 #endif
 
+        port = (u16)regs->edx;
+
     continue_io_string:
         switch ( opcode )
         {
         case 0x6c: /* INSB */
             op_bytes = 1;
         case 0x6d: /* INSW/INSL */
-            if ( data_limit < op_bytes - 1 ||
-                 rd_ad(edi) > data_limit - (op_bytes - 1) ||
-                 !guest_io_okay((u16)regs->edx, op_bytes, v, regs) )
+            if ( (data_limit < (op_bytes - 1)) ||
+                 (rd_ad(edi) > (data_limit - (op_bytes - 1))) ||
+                 !guest_io_okay(port, op_bytes, v, regs) )
                 goto fail;
-            port = (u16)regs->edx;
-            switch ( op_bytes )
-            {
-            case 1:
-                /* emulate PIT counter 2 */
-                data = (u8)(guest_inb_okay(port, v, regs) ? inb(port) : 
-                       ((port == 0x42 || port == 0x43 || port == 0x61) ?
-                       pv_pit_handler(port, 0, 0) : ~0)); 
-                break;
-            case 2:
-                data = (u16)(guest_inw_okay(port, v, regs) ? inw(port) : ~0);
-                break;
-            case 4:
-                data = (u32)(guest_inl_okay(port, v, regs) ? inl(port) : ~0);
-                break;
-            }
-            if ( (rc = copy_to_user((void *)data_base + rd_ad(edi), &data, op_bytes)) != 0 )
+            data = guest_io_read(port, op_bytes, v, regs);
+            if ( (rc = copy_to_user((void *)data_base + rd_ad(edi),
+                                    &data, op_bytes)) != 0 )
             {
                 propagate_page_fault(data_base + rd_ad(edi) + op_bytes - rc,
                                      PFEC_write_access);
                 return EXCRET_fault_fixed;
             }
-            wr_ad(edi, regs->edi + (int)((regs->eflags & EF_DF) ? -op_bytes : op_bytes));
+            wr_ad(edi, regs->edi + (int)((regs->eflags & EF_DF)
+                                         ? -op_bytes : op_bytes));
             break;
 
         case 0x6e: /* OUTSB */
             op_bytes = 1;
         case 0x6f: /* OUTSW/OUTSL */
-            if ( data_limit < op_bytes - 1 ||
-                 rd_ad(esi) > data_limit - (op_bytes - 1) ||
-                 !guest_io_okay((u16)regs->edx, op_bytes, v, regs) )
+           if ( (data_limit < (op_bytes - 1)) ||
+                 (rd_ad(esi) > (data_limit - (op_bytes - 1))) ||
+                  !guest_io_okay(port, op_bytes, v, regs) )
                 goto fail;
-            rc = copy_from_user(&data, (void *)data_base + rd_ad(esi), op_bytes);
-            if ( rc != 0 )
+            if ( (rc = copy_from_user(&data, (void *)data_base + rd_ad(esi),
+                                      op_bytes)) != 0 )
             {
-                propagate_page_fault(data_base + rd_ad(esi) + op_bytes - rc, 0);
+                propagate_page_fault(data_base + rd_ad(esi)
+                                     + op_bytes - rc, 0);
                 return EXCRET_fault_fixed;
             }
-            port = (u16)regs->edx;
-            switch ( op_bytes )
-            {
-            case 1:
-                if ( guest_outb_okay(port, v, regs) )
-                    outb((u8)data, port);
-                else if ( port == 0x42 || port == 0x43 || port == 0x61 )
-                    pv_pit_handler(port, data, 1);
-                break;
-            case 2:
-                if ( guest_outw_okay(port, v, regs) )
-                    outw((u16)data, port);
-                break;
-            case 4:
-                if ( guest_outl_okay(port, v, regs) )
-                    outl((u32)data, port);
-                break;
-            }
-            wr_ad(esi, regs->esi + (int)((regs->eflags & EF_DF) ? -op_bytes : op_bytes));
+            guest_io_write(port, op_bytes, data, v, regs);
+            wr_ad(esi, regs->esi + (int)((regs->eflags & EF_DF)
+                                         ? -op_bytes : op_bytes));
             break;
         }
 
@@ -1490,31 +1572,17 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
     exec_in:
         if ( !guest_io_okay(port, op_bytes, v, regs) )
             goto fail;
-        switch ( op_bytes )
+        if ( admin_io_okay(port, op_bytes, v, regs) )
         {
-        case 1:
-            if ( guest_inb_okay(port, v, regs) )
-                io_emul(regs);
-            else if ( port == 0x42 || port == 0x43 || port == 0x61 )
-            {
-                regs->eax &= ~0xffUL;
-                regs->eax |= pv_pit_handler(port, 0, 0);
-            } 
-            else
-                regs->eax |= (u8)~0;
-            break;
-        case 2:
-            if ( guest_inw_okay(port, v, regs) )
-                io_emul(regs);
-            else
-                regs->eax |= (u16)~0;
-            break;
-        case 4:
-            if ( guest_inl_okay(port, v, regs) )
-                io_emul(regs);
+            io_emul(regs);
+        }
+        else
+        {
+            if ( op_bytes == 4 )
+                regs->eax = 0;
             else
-                regs->eax = (u32)~0;
-            break;
+                regs->eax &= ~((1u << (op_bytes * 8)) - 1);
+            regs->eax |= guest_io_read(port, op_bytes, v, regs);
         }
         goto done;
 
@@ -1532,22 +1600,11 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
     exec_out:
         if ( !guest_io_okay(port, op_bytes, v, regs) )
             goto fail;
-        switch ( op_bytes )
+        if ( admin_io_okay(port, op_bytes, v, regs) )
+            io_emul(regs);
+        else
         {
-        case 1:
-            if ( guest_outb_okay(port, v, regs) )
-                io_emul(regs);
-            else if ( port == 0x42 || port == 0x43 || port == 0x61 )
-                pv_pit_handler(port, regs->eax, 1);
-            break;
-        case 2:
-            if ( guest_outw_okay(port, v, regs) )
-                io_emul(regs);
-            break;
-        case 4:
-            if ( guest_outl_okay(port, v, regs) )
-                io_emul(regs);
-            break;
+            guest_io_write(port, op_bytes, regs->eax, v, regs);
         }
         goto done;
 
diff --git a/include/asm-x86/domain.h b/include/asm-x86/domain.h
index 7a34df4..3f5f06a 100644
--- a/include/asm-x86/domain.h
+++ b/include/asm-x86/domain.h
@@ -219,6 +219,7 @@ struct arch_domain
 
     /* I/O-port admin-specified access capabilities. */
     struct rangeset *ioport_caps;
+    uint32_t pci_cf8;
 
     struct hvm_domain hvm_domain;