kernel-2.6.18-194.11.1.el5.src.rpm

From: ddugger@redhat.com <ddugger@redhat.com>
Date: Mon, 23 Mar 2009 10:23:04 -0600
Subject: [xen] x86: memory changes for VT-d
Message-id: 200903231623.n2NGN49Z022065@sobek.n0ano.com
O-Subject: [RHEL5.4 PATCH 7/21 V2] memory changes for VT-d
Bugzilla: 484227
RH-Acked-by: Gerd Hoffmann <kraxel@redhat.com>

Map memory for the VT-d page tables, map the MMIO ranges of assigned devices
into the p2m/EPT tables, and mark these MMIO pages uncacheable (UC).

Also take care to set UC for such mappings in the shadow page tables.
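For illustration, here is a minimal sketch of how the p2m helpers added by this
patch (set_mmio_p2m_entry() and clear_mmio_p2m_entry(), see arch/x86/mm/p2m.c
below) might be used when an assigned device's MMIO BAR is exposed to a guest.
The caller name map_bar_to_guest, its arguments, and the rollback loop are
illustrative assumptions and are not part of this patch.

    /*
     * Illustrative sketch only, not part of this patch: map an assigned
     * device's MMIO frames into a guest p2m as p2m_mmio_direct, one page
     * at a time.  Assumes the range has already been granted to the domain
     * (i.e. iomem_access_permitted() would succeed for it).
     */
    #include <xen/types.h>
    #include <asm/p2m.h>

    static int map_bar_to_guest(struct domain *d, unsigned long gfn,
                                unsigned long mfn, unsigned long nr)
    {
        unsigned long i;

        for ( i = 0; i < nr; i++ )
        {
            /* set_mmio_p2m_entry() returns 1 on success, 0 on failure. */
            if ( !set_mmio_p2m_entry(d, gfn + i, _mfn(mfn + i)) )
            {
                /* Undo any entries already installed before failing. */
                while ( i-- )
                    clear_mmio_p2m_entry(d, gfn + i);
                return -1;
            }
        }

        return 0;
    }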

Upstream Status: Accepted (CS 15916, 17529, 17645, 15843, 16152).

BZ: 484227

Signed-off-by: Weidong Han <weidong.han@intel.com>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Signed-off-by: Don Dugger <donald.d.dugger@intel.com>

diff --git a/arch/x86/cpu/common.c b/arch/x86/cpu/common.c
index bbda900..268bcc2 100644
--- a/arch/x86/cpu/common.c
+++ b/arch/x86/cpu/common.c
@@ -23,6 +23,12 @@ static int disable_x86_serial_nr __devinitdata = 0;
 
 struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {};
 
+/*
+ * Default host IA32_CR_PAT value to cover all memory types.
+ * BIOS usually sets it to 0x07040600070406.
+ */
+u64 host_pat = 0x050100070406;
+
 static void default_init(struct cpuinfo_x86 * c)
 {
 	/* Not much we can do here... */
@@ -557,6 +563,9 @@ void __devinit cpu_init(void)
 	}
 	printk(KERN_INFO "Initializing CPU#%d\n", cpu);
 
+    if (cpu_has_pat)
+        wrmsrl(MSR_IA32_CR_PAT, host_pat);
+
 	if (cpu_has_vme || cpu_has_tsc || cpu_has_de)
 		clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
 
diff --git a/arch/x86/mm/p2m-ept.c b/arch/x86/mm/p2m-ept.c
index 0feb30a..ac9df26 100644
--- a/arch/x86/mm/p2m-ept.c
+++ b/arch/x86/mm/p2m-ept.c
@@ -23,6 +23,7 @@
 #include <asm/types.h>
 #include <asm/domain.h>
 #include <asm/hvm/vmx/vmx.h>
+#include <xen/iocap.h>
 
 #if 1 /* XEN_VERSION == 3 && XEN_SUBVERSION < 2 */
 
@@ -45,6 +46,10 @@ compat_ept_set_entry(struct domain *d, unsigned long gfn,
   mfn_t mfn, int order, u32 l1e_flags)
 {
     p2m_type_t t = ept_flags_to_p2m_type(l1e_flags);
+    if ( t == p2m_ram_rw &&
+         iomem_access_permitted(d, mfn_x(mfn), mfn_x(mfn)) )
+        t = p2m_mmio_direct;
+
     return ept_set_entry(d, gfn, mfn, order, t);
 }
 
@@ -72,8 +77,11 @@ static void ept_p2m_type_to_flags(ept_entry_t *entry, p2m_type_t type)
     switch(type)
     {
         case p2m_ram_rw:
+             entry->r = entry->w = entry->x = 1;
+            return;
         case p2m_mmio_direct:
              entry->r = entry->w = entry->x = 1;
+             entry->emt = 0x8;
             return;
         case p2m_ram_logdirty:
         case p2m_ram_ro:
@@ -193,13 +201,16 @@ ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
 
     if ( ret != GUEST_TABLE_SPLIT_PAGE )
     {
-        if ( mfn_valid(mfn_x(mfn)) )
+        if ( mfn_valid(mfn_x(mfn)) || (p2mt == p2m_mmio_direct) )
         {
             /* Track the highest gfn for which we have ever had a valid mapping */
             if ( gfn > d->arch.p2m.max_mapped_pfn )
                 d->arch.p2m.max_mapped_pfn = gfn;
 
-            ept_entry->emt = EPT_DEFAULT_MT;
+            if ( p2mt == p2m_mmio_direct )
+                ept_entry->emt = 0x8;
+            else
+                ept_entry->emt = EPT_DEFAULT_MT;
             ept_entry->sp_avail = walk_level ? 1 : 0;
 
             if ( ret == GUEST_TABLE_SUPER_PAGE )
@@ -278,6 +289,32 @@ ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
  out:
     unmap_domain_page(table);
     ept_sync_domain(d);
+
+    /* support pci pass-through */
+    if ( iommu_enabled )
+    {
+        if ( p2mt == p2m_ram_rw )
+        {
+            if ( order == EPT_TABLE_ORDER )
+            {
+                for ( i = 0; i < ( 1 << order ); i++ )
+                    iommu_map_page(d, gfn-offset+i, mfn_x(mfn)-offset+i);
+            }
+            else if ( !order )
+                iommu_map_page(d, gfn, mfn_x(mfn));
+        }
+        else
+        {
+            if ( order == EPT_TABLE_ORDER )
+            {
+                for ( i = 0; i < ( 1 << order ); i++ )
+                    iommu_unmap_page(d, gfn-offset+i);
+            }
+            else if ( !order )
+                iommu_unmap_page(d, gfn);
+        }
+    }
+
     return rv;
 }
 
diff --git a/arch/x86/mm/p2m.c b/arch/x86/mm/p2m.c
index 6f9e1d1..bfc0648 100644
--- a/arch/x86/mm/p2m.c
+++ b/arch/x86/mm/p2m.c
@@ -28,6 +28,7 @@
 #include <asm/paging.h>
 #include <asm/p2m.h>
 #include <asm/hvm/vmx/vmx.h> /* ept_p2m_init() */
+#include <xen/iommu.h>
 
 /* Debugging and auditing of the P2M code? */
 #define P2M_AUDIT     0
@@ -226,6 +227,7 @@ p2m_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
     l1_pgentry_t *p2m_entry;
     l1_pgentry_t entry_content;
     l2_pgentry_t l2e_content;
+    p2m_type_t p2mt = p2m_flags_to_type(l1e_flags);
     int rv=0;
 
 #if CONFIG_PAGING_LEVELS >= 4
@@ -262,7 +264,7 @@ p2m_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
                                    0, L1_PAGETABLE_ENTRIES);
         ASSERT(p2m_entry);
         
-        if ( mfn_valid(mfn) )
+        if ( mfn_valid(mfn) || p2mt == p2m_mmio_direct )
             entry_content = l1e_from_pfn(mfn_x(mfn), l1e_flags);
         else
             entry_content = l1e_empty();
@@ -298,7 +300,18 @@ p2m_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
     /* Track the highest gfn for which we have ever had a valid mapping */
     if ( mfn_valid(mfn) && (gfn > d->arch.p2m.max_mapped_pfn) ) 
         d->arch.p2m.max_mapped_pfn = gfn + (1UL << order) - 1;
-
+ 
+     if ( iommu_enabled )
+     {
+         int i;
+         if ( p2mt == p2m_ram_rw )
+             for ( i = 0; i < (1UL << order); i++ )
+                 iommu_map_page(d, gfn+i, mfn_x(mfn)+i );
+         else
+             for ( i = 0; i < (1UL << order); i++ )
+                 iommu_unmap_page(d, gfn+i);
+     }
+ 
     /* Success */
     rv = 1;
  
@@ -1017,6 +1030,49 @@ int p2m_set_flags(struct domain *d, paddr_t gpa, u32 l1e_flags)
     return 1;
 }
 
+int set_mmio_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn)
+{
+    int rc = 0;
+    unsigned long omfn;
+
+    if ( !paging_mode_translate(d) )
+        return 0;
+
+    omfn = gmfn_to_mfn(d, gfn);
+    if ( INVALID_MFN != omfn )
+    {
+        ASSERT(mfn_valid(_mfn(omfn)));
+        set_gpfn_from_mfn(omfn, INVALID_M2P_ENTRY);
+    }
+
+    rc = set_p2m_entry(d, gfn, mfn, 0, p2m_type_to_flags(p2m_mmio_direct));
+    if ( 0 == rc )
+        gdprintk(XENLOG_ERR,
+            "set_mmio_p2m_entry: set_p2m_entry failed! mfn=%08lx\n",
+            gmfn_to_mfn(d, gfn));
+    return rc;
+}
+
+int clear_mmio_p2m_entry(struct domain *d, unsigned long gfn)
+{
+    int rc = 0;
+    unsigned long mfn;
+
+    if ( !paging_mode_translate(d) )
+        return 0;
+
+    mfn = gmfn_to_mfn(d, gfn);
+    if ( INVALID_MFN == mfn )
+    {
+        gdprintk(XENLOG_ERR,
+            "clear_mmio_p2m_entry: gfn_to_mfn failed! gfn=%08lx\n", gfn);
+        return 0;
+    }
+    rc = set_p2m_entry(d, gfn, _mfn(INVALID_MFN), 0, 0);
+
+    return rc;
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/arch/x86/mm/shadow/multi.c b/arch/x86/mm/shadow/multi.c
index 271d124..2c2f98b 100644
--- a/arch/x86/mm/shadow/multi.c
+++ b/arch/x86/mm/shadow/multi.c
@@ -35,6 +35,7 @@
 #include <asm/hvm/hvm.h>
 #include "private.h"
 #include "types.h"
+#include <xen/iocap.h>
 
 /* THINGS TO DO LATER:
  * 
@@ -654,7 +655,8 @@ _sh_propagate(struct vcpu *v,
         goto done;
     }
 
-    if ( level == 1 && mmio )
+    if ( level == 1 && mmio &&
+         !iomem_access_permitted(d, mfn_x(target_mfn), mfn_x(target_mfn)) )
     {
         /* Guest l1e maps MMIO space */
         *sp = sh_l1e_mmio(guest_l1e_get_gfn(*gp), gflags);
@@ -667,7 +669,8 @@ _sh_propagate(struct vcpu *v,
     // case of a prefetch, an invalid mfn means that we can not usefully
     // shadow anything, and so we return early.
     //
-    if ( !mfn_valid(target_mfn) )
+    if ( !mfn_valid(target_mfn) &&
+         !iomem_access_permitted(d, mfn_x(target_mfn), mfn_x(target_mfn)) )
     {
         ASSERT((ft == ft_prefetch));
         *sp = shadow_l1e_empty();
@@ -750,6 +753,10 @@ _sh_propagate(struct vcpu *v,
         sflags |= _PAGE_USER;
     }
 
+    /* MMIO addresses should never be cached */
+    if ( iomem_access_permitted(d, mfn_x(target_mfn), mfn_x(target_mfn)) )
+        sflags |= _PAGE_PCD;
+
     *sp = shadow_l1e_from_mfn(target_mfn, sflags);
  done:
     SHADOW_DEBUG(PROPAGATE,
diff --git a/include/asm-x86/cpufeature.h b/include/asm-x86/cpufeature.h
index c8b511f..130a7f7 100644
--- a/include/asm-x86/cpufeature.h
+++ b/include/asm-x86/cpufeature.h
@@ -105,6 +105,7 @@
 #define cpu_has_tsc		boot_cpu_has(X86_FEATURE_TSC)
 #define cpu_has_pae		boot_cpu_has(X86_FEATURE_PAE)
 #define cpu_has_pge		boot_cpu_has(X86_FEATURE_PGE)
+#define cpu_has_pat		boot_cpu_has(X86_FEATURE_PAT)
 #define cpu_has_apic		boot_cpu_has(X86_FEATURE_APIC)
 #define cpu_has_sep		boot_cpu_has(X86_FEATURE_SEP)
 #define cpu_has_mtrr		boot_cpu_has(X86_FEATURE_MTRR)
@@ -129,6 +130,7 @@
 #define cpu_has_tsc		1
 #define cpu_has_pae		1
 #define cpu_has_pge		1
+#define cpu_has_pat		1
 #define cpu_has_apic		boot_cpu_has(X86_FEATURE_APIC)
 #define cpu_has_sep		0
 #define cpu_has_mtrr		1
diff --git a/include/asm-x86/msr.h b/include/asm-x86/msr.h
index 3d85857..eaca741 100644
--- a/include/asm-x86/msr.h
+++ b/include/asm-x86/msr.h
@@ -216,6 +216,8 @@ static inline void write_efer(u64 val)
 #define MSR_IA32_LASTINTFROMIP		0x1dd
 #define MSR_IA32_LASTINTTOIP		0x1de
 
+#define MSR_IA32_CR_PAT             0x00000277
+
 #define MSR_IA32_MC0_CTL		0x400
 #define MSR_IA32_MC0_STATUS		0x401
 #define MSR_IA32_MC0_ADDR		0x402
diff --git a/include/asm-x86/p2m.h b/include/asm-x86/p2m.h
index 71035d0..f554bdc 100644
--- a/include/asm-x86/p2m.h
+++ b/include/asm-x86/p2m.h
@@ -61,11 +61,40 @@ typedef enum {
 #define p2m_is_readonly(_t) (p2m_to_mask(_t) & P2M_RO_TYPES)
 #define p2m_is_valid(_t) (p2m_to_mask(_t) & (P2M_RAM_TYPES | P2M_MMIO_TYPES))
 
+/* PTE flags for the various types of p2m entry */
+#define P2M_BASE_FLAGS \
+        (_PAGE_PRESENT | _PAGE_USER | _PAGE_DIRTY | _PAGE_ACCESSED)
+
 /* Extract the type from the PTE flags that store it */
 static inline p2m_type_t p2m_flags_to_type(unsigned long flags)
 {
-    /* Type is stored in the "available" bits, 9, 10 and 11 */
-    return (flags >> 9) & 0x7;
+    if ( (flags & _PAGE_RW) && (flags & _PAGE_PCD) )
+        return p2m_mmio_direct;
+    else if ( flags & _PAGE_RW )
+        return p2m_ram_rw;
+    else if ( paging_mode_log_dirty(current->domain) )
+        return p2m_ram_logdirty;
+    else
+        return p2m_invalid;
+}
+
+static inline unsigned long p2m_type_to_flags(p2m_type_t t)
+{
+    unsigned long flags = 0;
+    switch(t)
+    {
+    case p2m_ram_rw:
+        return flags | P2M_BASE_FLAGS | _PAGE_RW;
+    case p2m_mmio_direct:
+        return flags | P2M_BASE_FLAGS | _PAGE_RW | _PAGE_PCD;
+    case p2m_ram_logdirty:
+    case p2m_ram_ro:
+    case p2m_mmio_dm:
+        return flags | P2M_BASE_FLAGS;
+    case p2m_invalid:
+    default:
+        return flags;
+    }
 }
 
 #endif
@@ -172,6 +201,9 @@ gl1e_to_ml1e(struct domain *d, l1_pgentry_t l1e)
 }
 
 
+/* Set mmio addresses in the p2m table (for pass-through) */
+int set_mmio_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn);
+int clear_mmio_p2m_entry(struct domain *d, unsigned long gfn);
 
 /* Init the datastructures for later use by the p2m code */
 void p2m_init(struct domain *d);