Sophie

Sophie

distrib > Scientific%20Linux > 5x > x86_64 > by-pkgid > 89877e42827f16fa5f86b1df0c2860b1 > files > 2885

kernel-2.6.18-128.1.10.el5.src.rpm

From: Bhavana Nagendra <bnagendr@redhat.com>
Date: Mon, 21 Jan 2008 16:33:00 -0500
Subject: [xen] x86-pae: support >4GB memory
Message-id: 47950F8C.5010408@redhat.com
O-Subject: Re: [RHEL5.2 PATCH] 32-bit PAE HV hardware limitation > 4GB memory
Bugzilla: 316371

Resolves BZ 316371

In a 32-bit PAE hypervisor, nested paging can only translate 32-bit guest
virtual addresses. If the guest is PAE with >4GB of memory, its page table
entries (virtual addresses) will index space above 4GB. That will crash the
guest because of the wrong translation. This patch makes attempts to add
pages to the guest pseudophysical memory map above 4GB fail when running
with AMD NPT on a PAE host.

Upstream changeset: 16279

Please review and provide ACKs.

Stephen C. Tweedie wrote:
> Also, did we get anywhere with a printk to warn users when this
> truncating of address space is going on?
>
Keir checked in the printk with an additional condition so that the
warning only prints once.

Bhavana

Acked-by: Bill Burns <bburns@redhat.com>
Acked-by: "Stephen C. Tweedie" <sct@redhat.com>

diff --git a/arch/x86/hvm/hvm.c b/arch/x86/hvm/hvm.c
index 66fc9a5..00c1c86 100644
--- a/arch/x86/hvm/hvm.c
+++ b/arch/x86/hvm/hvm.c
@@ -52,8 +52,9 @@
 /*
  * Xen command-line option to allow/disallow hardware-assisted paging.
  * Since the phys-to-machine table of AMD NPT is in host format, 32-bit Xen
- * could only support guests using NPT with up to a 4GB memory map. Therefore
- * we only allow HAP by default on 64-bit Xen.
+ * can only support guests using NPT with up to a 4GB memory map. Therefore
+ * we disallow HAP by default on PAE Xen (by default we want to support an
+ * 8GB pseudophysical memory map for HVM guests on a PAE host).
  */
 static int opt_hap_permitted = (CONFIG_PAGING_LEVELS != 3);
 boolean_param("hap", opt_hap_permitted);
diff --git a/arch/x86/mm/p2m.c b/arch/x86/mm/p2m.c
index 9efb571..56466c0 100644
--- a/arch/x86/mm/p2m.c
+++ b/arch/x86/mm/p2m.c
@@ -193,15 +193,17 @@ set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn, u32 l1e_flags)
         goto out;
 #endif
 #if CONFIG_PAGING_LEVELS >= 3
-    // When using PAE Xen, we only allow 33 bits of pseudo-physical
-    // address in translated guests (i.e. 8 GBytes).  This restriction
-    // comes from wanting to map the P2M table into the 16MB RO_MPT hole
-    // in Xen's address space for translated PV guests.
-    //
+    /*
+     * When using PAE Xen, we only allow 33 bits of pseudo-physical
+     * address in translated guests (i.e. 8 GBytes).  This restriction
+     * comes from wanting to map the P2M table into the 16MB RO_MPT hole
+     * in Xen's address space for translated PV guests.
+     * When using AMD's NPT on PAE Xen, we are restricted to 4GB.
+     */
     if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn,
                          L3_PAGETABLE_SHIFT - PAGE_SHIFT,
-                         (CONFIG_PAGING_LEVELS == 3
-                          ? 8
+                         ((CONFIG_PAGING_LEVELS == 3)
+                          ? (hvm_funcs.hap_supported ? 4 : 8)
                           : L3_PAGETABLE_ENTRIES),
                          PGT_l2_page_table) )
         goto out;
@@ -648,15 +650,32 @@ guest_physmap_remove_page(struct domain *d, unsigned long gfn,
     p2m_unlock(d);    
 }
 
-void
+int
 guest_physmap_add_page(struct domain *d, unsigned long gfn,
                        unsigned long mfn)
 {
     unsigned long ogfn;
     mfn_t omfn;
+    int rc = 0;
 
     if ( !paging_mode_translate(d) )
-        return;
+        return -EINVAL;
+
+#if CONFIG_PAGING_LEVELS == 3
+    /*
+     * 32bit PAE nested paging does not support over 4GB guest due to 
+     * hardware translation limit. This limitation is checked by comparing
+     * gfn with 0xfffffUL.
+     */
+    if ( paging_mode_hap(d) && (gfn > 0xfffffUL) )
+    {
+        if ( !test_and_set_bool(d->arch.hvm_domain.amd_npt_4gb_warning) )
+            dprintk(XENLOG_WARNING, "Dom%d failed to populate memory beyond"
+                    " 4GB: remove 'hap' Xen boot parameter.\n",
+                    d->domain_id);
+        return -EINVAL;
+    }
+#endif
 
     p2m_lock(d);
     audit_p2m(d);
@@ -666,7 +685,9 @@ guest_physmap_add_page(struct domain *d, unsigned long gfn,
     omfn = gfn_to_mfn(d, gfn);
     if ( mfn_valid(omfn) )
     {
-        set_p2m_entry(d, gfn, _mfn(INVALID_MFN), __PAGE_HYPERVISOR|_PAGE_USER);
+        if ( !set_p2m_entry(d, gfn, _mfn(INVALID_MFN), __PAGE_HYPERVISOR|_PAGE_USER) )
+            rc = -EINVAL;
+
         set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
     }
 
@@ -692,11 +713,15 @@ guest_physmap_add_page(struct domain *d, unsigned long gfn,
         }
     }
 
-    set_p2m_entry(d, gfn, _mfn(mfn), __PAGE_HYPERVISOR|_PAGE_USER);
+    if ( !set_p2m_entry(d, gfn, _mfn(mfn), __PAGE_HYPERVISOR|_PAGE_USER) )
+            rc = -EINVAL;
+
     set_gpfn_from_mfn(mfn, gfn);
 
     audit_p2m(d);
     p2m_unlock(d);
+
+    return rc;
 }
 
 /* This function goes through P2M table and modify l1e flags of all pages. Note
diff --git a/common/memory.c b/common/memory.c
index 1c2f2a8..24f5848 100644
--- a/common/memory.c
+++ b/common/memory.c
@@ -130,7 +130,8 @@ static void populate_physmap(struct memop_args *a)
         if ( unlikely(paging_mode_translate(d)) )
         {
             for ( j = 0; j < (1 << a->extent_order); j++ )
-                guest_physmap_add_page(d, gpfn + j, mfn + j);
+                if ( guest_physmap_add_page(d, gpfn + j, mfn + j) )
+                    goto out;
         }
         else
         {
@@ -427,8 +428,9 @@ static long memory_exchange(XEN_GUEST_HANDLE(xen_memory_exchange_t) arg)
             mfn = page_to_mfn(page);
             if ( unlikely(paging_mode_translate(d)) )
             {
+                /* Ignore failure here. There's nothing we can do. */
                 for ( k = 0; k < (1UL << exch.out.extent_order); k++ )
-                    guest_physmap_add_page(d, gpfn + k, mfn + k);
+                    (void)guest_physmap_add_page(d, gpfn + k, mfn + k);
             }
             else
             {
diff --git a/include/asm-x86/hvm/domain.h b/include/asm-x86/hvm/domain.h
index 191deac..3c8d54e 100644
--- a/include/asm-x86/hvm/domain.h
+++ b/include/asm-x86/hvm/domain.h
@@ -60,6 +60,10 @@ struct hvm_domain {
     spinlock_t             pbuf_lock;
 
     uint64_t               params[HVM_NR_PARAMS];
+
+#if CONFIG_PAGING_LEVELS == 3
+    bool_t                 amd_npt_4gb_warning;
+#endif
 };
 
 #endif /* __ASM_X86_HVM_DOMAIN_H__ */
diff --git a/include/asm-x86/p2m.h b/include/asm-x86/p2m.h
index 1863da5..5ab11e2 100644
--- a/include/asm-x86/p2m.h
+++ b/include/asm-x86/p2m.h
@@ -122,7 +122,7 @@ int p2m_alloc_table(struct domain *d,
 void p2m_teardown(struct domain *d);
 
 /* Add a page to a domain's p2m table */
-void guest_physmap_add_page(struct domain *d, unsigned long gfn,
+int guest_physmap_add_page(struct domain *d, unsigned long gfn,
                             unsigned long mfn);
 
 /* Remove a page from a domain's p2m table */