kernel-2.6.18-128.1.10.el5.src.rpm

From: Rik van Riel <riel@redhat.com>
Date: Fri, 15 Feb 2008 15:46:19 -0500
Subject: [xen] mprotect performance improvements
Message-id: 20080215154619.73a5dfe0@bree.surriel.com
O-Subject: [RHEL 5.2 PATCH 1/2] mprotect performance improvements
Bugzilla: 412731

This patch introduces a batched page table update mechanism in the hypervisor
for mprotect: a new MMU_PT_UPDATE_PRESERVE_AD mmu_update command writes a page
table entry while preserving (ORing back in) its current accessed/dirty bits.
This brings Xen mprotect performance to an acceptable level for SAP.

This changeset is in the upstream Xen codebase and was tested for a week at
the SAP virtualization workshop by us and other participants.

Fixes bug 412731

Acked-by: Eduardo Habkost <ehabkost@redhat.com>
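
For reviewers, here is a minimal guest-side sketch of how a PV kernel's
mprotect path could use the new command.  It is illustrative only and not part
of this patch: the header paths, the 32-entry batch size, and the
batch_change_protection()/pt_preserve_ad_supported() helpers are assumptions,
and the actual guest-side changes are presumably in patch 2/2.

/*
 * Sketch: detect MMU_PT_UPDATE_PRESERVE_AD and batch PTE protection changes
 * through a single mmu_update hypercall.  Helper names and header paths are
 * illustrative, not taken from this patch.
 */
#include <xen/interface/xen.h>       /* struct mmu_update, MMU_* commands, DOMID_SELF */
#include <xen/interface/version.h>   /* XENVER_get_features, struct xen_feature_info */
#include <xen/interface/features.h>  /* XENFEAT_mmu_pt_update_preserve_ad */
#include <asm/hypervisor.h>          /* HYPERVISOR_mmu_update(), HYPERVISOR_xen_version() */

/* Does the hypervisor advertise the new command? (see the common/kernel.c hunk) */
static int pt_preserve_ad_supported(void)
{
    struct xen_feature_info fi = { .submap_idx = 0 };

    if (HYPERVISOR_xen_version(XENVER_get_features, &fi) < 0)
        return 0;
    return !!(fi.submap & (1U << XENFEAT_mmu_pt_update_preserve_ad));
}

/*
 * Re-protect up to 'nr' PTEs with one hypercall.  pte_maddrs[] holds the
 * machine addresses of the PTEs, new_vals[] the new PTE contents; the
 * hypervisor ORs the current A/D bits back into each value.
 */
static int batch_change_protection(const unsigned long *pte_maddrs,
                                   const unsigned long *new_vals,
                                   int nr)
{
    struct mmu_update req[32];
    int i, done = 0;

    if (nr > 32)
        nr = 32;

    for (i = 0; i < nr; i++) {
        /* The low two bits of ptr select the sub-command; the rest is the MA. */
        req[i].ptr = pte_maddrs[i] | MMU_PT_UPDATE_PRESERVE_AD;
        req[i].val = new_vals[i];
    }

    if (HYPERVISOR_mmu_update(req, nr, &done, DOMID_SELF) < 0 || done != nr)
        return -1;        /* caller should fall back to per-PTE updates */

    return 0;
}

Encoding the sub-command in the low two bits of req.ptr keeps the mmu_update
ABI unchanged; the hypervisor strips it with "req.ptr -= cmd" before computing
the target frame, as the do_mmu_update() hunk below shows.  Guests on older
hypervisors will not see XENFEAT_mmu_pt_update_preserve_ad and should keep
using MMU_NORMAL_PT_UPDATE.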

diff --git a/arch/x86/mm.c b/arch/x86/mm.c
index e3887cc..f98ac50 100644
--- a/arch/x86/mm.c
+++ b/arch/x86/mm.c
@@ -1295,16 +1295,24 @@ static inline int update_intpte(intpte_t *p,
                                 intpte_t old, 
                                 intpte_t new,
                                 unsigned long mfn,
-                                struct vcpu *v)
+                                struct vcpu *v,
+                                int preserve_ad)
 {
     int rv = 1;
 #ifndef PTE_UPDATE_WITH_CMPXCHG
-    rv = paging_write_guest_entry(v, p, new, _mfn(mfn));
-#else
+    if ( !preserve_ad )
+    {
+        rv = paging_write_guest_entry(v, p, new, _mfn(mfn));
+    }
+    else
+#endif
     {
         intpte_t t = old;
         for ( ; ; )
         {
+            if ( preserve_ad )
+                new |= old & (_PAGE_ACCESSED | _PAGE_DIRTY);
+
             rv = paging_cmpxchg_guest_entry(v, p, &t, new, _mfn(mfn));
             if ( unlikely(rv == 0) )
             {
@@ -1322,20 +1330,19 @@ static inline int update_intpte(intpte_t *p,
             old = t;
         }
     }
-#endif
     return rv;
 }
 
 /* Macro that wraps the appropriate type-changes around update_intpte().
  * Arguments are: type, ptr, old, new, mfn, vcpu */
-#define UPDATE_ENTRY(_t,_p,_o,_n,_m,_v)                             \
+#define UPDATE_ENTRY(_t,_p,_o,_n,_m,_v,_ad)                         \
     update_intpte(&_t ## e_get_intpte(*(_p)),                       \
                   _t ## e_get_intpte(_o), _t ## e_get_intpte(_n),   \
-                  (_m), (_v))
+                  (_m), (_v), (_ad))
 
 /* Update the L1 entry at pl1e to new value nl1e. */
 static int mod_l1_entry(l1_pgentry_t *pl1e, l1_pgentry_t nl1e, 
-                        unsigned long gl1mfn)
+                        unsigned long gl1mfn, int preserve_ad)
 {
     l1_pgentry_t ol1e;
     struct domain *d = current->domain;
@@ -1345,7 +1352,7 @@ static int mod_l1_entry(l1_pgentry_t *pl1e, l1_pgentry_t nl1e,
         return 0;
 
     if ( unlikely(paging_mode_refcounts(d)) )
-        return UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, current);
+        return UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, current, preserve_ad);
 
     if ( l1e_get_flags(nl1e) & _PAGE_PRESENT )
     {
@@ -1367,12 +1374,14 @@ static int mod_l1_entry(l1_pgentry_t *pl1e, l1_pgentry_t nl1e,
 
         /* Fast path for identical mapping, r/w and presence. */
         if ( !l1e_has_changed(ol1e, nl1e, _PAGE_RW | _PAGE_PRESENT) )
-            return UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, current);
+            return UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, current,
+                                preserve_ad);
 
         if ( unlikely(!get_page_from_l1e(nl1e, FOREIGNDOM)) )
             return 0;
         
-        if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, current)) )
+        if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, current,
+                                    preserve_ad)) )
         {
             put_page_from_l1e(nl1e, d);
             return 0;
@@ -1380,7 +1389,8 @@ static int mod_l1_entry(l1_pgentry_t *pl1e, l1_pgentry_t nl1e,
     }
     else
     {
-        if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, current)) )
+        if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, current,
+                                    preserve_ad)) )
             return 0;
     }
 
@@ -1393,7 +1403,8 @@ static int mod_l1_entry(l1_pgentry_t *pl1e, l1_pgentry_t nl1e,
 static int mod_l2_entry(l2_pgentry_t *pl2e, 
                         l2_pgentry_t nl2e, 
                         unsigned long pfn,
-                        unsigned long type)
+                        unsigned long type,
+                        int preserve_ad)
 {
     l2_pgentry_t ol2e;
     struct domain *d = current->domain;
@@ -1420,18 +1431,21 @@ static int mod_l2_entry(l2_pgentry_t *pl2e,
 
         /* Fast path for identical mapping and presence. */
         if ( !l2e_has_changed(ol2e, nl2e, _PAGE_PRESENT))
-            return UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, current);
+            return UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, current,
+                                preserve_ad);
 
         if ( unlikely(!get_page_from_l2e(nl2e, pfn, d)) )
             return 0;
 
-        if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, current)) )
+        if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, current,
+                                    preserve_ad)) )
         {
             put_page_from_l2e(nl2e, pfn);
             return 0;
         }
     }
-    else if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, current)) )
+    else if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, current,
+                                     preserve_ad)) )
     {
         return 0;
     }
@@ -1445,7 +1459,8 @@ static int mod_l2_entry(l2_pgentry_t *pl2e,
 /* Update the L3 entry at pl3e to new value nl3e. pl3e is within frame pfn. */
 static int mod_l3_entry(l3_pgentry_t *pl3e, 
                         l3_pgentry_t nl3e, 
-                        unsigned long pfn)
+                        unsigned long pfn,
+                        int preserve_ad)
 {
     l3_pgentry_t ol3e;
     struct domain *d = current->domain;
@@ -1482,18 +1497,21 @@ static int mod_l3_entry(l3_pgentry_t *pl3e,
 
         /* Fast path for identical mapping and presence. */
         if (!l3e_has_changed(ol3e, nl3e, _PAGE_PRESENT))
-            return UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, current);
+            return UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, current,
+                                preserve_ad);
 
         if ( unlikely(!get_page_from_l3e(nl3e, pfn, d)) )
             return 0;
 
-        if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, current)) )
+        if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, current,
+                                    preserve_ad)) )
         {
             put_page_from_l3e(nl3e, pfn);
             return 0;
         }
     }
-    else if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, current)) )
+    else if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, current,
+                                     preserve_ad)) )
     {
         return 0;
     }
@@ -1515,7 +1533,8 @@ static int mod_l3_entry(l3_pgentry_t *pl3e,
 static int mod_l4_entry(struct domain *d,
                         l4_pgentry_t *pl4e, 
                         l4_pgentry_t nl4e, 
-                        unsigned long pfn)
+                        unsigned long pfn,
+                        int preserve_ad)
 {
     l4_pgentry_t ol4e;
 
@@ -1541,18 +1560,21 @@ static int mod_l4_entry(struct domain *d,
 
         /* Fast path for identical mapping and presence. */
         if (!l4e_has_changed(ol4e, nl4e, _PAGE_PRESENT))
-            return UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, current);
+            return UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, current,
+                                preserve_ad);
 
         if ( unlikely(!get_page_from_l4e(nl4e, pfn, current->domain)) )
             return 0;
 
-        if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, current)) )
+        if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, current,
+                                    preserve_ad)) )
         {
             put_page_from_l4e(nl4e, pfn);
             return 0;
         }
     }
-    else if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, current)) )
+    else if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, current,
+                                     preserve_ad)) )
     {
         return 0;
     }
@@ -1826,7 +1848,7 @@ int new_guest_cr3(unsigned long mfn)
                     l4e_from_pfn(
                         mfn,
                         (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED)),
-                    pagetable_get_pfn(v->arch.guest_table));
+                    pagetable_get_pfn(v->arch.guest_table), 0);
         if ( unlikely(!okay) )
         {
             MEM_LOG("Error while installing new compat baseptr %lx", mfn);
@@ -2336,9 +2358,12 @@ int do_mmu_update(
         {
             /*
              * MMU_NORMAL_PT_UPDATE: Normal update to any level of page table.
+             * MMU_PT_UPDATE_PRESERVE_AD: As above but also preserve (OR)
+             * current A/D bits.
              */
         case MMU_NORMAL_PT_UPDATE:
-
+        case MMU_PT_UPDATE_PRESERVE_AD:
+            req.ptr -= cmd;
             gmfn = req.ptr >> PAGE_SHIFT;
             mfn = gmfn_to_mfn(d, gmfn);
 
@@ -2375,20 +2400,23 @@ int do_mmu_update(
                 case PGT_l1_page_table:
                 {
                     l1_pgentry_t l1e = l1e_from_intpte(req.val);
-                    okay = mod_l1_entry(va, l1e, mfn);
+                    okay = mod_l1_entry(va, l1e, mfn,
+                                        cmd == MMU_PT_UPDATE_PRESERVE_AD);
                 }
                 break;
                 case PGT_l2_page_table:
                 {
                     l2_pgentry_t l2e = l2e_from_intpte(req.val);
-                    okay = mod_l2_entry(va, l2e, mfn, type_info);
+                    okay = mod_l2_entry(va, l2e, mfn, type_info,
+                                        cmd == MMU_PT_UPDATE_PRESERVE_AD);
                 }
                 break;
 #if CONFIG_PAGING_LEVELS >= 3
                 case PGT_l3_page_table:
                 {
                     l3_pgentry_t l3e = l3e_from_intpte(req.val);
-                    okay = mod_l3_entry(va, l3e, mfn);
+                    okay = mod_l3_entry(va, l3e, mfn,
+                                        cmd == MMU_PT_UPDATE_PRESERVE_AD);
                 }
                 break;
 #endif
@@ -2396,7 +2424,8 @@ int do_mmu_update(
                 case PGT_l4_page_table:
                 {
                     l4_pgentry_t l4e = l4e_from_intpte(req.val);
-                    okay = mod_l4_entry(d, va, l4e, mfn);
+                    okay = mod_l4_entry(d, va, l4e, mfn,
+                                        cmd == MMU_PT_UPDATE_PRESERVE_AD);
                 }
                 break;
 #endif
@@ -2522,7 +2551,7 @@ static int create_grant_pte_mapping(
     }
 
     ol1e = *(l1_pgentry_t *)va;
-    if ( !UPDATE_ENTRY(l1, (l1_pgentry_t *)va, ol1e, nl1e, mfn, v) )
+    if ( !UPDATE_ENTRY(l1, (l1_pgentry_t *)va, ol1e, nl1e, mfn, v, 0) )
     {
         put_page_type(page);
         rc = GNTST_general_error;
@@ -2590,9 +2619,11 @@ static int destroy_grant_pte_mapping(
     }
 
     /* Delete pagetable entry. */
-    if ( unlikely(!UPDATE_ENTRY(l1, 
-                      (l1_pgentry_t *)va, ol1e, l1e_empty(), mfn, 
-                      d->vcpu[0] /* Change if we go to per-vcpu shadows. */)) )
+    if ( unlikely(!UPDATE_ENTRY
+                  (l1, 
+                   (l1_pgentry_t *)va, ol1e, l1e_empty(), mfn, 
+                   d->vcpu[0] /* Change if we go to per-vcpu shadows. */,
+                   0)) )
     {
         MEM_LOG("Cannot delete PTE entry at %p", va);
         put_page_type(page);
@@ -2628,7 +2659,7 @@ static int create_grant_va_mapping(
         return GNTST_general_error;
     }
     ol1e = *pl1e;
-    okay = UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, v);
+    okay = UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, v, 0);
     guest_unmap_l1e(v, pl1e);
     pl1e = NULL;
 
@@ -2666,7 +2697,7 @@ static int destroy_grant_va_mapping(
     }
 
     /* Delete pagetable entry. */
-    if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, l1e_empty(), gl1mfn, v)) )
+    if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, l1e_empty(), gl1mfn, v, 0)) )
     {
         MEM_LOG("Cannot delete PTE entry at %p", (unsigned long *)pl1e);
         rc = GNTST_general_error;
@@ -2768,7 +2799,7 @@ int do_update_va_mapping(unsigned long va, u64 val64,
 
     pl1e = guest_map_l1e(v, va, &gl1mfn);
 
-    if ( unlikely(!pl1e || !mod_l1_entry(pl1e, val, gl1mfn)) )
+    if ( unlikely(!pl1e || !mod_l1_entry(pl1e, val, gl1mfn, 0)) )
         rc = -EINVAL;
 
     if ( pl1e )
@@ -3318,7 +3349,7 @@ static int ptwr_emulated_update(
     else
     {
         ol1e = *pl1e;
-        if ( !UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, mfn, v) )
+        if ( !UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, mfn, v, 0) )
             BUG();
     }
 
diff --git a/common/kernel.c b/common/kernel.c
index 034ff21..2ee6961 100644
--- a/common/kernel.c
+++ b/common/kernel.c
@@ -220,6 +220,10 @@ DO(xen_version)(int cmd, XEN_GUEST_HANDLE(void) arg)
                     (1U << XENFEAT_auto_translated_physmap);
             if ( supervisor_mode_kernel )
                 fi.submap |= 1U << XENFEAT_supervisor_mode_kernel;
+#ifdef CONFIG_X86
+            if ( !is_hvm_vcpu(current) )
+                fi.submap |= 1U << XENFEAT_mmu_pt_update_preserve_ad;
+#endif
             break;
         default:
             return -EINVAL;
diff --git a/include/public/features.h b/include/public/features.h
index d4b373f..05fc5dc 100644
--- a/include/public/features.h
+++ b/include/public/features.h
@@ -56,6 +56,9 @@
  */
 #define XENFEAT_pae_pgdir_above_4gb        4
 
+/* x86: Does this Xen host support the MMU_PT_UPDATE_PRESERVE_AD hypercall? */
+#define XENFEAT_mmu_pt_update_preserve_ad  5
+
 #define XENFEAT_NR_SUBMAPS 1
 
 #endif /* __XEN_PUBLIC_FEATURES_H__ */
diff --git a/include/public/xen.h b/include/public/xen.h
index e686ccb..fb0dc23 100644
--- a/include/public/xen.h
+++ b/include/public/xen.h
@@ -168,9 +168,13 @@
  * ptr[:2]  -- Machine address within the frame whose mapping to modify.
  *             The frame must belong to the FD, if one is specified.
  * val      -- Value to write into the mapping entry.
+ * 
+ * ptr[1:0] == MMU_PT_UPDATE_PRESERVE_AD:
+ * As MMU_NORMAL_PT_UPDATE above, but A/D bits in the PTE are preserved (ORed).
  */
-#define MMU_NORMAL_PT_UPDATE     0 /* checked '*ptr = val'. ptr is MA.       */
-#define MMU_MACHPHYS_UPDATE      1 /* ptr = MA of frame to modify entry for  */
+#define MMU_NORMAL_PT_UPDATE      0 /* checked '*ptr = val'. ptr is MA.      */
+#define MMU_MACHPHYS_UPDATE       1 /* ptr = MA of frame to modify entry for */
+#define MMU_PT_UPDATE_PRESERVE_AD 2 /* '*ptr = val', preserve (OR) A/D bits  */
 
 /*
  * MMU EXTENDED OPERATIONS