Sophie

Sophie

distrib > Scientific%20Linux > 5x > x86_64 > by-pkgid > fc11cd6e1c513a17304da94a5390f3cd > files > 4460

kernel-2.6.18-194.11.1.el5.src.rpm

From: Tetsu Yamamoto <tyamamot@redhat.com>
Date: Mon, 21 Jan 2008 20:48:15 -0500
Subject: [xen] ia64: create 100GB mem guest fixes
Message-id: 20080121165202.15518.30769.sendpatchset@dhcp83-164.boston.redhat.com
O-Subject: [RHEL5.2 PATCH 1/4][Take2] Cannot create guest having 100GB memory on Xen-ia64
Bugzilla: 251353

Acked-by: Bill Burns <bburns@redhat.com>
Acked-by: "Stephen C. Tweedie" <sct@redhat.com>

diff --git a/arch/ia64/xen/domain.c b/arch/ia64/xen/domain.c
index 57f528a..5a966df 100644
--- a/arch/ia64/xen/domain.c
+++ b/arch/ia64/xen/domain.c
@@ -562,6 +562,7 @@ int arch_domain_create(struct domain *d)
 		goto fail_nomem;
 
 	memset(&d->arch.mm, 0, sizeof(d->arch.mm));
+        d->arch.mm_teardown_offset = 0;
 
 	if ((d->arch.mm.pgd = pgd_alloc(&d->arch.mm)) == NULL)
 	    goto fail_nomem;
@@ -765,14 +766,17 @@ static void relinquish_memory(struct domain *d, struct list_head *list)
     spin_unlock_recursive(&d->page_alloc_lock);
 }
 
-void domain_relinquish_resources(struct domain *d)
+int domain_relinquish_resources(struct domain *d)
 {
+    int ret;
     /* Relinquish guest resources for VT-i domain. */
     if (d->arch.is_vti)
 	    vmx_relinquish_guest_resources(d);
 
     /* Tear down shadow mode stuff. */
-    mm_teardown(d);
+    ret = mm_teardown(d);
+    if (ret != 0)
+        return ret;
 
     /* Relinquish every page of memory. */
     relinquish_memory(d, &d->xenpage_list);
@@ -783,6 +787,8 @@ void domain_relinquish_resources(struct domain *d)
 
     /* Free page used by xen oprofile buffer */
     free_xenoprof_pages(d);
+
+    return 0;
 }
 
 unsigned long
diff --git a/arch/ia64/xen/mm.c b/arch/ia64/xen/mm.c
index 58e9602..46570b4 100644
--- a/arch/ia64/xen/mm.c
+++ b/arch/ia64/xen/mm.c
@@ -172,6 +172,7 @@
 #include <asm/vhpt.h>
 #include <asm/vcpu.h>
 #include <asm/shadow.h>
+#include <asm/event.h>
 #include <asm/p2m_entry.h>
 #include <asm/tlb_track.h>
 #include <linux/efi.h>
@@ -209,6 +210,18 @@ alloc_dom_xen_and_dom_io(void)
     BUG_ON(dom_io == NULL);
 }
 
+static int
+mm_teardown_can_skip(struct domain* d, unsigned long offset)
+{
+    return d->arch.mm_teardown_offset > offset;
+}
+
+static void
+mm_teardown_update_offset(struct domain* d, unsigned long offset)
+{
+    d->arch.mm_teardown_offset = offset;
+}
+
 static void
 mm_teardown_pte(struct domain* d, volatile pte_t* pte, unsigned long offset)
 {
@@ -249,46 +262,73 @@ mm_teardown_pte(struct domain* d, volatile pte_t* pte, unsigned long offset)
     }
 }
 
-static void
+static int
 mm_teardown_pmd(struct domain* d, volatile pmd_t* pmd, unsigned long offset)
 {
     unsigned long i;
     volatile pte_t* pte = pte_offset_map(pmd, offset);
 
     for (i = 0; i < PTRS_PER_PTE; i++, pte++) {
-        if (!pte_present(*pte)) // acquire semantics
+        unsigned long cur_offset = offset + (i << PAGE_SHIFT);
+        if (mm_teardown_can_skip(d, cur_offset + PAGE_SIZE))
             continue;
-        mm_teardown_pte(d, pte, offset + (i << PAGE_SHIFT));
+        if (!pte_present(*pte)) { // acquire semantics
+            mm_teardown_update_offset(d, cur_offset);
+            continue;
+        }
+        mm_teardown_update_offset(d, cur_offset);
+        mm_teardown_pte(d, pte, cur_offset);
+        if (hypercall_preempt_check())
+            return -EAGAIN;
     }
+    return 0;
 }
 
-static void
+static int
 mm_teardown_pud(struct domain* d, volatile pud_t *pud, unsigned long offset)
 {
     unsigned long i;
     volatile pmd_t *pmd = pmd_offset(pud, offset);
 
     for (i = 0; i < PTRS_PER_PMD; i++, pmd++) {
-        if (!pmd_present(*pmd)) // acquire semantics
+        unsigned long cur_offset = offset + (i << PMD_SHIFT);
+        if (mm_teardown_can_skip(d, cur_offset + PMD_SIZE))
+            continue;
+        if (!pmd_present(*pmd)) { // acquire semantics
+            mm_teardown_update_offset(d, cur_offset);
             continue;
-        mm_teardown_pmd(d, pmd, offset + (i << PMD_SHIFT));
+        }
+        if (mm_teardown_pmd(d, pmd, cur_offset))
+            return -EAGAIN;
     }
+    return 0;
 }
 
-static void
+static int
 mm_teardown_pgd(struct domain* d, volatile pgd_t *pgd, unsigned long offset)
 {
     unsigned long i;
     volatile pud_t *pud = pud_offset(pgd, offset);
 
     for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
-        if (!pud_present(*pud)) // acquire semantics
+        unsigned long cur_offset = offset + (i << PUD_SHIFT);
+#ifndef __PAGETABLE_PUD_FOLDED
+        if (mm_teardown_can_skip(d, cur_offset + PUD_SIZE))
+            continue;
+#endif
+        if (!pud_present(*pud)) { // acquire semantics
+#ifndef __PAGETABLE_PUD_FOLDED
+            mm_teardown_update_offset(d, cur_offset);
+#endif
             continue;
-        mm_teardown_pud(d, pud, offset + (i << PUD_SHIFT));
+        }
+        if (mm_teardown_pud(d, pud, cur_offset))
+            return -EAGAIN;
     }
+    return 0;
 }
 
-void
+int
 mm_teardown(struct domain* d)
 {
     struct mm_struct* mm = &d->arch.mm;
@@ -296,14 +336,22 @@ mm_teardown(struct domain* d)
     volatile pgd_t* pgd;
 
     if (mm->pgd == NULL)
-        return;
+        return 0;
 
     pgd = pgd_offset(mm, 0);
     for (i = 0; i < PTRS_PER_PGD; i++, pgd++) {
-        if (!pgd_present(*pgd)) // acquire semantics
+        unsigned long cur_offset = i << PGDIR_SHIFT;
+
+        if (mm_teardown_can_skip(d, cur_offset + PGDIR_SIZE))
             continue;
-        mm_teardown_pgd(d, pgd, i << PGDIR_SHIFT);
+        if (!pgd_present(*pgd)) { // acquire semantics
+            mm_teardown_update_offset(d, cur_offset);
+            continue;
+        }
+        if (mm_teardown_pgd(d, pgd, cur_offset))
+            return -EAGAIN;
     }
+    return 0;
 }
 
 static void
diff --git a/arch/powerpc/domain.c b/arch/powerpc/domain.c
index 3ac8801..e3b185d 100644
--- a/arch/powerpc/domain.c
+++ b/arch/powerpc/domain.c
@@ -313,13 +313,13 @@ static void relinquish_memory(struct domain *d, struct list_head *list)
     spin_unlock_recursive(&d->page_alloc_lock);
 }
 
-void domain_relinquish_resources(struct domain *d)
+int domain_relinquish_resources(struct domain *d)
 {
     relinquish_memory(d, &d->xenpage_list);
     relinquish_memory(d, &d->page_list);
     xfree(d->arch.foreign_mfns);
     xfree(d->arch.p2m);
-    return;
+    return 0;
 }
 
 void arch_dump_domain_info(struct domain *d)
diff --git a/arch/x86/domain.c b/arch/x86/domain.c
index 46ea2a0..77a6583 100644
--- a/arch/x86/domain.c
+++ b/arch/x86/domain.c
@@ -1703,7 +1703,7 @@ static void vcpu_destroy_pagetables(struct vcpu *v)
     v->arch.cr3 = 0;
 }
 
-void domain_relinquish_resources(struct domain *d)
+int domain_relinquish_resources(struct domain *d)
 {
     struct vcpu *v;
 
@@ -1740,6 +1740,8 @@ void domain_relinquish_resources(struct domain *d)
 
     if ( is_hvm_domain(d) )
         hvm_domain_relinquish_resources(d);
+
+    return 0;
 }
 
 void arch_dump_domain_info(struct domain *d)
diff --git a/common/domain.c b/common/domain.c
index 1a8937f..fa7a2e3 100644
--- a/common/domain.c
+++ b/common/domain.c
@@ -238,7 +238,7 @@ struct domain *domain_create(
     return d;
 
  fail:
-    d->is_dying = 1;
+    d->is_dying = DOMDYING_dead;
     atomic_set(&d->refcnt, DOMAIN_DESTROYED);
     if ( init_status & INIT_arch )
         arch_domain_destroy(d);
@@ -298,26 +298,37 @@ struct domain *rcu_lock_domain_by_id(domid_t dom)
 }
 
 
-void domain_kill(struct domain *d)
+int domain_kill(struct domain *d)
 {
-    domain_pause(d);
+    int rc = 0;
+
+    if ( d == current->domain )
+        return -EINVAL;
 
-    /* Already dying? Then bail. */
-    if ( test_and_set_bool(d->is_dying) )
+    /* Protected by domctl_lock. */
+    switch ( d->is_dying )
     {
-        domain_unpause(d);
-        return;
+    case DOMDYING_alive:
+        domain_pause(d);
+        d->is_dying = DOMDYING_dying;
+        evtchn_destroy(d);
+        gnttab_release_mappings(d);
+    case DOMDYING_dying:
+        rc = domain_relinquish_resources(d);
+        page_scrub_kick();
+        if ( rc != 0 )
+        {
+            BUG_ON(rc != -EAGAIN);
+            break;
+        }
+        d->is_dying = DOMDYING_dead;
+        put_domain(d);
+        send_guest_global_virq(dom0, VIRQ_DOM_EXC);
+    case DOMDYING_dead:
+        break;
     }
 
-    evtchn_destroy(d);
-    gnttab_release_mappings(d);
-    domain_relinquish_resources(d);
-    put_domain(d);
-
-    /* Kick page scrubbing after domain_relinquish_resources(). */
-    page_scrub_kick();
-
-    send_guest_global_virq(dom0, VIRQ_DOM_EXC);
+    return rc;
 }
 
 
diff --git a/common/domctl.c b/common/domctl.c
index dfd54dd..2d3270f 100644
--- a/common/domctl.c
+++ b/common/domctl.c
@@ -120,10 +120,10 @@ void getdomaininfo(struct domain *d, struct xen_domctl_getdomaininfo *info)
     info->cpu_time = cpu_time;
 
     info->flags = flags |
-        (d->is_dying                ? XEN_DOMINF_dying    : 0) |
-        (d->is_shut_down            ? XEN_DOMINF_shutdown : 0) |
-        (d->is_paused_by_controller ? XEN_DOMINF_paused   : 0) |
-        (d->debugger_attached       ? XEN_DOMINF_debugged : 0) |
+        ((d->is_dying == DOMDYING_dead) ? XEN_DOMINF_dying    : 0) |
+        (d->is_shut_down                ? XEN_DOMINF_shutdown : 0) |
+        (d->is_paused_by_controller     ? XEN_DOMINF_paused   : 0) |
+        (d->debugger_attached           ? XEN_DOMINF_debugged : 0) |
         d->shutdown_code << XEN_DOMINF_shutdownshift;
 
     if ( is_hvm_domain(d) )
@@ -403,10 +403,7 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domctl_t) u_domctl)
         {
             ret = -EINVAL;
             if ( d != current->domain )
-            {
-                domain_kill(d);
-                ret = 0;
-            }
+                ret = domain_kill(d);
             rcu_unlock_domain(d);
         }
     }
diff --git a/include/asm-ia64/domain.h b/include/asm-ia64/domain.h
index ba414d8..89c3f2d 100644
--- a/include/asm-ia64/domain.h
+++ b/include/asm-ia64/domain.h
@@ -18,7 +18,6 @@ struct p2m_entry;
 struct tlb_track;
 #endif
 
-extern void domain_relinquish_resources(struct domain *);
 struct vcpu;
 extern void relinquish_vcpu_resources(struct vcpu *v);
 extern void vcpu_share_privregs_with_guest(struct vcpu *v);
@@ -132,6 +131,9 @@ struct arch_domain {
 #ifdef CONFIG_XEN_IA64_TLB_TRACK
     struct tlb_track*   tlb_track;
 #endif
+
+    /* for domctl_destroy_domain continuation */
+    unsigned long mm_teardown_offset;
 };
 #define INT_ENABLE_OFFSET(v) 		  \
     (sizeof(vcpu_info_t) * (v)->vcpu_id + \
diff --git a/include/asm-ia64/mm.h b/include/asm-ia64/mm.h
index 3260746..d5d4148 100644
--- a/include/asm-ia64/mm.h
+++ b/include/asm-ia64/mm.h
@@ -417,7 +417,7 @@ extern unsigned long totalram_pages;
 extern int nr_swap_pages;
 
 extern void alloc_dom_xen_and_dom_io(void);
-extern void mm_teardown(struct domain* d);
+extern int mm_teardown(struct domain* d);
 extern void mm_final_teardown(struct domain* d);
 extern struct page_info * assign_new_domain_page(struct domain *d, unsigned long mpaddr);
 extern void assign_new_domain0_page(struct domain *d, unsigned long mpaddr);
diff --git a/include/xen/domain.h b/include/xen/domain.h
index dd19c96..aa8c72f 100644
--- a/include/xen/domain.h
+++ b/include/xen/domain.h
@@ -45,7 +45,7 @@ void arch_domain_destroy(struct domain *d);
 int arch_set_info_guest(struct vcpu *, vcpu_guest_context_u);
 void arch_get_info_guest(struct vcpu *, vcpu_guest_context_u);
 
-void domain_relinquish_resources(struct domain *d);
+int domain_relinquish_resources(struct domain *d);
 
 void dump_pageframe_info(struct domain *d);
 
diff --git a/include/xen/sched.h b/include/xen/sched.h
index 1840c13..3d9f190 100644
--- a/include/xen/sched.h
+++ b/include/xen/sched.h
@@ -191,7 +191,7 @@ struct domain
     /* Are any VCPUs polling event channels (SCHEDOP_poll)? */
     bool_t           is_polling;
     /* Is this guest dying (i.e., a zombie)? */
-    bool_t           is_dying;
+    enum { DOMDYING_alive, DOMDYING_dying, DOMDYING_dead } is_dying;
     /* Domain is paused by controller software? */
     bool_t           is_paused_by_controller;
 
@@ -335,7 +335,7 @@ static inline struct domain *rcu_lock_current_domain(void)
 
 struct domain *get_domain_by_id(domid_t dom);
 void domain_destroy(struct domain *d);
-void domain_kill(struct domain *d);
+int domain_kill(struct domain *d);
 void domain_shutdown(struct domain *d, u8 reason);
 void domain_resume(struct domain *d);
 void domain_pause_for_debugger(void);