From: Tetsu Yamamoto <tyamamot@redhat.com> Date: Mon, 21 Jan 2008 20:48:15 -0500 Subject: [xen] ia64: create 100GB mem guest fixes Message-id: 20080121165202.15518.30769.sendpatchset@dhcp83-164.boston.redhat.com O-Subject: [RHEL5.2 PATCH 1/4][Take2] Cannot create guest having 100GB memory on Xen-ia64 Bugzilla: 251353 Acked-by: Bill Burns <bburns@redhat.com> Acked-by: "Stephen C. Tweedie" <sct@redhat.com> diff --git a/arch/ia64/xen/domain.c b/arch/ia64/xen/domain.c index 57f528a..5a966df 100644 --- a/arch/ia64/xen/domain.c +++ b/arch/ia64/xen/domain.c @@ -562,6 +562,7 @@ int arch_domain_create(struct domain *d) goto fail_nomem; memset(&d->arch.mm, 0, sizeof(d->arch.mm)); + d->arch.mm_teardown_offset = 0; if ((d->arch.mm.pgd = pgd_alloc(&d->arch.mm)) == NULL) goto fail_nomem; @@ -765,14 +766,17 @@ static void relinquish_memory(struct domain *d, struct list_head *list) spin_unlock_recursive(&d->page_alloc_lock); } -void domain_relinquish_resources(struct domain *d) +int domain_relinquish_resources(struct domain *d) { + int ret; /* Relinquish guest resources for VT-i domain. */ if (d->arch.is_vti) vmx_relinquish_guest_resources(d); /* Tear down shadow mode stuff. */ - mm_teardown(d); + ret = mm_teardown(d); + if (ret != 0) + return ret; /* Relinquish every page of memory. */ relinquish_memory(d, &d->xenpage_list); @@ -783,6 +787,8 @@ void domain_relinquish_resources(struct domain *d) /* Free page used by xen oprofile buffer */ free_xenoprof_pages(d); + + return 0; } unsigned long diff --git a/arch/ia64/xen/mm.c b/arch/ia64/xen/mm.c index 58e9602..46570b4 100644 --- a/arch/ia64/xen/mm.c +++ b/arch/ia64/xen/mm.c @@ -172,6 +172,7 @@ #include <asm/vhpt.h> #include <asm/vcpu.h> #include <asm/shadow.h> +#include <asm/event.h> #include <asm/p2m_entry.h> #include <asm/tlb_track.h> #include <linux/efi.h> @@ -209,6 +210,18 @@ alloc_dom_xen_and_dom_io(void) BUG_ON(dom_io == NULL); } +static int +mm_teardown_can_skip(struct domain* d, unsigned long offset) +{ + return d->arch.mm_teardown_offset > offset; +} + +static void +mm_teardown_update_offset(struct domain* d, unsigned long offset) +{ + d->arch.mm_teardown_offset = offset; +} + static void mm_teardown_pte(struct domain* d, volatile pte_t* pte, unsigned long offset) { @@ -249,46 +262,73 @@ mm_teardown_pte(struct domain* d, volatile pte_t* pte, unsigned long offset) } } -static void +static int mm_teardown_pmd(struct domain* d, volatile pmd_t* pmd, unsigned long offset) { unsigned long i; volatile pte_t* pte = pte_offset_map(pmd, offset); for (i = 0; i < PTRS_PER_PTE; i++, pte++) { - if (!pte_present(*pte)) // acquire semantics + unsigned long cur_offset = offset + (i << PAGE_SHIFT); + if (mm_teardown_can_skip(d, cur_offset + PAGE_SIZE)) continue; - mm_teardown_pte(d, pte, offset + (i << PAGE_SHIFT)); + if (!pte_present(*pte)) { // acquire semantics + mm_teardown_update_offset(d, cur_offset); + continue; + } + mm_teardown_update_offset(d, cur_offset); + mm_teardown_pte(d, pte, cur_offset); + if (hypercall_preempt_check()) + return -EAGAIN; } + return 0; } -static void +static int mm_teardown_pud(struct domain* d, volatile pud_t *pud, unsigned long offset) { unsigned long i; volatile pmd_t *pmd = pmd_offset(pud, offset); for (i = 0; i < PTRS_PER_PMD; i++, pmd++) { - if (!pmd_present(*pmd)) // acquire semantics + unsigned long cur_offset = offset + (i << PMD_SHIFT); + if (mm_teardown_can_skip(d, cur_offset + PMD_SIZE)) + continue; + if (!pmd_present(*pmd)) { // acquire semantics + mm_teardown_update_offset(d, cur_offset); continue; - mm_teardown_pmd(d, pmd, offset + (i << PMD_SHIFT)); + } + if (mm_teardown_pmd(d, pmd, cur_offset)) + return -EAGAIN; } + return 0; } -static void +static int mm_teardown_pgd(struct domain* d, volatile pgd_t *pgd, unsigned long offset) { unsigned long i; volatile pud_t *pud = pud_offset(pgd, offset); for (i = 0; i < PTRS_PER_PUD; i++, pud++) { - if (!pud_present(*pud)) // acquire semantics + unsigned long cur_offset = offset + (i << PUD_SHIFT); +#ifndef __PAGETABLE_PUD_FOLDED + if (mm_teardown_can_skip(d, cur_offset + PUD_SIZE)) + continue; +#endif + if (!pud_present(*pud)) { // acquire semantics +#ifndef __PAGETABLE_PUD_FOLDED + mm_teardown_update_offset(d, cur_offset); +#endif continue; - mm_teardown_pud(d, pud, offset + (i << PUD_SHIFT)); + } + if (mm_teardown_pud(d, pud, cur_offset)) + return -EAGAIN; } + return 0; } -void +int mm_teardown(struct domain* d) { struct mm_struct* mm = &d->arch.mm; @@ -296,14 +336,22 @@ mm_teardown(struct domain* d) volatile pgd_t* pgd; if (mm->pgd == NULL) - return; + return 0; pgd = pgd_offset(mm, 0); for (i = 0; i < PTRS_PER_PGD; i++, pgd++) { - if (!pgd_present(*pgd)) // acquire semantics + unsigned long cur_offset = i << PGDIR_SHIFT; + + if (mm_teardown_can_skip(d, cur_offset + PGDIR_SIZE)) continue; - mm_teardown_pgd(d, pgd, i << PGDIR_SHIFT); + if (!pgd_present(*pgd)) { // acquire semantics + mm_teardown_update_offset(d, cur_offset); + continue; + } + if (mm_teardown_pgd(d, pgd, cur_offset)) + return -EAGAIN; } + return 0; } static void diff --git a/arch/powerpc/domain.c b/arch/powerpc/domain.c index 3ac8801..e3b185d 100644 --- a/arch/powerpc/domain.c +++ b/arch/powerpc/domain.c @@ -313,13 +313,13 @@ static void relinquish_memory(struct domain *d, struct list_head *list) spin_unlock_recursive(&d->page_alloc_lock); } -void domain_relinquish_resources(struct domain *d) +int domain_relinquish_resources(struct domain *d) { relinquish_memory(d, &d->xenpage_list); relinquish_memory(d, &d->page_list); xfree(d->arch.foreign_mfns); xfree(d->arch.p2m); - return; + return 0; } void arch_dump_domain_info(struct domain *d) diff --git a/arch/x86/domain.c b/arch/x86/domain.c index 46ea2a0..77a6583 100644 --- a/arch/x86/domain.c +++ b/arch/x86/domain.c @@ -1703,7 +1703,7 @@ static void vcpu_destroy_pagetables(struct vcpu *v) v->arch.cr3 = 0; } -void domain_relinquish_resources(struct domain *d) +int domain_relinquish_resources(struct domain *d) { struct vcpu *v; @@ -1740,6 +1740,8 @@ void domain_relinquish_resources(struct domain *d) if ( is_hvm_domain(d) ) hvm_domain_relinquish_resources(d); + + return 0; } void arch_dump_domain_info(struct domain *d) diff --git a/common/domain.c b/common/domain.c index 1a8937f..fa7a2e3 100644 --- a/common/domain.c +++ b/common/domain.c @@ -238,7 +238,7 @@ struct domain *domain_create( return d; fail: - d->is_dying = 1; + d->is_dying = DOMDYING_dead; atomic_set(&d->refcnt, DOMAIN_DESTROYED); if ( init_status & INIT_arch ) arch_domain_destroy(d); @@ -298,26 +298,37 @@ struct domain *rcu_lock_domain_by_id(domid_t dom) } -void domain_kill(struct domain *d) +int domain_kill(struct domain *d) { - domain_pause(d); + int rc = 0; + + if ( d == current->domain ) + return -EINVAL; - /* Already dying? Then bail. */ - if ( test_and_set_bool(d->is_dying) ) + /* Protected by domctl_lock. */ + switch ( d->is_dying ) { - domain_unpause(d); - return; + case DOMDYING_alive: + domain_pause(d); + d->is_dying = DOMDYING_dying; + evtchn_destroy(d); + gnttab_release_mappings(d); + case DOMDYING_dying: + rc = domain_relinquish_resources(d); + page_scrub_kick(); + if ( rc != 0 ) + { + BUG_ON(rc != -EAGAIN); + break; + } + d->is_dying = DOMDYING_dead; + put_domain(d); + send_guest_global_virq(dom0, VIRQ_DOM_EXC); + case DOMDYING_dead: + break; } - evtchn_destroy(d); - gnttab_release_mappings(d); - domain_relinquish_resources(d); - put_domain(d); - - /* Kick page scrubbing after domain_relinquish_resources(). */ - page_scrub_kick(); - - send_guest_global_virq(dom0, VIRQ_DOM_EXC); + return rc; } diff --git a/common/domctl.c b/common/domctl.c index dfd54dd..2d3270f 100644 --- a/common/domctl.c +++ b/common/domctl.c @@ -120,10 +120,10 @@ void getdomaininfo(struct domain *d, struct xen_domctl_getdomaininfo *info) info->cpu_time = cpu_time; info->flags = flags | - (d->is_dying ? XEN_DOMINF_dying : 0) | - (d->is_shut_down ? XEN_DOMINF_shutdown : 0) | - (d->is_paused_by_controller ? XEN_DOMINF_paused : 0) | - (d->debugger_attached ? XEN_DOMINF_debugged : 0) | + ((d->is_dying == DOMDYING_dead) ? XEN_DOMINF_dying : 0) | + (d->is_shut_down ? XEN_DOMINF_shutdown : 0) | + (d->is_paused_by_controller ? XEN_DOMINF_paused : 0) | + (d->debugger_attached ? XEN_DOMINF_debugged : 0) | d->shutdown_code << XEN_DOMINF_shutdownshift; if ( is_hvm_domain(d) ) @@ -403,10 +403,7 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domctl_t) u_domctl) { ret = -EINVAL; if ( d != current->domain ) - { - domain_kill(d); - ret = 0; - } + ret = domain_kill(d); rcu_unlock_domain(d); } } diff --git a/include/asm-ia64/domain.h b/include/asm-ia64/domain.h index ba414d8..89c3f2d 100644 --- a/include/asm-ia64/domain.h +++ b/include/asm-ia64/domain.h @@ -18,7 +18,6 @@ struct p2m_entry; struct tlb_track; #endif -extern void domain_relinquish_resources(struct domain *); struct vcpu; extern void relinquish_vcpu_resources(struct vcpu *v); extern void vcpu_share_privregs_with_guest(struct vcpu *v); @@ -132,6 +131,9 @@ struct arch_domain { #ifdef CONFIG_XEN_IA64_TLB_TRACK struct tlb_track* tlb_track; #endif + + /* for domctl_destroy_domain continuation */ + unsigned long mm_teardown_offset; }; #define INT_ENABLE_OFFSET(v) \ (sizeof(vcpu_info_t) * (v)->vcpu_id + \ diff --git a/include/asm-ia64/mm.h b/include/asm-ia64/mm.h index 3260746..d5d4148 100644 --- a/include/asm-ia64/mm.h +++ b/include/asm-ia64/mm.h @@ -417,7 +417,7 @@ extern unsigned long totalram_pages; extern int nr_swap_pages; extern void alloc_dom_xen_and_dom_io(void); -extern void mm_teardown(struct domain* d); +extern int mm_teardown(struct domain* d); extern void mm_final_teardown(struct domain* d); extern struct page_info * assign_new_domain_page(struct domain *d, unsigned long mpaddr); extern void assign_new_domain0_page(struct domain *d, unsigned long mpaddr); diff --git a/include/xen/domain.h b/include/xen/domain.h index dd19c96..aa8c72f 100644 --- a/include/xen/domain.h +++ b/include/xen/domain.h @@ -45,7 +45,7 @@ void arch_domain_destroy(struct domain *d); int arch_set_info_guest(struct vcpu *, vcpu_guest_context_u); void arch_get_info_guest(struct vcpu *, vcpu_guest_context_u); -void domain_relinquish_resources(struct domain *d); +int domain_relinquish_resources(struct domain *d); void dump_pageframe_info(struct domain *d); diff --git a/include/xen/sched.h b/include/xen/sched.h index 1840c13..3d9f190 100644 --- a/include/xen/sched.h +++ b/include/xen/sched.h @@ -191,7 +191,7 @@ struct domain /* Are any VCPUs polling event channels (SCHEDOP_poll)? */ bool_t is_polling; /* Is this guest dying (i.e., a zombie)? */ - bool_t is_dying; + enum { DOMDYING_alive, DOMDYING_dying, DOMDYING_dead } is_dying; /* Domain is paused by controller software? */ bool_t is_paused_by_controller; @@ -335,7 +335,7 @@ static inline struct domain *rcu_lock_current_domain(void) struct domain *get_domain_by_id(domid_t dom); void domain_destroy(struct domain *d); -void domain_kill(struct domain *d); +int domain_kill(struct domain *d); void domain_shutdown(struct domain *d, u8 reason); void domain_resume(struct domain *d); void domain_pause_for_debugger(void);