From: Tetsu Yamamoto <tyamamot@redhat.com> Date: Mon, 28 Jan 2008 16:10:18 -0500 Subject: [Xen] gnttab: allow more than 3 VNIFs Message-id: 479E44BA.20206@redhat.com O-Subject: [RHEL5.2 PATCH][Xen][Take2] Expand VNIF number per a guest domain over 3 Bugzilla: 297331 This is a revised patch to fix BZ#297331, which is cloned from BZ#223908, to enable a PV guest domain to have more than 3 VNIFs. https://bugzilla.redhat.com/show_bug.cgi?id=297331 https://bugzilla.redhat.com/show_bug.cgi?id=223908 The previous patch was committed once, but was reverted in kernel -74 because it caused a DomU panic on boot on i386. It was backported from a patch that was posted upstream but never actually committed. That patch had a bug in the way grow_gnttab_list() in gnttab.c set gnttab_free_head. The bug is fixed in the patch that was actually committed upstream: - Dynamic grant-table sizing. http://xenbits.xensource.com/xen-unstable.hg?rev/70f05d642a2e The attached patch is backported from this committed patch. I've tested this patch with kernel -75 on i386 and ia64 boxes, for both Dom0 and DomU, and confirmed that DomU boots with no problems and that 4 VNIFs work well. Please review and ACK. Regards, Tetsu Yamamoto # HG changeset patch # User kfraser@localhost.localdomain # Date 1171536852 0 # Node ID 70f05d642a2e1c0a688e17e39e622e930998e60b # Parent 047b3e9f90325eac9a84d840ed27dcb2c8691f5a Dynamic grant-table sizing. Signed-off-by: Christopher CLark <christopher.clark@cl.cam.ac.uk> Signed-off-by: Andrei Petrov <andrei.petrov@xensource.com> Signed-off-by: Keir Fraser <keir@xensource.com> Acked-by: Bill Burns <bburns@redhat.com> diff --git a/drivers/xen/core/gnttab.c b/drivers/xen/core/gnttab.c index 66c9802..fe3228f 100644 --- a/drivers/xen/core/gnttab.c +++ b/drivers/xen/core/gnttab.c @@ -3,7 +3,7 @@ * * Granting foreign access to our memory reservation. 
* - * Copyright (c) 2005, Christopher Clark + * Copyright (c) 2005-2006, Christopher Clark * Copyright (c) 2004-2005, K A Fraser * * This program is free software; you can redistribute it and/or @@ -34,7 +34,6 @@ #include <linux/module.h> #include <linux/sched.h> #include <linux/mm.h> -#include <linux/vmalloc.h> #include <xen/interface/xen.h> #include <xen/gnttab.h> #include <asm/pgtable.h> @@ -42,40 +41,55 @@ #include <asm/synch_bitops.h> #include <asm/io.h> #include <xen/interface/memory.h> +#include <xen/driver_util.h> /* External tools reserve first few grant table entries. */ #define NR_RESERVED_ENTRIES 8 +#define GNTTAB_LIST_END 0xffffffff +#define GREFS_PER_GRANT_FRAME (PAGE_SIZE / sizeof(grant_entry_t)) -#define NR_GRANT_ENTRIES \ - (NR_GRANT_FRAMES * PAGE_SIZE / sizeof(struct grant_entry)) -#define GNTTAB_LIST_END (NR_GRANT_ENTRIES + 1) - -static grant_ref_t gnttab_list[NR_GRANT_ENTRIES]; +static grant_ref_t **gnttab_list; +static unsigned int nr_grant_frames; +static unsigned int boot_max_nr_grant_frames; static int gnttab_free_count; static grant_ref_t gnttab_free_head; static DEFINE_SPINLOCK(gnttab_list_lock); static struct grant_entry *shared; +#ifndef CONFIG_XEN +static unsigned long resume_frames; +#endif static struct gnttab_free_callback *gnttab_free_callback_list; +static int gnttab_expand(unsigned int req_entries); + +#define RPP (PAGE_SIZE / sizeof(grant_ref_t)) +#define gnttab_entry(entry) (gnttab_list[(entry) / RPP][(entry) % RPP]) + static int get_free_entries(int count) { unsigned long flags; - int ref; + int ref, rc; grant_ref_t head; + spin_lock_irqsave(&gnttab_list_lock, flags); - if (gnttab_free_count < count) { + + if ((gnttab_free_count < count) && + ((rc = gnttab_expand(count - gnttab_free_count)) < 0)) { spin_unlock_irqrestore(&gnttab_list_lock, flags); - return -1; + return rc; } + ref = head = gnttab_free_head; gnttab_free_count -= count; while (count-- > 1) - head = gnttab_list[head]; - gnttab_free_head = gnttab_list[head]; - 
gnttab_list[head] = GNTTAB_LIST_END; + head = gnttab_entry(head); + gnttab_free_head = gnttab_entry(head); + gnttab_entry(head) = GNTTAB_LIST_END; + spin_unlock_irqrestore(&gnttab_list_lock, flags); + return ref; } @@ -111,7 +125,7 @@ static void put_free_entry(grant_ref_t ref) { unsigned long flags; spin_lock_irqsave(&gnttab_list_lock, flags); - gnttab_list[ref] = gnttab_free_head; + gnttab_entry(ref) = gnttab_free_head; gnttab_free_head = ref; gnttab_free_count++; check_free_callbacks(); @@ -127,7 +141,7 @@ int gnttab_grant_foreign_access(domid_t domid, unsigned long frame, { int ref; - if (unlikely((ref = get_free_entry()) == -1)) + if (unlikely((ref = get_free_entry()) < 0)) return -ENOSPC; shared[ref].frame = frame; @@ -197,7 +211,7 @@ int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn) { int ref; - if (unlikely((ref = get_free_entry()) == -1)) + if (unlikely((ref = get_free_entry()) < 0)) return -ENOSPC; gnttab_grant_foreign_transfer_ref(ref, domid, pfn); @@ -268,11 +282,11 @@ void gnttab_free_grant_references(grant_ref_t head) return; spin_lock_irqsave(&gnttab_list_lock, flags); ref = head; - while (gnttab_list[ref] != GNTTAB_LIST_END) { - ref = gnttab_list[ref]; + while (gnttab_entry(ref) != GNTTAB_LIST_END) { + ref = gnttab_entry(ref); count++; } - gnttab_list[ref] = gnttab_free_head; + gnttab_entry(ref) = gnttab_free_head; gnttab_free_head = head; gnttab_free_count += count; check_free_callbacks(); @@ -284,7 +298,7 @@ int gnttab_alloc_grant_references(u16 count, grant_ref_t *head) { int h = get_free_entries(count); - if (h == -1) + if (h < 0) return -ENOSPC; *head = h; @@ -304,7 +318,7 @@ int gnttab_claim_grant_reference(grant_ref_t *private_head) grant_ref_t g = *private_head; if (unlikely(g == GNTTAB_LIST_END)) return -ENOSPC; - *private_head = gnttab_list[g]; + *private_head = gnttab_entry(g); return g; } EXPORT_SYMBOL_GPL(gnttab_claim_grant_reference); @@ -312,7 +326,7 @@ EXPORT_SYMBOL_GPL(gnttab_claim_grant_reference); void 
gnttab_release_grant_reference(grant_ref_t *private_head, grant_ref_t release) { - gnttab_list[release] = *private_head; + gnttab_entry(release) = *private_head; *private_head = release; } EXPORT_SYMBOL_GPL(gnttab_release_grant_reference); @@ -351,6 +365,64 @@ void gnttab_cancel_free_callback(struct gnttab_free_callback *callback) } EXPORT_SYMBOL_GPL(gnttab_cancel_free_callback); +static int grow_gnttab_list(unsigned int more_frames) +{ + unsigned int new_nr_grant_frames, extra_entries, i; + + new_nr_grant_frames = nr_grant_frames + more_frames; + extra_entries = more_frames * GREFS_PER_GRANT_FRAME; + + for (i = nr_grant_frames; i < new_nr_grant_frames; i++) + { + gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_ATOMIC); + if (!gnttab_list[i]) + goto grow_nomem; + } + + + for (i = GREFS_PER_GRANT_FRAME * nr_grant_frames; + i < GREFS_PER_GRANT_FRAME * new_nr_grant_frames - 1; i++) + gnttab_entry(i) = i + 1; + + gnttab_entry(i) = gnttab_free_head; + gnttab_free_head = GREFS_PER_GRANT_FRAME * nr_grant_frames; + gnttab_free_count += extra_entries; + + nr_grant_frames = new_nr_grant_frames; + + check_free_callbacks(); + + return 0; + +grow_nomem: + for ( ; i >= nr_grant_frames; i--) + free_page((unsigned long) gnttab_list[i]); + return -ENOMEM; +} + +static unsigned int __max_nr_grant_frames(void) +{ + struct gnttab_query_size query; + int rc; + + query.dom = DOMID_SELF; + + rc = HYPERVISOR_grant_table_op(GNTTABOP_query_size, &query, 1); + if ((rc < 0) || (query.status != GNTST_okay)) + return 4; /* Legacy max supported number of frames */ + + return query.max_nr_frames; +} + +static inline unsigned int max_nr_grant_frames(void) +{ + unsigned int xen_max = __max_nr_grant_frames(); + + if (xen_max > boot_max_nr_grant_frames) + return boot_max_nr_grant_frames; + return xen_max; +} + #ifdef CONFIG_XEN #ifndef __ia64__ @@ -373,49 +445,62 @@ static int unmap_pte_fn(pte_t *pte, struct page *pmd_page, } #endif -int gnttab_resume(void) +static int gnttab_map(unsigned int 
start_idx, unsigned int end_idx) { struct gnttab_setup_table setup; - unsigned long frames[NR_GRANT_FRAMES]; + unsigned long *frames; + unsigned int nr_gframes = end_idx + 1; int rc; -#ifndef __ia64__ - void *pframes = frames; - struct vm_struct *area; -#endif + + frames = kmalloc(nr_gframes * sizeof(unsigned long), GFP_ATOMIC); + if (!frames) + return -ENOMEM; setup.dom = DOMID_SELF; - setup.nr_frames = NR_GRANT_FRAMES; + setup.nr_frames = nr_gframes; set_xen_guest_handle(setup.frame_list, frames); rc = HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1); - if (rc == -ENOSYS) + if (rc == -ENOSYS) { + kfree(frames); return -ENOSYS; + } BUG_ON(rc || setup.status); #ifndef __ia64__ if (shared == NULL) { - area = get_vm_area(PAGE_SIZE * NR_GRANT_FRAMES, VM_IOREMAP); + struct vm_struct *area; + area = alloc_vm_area(PAGE_SIZE * max_nr_grant_frames()); BUG_ON(area == NULL); shared = area->addr; } rc = apply_to_page_range(&init_mm, (unsigned long)shared, - PAGE_SIZE * NR_GRANT_FRAMES, - map_pte_fn, &pframes); + PAGE_SIZE * nr_gframes, + map_pte_fn, &frames); BUG_ON(rc); + frames -= nr_gframes; /* adjust after map_pte_fn() */ #else shared = __va(frames[0] << PAGE_SHIFT); - printk("grant table at %p\n", shared); #endif + kfree(frames); + return 0; } +int gnttab_resume(void) +{ + if (max_nr_grant_frames() < nr_grant_frames) + return -ENOSYS; + return gnttab_map(0, nr_grant_frames - 1); +} + int gnttab_suspend(void) { #ifndef __ia64__ apply_to_page_range(&init_mm, (unsigned long)shared, - PAGE_SIZE * NR_GRANT_FRAMES, + PAGE_SIZE * nr_grant_frames, unmap_pte_fn, NULL); #endif return 0; @@ -425,24 +510,39 @@ int gnttab_suspend(void) #include <platform-pci.h> -int gnttab_resume(void) +static int gnttab_map(unsigned int start_idx, unsigned int end_idx) { - unsigned long frames; struct xen_add_to_physmap xatp; unsigned int i; - frames = alloc_xen_mmio(PAGE_SIZE * NR_GRANT_FRAMES); - - for (i = 0; i < NR_GRANT_FRAMES; i++) { + /* Loop backwards, so that the first hypercall 
has the largest index, + * ensuring that the table will grow only once. + */ + for (i = end_idx; i >= start_idx; i--) { xatp.domid = DOMID_SELF; xatp.idx = i; xatp.space = XENMAPSPACE_grant_table; - xatp.gpfn = (frames >> PAGE_SHIFT) + i; + xatp.gpfn = (resume_frames >> PAGE_SHIFT) + i; if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) BUG(); } +} + +int gnttab_resume(void) +{ + struct xen_add_to_physmap xatp; + unsigned int i, max_nr_gframes, nr_gframes; + + nr_gframes = nr_grant_frames; + max_nr_gframes = max_nr_grant_frames(); + if (max_nr_gframes < nr_gframes) + return -ENOSYS; + + resume_frames = alloc_xen_mmio(PAGE_SIZE * max_nr_gframes); - shared = ioremap(frames, PAGE_SIZE * NR_GRANT_FRAMES); + gnttab_map(0, nr_gframes - 1); + + shared = ioremap(resume_frames, PAGE_SIZE * max_nr_gframes); if (shared == NULL) { printk("error to ioremap gnttab share frames\n"); return -1; @@ -454,28 +554,79 @@ int gnttab_resume(void) int gnttab_suspend(void) { iounmap(shared); + resume_frames = 0; return 0; } #endif /* !CONFIG_XEN */ +static int gnttab_expand(unsigned int req_entries) +{ + int rc; + unsigned int cur, extra; + + cur = nr_grant_frames; + extra = ((req_entries + (GREFS_PER_GRANT_FRAME-1)) / + GREFS_PER_GRANT_FRAME); + if (cur + extra > max_nr_grant_frames()) + return -ENOSPC; + + if ((rc = gnttab_map(cur, cur + extra - 1)) == 0) + rc = grow_gnttab_list(extra); + + return rc; +} + int __init gnttab_init(void) { int i; + unsigned int max_nr_glist_frames; + unsigned int nr_init_grefs; if (!is_running_on_xen()) return -ENODEV; + nr_grant_frames = 1; + boot_max_nr_grant_frames = __max_nr_grant_frames(); + + /* Determine the maximum number of frames required for the + * grant reference free list on the current hypervisor. 
+ */ + max_nr_glist_frames = (boot_max_nr_grant_frames * + GREFS_PER_GRANT_FRAME / + (PAGE_SIZE / sizeof(grant_ref_t))); + + gnttab_list = kmalloc(max_nr_glist_frames * sizeof(grant_ref_t *), + GFP_KERNEL); + if (gnttab_list == NULL) + return -ENOMEM; + + for (i = 0; i < nr_grant_frames; i++) { + gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_KERNEL); + if (gnttab_list[i] == NULL) + goto ini_nomem; + } + if (gnttab_resume() < 0) return -ENODEV; - for (i = NR_RESERVED_ENTRIES; i < NR_GRANT_ENTRIES; i++) - gnttab_list[i] = i + 1; - gnttab_free_count = NR_GRANT_ENTRIES - NR_RESERVED_ENTRIES; + nr_init_grefs = nr_grant_frames * GREFS_PER_GRANT_FRAME; + + for (i = NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++) + gnttab_entry(i) = i + 1; + + gnttab_entry(nr_init_grefs - 1) = GNTTAB_LIST_END; + gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES; gnttab_free_head = NR_RESERVED_ENTRIES; printk("Grant table initialized\n"); return 0; + + ini_nomem: + for (i--; i >= 0; i--) + free_page((unsigned long)gnttab_list[i]); + kfree(gnttab_list); + return -ENOMEM; } #ifdef CONFIG_XEN diff --git a/include/xen/gnttab.h b/include/xen/gnttab.h index 558aee5..899d55b 100644 --- a/include/xen/gnttab.h +++ b/include/xen/gnttab.h @@ -41,13 +41,6 @@ #include <xen/interface/grant_table.h> #include <xen/features.h> -/* NR_GRANT_FRAMES must be less than or equal to that configured in Xen */ -#ifdef __ia64__ -#define NR_GRANT_FRAMES 1 -#else -#define NR_GRANT_FRAMES 4 -#endif - struct gnttab_free_callback { struct gnttab_free_callback *next; void (*fn)(void *); @@ -107,12 +100,6 @@ void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid, void gnttab_grant_foreign_transfer_ref(grant_ref_t, domid_t domid, unsigned long pfn); -#ifdef __ia64__ -#define gnttab_map_vaddr(map) __va(map.dev_bus_addr) -#else -#define gnttab_map_vaddr(map) ((void *)(map.host_virt_addr)) -#endif - int gnttab_suspend(void); int gnttab_resume(void);