Sophie

Sophie

distrib > Scientific%20Linux > 5x > x86_64 > by-pkgid > fc11cd6e1c513a17304da94a5390f3cd > files > 4259

kernel-2.6.18-194.11.1.el5.src.rpm

From: Tetsu Yamamoto <tyamamot@redhat.com>
Date: Mon, 28 Jan 2008 16:10:18 -0500
Subject: [Xen] gnttab: allow more than 3 VNIFs
Message-id: 479E44BA.20206@redhat.com
O-Subject: [RHEL5.2 PATCH][Xen][Take2] Expand VNIF number per a guest domain over 3
Bugzilla: 297331

This is a revised patch to fix BZ#297331, which was cloned from BZ#223908,
to enable a PV guest domain to have more than 3 VNIFs.

https://bugzilla.redhat.com/show_bug.cgi?id=297331
https://bugzilla.redhat.com/show_bug.cgi?id=223908

The previous patch was committed once, but was reverted in kernel -74
because it caused a DomU panic on boot on i386.

That patch was backported from a version that was posted upstream but
never actually committed.  It had a bug in how grow_gnttab_list() in
gnttab.c set gnttab_free_head.  The bug is fixed in the patch that was
actually committed upstream:
 - Dynamic grant-table sizing.
 http://xenbits.xensource.com/xen-unstable.hg?rev/70f05d642a2e
The attached patch is backported from this committed patch.

I've tested this patch with kernel -75 on i386 and ia64 boxes, for both
Dom0 and DomU, and confirmed that DomU boots without problems and that 4
VNIFs work well.

Please review and ACK.

Regards,

Tetsu Yamamoto

# HG changeset patch
# User kfraser@localhost.localdomain
# Date 1171536852 0
# Node ID 70f05d642a2e1c0a688e17e39e622e930998e60b
# Parent  047b3e9f90325eac9a84d840ed27dcb2c8691f5a
Dynamic grant-table sizing.
Signed-off-by: Christopher Clark <christopher.clark@cl.cam.ac.uk>
Signed-off-by: Andrei Petrov <andrei.petrov@xensource.com>
Signed-off-by: Keir Fraser <keir@xensource.com>

Acked-by: Bill Burns <bburns@redhat.com>

diff --git a/drivers/xen/core/gnttab.c b/drivers/xen/core/gnttab.c
index 66c9802..fe3228f 100644
--- a/drivers/xen/core/gnttab.c
+++ b/drivers/xen/core/gnttab.c
@@ -3,7 +3,7 @@
  *
  * Granting foreign access to our memory reservation.
  *
- * Copyright (c) 2005, Christopher Clark
+ * Copyright (c) 2005-2006, Christopher Clark
  * Copyright (c) 2004-2005, K A Fraser
  *
  * This program is free software; you can redistribute it and/or
@@ -34,7 +34,6 @@
 #include <linux/module.h>
 #include <linux/sched.h>
 #include <linux/mm.h>
-#include <linux/vmalloc.h>
 #include <xen/interface/xen.h>
 #include <xen/gnttab.h>
 #include <asm/pgtable.h>
@@ -42,40 +41,55 @@
 #include <asm/synch_bitops.h>
 #include <asm/io.h>
 #include <xen/interface/memory.h>
+#include <xen/driver_util.h>
 
 /* External tools reserve first few grant table entries. */
 #define NR_RESERVED_ENTRIES 8
+#define GNTTAB_LIST_END 0xffffffff
+#define GREFS_PER_GRANT_FRAME (PAGE_SIZE / sizeof(grant_entry_t))
 
-#define NR_GRANT_ENTRIES \
-	(NR_GRANT_FRAMES * PAGE_SIZE / sizeof(struct grant_entry))
-#define GNTTAB_LIST_END (NR_GRANT_ENTRIES + 1)
-
-static grant_ref_t gnttab_list[NR_GRANT_ENTRIES];
+static grant_ref_t **gnttab_list;
+static unsigned int nr_grant_frames;
+static unsigned int boot_max_nr_grant_frames;
 static int gnttab_free_count;
 static grant_ref_t gnttab_free_head;
 static DEFINE_SPINLOCK(gnttab_list_lock);
 
 static struct grant_entry *shared;
+#ifndef CONFIG_XEN
+static unsigned long resume_frames;
+#endif
 
 static struct gnttab_free_callback *gnttab_free_callback_list;
 
+static int gnttab_expand(unsigned int req_entries);
+
+#define RPP (PAGE_SIZE / sizeof(grant_ref_t))
+#define gnttab_entry(entry) (gnttab_list[(entry) / RPP][(entry) % RPP])
+
 static int get_free_entries(int count)
 {
 	unsigned long flags;
-	int ref;
+	int ref, rc;
 	grant_ref_t head;
+
 	spin_lock_irqsave(&gnttab_list_lock, flags);
-	if (gnttab_free_count < count) {
+
+	if ((gnttab_free_count < count) &&
+	    ((rc = gnttab_expand(count - gnttab_free_count)) < 0)) {
 		spin_unlock_irqrestore(&gnttab_list_lock, flags);
-		return -1;
+		return rc;
 	}
+
 	ref = head = gnttab_free_head;
 	gnttab_free_count -= count;
 	while (count-- > 1)
-		head = gnttab_list[head];
-	gnttab_free_head = gnttab_list[head];
-	gnttab_list[head] = GNTTAB_LIST_END;
+		head = gnttab_entry(head);
+ 	gnttab_free_head = gnttab_entry(head);
+	gnttab_entry(head) = GNTTAB_LIST_END;
+
 	spin_unlock_irqrestore(&gnttab_list_lock, flags);
+
 	return ref;
 }
 
@@ -111,7 +125,7 @@ static void put_free_entry(grant_ref_t ref)
 {
 	unsigned long flags;
 	spin_lock_irqsave(&gnttab_list_lock, flags);
-	gnttab_list[ref] = gnttab_free_head;
+	gnttab_entry(ref) = gnttab_free_head;
 	gnttab_free_head = ref;
 	gnttab_free_count++;
 	check_free_callbacks();
@@ -127,7 +141,7 @@ int gnttab_grant_foreign_access(domid_t domid, unsigned long frame,
 {
 	int ref;
 
-	if (unlikely((ref = get_free_entry()) == -1))
+	if (unlikely((ref = get_free_entry()) < 0))
 		return -ENOSPC;
 
 	shared[ref].frame = frame;
@@ -197,7 +211,7 @@ int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn)
 {
 	int ref;
 
-	if (unlikely((ref = get_free_entry()) == -1))
+	if (unlikely((ref = get_free_entry()) < 0))
 		return -ENOSPC;
 	gnttab_grant_foreign_transfer_ref(ref, domid, pfn);
 
@@ -268,11 +282,11 @@ void gnttab_free_grant_references(grant_ref_t head)
 		return;
 	spin_lock_irqsave(&gnttab_list_lock, flags);
 	ref = head;
-	while (gnttab_list[ref] != GNTTAB_LIST_END) {
-		ref = gnttab_list[ref];
+	while (gnttab_entry(ref) != GNTTAB_LIST_END) {
+		ref = gnttab_entry(ref);
 		count++;
 	}
-	gnttab_list[ref] = gnttab_free_head;
+	gnttab_entry(ref) = gnttab_free_head;
 	gnttab_free_head = head;
 	gnttab_free_count += count;
 	check_free_callbacks();
@@ -284,7 +298,7 @@ int gnttab_alloc_grant_references(u16 count, grant_ref_t *head)
 {
 	int h = get_free_entries(count);
 
-	if (h == -1)
+	if (h < 0)
 		return -ENOSPC;
 
 	*head = h;
@@ -304,7 +318,7 @@ int gnttab_claim_grant_reference(grant_ref_t *private_head)
 	grant_ref_t g = *private_head;
 	if (unlikely(g == GNTTAB_LIST_END))
 		return -ENOSPC;
-	*private_head = gnttab_list[g];
+	*private_head = gnttab_entry(g);
 	return g;
 }
 EXPORT_SYMBOL_GPL(gnttab_claim_grant_reference);
@@ -312,7 +326,7 @@ EXPORT_SYMBOL_GPL(gnttab_claim_grant_reference);
 void gnttab_release_grant_reference(grant_ref_t *private_head,
 				    grant_ref_t release)
 {
-	gnttab_list[release] = *private_head;
+	gnttab_entry(release) = *private_head;
 	*private_head = release;
 }
 EXPORT_SYMBOL_GPL(gnttab_release_grant_reference);
@@ -351,6 +365,64 @@ void gnttab_cancel_free_callback(struct gnttab_free_callback *callback)
 }
 EXPORT_SYMBOL_GPL(gnttab_cancel_free_callback);
 
+static int grow_gnttab_list(unsigned int more_frames)
+{
+	unsigned int new_nr_grant_frames, extra_entries, i;
+
+	new_nr_grant_frames = nr_grant_frames + more_frames;
+	extra_entries       = more_frames * GREFS_PER_GRANT_FRAME;
+
+	for (i = nr_grant_frames; i < new_nr_grant_frames; i++)
+	{
+		gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_ATOMIC);
+		if (!gnttab_list[i])
+			goto grow_nomem;
+	}
+
+
+	for (i = GREFS_PER_GRANT_FRAME * nr_grant_frames;
+	     i < GREFS_PER_GRANT_FRAME * new_nr_grant_frames - 1; i++)
+		gnttab_entry(i) = i + 1;
+
+	gnttab_entry(i) = gnttab_free_head;
+	gnttab_free_head = GREFS_PER_GRANT_FRAME * nr_grant_frames;
+	gnttab_free_count += extra_entries;
+
+	nr_grant_frames = new_nr_grant_frames;
+
+	check_free_callbacks();
+
+	return 0;
+	
+grow_nomem:
+	for ( ; i >= nr_grant_frames; i--)
+		free_page((unsigned long) gnttab_list[i]);
+	return -ENOMEM;
+}
+
+static unsigned int __max_nr_grant_frames(void)
+{
+	struct gnttab_query_size query;
+	int rc;
+
+	query.dom = DOMID_SELF;
+
+	rc = HYPERVISOR_grant_table_op(GNTTABOP_query_size, &query, 1);
+	if ((rc < 0) || (query.status != GNTST_okay))
+		return 4; /* Legacy max supported number of frames */
+
+	return query.max_nr_frames;
+}
+
+static inline unsigned int max_nr_grant_frames(void)
+{
+	unsigned int xen_max = __max_nr_grant_frames();
+
+	if (xen_max > boot_max_nr_grant_frames)
+		return boot_max_nr_grant_frames;
+	return xen_max;
+}
+
 #ifdef CONFIG_XEN
 
 #ifndef __ia64__
@@ -373,49 +445,62 @@ static int unmap_pte_fn(pte_t *pte, struct page *pmd_page,
 }
 #endif
 
-int gnttab_resume(void)
+static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
 {
 	struct gnttab_setup_table setup;
-	unsigned long frames[NR_GRANT_FRAMES];
+	unsigned long *frames;
+	unsigned int nr_gframes = end_idx + 1;
 	int rc;
-#ifndef __ia64__
-	void *pframes = frames;
-	struct vm_struct *area;
-#endif
+
+	frames = kmalloc(nr_gframes * sizeof(unsigned long), GFP_ATOMIC);
+	if (!frames)
+		return -ENOMEM;
 
 	setup.dom        = DOMID_SELF;
-	setup.nr_frames  = NR_GRANT_FRAMES;
+	setup.nr_frames  = nr_gframes;
 	set_xen_guest_handle(setup.frame_list, frames);
 
 	rc = HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1);
-	if (rc == -ENOSYS)
+	if (rc == -ENOSYS) {
+		kfree(frames);
 		return -ENOSYS;
+	}
 
 	BUG_ON(rc || setup.status);
 
 #ifndef __ia64__
 	if (shared == NULL) {
-		area = get_vm_area(PAGE_SIZE * NR_GRANT_FRAMES, VM_IOREMAP);
+		struct vm_struct *area;
+		area = alloc_vm_area(PAGE_SIZE * max_nr_grant_frames());
 		BUG_ON(area == NULL);
 		shared = area->addr;
 	}
 	rc = apply_to_page_range(&init_mm, (unsigned long)shared,
-				 PAGE_SIZE * NR_GRANT_FRAMES,
-				 map_pte_fn, &pframes);
+				 PAGE_SIZE * nr_gframes,
+				 map_pte_fn, &frames);
 	BUG_ON(rc);
+        frames -= nr_gframes; /* adjust after map_pte_fn() */
 #else
 	shared = __va(frames[0] << PAGE_SHIFT);
-	printk("grant table at %p\n", shared);
 #endif
 
+	kfree(frames);
+
 	return 0;
 }
 
+int gnttab_resume(void)
+{
+	if (max_nr_grant_frames() < nr_grant_frames)
+		return -ENOSYS;
+	return gnttab_map(0, nr_grant_frames - 1);
+}
+
 int gnttab_suspend(void)
 {
 #ifndef __ia64__
 	apply_to_page_range(&init_mm, (unsigned long)shared,
-			    PAGE_SIZE * NR_GRANT_FRAMES,
+			    PAGE_SIZE * nr_grant_frames,
 			    unmap_pte_fn, NULL);
 #endif
 	return 0;
@@ -425,24 +510,39 @@ int gnttab_suspend(void)
 
 #include <platform-pci.h>
 
-int gnttab_resume(void)
+static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
 {
-	unsigned long frames;
 	struct xen_add_to_physmap xatp;
 	unsigned int i;
 
-	frames = alloc_xen_mmio(PAGE_SIZE * NR_GRANT_FRAMES);
-
-	for (i = 0; i < NR_GRANT_FRAMES; i++) {
+	/* Loop backwards, so that the first hypercall has the largest index,
+	 * ensuring that the table will grow only once.
+	 */
+	for (i = end_idx; i >= start_idx; i--) {
 		xatp.domid = DOMID_SELF;
 		xatp.idx = i;
 		xatp.space = XENMAPSPACE_grant_table;
-		xatp.gpfn = (frames >> PAGE_SHIFT) + i;
+		xatp.gpfn = (resume_frames >> PAGE_SHIFT) + i;
 		if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
 			BUG();
 	}
+}
+
+int gnttab_resume(void)
+{
+	struct xen_add_to_physmap xatp;
+	unsigned int i, max_nr_gframes, nr_gframes;
+
+	nr_gframes = nr_grant_frames;
+	max_nr_gframes = max_nr_grant_frames();
+	if (max_nr_gframes < nr_gframes)
+		return -ENOSYS;
+
+	resume_frames = alloc_xen_mmio(PAGE_SIZE * max_nr_gframes);
 
-	shared = ioremap(frames, PAGE_SIZE * NR_GRANT_FRAMES);
+	gnttab_map(0, nr_gframes - 1);
+
+	shared = ioremap(resume_frames, PAGE_SIZE * max_nr_gframes);
 	if (shared == NULL) {
 		printk("error to ioremap gnttab share frames\n");
 		return -1;
@@ -454,28 +554,79 @@ int gnttab_resume(void)
 int gnttab_suspend(void)
 {
 	iounmap(shared);
+ 	resume_frames = 0;
 	return 0;
 }
 
 #endif /* !CONFIG_XEN */
 
+static int gnttab_expand(unsigned int req_entries)
+{
+	int rc;
+	unsigned int cur, extra;
+
+	cur = nr_grant_frames;
+	extra = ((req_entries + (GREFS_PER_GRANT_FRAME-1)) /
+		 GREFS_PER_GRANT_FRAME);
+	if (cur + extra > max_nr_grant_frames())
+		return -ENOSPC;
+
+	if ((rc = gnttab_map(cur, cur + extra - 1)) == 0)
+		rc = grow_gnttab_list(extra);
+
+	return rc;
+}
+ 
 int __init gnttab_init(void)
 {
 	int i;
+ 	unsigned int max_nr_glist_frames;
+ 	unsigned int nr_init_grefs;
 
 	if (!is_running_on_xen())
 		return -ENODEV;
 
+ 	nr_grant_frames = 1;
+ 	boot_max_nr_grant_frames = __max_nr_grant_frames();
+ 
+ 	/* Determine the maximum number of frames required for the
+ 	 * grant reference free list on the current hypervisor.
+ 	 */
+ 	max_nr_glist_frames = (boot_max_nr_grant_frames *
+ 			       GREFS_PER_GRANT_FRAME /
+ 			       (PAGE_SIZE / sizeof(grant_ref_t)));
+ 
+ 	gnttab_list = kmalloc(max_nr_glist_frames * sizeof(grant_ref_t *),
+ 			      GFP_KERNEL);
+ 	if (gnttab_list == NULL)
+ 		return -ENOMEM;
+ 
+ 	for (i = 0; i < nr_grant_frames; i++) {
+ 		gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_KERNEL);
+ 		if (gnttab_list[i] == NULL)
+ 			goto ini_nomem;
+ 	}
+ 
 	if (gnttab_resume() < 0)
 		return -ENODEV;
 
-	for (i = NR_RESERVED_ENTRIES; i < NR_GRANT_ENTRIES; i++)
-		gnttab_list[i] = i + 1;
-	gnttab_free_count = NR_GRANT_ENTRIES - NR_RESERVED_ENTRIES;
+ 	nr_init_grefs = nr_grant_frames * GREFS_PER_GRANT_FRAME;
+ 
+ 	for (i = NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++)
+ 		gnttab_entry(i) = i + 1;
+ 
+ 	gnttab_entry(nr_init_grefs - 1) = GNTTAB_LIST_END;
+ 	gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES;
 	gnttab_free_head  = NR_RESERVED_ENTRIES;
 
 	printk("Grant table initialized\n");
 	return 0;
+ 
+  ini_nomem:
+ 	for (i--; i >= 0; i--)
+ 		free_page((unsigned long)gnttab_list[i]);
+ 	kfree(gnttab_list);
+ 	return -ENOMEM;
 }
 
 #ifdef CONFIG_XEN
diff --git a/include/xen/gnttab.h b/include/xen/gnttab.h
index 558aee5..899d55b 100644
--- a/include/xen/gnttab.h
+++ b/include/xen/gnttab.h
@@ -41,13 +41,6 @@
 #include <xen/interface/grant_table.h>
 #include <xen/features.h>
 
-/* NR_GRANT_FRAMES must be less than or equal to that configured in Xen */
-#ifdef __ia64__
-#define NR_GRANT_FRAMES 1
-#else
-#define NR_GRANT_FRAMES 4
-#endif
-
 struct gnttab_free_callback {
 	struct gnttab_free_callback *next;
 	void (*fn)(void *);
@@ -107,12 +100,6 @@ void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
 void gnttab_grant_foreign_transfer_ref(grant_ref_t, domid_t domid,
 				       unsigned long pfn);
 
-#ifdef __ia64__
-#define gnttab_map_vaddr(map) __va(map.dev_bus_addr)
-#else
-#define gnttab_map_vaddr(map) ((void *)(map.host_virt_addr))
-#endif
-
 int gnttab_suspend(void);
 int gnttab_resume(void);