From: Chris Lalancette <clalance@redhat.com>
Date: Wed, 8 Apr 2009 10:07:30 +0200
Subject: [x86] xen: crash when specifying mem=
Message-id: 49DC5B42.4020303@redhat.com
O-Subject: [RHEL5.4 PATCH]: Fix xen crash when specifying mem=
Bugzilla: 240429
RH-Acked-by: Gerd Hoffmann <kraxel@redhat.com>
RH-Acked-by: Rik van Riel <riel@redhat.com>

All,
     We've had a long-standing bug where passing the "mem=" parameter to the
dom0 or domU kernel causes a crash.  For the most part, it doesn't make a whole
lot of sense to pass these parameters to a guest kernel; after all, we can
balloon the guest.  However, when moving back and forth between a non-Xen and a
Xen kernel, a leftover mem= on the command line can cause problems.  So I'm
attempting to fix it here.
     The problem boils down to a mismatch between xen_start_info->nr_pages
(which is equivalent to maxmem in the xen configuration file) and {max,end}_pfn,
which is the last pfn that the domain will ever see.  When the mem= parameter
is used, {max,end}_pfn ends up smaller than xen_start_info->nr_pages.  We
allocate the p2m map based on {max,end}_pfn, but then attempt to copy
xen_start_info->nr_pages entries into that array.  Hilarity ensues.
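     To make the overrun concrete, here is a stripped-down sketch of what the
setup code effectively does today when mem= shrinks {max,end}_pfn below
nr_pages (illustrative only; "p2m" stands in for phys_to_machine_mapping):

unsigned long *p2m;

/* The table is sized for max_pfn entries... */
p2m = alloc_bootmem_low_pages(max_pfn * sizeof(unsigned long));

/* ...but nr_pages entries are copied into it.  With mem= set,
 * nr_pages > max_pfn, so this memcpy runs off the end of the table. */
memcpy(p2m, (unsigned long *)xen_start_info->mfn_list,
       xen_start_info->nr_pages * sizeof(unsigned long));
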
     The solution is to copy only the smaller of xen_start_info->nr_pages and
{max,end}_pfn entries into the p2m array.  As an additional optimization, if the
mem= parameter makes the domain smaller than what was specified in the
configuration, we return the excess memory to the hypervisor so it can be used
for other domains.
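     In other words, the setup path ends up doing roughly the following (a
simplified consolidation of the hunks below; the x86_64 side uses end_pfn
instead of max_pfn, and the real patch checks the hypercall return value):

unsigned long p2m_pages = max_pfn;

if (xen_start_info->nr_pages > max_pfn) {
	/* mem= shrank the domain: hand the surplus frames back to Xen */
	struct xen_memory_reservation reservation = {
		.address_bits = 0,
		.extent_order = 0,
		.domid = DOMID_SELF
	};

	set_xen_guest_handle(reservation.extent_start,
			     (unsigned long *)xen_start_info->mfn_list + max_pfn);
	reservation.nr_extents = xen_start_info->nr_pages - max_pfn;
	HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
} else if (max_pfn > xen_start_info->nr_pages) {
	/* the domain has fewer pages than max_pfn: copy only what exists */
	p2m_pages = xen_start_info->nr_pages;
}

     The p2m copy then uses p2m_pages instead of xen_start_info->nr_pages, so
it can never write past the end of the table.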
     Tested by me to work on dom0 and domU, both i386 and x86_64.  An outside
reporter of the issue also confirmed that it worked for them.  This is a
backport of xen-unstable c/s 15098, plus a couple of minor tweaks necessary for
RHEL.  This should solve BZ 240429.  Please review and ACK.

--
Chris Lalancette

diff --git a/arch/i386/kernel/setup-xen.c b/arch/i386/kernel/setup-xen.c
index 73cbf04..ae7a43d 100644
--- a/arch/i386/kernel/setup-xen.c
+++ b/arch/i386/kernel/setup-xen.c
@@ -1602,6 +1602,7 @@ void __init setup_arch(char **cmdline_p)
 	int i, j, k, fpp;
 	struct physdev_set_iopl set_iopl;
 	unsigned long max_low_pfn;
+	unsigned long p2m_pages;
 
 	/* Force a quick death if the kernel panics (not domain 0). */
 	extern int panic_timeout;
@@ -1742,6 +1743,32 @@ void __init setup_arch(char **cmdline_p)
 	find_smp_config();
 #endif
 
+	p2m_pages = max_pfn;
+	if (xen_start_info->nr_pages > max_pfn) {
+		/*
+		 * the max_pfn was shrunk (probably by mem= or highmem=
+		 * kernel parameter); shrink reservation with the HV
+		 */
+		struct xen_memory_reservation reservation = {
+			.address_bits = 0,
+			.extent_order = 0,
+			.domid = DOMID_SELF
+		};
+		unsigned int difference;
+		int ret;
+
+		difference = xen_start_info->nr_pages - max_pfn;
+
+		set_xen_guest_handle(reservation.extent_start,
+				     ((unsigned long *)xen_start_info->mfn_list)+ max_pfn);
+		reservation.nr_extents = difference;
+		ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
+					   &reservation);
+		BUG_ON  (ret != difference);
+	}
+	else if (max_pfn > xen_start_info->nr_pages)
+		p2m_pages = xen_start_info->nr_pages;
+
 	/* Make sure we have a correctly sized P->M table. */
 	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
 		phys_to_machine_mapping = alloc_bootmem_low_pages(
@@ -1750,7 +1777,7 @@ void __init setup_arch(char **cmdline_p)
 		       max_pfn * sizeof(unsigned long));
 		memcpy(phys_to_machine_mapping,
 		       (unsigned long *)xen_start_info->mfn_list,
-		       xen_start_info->nr_pages * sizeof(unsigned long));
+		       p2m_pages * sizeof(unsigned long));
 		free_bootmem(
 		     __pa(xen_start_info->mfn_list),
 		     PFN_PHYS(PFN_UP(xen_start_info->nr_pages *
diff --git a/arch/x86_64/kernel/setup-xen.c b/arch/x86_64/kernel/setup-xen.c
index 50de168..3758b68 100644
--- a/arch/x86_64/kernel/setup-xen.c
+++ b/arch/x86_64/kernel/setup-xen.c
@@ -533,7 +533,7 @@ contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
 		panic("Cannot find bootmem map of size %ld\n",bootmap_size);
 	bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn);
 #ifdef CONFIG_XEN
-	e820_bootmem_free(NODE_DATA(0), 0, xen_start_info->nr_pages<<PAGE_SHIFT);
+	e820_bootmem_free(NODE_DATA(0), 0, min(xen_start_info->nr_pages,end_pfn)<<PAGE_SHIFT);
 #else
 	e820_bootmem_free(NODE_DATA(0), 0, end_pfn << PAGE_SHIFT);
 #endif
@@ -803,6 +803,33 @@ void __init setup_arch(char **cmdline_p)
 #ifdef CONFIG_XEN
 	{
 		int i, j, k, fpp;
+		unsigned long p2m_pages;
+
+		p2m_pages = end_pfn;
+		if (xen_start_info->nr_pages > end_pfn) {
+			/*
+			 * the end_pfn was shrunk (probably by mem=
+			 * kernel parameter); shrink reservation with the HV
+			 */
+			struct xen_memory_reservation reservation = {
+				.address_bits = 0,
+				.extent_order = 0,
+				.domid = DOMID_SELF
+			};
+			unsigned int difference;
+			int ret;
+
+			difference = xen_start_info->nr_pages - end_pfn;
+
+			set_xen_guest_handle(reservation.extent_start,
+					     ((unsigned long *)xen_start_info->mfn_list) + end_pfn);
+			reservation.nr_extents = difference;
+			ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
+						   &reservation);
+			BUG_ON (ret != difference);
+		}
+		else if (end_pfn > xen_start_info->nr_pages)
+			p2m_pages = xen_start_info->nr_pages;
 
 		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
 			/* Make sure we have a large enough P->M table. */
@@ -812,7 +839,7 @@ void __init setup_arch(char **cmdline_p)
 			       end_pfn * sizeof(unsigned long));
 			memcpy(phys_to_machine_mapping,
 			       (unsigned long *)xen_start_info->mfn_list,
-			       xen_start_info->nr_pages * sizeof(unsigned long));
+			       p2m_pages * sizeof(unsigned long));
 			free_bootmem(
 				__pa(xen_start_info->mfn_list),
 				PFN_PHYS(PFN_UP(xen_start_info->nr_pages *
diff --git a/arch/x86_64/mm/init-xen.c b/arch/x86_64/mm/init-xen.c
index 42bc9d8..d433ecb 100644
--- a/arch/x86_64/mm/init-xen.c
+++ b/arch/x86_64/mm/init-xen.c
@@ -485,7 +485,7 @@ phys_pmd_init(pmd_t *pmd, unsigned long address, unsigned long end)
 		for (k = 0; k < PTRS_PER_PTE; pte++, k++, address += PTE_SIZE) {
 			if ((address >= end) ||
 			    ((address >> PAGE_SHIFT) >=
-			     xen_start_info->nr_pages)) { 
+			     min(end_pfn, xen_start_info->nr_pages))) {
 				__set_pte(pte, __pte(0)); 
 				continue;
 			}