Sophie

Sophie

distrib > Scientific%20Linux > 5x > x86_64 > by-pkgid > 89877e42827f16fa5f86b1df0c2860b1 > files > 2748

kernel-2.6.18-128.1.10.el5.src.rpm

From: Chris Lalancette <clalance@redhat.com>
Date: Mon, 3 Nov 2008 09:09:07 +0100
Subject: [xen] remove contiguous_bitmap
Message-id: 490EB1A3.9000405@redhat.com
O-Subject: [RHEL5.3 PATCH 2/2]: Remove Xen contiguous_bitmap
Bugzilla: 463500
RH-Acked-by: Mark McLoughlin <markmc@redhat.com>
RH-Acked-by: Bill Burns <bburns@redhat.com>
RH-Acked-by: Prarit Bhargava <prarit@redhat.com>
RH-Acked-by: Rik van Riel <riel@redhat.com>

When the RHEL-5 kernel is running under Xen, it can ask the hypervisor to
allocate a set of physically contiguous pages on its behalf (for instance, so
dom0 can do DMA operations to devices).  The kernel tracks where these
contiguous pages are via a contiguous_bitmap.  In the swiotlb path, we check
this bitmap to see if pages are physically contiguous; if they aren't, then we
know we have to split this request.

Unfortunately, on i386, this bitmap is allocated based on max_low_pfn.  For the
most part, this is fine; if your page is below max_low_pfn, then checking in the
bitmap is just fine.  Also, due to some other checking before checking the
bitmap, if the request is for just one page, regardless of where that page is,
we know we only have to do one request.

The problem comes in when we get a request for multiple pages (I've seen
requests for 2), starting at a high memory address.  In this case, we try to
access the contiguous_bitmap, but we end up taking a fatal page fault in the
kernel because we are looking too far above max_low_pfn.

It turns out that the contiguous_bitmap is a dubious optimization of a more
general problem that Stephen solved for 5.2.  For that reason, upstream decided
to rip out the contiguous_bitmap.  By doing that, we completely avoid the fatal
page fault on i386, and we really shouldn't be losing any performance.  This
patch is a straightforward backport of upstream linux-2.6.18-xen.hg c/s 707.

diff --git a/arch/i386/kernel/pci-dma-xen.c b/arch/i386/kernel/pci-dma-xen.c
index 14f3539..7bf796e 100644
--- a/arch/i386/kernel/pci-dma-xen.c
+++ b/arch/i386/kernel/pci-dma-xen.c
@@ -130,17 +130,11 @@ static int check_pages_physically_contiguous(unsigned long pfn,
 
 int range_straddles_page_boundary(paddr_t p, size_t size)
 {
-	extern unsigned long *contiguous_bitmap;
 	unsigned long pfn = p >> PAGE_SHIFT;
 	unsigned int offset = p & ~PAGE_MASK;
 
-	if (offset + size <= PAGE_SIZE)
-		return 0;
-	if (test_bit(pfn, contiguous_bitmap))
-		return 0;
-	if (check_pages_physically_contiguous(pfn, offset, size))
-		return 0;
-	return 1;
+	return ((offset + size > PAGE_SIZE) &&
+		!check_pages_physically_contiguous(pfn, offset, size));
 }
 
 int
diff --git a/arch/i386/mm/hypervisor.c b/arch/i386/mm/hypervisor.c
index d09507b..88b4dab 100644
--- a/arch/i386/mm/hypervisor.c
+++ b/arch/i386/mm/hypervisor.c
@@ -214,54 +214,6 @@ void xen_set_ldt(unsigned long ptr, unsigned long len)
 	BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
 }
 
-/*
- * Bitmap is indexed by page number. If bit is set, the page is part of a
- * xen_create_contiguous_region() area of memory.
- */
-unsigned long *contiguous_bitmap;
-
-static void contiguous_bitmap_set(
-	unsigned long first_page, unsigned long nr_pages)
-{
-	unsigned long start_off, end_off, curr_idx, end_idx;
-
-	curr_idx  = first_page / BITS_PER_LONG;
-	start_off = first_page & (BITS_PER_LONG-1);
-	end_idx   = (first_page + nr_pages) / BITS_PER_LONG;
-	end_off   = (first_page + nr_pages) & (BITS_PER_LONG-1);
-
-	if (curr_idx == end_idx) {
-		contiguous_bitmap[curr_idx] |=
-			((1UL<<end_off)-1) & -(1UL<<start_off);
-	} else {
-		contiguous_bitmap[curr_idx] |= -(1UL<<start_off);
-		while ( ++curr_idx < end_idx )
-			contiguous_bitmap[curr_idx] = ~0UL;
-		contiguous_bitmap[curr_idx] |= (1UL<<end_off)-1;
-	}
-}
-
-static void contiguous_bitmap_clear(
-	unsigned long first_page, unsigned long nr_pages)
-{
-	unsigned long start_off, end_off, curr_idx, end_idx;
-
-	curr_idx  = first_page / BITS_PER_LONG;
-	start_off = first_page & (BITS_PER_LONG-1);
-	end_idx   = (first_page + nr_pages) / BITS_PER_LONG;
-	end_off   = (first_page + nr_pages) & (BITS_PER_LONG-1);
-
-	if (curr_idx == end_idx) {
-		contiguous_bitmap[curr_idx] &=
-			-(1UL<<end_off) | ((1UL<<start_off)-1);
-	} else {
-		contiguous_bitmap[curr_idx] &= (1UL<<start_off)-1;
-		while ( ++curr_idx != end_idx )
-			contiguous_bitmap[curr_idx] = 0;
-		contiguous_bitmap[curr_idx] &= -(1UL<<end_off);
-	}
-}
-
 /* Protected by balloon_lock. */
 #define MAX_CONTIG_ORDER 9 /* 2MB */
 static unsigned long discontig_frames[1<<MAX_CONTIG_ORDER];
@@ -355,10 +307,6 @@ int xen_create_contiguous_region(
 	if (HYPERVISOR_multicall(cr_mcl, i))
 		BUG();
 
-	if (success)
-		contiguous_bitmap_set(__pa(vstart) >> PAGE_SHIFT,
-				      1UL << order);
-
 	balloon_unlock(flags);
 
 	return success ? 0 : -ENOMEM;
@@ -384,8 +332,7 @@ void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order)
 		}
 	};
 
-	if (xen_feature(XENFEAT_auto_translated_physmap) ||
-	    !test_bit(__pa(vstart) >> PAGE_SHIFT, contiguous_bitmap))
+	if (xen_feature(XENFEAT_auto_translated_physmap))
 		return;
 
 	if (unlikely(order > MAX_CONTIG_ORDER))
@@ -398,8 +345,6 @@ void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order)
 
 	balloon_lock(flags);
 
-	contiguous_bitmap_clear(__pa(vstart) >> PAGE_SHIFT, 1UL << order);
-
 	/* 1. Find start MFN of contiguous extent. */
 	in_frame = pfn_to_mfn(__pa(vstart) >> PAGE_SHIFT);
 
diff --git a/arch/i386/mm/init-xen.c b/arch/i386/mm/init-xen.c
index a7c09dc..1fd6d79 100644
--- a/arch/i386/mm/init-xen.c
+++ b/arch/i386/mm/init-xen.c
@@ -47,8 +47,6 @@
 #include <asm/hypervisor.h>
 #include <asm/swiotlb.h>
 
-extern unsigned long *contiguous_bitmap;
-
 unsigned int __VMALLOC_RESERVE = 128 << 20;
 
 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
@@ -647,11 +645,6 @@ void __init mem_init(void)
 	int bad_ppro;
 	unsigned long pfn;
 
-	contiguous_bitmap = alloc_bootmem_low_pages(
-		(max_low_pfn + 2*BITS_PER_LONG) >> 3);
-	BUG_ON(!contiguous_bitmap);
-	memset(contiguous_bitmap, 0, (max_low_pfn + 2*BITS_PER_LONG) >> 3);
-
 #if defined(CONFIG_SWIOTLB)
 	swiotlb_init();	
 #endif
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index f96c41d..cc3c02b 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -699,9 +699,6 @@ setup_arch (char **cmdline_p)
 	platform_setup(cmdline_p);
 	check_sal_cache_flush();
 	paging_init();
-#ifdef CONFIG_XEN
-	xen_contiguous_bitmap_init(max_pfn);
-#endif
 }
 
 /*
diff --git a/arch/ia64/xen/hypervisor.c b/arch/ia64/xen/hypervisor.c
index 6224ca8..7c22682 100644
--- a/arch/ia64/xen/hypervisor.c
+++ b/arch/ia64/xen/hypervisor.c
@@ -52,152 +52,6 @@ static int p2m_expose_init(void);
 
 EXPORT_SYMBOL(__hypercall);
 
-//XXX same as i386, x86_64 contiguous_bitmap_set(), contiguous_bitmap_clear()
-// move those to lib/contiguous_bitmap?
-//XXX discontigmem/sparsemem
-
-/*
- * Bitmap is indexed by page number. If bit is set, the page is part of a
- * xen_create_contiguous_region() area of memory.
- */
-unsigned long *contiguous_bitmap;
-
-#ifdef CONFIG_VIRTUAL_MEM_MAP
-/* Following logic is stolen from create_mem_map_table() for virtual memmap */
-static int
-create_contiguous_bitmap(u64 start, u64 end, void *arg)
-{
-	unsigned long address, start_page, end_page;
-	unsigned long bitmap_start, bitmap_end;
-	unsigned char *bitmap;
-	int node;
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
-	pte_t *pte;
-
-	bitmap_start = (unsigned long)contiguous_bitmap +
-	               ((__pa(start) >> PAGE_SHIFT) >> 3);
-	bitmap_end = (unsigned long)contiguous_bitmap +
-	             (((__pa(end) >> PAGE_SHIFT) + 2 * BITS_PER_LONG) >> 3);
-
-	start_page = bitmap_start & PAGE_MASK;
-	end_page = PAGE_ALIGN(bitmap_end);
-	node = paddr_to_nid(__pa(start));
-
-	bitmap = alloc_bootmem_pages_node(NODE_DATA(node),
-	                                  end_page - start_page);
-	BUG_ON(!bitmap);
-	memset(bitmap, 0, end_page - start_page);
-
-	for (address = start_page; address < end_page; address += PAGE_SIZE) {
-		pgd = pgd_offset_k(address);
-		if (pgd_none(*pgd))
-			pgd_populate(&init_mm, pgd,
-			             alloc_bootmem_pages_node(NODE_DATA(node),
-			                                      PAGE_SIZE));
-		pud = pud_offset(pgd, address);
-
-		if (pud_none(*pud))
-			pud_populate(&init_mm, pud,
-			             alloc_bootmem_pages_node(NODE_DATA(node),
-			                                      PAGE_SIZE));
-		pmd = pmd_offset(pud, address);
-
-		if (pmd_none(*pmd))
-			pmd_populate_kernel(&init_mm, pmd,
-			                    alloc_bootmem_pages_node
-			                    (NODE_DATA(node), PAGE_SIZE));
-		pte = pte_offset_kernel(pmd, address);
-
-		if (pte_none(*pte))
-			set_pte(pte,
-			        pfn_pte(__pa(bitmap + (address - start_page))
-			                >> PAGE_SHIFT, PAGE_KERNEL));
-	}
-	return 0;
-}
-#endif
-
-static void
-__contiguous_bitmap_init(unsigned long size)
-{
-	contiguous_bitmap = alloc_bootmem_pages(size);
-	BUG_ON(!contiguous_bitmap);
-	memset(contiguous_bitmap, 0, size);
-}
-
-void
-xen_contiguous_bitmap_init(unsigned long end_pfn)
-{
-	unsigned long size = (end_pfn + 2 * BITS_PER_LONG) >> 3;
-#ifndef CONFIG_VIRTUAL_MEM_MAP
-	__contiguous_bitmap_init(size);
-#else
-	unsigned long max_gap = 0;
-
-	efi_memmap_walk(find_largest_hole, (u64*)&max_gap);
-	if (max_gap < LARGE_GAP) {
-		__contiguous_bitmap_init(size);
-	} else {
-		unsigned long map_size = PAGE_ALIGN(size);
-		vmalloc_end -= map_size;
-		contiguous_bitmap = (unsigned long*)vmalloc_end;
-		efi_memmap_walk(create_contiguous_bitmap, NULL);
-	}
-#endif
-}
-
-#if 0
-int
-contiguous_bitmap_test(void* p)
-{
-	return test_bit(__pa(p) >> PAGE_SHIFT, contiguous_bitmap);
-}
-#endif
-
-static void contiguous_bitmap_set(
-	unsigned long first_page, unsigned long nr_pages)
-{
-	unsigned long start_off, end_off, curr_idx, end_idx;
-
-	curr_idx  = first_page / BITS_PER_LONG;
-	start_off = first_page & (BITS_PER_LONG-1);
-	end_idx   = (first_page + nr_pages) / BITS_PER_LONG;
-	end_off   = (first_page + nr_pages) & (BITS_PER_LONG-1);
-
-	if (curr_idx == end_idx) {
-		contiguous_bitmap[curr_idx] |=
-			((1UL<<end_off)-1) & -(1UL<<start_off);
-	} else {
-		contiguous_bitmap[curr_idx] |= -(1UL<<start_off);
-		while ( ++curr_idx < end_idx )
-			contiguous_bitmap[curr_idx] = ~0UL;
-		contiguous_bitmap[curr_idx] |= (1UL<<end_off)-1;
-	}
-}
-
-static void contiguous_bitmap_clear(
-	unsigned long first_page, unsigned long nr_pages)
-{
-	unsigned long start_off, end_off, curr_idx, end_idx;
-
-	curr_idx  = first_page / BITS_PER_LONG;
-	start_off = first_page & (BITS_PER_LONG-1);
-	end_idx   = (first_page + nr_pages) / BITS_PER_LONG;
-	end_off   = (first_page + nr_pages) & (BITS_PER_LONG-1);
-
-	if (curr_idx == end_idx) {
-		contiguous_bitmap[curr_idx] &=
-			-(1UL<<end_off) | ((1UL<<start_off)-1);
-	} else {
-		contiguous_bitmap[curr_idx] &= (1UL<<start_off)-1;
-		while ( ++curr_idx != end_idx )
-			contiguous_bitmap[curr_idx] = 0;
-		contiguous_bitmap[curr_idx] &= -(1UL<<end_off);
-	}
-}
-
 // __xen_create_contiguous_region(), __xen_destroy_contiguous_region()
 // are based on i386 xen_create_contiguous_region(),
 // xen_destroy_contiguous_region()
@@ -273,8 +127,6 @@ __xen_create_contiguous_region(unsigned long vstart,
 		} else
 			success = 1;
 	}
-	if (success)
-		contiguous_bitmap_set(start_gpfn, num_gpfn);
 #if 0
 	if (success) {
 		unsigned long mfn;
@@ -333,9 +185,6 @@ __xen_destroy_contiguous_region(unsigned long vstart, unsigned int order)
         };
 	
 
-	if (!test_bit(start_gpfn, contiguous_bitmap))
-		return;
-
 	if (unlikely(order > MAX_CONTIG_ORDER))
 		return;
 
@@ -346,8 +195,6 @@ __xen_destroy_contiguous_region(unsigned long vstart, unsigned int order)
 
 	balloon_lock(flags);
 
-	contiguous_bitmap_clear(start_gpfn, num_gpfn);
-
         /* Do the exchange for non-contiguous MFNs. */
 	in_frame = start_gpfn;
 	for (i = 0; i < num_gpfn; i++) {
diff --git a/arch/ia64/xen/xen_dma.c b/arch/ia64/xen/xen_dma.c
index 9b277ed..41bcb3d 100644
--- a/arch/ia64/xen/xen_dma.c
+++ b/arch/ia64/xen/xen_dma.c
@@ -55,7 +55,6 @@ static int check_pages_physically_contiguous(unsigned long pfn,
 
 int range_straddles_page_boundary(paddr_t p, size_t size)
 {
-	extern unsigned long *contiguous_bitmap;
 	unsigned long pfn = p >> PAGE_SHIFT;
 	unsigned int offset = p & ~PAGE_MASK;
 
@@ -64,8 +63,6 @@ int range_straddles_page_boundary(paddr_t p, size_t size)
 
 	if (offset + size <= PAGE_SIZE)
 		return 0;
-	if (test_bit(pfn, contiguous_bitmap))
-		return 0;
 	if (check_pages_physically_contiguous(pfn, offset, size))
 		return 0;
 	return 1;
diff --git a/arch/x86_64/mm/init-xen.c b/arch/x86_64/mm/init-xen.c
index 08d9e8d..42bc9d8 100644
--- a/arch/x86_64/mm/init-xen.c
+++ b/arch/x86_64/mm/init-xen.c
@@ -54,8 +54,6 @@
 struct dma_mapping_ops* dma_ops;
 EXPORT_SYMBOL(dma_ops);
 
-extern unsigned long *contiguous_bitmap;
-
 static unsigned long dma_reserve __initdata;
 
 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
@@ -1058,11 +1056,6 @@ void __init mem_init(void)
 	long codesize, reservedpages, datasize, initsize;
 	unsigned long pfn;
 
-	contiguous_bitmap = alloc_bootmem_low_pages(
-		(end_pfn + 2*BITS_PER_LONG) >> 3);
-	BUG_ON(!contiguous_bitmap);
-	memset(contiguous_bitmap, 0, (end_pfn + 2*BITS_PER_LONG) >> 3);
-
 	pci_iommu_alloc();
 
 	/* How many end-of-memory variables you have, grandma! */
diff --git a/include/asm-ia64/hypervisor.h b/include/asm-ia64/hypervisor.h
index 0230101..0bb0441 100644
--- a/include/asm-ia64/hypervisor.h
+++ b/include/asm-ia64/hypervisor.h
@@ -147,7 +147,6 @@ int privcmd_mmap(struct file * file, struct vm_area_struct * vma);
 #define pfn_pte_ma(_x,_y)	__pte_ma(0)     /* unmodified use */
 
 #ifndef CONFIG_VMX_GUEST
-void xen_contiguous_bitmap_init(unsigned long end_pfn);
 int __xen_create_contiguous_region(unsigned long vstart, unsigned int order, unsigned int address_bits);
 static inline int
 xen_create_contiguous_region(unsigned long vstart,