Sophie

Sophie

distrib > Scientific%20Linux > 5x > x86_64 > by-pkgid > fc11cd6e1c513a17304da94a5390f3cd > files > 3759

kernel-2.6.18-194.11.1.el5.src.rpm

From: Larry Woodman <lwoodman@redhat.com>
Date: Mon, 18 May 2009 12:36:45 -0400
Subject: [trace] mm: eliminate extra mm tracepoint overhead
Message-id: 1242664605.10978.282.camel@dhcp-100-19-198.bos.redhat.com
O-Subject: [RHEL5-U4 patch] Fix Panic at __handle_mm_fault() and eliminate extra mm tracepoint overhead.
Bugzilla: 501013
RH-Acked-by: Dave Anderson <anderson@redhat.com>
RH-Acked-by: Prarit Bhargava <prarit@redhat.com>
RH-Acked-by: Rik van Riel <riel@redhat.com>

The system was incurring a panic in the pagefault handler when the mm
tracepoints patch was installed even though the tracepoints are not
enabled.  The cause of this was doing a page_to_pfn(page) inside the
call to the mm tracepoints and in and OOM path the page is NULL.  That
is easy to fix but the real problem is these tracepoints have more
overhead that we(or at least I) thought.

In the case of do_swap_page() I added the following tracepoint:

     trace_mm_anon_pgin(mm, address, page_to_pfn(page));

The compiler generates the code to be executed for the page_to_pfn()
before testing whether the tracepoint is enabled.  Since there is a path
in do_swap_page() that has a NULL page at that tracepoint(OOM path), the
system crashed in page_to_pfn(NULL).  This is easy to fix but since this
is happening, there is overhead to these tracepoints whenever we pass
anything but raw data to the tracepoint regardless of whether the
tracepoint is enabled or not.

To fix this problem I passes only the raw page pointer the the
tracepoint and will change the system tap script to deference the page
and/or get the PFN.

Fixes BZ  510013

diff --git a/include/trace/mm.h b/include/trace/mm.h
index e18fa8b..7b949bc 100644
--- a/include/trace/mm.h
+++ b/include/trace/mm.h
@@ -8,48 +8,48 @@
 #include <linux/mm.h>
 
 DEFINE_TRACE(mm_anon_fault,
-	TPPROTO(struct mm_struct *mm, unsigned long address, unsigned long pfn),
-	TPARGS(mm, address, pfn));
+	TPPROTO(struct mm_struct *mm, unsigned long address, struct page *page),
+	TPARGS(mm, address, page));
 
 DEFINE_TRACE(mm_anon_pgin,
-	TPPROTO(struct mm_struct *mm, unsigned long address, unsigned long pfn),
-	TPARGS(mm, address, pfn));
+	TPPROTO(struct mm_struct *mm, unsigned long address, struct page *page),
+	TPARGS(mm, address, page));
 
 DEFINE_TRACE(mm_anon_cow,
-	TPPROTO(struct mm_struct *mm, unsigned long address, unsigned long pfn),
-	TPARGS(mm, address, pfn));
+	TPPROTO(struct mm_struct *mm, unsigned long address, struct page *page),
+	TPARGS(mm, address, page));
 
 DEFINE_TRACE(mm_anon_userfree,
-	TPPROTO(struct mm_struct *mm, unsigned long address, unsigned long pfn),
-	TPARGS(mm, address, pfn));
+	TPPROTO(struct mm_struct *mm, unsigned long address, struct page *page),
+	TPARGS(mm, address, page));
 
 DEFINE_TRACE(mm_anon_unmap,
-	TPPROTO(unsigned long pfn, int success),
-	TPARGS(pfn, success));
+	TPPROTO(struct page *page, int success),
+	TPARGS(page, success));
 
 DEFINE_TRACE(mm_filemap_fault,
-	TPPROTO(struct mm_struct *mm, unsigned long address, unsigned long pfn),
-	TPARGS(mm, address, pfn));
+	TPPROTO(struct mm_struct *mm, unsigned long address, struct page *page),
+	TPARGS(mm, address, page));
 
 DEFINE_TRACE(mm_filemap_cow,
-	TPPROTO(struct mm_struct *mm, unsigned long address, unsigned long pfn),
-	TPARGS(mm, address, pfn));
+	TPPROTO(struct mm_struct *mm, unsigned long address, struct page *page),
+	TPARGS(mm, address, page));
 
 DEFINE_TRACE(mm_filemap_unmap,
-	TPPROTO(unsigned long pfn, int success),
-	TPARGS(pfn, success));
+	TPPROTO(struct page *page, int success),
+	TPARGS(page, success));
 
 DEFINE_TRACE(mm_filemap_userunmap,
-	TPPROTO(struct mm_struct *mm, unsigned long address, unsigned long pfn),
-	TPARGS(mm, address, pfn));
+	TPPROTO(struct mm_struct *mm, unsigned long address, struct page *page),
+	TPARGS(mm, address, page));
 
 DEFINE_TRACE(mm_pagereclaim_pgout,
-	TPPROTO(unsigned long pfn, int anon),
-	TPARGS(pfn, anon));
+	TPPROTO(struct page *page, int anon),
+	TPARGS(page, anon));
 
 DEFINE_TRACE(mm_pagereclaim_free,
-	TPPROTO(unsigned long pfn, int anon),
-	TPARGS(pfn, anon));
+	TPPROTO(struct page *page, int anon),
+	TPARGS(page, anon));
 
 DEFINE_TRACE(mm_pdflush_bgwriteout,
 	TPPROTO(unsigned long count),
@@ -60,8 +60,8 @@ DEFINE_TRACE(mm_pdflush_kupdate,
 	TPARGS(count));
 
 DEFINE_TRACE(mm_page_allocation,
-	TPPROTO(unsigned long pfn, unsigned long free),
-	TPARGS(pfn, free));
+	TPPROTO(struct page *page, unsigned long free),
+	TPARGS(page, free));
 
 DEFINE_TRACE(mm_kswapd_runs,
 	TPPROTO(unsigned long reclaimed),
@@ -84,28 +84,28 @@ DEFINE_TRACE(mm_pagereclaim_shrinkactive,
 	TPARGS(scanned));
 
 DEFINE_TRACE(mm_pagereclaim_shrinkactive_a2a,
-	TPPROTO(unsigned long pfn),
-	TPARGS(pfn));
+	TPPROTO(struct page *page),
+	TPARGS(page));
 
 DEFINE_TRACE(mm_pagereclaim_shrinkactive_a2i,
-	TPPROTO(unsigned long pfn),
-	TPARGS(pfn));
+	TPPROTO(struct page *page),
+	TPARGS(page));
 
 DEFINE_TRACE(mm_pagereclaim_shrinkinactive,
 	TPPROTO(unsigned long reclaimed),
 	TPARGS(reclaimed));
 
 DEFINE_TRACE(mm_pagereclaim_shrinkinactive_i2a,
-	TPPROTO(unsigned long pfn),
-	TPARGS(pfn));
+	TPPROTO(struct page *page),
+	TPARGS(page));
 
 DEFINE_TRACE(mm_pagereclaim_shrinkinactive_i2i,
-	TPPROTO(unsigned long pfn),
-	TPARGS(pfn));
+	TPPROTO(struct page *page),
+	TPARGS(page));
 
 DEFINE_TRACE(mm_page_free,
-	TPPROTO(unsigned long pfn),
-	TPARGS(pfn));
+	TPPROTO(struct page *page),
+	TPARGS(page));
 
 #undef TRACE_SYSTEM
 #endif
diff --git a/mm/filemap.c b/mm/filemap.c
index f899d86..87900f6 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1503,7 +1503,7 @@ success:
 	mark_page_accessed(page);
 	if (type)
 		*type = majmin;
-	trace_mm_filemap_fault(area->vm_mm, address, page_to_pfn(page));
+	trace_mm_filemap_fault(area->vm_mm, address, page);
 	return page;
 
 outside_data_content:
diff --git a/mm/memory.c b/mm/memory.c
index 9332b31..527b06a 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -830,16 +830,14 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 					   pgoff_to_pte(page->index));
 			if (PageAnon(page)) {
 				anon_rss--;
-				trace_mm_anon_userfree(mm, addr,
-							page_to_pfn(page));
+				trace_mm_anon_userfree(mm, addr, page);
 			} else {
 				if (pte_dirty(ptent))
 					set_page_dirty(page);
 				if (pte_young(ptent))
 					SetPageReferenced(page);
 				file_rss--;
-				trace_mm_filemap_userunmap(mm, addr,
-							page_to_pfn(page));
+				trace_mm_filemap_userunmap(mm, addr, page);
 			}
 			page_remove_rmap(page);
 			tlb_remove_page(tlb, page);
@@ -2048,12 +2046,11 @@ gotten:
 			if (!PageAnon(old_page)) {
 				dec_mm_counter(mm, file_rss);
 				inc_mm_counter(mm, anon_rss);
-				trace_mm_filemap_cow(mm, address,
-						page_to_pfn(new_page));
+				trace_mm_filemap_cow(mm, address, new_page);
 			}
 		} else {
 			inc_mm_counter(mm, anon_rss);
-			trace_mm_anon_cow(mm, address, page_to_pfn(new_page));
+			trace_mm_anon_cow(mm, address, new_page);
 		}
 		flush_cache_page(vma, address, pte_pfn(orig_pte));
 		entry = mk_pte(new_page, vma->vm_page_prot);
@@ -2526,7 +2523,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 unlock:
 	pte_unmap_unlock(page_table, ptl);
 out:
-	trace_mm_anon_pgin(mm, address, page_to_pfn(page));
+	trace_mm_anon_pgin(mm, address, page);
 	return ret;
 out_nomap:
 	pte_unmap_unlock(page_table, ptl);
@@ -2588,7 +2585,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	lazy_mmu_prot_update(entry);
 unlock:
 	pte_unmap_unlock(page_table, ptl);
-	trace_mm_anon_fault(mm, address, page_to_pfn(page));
+	trace_mm_anon_fault(mm, address, page);
 	return VM_FAULT_MINOR;
 release:
 	page_cache_release(page);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 4bbf86b..2cebaec 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -746,7 +746,7 @@ static void fastcall free_hot_cold_page(struct page *page, int cold)
 	if (free_pages_check(page))
 		return;
 
-	trace_mm_page_free(page_to_pfn(page));
+	trace_mm_page_free(page);
 	kernel_map_pages(page, 1, 0);
 
 	pcp = &zone_pcp(zone, get_cpu())->pcp[cold];
@@ -836,7 +836,7 @@ again:
 	BUG_ON(bad_range(zone, page));
 	if (prep_new_page(page, order, gfp_flags))
 		goto again;
-	trace_mm_page_allocation(page_to_pfn(page), zone->free_pages);
+	trace_mm_page_allocation(page, zone->free_pages);
 	return page;
 
 failed:
diff --git a/mm/rmap.c b/mm/rmap.c
index 12bb716..3c53315 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -809,7 +809,7 @@ static int try_to_unmap_anon(struct page *page, int migration)
 			break;
 	}
 	spin_unlock(&anon_vma->lock);
-	trace_mm_anon_unmap(page_to_pfn(page), ret == SWAP_SUCCESS);
+	trace_mm_anon_unmap(page, ret == SWAP_SUCCESS);
 	return ret;
 }
 
@@ -906,7 +906,7 @@ static int try_to_unmap_file(struct page *page, int migration)
 		vma->vm_private_data = NULL;
 out:
 	spin_unlock(&mapping->i_mmap_lock);
-	trace_mm_filemap_unmap(page_to_pfn(page), ret == SWAP_SUCCESS);
+	trace_mm_filemap_unmap(page, ret == SWAP_SUCCESS);
 	return ret;
 }
 
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 8a11dd8..c137e8a 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -375,7 +375,7 @@ static pageout_t pageout(struct page *page, struct address_space *mapping)
 			ClearPageReclaim(page);
 		}
 
-		trace_mm_pagereclaim_pgout(page_to_pfn(page), PageAnon(page));
+		trace_mm_pagereclaim_pgout(page, PageAnon(page));
 		return PAGE_SUCCESS;
 	}
 
@@ -562,18 +562,18 @@ free_it:
 		nr_reclaimed++;
 		if (!pagevec_add(&freed_pvec, page))
 			__pagevec_release_nonlru(&freed_pvec);
-		trace_mm_pagereclaim_free(page_to_pfn(page), PageAnon(page));
+		trace_mm_pagereclaim_free(page, PageAnon(page));
 		continue;
 
 activate_locked:
 		SetPageActive(page);
 		pgactivate++;
-		trace_mm_pagereclaim_shrinkinactive_i2a(page_to_pfn(page));
+		trace_mm_pagereclaim_shrinkinactive_i2a(page);
 keep_locked:
 		unlock_page(page);
 keep:
 		list_add(&page->lru, &ret_pages);
-		trace_mm_pagereclaim_shrinkinactive_i2i(page_to_pfn(page));
+		trace_mm_pagereclaim_shrinkinactive_i2i(page);
 		BUG_ON(PageLRU(page));
 	}
 	list_splice(&ret_pages, page_list);
@@ -819,7 +819,7 @@ force_reclaim_mapped:
 			    (total_swap_pages == 0 && PageAnon(page)) ||
 			    page_referenced(page, 0)) {
 				list_add(&page->lru, &l_active);
-				trace_mm_pagereclaim_shrinkactive_a2a(page_to_pfn(page));
+				trace_mm_pagereclaim_shrinkactive_a2a(page);
 				continue;
 			}
 		}
@@ -839,7 +839,7 @@ force_reclaim_mapped:
 
 		list_move(&page->lru, &zone->inactive_list);
 		pgmoved++;
-		trace_mm_pagereclaim_shrinkactive_a2i(page_to_pfn(page));
+		trace_mm_pagereclaim_shrinkactive_a2i(page);
 		if (!pagevec_add(&pvec, page)) {
 			zone->nr_inactive += pgmoved;
 			spin_unlock_irq(&zone->lru_lock);