From: Scott Moser <smoser@redhat.com>
Subject: [RHEL5.1 PATCH] bz242550 [PPC] Fix 64K pages with kexec on native hash table
Date: Tue, 10 Jul 2007 11:58:27 -0400 (EDT)
Bugzilla: 242550
Message-Id: <Pine.LNX.4.64.0707101154260.18157@squad5-lp1.lab.boston.redhat.com>
Changelog: [PPC] Fix 64K pages with kexec on native hash table


RHBZ#: 242550
------
https://bugzilla.redhat.com/bugzilla/show_bug.cgi?id=242550

Description:
------------

The code for clearing the MMU hash table and TLBs on kexec for
"native" (non-hypervisor) hash tables only worked properly with 4K
base page sizes. It lacked the ability to extract the page size from
the hashed PTEs and pass it to tlbie for proper TLB invalidation,
causing kexec/dump to crash.
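
For illustration, the core of the fix is the LP-field decode that the
patch adds in hpte_decode(). Below is a minimal user-space sketch of
that loop; decode_penc() and the sample LP values are hypothetical,
assuming the encoding where 16M pages use penc 0 and 64K pages use
penc 1 (the kernel then matches the decoded penc against
mmu_psize_defs[] to recover the page size):

#include <stdio.h>

/* Same constants as in the patch below. */
#define LP_SHIFT	12
#define LP_BITS		8
#define LP_MASK(i)	((0xFFUL >> (i)) << LP_SHIFT)

/*
 * Recover the page-size encoding (penc) from the LP field in the
 * second doubleword of a hashed PTE, the same way hpte_decode()
 * does: find the longest all-ones run in the low-order bits of LP;
 * that all-ones value is the penc.
 */
static int decode_penc(unsigned long hpte_r)
{
	int i;

	for (i = 0; i < LP_BITS; i++) {
		if ((hpte_r & LP_MASK(i + 1)) == LP_MASK(i + 1))
			break;
	}
	return LP_MASK(i + 1) >> LP_SHIFT;
}

int main(void)
{
	/* hypothetical hpte_r values with only the LP field set */
	printf("LP=0x01 -> penc %d\n", decode_penc(0x01UL << LP_SHIFT));
	printf("LP=0x00 -> penc %d\n", decode_penc(0x00UL << LP_SHIFT));
	return 0;
}

This prints penc 1 (64K) and penc 0 (16M); with the page size known,
native_hpte_clear() can pass it to __tlbie() instead of hard-coding
MMU_PAGE_4K.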

RHEL Version Found:
-------------------
bug against 5.1

Upstream Status:
----------------
This code is present upstream in 2.6.22. The relevant git commits are:
 71bf08b6c083df4ee97874d895f911529f4150dd
 2454c7e98c0dd0aff29dfe1ee369801507f4d6a5

Test Status:
------------
To verify that the patch builds across platforms, a brew scratch build
of this patch on top of 2.6.18-32.el5 has been done at [1].

These changes have been tested by IBM Cell developers via their Cell
test suite.

Proposed Patch:
----------------
Please review and ACK for RHEL5.1

---
 arch/powerpc/mm/hash_native_64.c |   79 ++++++++++++++++++++++++++++-----------
 include/asm-powerpc/mmu.h        |   10 ++++
 2 files changed, 68 insertions(+), 21 deletions(-)

Index: b/arch/powerpc/mm/hash_native_64.c
===================================================================
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -26,6 +26,7 @@
 #include <asm/tlb.h>
 #include <asm/cputable.h>
 #include <asm/udbg.h>
+#include <asm/kexec.h>
 
 #ifdef DEBUG_LOW
 #define DBG_LOW(fmt...) udbg_printf(fmt)
@@ -340,31 +341,67 @@ static void native_hpte_invalidate(unsig
 	local_irq_restore(flags);
 }
 
-/*
- * XXX This need fixing based on page size. It's only used by
- * native_hpte_clear() for now which needs fixing too so they
- * make a good pair...
- */
-static unsigned long slot2va(unsigned long hpte_v, unsigned long slot)
-{
-	unsigned long avpn = HPTE_V_AVPN_VAL(hpte_v);
-	unsigned long va;
+#define LP_SHIFT	12
+#define LP_BITS		8
+#define LP_MASK(i)	((0xFF >> (i)) << LP_SHIFT)
+
+static void hpte_decode(hpte_t *hpte, unsigned long slot,
+			int *psize, unsigned long *va)
+{
+	unsigned long hpte_r = hpte->r;
+	unsigned long hpte_v = hpte->v;
+	unsigned long avpn;
+	int i, size, shift, penc;
+
+	if (!(hpte_v & HPTE_V_LARGE))
+		size = MMU_PAGE_4K;
+	else {
+		for (i = 0; i < LP_BITS; i++) {
+			if ((hpte_r & LP_MASK(i+1)) == LP_MASK(i+1))
+				break;
+		}
+		penc = LP_MASK(i+1) >> LP_SHIFT;
+		for (size = 0; size < MMU_PAGE_COUNT; size++) {
 
-	va = avpn << 23;
+			/* 4K pages are not represented by LP */
+			if (size == MMU_PAGE_4K)
+				continue;
+
+			/* valid entries have a shift value */
+			if (!mmu_psize_defs[size].shift)
+				continue;
+
+			if (penc == mmu_psize_defs[size].penc)
+				break;
+		}
+	}
 
-	if (! (hpte_v & HPTE_V_LARGE)) {
-		unsigned long vpi, pteg;
+	/* This works for all page sizes, and for 256M and 1T segments */
+	shift = mmu_psize_defs[size].shift;
+	avpn = (HPTE_V_AVPN_VAL(hpte_v) & ~mmu_psize_defs[size].avpnm) << 23;
+
+	if (shift < 23) {
+		unsigned long vpi, vsid, pteg;
 
 		pteg = slot / HPTES_PER_GROUP;
 		if (hpte_v & HPTE_V_SECONDARY)
 			pteg = ~pteg;
-
-		vpi = ((va >> 28) ^ pteg) & htab_hash_mask;
-
-		va |= vpi << PAGE_SHIFT;
+		switch (hpte_v >> HPTE_V_SSIZE_SHIFT) {
+		case MMU_SEGSIZE_256M:
+			vpi = ((avpn >> 28) ^ pteg) & htab_hash_mask;
+			break;
+		case MMU_SEGSIZE_1T:
+			vsid = avpn >> 40;
+			vpi = (vsid ^ (vsid << 25) ^ pteg) & htab_hash_mask;
+			break;
+		default:
+			avpn = vpi = size = 0;
+		}
+		avpn |= (vpi << mmu_psize_defs[size].shift);
 	}
 
-	return va;
+	*va = avpn;
+	*psize = size;
 }
 
 /*
@@ -374,15 +411,14 @@ static unsigned long slot2va(unsigned lo
  *
  * TODO: add batching support when enabled.  remember, no dynamic memory here,
  * athough there is the control page available...
- *
- * XXX FIXME: 4k only for now !
  */
 static void native_hpte_clear(void)
 {
 	unsigned long slot, slots, flags;
 	hpte_t *hptep = htab_address;
-	unsigned long hpte_v;
+	unsigned long hpte_v, va;
 	unsigned long pteg_count;
+	int psize;
 
 	pteg_count = htab_hash_mask + 1;
 
@@ -408,8 +444,9 @@ static void native_hpte_clear(void)
 		 * already hold the native_tlbie_lock.
 		 */
 		if (hpte_v & HPTE_V_VALID) {
+			hpte_decode(hptep, slot, &psize, &va);
 			hptep->v = 0;
-			__tlbie(slot2va(hpte_v, slot), MMU_PAGE_4K);
+			__tlbie(va, psize);
 		}
 	}
 
Index: b/include/asm-powerpc/mmu.h
===================================================================
--- a/include/asm-powerpc/mmu.h
+++ b/include/asm-powerpc/mmu.h
@@ -79,6 +79,7 @@ extern char initial_stab[];
 
 #define HPTES_PER_GROUP 8
 
+#define HPTE_V_SSIZE_SHIFT      62
 #define HPTE_V_AVPN_SHIFT	7
 #define HPTE_V_AVPN		ASM_CONST(0xffffffffffffff80)
 #define HPTE_V_AVPN_VAL(x)	(((x) & HPTE_V_AVPN) >> HPTE_V_AVPN_SHIFT)
@@ -157,6 +158,15 @@ struct mmu_psize_def
 #define MMU_PAGE_16G		5	/* 16G */
 #define MMU_PAGE_COUNT		6
 
+/*
+ * Segment sizes.
+ * These are the values used by hardware in the B field of
+ * SLB entries and the first dword of MMU hashtable entries.
+ * The B field is 2 bits; the values 2 and 3 are unused and reserved.
+ */
+#define MMU_SEGSIZE_256M        0
+#define MMU_SEGSIZE_1T		1
+
 #ifndef __ASSEMBLY__
 
 /*