Sophie

Sophie

distrib > Scientific%20Linux > 5x > x86_64 > by-pkgid > 27922b4260f65d317aabda37e42bbbff > files > 1482

kernel-2.6.18-238.el5.src.rpm

From: George Beshers <gbeshers@redhat.com>
Date: Thu, 31 Jul 2008 15:32:48 -0400
Subject: [IA64] handle TLB errors from duplicate itr.d dropins
Message-id: 20080731192653.4411.5155.sendpatchset@dhcp-100-2-194.bos.redhat.com
O-Subject: [RHEL5.3 PATCH 2/19] [IA64] Proper handling of TLB errors from duplicate itr.d dropins
Bugzilla: 455308
RH-Acked-by: Prarit Bhargava <prarit@redhat.com>

[patch] Proper handling of TLB errors from duplicate itr.d dropins

BZ#455308

Upstream: http://git.kernel.org/?p=linux/kernel/git/aegl/linux-2.6.git;a=commitdiff;h=618b206f0b580d965eb26f704ed23beee2a8c25d

Jack Steiner noticed that duplicate TLB DTC entries do not cause a
linux panic.  See discussion:

http://www.gelato.unsw.edu.au/archives/linux-ia64/0307/6108.html

The current TLB recovery code is recovering from the duplicate itr.d
dropins, masking the underlying problem.  This change modifies
the MCA recovery code to look for the TLB check signature of the
duplicate TLB entry and panic in that case.

Signed-off-by: Russ Anderson (rja@sgi.com)
Acked-by: Raymund Will <ray@suse.de>

diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index a4f8a96..35392ad 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -1192,8 +1192,6 @@ void
 ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw,
 		 struct ia64_sal_os_state *sos)
 {
-	pal_processor_state_info_t *psp = (pal_processor_state_info_t *)
-		&sos->proc_state_param;
 	int recover, cpu = smp_processor_id();
 	struct task_struct *previous_current;
 	struct ia64_mca_notify_die nd =
@@ -1223,10 +1221,8 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw,
 	/* Get the MCA error record and log it */
 	ia64_mca_log_sal_error_record(SAL_INFO_TYPE_MCA);
 
-	/* TLB error is only exist in this SAL error record */
-	recover = (psp->tc && !(psp->cc || psp->bc || psp->rc || psp->uc))
-	/* other error recovery */
-	   || (ia64_mca_ucmc_extension
+	/* MCA error recovery */
+	recover = (ia64_mca_ucmc_extension
 		&& ia64_mca_ucmc_extension(
 			IA64_LOG_CURR_BUFFER(SAL_INFO_TYPE_MCA),
 			sos));
diff --git a/arch/ia64/kernel/mca_drv.c b/arch/ia64/kernel/mca_drv.c
index afc1403..c216423 100644
--- a/arch/ia64/kernel/mca_drv.c
+++ b/arch/ia64/kernel/mca_drv.c
@@ -607,6 +607,33 @@ recover_from_platform_error(slidx_table_t *slidx, peidx_table_t *peidx,
 	return status;
 }
 
+/*
+ * recover_from_tlb_check
+ * @peidx:     pointer of index of processor error section
+ *
+ * Return value:
+ *     1 on Success / 0 on Failure
+ */
+static int
+recover_from_tlb_check(peidx_table_t *peidx)
+{
+	sal_log_mod_error_info_t *smei;
+	pal_tlb_check_info_t *ptci;
+
+	smei = (sal_log_mod_error_info_t *)peidx_tlb_check(peidx, 0);
+	ptci = (pal_tlb_check_info_t *)&(smei->check_info);
+
+	/*
+	 * Look for signature of a duplicate TLB DTC entry, which is
+	 * a SW bug and always fatal.
+	 */
+	if (ptci->op == PAL_TLB_CHECK_OP_PURGE
+	    && !(ptci->itr || ptci->dtc || ptci->itc))
+		return fatal_mca("Duplicate TLB entry");
+
+	return mca_recovered("TLB check recovered");
+}
+
 /**
  * recover_from_processor_error
  * @platform:	whether there are some platform error section or not
@@ -652,6 +679,12 @@ recover_from_processor_error(int platform, slidx_table_t *slidx,
 		return fatal_mca("error not contained");
 
 	/*
+	 * Look for recoverable TLB check
+	 */
+	if (psp->tc && !(psp->cc || psp->bc || psp->rc || psp->uc))
+		return recover_from_tlb_check(peidx);
+
+	/*
 	 * The cache check and bus check bits have four possible states
 	 *   cc bc
 	 *    0  0	Weird record, not recovered
diff --git a/include/asm-ia64/pal.h b/include/asm-ia64/pal.h
index 1d7880a..74f089e 100644
--- a/include/asm-ia64/pal.h
+++ b/include/asm-ia64/pal.h
@@ -343,6 +343,7 @@ typedef struct pal_cache_line_info_s {
 
 } pal_cache_line_info_t;
 
+#define PAL_TLB_CHECK_OP_PURGE			8
 
 /* Machine Check related crap */