Sophie

Sophie

distrib > Scientific%20Linux > 5x > x86_64 > by-pkgid > 27922b4260f65d317aabda37e42bbbff > files > 1493

kernel-2.6.18-238.el5.src.rpm

From: Neil Horman <nhorman@redhat.com>
Date: Mon, 9 Aug 2010 13:28:12 -0400
Subject: [ia64] kdump: prevent hang on INIT interrupt during boot
Message-id: <20100809132812.GA1838@hmsreliant.think-freely.org>
Patchwork-id: 27474
O-Subject: [RHEL5.6 PATCH] prevent kdump from hanging on ia64 if INIT interrupt
	is generated early in boot (bz 506694)
Bugzilla: 506694
RH-Acked-by: Dave Anderson <anderson@redhat.com>

Hey all-
	This is a backport of the following upstream commits:
0cced40e7c58b1105aef3ca446da7b158a18a9a6
5959906ee9dee602a46e49c868a7e543e050d605
1726b0883dd08636705ea55d577eb0ec314ba427
68cb14c7c46d9204ba451a534f15a8bc12c88e28
6cc3efcdf01cf874ffe770919395918a3ee9365b
07a6a4ae827b54cec4c1b1d92bed1cc9176b45ec
4295ab34883d2070b1145e14f4619478e9788807

Which break a deadlock in the redezvous code on ia64 that can occur if an INIT
NMI is received by the OS early during kdump boot up.  Tested successfully by
Fujitsu.

Neil

Signed-off-by: Jarod Wilson <jarod@redhat.com>

diff --git a/arch/ia64/kernel/crash.c b/arch/ia64/kernel/crash.c
index c50b9d5..1502849 100644
--- a/arch/ia64/kernel/crash.c
+++ b/arch/ia64/kernel/crash.c
@@ -25,6 +25,7 @@ int kdump_status[NR_CPUS];
 atomic_t kdump_cpu_freezed;
 int kdump_on_init = 1;
 atomic_t kdump_in_progress;
+static int kdump_freeze_monarch;
 
 ssize_t
 copy_oldmem_page(unsigned long pfn, char *buf,
@@ -126,13 +127,39 @@ machine_crash_shutdown(struct pt_regs *pt)
 	 */
 	kexec_disable_iosapic();
 #ifdef CONFIG_SMP
+	/*
+	 * If kdump_on_init is set and an INIT is asserted here, kdump will
+	 * be started again via INIT monarch.
+	 */
+	local_irq_disable();
+	ia64_set_psr_mc();	/* mask MCA/INIT */
+	if (atomic_inc_return(&kdump_in_progress) != 1)
+		unw_init_running(kdump_cpu_freeze, NULL);
+
+	/*
+	 * Now this cpu is ready for kdump.
+	 * Stop all others by IPI or INIT.  They could receive INIT from
+	 * outside and might be INIT monarch, but only thing they have to
+	 * do is falling into kdump_cpu_freeze().
+	 *
+	 * If an INIT is asserted here:
+	 * - All receivers might be slaves, since some of cpus could already
+	 *   be frozen and INIT might be masked on monarch.  In this case,
+	 *   all slaves will be frozen soon since kdump_in_progress will let
+	 *   them into DIE_INIT_SLAVE_LEAVE.
+	 * - One might be a monarch, but INIT rendezvous will fail since
+	 *   at least this cpu already have INIT masked so it never join
+	 *   to the rendezvous.  In this case, all slaves and monarch will
+	 *   be frozen soon with no wait since the INIT rendezvous is skipped
+	 *   by kdump_in_progress.
+	 */
 	kdump_smp_send_stop();
-	if (kdump_wait_cpu_freeze() && kdump_on_init) 	{
-		//not all cpu response to IPI, send INIT to freeze them
-		kdump_sending_init = 1;
-		mb();
+	if (kdump_wait_cpu_freeze()) {
 		kdump_smp_send_init();
+		/* wait again, don't go ahead if possible */
+		kdump_wait_cpu_freeze();
 	}
+
 #endif
 }
 
@@ -153,16 +180,12 @@ kdump_cpu_freeze(struct unw_frame_info *info, void *arg)
 	local_irq_disable();
 	crash_save_this_cpu();
 	current->thread.ksp = (__u64)info->sw - 16;
+	ia64_set_psr_mc();      /* mask MCA/INIT and stop reentrance */
 	atomic_inc(&kdump_cpu_freezed);
 	kdump_status[cpuid] = 1;
 	mb();
-        /* return cpus (except cpu0) to SAL slave loop */
-        if (cpuid == 0) {
-                for (;;)
-                        cpu_relax();
-        } else {
-                ia64_jump_to_sal(&sal_boot_rendez_state[cpuid]);
-        }
+	for (;;)
+		cpu_relax();
 }
 
 static int
@@ -171,6 +194,20 @@ kdump_init_notifier(struct notifier_block *self, unsigned long val, void *data)
 	struct ia64_mca_notify_die *nd;
 	struct die_args *args = data;
 
+	if (atomic_read(&kdump_in_progress)) {
+		switch (val) {
+		case DIE_INIT_MONARCH_LEAVE:
+			if (!kdump_freeze_monarch)
+				break;
+			/* fall through */
+		case DIE_INIT_SLAVE_LEAVE:
+		case DIE_INIT_MONARCH_ENTER:
+		case DIE_MCA_RENDZVOUS_LEAVE:
+			unw_init_running(kdump_cpu_freeze, NULL);
+			break;
+		}
+	}
+
 	if (!kdump_on_init)
 		return NOTIFY_DONE;
 
@@ -183,41 +220,32 @@ kdump_init_notifier(struct notifier_block *self, unsigned long val, void *data)
 	}
 
 	if (val != DIE_INIT_MONARCH_LEAVE &&
-	    val != DIE_INIT_SLAVE_LEAVE &&
 	    val != DIE_INIT_MONARCH_PROCESS &&
-	    val != DIE_MCA_RENDZVOUS_LEAVE &&
 	    val != DIE_MCA_MONARCH_LEAVE)
 		return NOTIFY_DONE;
 
 	nd = (struct ia64_mca_notify_die *)args->err;
-	/* Reason code 1 means machine check rendezous*/
-	if ((val == DIE_INIT_MONARCH_LEAVE || val == DIE_INIT_SLAVE_LEAVE
-	    || val == DIE_INIT_MONARCH_PROCESS) && nd->sos->rv_rc == 1)
-		return NOTIFY_DONE;
 
 	if (kdump_sending_init)
 		unw_init_running(kdump_cpu_freeze, NULL);
 
 	switch (val) {
 		case DIE_INIT_MONARCH_PROCESS:
-			atomic_set(&kdump_in_progress, 1);
-			*(nd->monarch_cpu) = -1;
+			/* Reason code 1 means machine check rendezvous*/
+			if (kdump_on_init && (nd->sos->rv_rc != 1)) {
+				if (atomic_inc_return(&kdump_in_progress) != 1)
+					kdump_freeze_monarch = 1;
+			}
 			break;
 		case DIE_INIT_MONARCH_LEAVE:
-			machine_kdump_on_init();
-			break;
-		case DIE_INIT_SLAVE_LEAVE:
-			if (atomic_read(&kdump_in_progress))
-				unw_init_running(kdump_cpu_freeze, NULL);
-			break;
-		case DIE_MCA_RENDZVOUS_LEAVE:
-			if (atomic_read(&kdump_in_progress))
-				unw_init_running(kdump_cpu_freeze, NULL);
+			/* Reason code 1 means machine check rendezvous*/
+			if (kdump_on_init && (nd->sos->rv_rc != 1))
+				machine_kdump_on_init();
 			break;
 		case DIE_MCA_MONARCH_LEAVE:
-			/* die_register->signr indicate if MCA is recoverable */
-			if (!args->signr)
+			if (atomic_read(&kdump_in_progress) == 1)
 				machine_kdump_on_init();
+			/* We got fatal MCA while kdump!? No way!! */
 			break;
 	}
 	return NOTIFY_DONE;
diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S
index bd9d2da..0e9a617 100644
--- a/arch/ia64/kernel/head.S
+++ b/arch/ia64/kernel/head.S
@@ -1154,7 +1154,7 @@ GLOBAL_ENTRY(ia64_jump_to_sal)
 	movl r16=SAL_PSR_BITS_TO_SET;;
 	mov cr.ipsr=r16
 	mov cr.ifs=r0;;
-	rfi;;
+	rfi;;			// note: this unmask MCA/INIT (psr.mc)
 1:
 	/*
 	 * Invalidate all TLB data/inst
diff --git a/arch/ia64/kernel/machine_kexec.c b/arch/ia64/kernel/machine_kexec.c
index 30338e5..eaca026 100644
--- a/arch/ia64/kernel/machine_kexec.c
+++ b/arch/ia64/kernel/machine_kexec.c
@@ -22,6 +22,8 @@
 #include <asm/processor.h>
 #include <linux/numa.h>
 #include <linux/mmzone.h>
+#include <asm/sal.h>
+#include <asm/mca.h>
 
 typedef void (*relocate_new_kernel_t)(unsigned long, unsigned long,
 		struct ia64_boot_param *, unsigned long);
@@ -99,13 +101,26 @@ static void ia64_machine_kexec(struct unw_frame_info *info, void *arg)
 	unsigned long code_addr = (unsigned long)page_address(image->control_code_page);
 	unsigned long vector;
 	int ii;
+	u64 fp, gp;
+	ia64_fptr_t *init_handler = (ia64_fptr_t *)ia64_os_init_on_kdump;
 
 	BUG_ON(!image);
 	if (image->type == KEXEC_TYPE_CRASH) {
 		crash_save_this_cpu();
 		current->thread.ksp = (__u64)info->sw - 16;
+
+		/* Register noop init handler */
+		fp = ia64_tpa(init_handler->fp);
+		gp = ia64_tpa(ia64_getreg(_IA64_REG_GP));
+		ia64_sal_set_vectors(SAL_VECTOR_OS_INIT, fp, gp, 0, fp, gp, 0);
+	} else {
+		/* Unregister init handlers of current kernel */
+		ia64_sal_set_vectors(SAL_VECTOR_OS_INIT, 0, 0, 0, 0, 0, 0);
 	}
 
+	/* Unregister mca handler - No more recovery on current kernel */
+	ia64_sal_set_vectors(SAL_VECTOR_OS_MCA, 0, 0, 0, 0, 0, 0);
+
 	/* Interrupts aren't acceptable while we reboot */
 	local_irq_disable();
 
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index 07746ea..88a06d3 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -1266,7 +1266,7 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw,
 		/* Dump buffered message to console */
 		ia64_mlogbuf_finish(1);
 #ifdef CONFIG_KEXEC
-		atomic_set(&kdump_in_progress, 1);
+		atomic_inc(&kdump_in_progress);
 		/* In the case of (!recover), notify_die(DIE_MCA_MONARCH_LEAVE)
 		   will not return. A dump kernel will be booted. Need to set
 		   nonarch_cpu here to get slave cpus out of looping in OS.
@@ -1641,16 +1641,27 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw,
 
 	if (!sos->monarch) {
 		ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_INIT;
+
+#ifdef CONFIG_KEXEC
+		while (monarch_cpu == -1 && !atomic_read(&kdump_in_progress))
+			udelay(1000);
+#else
 		while (monarch_cpu == -1)
 		       cpu_relax();	/* spin until monarch enters */
+#endif
 		if (notify_die(DIE_INIT_SLAVE_ENTER, "INIT", regs, (long)&nd, 0, 0)
 				== NOTIFY_STOP)
 			ia64_mca_spin(__FUNCTION__);
 		if (notify_die(DIE_INIT_SLAVE_PROCESS, "INIT", regs, (long)&nd, 0, 0)
 				== NOTIFY_STOP)
 			ia64_mca_spin(__FUNCTION__);
+#ifdef CONFIG_KEXEC
+		while (monarch_cpu != -1 && !atomic_read(&kdump_in_progress))
+			udelay(1000);
+#else
 		while (monarch_cpu != -1)
 		       cpu_relax();	/* spin until monarch leaves */
+#endif
 		if (notify_die(DIE_INIT_SLAVE_LEAVE, "INIT", regs, (long)&nd, 0, 0)
 				== NOTIFY_STOP)
 			ia64_mca_spin(__FUNCTION__);
diff --git a/arch/ia64/kernel/mca_asm.S b/arch/ia64/kernel/mca_asm.S
index c469ab5..8b06607 100644
--- a/arch/ia64/kernel/mca_asm.S
+++ b/arch/ia64/kernel/mca_asm.S
@@ -36,6 +36,7 @@
 
 	.global ia64_do_tlb_purge
 	.global ia64_os_mca_dispatch
+	.global ia64_os_init_on_kdump
 	.global ia64_os_init_dispatch_monarch
 	.global ia64_os_init_dispatch_slave
 
@@ -314,6 +315,25 @@ END(ia64_os_mca_virtual_begin)
 //StartMain////////////////////////////////////////////////////////////////////
 
 //
+// NOP init handler for kdump.  In panic situation, we may receive INIT
+// while kernel transition.  Since we initialize registers on leave from
+// current kernel, no longer monarch/slave handlers of current kernel in
+// virtual mode are called safely.
+// We can unregister these init handlers from SAL, however then the INIT
+// will result in warmboot by SAL and we cannot retrieve the crashdump.
+// Therefore register this NOP function to SAL, to prevent entering virtual
+// mode and resulting warmboot by SAL.
+//
+ia64_os_init_on_kdump:
+	mov		r8=r0		// IA64_INIT_RESUME
+	mov             r9=r10		// SAL_GP
+	mov		r22=r17		// *minstate
+	;;
+	mov		r10=r0		// return to same context
+	mov		b0=r12		// SAL_CHECK return address
+	br		b0
+
+//
 // SAL to OS entry point for INIT on all processors.  This has been defined for
 // registration purposes with SAL as a part of ia64_mca_init.  Monarch and
 // slave INIT have identical processing, except for the value of the
@@ -1089,3 +1109,30 @@ GLOBAL_ENTRY(ia64_get_rnat)
 	mov ar.rsc=3
 	br.ret.sptk.many rp
 END(ia64_get_rnat)
+
+
+// void ia64_set_psr_mc(void)
+//
+// Set psr.mc bit to mask MCA/INIT.
+GLOBAL_ENTRY(ia64_set_psr_mc)
+	rsm psr.i | psr.ic		// disable interrupts
+	;;
+	srlz.d
+	;;
+	mov r14 = psr			// get psr{36:35,31:0}
+	movl r15 = 1f
+	;;
+	dep r14 = -1, r14, PSR_MC, 1	// set psr.mc
+	;;
+	dep r14 = -1, r14, PSR_IC, 1	// set psr.ic
+	;;
+	dep r14 = -1, r14, PSR_BN, 1	// keep bank1 in use
+	;;
+	mov cr.ipsr = r14
+	mov cr.ifs = r0
+	mov cr.iip = r15
+	;;
+	rfi
+1:
+	br.ret.sptk.many rp
+END(ia64_set_psr_mc)
diff --git a/arch/ia64/kernel/relocate_kernel.S b/arch/ia64/kernel/relocate_kernel.S
index 5639960..e8d238d 100644
--- a/arch/ia64/kernel/relocate_kernel.S
+++ b/arch/ia64/kernel/relocate_kernel.S
@@ -54,7 +54,7 @@ GLOBAL_ENTRY(relocate_new_kernel)
 	srlz.i
 	;;
 	mov ar.rnat=r18
-	rfi
+	rfi				// note: this unmask MCA/INIT (psr.mc)
 	;;
 1:
 	//physical mode code begin
diff --git a/include/asm-ia64/mca.h b/include/asm-ia64/mca.h
index d8973ab..d2ed2ac 100644
--- a/include/asm-ia64/mca.h
+++ b/include/asm-ia64/mca.h
@@ -145,12 +145,14 @@ extern void ia64_mca_ucmc_handler(struct pt_regs *, struct ia64_sal_os_state *);
 extern void ia64_init_handler(struct pt_regs *,
 			      struct switch_stack *,
 			      struct ia64_sal_os_state *);
+extern void ia64_os_init_on_kdump(void);
 extern void ia64_monarch_init_handler(void);
 extern void ia64_slave_init_handler(void);
 extern void ia64_mca_cmc_vector_setup(void);
 extern int  ia64_reg_MCA_extension(int (*fn)(void *, struct ia64_sal_os_state *));
 extern void ia64_unreg_MCA_extension(void);
 extern u64 ia64_get_rnat(u64 *);
+extern void ia64_set_psr_mc(void);
 
 struct ia64_mca_notify_die {
 	struct ia64_sal_os_state *sos;