Sophie

Sophie

distrib > Scientific%20Linux > 5x > x86_64 > by-pkgid > 27922b4260f65d317aabda37e42bbbff > files > 1854

kernel-2.6.18-238.el5.src.rpm

From: Prarit Bhargava <prarit@redhat.com>
Date: Fri, 9 Nov 2007 11:19:27 -0500
Subject: [misc] backport upstream softlockup_tick code
Message-id: 4734888F.7080101@redhat.com
O-Subject: Re: [RHEL 5 PATCH 3/4]: Backport upstream softlockup_tick() code
Bugzilla: 367251

- Upstream softlockup code uses a nanosecond timer; RHEL5 still uses jiffies.
- Upstream code uses CTL_UNUMBERED for the sysctl; in RHEL5 I've introduced
  KERN_SOFTLOCKUP_THRESH instead.

Acked-by: Doug Chapman <dchapman@redhat.com>

diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index 89bf8c2..3cb3334 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -309,6 +309,14 @@ kernel.  This value defaults to SHMMAX.
 
 ==============================================================
 
+softlockup_thresh:
+
+This value sets the softlockup tolerance threshold, in seconds.
+The default threshold is 10s.  If a cpu is locked up for at least
+this long, the kernel complains.  Valid values are 1-60s.
+
+==============================================================
+
 tainted: 
 
 Non-zero if the kernel has been tainted.  Numeric values, which
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 98bc8c9..0748a18 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -155,6 +155,7 @@ enum
 	KERN_MAX_LOCK_DEPTH=74,
 	KERN_KDUMP_ON_INIT=75,	/* int: ia64 kdump with INIT */
  	KERN_PANIC_ON_NMI=76, /* int: whether we will panic on an unrecovered */
+	KERN_SOFTLOCKUP_THRESH=80, /* int: min time to report softlockups */
 };
 
 
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index c55ef63..e35a03b 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -13,6 +13,7 @@
 #include <linux/kthread.h>
 #include <linux/notifier.h>
 #include <linux/module.h>
+#include <linux/sched.h>
 
 static DEFINE_SPINLOCK(print_lock);
 
@@ -21,6 +22,7 @@ static DEFINE_PER_CPU(unsigned long, print_timestamp);
 static DEFINE_PER_CPU(struct task_struct *, watchdog_task);
 
 static int did_panic = 0;
+int softlockup_thresh = 10;
 
 static int
 softlock_panic(struct notifier_block *this, unsigned long event, void *ptr)
@@ -54,37 +56,48 @@ EXPORT_SYMBOL(touch_all_softlockup_watchdogs);
  * This callback runs from the timer interrupt, and checks
  * whether the watchdog thread has hung or not:
  */
-void softlockup_tick(void)
+void softlockup_tick(struct pt_regs *regs)
 {
 	int this_cpu = smp_processor_id();
 	unsigned long touch_timestamp = per_cpu(touch_timestamp, this_cpu);
+	unsigned long print_timestamp;
+	unsigned long now;
 
-	/* prevent double reports: */
-	if (per_cpu(print_timestamp, this_cpu) == touch_timestamp ||
-		did_panic ||
-			!per_cpu(watchdog_task, this_cpu))
+	if (touch_timestamp == 0) {
+		touch_softlockup_watchdog();
 		return;
+	}
 
-	/* do not print during early bootup: */
-	if (unlikely(system_state != SYSTEM_RUNNING)) {
-		touch_softlockup_watchdog();
+	print_timestamp = per_cpu(print_timestamp, this_cpu);
+	/* report at most once a second */
+	if (time_after_eq(print_timestamp, touch_timestamp) &&
+	    time_before(print_timestamp, touch_timestamp + HZ) ||
+ 	    did_panic || !per_cpu(watchdog_task, this_cpu)) {
 		return;
 	}
 
+	now = jiffies;
+
 	/* Wake up the high-prio watchdog task every second: */
-	if (time_after(jiffies, touch_timestamp + HZ))
+	if (time_after(now, touch_timestamp + HZ))
 		wake_up_process(per_cpu(watchdog_task, this_cpu));
 
 	/* Warn about unreasonable 10+ seconds delays: */
-	if (time_after(jiffies, touch_timestamp + 10*HZ)) {
-		per_cpu(print_timestamp, this_cpu) = touch_timestamp;
+	if (time_before(now, touch_timestamp + softlockup_thresh*HZ))
+		return;
+
+	per_cpu(print_timestamp, this_cpu) = touch_timestamp;
 
-		spin_lock(&print_lock);
-		printk(KERN_ERR "BUG: soft lockup detected on CPU#%d!\n",
-			this_cpu);
+	spin_lock(&print_lock);
+	printk(KERN_ERR "BUG: soft lockup - CPU#%d stuck for %lus! [%s:%d]\n",
+	       this_cpu, (now - touch_timestamp)/HZ,
+	       current->comm, current->pid);
+	if (regs)
+		show_regs(regs);
+	else
 		dump_stack();
-		spin_unlock(&print_lock);
-	}
+	spin_unlock(&print_lock);
+
 }
 
 /*
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 07833ea..9a1bcc5 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -105,6 +105,15 @@ static int __init setup_exec_shield(char *str)
 
 __setup("exec-shield=", setup_exec_shield);
 
+/* Constants used for minimum and maximum */
+#ifdef CONFIG_DETECT_SOFTLOCKUP
+static int one = 1;
+static int sixty = 60;
+#endif
+
+static int zero;
+static int one_hundred = 100;
+
 /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
 static int maxolduid = 65535;
 static int minolduid;
@@ -748,6 +757,19 @@ static ctl_table kern_table[] = {
 		.proc_handler	= &proc_dointvec,
 	},
 #endif
+#ifdef CONFIG_DETECT_SOFTLOCKUP
+	{
+		.ctl_name	= KERN_SOFTLOCKUP_THRESH,
+		.procname	= "softlockup_thresh",
+		.data		= &softlockup_thresh,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &one,
+		.extra2		= &sixty,
+	},
+#endif
 #ifdef CONFIG_COMPAT
 	{
 		.ctl_name	= KERN_COMPAT_LOG,
@@ -772,12 +794,6 @@ static ctl_table kern_table[] = {
 	{ .ctl_name = 0 }
 };
 
-/* Constants for minimum and maximum testing in vm_table.
-   We use these as one-element integer vectors. */
-static int zero;
-static int one_hundred = 100;
-
-
 static ctl_table vm_table[] = {
 	{
 		.ctl_name	= VM_OVERCOMMIT_MEMORY,