Sophie

Sophie

distrib > Scientific%20Linux > 5x > x86_64 > by-pkgid > fc11cd6e1c513a17304da94a5390f3cd > files > 4038

kernel-2.6.18-194.11.1.el5.src.rpm

From: Chris Lalancette <clalance@redhat.com>
Date: Fri, 20 Mar 2009 10:23:18 +0100
Subject: [x86] use cpu_khz for loops_per_jiffy calculation
Message-id: 49C36086.2090801@redhat.com
O-Subject: [RHEL5.4 PATCH 9/14]: x86: use cpu_khz for loops_per_jiffy calculation
Bugzilla: 463573
RH-Acked-by: Rik van Riel <riel@redhat.com>
RH-Acked-by: Justin M. Forbes <jforbes@redhat.com>

On the x86 platform we can use the value of tsc_khz computed during tsc
calibration to calculate the loops_per_jiffy value. It's very important
to keep the error in lpj values to a minimum, as any error there may
result in a kernel panic in check_timer. In a virtualization environment,
on a highly overloaded host, the guest delay calibration may sometimes
result in errors beyond the ~50% that timer_irq_works can handle,
resulting in the guest panicking.

Also makes some formatting changes to the delay calibration code
(calibrate_delay()) so that a single printk prints the BogoMIPS value.

We do this only for the boot processor because the APs can have
different base frequencies, or the BIOS might boot an AP at a different
frequency.

This is actually 2 upstream commits:

3da757daf86e498872855f0b5e101f763ba79499
f3f3149f35b9195ef4b761b1353fc0766b5f53be

Fixes BZ 463573

diff --git a/arch/i386/kernel/tsc.c b/arch/i386/kernel/tsc.c
index 9789209..473c226 100644
--- a/arch/i386/kernel/tsc.c
+++ b/arch/i386/kernel/tsc.c
@@ -11,6 +11,7 @@
 #include <linux/init.h>
 #include <linux/dmi.h>
 #include <linux/acpi.h>
+#include <linux/delay.h>
 #include <asm/delay.h>
 #include <asm/tsc.h>
 #include <asm/delay.h>
@@ -205,6 +206,8 @@ EXPORT_SYMBOL(recalibrate_cpu_khz);
 
 void tsc_init(void)
 {
+	u64 lpj;
+
 	if (!cpu_has_tsc || tsc_disable)
 		return;
 
@@ -219,6 +222,11 @@ void tsc_init(void)
 				(unsigned long)cpu_khz % 1000);
 
 	set_cyc2ns_scale(cpu_khz);
+
+	lpj = ((u64)tsc_khz * 1000);
+	do_div(lpj, HZ);
+	lpj_fine = lpj;
+
 	use_tsc_delay();
 }
 
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c
index fb9fcd9..b9f5303 100644
--- a/arch/x86_64/kernel/time.c
+++ b/arch/x86_64/kernel/time.c
@@ -35,6 +35,7 @@
 #include <linux/kallsyms.h>
 #include <linux/efi.h>
 #include <linux/acpi.h>
+#include <linux/delay.h>
 #ifdef CONFIG_ACPI
 #include <acpi/achware.h>	/* for PM timer frequency */
 #include <acpi/acpi_bus.h>
@@ -1035,6 +1036,8 @@ void __init time_init(void)
 		cpu_khz = tsc_khz;
 	}
 
+	lpj_fine = ((unsigned long)tsc_khz * 1000)/HZ;
+
 	vxtime.mode = VXTIME_TSC;
 	vxtime.quot = (NSEC_PER_SEC << NS_SCALE) / vxtime_hz;
 	vxtime.tsc_quot = (NSEC_PER_MSEC << NS_SCALE) / cpu_khz;
diff --git a/include/linux/delay.h b/include/linux/delay.h
index 5443e1f..7b91236 100644
--- a/include/linux/delay.h
+++ b/include/linux/delay.h
@@ -39,6 +39,7 @@ extern unsigned long loops_per_jiffy;
 #define ndelay(x)	udelay(((x)+999)/1000)
 #endif
 
+extern unsigned long lpj_fine;
 void calibrate_delay(void);
 void msleep(unsigned int msecs);
 unsigned long msleep_interruptible(unsigned int msecs);
diff --git a/init/calibrate.c b/init/calibrate.c
index 06066a6..2785c54 100644
--- a/init/calibrate.c
+++ b/init/calibrate.c
@@ -7,9 +7,11 @@
 #include <linux/sched.h>
 #include <linux/delay.h>
 #include <linux/init.h>
+#include <linux/smp.h>
 
 #include <asm/timex.h>
 
+unsigned long lpj_fine;
 unsigned long preset_lpj;
 static int __init lpj_setup(char *str)
 {
@@ -33,9 +35,9 @@ static unsigned long __devinit calibrate_delay_direct(void)
 	unsigned long pre_start, start, post_start;
 	unsigned long pre_end, end, post_end;
 	unsigned long start_jiffies;
-	unsigned long tsc_rate_min, tsc_rate_max;
-	unsigned long good_tsc_sum = 0;
-	unsigned long good_tsc_count = 0;
+	unsigned long timer_rate_min, timer_rate_max;
+	unsigned long good_timer_sum = 0;
+	unsigned long good_timer_count = 0;
 	unsigned long delay_calibration_ticks = ((REAL_HZ < 100) ? 1 : (REAL_HZ/100));
 	int i;
 
@@ -80,25 +82,27 @@ static unsigned long __devinit calibrate_delay_direct(void)
 		}
 		read_current_timer(&post_end);
 
-		tsc_rate_max = (post_end - pre_start) / delay_calibration_ticks;
-		tsc_rate_min = (pre_end - post_start) / delay_calibration_ticks;
+		timer_rate_max = (post_end - pre_start) /
+					delay_calibration_ticks;
+		timer_rate_min = (pre_end - post_start) /
+					delay_calibration_ticks;
 		
-		tsc_rate_max /= tick_divider;
-		tsc_rate_min /= tick_divider;
+		timer_rate_max /= tick_divider;
+		timer_rate_min /= tick_divider;
 		
 		/*
-	 	 * If the upper limit and lower limit of the tsc_rate is
+		 * If the upper limit and lower limit of the timer_rate is
 		 * >= 12.5% apart, redo calibration.
 		 */
 		if (pre_start != 0 && pre_end != 0 &&
-		    (tsc_rate_max - tsc_rate_min) < (tsc_rate_max >> 3)) {
-			good_tsc_count++;
-			good_tsc_sum += tsc_rate_max;
+		    (timer_rate_max - timer_rate_min) < (timer_rate_max >> 3)) {
+			good_timer_count++;
+			good_timer_sum += timer_rate_max;
 		}
 	}
 
-	if (good_tsc_count)
-		return (good_tsc_sum/good_tsc_count);
+	if (good_timer_count)
+		return (good_timer_sum/good_timer_count);
 
 	printk(KERN_WARNING "calibrate_delay_direct() failed to get a good "
 	       "estimate for loops_per_jiffy.\nProbably due to long platform interrupts. Consider using \"lpj=\" boot option.\n");
@@ -112,6 +116,10 @@ static unsigned long __devinit calibrate_delay_direct(void) {return 0;}
  * This is the number of bits of precision for the loops_per_jiffy.  Each
  * bit takes on average 1.5/HZ seconds.  This (like the original) is a little
  * better than 1%
+ * For the boot cpu we can skip the delay calibration and assign it a value
+ * calculated based on the timer frequency.
+ * For the rest of the CPUs we cannot assume that the timer frequency is same as
+ * the cpu frequency, hence do the calibration for those.
  */
 #define LPS_PREC 8
 
@@ -122,20 +130,20 @@ void __devinit calibrate_delay(void)
 
 	if (preset_lpj) {
 		loops_per_jiffy = preset_lpj;
-		printk("Calibrating delay loop (skipped)... "
-			"%lu.%02lu BogoMIPS preset\n",
-			loops_per_jiffy/(500000/HZ),
-			(loops_per_jiffy/(5000/HZ)) % 100);
+		printk(KERN_INFO
+			"Calibrating delay loop (skipped) preset value.. ");
+	} else if ((smp_processor_id() == 0) && lpj_fine) {
+		loops_per_jiffy = lpj_fine;
+		printk(KERN_INFO
+			"Calibrating delay loop (skipped), "
+			"value calculated using timer frequency.. ");
 	} else if ((loops_per_jiffy = calibrate_delay_direct()) != 0) {
-		printk("Calibrating delay using timer specific routine.. ");
-		printk("%lu.%02lu BogoMIPS (lpj=%lu)\n",
-			loops_per_jiffy/(500000/HZ),
-			(loops_per_jiffy/(5000/HZ)) % 100,
-			loops_per_jiffy);
+		printk(KERN_INFO
+			"Calibrating delay using timer specific routine.. ");
 	} else {
 		loops_per_jiffy = (1<<12);
 
-		printk(KERN_DEBUG "Calibrating delay loop... ");
+		printk(KERN_INFO "Calibrating delay loop... ");
 		while ((loops_per_jiffy <<= 1) != 0) {
 			/* wait for "start of" clock tick */
 			ticks = jiffies;
@@ -165,12 +173,8 @@ void __devinit calibrate_delay(void)
 			if (jiffies != ticks)	/* longer than 1 tick */
 				loops_per_jiffy &= ~loopbit;
 		}
-
-		/* Round the value and print it */
-		printk("%lu.%02lu BogoMIPS (lpj=%lu)\n",
-			loops_per_jiffy/(500000/HZ),
-			(loops_per_jiffy/(5000/HZ)) % 100,
-			loops_per_jiffy);
 	}
-
+	printk(KERN_CONT "%lu.%02lu BogoMIPS (lpj=%lu)\n",
+			loops_per_jiffy/(500000/HZ),
+			(loops_per_jiffy/(5000/HZ)) % 100, loops_per_jiffy);
 }