Sophie

Sophie

distrib > Scientific%20Linux > 5x > x86_64 > media > main-src > by-pkgid > d0a35cd31c1125e2132804d68547073d > files > 4172

kernel-2.6.18-194.26.1.el5.src.rpm

From: Prarit Bhargava <prarit@redhat.com>
Date: Tue, 18 Aug 2009 10:54:08 -0400
Subject: [x86_64] fix gettimeofday TSC overflow issue - 1
Message-id: 4A8AC090.6080106@redhat.com
O-Subject: [RHEL5 PATCH]: Fix gettimeofday() TSC overflow issue [v3]
Bugzilla: 467942
RH-Acked-by: Rik van Riel <riel@redhat.com>
RH-Acked-by: Chris Lalancette <clalance@redhat.com>
RH-Acked-by: Pete Zaitcev <zaitcev@redhat.com>

A quick repost for dzickus.

I had to update this patch for -148.el5 because of significant conflicts
in that kernel.  The patches that my patch conflicted with have since
been removed and now [v2] will no longer apply properly.

Refresh for -163.el5.

Quick ACKs are appreciated -- I think those of you cc'd have already
acked [v2].

P.

diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c
index b1f1e22..c3b3ea0 100644
--- a/arch/x86_64/kernel/time.c
+++ b/arch/x86_64/kernel/time.c
@@ -78,6 +78,7 @@ static int notsc __initdata = 0;
 #define NSEC_PER_REAL_TICK (NSEC_PER_SEC / REAL_HZ)
 
 #define NS_SCALE	10 /* 2^10, carefully chosen */
+#define NS_SCALE_22	22 /* 2^22, carefully chosen for TSC */
 #define US_SCALE	32 /* 2^32, arbitralrily chosen */
 
 unsigned int cpu_khz;					/* TSC clocks / usec, not used here */
@@ -120,7 +121,7 @@ static inline long do_gettimeoffset_tsc(void)
 	t = get_cycles_sync();
 	if (t < vxtime.last_tsc) 
 		t = vxtime.last_tsc; /* hack */
-	x = ((t - vxtime.last_tsc) * vxtime.tsc_quot) >> NS_SCALE;
+	x = ((t - vxtime.last_tsc) * vxtime.tsc_quot) >> NS_SCALE_22;
 	return x;
 }
 
@@ -400,6 +401,11 @@ static void do_timer_account_lost_ticks(struct pt_regs *regs)
 {
 	unsigned long tsc;
 	int delay = 0, offset = 0, lost = 0, i;
+	long tsc_offset = 0;
+
+	/* saved to re-calculate vxtime.last_tsc after the tick loop */
+	long last_tsc_quot = vxtime.tsc_quot;
+	unsigned long last_tsc = vxtime.last_tsc;
 
 	if (vxtime.hpet_address)
 		offset = hpet_readl(HPET_COUNTER);
@@ -443,27 +449,24 @@ static void do_timer_account_lost_ticks(struct pt_regs *regs)
 		lost = pmtimer_mark_offset();
 #endif
 	} else {
-		offset = (((tsc - vxtime.last_tsc) *
-			   vxtime.tsc_quot) >> NS_SCALE) - NSEC_PER_REAL_TICK;
+		tsc_offset = (((tsc - vxtime.last_tsc) *
+			       vxtime.tsc_quot) >> NS_SCALE_22) -
+			     NSEC_PER_REAL_TICK;
 
-		if (offset < 0)
-			offset = 0;
+		if (tsc_offset < 0)
+			tsc_offset = 0;
 
 		lost = 0;
-		while (offset > NSEC_PER_REAL_TICK) {
+		while (tsc_offset > NSEC_PER_REAL_TICK) {
 			lost++;
-			offset -= NSEC_PER_REAL_TICK;
+			tsc_offset -= NSEC_PER_REAL_TICK;
 		}
 
 		/* FIXME: 1000 or 1000000? */
 		monotonic_base += (tsc - vxtime.last_tsc) * 1000000 / cpu_khz;
 
-		vxtime.last_tsc = tsc - vxtime.quot * delay / vxtime.tsc_quot;
-
-		if ((((tsc - vxtime.last_tsc) *
-		      vxtime.tsc_quot) >> NS_SCALE) < offset)
-			vxtime.last_tsc = tsc -
-				(((long) offset << NS_SCALE) / vxtime.tsc_quot) - 1;
+		vxtime.last_tsc = tsc - vxtime.quot * delay /
+				  (vxtime.tsc_quot >> (NS_SCALE_22 - NS_SCALE));
 	}
 	/* SCALE: We expect tick_divider - 1 lost, ie 0 for normal behaviour */
 	if (lost > (int)tick_divider - 1)  {
@@ -474,6 +477,16 @@ static void do_timer_account_lost_ticks(struct pt_regs *regs)
 	/* Do the timer stuff */
 	for (i = 0; i < tick_divider; i++)
 		do_timer_jiffy(regs);
+
+	/* re-calculate vxtime.last_tsc */
+	if (vxtime.mode != VXTIME_HPET && vxtime.mode != VXTIME_PMTMR) {
+		if (tsc > (last_tsc + tsc_khz)) {
+			vxtime.last_tsc = vxtime.last_tsc -
+			                  (tsc - last_tsc -
+					   ((u64)tsc_khz * (u64)(lost + 1))) *
+			                  last_tsc_quot / vxtime.tsc_quot;
+		}
+	}
 }
 
 /*
@@ -577,12 +590,12 @@ static unsigned int cyc2ns_scale __read_mostly;
 
 static inline void set_cyc2ns_scale(unsigned long cpu_khz)
 {
-	cyc2ns_scale = (NSEC_PER_MSEC << NS_SCALE) / cpu_khz;
+	cyc2ns_scale = (NSEC_PER_MSEC << NS_SCALE_22) / cpu_khz;
 }
 
 static inline unsigned long long cycles_2_ns(unsigned long long cyc)
 {
-	return (cyc * cyc2ns_scale) >> NS_SCALE;
+	return (cyc * cyc2ns_scale) >> NS_SCALE_22;
 }
 
 unsigned long long sched_clock(void)
@@ -800,7 +813,8 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
 
 		tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new);
 		if (!(freq->flags & CPUFREQ_CONST_LOOPS))
-			vxtime.tsc_quot = (NSEC_PER_MSEC << NS_SCALE) / cpu_khz;
+			vxtime.tsc_quot = (NSEC_PER_MSEC << NS_SCALE_22) /
+					  tsc_khz;
 	}
 	
 	set_cyc2ns_scale(tsc_khz_ref);
@@ -1134,7 +1148,7 @@ void __init time_init(void)
 
 	vxtime.mode = VXTIME_TSC;
 	vxtime.quot = (NSEC_PER_SEC << NS_SCALE) / vxtime_hz;
-	vxtime.tsc_quot = (NSEC_PER_MSEC << NS_SCALE) / cpu_khz;
+	vxtime.tsc_quot = (NSEC_PER_MSEC << NS_SCALE_22) / tsc_khz;
 	vxtime.last_tsc = get_cycles_sync();
 	setup_irq(0, &irq0);
 
@@ -1230,7 +1244,7 @@ void time_init_gtod(void)
 	printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n", 
 		cpu_khz / 1000, cpu_khz % 1000);
 	vxtime.quot = (NSEC_PER_SEC << NS_SCALE) / vxtime_hz;
-	vxtime.tsc_quot = (NSEC_PER_MSEC << NS_SCALE) / tsc_khz;
+	vxtime.tsc_quot = (NSEC_PER_MSEC << NS_SCALE_22) / tsc_khz;
 	vxtime.last_tsc = get_cycles_sync();
 
 	set_cyc2ns_scale(tsc_khz);
diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c
index 69719d6..bb42909 100644
--- a/arch/x86_64/kernel/vsyscall.c
+++ b/arch/x86_64/kernel/vsyscall.c
@@ -53,6 +53,8 @@ int __vgetcpu_mode __section_vgetcpu_mode;
 	  ((v - fix_to_virt(VSYSCALL_FIRST_PAGE)) + __pa_symbol(&__vsyscall_0)); })
 
 #define NS_SCALE	10 /* 2^10, carefully chosen */
+#define NS_SCALE_22	22 /* 2^22, carefully chosen for TSC*/
+
 
 static __always_inline void timeval_normalize(struct timeval * tv)
 {
@@ -82,7 +84,7 @@ static __always_inline void do_vgettimeofday(struct timeval * tv)
 			if (t < __vxtime.last_tsc)
 				t = __vxtime.last_tsc;
 			nsec += ((t - __vxtime.last_tsc) *
-				 __vxtime.tsc_quot) >> NS_SCALE;
+				 __vxtime.tsc_quot) >> NS_SCALE_22;
 		} else {
 			nsec += ((readl((void *)fix_to_virt(VSYSCALL_HPET) +
 					0xf0) -
diff --git a/kernel/timer.c b/kernel/timer.c
index 0e831b5..e7a3e51 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1294,6 +1294,22 @@ static void update_wall_time(void)
 	xtime.tv_nsec = (s64)clock->xtime_nsec >> clock->shift;
 	clock->xtime_nsec -= (s64)xtime.tv_nsec << clock->shift;
 
+#ifdef CONFIG_X86_64
+#define NS_SCALE_22	22 /* 2^22, carefully chosen */
+	{
+#ifdef CONFIG_XEN
+		unsigned int tsc_khz = cpu_khz;
+#else
+		extern unsigned int tsc_khz;
+#endif
+		/* re-calculate vxtime.tsc_quot */
+		vxtime.tsc_quot = (NSEC_PER_MSEC << NS_SCALE_22) / tsc_khz *
+				  (((s64)clock->xtime_interval +
+				    (s64)clock->xtime_nsec) >>
+				   clock->shift) / NSEC_PER_MSEC;
+	}
+#endif
+
 	/* check to see if there is a new clocksource to use */
 	if (change_clocksource()) {
 		clock->error = 0;