kernel-2.6.18-194.26.1.el5.src.rpm

From: Peter Zijlstra <pzijlstr@redhat.com>
Date: Thu, 7 May 2009 15:59:41 +0200
Subject: [x86] scale cyc_2_nsec according to CPU frequency
Message-id: 20090507140138.248038000@chello.nl
O-Subject: [PATCH 3/5] RHEL-5: x86: scale cyc_2_nsec according to CPU frequency
Bugzilla: 297731
RH-Acked-by: Brian Maly <bmaly@redhat.com>
RH-Acked-by: Rik van Riel <riel@redhat.com>
CVE: CVE-2007-3719

Backport of:

commit 53d517cdbaac704352b3d0c10fecb99e0b54572e
Author: Guillaume Chazarain <guichaz@yahoo.fr>
Date:   Wed Jan 30 13:30:06 2008 +0100

    x86: scale cyc_2_nsec according to CPU frequency

    scale the sched_clock() cyc_2_nsec scaling factor according to
    CPU frequency changes.

    [ mingo@elte.hu: simplified it and fixed it for SMP. ]

    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

Signed-off-by: Peter Zijlstra <pzijlstr@redhat.com>
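
For reference, the conversion that set_cyc2ns_scale() and __cycles_2_ns() implement below is a
plain fixed-point multiply: scale = (10^6 << CYC2NS_SCALE_FACTOR) / cpu_khz, then
ns = (cyc * scale) >> CYC2NS_SCALE_FACTOR. The standalone user-space sketch that follows is not
part of the patch (the 2 GHz / 1 GHz figures and the 3e9-cycle delta are made-up examples); it
only illustrates why the factor must be recomputed on a cpufreq transition: the same TSC delta
maps to different nanosecond values at different frequencies, so a stale factor makes
sched_clock() drift.

/* Illustrative sketch only, not from the patch. */
#include <stdio.h>

#define CYC2NS_SCALE_FACTOR 10 /* 2^10, as in the patch */

/* ns-per-cycle as a fixed-point value with 10 fractional bits */
static unsigned long cyc2ns_scale(unsigned long cpu_khz)
{
	return (1000000UL << CYC2NS_SCALE_FACTOR) / cpu_khz;
}

static unsigned long long cycles_2_ns(unsigned long long cyc, unsigned long scale)
{
	return (cyc * scale) >> CYC2NS_SCALE_FACTOR;
}

int main(void)
{
	unsigned long long cyc = 3000000000ULL; /* hypothetical TSC delta */

	/* At 2 GHz: scale = (1000000 << 10) / 2000000 = 512, ns = cyc / 2 = 1.5e9.
	 * At 1 GHz: scale = 1024, ns = cyc = 3e9.
	 * Without rescaling after a frequency change, the old factor keeps
	 * being applied and the reported time is off by the frequency ratio. */
	printf("at 2 GHz: %llu ns\n", cycles_2_ns(cyc, cyc2ns_scale(2000000)));
	printf("at 1 GHz: %llu ns\n", cycles_2_ns(cyc, cyc2ns_scale(1000000)));
	return 0;
}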

diff --git a/arch/i386/kernel/tsc.c b/arch/i386/kernel/tsc.c
index 711eae4..254fdfb 100644
--- a/arch/i386/kernel/tsc.c
+++ b/arch/i386/kernel/tsc.c
@@ -12,6 +12,7 @@
 #include <linux/dmi.h>
 #include <linux/acpi.h>
 #include <linux/delay.h>
+#include <linux/percpu.h>
 #include <asm/delay.h>
 #include <asm/tsc.h>
 #include <asm/delay.h>
@@ -89,18 +90,46 @@ EXPORT_SYMBOL_GPL(mark_tsc_unstable);
  *
  *			-johnstul@us.ibm.com "math is hard, lets go shopping!"
  */
-static unsigned long cyc2ns_scale __read_mostly;
+static DEFINE_PER_CPU(unsigned long, cyc2ns);
 
 #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
 
-static inline void set_cyc2ns_scale(unsigned long cpu_khz)
+static inline unsigned long long __cycles_2_ns(unsigned long long cyc)
 {
-	cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz;
+	return cyc * __get_cpu_var(cyc2ns) >> CYC2NS_SCALE_FACTOR;
+}
+
+static inline void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
+{
+	unsigned long flags, prev_scale, *scale;
+	unsigned long long tsc_now, ns_now;
+
+	local_irq_save(flags);
+	sched_clock_idle_sleep_event();
+
+	scale = &per_cpu(cyc2ns, cpu);
+
+	rdtscll(tsc_now);
+	ns_now = __cycles_2_ns(tsc_now);
+
+	prev_scale = *scale;
+	if (cpu_khz)
+		*scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz;
+
+	sched_clock_idle_wakeup_event(0);
+	local_irq_restore(flags);
 }
 
 static inline unsigned long long cycles_2_ns(unsigned long long cyc)
 {
-	return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
+	unsigned long long ns;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	ns = __cycles_2_ns(cyc);
+	local_irq_restore(flags);
+
+	return ns;
 }
 
 #ifndef CONFIG_XEN
@@ -109,7 +138,8 @@ static inline unsigned long long cycles_2_ns(unsigned long long cyc)
  */
 unsigned long long sched_clock(void)
 {
-	unsigned long long this_offset;
+	unsigned long long clock = 0;
+	unsigned long flags;
 
 	/*
 	 * in the NUMA case we dont use the TSC as they are not
@@ -121,11 +151,14 @@ unsigned long long sched_clock(void)
 		/* no locking but a rare wrong value is not a big deal */
 		return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ);
 
+	local_irq_save(flags);
 	/* read the Time Stamp Counter: */
-	rdtscll(this_offset);
-
+	rdtscll(clock);
 	/* return the value in ns */
-	return cycles_2_ns(this_offset);
+	clock = __cycles_2_ns(clock);
+	local_irq_restore(flags);
+
+	return clock;
 }
 #endif
 
@@ -207,6 +240,7 @@ EXPORT_SYMBOL(recalibrate_cpu_khz);
 void tsc_init(void)
 {
 	u64 lpj;
+	int cpu;
 
 	if (!cpu_has_tsc || tsc_disable)
 		return;
@@ -221,7 +255,8 @@ void tsc_init(void)
 				(unsigned long)cpu_khz / 1000,
 				(unsigned long)cpu_khz % 1000);
 
-	set_cyc2ns_scale(cpu_khz);
+	for_each_possible_cpu(cpu)
+		set_cyc2ns_scale(cpu_khz, cpu);
 
 	lpj = ((u64)tsc_khz * 1000);
 	do_div(lpj, HZ);
@@ -301,7 +336,8 @@ time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data)
 						ref_freq, freq->new);
 			if (!(freq->flags & CPUFREQ_CONST_LOOPS)) {
 				tsc_khz = cpu_khz;
-				set_cyc2ns_scale(cpu_khz);
+				set_cyc2ns_scale(cpu_khz, get_cpu());
+				put_cpu();
 				/*
 				 * TSC based sched_clock turns
 				 * to junk w/ cpufreq
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c
index 897d5a8..9727728 100644
--- a/arch/x86_64/kernel/time.c
+++ b/arch/x86_64/kernel/time.c
@@ -296,12 +296,46 @@ static void set_rtc_mmss(unsigned long nowtime)
 	spin_unlock(&rtc_lock);
 }
 
-static inline unsigned long long cycles_2_ns(unsigned long long cyc)
+static DEFINE_PER_CPU(unsigned long, cyc2ns);
+
+#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
+
+static inline unsigned long long __cycles_2_ns(unsigned long long cyc)
 {
-	return (cyc * cyc2ns_scale) >> NS_SCALE;
+	return cyc * __get_cpu_var(cyc2ns) >> CYC2NS_SCALE_FACTOR;
+}
+
+static inline void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
+{
+	unsigned long flags, *scale;
+	unsigned long long tsc_now, ns_now;
+
+	local_irq_save(flags);
+	sched_clock_idle_sleep_event();
+
+	scale = &per_cpu(cyc2ns, cpu);
+
+	rdtscll(tsc_now);
+	ns_now = __cycles_2_ns(tsc_now);
+
+	if (cpu_khz)
+		*scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz;
+
+	sched_clock_idle_wakeup_event(0);
+	local_irq_restore(flags);
 }
 
-#define __cycles_2_ns(c) cycles_2_ns(c)
+static inline unsigned long long cycles_2_ns(unsigned long long cyc)
+{
+	unsigned long long ns;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	ns = __cycles_2_ns(cyc);
+	local_irq_restore(flags);
+
+	return ns;
+}
 
 /* monotonic_clock(): returns # of nanoseconds passed since time_init()
  *		Note: This function is required to return accurate
@@ -559,16 +593,9 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
 	return IRQ_HANDLED;
 }
 
-static unsigned int cyc2ns_scale __read_mostly;
-
-static inline void set_cyc2ns_scale(unsigned long cpu_khz)
-{
-	cyc2ns_scale = (NSEC_PER_MSEC << NS_SCALE) / cpu_khz;
-}
-
 unsigned long long sched_clock(void)
 {
-	unsigned long a = 0;
+	unsigned long flags, clock = 0;
 
 #if 0
 	/* Don't do a HPET read here. Using TSC always is much faster
@@ -584,8 +611,12 @@ unsigned long long sched_clock(void)
 	   CPUs. But the errors should be too small to matter for scheduling
 	   purposes. */
 
-	rdtscll(a);
-	return cycles_2_ns(a);
+	local_irq_save(flags);
+	rdtscll(clock);
+	clock = __cycles_2_ns(clock);
+	local_irq_restore(flags);
+
+	return clock;
 }
 
 static unsigned long get_cmos_time(void)
@@ -775,7 +806,8 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
 			vxtime.tsc_quot = (NSEC_PER_MSEC << NS_SCALE) / cpu_khz;
 	}
 	
-	set_cyc2ns_scale(tsc_khz_ref);
+	set_cyc2ns_scale(tsc_khz_ref, get_cpu());
+	put_cpu();
 
 	return 0;
 }
@@ -1039,6 +1071,7 @@ time_cpu_notifier(struct notifier_block *nb, unsigned long action, void *hcpu)
 void __init time_init(void)
 {
 	unsigned int hypervisor_khz;
+	int cpu;
 
 	if (nohpet)
 		vxtime.hpet_address = 0;
@@ -1106,7 +1139,8 @@ void __init time_init(void)
 	vxtime.last_tsc = get_cycles_sync();
 	setup_irq(0, &irq0);
 
-	set_cyc2ns_scale(tsc_khz);
+	for_each_possible_cpu(cpu)
+		set_cyc2ns_scale(tsc_khz, cpu);
 
 	hotcpu_notifier(time_cpu_notifier, 0);
 	time_cpu_notifier(NULL, CPU_ONLINE, (void *)(long)smp_processor_id());
@@ -1153,6 +1187,7 @@ __cpuinit int unsynchronized_tsc(void)
 void time_init_gtod(void)
 {
 	char *timetype;
+	int cpu;
 
 	if (unsynchronized_tsc())
 		notsc = 1;
@@ -1196,7 +1231,8 @@ void time_init_gtod(void)
 	vxtime.tsc_quot = (NSEC_PER_MSEC << NS_SCALE) / tsc_khz;
 	vxtime.last_tsc = get_cycles_sync();
 
-	set_cyc2ns_scale(tsc_khz);
+	for_each_possible_cpu(cpu)
+		set_cyc2ns_scale(tsc_khz, cpu);
 }
 
 __setup("report_lost_ticks", time_setup);