Sophie

Sophie

distrib > Scientific%20Linux > 5x > x86_64 > by-pkgid > 27922b4260f65d317aabda37e42bbbff > files > 480

kernel-2.6.18-238.el5.src.rpm

From: Matthew Garrett <mjg@redhat.com>
Date: Tue, 10 Nov 2009 18:47:09 -0500
Subject: [cpufreq] add option to avoid smi while calibrating
Message-id: <1257878829-27945-1-git-send-email-mjg@redhat.com>
Patchwork-id: 21347
O-Subject: [PATCH] [RHEL 5.5] time: Provide option for avoiding SMIs when
	calibrating CPU speeds
Bugzilla: 513649
RH-Acked-by: Prarit Bhargava <prarit@redhat.com>
RH-Acked-by: Vitaly Mayatskikh <vmayatsk@redhat.com>

Bug 513649 covers cases where an SMI occurs during CPU speed calibration,
resulting in the frequency being incorrect. This patch provides code for
using the hpet as a sanity check and re-attempting calibration if it detects
that an SMI has occured. As this is very sensitive code, it is optional and
requires the "avoid_smi" kernel parameter to be passed at boot time.

diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c
index dcb5564..ea8b079 100644
--- a/arch/i386/kernel/setup.c
+++ b/arch/i386/kernel/setup.c
@@ -156,6 +156,7 @@ struct e820map e820;
 extern void early_cpu_init(void);
 extern void generic_apic_probe(char *);
 extern int root_mountflags;
+extern int avoid_smi;
 
 unsigned long saved_videomode;
 
@@ -830,6 +831,8 @@ static void __init parse_cmdline_early (char ** cmdline_p)
 		else if (!memcmp(from, "noexec=", 7))
 			noexec_setup(from + 7);
 
+		else if (!memcmp(from, "avoid_smi", 9))
+			avoid_smi = 1;
 
 #ifdef  CONFIG_X86_SMP
 		/*
diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c
index 55a9ab8..444a42f 100644
--- a/arch/i386/kernel/time.c
+++ b/arch/i386/kernel/time.c
@@ -63,6 +63,7 @@
 #include <linux/timex.h>
 
 #include <asm/hpet.h>
+#include <linux/hpet.h>
 
 #include <asm/arch_hooks.h>
 
diff --git a/arch/i386/kernel/tsc.c b/arch/i386/kernel/tsc.c
index 59af96b..b36d838 100644
--- a/arch/i386/kernel/tsc.c
+++ b/arch/i386/kernel/tsc.c
@@ -11,15 +11,20 @@
 #include <linux/init.h>
 #include <linux/dmi.h>
 #include <linux/acpi.h>
+#include <linux/acpi_pmtmr.h>
 #include <linux/delay.h>
 #include <asm/delay.h>
 #include <asm/tsc.h>
+#include <asm/hpet.h>
 #include <asm/delay.h>
 #include <asm/io.h>
 #include <asm/generic-hypervisor.h>
+#include <linux/hpet.h>
 
 #include "mach_timer.h"
 
+int avoid_smi;
+
 /*
  * On some systems the TSC frequency does not
  * change with the cpu frequency. So we need
@@ -29,6 +34,8 @@ unsigned int tsc_khz;
 
 int tsc_disable __cpuinitdata = 0;
 
+static unsigned long calculate_cpu_khz(void);
+
 #ifdef CONFIG_X86_TSC
 static int __init tsc_setup(char *str)
 {
@@ -127,6 +134,307 @@ unsigned long long sched_clock(void)
 	/* return the value in ns */
 	return cycles_2_ns(this_offset);
 }
+
+#define MAX_RETRIES     5
+#define SMI_TRESHOLD    50000
+
+/*
+ * Read TSC and the reference counters. Take care of SMI disturbance
+ */
+static u64 tsc_read_refs(u64 *p, int hpet)
+{
+	u64 t1, t2;
+	int i;
+
+	for (i = 0; i < MAX_RETRIES; i++) {
+		t1 = get_cycles();
+		if (hpet)
+			*p = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF;
+		else
+			*p = acpi_pm_read_early();
+		t2 = get_cycles();
+		if ((t2 - t1) < SMI_TRESHOLD)
+			return t2;
+	}
+	return ULLONG_MAX;
+}
+
+/*
+ * Calculate the TSC frequency from HPET reference
+ */
+static unsigned long calc_hpet_ref(u64 deltatsc, u64 hpet1, u64 hpet2)
+{
+	u64 tmp;
+
+	if (hpet2 < hpet1)
+		hpet2 += 0x100000000ULL;
+	hpet2 -= hpet1;
+	tmp = ((u64)hpet2 * hpet_readl(HPET_PERIOD));
+	do_div(tmp, 1000000);
+	do_div(deltatsc, tmp);
+
+	return (unsigned long) deltatsc;
+}
+
+/* Number of PMTMR ticks expected during calibration run */
+#define PMTMR_TICKS_PER_SEC 3579545
+
+/* Overrun value */
+#define ACPI_PM_OVRRUN  (1<<24)
+
+/*
+ * Calculate the TSC frequency from PMTimer reference
+ */
+static unsigned long calc_pmtimer_ref(u64 deltatsc, u64 pm1, u64 pm2)
+{
+	u64 tmp;
+
+	if (!pm1 && !pm2)
+		return ULONG_MAX;
+
+	if (pm2 < pm1)
+		pm2 += (u64)ACPI_PM_OVRRUN;
+	pm2 -= pm1;
+	tmp = pm2 * 1000000000LL;
+	do_div(tmp, PMTMR_TICKS_PER_SEC);
+	do_div(deltatsc, tmp);
+
+	return (unsigned long) deltatsc;
+}
+
+#define CAL_MS		10
+#define CAL_LATCH	(CLOCK_TICK_RATE / (1000 / CAL_MS))
+#define CAL_PIT_LOOPS	1000
+
+#define CAL2_MS		50
+#define CAL2_LATCH	(CLOCK_TICK_RATE / (1000 / CAL2_MS))
+#define CAL2_PIT_LOOPS	5000
+
+
+/*
+ * Try to calibrate the TSC against the Programmable
+ * Interrupt Timer and return the frequency of the TSC
+ * in kHz.
+ *
+ * Return ULONG_MAX on failure to calibrate.
+ */
+static unsigned long pit_calibrate_tsc(u32 latch, unsigned long ms, int loopmin)
+{
+	u64 tsc, t1, t2, delta;
+	unsigned long tscmin, tscmax;
+	int pitcnt;
+
+	/* Set the Gate high, disable speaker */
+	outb((inb(0x61) & ~0x02) | 0x01, 0x61);
+
+	/*
+	 * Setup CTC channel 2* for mode 0, (interrupt on terminal
+	 * count mode), binary count. Set the latch register to 50ms
+	 * (LSB then MSB) to begin countdown.
+	 */
+	outb(0xb0, 0x43);
+	outb(latch & 0xff, 0x42);
+	outb(latch >> 8, 0x42);
+
+	tsc = t1 = t2 = get_cycles();
+
+	pitcnt = 0;
+	tscmax = 0;
+	tscmin = ULONG_MAX;
+	while ((inb(0x61) & 0x20) == 0) {
+		t2 = get_cycles();
+		delta = t2 - tsc;
+		tsc = t2;
+		if ((unsigned long) delta < tscmin)
+			tscmin = (unsigned int) delta;
+		if ((unsigned long) delta > tscmax)
+			tscmax = (unsigned int) delta;
+		pitcnt++;
+	}
+
+	/*
+	 * Sanity checks:
+	 *
+	 * If we were not able to read the PIT more than loopmin
+	 * times, then we have been hit by a massive SMI
+	 *
+	 * If the maximum is 10 times larger than the minimum,
+	 * then we got hit by an SMI as well.
+	 */
+	if (pitcnt < loopmin || tscmax > 10 * tscmin)
+		return ULONG_MAX;
+
+	/* Calculate the PIT value */
+	delta = t2 - t1;
+	do_div(delta, ms);
+	return delta;
+}
+
+/**
+ * native_calibrate_tsc - calibrate the tsc on boot
+ */
+static unsigned long native_calibrate_tsc(void)
+{
+	u64 tsc1, tsc2, delta, ref1, ref2;
+	unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX;
+	unsigned long flags, latch, ms, tsc_khz;
+	int hpet = is_hpet_enabled(), i, loopmin;
+
+	tsc_khz = get_hypervisor_tsc_freq();
+	if (tsc_khz) {
+		printk(KERN_INFO "TSC: Frequency read from the hypervisor\n");
+		return tsc_khz;
+	}
+
+	/*
+	 * Run 5 calibration loops to get the lowest frequency value
+	 * (the best estimate). We use two different calibration modes
+	 * here:
+	 *
+	 * 1) PIT loop. We set the PIT Channel 2 to oneshot mode and
+	 * load a timeout of 50ms. We read the time right after we
+	 * started the timer and wait until the PIT count down reaches
+	 * zero. In each wait loop iteration we read the TSC and check
+	 * the delta to the previous read. We keep track of the min
+	 * and max values of that delta. The delta is mostly defined
+	 * by the IO time of the PIT access, so we can detect when a
+	 * SMI/SMM disturbance happend between the two reads. If the
+	 * maximum time is significantly larger than the minimum time,
+	 * then we discard the result and have another try.
+	 *
+	 * 2) Reference counter. If available we use the HPET or the
+	 * PMTIMER as a reference to check the sanity of that value.
+	 * We use separate TSC readouts and check inside of the
+	 * reference read for a SMI/SMM disturbance. We dicard
+	 * disturbed values here as well. We do that around the PIT
+	 * calibration delay loop as we have to wait for a certain
+	 * amount of time anyway.
+	 */
+
+	/* Preset PIT loop values */
+	latch = CAL_LATCH;
+	ms = CAL_MS;
+	loopmin = CAL_PIT_LOOPS;
+
+	for (i = 0; i < 3; i++) {
+		unsigned long tsc_pit_khz;
+
+		/*
+		 * Read the start value and the reference count of
+		 * hpet/pmtimer when available. Then do the PIT
+		 * calibration, which will take at least 50ms, and
+		 * read the end value.
+		 */
+		local_irq_save(flags);
+		tsc1 = tsc_read_refs(&ref1, hpet);
+		tsc_pit_khz = pit_calibrate_tsc(latch, ms, loopmin);
+		tsc2 = tsc_read_refs(&ref2, hpet);
+		local_irq_restore(flags);
+
+		/* Pick the lowest PIT TSC calibration so far */
+		tsc_pit_min = min(tsc_pit_min, tsc_pit_khz);
+
+		/* hpet or pmtimer available ? */
+		if (!hpet && !ref1 && !ref2)
+			continue;
+
+		/* Check, whether the sampling was disturbed by an SMI */
+		if (tsc1 == ULLONG_MAX || tsc2 == ULLONG_MAX)
+			continue;
+
+		tsc2 = (tsc2 - tsc1) * 1000000LL;
+		if (hpet)
+			tsc2 = calc_hpet_ref(tsc2, ref1, ref2);
+		else
+			tsc2 = calc_pmtimer_ref(tsc2, ref1, ref2);
+
+		tsc_ref_min = min(tsc_ref_min, (unsigned long) tsc2);
+
+		/* Check the reference deviation */
+		delta = ((u64) tsc_pit_min) * 100;
+		do_div(delta, tsc_ref_min);
+
+		/*
+		 * If both calibration results are inside a 10% window
+		 * then we can be sure, that the calibration
+		 * succeeded. We break out of the loop right away. We
+		 * use the reference value, as it is more precise.
+		 */
+		if (delta >= 90 && delta <= 110) {
+			printk(KERN_INFO
+			       "TSC: PIT calibration matches %s. %d loops\n",
+			       hpet ? "HPET" : "PMTIMER", i + 1);
+			return tsc_ref_min;
+		}
+
+		/*
+		 * Check whether PIT failed more than once. This
+		 * happens in virtualized environments. We need to
+		 * give the virtual PC a slightly longer timeframe for
+		 * the HPET/PMTIMER to make the result precise.
+		 */
+		if (i == 1 && tsc_pit_min == ULONG_MAX) {
+			latch = CAL2_LATCH;
+			ms = CAL2_MS;
+			loopmin = CAL2_PIT_LOOPS;
+		}
+	}
+
+	/*
+	 * Now check the results.
+	 */
+	if (tsc_pit_min == ULONG_MAX) {
+		/* PIT gave no useful value */
+		printk(KERN_WARNING "TSC: Unable to calibrate against PIT\n");
+
+		/* We don't have an alternative source, disable TSC */
+		if (!hpet && !ref1 && !ref2) {
+			printk("TSC: No reference (HPET/PMTIMER) available\n");
+			return 0;
+		}
+
+		/* The alternative source failed as well, disable TSC */
+		if (tsc_ref_min == ULONG_MAX) {
+			printk(KERN_WARNING "TSC: HPET/PMTIMER calibration "
+			       "failed.\n");
+			return 0;
+		}
+
+		/* Use the alternative source */
+		printk(KERN_INFO "TSC: using %s reference calibration\n",
+		       hpet ? "HPET" : "PMTIMER");
+
+		return tsc_ref_min;
+	}
+
+	/* We don't have an alternative source, use the PIT calibration value */
+	if (!hpet && !ref1 && !ref2) {
+		printk(KERN_INFO "TSC: Using PIT calibration value\n");
+		return tsc_pit_min;
+	}
+
+	/* The alternative source failed, use the PIT calibration value */
+	if (tsc_ref_min == ULONG_MAX) {
+		printk(KERN_WARNING "TSC: HPET/PMTIMER calibration failed. "
+		       "Using PIT calibration\n");
+		return tsc_pit_min;
+	}
+
+	/*
+	 * The calibration values differ too much. In doubt, we use
+	 * the PIT value as we know that there are PMTIMERs around
+	 * running at double speed. At least we let the user know:
+	 */
+	printk(KERN_WARNING "TSC: PIT calibration deviates from %s: %lu %lu.\n",
+	       hpet ? "HPET" : "PMTIMER", tsc_pit_min, tsc_ref_min);
+	printk(KERN_INFO "TSC: Using PIT calibration value\n");
+	return tsc_pit_min;
+}
+#else
+static unsigned long native_calibrate_tsc(void)
+{
+	return calculate_cpu_khz();
+}
 #endif
 
 static unsigned long calculate_cpu_khz(void)
@@ -189,7 +497,12 @@ int recalibrate_cpu_khz(void)
 	unsigned long cpu_khz_old = cpu_khz;
 
 	if (cpu_has_tsc) {
-		cpu_khz = calculate_cpu_khz();
+		if (avoid_smi) {
+			printk(KERN_INFO
+			       "Enabling SMI avoidance in CPU calibration\n");
+			cpu_khz = native_calibrate_tsc();
+		} else
+			cpu_khz = calculate_cpu_khz();
 		tsc_khz = cpu_khz;
 		cpu_data[0].loops_per_jiffy =
 			cpufreq_scale(cpu_data[0].loops_per_jiffy,
@@ -211,7 +524,11 @@ void tsc_init(void)
 	if (!cpu_has_tsc || tsc_disable)
 		return;
 
-	cpu_khz = calculate_cpu_khz();
+	if (avoid_smi) {
+		printk(KERN_INFO "Enabling SMI avoidance in CPU calibration\n");
+		cpu_khz = native_calibrate_tsc();
+	} else
+		cpu_khz = calculate_cpu_khz();
 	tsc_khz = cpu_khz;
 
 	if (!cpu_khz)
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index 94421cd..c0bc8cd 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -123,6 +123,7 @@ EXPORT_SYMBOL_GPL(edid_info);
 struct e820map e820;
 
 extern int root_mountflags;
+extern int avoid_smi;
 
 char command_line[COMMAND_LINE_SIZE];
 
@@ -307,6 +308,9 @@ static __init void parse_cmdline_early (char ** cmdline_p)
 		if (c != ' ') 
 			goto next_char; 
 
+		else if (!memcmp(from, "avoid_smi", 9))
+			avoid_smi = 1;
+
 #ifdef  CONFIG_SMP
 		/*
 		 * If the BIOS enumerates physical processors before logical,
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c
index 73abf98..2f5a849 100644
--- a/arch/x86_64/kernel/time.c
+++ b/arch/x86_64/kernel/time.c
@@ -35,6 +35,7 @@
 #include <linux/kallsyms.h>
 #include <linux/efi.h>
 #include <linux/acpi.h>
+#include <linux/acpi_pmtmr.h>
 #include <linux/delay.h>
 #include <linux/kvm_para.h>
 #ifdef CONFIG_ACPI
@@ -70,6 +71,7 @@ DEFINE_SPINLOCK(i8253_lock);
 
 int nohpet __initdata = 0;
 static int notsc __initdata = 0;
+int avoid_smi;
 
 #define USEC_PER_TICK (USEC_PER_SEC / HZ)
 #define NSEC_PER_TICK (NSEC_PER_SEC / HZ)
@@ -845,6 +847,308 @@ core_initcall(cpufreq_tsc);
  */
 
 #define TICK_COUNT 100000000
+#define SMI_TRESHOLD 50000
+#define MAX_RETRIES 5
+
+/*
+ * Read TSC and the reference counters. Take care of SMI disturbance
+ */
+static u64 tsc_read_refs(u64 *p, int hpet)
+{
+	u64 t1, t2;
+	int i;
+
+	for (i = 0; i < MAX_RETRIES; i++) {
+		t1 = get_cycles();
+		if (hpet)
+			*p = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF;
+		else
+			*p = inl(pmtmr_ioport) & ACPI_PM_MASK;
+		t2 = get_cycles();
+		if ((t2 - t1) < SMI_TRESHOLD)
+			return t2;
+	}
+	return ULLONG_MAX;
+}
+
+/*
+ * Calculate the TSC frequency from HPET reference
+ */
+static unsigned long calc_hpet_ref(u64 deltatsc, u64 hpet1, u64 hpet2)
+{
+	u64 tmp;
+
+	if (hpet2 < hpet1)
+		hpet2 += 0x100000000ULL;
+	hpet2 -= hpet1;
+	tmp = ((u64)hpet2 * hpet_readl(HPET_PERIOD));
+	do_div(tmp, 1000000);
+	do_div(deltatsc, tmp);
+
+	return (unsigned long) deltatsc;
+}
+
+/* Number of PMTMR ticks expected during calibration run */
+#define PMTMR_TICKS_PER_SEC 3579545
+
+/* Overrun value */
+#define ACPI_PM_OVRRUN  (1<<24)
+
+/*
+ * Calculate the TSC frequency from PMTimer reference
+ */
+static unsigned long calc_pmtimer_ref(u64 deltatsc, u64 pm1, u64 pm2)
+{
+	u64 tmp;
+
+	if (!pm1 && !pm2)
+		return ULONG_MAX;
+
+	if (pm2 < pm1)
+		pm2 += (u64)ACPI_PM_OVRRUN;
+	pm2 -= pm1;
+	tmp = pm2 * 1000000000LL;
+	do_div(tmp, PMTMR_TICKS_PER_SEC);
+	do_div(deltatsc, tmp);
+
+	return (unsigned long) deltatsc;
+}
+
+#define CAL_MS		10
+#define CAL_LATCH	(CLOCK_TICK_RATE / (1000 / CAL_MS))
+#define CAL_PIT_LOOPS	1000
+
+#define CAL2_MS		50
+#define CAL2_LATCH	(CLOCK_TICK_RATE / (1000 / CAL2_MS))
+#define CAL2_PIT_LOOPS	5000
+
+/*
+ * Try to calibrate the TSC against the Programmable
+ * Interrupt Timer and return the frequency of the TSC
+ * in kHz.
+ *
+ * Return ULONG_MAX on failure to calibrate.
+ */
+static unsigned long pit_calibrate_tsc_smi(u32 latch, unsigned long ms, int 
+loopmin)
+{
+	u64 tsc, t1, t2, delta;
+	unsigned long tscmin, tscmax;
+	int pitcnt;
+
+	/* Set the Gate high, disable speaker */
+	outb((inb(0x61) & ~0x02) | 0x01, 0x61);
+
+	/*
+	 * Setup CTC channel 2* for mode 0, (interrupt on terminal
+	 * count mode), binary count. Set the latch register to 50ms
+	 * (LSB then MSB) to begin countdown.
+	 */
+	outb(0xb0, 0x43);
+	outb(latch & 0xff, 0x42);
+	outb(latch >> 8, 0x42);
+
+	tsc = t1 = t2 = get_cycles();
+
+	pitcnt = 0;
+	tscmax = 0;
+	tscmin = ULONG_MAX;
+	while ((inb(0x61) & 0x20) == 0) {
+		t2 = get_cycles();
+		delta = t2 - tsc;
+		tsc = t2;
+		if ((unsigned long) delta < tscmin)
+			tscmin = (unsigned int) delta;
+		if ((unsigned long) delta > tscmax)
+			tscmax = (unsigned int) delta;
+		pitcnt++;
+	}
+
+	/*
+	 * Sanity checks:
+	 *
+	 * If we were not able to read the PIT more than loopmin
+	 * times, then we have been hit by a massive SMI
+	 *
+	 * If the maximum is 10 times larger than the minimum,
+	 * then we got hit by an SMI as well.
+	 */
+	if (pitcnt < loopmin || tscmax > 10 * tscmin)
+		return ULONG_MAX;
+
+	/* Calculate the PIT value */
+	delta = t2 - t1;
+	do_div(delta, ms);
+	return delta;
+}
+
+/**
+ * native_calibrate_tsc - calibrate the tsc on boot
+ */
+unsigned long native_calibrate_tsc(void)
+{
+	u64 tsc1, tsc2, delta, ref1, ref2;
+	unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX;
+	unsigned long flags, latch, ms, tsc_khz;
+	int hpet = is_hpet_enabled(), i, loopmin;
+
+#ifndef CONFIG_XEN
+	tsc_khz = get_hypervisor_tsc_freq();
+	if (tsc_khz) {
+		printk(KERN_INFO "TSC: Frequency read from the hypervisor\n");
+		return tsc_khz;
+	}
+#endif
+
+	/*
+	 * Run 5 calibration loops to get the lowest frequency value
+	 * (the best estimate). We use two different calibration modes
+	 * here:
+	 *
+	 * 1) PIT loop. We set the PIT Channel 2 to oneshot mode and
+	 * load a timeout of 50ms. We read the time right after we
+	 * started the timer and wait until the PIT count down reaches
+	 * zero. In each wait loop iteration we read the TSC and check
+	 * the delta to the previous read. We keep track of the min
+	 * and max values of that delta. The delta is mostly defined
+	 * by the IO time of the PIT access, so we can detect when a
+	 * SMI/SMM disturbance happend between the two reads. If the
+	 * maximum time is significantly larger than the minimum time,
+	 * then we discard the result and have another try.
+	 *
+	 * 2) Reference counter. If available we use the HPET or the
+	 * PMTIMER as a reference to check the sanity of that value.
+	 * We use separate TSC readouts and check inside of the
+	 * reference read for a SMI/SMM disturbance. We dicard
+	 * disturbed values here as well. We do that around the PIT
+	 * calibration delay loop as we have to wait for a certain
+	 * amount of time anyway.
+	 */
+
+	/* Preset PIT loop values */
+	latch = CAL_LATCH;
+	ms = CAL_MS;
+	loopmin = CAL_PIT_LOOPS;
+
+	for (i = 0; i < 3; i++) {
+		unsigned long tsc_pit_khz;
+
+		/*
+		 * Read the start value and the reference count of
+		 * hpet/pmtimer when available. Then do the PIT
+		 * calibration, which will take at least 50ms, and
+		 * read the end value.
+		 */
+		local_irq_save(flags);
+		tsc1 = tsc_read_refs(&ref1, hpet);
+		tsc_pit_khz = pit_calibrate_tsc_smi(latch, ms, loopmin);
+		tsc2 = tsc_read_refs(&ref2, hpet);
+		local_irq_restore(flags);
+
+		/* Pick the lowest PIT TSC calibration so far */
+		tsc_pit_min = min(tsc_pit_min, tsc_pit_khz);
+
+		/* hpet or pmtimer available ? */
+		if (!hpet && !ref1 && !ref2)
+			continue;
+
+		/* Check, whether the sampling was disturbed by an SMI 
+*/
+		if (tsc1 == ULLONG_MAX || tsc2 == ULLONG_MAX)
+			continue;
+
+		tsc2 = (tsc2 - tsc1) * 1000000LL;
+		if (hpet)
+			tsc2 = calc_hpet_ref(tsc2, ref1, ref2);
+		else
+			tsc2 = calc_pmtimer_ref(tsc2, ref1, ref2);
+
+		tsc_ref_min = min(tsc_ref_min, (unsigned long) tsc2);
+
+		/* Check the reference deviation */
+		delta = ((u64) tsc_pit_min) * 100;
+		do_div(delta, tsc_ref_min);
+
+		/*
+		 * If both calibration results are inside a 10% window
+		 * then we can be sure, that the calibration
+		 * succeeded. We break out of the loop right away. We
+		 * use the reference value, as it is more precise.
+		 */
+		if (delta >= 90 && delta <= 110) {
+			printk(KERN_INFO
+			       "TSC: PIT calibration matches %s. %d loops\n",
+			       hpet ? "HPET" : "PMTIMER", i + 1);
+			return tsc_ref_min;
+		}
+
+		/*
+		 * Check whether PIT failed more than once. This
+		 * happens in virtualized environments. We need to
+		 * give the virtual PC a slightly longer timeframe for
+		 * the HPET/PMTIMER to make the result precise.
+		 */
+		if (i == 1 && tsc_pit_min == ULONG_MAX) {
+			latch = CAL2_LATCH;
+			ms = CAL2_MS;
+			loopmin = CAL2_PIT_LOOPS;
+		}
+	}
+
+	/*
+	 * Now check the results.
+	 */
+	if (tsc_pit_min == ULONG_MAX) {
+		/* PIT gave no useful value */
+		printk(KERN_WARNING "TSC: Unable to calibrate against PIT\n");
+
+		/* We don't have an alternative source, disable TSC */
+		if (!hpet && !ref1 && !ref2) {
+			printk("TSC: No reference (HPET/PMTIMER) available\n");
+			return 0;
+		}
+
+		/* The alternative source failed as well, disable TSC */
+		if (tsc_ref_min == ULONG_MAX) {
+			printk(KERN_WARNING "TSC: HPET/PMTIMER calibration "
+			       "failed.\n");
+			return 0;
+		}
+
+		/* Use the alternative source */
+		printk(KERN_INFO "TSC: using %s reference calibration\n",
+		       hpet ? "HPET" : "PMTIMER");
+
+		return tsc_ref_min;
+	}
+
+	/* We don't have an alternative source, use the PIT calibration 
+value */
+	if (!hpet && !ref1 && !ref2) {
+		printk(KERN_INFO "TSC: Using PIT calibration value\n");
+		return tsc_pit_min;
+	}
+
+	/* The alternative source failed, use the PIT calibration value 
+*/
+	if (tsc_ref_min == ULONG_MAX) {
+		printk(KERN_WARNING "TSC: HPET/PMTIMER calibration failed. "
+		       "Using PIT calibration\n");
+		return tsc_pit_min;
+	}
+
+	/*
+	 * The calibration values differ too much. In doubt, we use
+	 * the PIT value as we know that there are PMTIMERs around
+	 * running at double speed. At least we let the user know:
+	 */
+	printk(KERN_WARNING "TSC: PIT calibration deviates from %s: %lu %lu.\n",
+	       hpet ? "HPET" : "PMTIMER", tsc_pit_min, tsc_ref_min);
+	printk(KERN_INFO "TSC: Using PIT calibration value\n");
+	return tsc_pit_min;
+}
+
+
 
 static unsigned int __init hpet_calibrate_tsc(void)
 {
@@ -870,7 +1174,6 @@ static unsigned int __init hpet_calibrate_tsc(void)
 		/ ((hpet_now - hpet_start) * hpet_period / 1000);
 }
 
-
 /*
  * pit_calibrate_tsc() uses the speaker output (channel 2) of
  * the PIT. This is better than using the timer interrupt output,
@@ -1101,6 +1404,20 @@ void __init time_init(void)
 		/* no need to get frequency here, since we'll skip the calibrate loop anyway */
 		timekeeping_use_tsc = 1;
 		vxtime.last_kvm = kvm_clock_read();
+	} else if (avoid_smi) {
+		printk(KERN_INFO "Enabling SMI avoidance in CPU calibration\n");
+		if (hpet_use_timer) {
+			tick_nsec = TICK_NSEC_HPET;
+			timename = "HPET";
+		} else if (pmtmr_ioport && !vxtime.hpet_address) {
+			vxtime_hz = PM_TIMER_FREQUENCY;
+			timename = "PM";
+			pit_init();
+		} else {
+			pit_init();
+			timename = "PIT";
+		}
+		tsc_khz = native_calibrate_tsc();
 	} else if (hpet_use_timer) {
 		/* set tick_nsec to use the proper rate for HPET */
 	  	tick_nsec = TICK_NSEC_HPET;
@@ -1113,7 +1430,7 @@ void __init time_init(void)
 		pit_init();
 		tsc_khz = pit_calibrate_tsc();
 #endif
-	} else {
+	} else {	
 		pit_init();
 		tsc_khz = pit_calibrate_tsc();
 		timename = "PIT";
@@ -1122,7 +1439,7 @@ void __init time_init(void)
 	cpu_khz = tsc_khz;
 	if (cpu_has(&boot_cpu_data, X86_FEATURE_CONSTANT_TSC) &&
 		boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
-		boot_cpu_data.x86 == 16)
+		boot_cpu_data.x86 == 16 && !avoid_smi)
 		cpu_khz = tsc_calibrate_cpu_khz();
 
 	/* Should we get tsc_khz from the hypervisor? */
diff --git a/drivers/clocksource/acpi_pm.c b/drivers/clocksource/acpi_pm.c
index a770228..e10c728 100644
--- a/drivers/clocksource/acpi_pm.c
+++ b/drivers/clocksource/acpi_pm.c
@@ -40,7 +40,7 @@ static inline u32 read_pmtmr(void)
 	return inl(pmtmr_ioport) & ACPI_PM_MASK;
 }
 
-static cycle_t acpi_pm_read_verified(void)
+cycle_t acpi_pm_read_verified(void)
 {
 	u32 v1 = 0, v2 = 0, v3 = 0;
 
diff --git a/include/linux/acpi_pmtmr.h b/include/linux/acpi_pmtmr.h
new file mode 100644
index 0000000..0042599
--- /dev/null
+++ b/include/linux/acpi_pmtmr.h
@@ -0,0 +1,42 @@
+#ifndef _ACPI_PMTMR_H_
+#define _ACPI_PMTMR_H_
+
+#include <linux/clocksource.h>
+
+/* Number of PMTMR ticks expected during calibration run */
+#define PMTMR_TICKS_PER_SEC 3579545
+
+/* limit it to 24 bits */
+#define ACPI_PM_MASK CLOCKSOURCE_MASK(24)
+
+/* Overrun value */
+#define ACPI_PM_OVRRUN	(1<<24)
+
+#ifdef CONFIG_X86_PM_TIMER
+
+extern cycle_t acpi_pm_read_verified(void);
+extern u32 pmtmr_ioport;
+
+#ifndef CONFIG_X86_64
+static inline cycle_t acpi_pm_read_early(void)
+{
+	if (!pmtmr_ioport)
+		return 0;
+	/* mask the output to 24 bits */
+	return acpi_pm_read_verified() & ACPI_PM_MASK;
+}
+#endif
+
+extern void pmtimer_wait(unsigned);
+
+#else
+
+static inline cycle_t acpi_pm_read_early(void)
+{
+	return 0;
+}
+
+#endif
+
+#endif
+