From: Prarit Bhargava <prarit@redhat.com> Date: Tue, 4 Nov 2008 10:06:23 -0500 Subject: [x86_64] incorrect cpu_khz calculation for AMD processor Message-id: 20081104150623.9873.93147.sendpatchset@prarit.bos.redhat.com O-Subject: [RHEL5.4 PATCH] Fix incorrect cpu_khz calculation for AMD processors Bugzilla: 467782 RH-Acked-by: Aristeu Rozanski <aris@redhat.com> RH-Acked-by: Pete Zaitcev <zaitcev@redhat.com> Backport upstream's calibrate_cpu() into RHEL5. While booting a cluster, LLNL noticed that a small number of the cpus were being brought up with a bogus cpu_khz value: correct: time.c: Detected 2311.850 MHz processor incorrect: time.c: Detected 2288.237 MHz processor This eventually leads to a bogus calibration of the HPET and/or tsc, which leaves the system unstable. The problem is that the existing AMD tsc_calibrate_cpu_khz function does not check to see if the perfctr being used is busy. If the perfctr is busy, this leads to an incorrect value for the cpu_khz. The solution is to backport the upstream version of calibrate_cpu() into RHEL5 (note that this backport doesn't eliminate the possibility of an incorrect calibration -- but the patch does significantly reduce the probability of it happening). Brew built by me, tested by woodard at LLNL. Resolves BZ 467782. 
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index 23af13e..fa1985f 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c @@ -596,32 +596,46 @@ static unsigned long get_cmos_time(void) #define TICK_COUNT 100000000 static unsigned int __init tsc_calibrate_cpu_khz(void) { - int tsc_start, tsc_now; - int no_ctr_free; - unsigned long evntsel3 = 0, pmc3 = 0, pmc_now = 0; - unsigned long flags; - - rdmsrl(MSR_K7_EVNTSEL3, evntsel3); - wrmsrl(MSR_K7_EVNTSEL3, 0); - rdmsrl(MSR_K7_PERFCTR3, pmc3); - local_irq_save(flags); - /* start meauring cycles, incrementing from 0 */ - wrmsrl(MSR_K7_PERFCTR3, 0); - wrmsrl(MSR_K7_EVNTSEL3, 1 << 22 | 3 << 16 | 0x76); - rdtscl(tsc_start); - do { - rdmsrl(MSR_K7_PERFCTR3, pmc_now); - tsc_now = get_cycles_sync(); - } while ((tsc_now - tsc_start) < TICK_COUNT); - - local_irq_restore(flags); - wrmsrl(MSR_K7_EVNTSEL3, 0); - wrmsrl(MSR_K7_PERFCTR3, pmc3); - wrmsrl(MSR_K7_EVNTSEL3, evntsel3); - - return pmc_now * tsc_khz / (tsc_now - tsc_start); -} + int tsc_start, tsc_now; + int i, no_ctr_free; + unsigned long evntsel3 = 0, pmc3 = 0, pmc_now = 0; + unsigned long flags; + for (i = 0; i < 4; i++) + if (avail_to_resrv_perfctr_nmi_bit(i)) + break; + no_ctr_free = (i == 4); + if (no_ctr_free) { + i = 3; + rdmsrl(MSR_K7_EVNTSEL3, evntsel3); + wrmsrl(MSR_K7_EVNTSEL3, 0); + rdmsrl(MSR_K7_PERFCTR3, pmc3); + } else { + reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i); + reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i); + } + local_irq_save(flags); + /* start measuring cycles, incrementing from 0 */ + wrmsrl(MSR_K7_PERFCTR0 + i, 0); + wrmsrl(MSR_K7_EVNTSEL0 + i, 1 << 22 | 3 << 16 | 0x76); + rdtscl(tsc_start); + do { + rdmsrl(MSR_K7_PERFCTR0 + i, pmc_now); + tsc_now = get_cycles(); + } while ((tsc_now - tsc_start) < TICK_COUNT); + + local_irq_restore(flags); + if (no_ctr_free) { + wrmsrl(MSR_K7_EVNTSEL3, 0); + wrmsrl(MSR_K7_PERFCTR3, pmc3); + wrmsrl(MSR_K7_EVNTSEL3, evntsel3); + } else { + release_perfctr_nmi(MSR_K7_PERFCTR0 + i); + 
release_evntsel_nmi(MSR_K7_EVNTSEL0 + i); + } + + return pmc_now * tsc_khz / (tsc_now - tsc_start); +} #ifdef CONFIG_CPU_FREQ /* Frequency scaling support. Adjust the TSC based timer when the cpu frequency