Sophie

Sophie

distrib > Scientific%20Linux > 5x > x86_64 > by-pkgid > 27922b4260f65d317aabda37e42bbbff > files > 1673

kernel-2.6.18-238.el5.src.rpm

From: Glauber Costa <glommer@redhat.com>
Date: Thu, 29 Oct 2009 17:04:58 -0400
Subject: [kvm] use upstream kvm_get_tsc_khz
Message-id: <1256835898-13168-1-git-send-email-glommer@redhat.com>
Patchwork-id: 21250
O-Subject: [PATCH] BZ531025 use upstream kvm_get_tsc_khz
Bugzilla: 531025
RH-Acked-by: Rik van Riel <riel@redhat.com>
RH-Acked-by: Prarit Bhargava <prarit@redhat.com>
RH-Acked-by: Marcelo Tosatti <mtosatti@redhat.com>

When we first came up with an approach to pv clock in KVM, only the tsc
adjustments for delay loop calculations were taken care of.
Since we did not intend to port the whole kvmclock infrastructure, a tiny
placeholder was written that allocated some boot memory, used it, and
turned off the clock later on.

When we actually did port kvmclock, this was not needed anymore, but since
it was already there, I left it in. The problem is that this code was run
*after* pvclock was set up. This means that, with pvclock already set up,
we would register a new area for pvclock in cpu0, free it, and then turn
the clock off for that cpu.

This was the reason we were seeing drifts after a while in some systems.
The bug was well hidden, because everything else was working for the other
cpus. Also, as most systems had a good tsc, we were able to keep going just
with the tsc for a while.

Signed-off-by: Glauber Costa <glommer@redhat.com>
RH-Bugzilla: 531025 and many others


diff --git a/arch/i386/kernel/cpu/hypervisor.c b/arch/i386/kernel/cpu/hypervisor.c
index 5913d34..f60d069 100644
--- a/arch/i386/kernel/cpu/hypervisor.c
+++ b/arch/i386/kernel/cpu/hypervisor.c
@@ -27,7 +27,7 @@
 #include <asm/processor.h>
 #include <asm/vmware.h>
 #include <asm/generic-hypervisor.h>
-#include <asm/kvm_hypervisor.h>
+#include <linux/jiffies.h>
 
 static inline void __cpuinit
 detect_hypervisor_vendor(struct cpuinfo_x86 *c)
diff --git a/arch/i386/kernel/kvmclock.c b/arch/i386/kernel/kvmclock.c
index 053d45f..7ed58c0 100644
--- a/arch/i386/kernel/kvmclock.c
+++ b/arch/i386/kernel/kvmclock.c
@@ -76,6 +76,23 @@ cycle_t kvm_clock_read(void)
 	return ret;
 }
 
+/*
+ * If we don't do that, there is the possibility that the guest
+ * will calibrate under heavy load - thus, getting a lower lpj -
+ * and execute the delays themselves without load. This is wrong,
+ * because no delay loop can finish beforehand.
+ * Any heuristic is subject to failure, because ultimately, a large
+ * pool of guests can be running and disturb each other. So we preset
+ * lpj here
+ */
+unsigned long kvm_get_tsc_khz(void)
+{
+	struct pvclock_vcpu_time_info *src;
+	src = &per_cpu(hv_clock, 0);
+	return pvclock_tsc_khz(src);
+}
+
+
 #ifdef CONFIG_X86_32
 static struct clocksource kvm_clock = {
 	.name = "kvm-clock",
diff --git a/include/asm-i386/kvm_hypervisor.h b/include/asm-i386/kvm_hypervisor.h
deleted file mode 100644
index 5399e3a..0000000
--- a/include/asm-i386/kvm_hypervisor.h
+++ /dev/null
@@ -1,74 +0,0 @@
-
-/*  Stripped down version of kvmclock.
-    Copyright (C) 2008 Glauber de Oliveira Costa, Red Hat Inc.
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program; if not, write to the Free Software
-    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
-*/
-
-#include <asm/kvm_para.h>
-#include <asm/pvclock-abi.h>
-#include <linux/bootmem.h>
-#include <asm/msr.h>
-#include <asm/apic.h>
-
-static inline unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src)
-{
-        u64 pv_tsc_khz = 1000000ULL << 32;
-
-        do_div(pv_tsc_khz, src->tsc_to_system_mul);
-        if (src->tsc_shift < 0)
-                pv_tsc_khz <<= -src->tsc_shift;
-        else
-                pv_tsc_khz >>= src->tsc_shift;
-        return pv_tsc_khz;
-}
-
-static inline unsigned long kvm_get_tsc_khz(void)
-{
-	int cpu = smp_processor_id();
-	int low, high;
-	unsigned long kvm_tsc_khz;
-	struct pvclock_vcpu_time_info *hv_clock;
-
-	if (!kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE))
-		return 0;
-
-	hv_clock = alloc_bootmem_pages(PAGE_SIZE);
-	if (!hv_clock)
-		return 0;
-
-	low = (int)__pa(hv_clock) | 1;
-	high = ((u64)__pa(hv_clock) >> 32);
-	printk(KERN_INFO "%s: cpu %d, msr %x:%x\n", __func__,
-	       cpu, high, low);
-
-	if (wrmsr_safe(MSR_KVM_SYSTEM_TIME, low, high)) {
-		printk(KERN_ERR "%s: MSR_KVM_SYSTEM_TIME init failure\n",
-				__func__);
-		free_bootmem(__pa(hv_clock), PAGE_SIZE);
-		return 0;
-	}
-
-	kvm_tsc_khz = pvclock_tsc_khz(hv_clock);
-
-	if (wrmsr_safe(MSR_KVM_SYSTEM_TIME, 0, 0))
-		printk(KERN_ERR "%s: MSR_KVM_SYSTEM_TIME shutdown failure\n",
-				__func__);
-	else
-		free_bootmem(__pa(hv_clock), PAGE_SIZE);
-
-	return kvm_tsc_khz;
-}
-
diff --git a/include/asm-i386/kvm_para.h b/include/asm-i386/kvm_para.h
index 9ac2b26..a34d09e 100644
--- a/include/asm-i386/kvm_para.h
+++ b/include/asm-i386/kvm_para.h
@@ -53,6 +53,7 @@ extern int kvm_register_clock(char *txt);
 extern void kvmclock_disable(void);
 extern unsigned long kvm_get_wallclock(void);
 extern unsigned int use_kvm_time;
+unsigned long kvm_get_tsc_khz(void);
 
 
 /* This instruction is vmcall.  On non-VT architectures, it will generate a
diff --git a/include/asm-x86_64/kvm_hypervisor.h b/include/asm-x86_64/kvm_hypervisor.h
deleted file mode 100644
index 5399e3a..0000000
--- a/include/asm-x86_64/kvm_hypervisor.h
+++ /dev/null
@@ -1,74 +0,0 @@
-
-/*  Stripped down version of kvmclock.
-    Copyright (C) 2008 Glauber de Oliveira Costa, Red Hat Inc.
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program; if not, write to the Free Software
-    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
-*/
-
-#include <asm/kvm_para.h>
-#include <asm/pvclock-abi.h>
-#include <linux/bootmem.h>
-#include <asm/msr.h>
-#include <asm/apic.h>
-
-static inline unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src)
-{
-        u64 pv_tsc_khz = 1000000ULL << 32;
-
-        do_div(pv_tsc_khz, src->tsc_to_system_mul);
-        if (src->tsc_shift < 0)
-                pv_tsc_khz <<= -src->tsc_shift;
-        else
-                pv_tsc_khz >>= src->tsc_shift;
-        return pv_tsc_khz;
-}
-
-static inline unsigned long kvm_get_tsc_khz(void)
-{
-	int cpu = smp_processor_id();
-	int low, high;
-	unsigned long kvm_tsc_khz;
-	struct pvclock_vcpu_time_info *hv_clock;
-
-	if (!kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE))
-		return 0;
-
-	hv_clock = alloc_bootmem_pages(PAGE_SIZE);
-	if (!hv_clock)
-		return 0;
-
-	low = (int)__pa(hv_clock) | 1;
-	high = ((u64)__pa(hv_clock) >> 32);
-	printk(KERN_INFO "%s: cpu %d, msr %x:%x\n", __func__,
-	       cpu, high, low);
-
-	if (wrmsr_safe(MSR_KVM_SYSTEM_TIME, low, high)) {
-		printk(KERN_ERR "%s: MSR_KVM_SYSTEM_TIME init failure\n",
-				__func__);
-		free_bootmem(__pa(hv_clock), PAGE_SIZE);
-		return 0;
-	}
-
-	kvm_tsc_khz = pvclock_tsc_khz(hv_clock);
-
-	if (wrmsr_safe(MSR_KVM_SYSTEM_TIME, 0, 0))
-		printk(KERN_ERR "%s: MSR_KVM_SYSTEM_TIME shutdown failure\n",
-				__func__);
-	else
-		free_bootmem(__pa(hv_clock), PAGE_SIZE);
-
-	return kvm_tsc_khz;
-}
-
diff --git a/include/asm-x86_64/kvm_para.h b/include/asm-x86_64/kvm_para.h
index 82604a7..f680ef1 100644
--- a/include/asm-x86_64/kvm_para.h
+++ b/include/asm-x86_64/kvm_para.h
@@ -55,6 +55,7 @@ extern unsigned long kvm_get_wallclock(void);
 #include <linux/clocksource.h> // for cycle_t
 cycle_t kvm_clock_read(void);
 extern unsigned int use_kvm_time;
+unsigned long kvm_get_tsc_khz(void);
 
 /* This instruction is vmcall.  On non-VT architectures, it will generate a
  * trap that we will then rewrite to the appropriate instruction.