Sophie

Sophie

distrib > Scientific%20Linux > 5x > x86_64 > by-pkgid > fc11cd6e1c513a17304da94a5390f3cd > files > 3963

kernel-2.6.18-194.11.1.el5.src.rpm

From: Glauber Costa <glommer@redhat.com>
Date: Tue, 11 Aug 2009 13:32:01 -0400
Subject: [x86] kvm: import kvmclock.c
Message-id: 1250011926-31633-3-git-send-email-glommer@redhat.com
O-Subject: [PATCH v2 2/7] Import kvmclock.c from upstream kernel
Bugzilla: 476075

This is a copy of kvmclock.c implementation from upstream kernel. The
relevant differences are:
 *  use_kvm_time variable to tell whether or not we succeeded in turning our clocksource on.
    This is needed because upstream hooks functions into structures like smp_ops, machine_ops
    and pv_ops, which we lack.
 * Make some functions non-static, as we'll now call them from other object files, instead of
   dealing with hooks
 * removal of kvm_set_wallclock(), unused.
 * removal of kvm_get_tsc_khz() and kvm_get_preset_lpj(): we already provide them in another file,
   and I decided not to mess with that.
 * ifdef CONFIG_X86_32 around the clocksource definition, because x86_64 does not have clocksources.
 * removal of kvm_setup_secondary_clock() and kvm_smp_prepare_boot_cpu(). It is hard for us to hook them,
   so we'll call them directly when needed.
 * remove initialization of smp_ops, machine_ops and pv_ops in kvmclock_init()

[ v2: fixed a silly typo ]

Signed-off-by: Glauber Costa <glommer@redhat.com>

diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile
index 1410349..89cacf0 100644
--- a/arch/i386/kernel/Makefile
+++ b/arch/i386/kernel/Makefile
@@ -42,6 +42,7 @@ obj-$(CONFIG_HPET_TIMER) 	+= hpet.o
 obj-$(CONFIG_K8_NB)		+= k8.o
 ifndef CONFIG_XEN
 obj-y				+= pvclock.o
+obj-y				+= kvmclock.o
 endif
 
 EXTRA_AFLAGS   := -traditional
diff --git a/arch/i386/kernel/kvmclock.c b/arch/i386/kernel/kvmclock.c
new file mode 100644
index 0000000..6edd2e2
--- /dev/null
+++ b/arch/i386/kernel/kvmclock.c
@@ -0,0 +1,130 @@
+/*  KVM paravirtual clock driver. A clocksource implementation
+    Copyright (C) 2008 Glauber de Oliveira Costa, Red Hat Inc.
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+*/
+
+#include <linux/clocksource.h>
+#include <linux/kvm_para.h>
+#include <asm/pvclock.h>
+#ifdef CONFIG_X86_32
+#include <asm/arch_hooks.h>
+#endif
+#include <asm/msr.h>
+#include <asm/apic.h>
+#include <linux/percpu.h>
+
+#define KVM_SCALE 22
+
+static int kvmclock = 1;	/* 0 once the "no-kvmclock" parameter is parsed */
+/* Cleared by kvmclock_init() if registering the boot clock fails. */
+unsigned int use_kvm_time = 1; /* RHEL specific */
+
+static int parse_no_kvmclock(char *arg)
+{
+	kvmclock = 0;	/* arg is ignored: the parameter's presence is enough */
+	return 0;
+}
+early_param("no-kvmclock", parse_no_kvmclock);
+
+/* The hypervisor periodically writes time information into these areas. */
+static DEFINE_PER_CPU(struct pvclock_vcpu_time_info, hv_clock);
+static struct pvclock_wall_clock wall_clock;
+
+/*
+ * Return the wall-clock time in seconds. The wallclock is the time of day
+ * at boot as written by the hypervisor; this CPU's system time is passed
+ * along so that the time elapsed since then is accounted for.
+ */
+unsigned long kvm_get_wallclock(void)
+{
+	u64 wc_phys = __pa(&wall_clock);
+	struct pvclock_vcpu_time_info *cpu_time;
+	struct timespec now;
+
+	/* hand over wall_clock's physical address: low word, then high word */
+	wrmsr(MSR_KVM_WALL_CLOCK, (int)wc_phys, (int)(wc_phys >> 32));
+
+	/* get_cpu_var()/put_cpu_var() bracket the use of the per-cpu data */
+	cpu_time = &get_cpu_var(hv_clock);
+	pvclock_read_wallclock(&wall_clock, cpu_time, &now);
+	put_cpu_var(hv_clock);
+
+	return now.tv_sec;
+}
+
+/* Read the paravirtual clock through this CPU's hv_clock area. */
+cycle_t kvm_clock_read(void)
+{
+	struct pvclock_vcpu_time_info *cpu_time = &get_cpu_var(hv_clock);
+	cycle_t now;
+
+	now = pvclock_clocksource_read(cpu_time);
+	put_cpu_var(hv_clock);
+	return now;
+}
+
+#ifdef CONFIG_X86_32
+static struct clocksource kvm_clock = {
+	.name = "kvm-clock",
+	.rating = 400,
+	.read = kvm_clock_read,
+	.is_continuous = 1,
+	.mask = CLOCKSOURCE_MASK(64),
+	.shift = KVM_SCALE,
+	.mult = 1 << KVM_SCALE,	/* i.e. mult is exactly 1 << shift */
+};
+#endif
+
+/* Point MSR_KVM_SYSTEM_TIME at this CPU's hv_clock; txt only tags the log. */
+int kvm_register_clock(char *txt)
+{
+	int cpu = smp_processor_id();
+	u64 pa = __pa(&per_cpu(hv_clock, cpu));
+	int lo = (int)pa | 1;	/* low word goes out with bit 0 set */
+	int hi = (int)(pa >> 32);
+
+	/* not in upstream, where smp_ops guarantees no call when disabled */
+	if (!use_kvm_time)
+		return 0;
+	printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n",
+	       cpu, hi, lo, txt);
+	return wrmsr_safe(MSR_KVM_SYSTEM_TIME, lo, hi);
+}
+
+/* Warning: not upstream, which does this through machine_ops (we lack them).
+ * Zeroes MSR_KVM_SYSTEM_TIME here so that kvmclock related structures need
+ * not be exposed throughout the rest of our kernel code - glommer */
+void kvmclock_disable(void)
+{
+	if (!use_kvm_time)
+		return;
+	wrmsr(MSR_KVM_SYSTEM_TIME, 0, 0);
+}
+void __init kvmclock_init(void)
+{
+	/* Boot-time probe. Any path that leaves kvmclock unused must clear
+	 * use_kvm_time, or kvmclock_disable() would still write the MSR and
+	 * kvm_register_clock() would still try to, e.g. on bare metal. */
+	if (!kvm_para_available() || !kvmclock ||
+	    !kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE) ||
+	    kvm_register_clock("boot clock")) {
+		use_kvm_time = 0;
+		return;
+	}
+#ifdef CONFIG_X86_32
+	clocksource_register(&kvm_clock);
+#endif
+}
diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile
index 023aa49..d2b4d62 100644
--- a/arch/x86_64/kernel/Makefile
+++ b/arch/x86_64/kernel/Makefile
@@ -52,6 +52,7 @@ ifndef CONFIG_XEN
 obj-y				+= vmware.o
 obj-y				+= hypervisor.o
 obj-y				+= pvclock.o
+obj-y				+= kvmclock.o
 endif
 
 CFLAGS_vsyscall.o		:= $(PROFILING) -g0
@@ -65,6 +66,7 @@ intel_cacheinfo-y		+= ../../i386/kernel/cpu/intel_cacheinfo.o
 addon_cpuid_features-y		+= ../../i386/kernel/cpu/addon_cpuid_features.o
 vmware-y			+= ../../i386/kernel/cpu/vmware.o
 pvclock-y			+= ../../i386/kernel/pvclock.o
+kvmclock-y			+= ../../i386/kernel/kvmclock.o
 hypervisor-y			+= ../../i386/kernel/cpu/hypervisor.o
 quirks-y			+= ../../i386/kernel/quirks.o
 i8237-y				+= ../../i386/kernel/i8237.o
diff --git a/include/asm-i386/kvm_para.h b/include/asm-i386/kvm_para.h
index b8a3305..9ac2b26 100644
--- a/include/asm-i386/kvm_para.h
+++ b/include/asm-i386/kvm_para.h
@@ -49,6 +49,10 @@ struct kvm_mmu_op_release_pt {
 #include <asm/processor.h>
 
 extern void kvmclock_init(void);
+extern int kvm_register_clock(char *txt);
+extern void kvmclock_disable(void);
+extern unsigned long kvm_get_wallclock(void);
+extern unsigned int use_kvm_time;
 
 
 /* This instruction is vmcall.  On non-VT architectures, it will generate a
diff --git a/include/asm-x86_64/kvm_para.h b/include/asm-x86_64/kvm_para.h
index b8a3305..82604a7 100644
--- a/include/asm-x86_64/kvm_para.h
+++ b/include/asm-x86_64/kvm_para.h
@@ -49,7 +49,12 @@ struct kvm_mmu_op_release_pt {
 #include <asm/processor.h>
 
 extern void kvmclock_init(void);
-
+extern int kvm_register_clock(char *txt);
+extern void kvmclock_disable(void);
+extern unsigned long kvm_get_wallclock(void);
+#include <linux/clocksource.h> // for cycle_t
+cycle_t kvm_clock_read(void);
+extern unsigned int use_kvm_time;
 
 /* This instruction is vmcall.  On non-VT architectures, it will generate a
  * trap that we will then rewrite to the appropriate instruction.