Sophie

Sophie

distrib > Scientific%20Linux > 5x > x86_64 > by-pkgid > fc11cd6e1c513a17304da94a5390f3cd > files > 4137

kernel-2.6.18-194.11.1.el5.src.rpm

From: Glauber Costa <glommer@redhat.com>
Date: Tue, 11 Aug 2009 13:32:02 -0400
Subject: [x86_64] kvm clocksource's implementation
Message-id: 1250011926-31633-4-git-send-email-glommer@redhat.com
O-Subject: [PATCH v2 3/7] x86_64 kvm clocksource's implementation
Bugzilla: 476075

Since x86_64 does not have a clocksource infrastructure, we can
re-use the code for timekeeping_using_tsc to do that. "Using tsc"
does not really reflect what we're doing here, but changing the name
of it now could disrupt users relying on the parameter name, for example.

To do that, I'm adding a variable last_kvm in vxtime. I could probably reuse
hpet's, but thought this way would be cleaner.

Other than that, we don't differ much from vmware's tsc code, and I tried
to reuse as much code as possible.

[ v2: don't return 0 in get_hypervisor_cycles_per_tick, spotted by clalance ]

Signed-off-by: Glauber Costa <glommer@redhat.com>

diff --git a/arch/i386/kernel/cpu/hypervisor.c b/arch/i386/kernel/cpu/hypervisor.c
index 7800c71..5913d34 100644
--- a/arch/i386/kernel/cpu/hypervisor.c
+++ b/arch/i386/kernel/cpu/hypervisor.c
@@ -50,6 +50,15 @@ unsigned long get_hypervisor_tsc_freq(void)
 	return 0;
 }
 
+unsigned long get_hypervisor_cycles_per_tick(void)
+{
+
+	if (boot_cpu_data.x86_hyper_vendor == X86_HYPER_VENDOR_KVM)
+		return 1000000000 / REAL_HZ;
+	else /* Same thing for VMware or baremetal, in case we force it */
+		return (cpu_khz * 1000) / REAL_HZ;
+}
+
 static inline void __cpuinit
 hypervisor_set_feature_bits(struct cpuinfo_x86 *c)
 {
@@ -57,6 +66,8 @@ hypervisor_set_feature_bits(struct cpuinfo_x86 *c)
 		vmware_set_feature_bits(c);
 		return;
 	}
+	if (boot_cpu_data.x86_hyper_vendor == X86_HYPER_VENDOR_KVM)
+		kvmclock_init();
 }
 
 void __cpuinit init_hypervisor(struct cpuinfo_x86 *c)
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c
index ed66286..b1f1e22 100644
--- a/arch/x86_64/kernel/time.c
+++ b/arch/x86_64/kernel/time.c
@@ -36,6 +36,7 @@
 #include <linux/efi.h>
 #include <linux/acpi.h>
 #include <linux/delay.h>
+#include <linux/kvm_para.h>
 #ifdef CONFIG_ACPI
 #include <acpi/achware.h>	/* for PM timer frequency */
 #include <acpi/acpi_bus.h>
@@ -123,6 +124,11 @@ static inline long do_gettimeoffset_tsc(void)
 	return x;
 }
 
+static inline long do_gettimeoffset_kvm(void)
+{
+	return (kvm_clock_read() - vxtime.last_kvm);
+}
+
 static inline long do_gettimeoffset_hpet(void)
 {
 	/* cap counter read to one tick to avoid inconsistencies */
@@ -477,9 +483,18 @@ static void do_timer_tsc_timekeeping(struct pt_regs *regs)
 {
 	int i;
 	cycles_t tsc, tsc_accounted, tsc_not_accounted;
+	unsigned long *last = NULL;
 
-	tsc = get_cycles_sync();
-	tsc_accounted = vxtime.last_tsc;
+
+	if (use_kvm_time) {
+		tsc = kvm_clock_read();
+		last = &vxtime.last_kvm;
+	}
+	else {
+		tsc = get_cycles_sync();
+		last = &vxtime.last_tsc;
+	}
+	tsc_accounted = *last;
 
 	if (unlikely(tsc < tsc_accounted))
 		return;
@@ -499,9 +514,13 @@ static void do_timer_tsc_timekeeping(struct pt_regs *regs)
 		tsc_accounted += cycles_per_tick;
 	}
 
-	monotonic_base += ((tsc_accounted - vxtime.last_tsc) *
+	if (use_kvm_time)
+		monotonic_base += (tsc_accounted - *last);
+	else
+		monotonic_base += ((tsc_accounted - *last) *
 					1000000 / cpu_khz);
-	vxtime.last_tsc = tsc_accounted;
+
+	*last = tsc_accounted;
 }
 
 void main_timer_handler(struct pt_regs *regs)
@@ -522,6 +541,7 @@ void main_timer_handler(struct pt_regs *regs)
 	else
 		do_timer_account_lost_ticks(regs);
 
+
 /*
  * If we have an externally synchronized Linux clock, then update CMOS clock
  * accordingly every ~11 minutes. set_rtc_mmss() will be called in the jiffy
@@ -643,6 +663,15 @@ static unsigned long get_cmos_time(void)
 	return mktime(year, mon, day, hour, min, sec);
 }
 
+static unsigned long get_wallclock(void)
+{
+	if (use_kvm_time)
+		return kvm_get_wallclock();
+	else
+		return get_cmos_time();
+}
+
+
 /* calibrate_cpu is used on systems with fixed rate TSCs to determine
  * processor frequency */
 #define TICK_COUNT 100000000
@@ -1042,7 +1071,7 @@ void __init time_init(void)
 	if (nohpet)
 		vxtime.hpet_address = 0;
 
-	xtime.tv_sec = get_cmos_time();
+	xtime.tv_sec = get_wallclock();
 	xtime.tv_nsec = 0;
 
 	set_normalized_timespec(&wall_to_monotonic,
@@ -1053,7 +1082,11 @@ void __init time_init(void)
 	else
 		vxtime.hpet_address = 0;
 
-	if (hpet_use_timer) {
+	if (use_kvm_time) {
+		timename = "KVM";
+		/* no need to get frequency here, since we'll skip the calibrate loop anyway */
+		timekeeping_use_tsc = 1;
+	} else if (hpet_use_timer) {
 		/* set tick_nsec to use the proper rate for HPET */
 	  	tick_nsec = TICK_NSEC_HPET;
 		tsc_khz = hpet_calibrate_tsc();
@@ -1088,7 +1121,7 @@ void __init time_init(void)
 
 	/* Keep time based on the TSC rather than by counting interrupts. */
 	if (timekeeping_use_tsc > 0) {
-		cycles_per_tick = (cpu_khz * 1000) / REAL_HZ;
+		cycles_per_tick = get_hypervisor_cycles_per_tick();
 		/*
 		 * The maximum cycles we will account per
 		 * timer interrupt is 10 minutes.
@@ -1161,7 +1194,12 @@ void time_init_gtod(void)
 	else
 		vgetcpu_mode = VGETCPU_LSL;
 
-	if (timekeeping_use_tsc > 0) {
+	if (use_kvm_time) {
+		timetype = "KVM";
+		vxtime.last_kvm = kvm_clock_read();
+		vxtime.mode = VXTIME_TSC;
+		do_gettimeoffset = do_gettimeoffset_kvm;
+	} else if (timekeeping_use_tsc > 0) {
 		timetype = "TSC Timekeeping";
 		vxtime.mode = VXTIME_TSC;
 	} else if (vxtime.hpet_address && notsc) {
@@ -1212,7 +1250,7 @@ static int timer_suspend(struct sys_device *dev, pm_message_t state)
 	/*
 	 * Estimate time zone so that set_time can update the clock
 	 */
-	long cmos_time =  get_cmos_time();
+	long cmos_time =  get_wallclock();
 
 	clock_cmos_diff = -cmos_time;
 	clock_cmos_diff += get_seconds();
@@ -1224,7 +1262,7 @@ static int timer_resume(struct sys_device *dev)
 {
 	unsigned long flags;
 	unsigned long sec;
-	unsigned long ctime = get_cmos_time();
+	unsigned long ctime = get_wallclock();
 	unsigned long sleep_length = (ctime - sleep_start) * HZ;
 
 	if (vxtime.hpet_address)
diff --git a/include/asm-x86_64/generic-hypervisor.h b/include/asm-x86_64/generic-hypervisor.h
index 369f5c5..dcdfb4a 100644
--- a/include/asm-x86_64/generic-hypervisor.h
+++ b/include/asm-x86_64/generic-hypervisor.h
@@ -21,6 +21,7 @@
 #define ASM_X86__HYPERVISOR_H
 
 extern unsigned long get_hypervisor_tsc_freq(void);
+extern unsigned long get_hypervisor_cycles_per_tick(void);
 extern void init_hypervisor(struct cpuinfo_x86 *c);
 
 #endif
diff --git a/include/asm-x86_64/vsyscall.h b/include/asm-x86_64/vsyscall.h
index 2281e93..53c1fe9 100644
--- a/include/asm-x86_64/vsyscall.h
+++ b/include/asm-x86_64/vsyscall.h
@@ -35,6 +35,7 @@ struct vxtime_data {
 	long hpet_address;	/* HPET base address */
 	int last;
 	unsigned long last_tsc;
+	unsigned long last_kvm;
 	long quot;
 	long tsc_quot;
 	int mode;