Sophie

Sophie

distrib > Scientific%20Linux > 5x > x86_64 > by-pkgid > fc11cd6e1c513a17304da94a5390f3cd > files > 4270

kernel-2.6.18-194.11.1.el5.src.rpm

From: Kei Tokunaga <ktokunag@redhat.com>
Subject: [RHEL5.1 PATCH 15/21] Cannot measure process time accurately on IA64
Date: Thu, 07 Jun 2007 03:44:43 -0400
Bugzilla: 240107
Message-Id: <4667B76B.9030200@redhat.com>
Changelog: [xen] ia64: Cannot measure process time accurately


bz240107
https://bugzilla.redhat.com/bugzilla/show_bug.cgi?id=240107

Backport of cset#14504 and 14508

Process time measurement cannot be done accurately on
ia64 when running two or more CPU-intensive benchmark
processes.  Measured process times appear two or more
times smaller than native measurements.  To fix this,
the patch adds code to account for CPU steal time of
the PV guest.

Thanks,
Kei


rh bug 121137

# HG changeset patch
# User awilliam@xenbuild2.aw
# Date 1173372625 25200
# Node ID 9fbaf07d3f670b96d6a1653a002f5ca3db895fc8
# Parent  4e367aa898956466747417bff637a681b8b58202
[IA64] Add HYPERVISOR_vcpu_op

To support steal time accounting; only
VCPUOP_register_runstate_memory_area is supported right now.

Signed-off-by: Atsushi SAKAI <sakaia@jp.fujitsu.com>


---

 linux-2.6.18-21.el5-gerd-order-kei/arch/ia64/xen/xcom_hcall.c        |   18 ++++++++++
 linux-2.6.18-21.el5-gerd-order-kei/include/asm-ia64/hypercall.h      |    7 +++
 linux-2.6.18-21.el5-gerd-order-kei/include/asm-ia64/xen/xcom_hcall.h |    2 +
 3 files changed, 27 insertions(+)

diff -puN arch/ia64/xen/xcom_hcall.c~14504-IA64_Add_HYPERVISOR_vcpu_op arch/ia64/xen/xcom_hcall.c
--- linux-2.6.18-21.el5-gerd-order/arch/ia64/xen/xcom_hcall.c~14504-IA64_Add_HYPERVISOR_vcpu_op	2007-06-07 02:44:33.000000000 -0400
+++ linux-2.6.18-21.el5-gerd-order-kei/arch/ia64/xen/xcom_hcall.c	2007-06-07 02:44:33.000000000 -0400
@@ -32,6 +32,7 @@
 #include <xen/interface/callback.h>
 #include <xen/interface/acm_ops.h>
 #include <xen/interface/hvm/params.h>
+#include <xen/interface/vcpu.h>
 #include <asm/hypercall.h>
 #include <asm/page.h>
 #include <asm/uaccess.h>
@@ -301,3 +302,20 @@ xencomm_hypercall_suspend(unsigned long 
 
 	return xencomm_arch_hypercall_suspend(xencomm_create_inline(&arg));
 }
+
+long
+xencomm_hypercall_vcpu_op(int cmd, int cpu, void *arg)
+{
+	switch (cmd) {
+	case VCPUOP_register_runstate_memory_area:
+		xencommize_memory_reservation((xen_memory_reservation_t *)arg);
+		break;
+
+	default:
+		printk("%s: unknown vcpu op %d\n", __func__, cmd);
+		return -ENOSYS;
+	}
+
+	return xencomm_arch_hypercall_vcpu_op(cmd, cpu,
+					      xencomm_create_inline(arg));
+}
diff -puN include/asm-ia64/hypercall.h~14504-IA64_Add_HYPERVISOR_vcpu_op include/asm-ia64/hypercall.h
--- linux-2.6.18-21.el5-gerd-order/include/asm-ia64/hypercall.h~14504-IA64_Add_HYPERVISOR_vcpu_op	2007-06-07 02:44:33.000000000 -0400
+++ linux-2.6.18-21.el5-gerd-order-kei/include/asm-ia64/hypercall.h	2007-06-07 02:44:33.000000000 -0400
@@ -205,6 +205,12 @@ xencomm_arch_hypercall_hvm_op(int cmd, v
 	return _hypercall2(unsigned long, hvm_op, cmd, arg);
 }
 
+static inline long
+xencomm_arch_hypercall_vcpu_op(int cmd, int cpu, void *arg)
+{
+	return _hypercall3(long, vcpu_op, cmd, cpu, arg);
+}
+
 static inline int
 HYPERVISOR_physdev_op(int cmd, void *arg)
 {
@@ -383,5 +389,6 @@ HYPERVISOR_expose_p2m(unsigned long conv
 #endif
 
 #define HYPERVISOR_suspend xencomm_hypercall_suspend
+#define HYPERVISOR_vcpu_op xencomm_hypercall_vcpu_op
 
 #endif /* __HYPERCALL_H__ */
diff -puN include/asm-ia64/xen/xcom_hcall.h~14504-IA64_Add_HYPERVISOR_vcpu_op include/asm-ia64/xen/xcom_hcall.h
--- linux-2.6.18-21.el5-gerd-order/include/asm-ia64/xen/xcom_hcall.h~14504-IA64_Add_HYPERVISOR_vcpu_op	2007-06-07 02:44:33.000000000 -0400
+++ linux-2.6.18-21.el5-gerd-order-kei/include/asm-ia64/xen/xcom_hcall.h	2007-06-07 02:44:33.000000000 -0400
@@ -46,6 +46,8 @@ extern unsigned long xencomm_hypercall_h
 
 extern int xencomm_hypercall_suspend(unsigned long srec);
 
+extern long xencomm_hypercall_vcpu_op(int cmd, int cpu, void *arg);
+
 /* Using mini xencomm.  */
 extern int xencomm_mini_hypercall_console_io(int cmd, int count, char *str);
 

_


rh bug 121137

# HG changeset patch
# User awilliam@xenbuild2.aw
# Date 1173713999 21600
# Node ID 9ea0c5f469c871f3b6a387a61555a725cecdd1b6
# Parent  800f7904d6a4a3f5c0deaaa444289dff139ad610
[IA64] PV steal time accounting

Signed-off-by: Atsushi SAKAI <sakaia@jp.fujitsu.com>


---

 linux-2.6.18-21.el5-gerd-order-kei/arch/ia64/kernel/time.c |  131 +++++++++++++
 1 file changed, 131 insertions(+)

diff -puN arch/ia64/kernel/time.c~14508-IA64_PV_steal_time_accounting arch/ia64/kernel/time.c
--- linux-2.6.18-21.el5-gerd-order/arch/ia64/kernel/time.c~14508-IA64_PV_steal_time_accounting	2007-06-07 02:44:33.000000000 -0400
+++ linux-2.6.18-21.el5-gerd-order-kei/arch/ia64/kernel/time.c	2007-06-07 02:44:33.000000000 -0400
@@ -29,6 +29,13 @@
 #include <asm/sections.h>
 #include <asm/system.h>
 
+#ifdef CONFIG_XEN
+#include <linux/kernel_stat.h>
+#include <linux/posix-timers.h>
+#include <xen/interface/vcpu.h>
+#include <asm/percpu.h>
+#endif
+
 extern unsigned long wall_jiffies;
 
 volatile int time_keeper_id = 0; /* smp_processor_id() of time-keeper */
@@ -40,16 +47,109 @@ EXPORT_SYMBOL(last_cli_ip);
 
 #endif
 
+#ifdef CONFIG_XEN
+DEFINE_PER_CPU(struct vcpu_runstate_info, runstate);
+DEFINE_PER_CPU(unsigned long, processed_stolen_time);
+DEFINE_PER_CPU(unsigned long, processed_blocked_time);
+#define NS_PER_TICK (1000000000LL/HZ)
+#endif
+
 static struct time_interpolator itc_interpolator = {
 	.shift = 16,
 	.mask = 0xffffffffffffffffLL,
 	.source = TIME_SOURCE_CPU
 };
 
+#ifdef CONFIG_XEN
+static unsigned long 
+consider_steal_time(unsigned long new_itm, struct pt_regs *regs)
+{
+	unsigned long stolen, blocked, sched_time;
+	unsigned long delta_itm = 0, stolentick = 0;
+	int i, cpu = smp_processor_id();
+	struct vcpu_runstate_info *runstate;
+	struct task_struct *p = current;
+
+	runstate = &per_cpu(runstate, smp_processor_id());
+
+	do {
+		sched_time = runstate->state_entry_time;
+		mb();
+		stolen = runstate->time[RUNSTATE_runnable] + 
+			 runstate->time[RUNSTATE_offline] -
+			 per_cpu(processed_stolen_time, cpu);
+		blocked = runstate->time[RUNSTATE_blocked] -
+			  per_cpu(processed_blocked_time, cpu);
+		mb();
+	} while (sched_time != runstate->state_entry_time);
+
+	/*
+	 * Check for vcpu migration effect
+	 * In this case, itc value is reversed.
+	 * This causes huge stolen value.  
+	 * This function just checks and reject this effect.
+	 */
+	if (!time_after_eq(runstate->time[RUNSTATE_blocked],
+			   per_cpu(processed_blocked_time, cpu)))
+		blocked = 0;
+
+	if (!time_after_eq(runstate->time[RUNSTATE_runnable] +
+			   runstate->time[RUNSTATE_offline],
+			   per_cpu(processed_stolen_time, cpu)))
+		stolen = 0;
+
+	if (!time_after(delta_itm + new_itm, ia64_get_itc()))
+		stolentick = ia64_get_itc() - delta_itm - new_itm;
+
+	do_div(stolentick, NS_PER_TICK);
+	stolentick++;
+
+	do_div(stolen, NS_PER_TICK);
+
+	if (stolen > stolentick)
+		stolen = stolentick;
+
+	stolentick -= stolen;
+	do_div(blocked, NS_PER_TICK);
+
+	if (blocked > stolentick)
+		blocked = stolentick;
+
+	if (stolen > 0 || blocked > 0) {
+		account_steal_time(NULL, jiffies_to_cputime(stolen)); 
+		account_steal_time(idle_task(cpu), jiffies_to_cputime(blocked)); 
+		run_local_timers();
+
+		if (rcu_pending(cpu))
+			rcu_check_callbacks(cpu, user_mode(regs));
+
+		scheduler_tick();
+		run_posix_cpu_timers(p);
+		delta_itm += local_cpu_data->itm_delta * (stolen + blocked);
+
+		if (cpu == time_keeper_id) {
+			write_seqlock(&xtime_lock);
+			for(i = 0; i < stolen + blocked; i++)
+				do_timer(regs);
+			local_cpu_data->itm_next = delta_itm + new_itm;
+			write_sequnlock(&xtime_lock);
+		} else {
+			local_cpu_data->itm_next = delta_itm + new_itm;
+		}
+		per_cpu(processed_stolen_time,cpu) += NS_PER_TICK * stolen;
+		per_cpu(processed_blocked_time,cpu) += NS_PER_TICK * blocked;
+	}
+	return delta_itm; 
+}
+#else
+#define consider_steal_time(new_itm, regs) (0)
+#endif
+
 static irqreturn_t
 timer_interrupt (int irq, void *dev_id, struct pt_regs *regs)
 {
 	unsigned long new_itm;
+	unsigned long delta_itm; /* XEN */
 
 	if (unlikely(cpu_is_offline(smp_processor_id()))) {
 		return IRQ_HANDLED;
@@ -65,6 +165,13 @@ timer_interrupt (int irq, void *dev_id, 
 
 	profile_tick(CPU_PROFILING, regs);
 
+	if (is_running_on_xen()) {
+		delta_itm = consider_steal_time(new_itm, regs);
+		new_itm += delta_itm;
+		if (time_after(new_itm, ia64_get_itc()) && delta_itm)
+			goto skip_process_time_accounting;
+	}
+
 	while (1) {
 		update_process_times(user_mode(regs));
 
@@ -88,6 +195,8 @@ timer_interrupt (int irq, void *dev_id, 
 			break;
 	}
 
+skip_process_time_accounting:	/* XEN */
+
 	do {
 		/*
 		 * If we're too close to the next clock tick for
@@ -142,6 +251,25 @@ static int __init nojitter_setup(char *s
 
 __setup("nojitter", nojitter_setup);
 
+#ifdef CONFIG_XEN
+/* taken from i386/kernel/time-xen.c */
+static void init_missing_ticks_accounting(int cpu)
+{
+	struct vcpu_register_runstate_memory_area area;
+	struct vcpu_runstate_info *runstate = &per_cpu(runstate, cpu);
+
+	memset(runstate, 0, sizeof(*runstate));
+
+	area.addr.v = runstate;
+	HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area, cpu, &area);
+
+	per_cpu(processed_blocked_time, cpu) = runstate->time[RUNSTATE_blocked];
+	per_cpu(processed_stolen_time, cpu) = runstate->time[RUNSTATE_runnable]
+					    + runstate->time[RUNSTATE_offline];
+}
+#else
+#define init_missing_ticks_accounting(cpu) do {} while (0)
+#endif
 
 void __devinit
 ia64_init_itm (void)
@@ -225,6 +353,9 @@ ia64_init_itm (void)
 		register_time_interpolator(&itc_interpolator);
 	}
 
+	if (is_running_on_xen())
+		init_missing_ticks_accounting(smp_processor_id());
+
 	/* Setup the CPU local timer tick */
 	ia64_cpu_local_tick();
 }

_