Sophie

Sophie

distrib > Scientific%20Linux > 5x > x86_64 > by-pkgid > fc11cd6e1c513a17304da94a5390f3cd > files > 4469

kernel-2.6.18-194.11.1.el5.src.rpm

From: Chris Lalancette <clalance@redhat.com>
Date: Fri, 13 Mar 2009 09:55:20 +0100
Subject: [xen] ia64: fix FP emulation in a PV domain
Message-id: 49BA1F78.8010601@redhat.com
O-Subject: [RHEL5.4 PATCH 3/3]: Fix FP emulation in a PV domain
Bugzilla: 477098
RH-Acked-by: Don Dutile <ddutile@redhat.com>

IA64: fix FP emulation in a PV domain

When the VMM fails to fetch the bundle in question during fpswa processing,
the only option is for the guest to provide the bundle.
The current implementation, however, just returns a random value.
This patch makes the fpswa hypercall calling convention more complicated so
that the guest passes the necessary information to the hypervisor.

IA64: fix fp fault/trap handler.

When the fpswa handler fails to fetch the bundle from the guest,
the fp fault/trap should be injected into the guest so that the guest
can handle it.
When the fpswa library returns an error, there is no way to
pass the value to the guest. In that case, just inject the fpswa
fault/trap into the guest, at the risk that the guest may get
an error from its own fpswa call. Here it is assumed that
no applications depend on the SIGFPE process signal to recover
their computation.

IA64: make the fpswa emulation keep the previous behaviour.

When the fpswa library returns status > 0, keep the previous behavior.
This case should be addressed somehow later, but it seems somewhat
difficult to resolve, so keep the previous behavior for now.
It is assumed that a guest kernel calls the fpswa library
without preemption. This assumption breaks if the guest kernel is
preemptive.

IA64: fix panic caused by daccess fault.

During fpswa emulation, the Xen VMM accesses the guest virtual address space,
which may cause a daccess fault, resulting in a panic.
This patch makes the daccess fault handler handle such cases properly.

xen-unstable c/s 18982 18983 18984 18985

Fixes BZ 477098

diff --git a/arch/ia64/vmx/vmx_process.c b/arch/ia64/vmx/vmx_process.c
index e62fe9d..d4b5871 100644
--- a/arch/ia64/vmx/vmx_process.c
+++ b/arch/ia64/vmx/vmx_process.c
@@ -76,7 +76,14 @@ static u64 vec2off[68] = {0x0,0x400,0x800,0xc00,0x1000,0x1400,0x1800,
     0x7f00
 };
 
-
+void vmx_lazy_load_fpu(struct vcpu *vcpu)
+{
+    if (FP_PSR(vcpu) & IA64_PSR_DFH) {
+        FP_PSR(vcpu) = IA64_PSR_MFH;
+        if (__ia64_per_cpu_var(fp_owner) != vcpu)
+            __ia64_load_fpu(vcpu->arch._thread.fph);
+    }
+}
 
 void vmx_reflect_interruption(u64 ifa, u64 isr, u64 iim,
                               u64 vec, REGS *regs)
@@ -100,11 +107,7 @@ void vmx_reflect_interruption(u64 ifa, u64 isr, u64 iim,
     case 25:	// IA64_DISABLED_FPREG_VECTOR
         if (!(vpsr & IA64_PSR_IC))
             goto nested_fault;
-        if (FP_PSR(vcpu) & IA64_PSR_DFH) {
-            FP_PSR(vcpu) = IA64_PSR_MFH;
-            if (__ia64_per_cpu_var(fp_owner) != vcpu)
-                __ia64_load_fpu(vcpu->arch._thread.fph);
-        }
+        vmx_lazy_load_fpu(vcpu);
         if (!(VCPU(vcpu, vpsr) & IA64_PSR_DFH)) {
             regs->cr_ipsr &= ~IA64_PSR_DFH;
             return;
@@ -121,8 +124,7 @@ void vmx_reflect_interruption(u64 ifa, u64 isr, u64 iim,
         if (!status) {
             vcpu_increment_iip(vcpu);
             return;
-        } else if (IA64_RETRY == status)
-            return;
+        }
         break;
 
     case 33:	// IA64_FP_TRAP_VECTOR
@@ -132,10 +134,6 @@ void vmx_reflect_interruption(u64 ifa, u64 isr, u64 iim,
         status = handle_fpu_swa(0, regs, isr);
         if (!status)
             return;
-        else if (IA64_RETRY == status) {
-            vcpu_decrement_iip(vcpu);
-            return;
-        }
         break;
 
     case 29: // IA64_DEBUG_VECTOR
diff --git a/arch/ia64/xen/dom_fw.c b/arch/ia64/xen/dom_fw.c
index ff6d13d..cb37308 100644
--- a/arch/ia64/xen/dom_fw.c
+++ b/arch/ia64/xen/dom_fw.c
@@ -144,6 +144,117 @@ build_pal_hypercall_bundles(u64 *imva, u64 brkimm, u64 hypnum)
 	ia64_fc(imva + 3);
 }
 
+/* xen fpswa call stub. 14 bundles. Zero pointers in r24-r27 skip tpa. */
+extern const unsigned long xen_ia64_fpswa_call_stub[];
+extern const unsigned long xen_ia64_fpswa_call_stub_end[];
+extern const unsigned long xen_ia64_fpswa_call_stub_patch[];
+asm(
+	".align 32\n"
+	".proc xen_ia64_fpswa_call_stub;\n"
+	"xen_ia64_fpswa_call_stub:\n"
+	".prologue\n"
+	"alloc r3 = ar.pfs, 8, 0, 0, 0\n"
+	".body\n"
+	"mov r14 = in0\n"
+	"ld8 r15 = [in1], 8\n"
+	";;\n"
+	"ld8 r16 = [in1]\n"
+	"ld8 r17 = [in2]\n"
+	"ld8 r18 = [in3]\n"
+	"ld8 r19 = [in4]\n"
+	"ld8 r20 = [in5]\n"
+	"ld8 r21 = [in6]\n"
+	"ld8 r22 = [in7], 8\n"
+	";;\n"
+	"ld8 r23 = [in7], 8\n"
+	";;\n"
+	"ld8 r24 = [in7], 8\n"
+	";;\n"
+	"cmp.ne p6, p0 = r24, r0\n"
+	"ld8 r25 = [in7], 8\n"
+	";;\n"
+	"(p6) tpa r24 = r24\n"
+	"cmp.ne p7, p0 = r25, r0\n"
+	"ld8 r26 = [in7], 8\n"
+	";;\n"
+	"(p7)tpa r25 = r25\n"
+	"cmp.ne p8, p0 = r26, r0\n"
+	"ld8 r27 = [in7], 8\n"
+	";;\n"
+	"(p8)tpa r26 = r26\n"
+	"cmp.ne p9, p0 = r27, r0\n"
+	";;\n"
+	"(p9)tpa r27 = r27\n"	/* predicated like r24-r26: keep 0 as 0 */
+	"xen_ia64_fpswa_call_stub_patch:"
+	"{\n"
+	"mov r2 = " FW_HYPERCALL_FPSWA_STR "\n"
+	"break " __IA64_XEN_HYPERCALL_DEFAULT_STR "\n"
+	"nop.i 0\n"
+	"}\n"
+	"st8 [in2] = r17\n"
+	"st8 [in3] = r18\n"
+	"st8 [in4] = r19\n"
+	"st8 [in5] = r20\n"
+	"st8 [in6] = r21\n"
+	"br.ret.sptk.many rp\n"
+	"xen_ia64_fpswa_call_stub_end:"
+	".endp xen_ia64_fpswa_call_stub\n"
+);
+
+static void
+build_fpswa_hypercall_bundle(uint64_t *imva, uint64_t brkimm, uint64_t hypnum)
+{
+	INST64_A5 slot0;
+	INST64_I19 slot1;
+	INST64_I18 slot2;
+	IA64_BUNDLE bundle;
+
+	/* slot0: mov r2 = hypnum (low 20 bits) */
+	slot0.inst = 0;
+	slot0.qp = 0;
+	slot0.r1 = 2;
+	slot0.r3 = 0;
+	slot0.major = 0x9;
+
+	slot0.s = 0;
+	slot0.imm9d = hypnum >> 7;
+	slot0.imm5c = hypnum >> 16;
+	slot0.imm7b = hypnum;
+
+	/* slot1: break brkimm */
+	slot1.inst = 0;
+	slot1.qp = 0;
+	slot1.x6 = 0;
+	slot1.x3 = 0;
+	slot1.major = 0x0;
+	slot1.i = brkimm >> 20;
+	slot1.imm20 = brkimm;
+
+	/* slot2: nop.i */
+	slot2.inst = 0;
+	slot2.qp = 0;
+	slot2.imm20 = 0;
+	slot2.y = 0;
+	slot2.x6 = 1;
+	slot2.x3 = 0;
+	slot2.i = 0;
+	slot2.major = 0;
+
+	/* MII bundle */
+	bundle.i64[0] = 0;
+	bundle.i64[1] = 0;
+	bundle.template = 0x0; /* MII */
+	bundle.slot0 = slot0.inst;
+	bundle.slot1a = slot1.inst;
+	bundle.slot1b = slot1.inst >> 18;
+	bundle.slot2 = slot2.inst;
+	
+	imva[0] = bundle.i64[0];
+	imva[1] = bundle.i64[1];
+	ia64_fc(imva);
+	ia64_fc(imva + 1);
+}
+
 // builds a hypercall bundle at domain physical address
 static void
 dom_fpswa_hypercall_patch(struct domain *d, unsigned long imva)
@@ -151,6 +262,10 @@ dom_fpswa_hypercall_patch(struct domain *d, unsigned long imva)
 	unsigned long *entry_imva, *patch_imva;
 	const unsigned long entry_paddr = FW_HYPERCALL_FPSWA_ENTRY_PADDR;
 	const unsigned long patch_paddr = FW_HYPERCALL_FPSWA_PATCH_PADDR;
+	const size_t stub_size =
+		(char*)xen_ia64_fpswa_call_stub_end -
+		(char*)xen_ia64_fpswa_call_stub;
+	size_t i;
 
 	entry_imva = (unsigned long *)(imva + entry_paddr -
 	                               FW_HYPERCALL_BASE_PADDR);
@@ -160,9 +275,19 @@ dom_fpswa_hypercall_patch(struct domain *d, unsigned long imva)
 	/* Descriptor.  */
 	*entry_imva++ = patch_paddr;
 	*entry_imva   = 0;
+        /* see dom_fw.h */
+        BUILD_BUG_ON((char*)xen_ia64_fpswa_call_stub_end -
+                     (char*)xen_ia64_fpswa_call_stub > 0xff - 16);
+ 
+        /* call stub */
+        memcpy(patch_imva, xen_ia64_fpswa_call_stub, stub_size);
+        for (i = 0; i < stub_size; i++)
+                ia64_fc(imva + i);
+        patch_imva +=
+                xen_ia64_fpswa_call_stub_patch - xen_ia64_fpswa_call_stub;
+        build_fpswa_hypercall_bundle(patch_imva, d->arch.breakimm,
+							FW_HYPERCALL_FPSWA);
 
-	build_hypercall_bundle(patch_imva, d->arch.breakimm,
-	                       FW_HYPERCALL_FPSWA, 1);
 }
 
 // builds a hypercall bundle at domain physical address
diff --git a/arch/ia64/xen/faults.c b/arch/ia64/xen/faults.c
index 2737c3a..ba0b927 100644
--- a/arch/ia64/xen/faults.c
+++ b/arch/ia64/xen/faults.c
@@ -340,7 +340,6 @@ fp_emulate(int fp_fault, void *bundle, unsigned long *ipsr,
 unsigned long
 handle_fpu_swa(int fp_fault, struct pt_regs *regs, unsigned long isr)
 {
-	struct vcpu *v = current;
 	IA64_BUNDLE bundle;
 	unsigned long fault_ip;
 	fpswa_ret_t ret;
@@ -374,8 +373,7 @@ handle_fpu_swa(int fp_fault, struct pt_regs *regs, unsigned long isr)
 	                 &isr, &regs->pr, &regs->cr_ifs, regs);
 
 	if (ret.status) {
-		PSCBX(v, fpswa_ret) = ret;
-		printk("%s(%s): fp_emulate() returned %ld\n",
+		gdprintk(XENLOG_ERR, "%s(%s): fp_emulate() returned %ld\n",
 		       __FUNCTION__, fp_fault ? "fault" : "trap", ret.status);
 	}
 
@@ -438,6 +436,13 @@ ia64_fault(unsigned long vector, unsigned long isr, unsigned long ifa,
 		printk("Dirty-bit.\n");
 		break;
 
+	case 10:
+		/* __domain_get_bundle() may cause fault. */
+		if (ia64_done_with_exception(regs))
+			return;
+		printk("Data Access-bit.\n");
+		break;
+
 	case 20:
 		printk("Page Not Found.\n");
 		break;
@@ -588,6 +593,17 @@ ia64_handle_privop(unsigned long ifa, struct pt_regs *regs, unsigned long isr,
 }
 
 void
+ia64_lazy_load_fpu(struct vcpu *v)
+{
+	if (PSCB(v, hpsr_dfh)) {
+		PSCB(v, hpsr_dfh) = 0;
+		PSCB(v, hpsr_mfh) = 1;
+		if (__ia64_per_cpu_var(fp_owner) != v)
+			__ia64_load_fpu(v->arch._thread.fph);
+	}
+}
+
+void
 ia64_handle_reflection(unsigned long ifa, struct pt_regs *regs,
                        unsigned long isr, unsigned long iim,
                        unsigned long vector)
@@ -626,12 +642,7 @@ ia64_handle_reflection(unsigned long ifa, struct pt_regs *regs,
 		vector = IA64_GENEX_VECTOR;
 		break;
 	case 25:
-		if (PSCB(v, hpsr_dfh)) {
-			PSCB(v, hpsr_dfh) = 0;
-			PSCB(v, hpsr_mfh) = 1;
-			if (__ia64_per_cpu_var(fp_owner) != v)
-				__ia64_load_fpu(v->arch._thread.fph);
-		}
+		ia64_lazy_load_fpu(v);
 		if (!PSCB(v, vpsr_dfh)) {
 			regs->cr_ipsr &= ~IA64_PSR_DFH;
 			return;
@@ -691,22 +702,12 @@ ia64_handle_reflection(unsigned long ifa, struct pt_regs *regs,
 			vcpu_increment_iip(v);
 			return;
 		}
-		// fetch code fail
-		if (IA64_RETRY == status)
-			return;
-		printk("ia64_handle_reflection: handling FP fault\n");
 		vector = IA64_FP_FAULT_VECTOR;
 		break;
 	case 33:
 		status = handle_fpu_swa(0, regs, isr);
 		if (!status)
 			return;
-		// fetch code fail
-		if (IA64_RETRY == status) {
-			vcpu_decrement_iip(v);
-			return;
-		}
-		printk("ia64_handle_reflection: handling FP trap\n");
 		vector = IA64_FP_TRAP_VECTOR;
 		break;
 	case 34:
diff --git a/arch/ia64/xen/hypercall.c b/arch/ia64/xen/hypercall.c
index 9f8c7d0..8c6e6ff 100644
--- a/arch/ia64/xen/hypercall.c
+++ b/arch/ia64/xen/hypercall.c
@@ -17,6 +17,7 @@
 #include <asm/sal.h>	/* FOR struct ia64_sal_retval */
 #include <asm/fpswa.h>	/* FOR struct fpswa_ret_t */
 
+#include <asm/vmx.h>
 #include <asm/vmx_vcpu.h>
 #include <asm/vcpu.h>
 #include <asm/dom_fw.h>
@@ -121,14 +122,135 @@ fw_hypercall_ipi (struct pt_regs *regs)
 	return;
 }
 
+static int
+fpswa_get_domain_addr(struct vcpu *v, unsigned long gpaddr, size_t size,
+		      void **virt, struct page_info **page, const char *name)
+{
+	int cross_page_boundary;
+
+	if (gpaddr == 0) {
+		*virt = 0;
+		return 0;
+	}
+
+	cross_page_boundary = (((gpaddr & ~PAGE_MASK) + size) > PAGE_SIZE);
+	if (unlikely(cross_page_boundary)) {
+		/* this case isn't implemented */
+		gdprintk(XENLOG_ERR,
+			 "%s: fpswa hypercall is called with "
+			 "page crossing argument %s 0x%lx\n",
+			 __func__, name, gpaddr);
+		return -ENOSYS;
+	}
+
+again:
+        *virt = domain_mpa_to_imva(v->domain, gpaddr);
+        *page = virt_to_page(*virt);
+        if (get_page(*page, current->domain) == 0) {
+                if (page_get_owner(*page) != current->domain) {
+			*page = NULL;
+			return -EFAULT;
+		}
+                goto again;
+        }
+
+	return 0;
+}
+
+/* FPSWA hypercall: run the host FPSWA on operands passed in r14..r27. */
+static fpswa_ret_t fw_hypercall_fpswa(struct vcpu *v, struct pt_regs *regs)
+{
+	fpswa_ret_t ret = {-1, 0, 0, 0};
+	unsigned long bundle[2] = { regs->r15, regs->r16};
+	fp_state_t fp_state;
+	struct page_info *lp_page = NULL;
+	struct page_info *lv_page = NULL;
+	struct page_info *hp_page = NULL;
+	struct page_info *hv_page = NULL;
+
+	if (unlikely(PSCBX(v, fpswa_ret).status != 0 &&
+		     PSCBX(v, fpswa_ret).status != IA64_RETRY)) {
+		ret = PSCBX(v, fpswa_ret);
+		PSCBX(v, fpswa_ret) = (fpswa_ret_t){0, 0, 0, 0};
+		return ret;
+	}
+
+	if (!fpswa_interface)
+		goto error;
+
+	memset(&fp_state, 0, sizeof(fp_state));
+	fp_state.bitmask_low64 = regs->r22;
+	fp_state.bitmask_high64 = regs->r23;
+
+	/* bits 6..11 of bitmask_low64 must all be set */
+	if ((fp_state.bitmask_low64 & 0xfc0) != 0xfc0) {
+		/* other cases aren't supported yet */
+		gdprintk(XENLOG_ERR, "%s unsupported bitmask_low64 0x%lx\n",
+			 __func__, fp_state.bitmask_low64);
+		goto error;
+	}
+	if (regs->r25 == 0)
+		/* fp_state.fp_state_low_volatile must be supplied */
+		goto error;
+
+	/* eager save/lazy restore fpu: f32...f127 */
+	if ((~fp_state.bitmask_low64 & ((1UL << 31) - 1)) != 0 ||
+	    ~fp_state.bitmask_high64 != 0) {
+		if (VMX_DOMAIN(v))
+			vmx_lazy_load_fpu(v);
+		else
+			ia64_lazy_load_fpu(v);
+	}
+
+	if (fpswa_get_domain_addr(v, regs->r24,
+				  sizeof(fp_state.fp_state_low_preserved),
+				  (void*)&fp_state.fp_state_low_preserved,
+				  &lp_page, "fp_state_low_preserved") < 0)
+		goto error;
+	if (fpswa_get_domain_addr(v, regs->r25,
+				  sizeof(fp_state.fp_state_low_volatile),
+				  (void*)&fp_state.fp_state_low_volatile,
+				  &lv_page, "fp_state_low_volatile") < 0)
+		goto error;
+	if (fpswa_get_domain_addr(v, regs->r26,
+				  sizeof(fp_state.fp_state_high_preserved),
+				  (void*)&fp_state.fp_state_high_preserved,
+				  &hp_page, "fp_state_high_preserved") < 0)
+		goto error;
+	if (fpswa_get_domain_addr(v, regs->r27,
+				  sizeof(fp_state.fp_state_high_volatile),
+				  (void*)&fp_state.fp_state_high_volatile,
+				  &hv_page, "fp_state_high_volatile") < 0)
+		goto error;
+
+	ret = (*fpswa_interface->fpswa)(regs->r14,
+					bundle,
+					&regs->r17,	/* pipsr */
+					&regs->r18,	/* pfsr */
+					&regs->r19,	/* pisr */
+					&regs->r20,	/* ppreds */
+					&regs->r21,	/* pifs	*/
+					&fp_state);
+
+error:
+	if (lp_page != NULL)
+		put_page(lp_page);
+	if (lv_page != NULL)
+		put_page(lv_page);
+	if (hp_page != NULL)
+		put_page(hp_page);
+	if (hv_page != NULL)
+		put_page(hv_page);
+	return ret;
+}
+
 static fpswa_ret_t
-fw_hypercall_fpswa (struct vcpu *v)
+fw_hypercall_fpswa_error(void)
 {
-	return PSCBX(v, fpswa_ret);
+	return (fpswa_ret_t) {-1, 0, 0, 0};
 }
 
-IA64FAULT
-ia64_hypercall(struct pt_regs *regs)
+IA64FAULT ia64_hypercall(struct pt_regs *regs)
 {
 	struct vcpu *v = current;
 	struct sal_ret_values x;
@@ -224,8 +346,24 @@ ia64_hypercall(struct pt_regs *regs)
 	case FW_HYPERCALL_SET_SHARED_INFO_VA:
 	        regs->r8 = domain_set_shared_info_va (regs->r28);
 		break;
-	case FW_HYPERCALL_FPSWA:
-		fpswa_ret = fw_hypercall_fpswa (v);
+	case FW_HYPERCALL_FPSWA_BASE:
+		switch (regs->r2) {
+		case FW_HYPERCALL_FPSWA_BROKEN:
+			gdprintk(XENLOG_WARNING,
+				 "Old fpswa hypercall was called (0x%lx).\n"
+				 "Please update your domain builder. ip 0x%lx\n",
+				 FW_HYPERCALL_FPSWA_BROKEN, regs->cr_iip);
+			fpswa_ret = fw_hypercall_fpswa_error();
+			break;
+		case FW_HYPERCALL_FPSWA:
+			fpswa_ret = fw_hypercall_fpswa(v, regs);
+			break;
+		default:
+			gdprintk(XENLOG_ERR, "unknown fpswa hypercall %lx\n",
+				 regs->r2);
+			fpswa_ret = fw_hypercall_fpswa_error();
+			break;
+		}
 		regs->r8  = fpswa_ret.status;
 		regs->r9  = fpswa_ret.err0;
 		regs->r10 = fpswa_ret.err1;
diff --git a/arch/ia64/xen/ivt.S b/arch/ia64/xen/ivt.S
index 5bee6d6..767b103 100644
--- a/arch/ia64/xen/ivt.S
+++ b/arch/ia64/xen/ivt.S
@@ -977,10 +977,17 @@ ENTRY(daccess_bit)
 #ifdef XEN
 	mov r16=cr.isr
 	mov r17=cr.ifa
+	mov r18=cr.ipsr
 	mov r31=pr
 	mov r19=10
+	;;
 	mov r20=0x2800
-	br.sptk.many fast_access_reflect
+	extr.u r18=r18,IA64_PSR_CPL0_BIT,2
+	;;
+	cmp.ne p6,p0=r0,r18 	/* cpl != 0? */
+(p6)	br.sptk.many fast_access_reflect
+	/* __domain_get_bundle() may cause this fault. */
+	br.sptk.few dispatch_to_fault_handler
 	;;
 #else
 	// Like Entry 8, except for data access
diff --git a/include/asm-ia64/bundle.h b/include/asm-ia64/bundle.h
index 7a1b11e..7098643 100644
--- a/include/asm-ia64/bundle.h
+++ b/include/asm-ia64/bundle.h
@@ -33,6 +33,11 @@ typedef union U_INST64_B9 {
     struct { unsigned long qp:6, imm20:20, :1, x6:6, :3, i:1, major:4; };
 } INST64_B9;
 
+typedef union U_INST64_I18 {
+    IA64_INST inst;
+    struct { unsigned long qp:6, imm20:20, y:1, x6:6, x3:3, i:1, major:4; };
+} INST64_I18;
+
 typedef union U_INST64_I19 {
     IA64_INST inst;
     struct { unsigned long qp:6, imm20:20, :1, x6:6, x3:3, i:1, major:4; };
@@ -191,6 +196,7 @@ typedef union U_INST64 {
     INST64_B4 B4;	// used in build_hypercall_bundle only
     INST64_B8 B8;	// rfi, bsw.[01]
     INST64_B9 B9;	// break.b
+    INST64_I18 I18;	// nop.i used in build_fpswa_hypercall_bundle only
     INST64_I19 I19;	// used in build_hypercall_bundle only
     INST64_I26 I26;	// mov register to ar (I unit)
     INST64_I27 I27;	// mov immediate to ar (I unit)
diff --git a/include/asm-ia64/dom_fw.h b/include/asm-ia64/dom_fw.h
index 2dd785b..65370b2 100644
--- a/include/asm-ia64/dom_fw.h
+++ b/include/asm-ia64/dom_fw.h
@@ -7,6 +7,9 @@
 
 #include <linux/efi.h>
 
+#define __IA64_XEN_HYPERCALL_DEFAULT            0x1000
+#define __IA64_XEN_HYPERCALL_DEFAULT_STR        "0x1000"
+
 /* Portion of guest physical memory space reserved for PAL/SAL/EFI/ACPI
    data and code.  */
 #define FW_BASE_PADDR		0x0000UL
@@ -157,13 +160,21 @@
 
 /*
  * This is a hypercall number for FPSWA.
- * FPSWA hypercall uses 2 bundles for a pseudo-entry-point and a hypercall-patch.
+ * FPSWA hypercall uses one bundle for a pseudo-entry-point
+ * and 14 bundles for a hypercall-patch.
+ *
+ * 0x500 was used before, but that implementation is broken.
+ * To keep the hypercall ABI, 0x500 is obsoleted and 0x501 is allocated
+ * for the fpswa hypercall.
  */
 #define FW_HYPERCALL_FPSWA_ENTRY_INDEX			0x90UL
 #define FW_HYPERCALL_FPSWA_PATCH_INDEX			0x91UL
 #define FW_HYPERCALL_FPSWA_ENTRY_PADDR			FW_HYPERCALL_PADDR(FW_HYPERCALL_FPSWA_ENTRY_INDEX)
 #define FW_HYPERCALL_FPSWA_PATCH_PADDR			FW_HYPERCALL_PADDR(FW_HYPERCALL_FPSWA_PATCH_INDEX)
-#define FW_HYPERCALL_FPSWA				0x500UL
+#define FW_HYPERCALL_FPSWA_BASE				0x500UL
+#define FW_HYPERCALL_FPSWA_BROKEN			0x500UL
+#define FW_HYPERCALL_FPSWA				0x501UL
+#define FW_HYPERCALL_FPSWA_STR				"0x501"
 
 /* Set the shared_info base virtual address.  */
 #define FW_HYPERCALL_SET_SHARED_INFO_VA			0x600UL
diff --git a/include/asm-ia64/domain.h b/include/asm-ia64/domain.h
index 46f4f28..f9e8a95 100644
--- a/include/asm-ia64/domain.h
+++ b/include/asm-ia64/domain.h
@@ -205,9 +205,10 @@ struct arch_vcpu {
     char irq_new_condition;    // vpsr.i/vtpr change, check for pending VHPI
     char hypercall_continuation;
 
+    fpswa_ret_t fpswa_ret;	/* save return values of FPSWA emulation */
+
     //for phycial  emulation
     int mode_flags;
-    fpswa_ret_t fpswa_ret;	/* save return values of FPSWA emulation */
     struct timer hlt_timer;
     struct arch_vmx_struct arch_vmx; /* Virtual Machine Extensions */
 
@@ -233,6 +234,9 @@ int
 do_perfmon_op(unsigned long cmd,
               XEN_GUEST_HANDLE(void) arg1, unsigned long arg2);
 
+void
+ia64_lazy_load_fpu(struct vcpu *vcpu);
+
 #endif /* __ASM_DOMAIN_H__ */
 
 /*
diff --git a/include/asm-ia64/vmx.h b/include/asm-ia64/vmx.h
index a217b05..4b3b1b1 100644
--- a/include/asm-ia64/vmx.h
+++ b/include/asm-ia64/vmx.h
@@ -22,6 +22,8 @@
 #ifndef _ASM_IA64_VT_H
 #define _ASM_IA64_VT_H
 
+#include <asm/ia64_int.h>
+
 #include <public/hvm/ioreq.h>
 #define vmx_user_mode(regs) (((struct ia64_psr *)&(regs)->cr_ipsr)->vm == 1)
 
@@ -36,7 +38,7 @@ extern void vmx_load_state(struct vcpu *v);
 extern void vmx_setup_platform(struct domain *d);
 extern void vmx_do_launch(struct vcpu *v);
 extern void vmx_io_assist(struct vcpu *v);
-extern int ia64_hypercall (struct pt_regs *regs);
+extern IA64FAULT ia64_hypercall(struct pt_regs *regs);
 extern void vmx_save_state(struct vcpu *v);
 extern void vmx_load_state(struct vcpu *v);
 extern void show_registers(struct pt_regs *regs);
@@ -58,6 +60,7 @@ extern void vmx_die_if_kernel(char *str, struct pt_regs *regs, long err);
 extern void vmx_send_assist_req(struct vcpu *v);
 extern void deliver_pal_init(struct vcpu *vcpu);
 extern void vmx_pend_pal_init(struct domain *d);
+extern void vmx_lazy_load_fpu(struct vcpu *vcpu);
 
 static inline vcpu_iodata_t *get_vio(struct domain *d, unsigned long cpu)
 {