kernel-2.6.18-194.11.1.el5.src.rpm

From: Bill Burns <bburns@redhat.com>
Date: Thu, 28 Aug 2008 13:12:33 -0400
Subject: [xen] Intel EPT Patch
Message-id: 20080828171234.10349.46817.sendpatchset@localhost.localdomain
O-Subject: [RHEL5.3 PATCH 2/4 v3] Xen Intel EPT Patch
Bugzilla: 426679
RH-Acked-by: Chris Lalancette <clalance@redhat.com>
RH-Acked-by: Don Dutile <ddutile@redhat.com>
RH-Acked-by: Rik van Riel <riel@redhat.com>

Fixes bz 426679

This is the main bulk of the EPT functionality.

Intel Extended Page Table (EPT) support.

changeset: x86, vmx: Enable EPT (Extended PageTable) support on new Intel processors.
changeset 17404:        9b635405ef90
parent 17403:   e1962ac0fb1c
child 17405:    32e3c81ada56
author:         Keir Fraser <keir.fraser@citrix.com>
date:   Wed Apr 09 11:30:32 2008 +0100 (4 months ago)
files:  tools/libxc/xc_hvm_build.c xen/arch/x86/domain.c xen/arch/x86/hvm/hvm.c
        xen/arch/x86/hvm/vmx/vmcs.c xen/arch/x86/hvm/vmx/vmx.c xen/arch/x86/mm.c
        xen/arch/x86/mm/hap/Makefile xen/arch/x86/mm/hap/p2m-ept.c
        xen/arch/x86/mm/p2m.c xen/arch/x86/mm/paging.c xen/common/domctl.c
        xen/drivers/passthrough/vtd/iommu.c xen/include/asm-x86/domain.h
        xen/include/asm-x86/hvm/domain.h xen/include/asm-x86/hvm/svm/vmcb.h
        xen/include/asm-x86/hvm/vmx/vmcs.h xen/include/asm-x86/hvm/vmx/vmx.h
        xen/include/asm-x86/p2m.h xen/include/asm-x86/paging.h
        xen/include/public/hvm/params.h xen/include/xen/hypercall.h
description:    x86, vmx: Enable EPT (Extended PageTable) support on new Intel processors.
We use the EPT page table as P2M (guest physical to machine
mapping), removing the linear page table when EPT is used for the
domain (see the new file p2m-ept.c). We did this by adding three
operations in the p2m_domain. If VT-d is enabled, the EPT page table
will be used as the VT-d page table as well (i.e. shared).

Signed-off-by: Xin Li <xin.b.li@intel.com>
Signed-off-by: Jun Nakajima <jun.nakajima@intel.com>
Signed-off-by: Xiaohui Xin <Xiaohui.xin@intel.com>
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
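
As context for the change below (and not part of the patch itself), here is a
minimal, self-contained C sketch of the dispatch pattern the description refers
to: the p2m code carries per-domain function pointers for its entry operations,
and an init routine swaps in EPT-backed implementations when hardware-assisted
paging is available. All names here (demo_p2m, linear_set, ept_set, ...) are
illustrative stand-ins, not the Xen symbols; in the patch the real hooks are
set_entry, get_entry and get_entry_fast in struct p2m_domain, installed by
p2m_init() and overridden by ept_p2m_init().

    #include <stdio.h>
    #include <stdint.h>

    typedef uint64_t mfn_t;

    /* Cut-down stand-in for the hooks the patch adds to struct p2m_domain. */
    struct demo_p2m {
        int   (*set_entry)(struct demo_p2m *p, unsigned long gfn, mfn_t mfn);
        mfn_t (*get_entry)(struct demo_p2m *p, unsigned long gfn);
        mfn_t table[16];           /* toy one-level "p2m" backing store */
        int   use_ept;             /* pretend hap/EPT is available */
    };

    static int   linear_set(struct demo_p2m *p, unsigned long gfn, mfn_t mfn)
    { p->table[gfn] = mfn; return 1; }
    static mfn_t linear_get(struct demo_p2m *p, unsigned long gfn)
    { return p->table[gfn]; }

    /* A real EPT backend would walk EPT tables; only the dispatch matters here. */
    static int   ept_set(struct demo_p2m *p, unsigned long gfn, mfn_t mfn)
    { p->table[gfn] = mfn; return 1; }
    static mfn_t ept_get(struct demo_p2m *p, unsigned long gfn)
    { return p->table[gfn]; }

    static void demo_p2m_init(struct demo_p2m *p)
    {
        p->set_entry = linear_set;      /* default: linear p2m */
        p->get_entry = linear_get;
        if (p->use_ept) {               /* mirrors the hap_enabled()+Intel check */
            p->set_entry = ept_set;
            p->get_entry = ept_get;
        }
    }

    int main(void)
    {
        struct demo_p2m p = { .use_ept = 1 };
        demo_p2m_init(&p);
        p.set_entry(&p, 3, 0x1234);
        printf("gfn 3 -> mfn %#llx\n", (unsigned long long)p.get_entry(&p, 3));
        return 0;
    }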

diff --git a/arch/x86/domain.c b/arch/x86/domain.c
index 298432c..319676c 100644
--- a/arch/x86/domain.c
+++ b/arch/x86/domain.c
@@ -625,7 +625,20 @@ int arch_set_info_guest(
     }
     else
     {
+        u32* ident_pt;
+
         hvm_load_cpu_guest_regs(v, &v->arch.guest_context.user_regs);
+        /* Fill it with 32-bit, non-PAE superpage entries, each mapping 4MB
+         * of virtual address space onto the same physical address range */
+        if ( v->vcpu_id == 0 )
+        {
+            ident_pt = map_domain_page(mfn_x(gfn_to_mfn(v->domain,
+                      (HVM_IDENT_PT_PAGE >> PAGE_SHIFT))));
+            for ( i = 0; i < PAGE_SIZE / sizeof(*ident_pt); i++ )
+                ident_pt[i] = (i << 22) | _PAGE_PRESENT | _PAGE_RW | _PAGE_USER
+                  | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE;
+            unmap_domain_page(ident_pt);
+        }
     }
 
     memset(v->arch.guest_context.debugreg, 0,
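
The hunk above builds a 32-bit, non-PAE identity page table: each of the 1024
page-directory entries is a 4MB superpage whose physical address equals its
virtual address. The stand-alone sketch below (not part of the patch; the
PAGE_* values are the usual x86 PDE flag bits, restated here only for
illustration) shows the arithmetic behind (i << 22) | flags.

    #include <stdio.h>
    #include <stdint.h>

    /* Illustrative values only; in Xen these come from the page.h headers. */
    #define PAGE_PRESENT  0x001u
    #define PAGE_RW       0x002u
    #define PAGE_USER     0x004u
    #define PAGE_ACCESSED 0x020u
    #define PAGE_DIRTY    0x040u
    #define PAGE_PSE      0x080u

    int main(void)
    {
        /* Each of the 1024 entries maps a 4MB superpage at (i << 22),
         * so virtual address == physical address across the whole 4GB. */
        for (unsigned i = 0; i < 1024; i += 512) {
            uint32_t pde = (i << 22) | PAGE_PRESENT | PAGE_RW | PAGE_USER |
                           PAGE_ACCESSED | PAGE_DIRTY | PAGE_PSE;
            printf("entry %4u maps 0x%08x -> pde 0x%08x\n", i, i << 22, pde);
        }
        return 0;
    }
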
diff --git a/arch/x86/hvm/vmx/vmcs.c b/arch/x86/hvm/vmx/vmcs.c
index 08bcc54..bc295d4 100644
--- a/arch/x86/hvm/vmx/vmcs.c
+++ b/arch/x86/hvm/vmx/vmcs.c
@@ -64,7 +64,7 @@ static u32 adjust_vmx_controls(u32 ctl_min, u32 ctl_opt, u32 msr)
 }
 
 #define vmx_has_secondary_exec_ctls \
-    (_vmx_cpu_based_exec_control & ACTIVATE_SECONDARY_CONTROLS)
+    (_vmx_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)
 
 void vmx_init_vmcs_config(void)
 {
@@ -75,6 +75,8 @@ void vmx_init_vmcs_config(void)
     u32 _vmx_vmexit_control;
     u32 _vmx_vmentry_control;
 
+    rdmsr(MSR_IA32_VMX_BASIC_MSR, vmx_msr_low, vmx_msr_high);
+
     min = (PIN_BASED_EXT_INTR_MASK |
            PIN_BASED_NMI_EXITING);
     opt = 0; /*PIN_BASED_VIRTUAL_NMIS*/
@@ -83,13 +85,15 @@ void vmx_init_vmcs_config(void)
 
     min = (CPU_BASED_HLT_EXITING |
            CPU_BASED_INVLPG_EXITING |
+           CPU_BASED_CR3_LOAD_EXITING |
+           CPU_BASED_CR3_STORE_EXITING |
            CPU_BASED_MWAIT_EXITING |
            CPU_BASED_MOV_DR_EXITING |
            CPU_BASED_ACTIVATE_IO_BITMAP |
            CPU_BASED_USE_TSC_OFFSETING);
-    opt = CPU_BASED_ACTIVATE_MSR_BITMAP;
-    opt |= CPU_BASED_TPR_SHADOW;
-    opt |= ACTIVATE_SECONDARY_CONTROLS;
+    opt = (CPU_BASED_ACTIVATE_MSR_BITMAP |
+           CPU_BASED_TPR_SHADOW |
+           CPU_BASED_ACTIVATE_SECONDARY_CONTROLS);
     _vmx_cpu_based_exec_control = adjust_vmx_controls(
         min, opt, MSR_IA32_VMX_PROCBASED_CTLS_MSR);
 #ifdef __x86_64__
@@ -107,11 +111,29 @@ void vmx_init_vmcs_config(void)
     if ( vmx_has_secondary_exec_ctls )
     {
         min = 0;
-        opt = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+
+        opt = (SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
+              SECONDARY_EXEC_ENABLE_EPT);
         _vmx_secondary_exec_control = adjust_vmx_controls(
             min, opt, MSR_IA32_VMX_PROCBASED_CTLS2);
     }
 
+    if ( _vmx_secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT )
+    {
+        /*
+         * To use EPT we expect to be able to clear certain intercepts.
+         * We check VMX_BASIC_MSR[55] to correctly handle default1 controls.
+         */
+        uint32_t must_be_one, must_be_zero, msr = MSR_IA32_VMX_PROCBASED_CTLS_MSR;
+        if ( vmx_msr_high & (1u << 23) )
+            msr = MSR_IA32_VMX_TRUE_PROCBASED_CTLS;
+        rdmsr(msr, must_be_one, must_be_zero);
+        if ( must_be_one & (CPU_BASED_INVLPG_EXITING |
+                            CPU_BASED_CR3_LOAD_EXITING |
+                            CPU_BASED_CR3_STORE_EXITING) )
+            _vmx_secondary_exec_control &= ~SECONDARY_EXEC_ENABLE_EPT;
+    }
+
     min = VM_EXIT_ACK_INTR_ON_EXIT;
     opt = 0;
 #ifdef __x86_64__
@@ -124,7 +146,6 @@ void vmx_init_vmcs_config(void)
     _vmx_vmentry_control = adjust_vmx_controls(
         min, opt, MSR_IA32_VMX_ENTRY_CTLS_MSR);
 
-    rdmsr(MSR_IA32_VMX_BASIC_MSR, vmx_msr_low, vmx_msr_high);
 
     if ( smp_processor_id() == 0 )
     {
@@ -205,34 +226,69 @@ static void vmx_load_vmcs(struct vcpu *v)
     this_cpu(current_vmcs) = v->arch.hvm_vmx.vmcs;
 }
 
+struct foreign_vmcs {
+    struct vcpu *v;
+    unsigned int count;
+};
+static DEFINE_PER_CPU(struct foreign_vmcs, foreign_vmcs);
+
 void vmx_vmcs_enter(struct vcpu *v)
 {
+    struct foreign_vmcs *fv;
+
     /*
      * NB. We must *always* run an HVM VCPU on its own VMCS, except for
      * vmx_vmcs_enter/exit critical regions.
      */
-    if ( v == current )
+    if ( likely(v == current) )
         return;
 
-    vcpu_pause(v);
-    spin_lock(&v->arch.hvm_vmx.vmcs_lock);
+   fv = &this_cpu(foreign_vmcs);
 
-    vmx_clear_vmcs(v);
-    vmx_load_vmcs(v);
+   if ( fv->v == v )
+   {
+       BUG_ON(fv->count == 0);
+   }
+   else
+   {
+       BUG_ON(fv->v != NULL);
+       BUG_ON(fv->count != 0);
+
+       vcpu_pause(v);
+       spin_lock(&v->arch.hvm_vmx.vmcs_lock);
+
+       vmx_clear_vmcs(v);
+       vmx_load_vmcs(v);
+
+       fv->v = v;
+   }
+
+   fv->count++;
 }
 
 void vmx_vmcs_exit(struct vcpu *v)
 {
-    if ( v == current )
+   struct foreign_vmcs *fv;
+
+   if ( likely(v == current) )
         return;
 
-    /* Don't confuse vmx_do_resume (for @v or @current!) */
-    vmx_clear_vmcs(v);
-    if ( is_hvm_vcpu(current) )
-        vmx_load_vmcs(current);
+    fv = &this_cpu(foreign_vmcs);
+    BUG_ON(fv->v != v);
+    BUG_ON(fv->count == 0);
+
+    if ( --fv->count == 0 )
+    {
+        /* Don't confuse vmx_do_resume (for @v or @current!) */
+        vmx_clear_vmcs(v);
+        if ( is_hvm_vcpu(current) )
+            vmx_load_vmcs(current);
+
+        spin_unlock(&v->arch.hvm_vmx.vmcs_lock);
+        vcpu_unpause(v);
 
-    spin_unlock(&v->arch.hvm_vmx.vmcs_lock);
-    vcpu_unpause(v);
+        fv->v = NULL;
+    }
 }
 
 struct vmcs_struct *vmx_alloc_host_vmcs(void)
@@ -306,6 +362,7 @@ static void vmx_set_host_env(struct vcpu *v)
 
 static void construct_vmcs(struct vcpu *v)
 {
+    struct domain *d = v->domain;
     unsigned long cr0, cr4;
     union vmcs_arbytes arbytes;
 
@@ -315,10 +372,25 @@ static void construct_vmcs(struct vcpu *v)
     __vmwrite(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_control);
     __vmwrite(VM_EXIT_CONTROLS, vmx_vmexit_control);
     __vmwrite(VM_ENTRY_CONTROLS, vmx_vmentry_control);
-    __vmwrite(CPU_BASED_VM_EXEC_CONTROL, vmx_cpu_based_exec_control);
+
     v->arch.hvm_vmx.exec_control = vmx_cpu_based_exec_control;
-    if ( vmx_cpu_based_exec_control & ACTIVATE_SECONDARY_CONTROLS )
-        __vmwrite(SECONDARY_VM_EXEC_CONTROL, vmx_secondary_exec_control);
+    v->arch.hvm_vmx.secondary_exec_control = vmx_secondary_exec_control;
+
+    if ( paging_mode_hap(d) )
+    {
+        v->arch.hvm_vmx.exec_control &= ~(CPU_BASED_INVLPG_EXITING |
+                                          CPU_BASED_CR3_LOAD_EXITING |
+                                          CPU_BASED_CR3_STORE_EXITING);
+    }
+    else
+    {
+        v->arch.hvm_vmx.secondary_exec_control &= ~SECONDARY_EXEC_ENABLE_EPT;
+    }
+
+    __vmwrite(CPU_BASED_VM_EXEC_CONTROL, v->arch.hvm_vmx.exec_control);
+    if ( cpu_has_vmx_secondary_exec_control )
+        __vmwrite(SECONDARY_VM_EXEC_CONTROL,
+          v->arch.hvm_vmx.secondary_exec_control);
 
     if ( cpu_has_vmx_msr_bitmap )
         __vmwrite(MSR_BITMAP, virt_to_maddr(vmx_msr_bitmap));
@@ -428,7 +500,10 @@ static void construct_vmcs(struct vcpu *v)
     __vmwrite(VMCS_LINK_POINTER_HIGH, ~0UL);
 #endif
 
-    __vmwrite(EXCEPTION_BITMAP, HVM_TRAP_MASK | (1U << TRAP_page_fault));
+    if ( paging_mode_hap(d) )
+        __vmwrite(EXCEPTION_BITMAP, HVM_TRAP_MASK);
+    else
+        __vmwrite(EXCEPTION_BITMAP, HVM_TRAP_MASK | (1U << TRAP_page_fault));
 
     /* Guest CR0. */
     cr0 = read_cr0();
@@ -439,7 +514,14 @@ static void construct_vmcs(struct vcpu *v)
 
     /* Guest CR4. */
     cr4 = read_cr4();
-    __vmwrite(GUEST_CR4, cr4 & ~X86_CR4_PSE);
+    if ( paging_mode_hap(v->domain) )
+    {
+        hvm_update_guest_cr(v, 3);
+        hvm_update_guest_cr(v, 4);
+    }
+    else
+        __vmwrite(GUEST_CR4, cr4 & ~X86_CR4_PSE);
+
     v->arch.hvm_vmx.cpu_shadow_cr4 =
         cr4 & ~(X86_CR4_PGE | X86_CR4_VMXE | X86_CR4_PAE);
     __vmwrite(CR4_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr4);
@@ -454,6 +536,18 @@ static void construct_vmcs(struct vcpu *v)
     }
 #endif
 
+    if ( paging_mode_hap(d) )
+    {
+        v->arch.hvm_vmx.ept_control.etmt = EPT_DEFAULT_MT;
+        v->arch.hvm_vmx.ept_control.gaw  = EPT_DEFAULT_GAW;
+        v->arch.hvm_vmx.ept_control.asr  =
+          pagetable_get_pfn(d->arch.phys_table);
+        __vmwrite(EPT_POINTER, v->arch.hvm_vmx.ept_control.eptp);
+#ifdef CONFIG_X86_PAE
+        __vmwrite(EPT_POINTER_HIGH, v->arch.hvm_vmx.ept_control.eptp >> 32);
+#endif
+    }
+
     /* Memory-mapped based VLAPIC TPR optimization. */
     if ( cpu_has_vmx_mmap_vtpr_optimization )
     {
@@ -618,6 +712,10 @@ void vmcs_dump_vcpu(void)
     print_section("64-bit RO Data Fields", 0x6400, 0x640A, 2);
     print_section("Natural 64-bit Guest-State Fields", 0x6800, 0x6826, 2);
     print_section("Natural 64-bit Host-State Fields", 0x6c00, 0x6c16, 2);
+    printk("secondary exec control = 0x%08x\n",
+      (uint32_t)__vmread(SECONDARY_VM_EXEC_CONTROL));
+    printk("EPT pointer = 0x%08x%08x\n",
+      (uint32_t)__vmread(EPT_POINTER_HIGH), (uint32_t)__vmread(EPT_POINTER));
 }
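
The EPT pointer written to the VMCS above is assembled from a small bitfield
union: memory type, guest address width, and the pfn of the top-level EPT
table. The sketch below is an illustrative user-space copy of that layout,
reusing the field names from the vmcs.h hunk later in this patch (etmt, gaw,
asr); union eptp_demo and the sample pfn are made-up names/values.

    #include <stdio.h>
    #include <stdint.h>

    /* Same layout as the ept_control union the patch adds to
     * struct arch_vmx_struct (illustrative copy, not the Xen header). */
    union eptp_demo {
        struct {
            uint64_t etmt : 3;   /* EPT memory type, 6 = EPT_DEFAULT_MT */
            uint64_t gaw  : 3;   /* guest address width, 3 = EPT_DEFAULT_GAW */
            uint64_t rsvd : 6;
            uint64_t asr  : 52;  /* pfn of the top-level EPT table */
        };
        uint64_t eptp;
    };

    int main(void)
    {
        union eptp_demo e = { .etmt = 6, .gaw = 3, .asr = 0x1a2b3c };
        printf("EPTP written to the VMCS: 0x%016llx\n",
               (unsigned long long)e.eptp);
        return 0;
    }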
 
 
diff --git a/arch/x86/hvm/vmx/vmx.c b/arch/x86/hvm/vmx/vmx.c
index 3c01a84..ed108da 100644
--- a/arch/x86/hvm/vmx/vmx.c
+++ b/arch/x86/hvm/vmx/vmx.c
@@ -50,6 +50,7 @@
 #include <asm/hvm/vpt.h>
 #include <public/hvm/save.h>
 #include <asm/hvm/trace.h>
+#include <asm/paging.h>
 
 char *vmx_msr_bitmap;
 
@@ -79,6 +80,7 @@ static int vmx_vcpu_initialise(struct vcpu *v)
 
 static void vmx_vcpu_destroy(struct vcpu *v)
 {
+    ept_sync_all();
     vmx_destroy_vmcs(v);
 }
 
@@ -634,9 +636,104 @@ void vmx_vmcs_save(struct vcpu *v, struct hvm_hw_cpu *c)
     vmx_vmcs_exit(v);
 }
 
+/* the caller needs to check if the guest is switching to PAE mode */
+static void vmx_load_pdptrs(struct vcpu *v)
+{
+    uint64_t *guest_pdptrs;
+    unsigned long cr3 = v->arch.hvm_vmx.cpu_cr3, mfn;
+    char *p;
+
+    if ( cr3 & 0x1fUL )
+        goto crash;
+
+    mfn = mfn_x(gfn_to_mfn(v->domain, cr3 >> PAGE_SHIFT));
+    p = map_domain_page(mfn);
+    guest_pdptrs = (uint64_t *)(p + (cr3 & ~PAGE_MASK));
+
+    vmx_vmcs_enter(v);
+
+    __vmwrite(GUEST_PDPTR0, guest_pdptrs[0]);
+    __vmwrite(GUEST_PDPTR1, guest_pdptrs[1]);
+    __vmwrite(GUEST_PDPTR2, guest_pdptrs[2]);
+    __vmwrite(GUEST_PDPTR3, guest_pdptrs[3]);
+#ifdef __i386__
+    __vmwrite(GUEST_PDPTR0_HIGH, guest_pdptrs[0] >> 32);
+    __vmwrite(GUEST_PDPTR1_HIGH, guest_pdptrs[1] >> 32);
+    __vmwrite(GUEST_PDPTR2_HIGH, guest_pdptrs[2] >> 32);
+    __vmwrite(GUEST_PDPTR3_HIGH, guest_pdptrs[3] >> 32);
+#endif
+
+    vmx_vmcs_exit(v);
+    unmap_domain_page(p);
+    return;
+
+crash:
+    domain_crash(v->domain);
+}
+
+static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr)
+{
+    unsigned long cr4;
+
+    if ( !hap_enabled(v->domain) )
+        return;
+
+    ASSERT((v == current) || !vcpu_runnable(v));
+
+    vmx_vmcs_enter(v);
+
+    switch (cr)
+    {
+    case 0:
+        if ( vmx_paging_enabled(v) )
+            v->arch.hvm_vmx.exec_control &= ~(CPU_BASED_CR3_LOAD_EXITING |
+                                              CPU_BASED_CR3_STORE_EXITING);
+        else
+            v->arch.hvm_vmx.exec_control |= (CPU_BASED_CR3_LOAD_EXITING |
+                                             CPU_BASED_CR3_STORE_EXITING);
+
+        __vmwrite(CPU_BASED_VM_EXEC_CONTROL, v->arch.hvm_vmx.exec_control);
+        break;
+
+    case 3:
+        if ( vmx_paging_enabled(v) )
+        {
+            if ( vmx_pae_enabled(v) && !vmx_long_mode_enabled(v) )
+                vmx_load_pdptrs(v);
+            __vmwrite(GUEST_CR3, v->arch.hvm_vmx.cpu_cr3);
+        }
+        else
+            __vmwrite(GUEST_CR3, HVM_IDENT_PT_PAGE);
+        break;
+
+    case 4:
+        if ( vmx_paging_enabled(v) )
+        {
+            cr4 = HVM_CR4_HOST_MASK & ~X86_CR4_PAE;
+            cr4 |= v->arch.hvm_vmx.cpu_shadow_cr4;
+            if ( vmx_pae_enabled(v) && !vmx_long_mode_enabled(v) )
+                    vmx_load_pdptrs(v);
+        }
+        else
+        {
+            cr4 = __vmread(GUEST_CR4) | HVM_CR4_HOST_MASK;
+            cr4 |= X86_CR4_PSE;
+            cr4 &= ~X86_CR4_PAE;
+        }
+
+        __vmwrite(GUEST_CR4, cr4);
+        break;
+
+    default:
+        BUG();
+    }
+
+    vmx_vmcs_exit(v);
+}
+
 int vmx_vmcs_restore(struct vcpu *v, struct hvm_hw_cpu *c)
 {
-    unsigned long mfn, old_base_mfn;
+    unsigned long mfn = 0, old_base_mfn;
 
     vmx_vmcs_enter(v);
 
@@ -645,8 +742,13 @@ int vmx_vmcs_restore(struct vcpu *v, struct hvm_hw_cpu *c)
     __vmwrite(GUEST_RFLAGS, c->rflags);
 
     v->arch.hvm_vmx.cpu_cr0 = (c->cr0 | X86_CR0_PE | X86_CR0_PG |
-                               X86_CR0_NE | X86_CR0_WP | X86_CR0_ET);
+                               X86_CR0_NE | X86_CR0_ET);
+
+    if ( paging_mode_shadow(v->domain) )
+        v->arch.hvm_vmx.cpu_cr0 |= X86_CR0_WP;
+
     __vmwrite(GUEST_CR0, v->arch.hvm_vmx.cpu_cr0);
+
     v->arch.hvm_vmx.cpu_shadow_cr0 = c->cr0;
     __vmwrite(CR0_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr0);
 
@@ -659,7 +761,7 @@ int vmx_vmcs_restore(struct vcpu *v, struct hvm_hw_cpu *c)
            __func__, c->cr3, c->cr0, c->cr4);
 #endif
 
-    if ( !vmx_paging_enabled(v) )
+    if ( !vmx_paging_enabled(v) || paging_mode_hap(v->domain) )
     {
         HVM_DBG_LOG(DBG_LEVEL_VMMU, "%s: paging not enabled.", __func__);
         goto skip_cr3;
@@ -686,10 +788,14 @@ int vmx_vmcs_restore(struct vcpu *v, struct hvm_hw_cpu *c)
     if ( vmx_long_mode_enabled(v) )
         vmx_enable_long_mode(v);
 
-    __vmwrite(GUEST_CR4, (c->cr4 | HVM_CR4_HOST_MASK));
     v->arch.hvm_vmx.cpu_shadow_cr4 = c->cr4;
     __vmwrite(CR4_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr4);
 
+    if ( paging_mode_shadow(v->domain) )
+        __vmwrite(GUEST_CR4, (c->cr4 | HVM_CR4_HOST_MASK));
+    else
+        v->arch.hvm_vmx.cpu_cr3 = c->cr3;
+
     __vmwrite(GUEST_IDTR_LIMIT, c->idtr_limit);
     __vmwrite(GUEST_IDTR_BASE, c->idtr_base);
 
@@ -746,6 +852,13 @@ int vmx_vmcs_restore(struct vcpu *v, struct hvm_hw_cpu *c)
 
     paging_update_paging_modes(v);
 
+    if ( paging_mode_hap(v->domain) )
+    {
+        vmx_update_guest_cr(v, 0);
+        vmx_update_guest_cr(v, 3);
+        vmx_update_guest_cr(v, 4);
+    }
+
     if ( c->pending_valid )
     {
         vmx_vmcs_enter(v);
@@ -1236,7 +1349,8 @@ static struct hvm_function_table vmx_function_table = {
     .inject_exception     = vmx_inject_exception,
     .init_ap_context      = vmx_init_ap_context,
     .init_hypercall_page  = vmx_init_hypercall_page,
-    .event_injection_faulted = vmx_event_injection_faulted
+    .event_injection_faulted = vmx_event_injection_faulted,
+    .update_guest_cr      = vmx_update_guest_cr
 };
 
 int start_vmx(void)
@@ -1291,6 +1405,10 @@ int start_vmx(void)
         return 0;
     }
 
+    vmx_function_table.hap_supported = cpu_has_vmx_ept;
+
+    ept_sync_all();
+
     vmx_save_host_msrs();
 
     if ( smp_processor_id() != 0 )
@@ -1974,7 +2092,7 @@ static int vmx_world_restore(struct vcpu *v, struct vmx_assist_context *c)
     v->arch.hvm_vmx.cpu_shadow_cr0 = c->cr0;
     __vmwrite(CR0_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr0);
 
-    if ( !vmx_paging_enabled(v) )
+    if ( !vmx_paging_enabled(v) || paging_mode_hap(v->domain) )
         goto skip_cr3;
 
     if ( c->cr3 == v->arch.hvm_vmx.cpu_cr3 )
@@ -2011,10 +2129,18 @@ static int vmx_world_restore(struct vcpu *v, struct vmx_assist_context *c)
     else
         HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %x", c->cr3);
 
-    __vmwrite(GUEST_CR4, (c->cr4 | HVM_CR4_HOST_MASK));
     v->arch.hvm_vmx.cpu_shadow_cr4 = c->cr4;
     __vmwrite(CR4_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr4);
 
+    if ( paging_mode_shadow(v->domain) )
+        __vmwrite(GUEST_CR4, (c->cr4 | HVM_CR4_HOST_MASK));
+    else
+    {
+        v->arch.hvm_vmx.cpu_cr3 = c->cr3;
+        vmx_update_guest_cr(v, 3);
+        vmx_update_guest_cr(v, 4);
+    }
+
     __vmwrite(GUEST_IDTR_LIMIT, c->idtr_limit);
     __vmwrite(GUEST_IDTR_BASE, c->idtr_base);
 
@@ -2153,10 +2279,11 @@ static int vmx_assist(struct vcpu *v, int mode)
 static int vmx_set_cr0(unsigned long value)
 {
     struct vcpu *v = current;
-    unsigned long mfn;
+    struct domain *d = v->domain;
     unsigned long eip;
     int paging_enabled;
     unsigned long old_cr0;
+    unsigned long mfn;
     unsigned long old_base_mfn;
 
     HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR0 value = %lx", value);
@@ -2181,12 +2308,23 @@ static int vmx_set_cr0(unsigned long value)
     paging_enabled = old_cr0 & X86_CR0_PG;
 
     v->arch.hvm_vmx.cpu_cr0 = (value | X86_CR0_PE | X86_CR0_PG
-                               | X86_CR0_NE | X86_CR0_WP);
+                               | X86_CR0_NE);
+
+    if ( paging_mode_shadow(d) )
+        v->arch.hvm_vmx.cpu_cr0 |= X86_CR0_WP;
+
     __vmwrite(GUEST_CR0, v->arch.hvm_vmx.cpu_cr0);
 
     v->arch.hvm_vmx.cpu_shadow_cr0 = value;
     __vmwrite(CR0_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr0);
 
+    if ( paging_mode_hap(d) )
+    {
+        vmx_update_guest_cr(v, 0);
+        vmx_update_guest_cr(v, 3);
+        vmx_update_guest_cr(v, 4);
+    }
+
     /* Trying to enable paging. */
     if ( (value & X86_CR0_PE) && (value & X86_CR0_PG) && !paging_enabled )
     {
@@ -2207,37 +2345,40 @@ static int vmx_set_cr0(unsigned long value)
         /*
          * The guest CR3 must be pointing to the guest physical.
          */
-        mfn = get_mfn_from_gpfn(v->arch.hvm_vmx.cpu_cr3 >> PAGE_SHIFT);
-        if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) )
+        if ( paging_mode_shadow(v->domain) )
         {
-            gdprintk(XENLOG_ERR, "Invalid CR3 value = %lx (mfn=%lx)\n",
-                     v->arch.hvm_vmx.cpu_cr3, mfn);
-            domain_crash(v->domain);
-            return 0;
-        }
+            mfn = get_mfn_from_gpfn(v->arch.hvm_vmx.cpu_cr3 >> PAGE_SHIFT);
+            if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) )
+            {
+                gdprintk(XENLOG_ERR, "Invalid CR3 value = %lx (mfn=%lx)\n",
+                  v->arch.hvm_vmx.cpu_cr3, mfn);
+                domain_crash(v->domain);
+                return 0;
+            }
 
-        /*
-         * Now arch.guest_table points to machine physical.
-         */
-        old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
-        v->arch.guest_table = pagetable_from_pfn(mfn);
-        if ( old_base_mfn )
-            put_page(mfn_to_page(old_base_mfn));
+            /*
+             * Now arch.guest_table points to machine physical.
+             */
+            old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
+            v->arch.guest_table = pagetable_from_pfn(mfn);
+            if ( old_base_mfn )
+                put_page(mfn_to_page(old_base_mfn));
 
-        paging_update_paging_modes(v);
+            HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx",
+                        (unsigned long) (mfn << PAGE_SHIFT));
 
-        HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx",
-                    (unsigned long) (mfn << PAGE_SHIFT));
+            HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn = %lx",
+                        v->arch.hvm_vmx.cpu_cr3, mfn);
+        }
 
-        HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn = %lx",
-                    v->arch.hvm_vmx.cpu_cr3, mfn);
+        paging_update_paging_modes(v);
     }
 
     /* Trying to disable paging. */
     if ( ((value & (X86_CR0_PE | X86_CR0_PG)) != (X86_CR0_PE | X86_CR0_PG)) &&
          paging_enabled )
     {
-        if ( v->arch.hvm_vmx.cpu_cr3 )
+        if ( v->arch.hvm_vmx.cpu_cr3 && paging_mode_shadow(v->domain) )
         {
             put_page(mfn_to_page(get_mfn_from_gpfn(
                       v->arch.hvm_vmx.cpu_cr3 >> PAGE_SHIFT)));
@@ -2351,7 +2492,7 @@ static int mov_to_cr(int gp, int cr, struct cpu_user_regs *regs)
         /*
          * If paging is not enabled yet, simply copy the value to CR3.
          */
-        if ( !vmx_paging_enabled(v) )
+        if ( !vmx_paging_enabled(v) || paging_mode_hap(v->domain) )
         {
             v->arch.hvm_vmx.cpu_cr3 = value;
             break;
@@ -2403,7 +2544,7 @@ static int mov_to_cr(int gp, int cr, struct cpu_user_regs *regs)
 
         if ( (value & X86_CR4_PAE) && !(old_cr & X86_CR4_PAE) )
         {
-            if ( vmx_pgbit_test(v) )
+            if ( vmx_pgbit_test(v) && paging_mode_shadow(v->domain) )
             {
                 /* The guest is a 32-bit PAE guest. */
 #if CONFIG_PAGING_LEVELS >= 3
@@ -2441,10 +2582,17 @@ static int mov_to_cr(int gp, int cr, struct cpu_user_regs *regs)
             }
         }
 
-        __vmwrite(GUEST_CR4, value | HVM_CR4_HOST_MASK);
         v->arch.hvm_vmx.cpu_shadow_cr4 = value;
         __vmwrite(CR4_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr4);
 
+        if ( paging_mode_shadow(v->domain) )
+            __vmwrite(GUEST_CR4, (value | HVM_CR4_HOST_MASK));
+        else
+        {
+            vmx_update_guest_cr(v, 3);
+            vmx_update_guest_cr(v, 4);
+        }
+
         /*
          * Writing to CR4 to modify the PSE, PGE, or PAE flag invalidates
          * all TLB entries except global entries.
@@ -2717,7 +2865,7 @@ static void check_vlapic_msr_for_vtpr(struct vcpu *v)
         vcpu_vlapic(v)->mmap_vtpr_enabled = 1;
 
         v->arch.hvm_vcpu.u.vmx.exec_control |=
-            ( ACTIVATE_SECONDARY_CONTROLS | CPU_BASED_TPR_SHADOW );
+            ( CPU_BASED_ACTIVATE_SECONDARY_CONTROLS | CPU_BASED_TPR_SHADOW );
         __vmwrite(CPU_BASED_VM_EXEC_CONTROL,
                   v->arch.hvm_vcpu.u.vmx.exec_control);
         tmp  = __vmread(SECONDARY_VM_EXEC_CONTROL);
@@ -2885,6 +3033,17 @@ static void vmx_reflect_exception(struct vcpu *v)
     }
 }
 
+static void ept_handle_violation(unsigned long qualification, paddr_t gpa)
+{
+    if ( unlikely(((qualification >> 7) & 0x3) != 0x3) )
+    {
+        domain_crash(current->domain);
+        return;
+    }
+    /* must be MMIO */
+    handle_mmio(gpa);
+}
+
 static void vmx_failed_vmentry(unsigned int exit_reason,
                                struct cpu_user_regs *regs)
 {
@@ -2925,6 +3084,15 @@ asmlinkage void vmx_vmexit_handler(struct cpu_user_regs *regs)
     unsigned long exit_qualification, inst_len = 0;
     struct vcpu *v = current;
 
+    if ( paging_mode_hap(v->domain) && hvm_paging_enabled(v) )
+    {
+        __asm__ __volatile__ ("mov"__OS" %%cr2, %0"
+                              : "=r"(v->arch.hvm_vmx.cpu_cr2));
+
+        /* __hvm_copy() needs this when paging is enabled. */
+        v->arch.hvm_vmx.cpu_cr3 = __vmread(GUEST_CR3);
+    }
+ 
     exit_reason = __vmread(VM_EXIT_REASON);
 
     HVMTRACE_2D(VMEXIT, v, __vmread(GUEST_RIP), exit_reason);
@@ -3116,6 +3284,21 @@ asmlinkage void vmx_vmexit_handler(struct cpu_user_regs *regs)
         break;
     }
 
+    case EXIT_REASON_EPT_VIOLATION:
+    {
+        paddr_t gpa = __vmread(GUEST_PHYSICAL_ADDRESS);
+#ifdef __i386__
+        gpa += (unsigned long long)__vmread(GUEST_PHYSICAL_ADDRESS_HIGH) << 32;
+#endif
+        exit_qualification = __vmread(EXIT_QUALIFICATION);
+        ept_handle_violation(exit_qualification, gpa);
+        break;
+    }
+
+    case EXIT_REASON_EPT_MISCONFIG:
+        domain_crash(current->domain);
+        break;
+
     default:
     exit_and_crash:
         gdprintk(XENLOG_ERR, "Bad vmexit (reason %x)\n", exit_reason);
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 79b2596..3374ee6 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -3,3 +3,4 @@ subdir-y += hap
 
 obj-y += paging.o
 obj-y += p2m.o
+obj-y += p2m-ept.o
diff --git a/arch/x86/mm/hap/hap.c b/arch/x86/mm/hap/hap.c
index 67a5e0e..5316fbd 100644
--- a/arch/x86/mm/hap/hap.c
+++ b/arch/x86/mm/hap/hap.c
@@ -593,6 +593,7 @@ int hap_invlpg(struct vcpu *v, unsigned long va)
  */
 void hap_update_cr3(struct vcpu *v, int do_locking)
 {
+   hvm_update_guest_cr(v, 3);
 }
 
 void hap_update_paging_modes(struct vcpu *v)
@@ -626,8 +627,11 @@ void hap_update_paging_modes(struct vcpu *v)
         mfn_t mmfn = hap_make_monitor_table(v);
         v->arch.monitor_table = pagetable_from_mfn(mmfn);
         make_cr3(v, mfn_x(mmfn));
+        hvm_update_host_cr3(v);
     }
 
+    hap_update_cr3(v, 1);
+
     hap_unlock(d);
 }
 
diff --git a/arch/x86/mm/p2m-ept.c b/arch/x86/mm/p2m-ept.c
new file mode 100644
index 0000000..10921de
--- /dev/null
+++ b/arch/x86/mm/p2m-ept.c
@@ -0,0 +1,208 @@
+/*
+ * p2m-ept.c: use the EPT page table as p2m
+ * Copyright (c) 2007, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+#include <xen/config.h>
+#include <xen/domain_page.h>
+#include <xen/sched.h>
+#include <asm/current.h>
+#include <asm/types.h>
+#include <asm/domain.h>
+#include <asm/hvm/vmx/vmx.h>
+
+#if 1 /* XEN_VERSION == 3 && XEN_SUBVERSION < 2 */
+
+static int ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn, p2m_type_t p2mt);
+mfn_t ept_get_entry(struct domain *d, unsigned long gfn, p2m_type_t *t);
+static mfn_t ept_get_entry_fast(unsigned long gfn, p2m_type_t *t);
+
+static inline int 
+compat_ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn, u32 l1e_flags)
+{
+    return ept_set_entry(d, gfn, mfn, p2m_ram_rw);
+}
+
+static mfn_t compat_ept_get_entry(struct domain *d, unsigned long gfn)
+{
+    p2m_type_t dummy;
+    return ept_get_entry(d, gfn, &dummy);
+}
+
+static mfn_t compat_ept_get_entry_fast(unsigned long gfn)
+{
+    p2m_type_t dummy;
+    return ept_get_entry_fast(gfn, &dummy);
+}
+#else
+
+#define compat_ept_set_entry ept_set_entry
+#define compat_ept_get_entry ept_get_entry
+#define compat_ept_get_entry_fast ept_get_entry_fast
+
+#endif
+
+static int ept_next_level(struct domain *d, bool_t read_only,
+                          ept_entry_t **table, unsigned long *gfn_remainder,
+                          u32 shift)
+{
+    ept_entry_t *ept_entry, *next;
+    u32 index;
+
+    index = *gfn_remainder >> shift;
+    *gfn_remainder &= (1UL << shift) - 1;
+
+    ept_entry = (*table) + index;
+
+    if ( !(ept_entry->epte & 0x7) )
+    {
+        struct page_info *pg;
+
+        if ( read_only )
+            return 0;
+
+        pg = d->arch.p2m.alloc_page(d);
+        if ( pg == NULL )
+            return 0;
+        pg->count_info = 1;
+        pg->u.inuse.type_info = 1 | PGT_validated;
+        list_add_tail(&pg->list, &d->arch.p2m.pages);
+
+        ept_entry->emt = 0;
+        ept_entry->sp_avail = 0;
+        ept_entry->avail1 = 0;
+        ept_entry->mfn = page_to_mfn(pg);
+        ept_entry->rsvd = 0;
+        ept_entry->avail2 = 0;
+        /* last step */
+        ept_entry->r = ept_entry->w = ept_entry->x = 1;
+    }
+
+    next = map_domain_page(ept_entry->mfn);
+    unmap_domain_page(*table);
+    *table = next;
+
+    return 1;
+}
+
+static int
+ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn, p2m_type_t p2mt)
+{
+    ept_entry_t *table =
+        map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
+    unsigned long gfn_remainder = gfn;
+    ept_entry_t *ept_entry;
+    u32 index;
+    int i, rv = 0;
+
+    /* should check if gfn obeys GAW here */
+
+    for ( i = EPT_DEFAULT_GAW; i > 0; i-- )
+        if ( !ept_next_level(d, 0, &table, &gfn_remainder, i * EPT_TABLE_ORDER) )
+            goto out;
+
+    index = gfn_remainder;
+    ept_entry = table + index;
+
+    if ( mfn_valid(mfn_x(mfn)) )
+    {
+        /* Track the highest gfn for which we have ever had a valid mapping */
+        if ( gfn > d->arch.p2m.max_mapped_pfn )
+            d->arch.p2m.max_mapped_pfn = gfn;
+
+        ept_entry->emt = EPT_DEFAULT_MT;
+        ept_entry->sp_avail = 0;
+        ept_entry->avail1 = p2mt;
+        ept_entry->mfn = mfn_x(mfn);
+        ept_entry->rsvd = 0;
+        ept_entry->avail2 = 0;
+        /* last step */
+        ept_entry->r = ept_entry->w = ept_entry->x = 1;
+    }
+    else
+        ept_entry->epte = 0;
+
+    ept_sync_all();
+
+    /* Success */
+    rv = 1;
+
+ out:
+    unmap_domain_page(table);
+    return rv;
+}
+
+/* Read ept p2m entries */
+mfn_t ept_get_entry(struct domain *d, unsigned long gfn, p2m_type_t *t)
+{
+    ept_entry_t *table =
+        map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
+    unsigned long gfn_remainder = gfn;
+    ept_entry_t *ept_entry;
+    u32 index;
+    int i;
+    mfn_t mfn = _mfn(INVALID_MFN);
+
+    *t = p2m_mmio_dm;
+
+    /* This pfn is higher than the highest the p2m map currently holds */
+    if ( gfn > d->arch.p2m.max_mapped_pfn )
+        goto out;
+
+    /* should check if gfn obeys GAW here */
+
+    for ( i = EPT_DEFAULT_GAW; i > 0; i-- )
+        if ( !ept_next_level(d, 1, &table, &gfn_remainder, i * EPT_TABLE_ORDER) )
+            goto out;
+
+    index = gfn_remainder;
+    ept_entry = table + index;
+
+    if ( (ept_entry->epte & 0x7) == 0x7 )
+    {
+        if ( ept_entry->avail1 != p2m_invalid )
+        {
+            *t = ept_entry->avail1;
+            mfn = _mfn(ept_entry->mfn);
+        }
+    }
+
+ out:
+    unmap_domain_page(table);
+    return mfn;
+}
+
+static mfn_t ept_get_entry_fast(unsigned long gfn, p2m_type_t *t)
+{
+    return ept_get_entry(current->domain, gfn, t);
+}
+
+void ept_p2m_init(struct domain *d)
+{
+    d->arch.p2m.set_entry = compat_ept_set_entry;
+    d->arch.p2m.get_entry = compat_ept_get_entry;
+    d->arch.p2m.get_entry_fast = compat_ept_get_entry_fast;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
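
For reference, the index extraction used by ept_set_entry()/ept_get_entry()
above can be exercised in isolation: with EPT_TABLE_ORDER = 9 and
EPT_DEFAULT_GAW = 3 the walk covers four levels, peeling 9 bits of the gfn per
level and leaving the low 9 bits as the leaf index. The stand-alone sketch
below (not part of the patch; the sample gfn is arbitrary) just prints the
per-level indices.

    #include <stdio.h>

    #define EPT_TABLE_ORDER 9       /* 512 entries per level, as in the patch */
    #define EPT_DEFAULT_GAW 3       /* four-level walk */

    int main(void)
    {
        unsigned long gfn = 0x12345678UL;   /* arbitrary example gfn */
        unsigned long remainder = gfn;

        /* Same index extraction the ept_next_level() loop performs. */
        for (int i = EPT_DEFAULT_GAW; i > 0; i--) {
            unsigned shift = i * EPT_TABLE_ORDER;
            unsigned index = remainder >> shift;
            remainder &= (1UL << shift) - 1;
            printf("shift %2u: index %u\n", shift, index);
        }
        printf("leaf index = %lu\n", remainder);
        return 0;
    }
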
diff --git a/arch/x86/mm/p2m.c b/arch/x86/mm/p2m.c
index ca7ff41..1205840 100644
--- a/arch/x86/mm/p2m.c
+++ b/arch/x86/mm/p2m.c
@@ -27,6 +27,7 @@
 #include <asm/page.h>
 #include <asm/paging.h>
 #include <asm/p2m.h>
+#include <asm/hvm/vmx/vmx.h> /* ept_p2m_init() */
 
 /* Debugging and auditing of the P2M code? */
 #define P2M_AUDIT     0
@@ -92,8 +93,6 @@
 #undef page_to_mfn
 #define page_to_mfn(_pg) (_mfn((_pg) - frame_table))
 
-
-
 // Find the next level's P2M entry, checking for out-of-range gfn's...
 // Returns NULL on error.
 //
@@ -214,7 +213,7 @@ p2m_next_level(struct domain *d, mfn_t *table_mfn, void **table,
 
 // Returns 0 on error (out of memory)
 static int
-set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn, u32 l1e_flags)
+p2m_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn, u32 l1e_flags)
 {
     // XXX -- this might be able to be faster iff current->domain == d
     mfn_t table_mfn = pagetable_get_mfn(d->arch.phys_table);
@@ -304,15 +303,30 @@ set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn, u32 l1e_flags)
     return rv;
 }
 
+static mfn_t
+p2m_gfn_to_mfn(struct domain *d, unsigned long gfn);
 
 /* Init the datastructures for later use by the p2m code */
 void p2m_init(struct domain *d)
 {
     p2m_lock_init(d);
     INIT_LIST_HEAD(&d->arch.p2m.pages);
-}
 
+    d->arch.p2m.set_entry = p2m_set_entry;
+    d->arch.p2m.get_entry = p2m_gfn_to_mfn;
+    d->arch.p2m.get_entry_fast = p2m_gfn_to_mfn_fast;
+
+    if ( is_hvm_domain(d) && hap_enabled(d) &&
+         (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) )
+        ept_p2m_init(d);
+}
 
+static inline
+int set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn, u32 l1e_flags)
+{
+    return d->arch.p2m.set_entry(d, gfn, mfn, l1e_flags);
+}
+ 
 // Allocate a new p2m table for a domain.
 //
 // The structure of the p2m table is that of a pagetable for xen (i.e. it is
@@ -427,7 +441,7 @@ void p2m_teardown(struct domain *d)
 }
 
 mfn_t
-gfn_to_mfn_foreign(struct domain *d, unsigned long gpfn)
+p2m_gfn_to_mfn(struct domain *d, unsigned long gpfn)
 /* Read another domain's p2m entries */
 {
     mfn_t mfn;
diff --git a/arch/x86/mm/paging.c b/arch/x86/mm/paging.c
index 0be022f..52ea79a 100644
--- a/arch/x86/mm/paging.c
+++ b/arch/x86/mm/paging.c
@@ -27,8 +27,6 @@
 #include <asm/hap.h>
 #include <asm/guest_access.h>
 
-#define hap_enabled(d) (hvm_funcs.hap_supported && is_hvm_domain(d))
-
 /* Printouts */
 #define PAGING_PRINTK(_f, _a...)                                     \
     debugtrace_printk("pg: %s(): " _f, __func__, ##_a)
diff --git a/include/asm-x86/domain.h b/include/asm-x86/domain.h
index dcf1f53..53028d2 100644
--- a/include/asm-x86/domain.h
+++ b/include/asm-x86/domain.h
@@ -139,6 +139,10 @@ struct p2m_domain {
     struct page_info * (*alloc_page  )(struct domain *d);
     void               (*free_page   )(struct domain *d, 
                                        struct page_info *pg);
+    int                (*set_entry   )(struct domain *d, unsigned long gfn,
+                                       mfn_t mfn, u32 l1e_flags);
+    mfn_t              (*get_entry   )(struct domain *d, unsigned long gfn);
+    mfn_t              (*get_entry_fast)(unsigned long gfn);
 
     /* Highest guest frame that's ever been mapped in the p2m */
     unsigned long max_mapped_pfn;
diff --git a/include/asm-x86/hvm/hvm.h b/include/asm-x86/hvm/hvm.h
index 117f167..21f7afd 100644
--- a/include/asm-x86/hvm/hvm.h
+++ b/include/asm-x86/hvm/hvm.h
@@ -153,6 +153,8 @@ struct hvm_function_table {
     void (*init_hypercall_page)(struct domain *d, void *hypercall_page);
 
     int  (*event_injection_faulted)(struct vcpu *v);
+
+    void (*update_guest_cr)(struct vcpu *v, unsigned int cr);
 };
 
 extern struct hvm_function_table hvm_funcs;
@@ -241,6 +243,12 @@ hvm_update_vtpr(struct vcpu *v, unsigned long value)
 
 void hvm_update_guest_cr3(struct vcpu *v, unsigned long guest_cr3);
 
+static inline void hvm_update_guest_cr(struct vcpu *v, unsigned int cr)
+{
+    if ( hvm_funcs.update_guest_cr )
+        hvm_funcs.update_guest_cr(v, cr);
+}
+
 static inline void 
 hvm_flush_guest_tlbs(void)
 {
@@ -318,4 +326,6 @@ static inline int hvm_event_injection_faulted(struct vcpu *v)
 /* These exceptions must always be intercepted. */
 #define HVM_TRAP_MASK (1U << TRAP_machine_check)
 
+#define HVM_IDENT_PT_PAGE 0xE8000
+
 #endif /* __ASM_X86_HVM_HVM_H__ */
diff --git a/include/asm-x86/hvm/vmx/vmcs.h b/include/asm-x86/hvm/vmx/vmcs.h
index c0e715e..355523b 100644
--- a/include/asm-x86/hvm/vmx/vmcs.h
+++ b/include/asm-x86/hvm/vmx/vmcs.h
@@ -47,6 +47,9 @@ struct vmx_msr_state {
     unsigned long msrs[VMX_MSR_COUNT];
 };
 
+#define EPT_DEFAULT_MT      6
+#define EPT_DEFAULT_GAW     3
+
 struct arch_vmx_struct {
     /* Virtual address of VMCS. */
     struct vmcs_struct  *vmcs;
@@ -62,8 +65,19 @@ struct arch_vmx_struct {
     int                  active_cpu;
     int                  launched;
 
+    union {
+        struct {
+            u64 etmt :3,
+                gaw  :3,
+                rsvd :6,
+                asr  :52;
+        };
+        u64 eptp;
+    } ept_control;
+
     /* Cache of cpu execution control. */
     u32                  exec_control;
+    u32                  secondary_exec_control;
 
     /* If there is vector installed in the INTR_INFO_FIELD. */
     u32                  vector_injected;
@@ -101,6 +115,8 @@ void vmx_vmcs_exit(struct vcpu *v);
 #define CPU_BASED_MWAIT_EXITING         0x00000400
 #define CPU_BASED_RDPMC_EXITING         0x00000800
 #define CPU_BASED_RDTSC_EXITING         0x00001000
+#define CPU_BASED_CR3_LOAD_EXITING      0x00008000
+#define CPU_BASED_CR3_STORE_EXITING     0x00010000
 #define CPU_BASED_CR8_LOAD_EXITING      0x00080000
 #define CPU_BASED_CR8_STORE_EXITING     0x00100000
 #define CPU_BASED_TPR_SHADOW            0x00200000
@@ -111,7 +127,7 @@ void vmx_vmcs_exit(struct vcpu *v);
 #define CPU_BASED_ACTIVATE_MSR_BITMAP   0x10000000
 #define CPU_BASED_MONITOR_EXITING       0x20000000
 #define CPU_BASED_PAUSE_EXITING         0x40000000
-#define ACTIVATE_SECONDARY_CONTROLS     0x80000000
+#define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS     0x80000000
 extern u32 vmx_cpu_based_exec_control;
 
 #define PIN_BASED_EXT_INTR_MASK         0x00000001
@@ -129,6 +145,7 @@ extern u32 vmx_vmexit_control;
 extern u32 vmx_vmentry_control;
 
 #define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
+#define SECONDARY_EXEC_ENABLE_EPT               0x00000002
 extern u32 vmx_secondary_exec_control;
 
 #define cpu_has_vmx_virtualize_apic_accesses \
@@ -140,6 +157,11 @@ extern u32 vmx_secondary_exec_control;
 
 #define cpu_has_vmx_msr_bitmap \
     (vmx_cpu_based_exec_control & CPU_BASED_ACTIVATE_MSR_BITMAP)
+#define cpu_has_vmx_secondary_exec_control \
+    (vmx_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)
+#define cpu_has_vmx_ept \
+    (vmx_secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT)
+
 extern char *vmx_msr_bitmap;
 
 /* GUEST_INTERRUPTIBILITY_INFO flags. */
@@ -183,10 +205,22 @@ enum vmcs_field {
     VIRTUAL_APIC_PAGE_ADDR_HIGH     = 0x00002013,
     APIC_ACCESS_ADDR                = 0x00002014,
     APIC_ACCESS_ADDR_HIGH           = 0x00002015, 
+    EPT_POINTER                     = 0x0000201a,
+    EPT_POINTER_HIGH                = 0x0000201b,
+    GUEST_PHYSICAL_ADDRESS          = 0x00002400,
+    GUEST_PHYSICAL_ADDRESS_HIGH     = 0x00002401,
     VMCS_LINK_POINTER               = 0x00002800,
     VMCS_LINK_POINTER_HIGH          = 0x00002801,
     GUEST_IA32_DEBUGCTL             = 0x00002802,
     GUEST_IA32_DEBUGCTL_HIGH        = 0x00002803,
+    GUEST_PDPTR0                    = 0x0000280a,
+    GUEST_PDPTR0_HIGH               = 0x0000280b,
+    GUEST_PDPTR1                    = 0x0000280c,
+    GUEST_PDPTR1_HIGH               = 0x0000280d,
+    GUEST_PDPTR2                    = 0x0000280e,
+    GUEST_PDPTR2_HIGH               = 0x0000280f,
+    GUEST_PDPTR3                    = 0x00002810,
+    GUEST_PDPTR3_HIGH               = 0x00002811,
     PIN_BASED_VM_EXEC_CONTROL       = 0x00004000,
     CPU_BASED_VM_EXEC_CONTROL       = 0x00004002,
     EXCEPTION_BITMAP                = 0x00004004,
diff --git a/include/asm-x86/hvm/vmx/vmx.h b/include/asm-x86/hvm/vmx/vmx.h
index 194fe3a..dc61578 100644
--- a/include/asm-x86/hvm/vmx/vmx.h
+++ b/include/asm-x86/hvm/vmx/vmx.h
@@ -23,9 +23,29 @@
 #include <asm/types.h>
 #include <asm/regs.h>
 #include <asm/processor.h>
-#include <asm/hvm/vmx/vmcs.h>
 #include <asm/i387.h>
+#include <asm/hvm/support.h>
 #include <asm/hvm/trace.h>
+#include <asm/hvm/vmx/vmcs.h>
+#include <asm/paging.h>
+#include <asm/p2m.h>
+
+typedef union {
+    struct {
+        u64 r       :   1,
+        w           :   1,
+        x           :   1,
+        emt         :   4,
+        sp_avail    :   1,
+        avail1      :   4,
+        mfn         :   45,
+        rsvd        :   5,
+        avail2      :   2;
+    };
+    u64 epte;
+} ept_entry_t;
+
+#define EPT_TABLE_ORDER     9
 
 void vmx_asm_vmexit_handler(struct cpu_user_regs);
 void vmx_asm_do_vmentry(void);
@@ -85,6 +105,8 @@ extern struct page_info *change_guest_physmap_for_vtpr(struct domain *d,
 
 #define EXIT_REASON_TPR_BELOW_THRESHOLD 43
 #define EXIT_REASON_APIC_ACCESS         44
+#define EXIT_REASON_EPT_VIOLATION       48
+#define EXIT_REASON_EPT_MISCONFIG       49
 
 /*
  * Interruption-information format
@@ -153,12 +175,14 @@ extern struct page_info *change_guest_physmap_for_vtpr(struct domain *d,
 #define VMREAD_OPCODE   ".byte 0x0f,0x78\n"
 #define VMRESUME_OPCODE ".byte 0x0f,0x01,0xc3\n"
 #define VMWRITE_OPCODE  ".byte 0x0f,0x79\n"
+#define INVEPT_OPCODE   ".byte 0x66,0x0f,0x38,0x80\n"   /* m128,r64/32 */
 #define VMXOFF_OPCODE   ".byte 0x0f,0x01,0xc4\n"
 #define VMXON_OPCODE    ".byte 0xf3,0x0f,0xc7\n"
 
+#define MODRM_EAX_08    ".byte 0x08\n" /* ECX, [EAX] */
 #define MODRM_EAX_06    ".byte 0x30\n" /* [EAX], with reg/opcode: /6 */
 #define MODRM_EAX_07    ".byte 0x38\n" /* [EAX], with reg/opcode: /7 */
-#define MODRM_EAX_ECX   ".byte 0xc1\n" /* [EAX], [ECX] */
+#define MODRM_EAX_ECX   ".byte 0xc1\n" /* EAX, ECX */
 
 static inline void __vmptrld(u64 addr)
 {
@@ -242,6 +266,21 @@ static inline void __vm_clear_bit(unsigned long field, unsigned int bit)
     __vmwrite(field, __vmread(field) & ~(1UL << bit));
 }
 
+static inline void __invept(int ext, u64 eptp, u64 gpa)
+{
+    struct {
+        u64 eptp, gpa;
+    } operand = {eptp, gpa};
+
+    __asm__ __volatile__ ( INVEPT_OPCODE
+                           MODRM_EAX_08
+                           /* CF==1 or ZF==1 --> rc = -1 */
+                           "ja 1f ; ud2 ; 1:\n"
+                           :
+                           : "a" (&operand), "c" (ext)
+                           : "memory");
+}
+
 static inline void __vmxoff (void)
 {
     __asm__ __volatile__ ( VMXOFF_OPCODE
@@ -263,6 +302,14 @@ static inline int __vmxon (u64 addr)
     return rc;
 }
 
+static inline void ept_sync_all(void)
+{
+    if ( !hap_enabled(current->domain) )
+        return;
+
+    __invept(2, 0, 0);
+}
+
 static inline void __vmx_inject_exception(struct vcpu *v, int trap, int type,
                                          int error_code, int ilen)
 {
@@ -314,4 +361,6 @@ static inline void vmx_inject_extint(struct vcpu *v, int trap, int error_code)
     __vmx_inject_exception(v, trap, INTR_TYPE_EXT_INTR, error_code, 0);
 }
 
+void ept_p2m_init(struct domain *d);
+
 #endif /* __ASM_X86_HVM_VMX_VMX_H__ */
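
The ept_entry_t union added above packs the EPT permission bits, memory type,
p2m type (avail1) and mfn into one 64-bit entry; p2m-ept.c earlier in this
patch treats an entry whose low three bits (r/w/x) are all set as a usable
mapping. Below is an illustrative user-space copy of the layout (same field
widths as the hunk above; the sample mfn is made up) showing how the packed
value looks.

    #include <stdio.h>
    #include <stdint.h>

    /* Illustrative copy of ept_entry_t from the vmx.h hunk above. */
    typedef union {
        struct {
            uint64_t r        : 1,
                     w        : 1,
                     x        : 1,
                     emt      : 4,
                     sp_avail : 1,
                     avail1   : 4,
                     mfn      : 45,
                     rsvd     : 5,
                     avail2   : 2;
        };
        uint64_t epte;
    } ept_entry_t;

    int main(void)
    {
        ept_entry_t e = { .epte = 0 };
        e.r = e.w = e.x = 1;          /* present, as p2m-ept.c sets it */
        e.emt = 6;                    /* EPT_DEFAULT_MT */
        e.mfn = 0xabcd;               /* made-up machine frame number */
        /* p2m-ept.c treats "low three bits all set" as a usable mapping. */
        printf("epte = 0x%016llx, present = %d\n",
               (unsigned long long)e.epte, (e.epte & 0x7) == 0x7);
        return 0;
    }
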
diff --git a/include/asm-x86/msr.h b/include/asm-x86/msr.h
index 09b6381..8508183 100644
--- a/include/asm-x86/msr.h
+++ b/include/asm-x86/msr.h
@@ -121,6 +121,7 @@ static inline void wrmsrl(unsigned int msr, __u64 val)
 #define MSR_IA32_VMX_CR4_FIXED0                 0x488
 #define MSR_IA32_VMX_CR4_FIXED1                 0x489
 #define MSR_IA32_VMX_PROCBASED_CTLS2            0x48b
+#define MSR_IA32_VMX_TRUE_PROCBASED_CTLS        0x48e
 #define IA32_FEATURE_CONTROL_MSR                0x3a
 #define IA32_FEATURE_CONTROL_MSR_LOCK           0x1
 #define IA32_FEATURE_CONTROL_MSR_ENABLE_VMXON   0x4
diff --git a/include/asm-x86/p2m.h b/include/asm-x86/p2m.h
index 53e66e4..40501e0 100644
--- a/include/asm-x86/p2m.h
+++ b/include/asm-x86/p2m.h
@@ -26,6 +26,49 @@
 #ifndef _XEN_P2M_H
 #define _XEN_P2M_H
 
+#include <asm/paging.h>
+
+#if 1 /* XEN_VERSION == 3 && XEN_SUBVERSION < 2 */
+
+typedef enum {
+    p2m_invalid = 0,            /* Nothing mapped here */
+    p2m_ram_rw = 1,             /* Normal read/write guest RAM */
+    p2m_ram_logdirty = 2,       /* Temporarily read-only for log-dirty */
+    p2m_ram_ro = 3,             /* Read-only; writes go to the device model */
+    p2m_mmio_dm = 4,            /* Reads and write go to the device model */
+    p2m_mmio_direct = 5,        /* Read/write mapping of genuine MMIO area */
+} p2m_type_t;
+
+/* We use bitmaps and masks to handle groups of types */
+#define p2m_to_mask(_t) (1UL << (_t))
+
+/* RAM types, which map to real machine frames */
+#define P2M_RAM_TYPES (p2m_to_mask(p2m_ram_rw)          \
+                       | p2m_to_mask(p2m_ram_logdirty)  \
+                       | p2m_to_mask(p2m_ram_ro))
+
+/* MMIO types, which don't have to map to anything in the frametable */
+#define P2M_MMIO_TYPES (p2m_to_mask(p2m_mmio_dm)        \
+                        | p2m_to_mask(p2m_mmio_direct))
+
+/* Read-only types, which must have the _PAGE_RW bit clear in their PTEs */
+#define P2M_RO_TYPES (p2m_to_mask(p2m_ram_logdirty)     \
+                      | p2m_to_mask(p2m_ram_ro))
+
+/* Useful predicates */
+#define p2m_is_ram(_t) (p2m_to_mask(_t) & P2M_RAM_TYPES)
+#define p2m_is_mmio(_t) (p2m_to_mask(_t) & P2M_MMIO_TYPES)
+#define p2m_is_readonly(_t) (p2m_to_mask(_t) & P2M_RO_TYPES)
+#define p2m_is_valid(_t) (p2m_to_mask(_t) & (P2M_RAM_TYPES | P2M_MMIO_TYPES))
+
+/* Extract the type from the PTE flags that store it */
+static inline p2m_type_t p2m_flags_to_type(unsigned long flags)
+{
+    /* Type is stored in the "available" bits, 9, 10 and 11 */
+    return (flags >> 9) & 0x7;
+}
+
+#endif
 
 /* The phys_to_machine_mapping is the reversed mapping of MPT for full
  * virtualization.  It is only used by shadow_mode_translate()==true
@@ -38,6 +81,12 @@
 /* Read the current domain's P2M table. */
 static inline mfn_t gfn_to_mfn_current(unsigned long gfn)
 {
+    return current->domain->arch.p2m.get_entry_fast(gfn);
+}
+
+/* Read the current domain's p2m table (through the linear mapping). */
+static inline mfn_t p2m_gfn_to_mfn_fast(unsigned long gfn)
+{
     l1_pgentry_t l1e = l1e_empty();
     l2_pgentry_t l2e = l2e_empty();
     int ret;
@@ -73,7 +122,11 @@ static inline mfn_t gfn_to_mfn_current(unsigned long gfn)
 }
 
 /* Read another domain's P2M table, mapping pages as we go */
-mfn_t gfn_to_mfn_foreign(struct domain *d, unsigned long gpfn);
+static inline
+mfn_t gfn_to_mfn_foreign(struct domain *d, unsigned long gpfn)
+{
+    return d->arch.p2m.get_entry(d, gpfn);
+}
 
 /* General conversion function from gfn to mfn */
 static inline mfn_t gfn_to_mfn(struct domain *d, unsigned long gfn)
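
The p2m_type_t values and mask helpers added to p2m.h above classify entries by
type group. Here is a small, self-contained sketch of how the p2m_is_ram()-style
predicates behave (an illustrative copy of the macros, not the header itself):

    #include <stdio.h>

    /* Illustrative copy of the type machinery added to p2m.h above. */
    typedef enum {
        p2m_invalid = 0, p2m_ram_rw, p2m_ram_logdirty,
        p2m_ram_ro, p2m_mmio_dm, p2m_mmio_direct,
    } p2m_type_t;

    #define p2m_to_mask(_t)  (1UL << (_t))
    #define P2M_RAM_TYPES    (p2m_to_mask(p2m_ram_rw) | \
                              p2m_to_mask(p2m_ram_logdirty) | \
                              p2m_to_mask(p2m_ram_ro))
    #define p2m_is_ram(_t)   (p2m_to_mask(_t) & P2M_RAM_TYPES)

    int main(void)
    {
        printf("p2m_ram_logdirty is RAM? %s\n",
               p2m_is_ram(p2m_ram_logdirty) ? "yes" : "no");
        printf("p2m_mmio_dm is RAM?      %s\n",
               p2m_is_ram(p2m_mmio_dm) ? "yes" : "no");
        return 0;
    }
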
diff --git a/include/asm-x86/paging.h b/include/asm-x86/paging.h
index 35067b2..fdbdd8d 100644
--- a/include/asm-x86/paging.h
+++ b/include/asm-x86/paging.h
@@ -36,6 +36,8 @@
 /*****************************************************************************
  * Macros to tell which paging mode a domain is in */
 
+#define hap_enabled(d) (hvm_funcs.hap_supported && is_hvm_domain(d))
+
 #define PG_SH_shift    20
 #define PG_HAP_shift   21
 /* We're in one of the shadow modes */