Sophie

Sophie

distrib > Scientific%20Linux > 5x > x86_64 > by-pkgid > fc11cd6e1c513a17304da94a5390f3cd > files > 4603

kernel-2.6.18-194.11.1.el5.src.rpm

From: ddugger@redhat.com <ddugger@redhat.com>
Date: Wed, 11 Mar 2009 15:44:17 -0600
Subject: [xen] x86: VPID: implement feature
Message-id: 20090311214417.GV10195@sobek.n0ano.com
O-Subject: [PATCH 1/2] VPID: implement feature
Bugzilla: 464821
RH-Acked-by: Justin M. Forbes <jforbes@redhat.com>

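This patch implements the VMX VPID (virtual processor ID, i.e. tagged
TLB) feature.  Each HVM domain is allocated a block of VPIDs when it is
initialised (via new domain_initialise/domain_destroy hooks), each vcpu's
VPID is written to its VMCS, and INVVPID is used to invalidate guest
translations where they were previously flushed implicitly.  VPID is
requested by default and can be turned off with the "vpid" boolean
option.  do_invalid_op() now consults the exception table so that a #UD
raised by INVVPID before VMXON is fixed up instead of being fatal.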

Upstream Status: Accepted (CS 17441, 17533, 18585)

BZ: 464821

Signed-off-by: Xiaohui Xin <xiaohui.xin@intel.com>
Signed-off-by: Edwin Zhai <edwin.zhai@intel.com>
Signed-off-by: Don Dugger <donald.d.dugger@intel.com>

diff --git a/arch/x86/hvm/hvm.c b/arch/x86/hvm/hvm.c
index 208f14b..89e02e9 100644
--- a/arch/x86/hvm/hvm.c
+++ b/arch/x86/hvm/hvm.c
@@ -256,7 +256,10 @@ int hvm_domain_initialise(struct domain *d)
     hvm_init_ioreq_page(d, &d->arch.hvm_domain.ioreq);
     hvm_init_ioreq_page(d, &d->arch.hvm_domain.buf_ioreq);
 
-    return 0;
+    if ( hvm_funcs.domain_initialise )
+        rc = hvm_funcs.domain_initialise(d);
+
+    return rc;
 }
 
 void hvm_domain_relinquish_resources(struct domain *d)
diff --git a/arch/x86/hvm/vmx/vmcs.c b/arch/x86/hvm/vmx/vmcs.c
index 83864af..a241d9f 100644
--- a/arch/x86/hvm/vmx/vmcs.c
+++ b/arch/x86/hvm/vmx/vmcs.c
@@ -37,6 +37,9 @@
 #include <xen/keyhandler.h>
 #include <asm/shadow.h>
 
+static int opt_vpid_enabled = 1;
+boolean_param("vpid", opt_vpid_enabled);
+
 /* Dynamic (run-time adjusted) execution control flags. */
 u32 vmx_pin_based_exec_control __read_mostly;
 u32 vmx_cpu_based_exec_control __read_mostly;
@@ -113,7 +116,9 @@ void vmx_init_vmcs_config(void)
         min = 0;
 
         opt = (SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
-              SECONDARY_EXEC_ENABLE_EPT);
+          SECONDARY_EXEC_ENABLE_EPT);
+        if ( opt_vpid_enabled )
+            opt |= SECONDARY_EXEC_ENABLE_VPID;
         _vmx_secondary_exec_control = adjust_vmx_controls(
             min, opt, MSR_IA32_VMX_PROCBASED_CTLS2);
     }
@@ -516,7 +521,7 @@ static void construct_vmcs(struct vcpu *v)
     cr4 = read_cr4();
     if ( paging_mode_hap(v->domain) )
     {
-        hvm_update_guest_cr(v, 3);
+        hvm_update_guest_cr(v, 0);
         hvm_update_guest_cr(v, 4);
     }
     else
@@ -548,6 +553,13 @@ static void construct_vmcs(struct vcpu *v)
 #endif
     }
 
+    if ( cpu_has_vmx_vpid )
+    {
+        v->arch.hvm_vmx.vpid = v->vcpu_id +
+          v->domain->arch.hvm_domain.vmx_vpid_base;
+        __vmwrite(VIRTUAL_PROCESSOR_ID, v->arch.hvm_vmx.vpid);
+    }
+
     /* Memory-mapped based VLAPIC TPR optimization. */
     if ( cpu_has_vmx_mmap_vtpr_optimization )
     {
@@ -631,6 +643,7 @@ void vmx_do_resume(struct vcpu *v)
         vmx_load_vmcs(v);
         hvm_migrate_timers(v);
         vmx_set_host_env(v);
+        vpid_sync_vcpu_all(v);
     }
 
     if ( !v->arch.hvm_vmx.launched && vcpu_vlapic(v)->mmap_vtpr_enabled )
@@ -716,6 +729,8 @@ void vmcs_dump_vcpu(void)
       (uint32_t)__vmread(SECONDARY_VM_EXEC_CONTROL));
     printk("EPT pointer = 0x%08x%08x\n",
       (uint32_t)__vmread(EPT_POINTER_HIGH), (uint32_t)__vmread(EPT_POINTER));
+    printk("virtual processor ID = 0x%04x\n",
+      (uint32_t)__vmread(VIRTUAL_PROCESSOR_ID));
 }
 
 
diff --git a/arch/x86/hvm/vmx/vmx.c b/arch/x86/hvm/vmx/vmx.c
index c131d52..51ec1f8 100644
--- a/arch/x86/hvm/vmx/vmx.c
+++ b/arch/x86/hvm/vmx/vmx.c
@@ -56,6 +56,18 @@ char *vmx_msr_bitmap;
 
 static void vmx_ctxt_switch_from(struct vcpu *v);
 static void vmx_ctxt_switch_to(struct vcpu *v);
+static int  vmx_alloc_vpid(struct domain *d);
+static void vmx_free_vpid(struct domain *d);
+
+static int vmx_domain_initialise(struct domain *d)
+{
+    return vmx_alloc_vpid(d);
+}
+
+static void vmx_domain_destroy(struct domain *d)
+{
+    vmx_free_vpid(d);
+}
 
 static int vmx_vcpu_initialise(struct vcpu *v)
 {
@@ -704,6 +716,7 @@ static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr)
         }
         else
             __vmwrite(GUEST_CR3, HVM_IDENT_PT_PAGE);
+        vpid_sync_vcpu_all(v);
         break;
 
     case 4:
@@ -1272,14 +1285,19 @@ static void vmx_update_guest_cr3(struct vcpu *v)
     ASSERT( (v == current) || !vcpu_runnable(v) );
     vmx_vmcs_enter(v);
     __vmwrite(GUEST_CR3, v->arch.hvm_vcpu.hw_cr3);
+    vpid_sync_vcpu_all(v);
     vmx_vmcs_exit(v);
 }
 
 static void vmx_flush_guest_tlbs(void)
 {
-    /* No tagged TLB support on VMX yet.  The fact that we're in Xen
-     * at all means any guest will have a clean TLB when it's next run,
-     * because VMRESUME will flush it for us. */
+    /* Without VPID (i.e. tagged TLB support), the fact that we're in
+     * Xen at all means any guest will have a clean TLB when it's next
+     * run, because VMRESUME will flush it for us.
+     *
+     * With VPID enabled, we must explicitly invalidate all translations
+     * associated with all VPID values. */
+    vpid_sync_all();
 }
 
 static void vmx_inject_exception(
@@ -1329,6 +1347,8 @@ static void disable_intercept_for_msr(u32 msr)
 static struct hvm_function_table vmx_function_table = {
     .name                 = "VMX",
     .disable              = stop_vmx,
+    .domain_initialise    = vmx_domain_initialise,
+    .domain_destroy       = vmx_domain_destroy,
     .vcpu_initialise      = vmx_vcpu_initialise,
     .vcpu_destroy         = vmx_vcpu_destroy,
     .store_cpu_guest_regs = vmx_store_cpu_guest_regs,
@@ -1357,6 +1377,9 @@ static struct hvm_function_table vmx_function_table = {
     .update_guest_cr      = vmx_update_guest_cr
 };
 
+static unsigned long *vpid_bitmap;
+#define VPID_BITMAP_SIZE ((1u << VMCS_VPID_WIDTH) / MAX_VIRT_CPUS)
+
 int start_vmx(void)
 {
     u32 eax, edx;
@@ -1413,6 +1436,21 @@ int start_vmx(void)
 
     ept_sync_all();
 
+    vpid_sync_all();
+
+    if ( cpu_has_vmx_vpid )
+    {
+        printk("VMX: VPID is available.\n");
+
+        vpid_bitmap = xmalloc_array(
+          unsigned long, BITS_TO_LONGS(VPID_BITMAP_SIZE));
+        BUG_ON(vpid_bitmap == NULL);
+        memset(vpid_bitmap, 0, BITS_TO_LONGS(VPID_BITMAP_SIZE) * sizeof(long));
+
+        /* VPID 0 is used by VMX root mode (the hypervisor). */
+        __set_bit(0, vpid_bitmap);
+    }
+
     vmx_save_host_msrs();
 
     if ( smp_processor_id() != 0 )
@@ -1438,6 +1476,36 @@ int start_vmx(void)
     return 1;
 }
 
+static int vmx_alloc_vpid(struct domain *d)
+{
+    int idx;
+
+    if ( !cpu_has_vmx_vpid )
+        return 0;
+
+    do {
+        idx = find_first_zero_bit(vpid_bitmap, VPID_BITMAP_SIZE);
+        if ( idx >= VPID_BITMAP_SIZE )
+        {
+            dprintk(XENLOG_WARNING, "VMX VPID space exhausted.\n");
+            return -EBUSY;
+        }
+    }
+    while ( test_and_set_bit(idx, vpid_bitmap) );
+
+    d->arch.hvm_domain.vmx_vpid_base = idx * MAX_VIRT_CPUS;
+    return 0;
+}
+
+static void vmx_free_vpid(struct domain *d)
+{
+    if ( !cpu_has_vmx_vpid )
+        return;
+
+    clear_bit(d->arch.hvm_domain.vmx_vpid_base / MAX_VIRT_CPUS, vpid_bitmap);
+}
+
+
 /*
  * Not all cases receive valid value in the VM-exit instruction length field.
  * Callers must know what they're doing!
@@ -1610,7 +1678,8 @@ static void vmx_do_invlpg(unsigned long va)
      * We do the safest things first, then try to update the shadow
      * copying from guest
      */
-    paging_invlpg(v, va);
+    if ( paging_invlpg(v, va) )
+        vpid_sync_vcpu_gva(v, va);
 }
 
 /*
diff --git a/arch/x86/traps.c b/arch/x86/traps.c
index 1f8d269..8e72af7 100644
--- a/arch/x86/traps.c
+++ b/arch/x86/traps.c
@@ -643,6 +643,7 @@ asmlinkage int do_invalid_op(struct cpu_user_regs *regs)
     struct bug_frame bug;
     struct bug_frame_str bug_str;
     char *filename, *predicate, *eip = (char *)regs->eip;
+    unsigned long fixup;
     int rc, id, lineno;
 
     DEBUGGER_trap_entry(TRAP_invalid_op, regs);
@@ -713,6 +714,11 @@ asmlinkage int do_invalid_op(struct cpu_user_regs *regs)
           predicate, filename, lineno);
 
  die:
+    if ( (fixup = search_exception_table(regs->eip)) != 0 )
+    {
+        regs->eip = fixup;
+        return 0;
+    }
     DEBUGGER_trap_fatal(TRAP_invalid_op, regs);
     show_execution_state(regs);
     panic("FATAL TRAP: vector = %d (invalid opcode)\n", TRAP_invalid_op);
diff --git a/include/asm-x86/hvm/domain.h b/include/asm-x86/hvm/domain.h
index 88cc1cd..f6c4c61 100644
--- a/include/asm-x86/hvm/domain.h
+++ b/include/asm-x86/hvm/domain.h
@@ -69,6 +69,8 @@ struct hvm_domain {
 #if CONFIG_PAGING_LEVELS == 3
     bool_t                 amd_npt_4gb_warning;
 #endif
+
+    unsigned long           vmx_vpid_base;
 };
 
 #endif /* __ASM_X86_HVM_DOMAIN_H__ */
diff --git a/include/asm-x86/hvm/hvm.h b/include/asm-x86/hvm/hvm.h
index 21f7afd..9089965 100644
--- a/include/asm-x86/hvm/hvm.h
+++ b/include/asm-x86/hvm/hvm.h
@@ -72,8 +72,10 @@ struct hvm_function_table {
     void (*disable)(void);
 
     /*
-     * Initialise/destroy HVM VCPU resources
+     * Initialise/destroy HVM domain/vcpu resources
      */
+    int  (*domain_initialise)(struct domain *d);
+    void (*domain_destroy)(struct domain *d);
     int  (*vcpu_initialise)(struct vcpu *v);
     void (*vcpu_destroy)(struct vcpu *v);
 
diff --git a/include/asm-x86/hvm/vmx/vmcs.h b/include/asm-x86/hvm/vmx/vmcs.h
index 355523b..ade7a5a 100644
--- a/include/asm-x86/hvm/vmx/vmcs.h
+++ b/include/asm-x86/hvm/vmx/vmcs.h
@@ -79,6 +79,8 @@ struct arch_vmx_struct {
     u32                  exec_control;
     u32                  secondary_exec_control;
 
+    u16                  vpid;
+
     /* If there is vector installed in the INTR_INFO_FIELD. */
     u32                  vector_injected;
 
@@ -146,6 +148,8 @@ extern u32 vmx_vmentry_control;
 
 #define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
 #define SECONDARY_EXEC_ENABLE_EPT               0x00000002
+#define SECONDARY_EXEC_ENABLE_VPID              0x00000020
+
 extern u32 vmx_secondary_exec_control;
 
 #define cpu_has_vmx_virtualize_apic_accesses \
@@ -162,6 +166,9 @@ extern u32 vmx_secondary_exec_control;
 #define cpu_has_vmx_ept \
     (vmx_secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT)
 
+#define cpu_has_vmx_vpid \
+    (vmx_secondary_exec_control & SECONDARY_EXEC_ENABLE_VPID)
+
 extern char *vmx_msr_bitmap;
 
 /* GUEST_INTERRUPTIBILITY_INFO flags. */
@@ -172,6 +179,7 @@ extern char *vmx_msr_bitmap;
 
 /* VMCS field encodings. */
 enum vmcs_field {
+    VIRTUAL_PROCESSOR_ID            = 0x00000000,
     GUEST_ES_SELECTOR               = 0x00000800,
     GUEST_CS_SELECTOR               = 0x00000802,
     GUEST_SS_SELECTOR               = 0x00000804,
@@ -311,6 +319,8 @@ enum vmcs_field {
     HOST_RIP                        = 0x00006c16,
 };
 
+#define VMCS_VPID_WIDTH     (16)
+
 #endif /* ASM_X86_HVM_VMX_VMCS_H__ */
 
 /*
diff --git a/include/asm-x86/hvm/vmx/vmx.h b/include/asm-x86/hvm/vmx/vmx.h
index 642f5a9..aa30520 100644
--- a/include/asm-x86/hvm/vmx/vmx.h
+++ b/include/asm-x86/hvm/vmx/vmx.h
@@ -178,6 +178,7 @@ extern struct page_info *change_guest_physmap_for_vtpr(struct domain *d,
 #define VMRESUME_OPCODE ".byte 0x0f,0x01,0xc3\n"
 #define VMWRITE_OPCODE  ".byte 0x0f,0x79\n"
 #define INVEPT_OPCODE   ".byte 0x66,0x0f,0x38,0x80\n"   /* m128,r64/32 */
+#define INVVPID_OPCODE  ".byte 0x66,0x0f,0x38,0x81\n"   /* m128,r64/32 */
 #define VMXOFF_OPCODE   ".byte 0x0f,0x01,0xc4\n"
 #define VMXON_OPCODE    ".byte 0xf3,0x0f,0xc7\n"
 
@@ -283,6 +284,27 @@ static inline void __invept(int ext, u64 eptp, u64 gpa)
                            : "memory");
 }
 
+static inline void __invvpid(int ext, u16 vpid, u64 gva)
+{
+    struct {
+        u64 vpid:16;
+        u64 rsvd:48;
+        u64 gva;
+    } __attribute__ ((packed)) operand = {vpid, 0, gva};
+
+    /* Fix up #UD exceptions which occur when TLBs are flushed before VMXON. */
+    asm volatile ( "1: " INVVPID_OPCODE MODRM_EAX_08
+                   /* CF==1 or ZF==1 --> crash (ud2) */
+                   "ja 2f ; ud2 ; 2:\n"
+                   ".section __ex_table,\"a\"\n"
+                   "    "__FIXUP_ALIGN"\n"
+                   "    "__FIXUP_WORD" 1b,2b\n"
+                   ".previous"
+                   :
+                   : "a" (&operand), "c" (ext)
+                   : "memory");
+}
+
 static inline void __vmxoff (void)
 {
     __asm__ __volatile__ ( VMXOFF_OPCODE
@@ -314,6 +336,25 @@ static inline void ept_sync_all(void)
 
 void ept_sync_domain(struct domain *d);
 
+static inline void vpid_sync_vcpu_gva(struct vcpu *v, unsigned long gva)
+{
+    if ( cpu_has_vmx_vpid )
+        __invvpid(0, v->arch.hvm_vmx.vpid, (u64)gva);
+}
+
+static inline void vpid_sync_vcpu_all(struct vcpu *v)
+{
+    if ( cpu_has_vmx_vpid )
+        __invvpid(1, v->arch.hvm_vmx.vpid, 0);
+}
+
+static inline void vpid_sync_all(void)
+{
+    if ( cpu_has_vmx_vpid )
+        __invvpid(2, 0, 0);
+}
+
+
 static inline void __vmx_inject_exception(struct vcpu *v, int trap, int type,
                                          int error_code, int ilen)
 {