From: ddugger@redhat.com <ddugger@redhat.com>
Date: Wed, 11 Mar 2009 15:44:17 -0600
Subject: [xen] x86: VPID: implement feature
Message-id: 20090311214417.GV10195@sobek.n0ano.com
O-Subject: [PATCH 1/2] VPID: implement feature
Bugzilla: 464821
RH-Acked-by: Justin M. Forbes <jforbes@redhat.com>

This is the code that actually implements the VPID feature

Upstream Status: Accepted (CS 17441, 17533, 18585)
BZ: 464821

Signed-off-by: Xiaohui Xin <xiaohui.xin@intel.com>
Signed-off-by: Edwin Zhai <edwin.zhai@intel.com>
Signed-off-by: Don Dugger <donald.d.dugger@intel.com>

diff --git a/arch/x86/hvm/hvm.c b/arch/x86/hvm/hvm.c
index 208f14b..89e02e9 100644
--- a/arch/x86/hvm/hvm.c
+++ b/arch/x86/hvm/hvm.c
@@ -256,7 +256,10 @@ int hvm_domain_initialise(struct domain *d)
     hvm_init_ioreq_page(d, &d->arch.hvm_domain.ioreq);
     hvm_init_ioreq_page(d, &d->arch.hvm_domain.buf_ioreq);
 
-    return 0;
+    if ( hvm_funcs.domain_initialise )
+        rc = hvm_funcs.domain_initialise(d);
+
+    return rc;
 }
 
 void hvm_domain_relinquish_resources(struct domain *d)
diff --git a/arch/x86/hvm/vmx/vmcs.c b/arch/x86/hvm/vmx/vmcs.c
index 83864af..a241d9f 100644
--- a/arch/x86/hvm/vmx/vmcs.c
+++ b/arch/x86/hvm/vmx/vmcs.c
@@ -37,6 +37,9 @@
 #include <xen/keyhandler.h>
 #include <asm/shadow.h>
 
+static int opt_vpid_enabled = 1;
+boolean_param("vpid", opt_vpid_enabled);
+
 /* Dynamic (run-time adjusted) execution control flags. */
 u32 vmx_pin_based_exec_control __read_mostly;
 u32 vmx_cpu_based_exec_control __read_mostly;
@@ -113,7 +116,9 @@ void vmx_init_vmcs_config(void)
         min = 0;
         opt = (SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
-               SECONDARY_EXEC_ENABLE_EPT);
+               SECONDARY_EXEC_ENABLE_EPT);
+        if ( opt_vpid_enabled )
+            opt |= SECONDARY_EXEC_ENABLE_VPID;
         _vmx_secondary_exec_control = adjust_vmx_controls(
             min, opt, MSR_IA32_VMX_PROCBASED_CTLS2);
     }
@@ -516,7 +521,7 @@ static void construct_vmcs(struct vcpu *v)
     cr4 = read_cr4();
     if ( paging_mode_hap(v->domain) )
     {
-        hvm_update_guest_cr(v, 3);
+        hvm_update_guest_cr(v, 0);
         hvm_update_guest_cr(v, 4);
     }
     else
@@ -548,6 +553,13 @@ static void construct_vmcs(struct vcpu *v)
 #endif
     }
 
+    if ( cpu_has_vmx_vpid )
+    {
+        v->arch.hvm_vmx.vpid = v->vcpu_id +
+            v->domain->arch.hvm_domain.vmx_vpid_base;
+        __vmwrite(VIRTUAL_PROCESSOR_ID, v->arch.hvm_vmx.vpid);
+    }
+
     /* Memory-mapped based VLAPIC TPR optimization. */
     if ( cpu_has_vmx_mmap_vtpr_optimization )
     {
@@ -631,6 +643,7 @@ void vmx_do_resume(struct vcpu *v)
         vmx_load_vmcs(v);
         hvm_migrate_timers(v);
         vmx_set_host_env(v);
+        vpid_sync_vcpu_all(v);
     }
 
     if ( !v->arch.hvm_vmx.launched && vcpu_vlapic(v)->mmap_vtpr_enabled )
@@ -716,6 +729,8 @@ void vmcs_dump_vcpu(void)
            (uint32_t)__vmread(SECONDARY_VM_EXEC_CONTROL));
     printk("EPT pointer = 0x%08x%08x\n",
            (uint32_t)__vmread(EPT_POINTER_HIGH), (uint32_t)__vmread(EPT_POINTER));
+    printk("virtual processor ID = 0x%04x\n",
+           (uint32_t)__vmread(VIRTUAL_PROCESSOR_ID));
 }
diff --git a/arch/x86/hvm/vmx/vmx.c b/arch/x86/hvm/vmx/vmx.c
index c131d52..51ec1f8 100644
--- a/arch/x86/hvm/vmx/vmx.c
+++ b/arch/x86/hvm/vmx/vmx.c
@@ -56,6 +56,18 @@ char *vmx_msr_bitmap;
 static void vmx_ctxt_switch_from(struct vcpu *v);
 static void vmx_ctxt_switch_to(struct vcpu *v);
 
+static int vmx_alloc_vpid(struct domain *d);
+static void vmx_free_vpid(struct domain *d);
+
+static int vmx_domain_initialise(struct domain *d)
+{
+    return vmx_alloc_vpid(d);
+}
+
+static void vmx_domain_destroy(struct domain *d)
+{
+    vmx_free_vpid(d);
+}
 
 static int vmx_vcpu_initialise(struct vcpu *v)
 {
@@ -704,6 +716,7 @@ static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr)
         }
         else
             __vmwrite(GUEST_CR3, HVM_IDENT_PT_PAGE);
+        vpid_sync_vcpu_all(v);
         break;
 
     case 4:
@@ -1272,14 +1285,19 @@ static void vmx_update_guest_cr3(struct vcpu *v)
     ASSERT( (v == current) || !vcpu_runnable(v) );
 
     vmx_vmcs_enter(v);
     __vmwrite(GUEST_CR3, v->arch.hvm_vcpu.hw_cr3);
+    vpid_sync_vcpu_all(v);
     vmx_vmcs_exit(v);
 }
 
 static void vmx_flush_guest_tlbs(void)
 {
-    /* No tagged TLB support on VMX yet. The fact that we're in Xen
-     * at all means any guest will have a clean TLB when it's next run,
-     * because VMRESUME will flush it for us. */
+    /* If VPID (i.e. tagged TLB support) is not enabled, the fact that
+     * we're in Xen at all means any guest will have a clean TLB when
+     * it's next run, because VMRESUME will flush it for us.
+     *
+     * If enabled, we invalidate all translations associated with all
+     * VPID values */
+    vpid_sync_all();
 }
 
 static void vmx_inject_exception(
@@ -1329,6 +1347,8 @@ static void disable_intercept_for_msr(u32 msr)
 static struct hvm_function_table vmx_function_table = {
     .name                 = "VMX",
     .disable              = stop_vmx,
+    .domain_initialise    = vmx_domain_initialise,
+    .domain_destroy       = vmx_domain_destroy,
     .vcpu_initialise      = vmx_vcpu_initialise,
     .vcpu_destroy         = vmx_vcpu_destroy,
     .store_cpu_guest_regs = vmx_store_cpu_guest_regs,
@@ -1357,6 +1377,9 @@ static struct hvm_function_table vmx_function_table = {
     .update_guest_cr      = vmx_update_guest_cr
 };
 
+static unsigned long *vpid_bitmap;
+#define VPID_BITMAP_SIZE ((1u << VMCS_VPID_WIDTH) / MAX_VIRT_CPUS)
+
 int start_vmx(void)
 {
     u32 eax, edx;
@@ -1413,6 +1436,21 @@ int start_vmx(void)
 
     ept_sync_all();
 
+    vpid_sync_all();
+
+    if ( cpu_has_vmx_vpid )
+    {
+        printk("VMX: VPID is available.\n");
+
+        vpid_bitmap = xmalloc_array(
+            unsigned long, BITS_TO_LONGS(VPID_BITMAP_SIZE));
+        BUG_ON(vpid_bitmap == NULL);
+        memset(vpid_bitmap, 0, BITS_TO_LONGS(VPID_BITMAP_SIZE) * sizeof(long));
+
+        /* VPID 0 is used by VMX root mode (the hypervisor). */
+        __set_bit(0, vpid_bitmap);
+    }
+
     vmx_save_host_msrs();
 
     if ( smp_processor_id() != 0 )
@@ -1438,6 +1476,36 @@ int start_vmx(void)
     return 1;
 }
 
+static int vmx_alloc_vpid(struct domain *d)
+{
+    int idx;
+
+    if ( !cpu_has_vmx_vpid )
+        return 0;
+
+    do {
+        idx = find_first_zero_bit(vpid_bitmap, VPID_BITMAP_SIZE);
+        if ( idx >= VPID_BITMAP_SIZE )
+        {
+            dprintk(XENLOG_WARNING, "VMX VPID space exhausted.\n");
+            return -EBUSY;
+        }
+    }
+    while ( test_and_set_bit(idx, vpid_bitmap) );
+
+    d->arch.hvm_domain.vmx_vpid_base = idx * MAX_VIRT_CPUS;
+    return 0;
+}
+
+static void vmx_free_vpid(struct domain *d)
+{
+    if ( !cpu_has_vmx_vpid )
+        return;
+
+    clear_bit(d->arch.hvm_domain.vmx_vpid_base / MAX_VIRT_CPUS, vpid_bitmap);
+}
+
+
 /*
  * Not all cases receive valid value in the VM-exit instruction length field.
  * Callers must know what they're doing!
@@ -1610,7 +1678,8 @@ static void vmx_do_invlpg(unsigned long va)
      * We do the safest things first, then try to update the shadow
      * copying from guest
      */
-    paging_invlpg(v, va);
+    if ( paging_invlpg(v, va) )
+        vpid_sync_vcpu_gva(v, va);
 }
 
 /*
diff --git a/arch/x86/traps.c b/arch/x86/traps.c
index 1f8d269..8e72af7 100644
--- a/arch/x86/traps.c
+++ b/arch/x86/traps.c
@@ -643,6 +643,7 @@ asmlinkage int do_invalid_op(struct cpu_user_regs *regs)
     struct bug_frame bug;
     struct bug_frame_str bug_str;
     char *filename, *predicate, *eip = (char *)regs->eip;
+    unsigned long fixup;
     int rc, id, lineno;
 
     DEBUGGER_trap_entry(TRAP_invalid_op, regs);
@@ -713,6 +714,11 @@ asmlinkage int do_invalid_op(struct cpu_user_regs *regs)
            predicate, filename, lineno);
 
  die:
+    if ( (fixup = search_exception_table(regs->eip)) != 0 )
+    {
+        regs->eip = fixup;
+        return 0;
+    }
     DEBUGGER_trap_fatal(TRAP_invalid_op, regs);
     show_execution_state(regs);
     panic("FATAL TRAP: vector = %d (invalid opcode)\n", TRAP_invalid_op);
diff --git a/include/asm-x86/hvm/domain.h b/include/asm-x86/hvm/domain.h
index 88cc1cd..f6c4c61 100644
--- a/include/asm-x86/hvm/domain.h
+++ b/include/asm-x86/hvm/domain.h
@@ -69,6 +69,8 @@ struct hvm_domain {
 #if CONFIG_PAGING_LEVELS == 3
     bool_t amd_npt_4gb_warning;
 #endif
+
+    unsigned long vmx_vpid_base;
 };
 
 #endif /* __ASM_X86_HVM_DOMAIN_H__ */
diff --git a/include/asm-x86/hvm/hvm.h b/include/asm-x86/hvm/hvm.h
index 21f7afd..9089965 100644
--- a/include/asm-x86/hvm/hvm.h
+++ b/include/asm-x86/hvm/hvm.h
@@ -72,8 +72,10 @@ struct hvm_function_table {
     void (*disable)(void);
 
     /*
-     * Initialise/destroy HVM VCPU resources
+     * Initialise/destroy HVM domain/vcpu resources
      */
+    int (*domain_initialise)(struct domain *d);
+    void (*domain_destroy)(struct domain *d);
     int (*vcpu_initialise)(struct vcpu *v);
     void (*vcpu_destroy)(struct vcpu *v);
diff --git a/include/asm-x86/hvm/vmx/vmcs.h b/include/asm-x86/hvm/vmx/vmcs.h
index 355523b..ade7a5a 100644
--- a/include/asm-x86/hvm/vmx/vmcs.h
+++ b/include/asm-x86/hvm/vmx/vmcs.h
@@ -79,6 +79,8 @@ struct arch_vmx_struct {
     u32                  exec_control;
     u32                  secondary_exec_control;
 
+    u16                  vpid;
+
     /* If there is vector installed in the INTR_INFO_FIELD. */
     u32                  vector_injected;
@@ -146,6 +148,8 @@ extern u32 vmx_vmentry_control;
 #define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
 #define SECONDARY_EXEC_ENABLE_EPT               0x00000002
+#define SECONDARY_EXEC_ENABLE_VPID              0x00000020
+
 extern u32 vmx_secondary_exec_control;
 
 #define cpu_has_vmx_virtualize_apic_accesses \
@@ -162,6 +166,9 @@ extern u32 vmx_secondary_exec_control;
 #define cpu_has_vmx_ept \
     (vmx_secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT)
+#define cpu_has_vmx_vpid \
+    (vmx_secondary_exec_control & SECONDARY_EXEC_ENABLE_VPID)
+
 extern char *vmx_msr_bitmap;
 
 /* GUEST_INTERRUPTIBILITY_INFO flags. */
@@ -172,6 +179,7 @@ extern char *vmx_msr_bitmap;
 /* VMCS field encodings. */
 enum vmcs_field {
+    VIRTUAL_PROCESSOR_ID  = 0x00000000,
     GUEST_ES_SELECTOR     = 0x00000800,
     GUEST_CS_SELECTOR     = 0x00000802,
     GUEST_SS_SELECTOR     = 0x00000804,
@@ -311,6 +319,8 @@ enum vmcs_field {
     HOST_RIP              = 0x00006c16,
 };
 
+#define VMCS_VPID_WIDTH (16)
+
 #endif /* ASM_X86_HVM_VMX_VMCS_H__ */
 
 /*
diff --git a/include/asm-x86/hvm/vmx/vmx.h b/include/asm-x86/hvm/vmx/vmx.h
index 642f5a9..aa30520 100644
--- a/include/asm-x86/hvm/vmx/vmx.h
+++ b/include/asm-x86/hvm/vmx/vmx.h
@@ -178,6 +178,7 @@ extern struct page_info *change_guest_physmap_for_vtpr(struct domain *d,
 #define VMRESUME_OPCODE ".byte 0x0f,0x01,0xc3\n"
 #define VMWRITE_OPCODE  ".byte 0x0f,0x79\n"
 #define INVEPT_OPCODE   ".byte 0x66,0x0f,0x38,0x80\n" /* m128,r64/32 */
+#define INVVPID_OPCODE  ".byte 0x66,0x0f,0x38,0x81\n" /* m128,r64/32 */
 #define VMXOFF_OPCODE   ".byte 0x0f,0x01,0xc4\n"
 #define VMXON_OPCODE    ".byte 0xf3,0x0f,0xc7\n"
@@ -283,6 +284,27 @@ static inline void __invept(int ext, u64 eptp, u64 gpa)
         : "memory");
 }
 
+static inline void __invvpid(int ext, u16 vpid, u64 gva)
+{
+    struct {
+        u64 vpid:16;
+        u64 rsvd:48;
+        u64 gva;
+    } __attribute__ ((packed)) operand = {vpid, 0, gva};
+
+    /* Fix up #UD exceptions which occur when TLBs are flushed before VMXON. */
+    asm volatile ( "1: " INVVPID_OPCODE MODRM_EAX_08
+                   /* CF==1 or ZF==1 --> crash (ud2) */
+                   "ja 2f ; ud2 ; 2:\n"
+                   ".section __ex_table,\"a\"\n"
+                   "    "__FIXUP_ALIGN"\n"
+                   "    "__FIXUP_WORD" 1b,2b\n"
+                   ".previous"
+                   :
+                   : "a" (&operand), "c" (ext)
+                   : "memory");
+}
+
 static inline void __vmxoff (void)
 {
     __asm__ __volatile__ ( VMXOFF_OPCODE
@@ -314,6 +336,25 @@ static inline void ept_sync_all(void)
 
 void ept_sync_domain(struct domain *d);
 
+static inline void vpid_sync_vcpu_gva(struct vcpu *v, unsigned long gva)
+{
+    if ( cpu_has_vmx_vpid )
+        __invvpid(0, v->arch.hvm_vmx.vpid, (u64)gva);
+}
+
+static inline void vpid_sync_vcpu_all(struct vcpu *v)
+{
+    if ( cpu_has_vmx_vpid )
+        __invvpid(1, v->arch.hvm_vmx.vpid, 0);
+}
+
+static inline void vpid_sync_all(void)
+{
+    if ( cpu_has_vmx_vpid )
+        __invvpid(2, 0, 0);
+}
+
+
 static inline void __vmx_inject_exception(struct vcpu *v, int trap, int type,
                                           int error_code, int ilen)
 {
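
For reviewers unfamiliar with the allocation scheme: each domain reserves
a contiguous block of MAX_VIRT_CPUS VPIDs (one bit in vpid_bitmap per
block), and each vCPU is tagged with vmx_vpid_base + vcpu_id. The
standalone C sketch below (not part of the patch) reproduces that logic;
the MAX_VIRT_CPUS value is illustrative, it runs single-threaded for
clarity, and the bitmap helpers are simplified stand-ins for Xen's atomic
bit operations (the real vmx_alloc_vpid() uses test_and_set_bit() to
close the race between finding a zero bit and claiming it).

/* Standalone sketch of the per-domain VPID block allocator (not Xen code). */
#include <stdint.h>
#include <stdio.h>

#define VMCS_VPID_WIDTH 16
#define MAX_VIRT_CPUS   128   /* illustrative; Xen defines its own value */
#define VPID_BLOCKS     ((1u << VMCS_VPID_WIDTH) / MAX_VIRT_CPUS)

static uint8_t vpid_bitmap[VPID_BLOCKS / 8];

static int bitmap_test(unsigned int idx)
{
    return (vpid_bitmap[idx / 8] >> (idx % 8)) & 1;
}

static void bitmap_set(unsigned int idx)
{
    vpid_bitmap[idx / 8] |= (uint8_t)(1u << (idx % 8));
}

static void bitmap_clear(unsigned int idx)
{
    vpid_bitmap[idx / 8] &= (uint8_t)~(1u << (idx % 8));
}

/* Mirrors vmx_alloc_vpid(): claim the first free block for a new domain. */
static int alloc_vpid_base(unsigned long *base)
{
    unsigned int idx;

    for ( idx = 0; idx < VPID_BLOCKS; idx++ )
    {
        if ( !bitmap_test(idx) )
        {
            bitmap_set(idx);
            *base = (unsigned long)idx * MAX_VIRT_CPUS;
            return 0;
        }
    }
    return -1; /* VPID space exhausted */
}

/* Mirrors vmx_free_vpid(): release the domain's block on destruction. */
static void free_vpid_base(unsigned long base)
{
    bitmap_clear(base / MAX_VIRT_CPUS);
}

int main(void)
{
    unsigned long d1_base, d2_base;

    /* Block 0 is reserved: VPID 0 tags VMX root mode (the hypervisor). */
    bitmap_set(0);

    alloc_vpid_base(&d1_base); /* first domain  -> base 128 */
    alloc_vpid_base(&d2_base); /* second domain -> base 256 */
    printf("domain 1: base %lu, vcpu 3 gets VPID %lu\n", d1_base, d1_base + 3);
    printf("domain 2: base %lu\n", d2_base);

    free_vpid_base(d1_base);
    return 0;
}

The three vpid_sync_*() helpers in vmx.h correspond to the INVVPID
invalidation types passed as the first argument of __invvpid(): type 0
invalidates a single guest-virtual-address translation for one VPID
(vpid_sync_vcpu_gva), type 1 invalidates all translations for one VPID
(vpid_sync_vcpu_all), and type 2 invalidates translations for all VPIDs
(vpid_sync_all).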