From 7de347a5476d17193727409c8d09b8cc733f2aeb Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli <aarcange@redhat.com> Date: Tue, 20 Jan 2009 22:32:48 +0100 Subject: [PATCH 1/9] kvm handlers for ksm Implements kvm methods invoked by ksm. Signed-off-by: Izik Eidus <ieidus@redhat.com> Signed-off-by: Andrea Arcangeli <aarcange@redhat.com> RH-Type: improvement(ksm) RH-Upstream-status: pending --- arch/x86/include/asm/kvm_host.h | 2 + arch/x86/kvm/mmu.c | 84 +++++++++++++++++++++++++++++++++++--- arch/x86/kvm/paging_tmpl.h | 2 + arch/x86/kvm/x86.c | 82 ++++++++++++++++++++++++++++++++++++++ include/linux/kvm_host.h | 1 + virt/kvm/kvm_main.c | 14 ++++++ 6 files changed, 178 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 730843d..fb6a673 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -758,5 +758,7 @@ asmlinkage void kvm_handle_fault_on_reboot(void); #define KVM_ARCH_WANT_MMU_NOTIFIER int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); int kvm_age_hva(struct kvm *kvm, unsigned long hva); +void kvm_ksm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); +int kvm_ksm_spte_count_hva(struct kvm *kvm, unsigned long hva); #endif /* _ASM_X86_KVM_HOST_H */ diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 83f11c7..2f2b253 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -670,23 +670,81 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn) return write_protected; } -static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp) +static int kvm_ksm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, + void *param) +{ + u64 *spte, new_spte; + pte_t *ptep = (pte_t *)param; + pte_t pte; + struct page *new_page; + struct page *old_page; + + pte = *ptep; + new_page = pfn_to_page(pte_pfn(pte)); + spte = rmap_next(kvm, rmapp, NULL); + while (spte) { + BUG_ON(!(*spte & PT_PRESENT_MASK)); + rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n", spte, 
*spte); + new_spte = *spte & ~(PT64_BASE_ADDR_MASK); + new_spte |= pte_pfn(pte) << PAGE_SHIFT; + if (!pte_write(pte)) + new_spte &= ~PT_WRITABLE_MASK; + old_page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT); + get_page(new_page); + set_shadow_pte(spte, new_spte); + kvm_flush_remote_tlbs(kvm); + put_page(old_page); + + spte = rmap_next(kvm, rmapp, spte); + } + return 0; +} + +static int kvm_ksm_spte_count_rmapp(struct kvm *kvm, unsigned long *rmapp, + void *param) +{ + u64 *spte; + int count = 0; + + spte = rmap_next(kvm, rmapp, NULL); + while (spte) { + count++; + spte = rmap_next(kvm, rmapp, spte); + } + + return count; +} + +static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, void *param) { u64 *spte; int need_tlb_flush = 0; while ((spte = rmap_next(kvm, rmapp, NULL))) { +#ifdef CONFIG_MMU_NOTIFIER BUG_ON(!(*spte & PT_PRESENT_MASK)); rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", spte, *spte); rmap_remove(kvm, spte); set_shadow_pte(spte, shadow_trap_nonpresent_pte); need_tlb_flush = 1; +#else + struct page *page; + BUG_ON(!(*spte & PT_PRESENT_MASK)); + page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT); + get_page(page); + rmap_remove(kvm, spte); + set_shadow_pte(spte, shadow_trap_nonpresent_pte); + kvm_flush_remote_tlbs(kvm); + put_page(page); +#endif } return need_tlb_flush; } static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, - int (*handler)(struct kvm *kvm, unsigned long *rmapp)) + int (*handler)(struct kvm *kvm, unsigned long *rmapp, + void *param), + void *param) { int i; int retval = 0; @@ -707,11 +765,13 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, end = start + (memslot->npages << PAGE_SHIFT); if (hva >= start && hva < end) { gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT; - retval |= handler(kvm, &memslot->rmap[gfn_offset]); + retval |= handler(kvm, &memslot->rmap[gfn_offset], + param); retval |= handler(kvm, &memslot->lpage_info[ gfn_offset / - KVM_PAGES_PER_HPAGE].rmap_pde); + 
+ KVM_PAGES_PER_HPAGE].rmap_pde, + param); } } @@ -720,10 +780,20 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) { - return kvm_handle_hva(kvm, hva, kvm_unmap_rmapp); + return kvm_handle_hva(kvm, hva, kvm_unmap_rmapp, NULL); +} + +void kvm_ksm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) +{ + kvm_handle_hva(kvm, hva, kvm_ksm_set_pte_rmapp, &pte); +} + +int kvm_ksm_spte_count_hva(struct kvm *kvm, unsigned long hva) +{ + return kvm_handle_hva(kvm, hva, kvm_ksm_spte_count_rmapp, NULL); } -static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp) +static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, void *param) { u64 *spte; int young = 0; @@ -749,7 +819,7 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp) int kvm_age_hva(struct kvm *kvm, unsigned long hva) { - return kvm_handle_hva(kvm, hva, kvm_age_rmapp); + return kvm_handle_hva(kvm, hva, kvm_age_rmapp, NULL); } #ifdef MMU_DEBUG diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 9fd78b6..7ad38f3 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -609,6 +609,8 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) nr_present++; pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); + if (!PageAnon(pfn_to_page(spte_to_pfn(sp->spt[i])))) + pte_access &= ~PT_WRITABLE_MASK; set_spte(vcpu, &sp->spt[i], pte_access, 0, 0, is_dirty_pte(gpte), 0, gpte & PT_GLOBAL_MASK, gfn, spte_to_pfn(sp->spt[i]), true, false); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index cc17546..ddd1036 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4084,6 +4084,88 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) free_page((unsigned long)vcpu->arch.pio_data); } +/* + * This must update all sptes corresponding to the gfn at (mm, address) + * to point to the newly mapped pfn.
+ */ +void kvm_ksm_set_pte(struct mm_struct *mm, + unsigned long address, + pte_t pte) +{ + struct kvm *kvm; + + spin_lock(&kvm_lock); + kvm = mm_to_kvm(mm); + if (kvm) { + kvm_get_kvm(kvm); + spin_unlock(&kvm_lock); + + spin_lock(&kvm->mmu_lock); + kvm->mmu_notifier_seq++; + kvm_ksm_set_spte_hva(kvm, address, pte); + spin_unlock(&kvm->mmu_lock); + + kvm_put_kvm(kvm); + } else + spin_unlock(&kvm_lock); +} +EXPORT_SYMBOL_GPL(kvm_ksm_set_pte); + +void kvm_ksm_invalidate_page(struct mm_struct *mm, + unsigned long address) +{ + struct kvm *kvm; + int tlb_flush = 0; + + spin_lock(&kvm_lock); + kvm = mm_to_kvm(mm); + if (kvm) { + kvm_get_kvm(kvm); + spin_unlock(&kvm_lock); + + spin_lock(&kvm->mmu_lock); + kvm->mmu_notifier_seq++; + tlb_flush = kvm_unmap_hva(kvm, address); + spin_unlock(&kvm->mmu_lock); + if (tlb_flush) + kvm_flush_remote_tlbs(kvm); + kvm_put_kvm(kvm); + } else + spin_unlock(&kvm_lock); +} + +unsigned long kvm_ksm_spte_count(struct mm_struct *mm, + unsigned long address) +{ + struct kvm *kvm; + unsigned long count = 0; + + spin_lock(&kvm_lock); + kvm = mm_to_kvm(mm); + if (kvm) { + kvm_get_kvm(kvm); + spin_unlock(&kvm_lock); + + spin_lock(&kvm->mmu_lock); + kvm->mmu_notifier_seq++; + count = kvm_ksm_spte_count_hva(kvm, address); + spin_unlock(&kvm->mmu_lock); + kvm_put_kvm(kvm); + } else + spin_unlock(&kvm_lock); + + return count; +} +EXPORT_SYMBOL_GPL(kvm_ksm_spte_count); + +/* slots lock cannot be taken here, depends on all KSM ptes being readonly */ +void kvm_wp_notifier(struct mm_struct *mm, + unsigned long address) +{ + kvm_ksm_invalidate_page(mm, address); +} +EXPORT_SYMBOL_GPL(kvm_wp_notifier); + struct kvm *kvm_arch_create_vm(void) { struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ec49d0b..418cdb2 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -183,6 +183,7 @@ void kvm_arch_flush_shadow(struct kvm *kvm); gfn_t unalias_gfn(struct kvm *kvm, 
gfn_t gfn); struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn); unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn); +struct kvm *mm_to_kvm(struct mm_struct *mm); void kvm_release_page_clean(struct page *page); void kvm_release_page_dirty(struct page *page); void kvm_set_page_dirty(struct page *page); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 3a5a082..27ea9ee 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1265,6 +1265,20 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) EXPORT_SYMBOL_GPL(gfn_to_page); +/* + * Must be called with kvm_lock held. + */ +struct kvm *mm_to_kvm(struct mm_struct *mm) +{ + struct kvm *kvm; + + list_for_each_entry(kvm, &vm_list, vm_list) + if (kvm->mm == mm) + return kvm; + return NULL; +} +EXPORT_SYMBOL_GPL(mm_to_kvm); + void kvm_release_page_clean(struct page *page) { kvm_release_pfn_clean(page_to_pfn(page)); -- 1.6.1