kvm-83-270.el5_11.src.rpm

From 7de347a5476d17193727409c8d09b8cc733f2aeb Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Tue, 20 Jan 2009 22:32:48 +0100
Subject: [PATCH 1/9] kvm handlers for ksm

Implement the KVM methods invoked by KSM: rewrite, count, and
invalidate the shadow ptes that map a given host virtual address.
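
For context, a rough sketch of how the KSM side might drive these
hooks when it replaces a guest page with a shared, write-protected
copy. The real caller arrives with the KSM patches later in this
series; ksm_update_guest_mapping() below is a hypothetical name used
for illustration only:

	static void ksm_update_guest_mapping(struct mm_struct *mm,
					     unsigned long address,
					     pte_t shared_pte)
	{
		/*
		 * Only bother if shadow ptes still map this address.
		 * Note each call takes kvm->mmu_lock internally, so the
		 * count can go stale in between; this is just a sketch.
		 */
		if (!kvm_ksm_spte_count(mm, address))
			return;
		/*
		 * Redirect the sptes to the shared pfn. Because
		 * shared_pte is read-only, kvm_ksm_set_pte() also
		 * clears PT_WRITABLE_MASK in each spte.
		 */
		kvm_ksm_set_pte(mm, address, shared_pte);
	}

kvm_wp_notifier(), exported below, simply zaps the sptes for an
address; presumably KSM invokes it when write-protecting a page
before merging, which is why it can rely on all KSM ptes being
read-only.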

Signed-off-by: Izik Eidus <ieidus@redhat.com>
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
RH-Type: improvement(ksm)
RH-Upstream-status: pending
---
 arch/x86/include/asm/kvm_host.h |    2 +
 arch/x86/kvm/mmu.c              |   84 +++++++++++++++++++++++++++++++++++---
 arch/x86/kvm/paging_tmpl.h      |    2 +
 arch/x86/kvm/x86.c              |   82 ++++++++++++++++++++++++++++++++++++++
 include/linux/kvm_host.h        |    1 +
 virt/kvm/kvm_main.c             |   14 ++++++
 6 files changed, 178 insertions(+), 7 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 730843d..fb6a673 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -758,5 +758,7 @@ asmlinkage void kvm_handle_fault_on_reboot(void);
 #define KVM_ARCH_WANT_MMU_NOTIFIER
 int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
 int kvm_age_hva(struct kvm *kvm, unsigned long hva);
+void kvm_ksm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
+int kvm_ksm_spte_count_hva(struct kvm *kvm, unsigned long hva);
 
 #endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 83f11c7..2f2b253 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -670,23 +670,81 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn)
 	return write_protected;
 }
 
-static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp)
+static int kvm_ksm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
+				 void *param)
+{
+	u64 *spte, new_spte;
+	pte_t *ptep = (pte_t *)param;
+	pte_t pte;
+	struct page *new_page;
+	struct page *old_page;
+
+	pte = *ptep;
+	new_page = pfn_to_page(pte_pfn(pte));
+	spte = rmap_next(kvm, rmapp, NULL);
+	while (spte) {
+		BUG_ON(!(*spte & PT_PRESENT_MASK));
+		rmap_printk("kvm_ksm_set_pte_rmapp: spte %p %llx\n", spte, *spte);
+		new_spte = *spte & ~(PT64_BASE_ADDR_MASK);
+		new_spte |= pte_pfn(pte) << PAGE_SHIFT;
+		if (!pte_write(pte))
+			new_spte &= ~PT_WRITABLE_MASK;
+		old_page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
+		get_page(new_page);
+		set_shadow_pte(spte, new_spte);
+		kvm_flush_remote_tlbs(kvm);
+		put_page(old_page);
+
+		spte = rmap_next(kvm, rmapp, spte);
+	}
+	return 0;
+}
+
+static int kvm_ksm_spte_count_rmapp(struct kvm *kvm, unsigned long *rmapp,
+				    void *param)
+{
+	u64 *spte;
+	int count = 0;
+
+	spte = rmap_next(kvm, rmapp, NULL);
+	while (spte) {
+		count++;
+		spte = rmap_next(kvm, rmapp, spte);
+	}
+
+	return count;
+}
+
+static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, void *param)
 {
 	u64 *spte;
 	int need_tlb_flush = 0;
 
 	while ((spte = rmap_next(kvm, rmapp, NULL))) {
+#ifdef CONFIG_MMU_NOTIFIER
 		BUG_ON(!(*spte & PT_PRESENT_MASK));
 		rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", spte, *spte);
 		rmap_remove(kvm, spte);
 		set_shadow_pte(spte, shadow_trap_nonpresent_pte);
 		need_tlb_flush = 1;
+#else
+		struct page *page;
+		BUG_ON(!(*spte & PT_PRESENT_MASK));
+		page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
+		get_page(page);
+		rmap_remove(kvm, spte);
+		set_shadow_pte(spte, shadow_trap_nonpresent_pte);
+		kvm_flush_remote_tlbs(kvm);
+		put_page(page);
+#endif
 	}
 	return need_tlb_flush;
 }
 
 static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
-			  int (*handler)(struct kvm *kvm, unsigned long *rmapp))
+			  int (*handler)(struct kvm *kvm, unsigned long *rmapp,
+					 void *param),
+			  void *param)
 {
 	int i;
 	int retval = 0;
@@ -707,11 +765,13 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
 		end = start + (memslot->npages << PAGE_SHIFT);
 		if (hva >= start && hva < end) {
 			gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
-			retval |= handler(kvm, &memslot->rmap[gfn_offset]);
+			retval |= handler(kvm, &memslot->rmap[gfn_offset],
+					  param);
 			retval |= handler(kvm,
 					  &memslot->lpage_info[
 						  gfn_offset /
-						  KVM_PAGES_PER_HPAGE].rmap_pde);
+						  KVM_PAGES_PER_HPAGE].rmap_pde,
+					  param);
 		}
 	}
 
@@ -720,10 +780,20 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
 
 int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
 {
-	return kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
+	return kvm_handle_hva(kvm, hva, kvm_unmap_rmapp, NULL);
+}
+
+void kvm_ksm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
+{
+	kvm_handle_hva(kvm, hva, kvm_ksm_set_pte_rmapp, &pte);
+}
+
+int kvm_ksm_spte_count_hva(struct kvm *kvm, unsigned long hva)
+{
+	return kvm_handle_hva(kvm, hva, kvm_ksm_spte_count_rmapp, NULL);
 }
 
-static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp)
+static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, void *param)
 {
 	u64 *spte;
 	int young = 0;
@@ -749,7 +819,7 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp)
 
 int kvm_age_hva(struct kvm *kvm, unsigned long hva)
 {
-	return kvm_handle_hva(kvm, hva, kvm_age_rmapp);
+	return kvm_handle_hva(kvm, hva, kvm_age_rmapp, NULL);
 }
 
 #ifdef MMU_DEBUG
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 9fd78b6..7ad38f3 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -609,6 +609,8 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 
 		nr_present++;
 		pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
+		if (!PageAnon(pfn_to_page(spte_to_pfn(sp->spt[i]))))
+			pte_access &= ~PT_WRITABLE_MASK;
 		set_spte(vcpu, &sp->spt[i], pte_access, 0, 0,
 			 is_dirty_pte(gpte), 0, gpte & PT_GLOBAL_MASK, gfn,
 			 spte_to_pfn(sp->spt[i]), true, false);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index cc17546..ddd1036 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4084,6 +4084,88 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 	free_page((unsigned long)vcpu->arch.pio_data);
 }
 
+/*
+ * Update all sptes that map the gfn at (mm, address) so that they
+ * point to the newly mapped pfn.
+ */
+void kvm_ksm_set_pte(struct mm_struct *mm,
+		     unsigned long address,
+		     pte_t pte)
+{
+	struct kvm *kvm;
+
+	spin_lock(&kvm_lock);
+	kvm = mm_to_kvm(mm);
+	if (kvm) {
+		kvm_get_kvm(kvm);
+		spin_unlock(&kvm_lock);
+
+		spin_lock(&kvm->mmu_lock);
+		kvm->mmu_notifier_seq++;
+		kvm_ksm_set_spte_hva(kvm, address, pte);
+		spin_unlock(&kvm->mmu_lock);
+
+		kvm_put_kvm(kvm);
+	} else
+		spin_unlock(&kvm_lock);
+}
+EXPORT_SYMBOL_GPL(kvm_ksm_set_pte);
+
+void kvm_ksm_invalidate_page(struct mm_struct *mm,
+			     unsigned long address)
+{
+	struct kvm *kvm;
+	int tlb_flush = 0;
+
+	spin_lock(&kvm_lock);
+	kvm = mm_to_kvm(mm);
+	if (kvm) {
+		kvm_get_kvm(kvm);
+		spin_unlock(&kvm_lock);
+
+		spin_lock(&kvm->mmu_lock);
+		kvm->mmu_notifier_seq++;
+		tlb_flush = kvm_unmap_hva(kvm, address);
+		spin_unlock(&kvm->mmu_lock);
+		if (tlb_flush)
+			kvm_flush_remote_tlbs(kvm);
+		kvm_put_kvm(kvm);
+	} else
+		spin_unlock(&kvm_lock);
+}
+
+unsigned long kvm_ksm_spte_count(struct mm_struct *mm,
+				 unsigned long address)
+{
+	struct kvm *kvm;
+	unsigned long count = 0;
+
+	spin_lock(&kvm_lock);
+	kvm = mm_to_kvm(mm);
+	if (kvm) {
+		kvm_get_kvm(kvm);
+		spin_unlock(&kvm_lock);
+
+		spin_lock(&kvm->mmu_lock);
+		kvm->mmu_notifier_seq++;
+		count = kvm_ksm_spte_count_hva(kvm, address);
+		spin_unlock(&kvm->mmu_lock);
+		kvm_put_kvm(kvm);
+	} else
+		spin_unlock(&kvm_lock);
+
+	return count;
+}
+EXPORT_SYMBOL_GPL(kvm_ksm_spte_count);
+
+/* The slots lock cannot be taken here; this relies on all KSM ptes being read-only. */
+void kvm_wp_notifier(struct mm_struct *mm,
+		     unsigned long address)
+{
+	kvm_ksm_invalidate_page(mm, address);
+}
+EXPORT_SYMBOL_GPL(kvm_wp_notifier);
+
 struct  kvm *kvm_arch_create_vm(void)
 {
 	struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ec49d0b..418cdb2 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -183,6 +183,7 @@ void kvm_arch_flush_shadow(struct kvm *kvm);
 gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn);
 struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
 unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn);
+struct kvm *mm_to_kvm(struct mm_struct *mm);
 void kvm_release_page_clean(struct page *page);
 void kvm_release_page_dirty(struct page *page);
 void kvm_set_page_dirty(struct page *page);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 3a5a082..27ea9ee 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1265,6 +1265,20 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
 
 EXPORT_SYMBOL_GPL(gfn_to_page);
 
+/*
+ * Must be called with kvm_lock held.
+ */
+struct kvm *mm_to_kvm(struct mm_struct *mm)
+{
+	struct kvm *kvm;
+
+	list_for_each_entry(kvm, &vm_list, vm_list)
+		if (kvm->mm == mm)
+			return kvm;
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(mm_to_kvm);
+
 void kvm_release_page_clean(struct page *page)
 {
 	kvm_release_pfn_clean(page_to_pfn(page));
-- 
1.6.1