kernel-2.6.18-238.el5.src.rpm

From: john cooper <john.cooper@redhat.com>
Date: Sat, 4 Apr 2009 16:29:55 -0400
Subject: [mm] mmu_notifier: kabi workaround support
Message-id: 49D7C343.4060804@redhat.com
O-Subject: [RHEL5.4 PATCH] implement mmu_notifier mechanism V3, [5/5] BZ#485718
Bugzilla: 485718
RH-Acked-by: Rik van Riel <riel@redhat.com>
RH-Acked-by: Andrea Arcangeli <aarcange@redhat.com>

BZ#485718  https://bugzilla.redhat.com/show_bug.cgi?id=485718

--
john.cooper@redhat.com

The original version of the mmu notifier backport patch
requires adding a pointer to the mm_struct, which breaks
kABI by expanding the structure.  Because some target
archs lack sufficient unused structure padding to hold
the pointer, this patch instead introduces an index
scheme that fits into 16 bits of available, unused
padding in the mm_struct, and adapts the mmu notifier
backport to use that index mechanism.
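
For illustration only, a minimal user-space model of the index
scheme follows (this is not the patch code: the names idx_alloc,
idx_map, idx_free and TABSZ are made up, and a pthread mutex stands
in for the kernel spinlock).  A fixed table holds the real struct
pointers, the 16-bit value stored in the reclaimed padding is a
1-based index into that table, and 0 marks an mm with no notifiers.

/* user-space sketch of the pointer-to-index mapping */
#include <assert.h>
#include <pthread.h>
#include <stdio.h>

#define TABSZ 512                      /* stands in for CONFIG_MMU_NOTIFIER_TABSZ */

static void *tab[TABSZ];               /* slot (idx - 1) -> object pointer */
static int tab_free = TABSZ;           /* number of unused slots */
static unsigned short tab_search;      /* next slot to try, 0-based */
static pthread_mutex_t tab_lock = PTHREAD_MUTEX_INITIALIZER;

/* Reserve a slot for obj; return its 1-based index, or 0 if the table is full. */
static unsigned short idx_alloc(void *obj)
{
	unsigned short idx = 0;
	int i, slot;

	pthread_mutex_lock(&tab_lock);
	if (tab_free) {
		slot = tab_search;
		for (i = 0; i < TABSZ; i++, slot++) {
			if (slot >= TABSZ)
				slot = 0;       /* wrap the circular search */
			if (!tab[slot]) {
				tab[slot] = obj;
				--tab_free;
				tab_search = slot + 1;
				idx = slot + 1; /* external index is 1-based */
				break;
			}
		}
	}
	pthread_mutex_unlock(&tab_lock);
	return idx;
}

/* Translate a previously allocated index back to the object pointer. */
static void *idx_map(unsigned short idx)
{
	void *obj;

	assert(idx >= 1 && idx <= TABSZ);
	pthread_mutex_lock(&tab_lock);
	obj = tab[idx - 1];
	pthread_mutex_unlock(&tab_lock);
	assert(obj);
	return obj;
}

/* Release a slot so the index can be reused. */
static void idx_free(unsigned short idx)
{
	assert(idx >= 1 && idx <= TABSZ);
	pthread_mutex_lock(&tab_lock);
	tab[idx - 1] = NULL;
	tab_search = idx - 1;   /* reuse the freed slot first */
	++tab_free;
	pthread_mutex_unlock(&tab_lock);
}

int main(void)
{
	int object = 42;
	unsigned short idx = idx_alloc(&object);   /* 16 bits fit in the struct padding */

	printf("index %u -> %d\n", idx, *(int *)idx_map(idx));
	idx_free(idx);
	return 0;
}

As in the patch, allocation scans circularly from the last
touched slot and freeing resets the search position, so a freed
index tends to be reused immediately.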

 include/linux/mmu_notifier.h |   16 +++-
 include/linux/sched.h        |    6 -
 mm/Kconfig                   |    5 +
 mm/memory.c                  |    8 +-
 mm/mmu_notifier.c            |  156 ++++++++++++++++++++++++++++++++++++-------
 5 files changed, 157 insertions(+), 34 deletions(-)
=================================================================

diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index fecf0ad..f664c30 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -23,6 +23,16 @@ struct mmu_notifier_mm {
 	spinlock_t lock;
 };
 
+/* Due to kABI constraints we can't alter the size of a struct mm,
+ * nor does sufficient reclaimable struct pad space exist to hold
+ * an mmu_notifier_mm *.  However we can reclaim enough unused space
+ * to hold an index value.  We then maintain a mapping from index
+ * values to a table of struct mmu_notifier_mm *s.
+ *
+ * 0: unused, 1..CONFIG_MMU_NOTIFIER_TABSZ: valid index
+ */
+typedef unsigned short mmu_notifier_index;
+
 struct mmu_notifier_ops {
 	/*
 	 * Called either by mmu_notifier_unregister or when the mm is
@@ -139,9 +149,9 @@ struct mmu_notifier {
 	const struct mmu_notifier_ops *ops;
 };
 
-static inline int mm_has_notifiers(struct mm_struct *mm)
+static inline mmu_notifier_index mm_has_notifiers(struct mm_struct *mm)
 {
-	return unlikely(mm->mmu_notifier_mm);
+	return unlikely(mm->mmu_notifier_idx);
 }
 
 extern int mmu_notifier_register(struct mmu_notifier *mn,
@@ -198,7 +208,7 @@ static inline void mmu_notifier_invalidate_range_end(struct mm_struct *mm,
 
 static inline void mmu_notifier_mm_init(struct mm_struct *mm)
 {
-	mm->mmu_notifier_mm = NULL;
+	mm->mmu_notifier_idx = 0;
 }
 
 static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 267ddcb..e3a6e26 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -386,6 +386,9 @@ struct mm_struct {
 	/* Token based thrashing protection. */
 	unsigned long swap_token_time;
 	char recent_pagein;
+#if defined(CONFIG_MMU_NOTIFIER) && !defined(__GENKSYMS__)
+	unsigned short mmu_notifier_idx;
+#endif
 
 	/* coredumping support */
 	int core_waiters;
@@ -394,9 +397,6 @@ struct mm_struct {
 	/* aio bits */
 	rwlock_t		ioctx_list_lock;
 	struct kioctx		*ioctx_list;
-#ifdef CONFIG_MMU_NOTIFIER
-	struct mmu_notifier_mm *mmu_notifier_mm;
-#endif
 };
 
 struct sighand_struct {
diff --git a/mm/Kconfig b/mm/Kconfig
index 719ef3c..646c69d 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -162,3 +162,8 @@ config RESOURCES_64BIT
 config MMU_NOTIFIER
 	bool
 	default y
+
+config MMU_NOTIFIER_TABSZ
+	int
+	range	16 4095
+	default 512
diff --git a/mm/memory.c b/mm/memory.c
index 7c07d55..cffa52a 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -922,9 +922,10 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp,
 	unsigned long start = start_addr;
 	spinlock_t *i_mmap_lock = details? details->i_mmap_lock: NULL;
 	int fullmm = (*tlbp)->fullmm;
-	struct mm_struct *mm = vma->vm_mm;
+	struct mm_struct *mm = vma ? vma->vm_mm : NULL;
 
-	mmu_notifier_invalidate_range_start(mm, start_addr, end_addr);
+	if (mm)
+		mmu_notifier_invalidate_range_start(mm, start_addr, end_addr);
 	for ( ; vma && vma->vm_start < end_addr; vma = vma->vm_next) {
 		unsigned long end;
 
@@ -975,7 +976,8 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp,
 		}
 	}
 out:
-	mmu_notifier_invalidate_range_end(mm, start_addr, end_addr);
+	if (mm)
+		mmu_notifier_invalidate_range_end(mm, start_addr, end_addr);
 	return start;	/* which is now the end (or restart) address */
 }
 
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index 599576f..a0e2bd6 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -17,6 +17,93 @@
 #include <linux/rcupdate.h>
 #include <linux/sched.h>
 
+/* global state of index mapping table
+ */
+static struct mmu_notifier_mm **mn_tab = NULL;
+static int mn_tab_free = 0;
+static mmu_notifier_index mn_tab_search = 0;
+
+/* lock scope: mn_tab, mn_tab_free, mn_tab_search
+ */
+static DEFINE_SPINLOCK(mn_tab_lock);
+
+#if 65535 < CONFIG_MMU_NOTIFIER_TABSZ
+#error	"CONFIG_MMU_NOTIFIER_TABSZ exceeds index limit"
+#endif
+#define MAXIDX	CONFIG_MMU_NOTIFIER_TABSZ
+
+/* initialize mmu_notifier index mapping
+ */
+static int mmu_notifier_init(void)
+{
+	mn_tab = kzalloc(sizeof(struct mmu_notifier_mm *) * MAXIDX, GFP_KERNEL);
+	if (mn_tab)
+		mn_tab_free = MAXIDX;
+	WARN_ON(!mn_tab);
+	return !mn_tab;
+}
+
+__initcall(mmu_notifier_init);
+
+/* map index to struct mmu_notifier_mm *
+ */
+static inline struct mmu_notifier_mm *mmu_notifier_map(mmu_notifier_index idx)
+{
+	struct mmu_notifier_mm *pmn;
+	unsigned long flags;
+
+	BUG_ON(!(1 <= idx && idx <= MAXIDX));
+	spin_lock_irqsave(&mn_tab_lock, flags);
+	pmn = mn_tab[--idx];
+	spin_unlock_irqrestore(&mn_tab_lock, flags);
+	BUG_ON(!pmn);
+	return pmn;
+}
+
+/* search for first free entry, if found set to pmn and return index,
+ * return 0 otherwise
+ */
+static inline mmu_notifier_index mmu_notifier_idxalloc(
+	struct mmu_notifier_mm *pmn)
+{
+	unsigned long flags;
+	int i, idx;
+
+	spin_lock_irqsave(&mn_tab_lock, flags);
+	if (mn_tab_free) {
+		idx = mn_tab_search;
+		for (i = MAXIDX; i; --i, ++idx) {
+			if (MAXIDX <= idx)
+				idx = 0;
+			if (!mn_tab[idx]) {
+				mn_tab[idx] = pmn;
+				--mn_tab_free;
+				mn_tab_search = ++idx;
+				spin_unlock_irqrestore(&mn_tab_lock, flags);
+				return idx;
+			}
+		}
+	}
+	spin_unlock_irqrestore(&mn_tab_lock, flags);
+	return 0;
+}
+
+/* free entry of idx
+ */
+static inline void mmu_notifier_idxfree(mmu_notifier_index idx)
+{
+	unsigned long flags;
+
+	BUG_ON(!(1 <= idx && idx <= MAXIDX));
+	spin_lock_irqsave(&mn_tab_lock, flags);
+	--idx;
+	BUG_ON(!mn_tab[idx]);
+	mn_tab[idx] = NULL;
+	mn_tab_search = idx;
+	++mn_tab_free;
+	spin_unlock_irqrestore(&mn_tab_lock, flags);
+}
+
 /*
  * This function can't run concurrently against mmu_notifier_register
  * because mm->mm_users > 0 during mmu_notifier_register and exit_mmap
@@ -31,11 +118,12 @@
  */
 void __mmu_notifier_release(struct mm_struct *mm)
 {
+	struct mmu_notifier_mm *pmn = mmu_notifier_map(mm->mmu_notifier_idx);
 	struct mmu_notifier *mn;
 
-	spin_lock(&mm->mmu_notifier_mm->lock);
-	while (unlikely(!hlist_empty(&mm->mmu_notifier_mm->list))) {
-		mn = hlist_entry(mm->mmu_notifier_mm->list.first,
+	spin_lock(&pmn->lock);
+	while (unlikely(!hlist_empty(&pmn->list))) {
+		mn = hlist_entry(pmn->list.first,
 				 struct mmu_notifier,
 				 hlist);
 		/*
@@ -50,7 +138,7 @@ void __mmu_notifier_release(struct mm_struct *mm)
 		 * ->release returns.
 		 */
 		rcu_read_lock();
-		spin_unlock(&mm->mmu_notifier_mm->lock);
+		spin_unlock(&pmn->lock);
 		/*
 		 * if ->release runs before mmu_notifier_unregister it
 		 * must be handled as it's the only way for the driver
@@ -61,9 +149,9 @@ void __mmu_notifier_release(struct mm_struct *mm)
 		if (mn->ops->release)
 			mn->ops->release(mn, mm);
 		rcu_read_unlock();
-		spin_lock(&mm->mmu_notifier_mm->lock);
+		spin_lock(&pmn->lock);
 	}
-	spin_unlock(&mm->mmu_notifier_mm->lock);
+	spin_unlock(&pmn->lock);
 
 	/*
 	 * synchronize_rcu here prevents mmu_notifier_release to
@@ -71,7 +159,7 @@ void __mmu_notifier_release(struct mm_struct *mm)
 	 * in the mm) until the ->release method returns, if it was
 	 * invoked by mmu_notifier_unregister.
 	 *
-	 * The mmu_notifier_mm can't go away from under us because one
+	 * The mmu_notifier_idx can't go away from under us because one
 	 * mm_count is hold by exit_mmap.
 	 */
 	synchronize_rcu();
@@ -85,12 +173,13 @@ void __mmu_notifier_release(struct mm_struct *mm)
 int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
 					unsigned long address)
 {
+	struct mmu_notifier_mm *pmn = mmu_notifier_map(mm->mmu_notifier_idx);
 	struct mmu_notifier *mn;
 	struct hlist_node *n;
 	int young = 0;
 
 	rcu_read_lock();
-	hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
+	hlist_for_each_entry_rcu(mn, n, &pmn->list, hlist) {
 		if (mn->ops->clear_flush_young)
 			young |= mn->ops->clear_flush_young(mn, mm, address);
 	}
@@ -102,11 +191,12 @@ int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
 void __mmu_notifier_invalidate_page(struct mm_struct *mm,
 					  unsigned long address)
 {
+	struct mmu_notifier_mm *pmn = mmu_notifier_map(mm->mmu_notifier_idx);
 	struct mmu_notifier *mn;
 	struct hlist_node *n;
 
 	rcu_read_lock();
-	hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
+	hlist_for_each_entry_rcu(mn, n, &pmn->list, hlist) {
 		if (mn->ops->invalidate_page)
 			mn->ops->invalidate_page(mn, mm, address);
 	}
@@ -116,11 +206,12 @@ void __mmu_notifier_invalidate_page(struct mm_struct *mm,
 void __mmu_notifier_invalidate_range_start(struct mm_struct *mm,
 				  unsigned long start, unsigned long end)
 {
+	struct mmu_notifier_mm *pmn = mmu_notifier_map(mm->mmu_notifier_idx);
 	struct mmu_notifier *mn;
 	struct hlist_node *n;
 
 	rcu_read_lock();
-	hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
+	hlist_for_each_entry_rcu(mn, n, &pmn->list, hlist) {
 		if (mn->ops->invalidate_range_start)
 			mn->ops->invalidate_range_start(mn, mm, start, end);
 	}
@@ -130,11 +221,12 @@ void __mmu_notifier_invalidate_range_start(struct mm_struct *mm,
 void __mmu_notifier_invalidate_range_end(struct mm_struct *mm,
 				  unsigned long start, unsigned long end)
 {
+	struct mmu_notifier_mm *pmn = mmu_notifier_map(mm->mmu_notifier_idx);
 	struct mmu_notifier *mn;
 	struct hlist_node *n;
 
 	rcu_read_lock();
-	hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
+	hlist_for_each_entry_rcu(mn, n, &pmn->list, hlist) {
 		if (mn->ops->invalidate_range_end)
 			mn->ops->invalidate_range_end(mn, mm, start, end);
 	}
@@ -145,15 +237,17 @@ static int do_mmu_notifier_register(struct mmu_notifier *mn,
 				    struct mm_struct *mm,
 				    int take_mmap_sem)
 {
-	struct mmu_notifier_mm *mmu_notifier_mm;
+	struct mmu_notifier_mm *mmu_notifier_mm, *pmn;
+	mmu_notifier_index idx;
 	int ret;
 
 	BUG_ON(atomic_read(&mm->mm_users) <= 0);
 
-	ret = -ENOMEM;
 	mmu_notifier_mm = kmalloc(sizeof(struct mmu_notifier_mm), GFP_KERNEL);
-	if (unlikely(!mmu_notifier_mm))
+	if (unlikely(!mmu_notifier_mm)) {
+		ret = -ENOMEM;
 		goto out;
+	}
 
 	if (take_mmap_sem)
 		down_write(&mm->mmap_sem);
@@ -161,12 +255,18 @@ static int do_mmu_notifier_register(struct mmu_notifier *mn,
 	if (unlikely(ret))
 		goto out_cleanup;
 
-	if (!mm_has_notifiers(mm)) {
+	if (mm_has_notifiers(mm))
+		idx = mm->mmu_notifier_idx;
+	else if (!(idx = mmu_notifier_idxalloc(mmu_notifier_mm))) {
+		ret = -ENOMEM;
+		goto out_cleanup_unlock;
+	} else {
 		INIT_HLIST_HEAD(&mmu_notifier_mm->list);
 		spin_lock_init(&mmu_notifier_mm->lock);
-		mm->mmu_notifier_mm = mmu_notifier_mm;
+		mm->mmu_notifier_idx = idx;
 		mmu_notifier_mm = NULL;
 	}
+	pmn = mmu_notifier_map(idx);
 	atomic_inc(&mm->mm_count);
 
 	/*
@@ -177,10 +277,11 @@ static int do_mmu_notifier_register(struct mmu_notifier *mn,
 	 * We can't race against any other mmu notifier method either
 	 * thanks to mm_take_all_locks().
 	 */
-	spin_lock(&mm->mmu_notifier_mm->lock);
-	hlist_add_head(&mn->hlist, &mm->mmu_notifier_mm->list);
-	spin_unlock(&mm->mmu_notifier_mm->lock);
+	spin_lock(&pmn->lock);
+	hlist_add_head(&mn->hlist, &pmn->list);
+	spin_unlock(&pmn->lock);
 
+out_cleanup_unlock:
 	mm_drop_all_locks(mm);
 out_cleanup:
 	if (take_mmap_sem)
@@ -224,9 +325,13 @@ EXPORT_SYMBOL_GPL(__mmu_notifier_register);
 /* this is called after the last mmu_notifier_unregister() returned */
 void __mmu_notifier_mm_destroy(struct mm_struct *mm)
 {
-	BUG_ON(!hlist_empty(&mm->mmu_notifier_mm->list));
-	kfree(mm->mmu_notifier_mm);
-	mm->mmu_notifier_mm = LIST_POISON1; /* debug */
+	mmu_notifier_index idx = mm->mmu_notifier_idx;
+	struct mmu_notifier_mm *pmn = mmu_notifier_map(idx);
+
+	BUG_ON(!hlist_empty(&pmn->list));
+
+	kfree(pmn);
+	mmu_notifier_idxfree(idx);
 }
 
 /*
@@ -241,9 +346,10 @@ void __mmu_notifier_mm_destroy(struct mm_struct *mm)
  */
 void mmu_notifier_unregister(struct mmu_notifier *mn, struct mm_struct *mm)
 {
+	struct mmu_notifier_mm *pmn = mmu_notifier_map(mm->mmu_notifier_idx);
 	BUG_ON(atomic_read(&mm->mm_count) <= 0);
 
-	spin_lock(&mm->mmu_notifier_mm->lock);
+	spin_lock(&pmn->lock);
 	if (!hlist_unhashed(&mn->hlist)) {
 		hlist_del_rcu(&mn->hlist);
 
@@ -252,7 +358,7 @@ void mmu_notifier_unregister(struct mmu_notifier *mn, struct mm_struct *mm)
 		 * before freeing the pages.
 		 */
 		rcu_read_lock();
-		spin_unlock(&mm->mmu_notifier_mm->lock);
+		spin_unlock(&pmn->lock);
 		/*
 		 * exit_mmap will block in mmu_notifier_release to
 		 * guarantee ->release is called before freeing the
@@ -262,7 +368,7 @@ void mmu_notifier_unregister(struct mmu_notifier *mn, struct mm_struct *mm)
 			mn->ops->release(mn, mm);
 		rcu_read_unlock();
 	} else
-		spin_unlock(&mm->mmu_notifier_mm->lock);
+		spin_unlock(&pmn->lock);
 
 	/*
 	 * Wait any running method to finish, of course including