Sophie

Sophie

distrib > Scientific%20Linux > 5x > x86_64 > by-pkgid > 27922b4260f65d317aabda37e42bbbff > files > 477

kernel-2.6.18-238.el5.src.rpm

From: Konrad Rzeszutek <konradr@redhat.com>
Subject: [RHEL 5.1] RHBZ #217583: cpu-hotplug operations along with make and module insertion script on JS21,causes system to drop to xmon.
Date: Tue, 27 Feb 2007 12:01:09 -0500
Bugzilla: 217583
Message-Id: <20070227170109.GA2353@localhost.localdomain>
Changelog: [cpu-hotplug] make and module insertion script cause a panic


RHBZ#:
------
https://bugzilla.redhat.com/bugzilla/show_bug.cgi?id=217583

Description:
------------

During testing on RHEL5 Beta2, performing CPU hotplug operations
while other work is in progress (compiling a kernel, removing and
re-inserting modules) causes a panic.

The fix is to do away with lock_cpu_hotplug in the slab subsystem and
to rely on cache_chain_mutex to protect against CPU hotplug events instead.
The patch is in the upstream kernel.

RHEL Version Found:
------------------
RHEL5 Beta2 

Upstream Status:
----------------
Discussion about it: http://lkml.org/lkml/2006/10/27/238
Patch is in 2.6.21-rc1

Test Status:
------------
This has been tested on the affected platform and also regression
tested on RHTS. If you have any specific boxes you think are affected
by this, please e-mail me and I will run this kernel on your box.

Proposed Patch:
---------------
This patch is based on 2.6.18-8.el5

diff -uNr linux-2.6.18.i686.orig/mm/slab.c linux-2.6.18.i686/mm/slab.c
--- linux-2.6.18.i686.orig/mm/slab.c	2007-02-05 12:42:05.000000000 -0500
+++ linux-2.6.18.i686/mm/slab.c	2007-02-05 15:21:27.000000000 -0500
@@ -730,7 +730,10 @@
 }
 #endif
 
-/* Guard access to the cache-chain. */
+/*
+ * 1. Guard access to the cache-chain.
+ * 2. Protect sanity of cpu_online_map against cpu hotplug events
+ */
 static DEFINE_MUTEX(cache_chain_mutex);
 static struct list_head cache_chain;
 
@@ -1224,12 +1227,18 @@
 			kfree(shared);
 			free_alien_cache(alien);
 		}
-		mutex_unlock(&cache_chain_mutex);
 		break;
 	case CPU_ONLINE:
+		mutex_unlock(&cache_chain_mutex);
 		start_cpu_timer(cpu);
 		break;
 #ifdef CONFIG_HOTPLUG_CPU
+	case CPU_DOWN_PREPARE:
+		mutex_lock(&cache_chain_mutex);
+		break;
+	case CPU_DOWN_FAILED:
+		mutex_unlock(&cache_chain_mutex);
+		break;
 	case CPU_DEAD:
 		/*
 		 * Even if all the cpus of a node are down, we don't free the
@@ -1240,8 +1249,8 @@
 		 * gets destroyed at kmem_cache_destroy().
 		 */
 		/* fall thru */
+#endif
 	case CPU_UP_CANCELED:
-		mutex_lock(&cache_chain_mutex);
 		list_for_each_entry(cachep, &cache_chain, next) {
 			struct array_cache *nc;
 			struct array_cache *shared;
@@ -1302,11 +1311,9 @@
 		}
 		mutex_unlock(&cache_chain_mutex);
 		break;
-#endif
 	}
 	return NOTIFY_OK;
 bad:
-	mutex_unlock(&cache_chain_mutex);
 	return NOTIFY_BAD;
 }
 
@@ -2054,11 +2061,9 @@
 	}
 
 	/*
-	 * Prevent CPUs from coming and going.
-	 * lock_cpu_hotplug() nests outside cache_chain_mutex
+	 * We use cache_chain_mutex to ensure a consistent view of
+	 * cpu_online_map as well.  Please see cpuup_callback
 	 */
-	lock_cpu_hotplug();
-
 	mutex_lock(&cache_chain_mutex);
 
 	list_for_each_entry(pc, &cache_chain, next) {
@@ -2263,7 +2268,6 @@
 		panic("kmem_cache_create(): failed to create slab `%s'\n",
 		      name);
 	mutex_unlock(&cache_chain_mutex);
-	unlock_cpu_hotplug();
 	return cachep;
 }
 EXPORT_SYMBOL(kmem_cache_create);
@@ -2381,6 +2385,7 @@
 	return nr_freed;
 }
 
+/* Called with cache_chain_mutex held to protect against cpu hotplug */
 static int __cache_shrink(struct kmem_cache *cachep)
 {
 	int ret = 0, i = 0;
@@ -2411,9 +2416,13 @@
  */
 int kmem_cache_shrink(struct kmem_cache *cachep)
 {
+	int ret;
 	BUG_ON(!cachep || in_interrupt());
 
-	return __cache_shrink(cachep);
+	mutex_lock(&cache_chain_mutex);
+	ret = __cache_shrink(cachep);
+	mutex_unlock(&cache_chain_mutex);
+	return ret;
 }
 EXPORT_SYMBOL(kmem_cache_shrink);
 
@@ -2441,23 +2450,17 @@
 
 	BUG_ON(!cachep || in_interrupt());
 
-	/* Don't let CPUs to come and go */
-	lock_cpu_hotplug();
-
 	/* Find the cache in the chain of caches. */
 	mutex_lock(&cache_chain_mutex);
 	/*
 	 * the chain is never empty, cache_cache is never destroyed
 	 */
 	list_del(&cachep->next);
-	mutex_unlock(&cache_chain_mutex);
 
 	if (__cache_shrink(cachep)) {
 		slab_error(cachep, "Can't free all objects");
-		mutex_lock(&cache_chain_mutex);
 		list_add(&cachep->next, &cache_chain);
 		mutex_unlock(&cache_chain_mutex);
-		unlock_cpu_hotplug();
 		return 1;
 	}
 
@@ -2477,7 +2480,7 @@
 		}
 	}
 	kmem_cache_free(&cache_cache, cachep);
-	unlock_cpu_hotplug();
+	mutex_unlock(&cache_chain_mutex);
 	return 0;
 }
 EXPORT_SYMBOL(kmem_cache_destroy);

-- 
Konrad Rzeszutek 1-(978)-392-3903 or 1-(617)-693-1718
IBM on-site partner.