Sophie

Sophie

distrib > Scientific%20Linux > 5x > x86_64 > by-pkgid > 27922b4260f65d317aabda37e42bbbff > files > 1781

kernel-2.6.18-238.el5.src.rpm

From: Mikulas Patocka <mpatocka@redhat.com>
Date: Wed, 2 Dec 2009 02:40:12 -0500
Subject: [md] fix deadlock in device mapper multipath
Message-id: <Pine.LNX.4.64.0912012135400.19328@hs20-bc2-1.build.redhat.com>
Patchwork-id: 21639
O-Subject: [RHEL5.5 PATCH] bz543270 Fix deadlock in device mapper multipath
Bugzilla: 543270
RH-Acked-by: Alasdair G Kergon <agk@redhat.com>

Hi

This patch fixes a deadlock in multipath, reported in CentOS 5.3 and in
upstream.

Upstream status: in Alasdair's tree at
ftp://ftp.kernel.org/pub/linux/kernel/people/agk/patches/2.6/editing/dm-avoid-_hash_lock-deadlock.patch
Will be included in next upstream kernel merge window.

Testing: it is a race and it is not reproducible for me, so I only tested
some basic lvm functionality after applying the patch. I think some people
who reproduced the bug did testing of the patch in upstream.


diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index e20d907..554df49 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -56,6 +56,11 @@ static void dm_hash_remove_all(int keep_open_devices);
  */
 static DECLARE_RWSEM(_hash_lock);
 
+/*
+ * Protects use of mdptr to obtain hash cell name and uuid from mapped device.
+ */
+static DEFINE_MUTEX(dm_hash_cells_mutex);
+
 static void init_buckets(struct list_head *buckets)
 {
 	unsigned int i;
@@ -206,7 +211,9 @@ static int dm_hash_insert(const char *name, const char *uuid, struct mapped_devi
 		list_add(&cell->uuid_list, _uuid_buckets + hash_str(uuid));
 	}
 	dm_get(md);
+	mutex_lock(&dm_hash_cells_mutex);
 	dm_set_mdptr(md, cell);
+	mutex_unlock(&dm_hash_cells_mutex);
 	up_write(&_hash_lock);
 
 	return 0;
@@ -224,7 +231,9 @@ static void __hash_remove(struct hash_cell *hc)
 	/* remove from the dev hash */
 	list_del(&hc->uuid_list);
 	list_del(&hc->name_list);
+	mutex_lock(&dm_hash_cells_mutex);
 	dm_set_mdptr(hc->md, NULL);
+	mutex_unlock(&dm_hash_cells_mutex);
 
 	table = dm_get_table(hc->md);
 	if (table) {
@@ -321,7 +330,9 @@ static int dm_hash_rename(const char *old, const char *new)
 	 */
 	list_del(&hc->name_list);
 	old_name = hc->name;
+	mutex_lock(&dm_hash_cells_mutex);
 	hc->name = new_name;
+	mutex_unlock(&dm_hash_cells_mutex);
 	list_add(&hc->name_list, _name_buckets + hash_str(new_name));
 
 	/*
@@ -1537,8 +1548,7 @@ int dm_copy_name_and_uuid(struct mapped_device *md, char *name, char *uuid)
 	if (!md)
 		return -ENXIO;
 
-	dm_get(md);
-	down_read(&_hash_lock);
+	mutex_lock(&dm_hash_cells_mutex);
 	hc = dm_get_mdptr(md);
 	if (!hc || hc->md != md) {
 		r = -ENXIO;
@@ -1549,8 +1559,7 @@ int dm_copy_name_and_uuid(struct mapped_device *md, char *name, char *uuid)
 	strcpy(uuid, hc->uuid ? : "");
 
 out:
-	up_read(&_hash_lock);
-	dm_put(md);
+	mutex_unlock(&dm_hash_cells_mutex);
 
 	return r;
 }
diff --git a/drivers/md/dm-uevent.c b/drivers/md/dm-uevent.c
index b968194..f585f59 100644
--- a/drivers/md/dm-uevent.c
+++ b/drivers/md/dm-uevent.c
@@ -141,14 +141,13 @@ void dm_send_uevents(struct list_head *events, struct kobject *kobj)
 		list_del_init(&event->elist);
 
 		/*
-		 * Need to call dm_copy_name_and_uuid from here for now.
-		 * Context of previous var adds and locking used for
-		 * hash_cell not compatable.
+		 * When a device is being removed this copy fails and we
+		 * discard these unsent events.
 		 */
 		if (dm_copy_name_and_uuid(event->md, event->name,
 					  event->uuid)) {
-			DMERR("%s: dm_copy_name_and_uuid() failed",
-			      __FUNCTION__);
+			DMINFO("%s: skipping sending uevent for lost device",
+			       __func__);
 			goto uevent_free;
 		}