Sophie

Sophie

distrib > Scientific%20Linux > 5x > x86_64 > by-pkgid > fc11cd6e1c513a17304da94a5390f3cd > files > 1492

kernel-2.6.18-194.11.1.el5.src.rpm

From: Heinz Mauelshagen <heinzm@redhat.com>
Date: Sat, 20 Dec 2008 10:30:10 +0100
Subject: [md] fix oops with device-mapper mirror target
Message-id: 1229765410.3625.14.camel@o
O-Subject: Re: [RHEL5.4 PATCH] BZ#472558: fix oops in dm-raid1.c (function choose_mirror); 2nd version
Bugzilla: 472558
RH-Acked-by: Jonathan Brassow <jbrassow@redhat.com>
RH-Acked-by: Alasdair G Kergon <agk@redhat.com>
RH-Acked-by: Jonathan Brassow <jbrassow@redhat.com>

Bugzilla: 472558

This is a reworked version to replace my initial one with a spinlock
instead of a mutex in order to address Alasdair's finding that
choose_mirror() can be called in interrupt context on mirror leg
failure.

IMO the atomics used are safe to update the round_robin ios
(variables ms->rr_ios_set and ms->rr_ios).

Please ACK

Heinz

Am Freitag, den 19.12.2008, 21:40 +0100 schrieb Heinz Mauelshagen:
> Bugzilla: 472558
>
> Description:
> Customer gets oops with device-mapper mirror target round robin read
> balancing because multiple callers of choose_mirror() in dm-raid1.c race
> on updating ms->read_mirror pointer, hence turning it invalid.
> Introducing mutex to tackle race conditions.
>
> Adding message interface in order to get rid of static compiled in round
> robin ios value, so that we are able to optimize at runtime via:
>
> "dmsetup message <dev> 0 io_balance round_robin read_ios #"
>
> As before, the default of 128 ios is still preset.
>
>
> Upstream: no upstream version of dm mirror target read balancing yet.
>
>
> Please ACK
>
> Heinz

diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 43fd75d..1725f8e 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -146,7 +146,11 @@ struct mirror_set {
 	struct mirror *default_mirror;	/* Default mirror */
 
 	unsigned int nr_mirrors;
-	atomic_t read_count;      /* Read counter for read balancing */
+
+	/* Round robin read balancing. */
+	spinlock_t choose_lock;
+	atomic_t rr_ios_set; /* Adjustable default ios. */
+	atomic_t rr_ios;     /* Round robin read ios. */
 	struct mirror *read_mirror; /* Last mirror read. */
 
 	struct workqueue_struct *kmirrord_wq;
@@ -839,33 +843,41 @@ static void do_recovery(struct mirror_set *ms)
  */
 static struct mirror *choose_mirror(struct mirror_set *ms)
 {
-	struct mirror *start_mirror = ms->read_mirror;
+	long flags;
+	struct mirror *start_mirror, *ret;
+
+	/* Can get called in interrupt from mirror_end_io(). */
+	spin_lock_irqsave(&ms->choose_lock, flags);
 
 	/*
 	 * Perform MIN_READS on each working mirror then
 	 * advance to the next one.  start_mirror stores
 	 * the first we tried, so we know when we're done.
 	 */
+	ret = start_mirror = ms->read_mirror;
 	do {
-		if (likely(!atomic_read(&ms->read_mirror->error_count)) &&
-		    !atomic_dec_and_test(&ms->read_count))
+		if (likely(!atomic_read(&ret->error_count) &&
+			   !atomic_dec_and_test(&ms->rr_ios)))
 			goto use_mirror;
 
-		atomic_set(&ms->read_count, MIN_READS);
+		atomic_set(&ms->rr_ios, atomic_read(&ms->rr_ios_set));
 
 		if (ms->read_mirror-- == ms->mirror)
 			ms->read_mirror += ms->nr_mirrors;
-	} while (ms->read_mirror != start_mirror);
+
+		ret = ms->read_mirror;
+	} while (ret != start_mirror);
 
 	/*
 	 * We've rejected every mirror.
 	 * Confirm the start_mirror can be used.
 	 */
-	if (unlikely(atomic_read(&ms->read_mirror->error_count)))
-		return NULL;
+	if (unlikely(atomic_read(&ret->error_count)))
+	      ret = NULL;
 
 use_mirror:
-	return ms->read_mirror;
+	spin_unlock_irqrestore(&ms->choose_lock, flags);
+	return ret;
 }
 
 /* fail_mirror
@@ -1374,6 +1386,8 @@ static struct mirror_set *alloc_context(unsigned int nr_mirrors,
 	ms->in_sync = 0;
 	ms->log_failure = 0;
 	atomic_set(&ms->suspend, 0);
+
+	spin_lock_init(&ms->choose_lock);
 	ms->read_mirror = &ms->mirror[DEFAULT_MIRROR];
 	ms->default_mirror = &ms->mirror[DEFAULT_MIRROR];
 
@@ -1390,7 +1404,8 @@ static struct mirror_set *alloc_context(unsigned int nr_mirrors,
 		return NULL;
 	}
 
-	atomic_set(&ms->read_count, MIN_READS);
+	atomic_set(&ms->rr_ios_set, MIN_READS);
+	atomic_set(&ms->rr_ios, MIN_READS);
 
 	bio_list_init(&ms->failures);
 
@@ -1770,6 +1785,34 @@ static void mirror_resume(struct dm_target *ti)
 	rh_start_recovery(&ms->rh);
 }
 
+/* Set round robin ios via message. */
+static int mirror_message(struct dm_target *ti, unsigned argc, char **argv)
+{
+	unsigned rr_ios_set;
+	struct mirror_set *ms = ti->private;
+	struct mapped_device *md;
+
+	if (argc != 4 ||
+	    strncmp(argv[0], "io_balance", strlen(argv[0])) ||
+	    strncmp(argv[1], "round_robin", strlen(argv[1])) ||
+	    strncmp(argv[2], "ios", strlen(argv[2])))
+		return -EINVAL;
+
+	if (sscanf(argv[3], "%u", &rr_ios_set) != 1 ||
+	    rr_ios_set < 2) {
+		DMERR("Round robin read ios have to be > 1");
+		return -EINVAL;
+	}
+
+	md = dm_table_get_md(ti->table);
+	DMINFO("Setting round robin read ios for \"%s\" to %u",
+	        dm_device_name(md), rr_ios_set);
+	dm_put(md);
+	atomic_set(&ms->rr_ios_set, rr_ios_set);
+	atomic_set(&ms->rr_ios, rr_ios_set);
+	return 0;
+}
+
 /*
  * device_status_char
  * @m: mirror device/leg we want the status of
@@ -1837,6 +1880,7 @@ static struct target_type mirror_target = {
 	.presuspend = mirror_presuspend,
 	.postsuspend = mirror_postsuspend,
 	.resume	 = mirror_resume,
+	.message = mirror_message,
 	.status	 = mirror_status,
 };