Sophie

Sophie

distrib > Scientific%20Linux > 5x > x86_64 > by-pkgid > 27922b4260f65d317aabda37e42bbbff > files > 1797

kernel-2.6.18-238.el5.src.rpm

From: Mikulas Patocka <mpatocka@redhat.com>
Date: Tue, 19 Aug 2008 14:09:54 -0400
Subject: [md] LVM raid-1 performance fixes
Message-id: Pine.LNX.4.64.0808191354510.7851@hs20-bc2-1.build.redhat.com
O-Subject: [PATCH 1/2 RHEL 5.3] bz438153 LVM raid-1 performance fixes
Bugzilla: 438153
RH-Acked-by: Jonathan Brassow <jbrassow@redhat.com>
RH-Acked-by: Alasdair G Kergon <agk@redhat.com>
RH-Acked-by: Alasdair G Kergon <agk@redhat.com>

Hi.

schedule() could cause looping and CPU consumption. schedule_timeout()
(that used to be there before) could, on the other hand, cause serious
performance degradation (it really happened with one customer).

This patch solves the problem properly, by using timers.

(note, this is the first patch for bug 438153; there is also a second
one, which I will send)

The patch is already upstream and in RHEL-4.7.

Testing: I compiled it on my workstation and ran the test from the bugzilla.

Upstream commit: a2aebe03be60ae4da03507a00d60211d5e0327c3 (2.6.26-rc1)

Fix bugzilla 438153.

Using schedule() or schedule_timeout() (which used to be there) would block
the process and degrade performance.

I reworked the code to use a timer: after adding something to the queues, the
code must call either wake (process the queue immediately) or delayed_wake
(process the queue after at most 1/5 s).

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>

diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 94b0032..92625f7 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -145,6 +145,9 @@ struct mirror_set {
 	struct workqueue_struct *kmirrord_wq;
 	struct work_struct kmirrord_work;
 
+	struct timer_list timer;
+	unsigned long timer_pending;
+
 	struct mirror mirror[0];
 };
 
@@ -161,11 +164,30 @@ static inline sector_t region_to_sector(struct region_hash *rh, region_t region)
 	return region << rh->region_shift;
 }
 
-static inline void wake(struct mirror_set *ms)
+static void wake(struct mirror_set *ms)
 {
 	queue_work(ms->kmirrord_wq, &ms->kmirrord_work);
 }
 
+static void delayed_wake_fn(unsigned long data)
+{
+	struct mirror_set *ms = (struct mirror_set *) data;
+
+	clear_bit(0, &ms->timer_pending);
+	wake(ms);
+}
+
+static void delayed_wake(struct mirror_set *ms)
+{
+	if (test_and_set_bit(0, &ms->timer_pending))
+		return;
+
+	ms->timer.expires = jiffies + HZ / 5;
+	ms->timer.data = (unsigned long) ms;
+	ms->timer.function = delayed_wake_fn;
+	add_timer(&ms->timer);
+}
+
 /* FIXME move this */
 static void queue_bio(struct mirror_set *ms, struct bio *bio, int rw);
 
@@ -1184,9 +1206,12 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes)
 	 * Add bios that are delayed due to remote recovery
 	 * back on to the write queue
 	 */
-	spin_lock_irq(&ms->lock);
-	bio_list_merge(&ms->writes, &requeue);
-	spin_unlock_irq(&ms->lock);
+	if (requeue.head) {
+		spin_lock_irq(&ms->lock);
+		bio_list_merge(&ms->writes, &requeue);
+		spin_unlock_irq(&ms->lock);
+		delayed_wake(ms);
+	}
 
 	/*
 	 * Increment the pending counts for any regions that will
@@ -1201,14 +1226,14 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes)
 	/*
 	 * Dispatch io.
 	 */
-	if (ms->log_failure) {
+	if (unlikely(ms->log_failure)) {
 		spin_lock_irq(&ms->lock);
 		bio_list_merge(&ms->failures, &sync);
 		spin_unlock_irq(&ms->lock);
-	} else {
+		wake(ms);
+	} else
 		while ((bio = bio_list_pop(&sync)))
 			do_write(ms, bio);
-	}
 
 	while ((bio = bio_list_pop(&recover)))
 		rh_delay(&ms->rh, bio);
@@ -1255,7 +1280,7 @@ static void do_failures(struct mirror_set *ms, struct bio_list *failures)
 			spin_lock_irq(&ms->lock);
 			bio_list_merge(&ms->failures, failures);
 			spin_unlock_irq(&ms->lock);
-			wake(ms);
+			delayed_wake(ms);
 		}
 		return;
 	}
@@ -1270,8 +1295,9 @@ static void do_failures(struct mirror_set *ms, struct bio_list *failures)
 /*-----------------------------------------------------------------
  * kmirrord
  *---------------------------------------------------------------*/
-static int do_mirror(struct mirror_set *ms)
+static void do_mirror(void *data)
 {
+	struct mirror_set *ms = data;
 	struct bio_list reads, writes, failures;
 
 	spin_lock_irq(&ms->lock);
@@ -1288,14 +1314,6 @@ static int do_mirror(struct mirror_set *ms)
 	do_reads(ms, &reads);
 	do_writes(ms, &writes);
 	do_failures(ms, &failures);
-
-	return (ms->writes.head || ms->failures.head) ? 1 : 0;
-}
-
-static void do_work(void *data)
-{
-	while (do_mirror(data))
-		schedule();
 }
 
 /*-----------------------------------------------------------------
@@ -1499,7 +1517,9 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		free_context(ms, ti, ms->nr_mirrors);
 		return -ENOMEM;
 	}
-	INIT_WORK(&ms->kmirrord_work, do_work, ms);
+	INIT_WORK(&ms->kmirrord_work, do_mirror, ms);
+	init_timer(&ms->timer);
+	ms->timer_pending = 0;
 
 	r = kcopyd_client_create(DM_IO_PAGES, &ms->kcopyd_client);
 	if (r) {
@@ -1516,6 +1536,7 @@ static void mirror_dtr(struct dm_target *ti)
 {
 	struct mirror_set *ms = (struct mirror_set *) ti->private;
 
+	del_timer_sync(&ms->timer);
 	flush_workqueue(ms->kmirrord_wq);
 	kcopyd_client_destroy(ms->kcopyd_client);
 	destroy_workqueue(ms->kmirrord_wq);