Sophie

Sophie

distrib > Scientific%20Linux > 5x > x86_64 > by-pkgid > 27922b4260f65d317aabda37e42bbbff > files > 703

kernel-2.6.18-238.el5.src.rpm

Date: Thu, 21 Sep 2006 16:47:20 +0100
From: Alasdair G Kergon <agk@redhat.com>
Subject: [RHEL5 PATCH 12/30] dm snapshot: tidy snapshot_map

This patch rearranges the snapshot_map code so that the functional
changes in subsequent patches are clearer.
 
The only functional change is to replace the existing read lock with
a write lock, which the next patch needs.
 
Index: linux-2.6.18.noarch/drivers/md/dm-snap.c
===================================================================
--- linux-2.6.18.noarch.orig/drivers/md/dm-snap.c
+++ linux-2.6.18.noarch/drivers/md/dm-snap.c
@@ -851,7 +851,6 @@ static int snapshot_map(struct dm_target
 {
 	struct exception *e;
 	struct dm_snapshot *s = (struct dm_snapshot *) ti->private;
-	int copy_needed = 0;
 	int r = 1;
 	chunk_t chunk;
 	struct pending_exception *pe = NULL;
@@ -866,29 +865,28 @@ static int snapshot_map(struct dm_target
 	if (unlikely(bio_barrier(bio)))
 		return -EOPNOTSUPP;
 
+	/* FIXME: should only take write lock if we need
+	 * to copy an exception */
+	down_write(&s->lock);
+
+	if (!s->valid) {
+		r = -EIO;
+		goto out_unlock;
+	}
+
+	/* If the block is already remapped - use that, else remap it */
+	e = lookup_exception(&s->complete, chunk);
+	if (e) {
+		remap_exception(s, e, bio);
+		goto out_unlock;
+	}
+
 	/*
 	 * Write to snapshot - higher level takes care of RW/RO
 	 * flags so we should only get this if we are
 	 * writeable.
 	 */
 	if (bio_rw(bio) == WRITE) {
-
-		/* FIXME: should only take write lock if we need
-		 * to copy an exception */
-		down_write(&s->lock);
-
-		if (!s->valid) {
-			r = -EIO;
-			goto out_unlock;
-		}
-
-		/* If the block is already remapped - use that, else remap it */
-		e = lookup_exception(&s->complete, chunk);
-		if (e) {
-			remap_exception(s, e, bio);
-			goto out_unlock;
-		}
-
 		pe = __find_pending_exception(s, bio);
 		if (!pe) {
 			__invalidate_snapshot(s, pe, -ENOMEM);
@@ -899,45 +897,27 @@ static int snapshot_map(struct dm_target
 		remap_exception(s, &pe->e, bio);
 		bio_list_add(&pe->snapshot_bios, bio);
 
+		r = 0;
+
 		if (!pe->started) {
 			/* this is protected by snap->lock */
 			pe->started = 1;
-			copy_needed = 1;
-		}
-
-		r = 0;
-
- out_unlock:
-		up_write(&s->lock);
-
-		if (copy_needed)
+			up_write(&s->lock);
 			start_copy(pe);
-	} else {
+			goto out;
+		}
+	} else
 		/*
 		 * FIXME: this read path scares me because we
 		 * always use the origin when we have a pending
 		 * exception.  However I can't think of a
 		 * situation where this is wrong - ejt.
 		 */
+		bio->bi_bdev = s->origin->bdev;
 
-		/* Do reads */
-		down_read(&s->lock);
-
-		if (!s->valid) {
-			up_read(&s->lock);
-			return -EIO;
-		}
-
-		/* See if it it has been remapped */
-		e = lookup_exception(&s->complete, chunk);
-		if (e)
-			remap_exception(s, e, bio);
-		else
-			bio->bi_bdev = s->origin->bdev;
-
-		up_read(&s->lock);
-	}
-
+ out_unlock:
+	up_write(&s->lock);
+ out:
 	return r;
 }
 

Date: Thu, 21 Sep 2006 16:47:40 +0100
From: Alasdair G Kergon <agk@redhat.com>
Subject: [RHEL5 PATCH 13/30] dm snapshot: tidy pending_complete

This patch rearranges the pending_complete() code so that the
functional changes in subsequent patches are clearer.

By consolidating the error and the non-error paths, we can fold
error_snapshot_bios() and __flush_bios() inline into pending_complete().

Index: linux-2.6.18.noarch/drivers/md/dm-snap.c
===================================================================
--- linux-2.6.18.noarch.orig/drivers/md/dm-snap.c
+++ linux-2.6.18.noarch/drivers/md/dm-snap.c
@@ -609,26 +609,6 @@ static void error_bios(struct bio *bio)
 	}
 }
 
-static inline void error_snapshot_bios(struct pending_exception *pe)
-{
-	error_bios(bio_list_get(&pe->snapshot_bios));
-}
-
-static struct bio *__flush_bios(struct pending_exception *pe)
-{
-	/*
-	 * If this pe is involved in a write to the origin and
-	 * it is the last sibling to complete then release
-	 * the bios for the original write to the origin.
-	 */
-
-	if (pe->primary_pe &&
-	    atomic_dec_and_test(&pe->primary_pe->sibling_count))
-		return bio_list_get(&pe->primary_pe->origin_bios);
-
-	return NULL;
-}
-
 static void __invalidate_snapshot(struct dm_snapshot *s,
 				struct pending_exception *pe, int err)
 {
@@ -656,16 +636,15 @@ static void pending_complete(struct pend
 	struct exception *e;
 	struct pending_exception *primary_pe;
 	struct dm_snapshot *s = pe->snap;
-	struct bio *flush = NULL;
+	struct bio *origin_bios = NULL;
+	struct bio *snapshot_bios = NULL;
+	int error = 0;
 
 	if (!success) {
 		/* Read/write error - snapshot is unusable */
 		down_write(&s->lock);
 		__invalidate_snapshot(s, pe, -EIO);
-		flush = __flush_bios(pe);
-		up_write(&s->lock);
-
-		error_snapshot_bios(pe);
+		error = 1;
 		goto out;
 	}
 
@@ -673,42 +652,40 @@ static void pending_complete(struct pend
 	if (!e) {
 		down_write(&s->lock);
 		__invalidate_snapshot(s, pe, -ENOMEM);
-		flush = __flush_bios(pe);
-		up_write(&s->lock);
-
-		error_snapshot_bios(pe);
+		error = 1;
 		goto out;
 	}
 	*e = pe->e;
 
-	/*
-	 * Add a proper exception, and remove the
-	 * in-flight exception from the list.
-	 */
 	down_write(&s->lock);
 	if (!s->valid) {
-		flush = __flush_bios(pe);
-		up_write(&s->lock);
-
 		free_exception(e);
-
-		error_snapshot_bios(pe);
+		error = 1;
 		goto out;
 	}
 
+	/*
+	 * Add a proper exception, and remove the
+	 * in-flight exception from the list.
+	 */
 	insert_exception(&s->complete, e);
 	remove_exception(&pe->e);
-	flush = __flush_bios(pe);
-
-	up_write(&s->lock);
-
-	/* Submit any pending write bios */
-	flush_bios(bio_list_get(&pe->snapshot_bios));
 
  out:
+	snapshot_bios = bio_list_get(&pe->snapshot_bios);
+
 	primary_pe = pe->primary_pe;
 
 	/*
+	 * If this pe is involved in a write to the origin and
+	 * it is the last sibling to complete then release
+	 * the bios for the original write to the origin.
+	 */
+	if (primary_pe &&
+	    atomic_dec_and_test(&primary_pe->sibling_count))
+		origin_bios = bio_list_get(&primary_pe->origin_bios);
+
+	/*
 	 * Free the pe if it's not linked to an origin write or if
 	 * it's not itself a primary pe.
 	 */
@@ -721,8 +698,15 @@ static void pending_complete(struct pend
 	if (primary_pe && !atomic_read(&primary_pe->sibling_count))
 		free_pending_exception(primary_pe);
 
-	if (flush)
-		flush_bios(flush);
+	up_write(&s->lock);
+
+	/* Submit any pending write bios */
+	if (error)
+		error_bios(snapshot_bios);
+	else
+		flush_bios(snapshot_bios);
+
+	flush_bios(origin_bios);
 }
 
 static void commit_callback(void *context, int success)

Date: Thu, 21 Sep 2006 16:47:55 +0100
From: Alasdair G Kergon <agk@redhat.com>
Subject: [RHEL5 PATCH 14/30] dm snapshot: add workqueue

Add a workqueue so that I/O can be queued up to be flushed from a
separate thread (e.g. if local interrupts are disabled).

A new per-snapshot spinlock pe_lock is introduced to protect
queued_bios.

Index: linux-2.6.18.noarch/drivers/md/dm-snap.c
===================================================================
--- linux-2.6.18.noarch.orig/drivers/md/dm-snap.c
+++ linux-2.6.18.noarch/drivers/md/dm-snap.c
@@ -39,6 +39,9 @@
  */
 #define SNAPSHOT_PAGES 256
 
+struct workqueue_struct *ksnapd;
+static void flush_queued_bios(void *data);
+
 struct pending_exception {
 	struct exception e;
 
@@ -488,6 +491,7 @@ static int snapshot_ctr(struct dm_target
 	s->active = 0;
 	s->last_percent = 0;
 	init_rwsem(&s->lock);
+	spin_lock_init(&s->pe_lock);
 	s->table = ti->table;
 
 	/* Allocate hash table for COW data */
@@ -523,6 +527,9 @@ static int snapshot_ctr(struct dm_target
 		goto bad6;
 	}
 
+	bio_list_init(&s->queued_bios);
+	INIT_WORK(&s->queued_bios_work, flush_queued_bios, s);
+
 	/* Add snapshot to the list of snapshots for this origin */
 	/* Exceptions aren't triggered till snapshot_resume() is called */
 	if (register_snapshot(s)) {
@@ -561,6 +568,8 @@ static void snapshot_dtr(struct dm_targe
 {
 	struct dm_snapshot *s = (struct dm_snapshot *) ti->private;
 
+	flush_workqueue(ksnapd);
+
 	/* Prevent further origin writes from using this snapshot. */
 	/* After this returns there can be no new kcopyd jobs. */
 	unregister_snapshot(s);
@@ -594,6 +603,19 @@ static void flush_bios(struct bio *bio)
 	}
 }
 
+static void flush_queued_bios(void *data)
+{
+	struct dm_snapshot *s = (struct dm_snapshot *) data;
+	struct bio *queued_bios;
+	unsigned long flags;
+
+	spin_lock_irqsave(&s->pe_lock, flags);
+	queued_bios = bio_list_get(&s->queued_bios);
+	spin_unlock_irqrestore(&s->pe_lock, flags);
+
+	flush_bios(queued_bios);
+}
+
 /*
  * Error a list of buffers.
  */
@@ -1240,8 +1262,17 @@ static int __init dm_snapshot_init(void)
 		goto bad5;
 	}
 
+	ksnapd = create_singlethread_workqueue("ksnapd");
+	if (!ksnapd) {
+		DMERR("Failed to create ksnapd workqueue.");
+		r = -ENOMEM;
+		goto bad6;
+	}
+
 	return 0;
 
+      bad6:
+	mempool_destroy(pending_pool);
       bad5:
 	kmem_cache_destroy(pending_cache);
       bad4:
@@ -1259,6 +1290,8 @@ static void __exit dm_snapshot_exit(void
 {
 	int r;
 
+	destroy_workqueue(ksnapd);
+
 	r = dm_unregister_target(&snapshot_target);
 	if (r)
 		DMERR("snapshot unregister failed %d", r);
Index: linux-2.6.18.noarch/drivers/md/dm-snap.h
===================================================================
--- linux-2.6.18.noarch.orig/drivers/md/dm-snap.h
+++ linux-2.6.18.noarch/drivers/md/dm-snap.h
@@ -10,7 +10,9 @@
 #define DM_SNAPSHOT_H
 
 #include "dm.h"
+#include "dm-bio-list.h"
 #include <linux/blkdev.h>
+#include <linux/workqueue.h>
 
 struct exception_table {
 	uint32_t hash_mask;
@@ -112,10 +114,20 @@ struct dm_snapshot {
 	struct exception_table pending;
 	struct exception_table complete;
 
+	/*
+	 * pe_lock protects all pending_exception operations and access
+	 * as well as the snapshot_bios list.
+	 */
+	spinlock_t pe_lock;
+
 	/* The on disk metadata handler */
 	struct exception_store store;
 
 	struct kcopyd_client *kcopyd_client;
+
+	/* Queue of snapshot writes for ksnapd to flush */
+	struct bio_list queued_bios;
+	struct work_struct queued_bios_work;
 };
 
 /*

Date: Thu, 21 Sep 2006 16:48:18 +0100
From: Alasdair G Kergon <agk@redhat.com>
Subject: [RHEL5 PATCH 15/30] dm snapshot: tidy pe ref counting

Rename sibling_count to ref_count and introduce get and put functions.

Index: linux-2.6.18.noarch/drivers/md/dm-snap.c
===================================================================
--- linux-2.6.18.noarch.orig/drivers/md/dm-snap.c
+++ linux-2.6.18.noarch/drivers/md/dm-snap.c
@@ -59,7 +59,7 @@ struct pending_exception {
 
 	/*
 	 * The primary pending_exception is the one that holds
-	 * the sibling_count and the list of origin_bios for a
+	 * the ref_count and the list of origin_bios for a
 	 * group of pending_exceptions.  It is always last to get freed.
 	 * These fields get set up when writing to the origin.
 	 */
@@ -72,7 +72,7 @@ struct pending_exception {
 	 * the sibling concerned and not pe->primary_pe->snap->lock unless
 	 * they are the same.
 	 */
-	atomic_t sibling_count;
+	atomic_t ref_count;
 
 	/* Pointer back to snapshot context */
 	struct dm_snapshot *snap;
@@ -653,10 +653,46 @@ static void __invalidate_snapshot(struct
 	dm_table_event(s->table);
 }
 
+static void get_pending_exception(struct pending_exception *pe)
+{
+	atomic_inc(&pe->ref_count);
+}
+
+static struct bio *put_pending_exception(struct pending_exception *pe)
+{
+	struct pending_exception *primary_pe;
+	struct bio *origin_bios = NULL;
+
+	primary_pe = pe->primary_pe;
+
+	/*
+	 * If this pe is involved in a write to the origin and
+	 * it is the last sibling to complete then release
+	 * the bios for the original write to the origin.
+	 */
+	if (primary_pe &&
+	    atomic_dec_and_test(&primary_pe->ref_count))
+		origin_bios = bio_list_get(&primary_pe->origin_bios);
+
+	/*
+	 * Free the pe if it's not linked to an origin write or if
+	 * it's not itself a primary pe.
+	 */
+	if (!primary_pe || primary_pe != pe)
+		free_pending_exception(pe);
+
+	/*
+	 * Free the primary pe if nothing references it.
+	 */
+	if (primary_pe && !atomic_read(&primary_pe->ref_count))
+		free_pending_exception(primary_pe);
+
+	return origin_bios;
+}
+
 static void pending_complete(struct pending_exception *pe, int success)
 {
 	struct exception *e;
-	struct pending_exception *primary_pe;
 	struct dm_snapshot *s = pe->snap;
 	struct bio *origin_bios = NULL;
 	struct bio *snapshot_bios = NULL;
@@ -695,30 +731,7 @@ static void pending_complete(struct pend
 
  out:
 	snapshot_bios = bio_list_get(&pe->snapshot_bios);
-
-	primary_pe = pe->primary_pe;
-
-	/*
-	 * If this pe is involved in a write to the origin and
-	 * it is the last sibling to complete then release
-	 * the bios for the original write to the origin.
-	 */
-	if (primary_pe &&
-	    atomic_dec_and_test(&primary_pe->sibling_count))
-		origin_bios = bio_list_get(&primary_pe->origin_bios);
-
-	/*
-	 * Free the pe if it's not linked to an origin write or if
-	 * it's not itself a primary pe.
-	 */
-	if (!primary_pe || primary_pe != pe)
-		free_pending_exception(pe);
-
-	/*
-	 * Free the primary pe if nothing references it.
-	 */
-	if (primary_pe && !atomic_read(&primary_pe->sibling_count))
-		free_pending_exception(primary_pe);
+	origin_bios = put_pending_exception(pe);
 
 	up_write(&s->lock);
 
@@ -829,7 +842,7 @@ __find_pending_exception(struct dm_snaps
 	bio_list_init(&pe->origin_bios);
 	bio_list_init(&pe->snapshot_bios);
 	pe->primary_pe = NULL;
-	atomic_set(&pe->sibling_count, 1);
+	atomic_set(&pe->ref_count, 0);
 	pe->snap = s;
 	pe->started = 0;
 
@@ -838,6 +851,7 @@ __find_pending_exception(struct dm_snaps
 		return NULL;
 	}
 
+	get_pending_exception(pe);
 	insert_exception(&s->pending, &pe->e);
 
  out:
@@ -1012,7 +1026,7 @@ static int __origin_write(struct list_he
 		 * is already remapped in this snapshot
 		 * and trigger an exception if not.
 		 *
-		 * sibling_count is initialised to 1 so pending_complete()
+		 * ref_count is initialised to 1 so pending_complete()
 		 * won't destroy the primary_pe while we're inside this loop.
 		 */
 		e = lookup_exception(&snap->complete, chunk);
@@ -1043,8 +1057,8 @@ static int __origin_write(struct list_he
 		}
 
 		if (!pe->primary_pe) {
-			atomic_inc(&primary_pe->sibling_count);
 			pe->primary_pe = primary_pe;
+			get_pending_exception(primary_pe);
 		}
 
 		if (!pe->started) {
@@ -1057,20 +1071,20 @@ static int __origin_write(struct list_he
 	}
 
 	if (!primary_pe)
-		goto out;
+		return r;
 
 	/*
 	 * If this is the first time we're processing this chunk and
-	 * sibling_count is now 1 it means all the pending exceptions
+	 * ref_count is now 1 it means all the pending exceptions
 	 * got completed while we were in the loop above, so it falls to
 	 * us here to remove the primary_pe and submit any origin_bios.
 	 */
 
-	if (first && atomic_dec_and_test(&primary_pe->sibling_count)) {
+	if (first && atomic_dec_and_test(&primary_pe->ref_count)) {
 		flush_bios(bio_list_get(&primary_pe->origin_bios));
 		free_pending_exception(primary_pe);
 		/* If we got here, pe_queue is necessarily empty. */
-		goto out;
+		return r;
 	}
 
 	/*
@@ -1079,7 +1093,6 @@ static int __origin_write(struct list_he
 	list_for_each_entry_safe(pe, next_pe, &pe_queue, list)
 		start_copy(pe);
 
- out:
 	return r;
 }
 

Date: Thu, 21 Sep 2006 16:48:35 +0100
From: Alasdair G Kergon <agk@redhat.com>
Subject: [RHEL5 PATCH 16/30] dm snapshot: fix freeing pending exception

If a snapshot becomes invalid while there are outstanding pending_exceptions,
then when pending_complete() processes each one it forgets to remove the
corresponding exception from its exception table before freeing it.

Fix this by moving the 'out:' label up one statement so that
remove_exception() is always called.  Then __invalidate_snapshot() no
longer needs to call it and its 'pe' argument becomes superfluous.

Index: linux-2.6.18.noarch/drivers/md/dm-snap.c
===================================================================
--- linux-2.6.18.noarch.orig/drivers/md/dm-snap.c
+++ linux-2.6.18.noarch/drivers/md/dm-snap.c
@@ -631,8 +631,7 @@ static void error_bios(struct bio *bio)
 	}
 }
 
-static void __invalidate_snapshot(struct dm_snapshot *s,
-				struct pending_exception *pe, int err)
+static void __invalidate_snapshot(struct dm_snapshot *s, int err)
 {
 	if (!s->valid)
 		return;
@@ -642,9 +641,6 @@ static void __invalidate_snapshot(struct
 	else if (err == -ENOMEM)
 		DMERR("Invalidating snapshot: Unable to allocate exception.");
 
-	if (pe)
-		remove_exception(&pe->e);
-
 	if (s->store.drop_snapshot)
 		s->store.drop_snapshot(&s->store);
 
@@ -701,7 +697,7 @@ static void pending_complete(struct pend
 	if (!success) {
 		/* Read/write error - snapshot is unusable */
 		down_write(&s->lock);
-		__invalidate_snapshot(s, pe, -EIO);
+		__invalidate_snapshot(s, -EIO);
 		error = 1;
 		goto out;
 	}
@@ -709,7 +705,7 @@ static void pending_complete(struct pend
 	e = alloc_exception();
 	if (!e) {
 		down_write(&s->lock);
-		__invalidate_snapshot(s, pe, -ENOMEM);
+		__invalidate_snapshot(s, -ENOMEM);
 		error = 1;
 		goto out;
 	}
@@ -727,9 +723,9 @@ static void pending_complete(struct pend
 	 * in-flight exception from the list.
 	 */
 	insert_exception(&s->complete, e);
-	remove_exception(&pe->e);
 
  out:
+	remove_exception(&pe->e);
 	snapshot_bios = bio_list_get(&pe->snapshot_bios);
 	origin_bios = put_pending_exception(pe);
 
@@ -909,7 +905,7 @@ static int snapshot_map(struct dm_target
 	if (bio_rw(bio) == WRITE) {
 		pe = __find_pending_exception(s, bio);
 		if (!pe) {
-			__invalidate_snapshot(s, pe, -ENOMEM);
+			__invalidate_snapshot(s, -ENOMEM);
 			r = -EIO;
 			goto out_unlock;
 		}
@@ -1035,7 +1031,7 @@ static int __origin_write(struct list_he
 
 		pe = __find_pending_exception(snap, bio);
 		if (!pe) {
-			__invalidate_snapshot(snap, pe, -ENOMEM);
+			__invalidate_snapshot(snap, -ENOMEM);
 			goto next_snapshot;
 		}