Sophie

Sophie

distrib > Scientific%20Linux > 5x > x86_64 > by-pkgid > 27922b4260f65d317aabda37e42bbbff > files > 1812

kernel-2.6.18-238.el5.src.rpm

From: Mikulas Patocka <mpatocka@redhat.com>
Date: Thu, 23 Apr 2009 01:01:00 -0400
Subject: [md] snapshot: store damage
Message-id: Pine.LNX.4.64.0904230051460.12945@hs20-bc2-1.build.redhat.com
O-Subject: [PATCH RHEL5.4] bz496102 snapshot store damage
Bugzilla: 496102
RH-Acked-by: Jonathan Brassow <jbrassow@redhat.com>

Hi

This is a combination of 3 upstream patches. Updates to snapshot exception
store were misordered.

Snapshot exceptions are stored as a linear array in multiple chunks and
zero entry is used to mark the array end. When the chunk is completely
filled, it was written to disk and new zero-filled chunk was allocated and
writen. If the computer crashed between these two writes, the zero-entry
marker would be missing and it would load random exceptions on the next
reboot.

Testing: I tested basic snapshot functionality on a filesystem, I copied
and compared few directories. I also run a synthetic test that I made some
times ago. I didn't try to reproduce the bug because the timing window was
already very narrow.

Mikulas

A combined patch for commits in 2.6.28
a481db784682b33d078c7bf8a1d0581dc09946c1
7c9e6c17325fab380fbe9c9787680ff7d4a51abd
7acedc5b98a2fcff64f00c21110aae7d3ac2f7df

We must zero the next chunk on disk *before* writing out the current chunk, not
after. Otherwise if the machine crashes at the wrong time, the "end of metadata"
marker may be missing.

Signed-off-by: Mikulas Patocka

diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c
index f6ce5ef..330f1e8 100644
--- a/drivers/md/dm-exception-store.c
+++ b/drivers/md/dm-exception-store.c
@@ -104,6 +104,11 @@ struct pstore {
 	void *area;
 
 	/*
+	 * An area of zeros used to clear the next area.
+	 */
+	void *zero_area;
+
+	/*
 	 * Used to keep track of which metadata area the data in
 	 * 'chunk' refers to.
 	 */
@@ -146,6 +151,13 @@ static int alloc_area(struct pstore *ps)
 	if (!ps->area)
 		return r;
 
+	ps->zero_area = vmalloc(len);
+	if (!ps->zero_area) {
+		vfree(ps->area);
+		return r;
+	}
+	memset(ps->zero_area, 0, len);
+
 	return 0;
 }
 
@@ -153,6 +165,8 @@ static void free_area(struct pstore *ps)
 {
 	vfree(ps->area);
 	ps->area = NULL;
+	vfree(ps->zero_area);
+	ps->zero_area = NULL;
 }
 
 /*
@@ -177,29 +191,52 @@ static int chunk_io(struct pstore *ps, uint32_t chunk, int rw)
 }
 
 /*
+ * Convert a metadata area index to a chunk index.
+ */
+static chunk_t area_location(struct pstore *ps, chunk_t area)
+{
+	return 1 + ((ps->exceptions_per_area + 1) * area);
+}
+
+/*
  * Read or write a metadata area.  Remembering to skip the first
  * chunk which holds the header.
  */
-static int area_io(struct pstore *ps, uint32_t area, int rw)
+static int area_io(struct pstore *ps, int rw)
 {
 	int r;
 	uint32_t chunk;
 
-	/* convert a metadata area index to a chunk index */
-	chunk = 1 + ((ps->exceptions_per_area + 1) * area);
+	chunk = area_location(ps, ps->current_area);
 
 	r = chunk_io(ps, chunk, rw);
 	if (r)
 		return r;
 
-	ps->current_area = area;
 	return 0;
 }
 
-static int zero_area(struct pstore *ps, uint32_t area)
+static void zero_memory_area(struct pstore *ps)
 {
 	memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT);
-	return area_io(ps, area, WRITE);
+}
+
+static int zero_disk_area(struct pstore *ps, chunk_t area)
+{
+	struct io_region where = {
+		.bdev = ps->snap->cow->bdev,
+		.sector = ps->snap->chunk_size * area_location(ps, area),
+		.count = ps->snap->chunk_size,
+	};
+	struct dm_io_request io_req = {
+		.bi_rw = WRITE,
+		.mem.type = DM_IO_VMA,
+		.mem.ptr.vma = ps->zero_area,
+		.client = ps->io_client,
+		.notify.fn = NULL,
+	};
+
+	return dm_io(&io_req, 1, &where, NULL);
 }
 
 static int read_header(struct pstore *ps, int *new_snapshot)
@@ -372,15 +409,14 @@ static int insert_exceptions(struct pstore *ps, int *full)
 
 static int read_exceptions(struct pstore *ps)
 {
-	uint32_t area;
 	int r, full = 1;
 
 	/*
 	 * Keeping reading chunks and inserting exceptions until
 	 * we find a partially full area.
 	 */
-	for (area = 0; full; area++) {
-		r = area_io(ps, area, READ);
+	for (ps->current_area = 0; full; ps->current_area++) {
+		r = area_io(ps, READ);
 		if (r)
 			return r;
 
@@ -389,6 +425,8 @@ static int read_exceptions(struct pstore *ps)
 			return r;
 	}
 
+	ps->current_area--;
+
 	return 0;
 }
 
@@ -446,12 +484,13 @@ static int persistent_read_metadata(struct exception_store *store)
 			return r;
 		}
 
-		r = zero_area(ps, 0);
+		ps->current_area = 0;
+		zero_memory_area(ps);
+		r = zero_disk_area(ps, 0);
 		if (r) {
-			DMWARN("zero_area(0) failed");
+			DMWARN("zero_disk_area(0) failed");
 			return r;
 		}
-
 	} else {
 		/*
 		 * Sanity checks.
@@ -509,7 +548,6 @@ static void persistent_commit(struct exception_store *store,
 			      void (*callback) (void *, int success),
 			      void *callback_context)
 {
-	int r;
 	unsigned int i;
 	struct pstore *ps = get_info(store);
 	struct disk_exception de;
@@ -530,33 +568,41 @@ static void persistent_commit(struct exception_store *store,
 	cb->context = callback_context;
 
 	/*
-	 * If there are no more exceptions in flight, or we have
-	 * filled this metadata area we commit the exceptions to
-	 * disk.
+	 * If there are exceptions in flight and we have not yet
+	 * filled this metadata area there's nothing more to do.
 	 */
-	if (atomic_dec_and_test(&ps->pending_count) ||
-	    (ps->current_committed == ps->exceptions_per_area)) {
-		r = area_io(ps, ps->current_area, WRITE);
-		if (r)
-			ps->valid = 0;
+	if (!atomic_dec_and_test(&ps->pending_count) &&
+	    (ps->current_committed != ps->exceptions_per_area))
+		return;
 
-		/*
-		 * Have we completely filled the current area ?
-		 */
-		if (ps->current_committed == ps->exceptions_per_area) {
-			ps->current_committed = 0;
-			r = zero_area(ps, ps->current_area + 1);
-			if (r)
-				ps->valid = 0;
-		}
+	/*
+	 * If we completely filled the current area, then wipe the next one.
+	 */
+	if ((ps->current_committed == ps->exceptions_per_area) &&
+	     zero_disk_area(ps, ps->current_area + 1))
+		ps->valid = 0;
 
-		for (i = 0; i < ps->callback_count; i++) {
-			cb = ps->callbacks + i;
-			cb->callback(cb->context, r == 0 ? 1 : 0);
-		}
+	/*
+	 * Commit exceptions to disk.
+	 */
+	if (ps->valid && area_io(ps, WRITE))
+		ps->valid = 0;
+
+	/*
+	 * Advance to the next area if this one is full.
+	 */
+	if (ps->current_committed == ps->exceptions_per_area) {
+		ps->current_committed = 0;
+		ps->current_area++;
+		zero_memory_area(ps);
+	}
 
-		ps->callback_count = 0;
+	for (i = 0; i < ps->callback_count; i++) {
+		cb = ps->callbacks + i;
+		cb->callback(cb->context, ps->valid);
 	}
+
+	ps->callback_count = 0;
 }
 
 static void persistent_drop(struct exception_store *store)