From: Mikulas Patocka <mpatocka@redhat.com> Date: Thu, 23 Apr 2009 01:11:27 -0400 Subject: [md] dm snapshot: refactor __find_pending_exception Message-id: Pine.LNX.4.64.0904230109590.12945@hs20-bc2-1.build.redhat.com O-Subject: [PATCH 2/2 RHEL5.4] bz496100 race conditions in snapshots Bugzilla: 496100 RH-Acked-by: Jonathan Brassow <jbrassow@redhat.com> The second patch --- two exception for the same chunk may be created. It won't cause crashed but it can cause incorrect data being returned. Mikulas Commits: 2913808eb56a6445a7b277eb8d17651c8defb035 c66213921c816f6b1b16a84911618ba9a363b134 35bf659b008e83e725dcd30f542e38461dbb867c We need to check if the exception was completed after dropping the lock. After regaining the lock, __find_pending_exception checks if the exception was already placed into &s->pending hash. But we don't check if the exception was already completed and placed into &s->complete hash. If the process waiting in alloc_pending_exception was delayed at this point because of a scheduling latency and the exception was meanwhile completed, we'd miss that and allocate another pending exception for already completed chunk. It would lead to a situation where two records for the same chunk exist and potential data corruption because multiple snapshot I/Os to the affected chunk could be redirected to different locations in the snapshot. Signed-off-by: Mikulas Patocka <mpatocka@redhat.com> diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 121ca19..8fba1f6 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -965,6 +965,17 @@ static void start_copy(struct pending_exception *pe) &src, 1, &dest, 0, copy_callback, pe); } +static struct pending_exception * +__lookup_pending_exception(struct dm_snapshot *s, chunk_t chunk) +{ + struct exception *e = lookup_exception(&s->pending, chunk); + + if (!e) + return NULL; + + return container_of(e, struct pending_exception, e); +} + /* * Looks to see if this snapshot already has a pending exception * for this chunk, otherwise it allocates a new one and inserts @@ -974,40 +985,15 @@ static void start_copy(struct pending_exception *pe) * this. */ static struct pending_exception * -__find_pending_exception(struct dm_snapshot *s, struct bio *bio) +__find_pending_exception(struct dm_snapshot *s, + struct pending_exception *pe, chunk_t chunk) { - struct exception *e; - struct pending_exception *pe; - chunk_t chunk = sector_to_chunk(s, bio->bi_sector); - - /* - * Is there a pending exception for this already ? - */ - e = lookup_exception(&s->pending, chunk); - if (e) { - /* cast the exception to a pending exception */ - pe = container_of(e, struct pending_exception, e); - goto out; - } - - /* - * Create a new pending exception, we don't want - * to hold the lock while we do this. - */ - up_write(&s->lock); - pe = alloc_pending_exception(s); - down_write(&s->lock); - - if (!s->valid) { - free_pending_exception(pe); - return NULL; - } + struct pending_exception *pe2; - e = lookup_exception(&s->pending, chunk); - if (e) { + pe2 = __lookup_pending_exception(s, chunk); + if (pe2) { free_pending_exception(pe); - pe = container_of(e, struct pending_exception, e); - goto out; + return pe2; } pe->e.old_chunk = chunk; @@ -1025,7 +1011,6 @@ __find_pending_exception(struct dm_snapshot *s, struct bio *bio) get_pending_exception(pe); insert_exception(&s->pending, &pe->e); - out: return pe; } @@ -1076,11 +1061,31 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio, * writeable. */ if (bio_rw(bio) == WRITE) { - pe = __find_pending_exception(s, bio); + pe = __lookup_pending_exception(s, chunk); if (!pe) { - __invalidate_snapshot(s, -ENOMEM); - r = -EIO; - goto out_unlock; + up_write(&s->lock); + pe = alloc_pending_exception(s); + down_write(&s->lock); + + if (!s->valid) { + free_pending_exception(pe); + r = -EIO; + goto out_unlock; + } + + e = lookup_exception(&s->complete, chunk); + if (e) { + free_pending_exception(pe); + remap_exception(s, e, bio, chunk); + goto out_unlock; + } + + pe = __find_pending_exception(s, pe, chunk); + if (!pe) { + __invalidate_snapshot(s, -ENOMEM); + r = -EIO; + goto out_unlock; + } } remap_exception(s, &pe->e, bio, chunk); @@ -1210,10 +1215,28 @@ static int __origin_write(struct list_head *snapshots, struct bio *bio) if (e) goto next_snapshot; - pe = __find_pending_exception(snap, bio); + pe = __lookup_pending_exception(snap, chunk); if (!pe) { - __invalidate_snapshot(snap, -ENOMEM); - goto next_snapshot; + up_write(&snap->lock); + pe = alloc_pending_exception(snap); + down_write(&snap->lock); + + if (!snap->valid) { + free_pending_exception(pe); + goto next_snapshot; + } + + e = lookup_exception(&snap->complete, chunk); + if (e) { + free_pending_exception(pe); + goto next_snapshot; + } + + pe = __find_pending_exception(snap, pe, chunk); + if (!pe) { + __invalidate_snapshot(snap, -ENOMEM); + goto next_snapshot; + } } if (!primary_pe) {