From: Milan Broz <mbroz@redhat.com>
Subject: [RHEL 5.1 PATCH] dm crypt: fix possible data corruptions
Date: Thu, 24 May 2007 20:28:01 +0200
Bugzilla: 241272
Message-Id: <4655D931.6030509@redhat.com>
Changelog: [md] dm crypt: fix possible data corruptions

RHEL5.1 device mapper crypt: fix possible data corruptions
Resolves: rhbz#241272

Patches are upstream (in 2.6.22-rc).

The current stable kernel release contains fixes for possible data
corruption; this is a backport of those fixes to the RHEL5 kernel.

* disable barriers in dm-crypt, because the current workqueue processing
  can reorder requests (the dm-crypt code doesn't support barriers
  properly)

* code must not access a bio after generic_make_request - there's no
  guarantee it still exists (see the short sketch below the list)

* must call clone_init as early as possible - at least before calling
  bio_put(clone) in any error path. Otherwise, the destructor will try
  to dereference bi_private, which may still be NULL.

* get rid of first_clone in dm-crypt, which is not really needed and can
  cause problems if we try to create a clone off first_clone after it has
  completed, and crypt_endio has destroyed its bvec.

A kernel with these fixes was compiled and tested with consistency and
load tests on a dm-crypt device.
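To make the second point concrete, here is a short sketch. It is for
illustration only and is not part of the patch; example_submit and its
parameters are made up. The rule it shows: anything still needed from a
clone bio (size, sector count, data direction) has to be read before
generic_make_request(), because the request may complete and the bio may
already be freed by the time that call returns. The patch itself simply
passes WRITE to blk_congestion_wait(), since process_write only ever
submits writes; capturing bio_data_dir() beforehand is just the general
form of the same rule.

	#include <linux/bio.h>
	#include <linux/blkdev.h>
	#include <linux/jiffies.h>

	/* Hypothetical helper, not from the patch: read everything needed
	 * from the clone before submitting it. */
	static void example_submit(struct bio *clone, unsigned int remaining)
	{
		int rw = bio_data_dir(clone);	/* read before submitting */

		generic_make_request(clone);
		/* 'clone' must not be dereferenced past this point */

		/* out of memory -> run queues, as in process_write() */
		if (remaining)
			blk_congestion_wait(rw, HZ/100);
	}

This is why the patch replaces blk_congestion_wait(bio_data_dir(clone),
HZ/100) with blk_congestion_wait(WRITE, HZ/100) in process_write(): the
old call dereferenced the clone after it had already been submitted.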
---
 drivers/md/dm-crypt.c |   73 +++++++++++++++-----------------------------------
 1 file changed, 23 insertions(+), 50 deletions(-)

Index: linux-2.6.18/drivers/md/dm-crypt.c
===================================================================
--- linux-2.6.18.orig/drivers/md/dm-crypt.c	2007-05-23 16:42:41.000000000 +0200
+++ linux-2.6.18/drivers/md/dm-crypt.c	2007-05-23 17:26:49.000000000 +0200
@@ -30,7 +30,6 @@
 struct crypt_io {
 	struct dm_target *target;
 	struct bio *base_bio;
-	struct bio *first_clone;
 	struct work_struct work;
 	atomic_t pending;
 	int error;
@@ -99,6 +98,8 @@ struct crypt_config {
 
 static kmem_cache_t *_crypt_io_pool;
 
+static void clone_init(struct crypt_io *, struct bio *);
+
 /*
  * Different IV generation algorithms:
  *
@@ -321,36 +322,21 @@ static int crypt_convert(struct crypt_co
  * This should never violate the device limitations
  * May return a smaller bio when running out of pages
  */
-static struct bio *
-crypt_alloc_buffer(struct crypt_config *cc, unsigned int size,
-		   struct bio *base_bio, unsigned int *bio_vec_idx)
+static struct bio *crypt_alloc_buffer(struct crypt_io *io, unsigned int size)
 {
+	struct crypt_config *cc = io->target->private;
 	struct bio *clone;
 	unsigned int nr_iovecs = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	gfp_t gfp_mask = GFP_NOIO | __GFP_HIGHMEM;
 	unsigned int i;
 
-	if (base_bio) {
-		clone = bio_alloc_bioset(GFP_NOIO, base_bio->bi_max_vecs, cc->bs);
-		__bio_clone(clone, base_bio);
-	} else
-		clone = bio_alloc_bioset(GFP_NOIO, nr_iovecs, cc->bs);
-
+	clone = bio_alloc_bioset(GFP_NOIO, nr_iovecs, cc->bs);
 	if (!clone)
 		return NULL;
 
-	clone->bi_destructor = dm_crypt_bio_destructor;
-
-	/* if the last bio was not complete, continue where that one ended */
-	clone->bi_idx = *bio_vec_idx;
-	clone->bi_vcnt = *bio_vec_idx;
-	clone->bi_size = 0;
-	clone->bi_flags &= ~(1 << BIO_SEG_VALID);
-
-	/* clone->bi_idx pages have already been allocated */
-	size -= clone->bi_idx * PAGE_SIZE;
+	clone_init(io, clone);
 
-	for (i = clone->bi_idx; i < nr_iovecs; i++) {
+	for (i = 0; i < nr_iovecs; i++) {
 		struct bio_vec *bv = bio_iovec_idx(clone, i);
 
 		bv->bv_page = mempool_alloc(cc->page_pool, gfp_mask);
@@ -362,7 +348,7 @@ crypt_alloc_buffer(struct crypt_config *
 		 * return a partially allocated bio, the caller will then try
 		 * to allocate additional bios while submitting this partial bio
 		 */
-		if ((i - clone->bi_idx) == (MIN_BIO_PAGES - 1))
+		if (i == (MIN_BIO_PAGES - 1))
 			gfp_mask = (gfp_mask | __GFP_NOWARN) & ~__GFP_WAIT;
 
 		bv->bv_offset = 0;
@@ -381,12 +367,6 @@ crypt_alloc_buffer(struct crypt_config *
 		return NULL;
 	}
 
-	/*
-	 * Remember the last bio_vec allocated to be able
-	 * to correctly continue after the splitting.
-	 */
-	*bio_vec_idx = clone->bi_vcnt;
-
 	return clone;
 }
 
@@ -438,9 +418,6 @@ static void dec_pending(struct crypt_io
 	if (!atomic_dec_and_test(&io->pending))
 		return;
 
-	if (io->first_clone)
-		bio_put(io->first_clone);
-
 	bio_endio(io->base_bio, io->base_bio->bi_size, io->error);
 
 	mempool_free(io, cc->io_pool);
@@ -505,6 +482,7 @@ static void clone_init(struct crypt_io *
 	clone->bi_end_io  = crypt_endio;
 	clone->bi_bdev    = cc->dev->bdev;
 	clone->bi_rw      = io->base_bio->bi_rw;
+	clone->bi_destructor = dm_crypt_bio_destructor;
 }
 
 static void process_read(struct crypt_io *io)
@@ -528,7 +506,6 @@ static void process_read(struct crypt_io
 	}
 
 	clone_init(io, clone);
-	clone->bi_destructor = dm_crypt_bio_destructor;
 	clone->bi_idx = 0;
 	clone->bi_vcnt = bio_segments(base_bio);
 	clone->bi_size = base_bio->bi_size;
@@ -547,7 +524,6 @@ static void process_write(struct crypt_i
 	struct convert_context ctx;
 	unsigned remaining = base_bio->bi_size;
 	sector_t sector = base_bio->bi_sector - io->target->begin;
-	unsigned bvec_idx = 0;
 
 	atomic_inc(&io->pending);
 
@@ -558,14 +534,14 @@ static void process_write(struct crypt_i
 	 * so repeat the whole process until all the data can be handled.
 	 */
 	while (remaining) {
-		clone = crypt_alloc_buffer(cc, base_bio->bi_size,
-					   io->first_clone, &bvec_idx);
+		clone = crypt_alloc_buffer(io, remaining);
 		if (unlikely(!clone)) {
 			dec_pending(io, -ENOMEM);
 			return;
 		}
 
 		ctx.bio_out = clone;
+		ctx.idx_out = 0;
 
 		if (unlikely(crypt_convert(cc, &ctx) < 0)) {
 			crypt_free_buffer_pages(cc, clone, clone->bi_size);
@@ -574,31 +550,26 @@ static void process_write(struct crypt_i
 			return;
 		}
 
-		clone_init(io, clone);
-		clone->bi_sector = cc->start + sector;
-
-		if (!io->first_clone) {
-			/*
-			 * hold a reference to the first clone, because it
-			 * holds the bio_vec array and that can't be freed
-			 * before all other clones are released
-			 */
-			bio_get(clone);
-			io->first_clone = clone;
-		}
+		/* crypt_convert should have filled the clone bio */
+		BUG_ON(ctx.idx_out < clone->bi_vcnt);
 
+		clone->bi_sector = cc->start + sector;
 		remaining -= clone->bi_size;
 		sector += bio_sectors(clone);
 
-		/* prevent bio_put of first_clone */
+		/* Grab another reference to the io struct
+		 * before we kick off the request */
 		if (remaining)
			atomic_inc(&io->pending);
 
 		generic_make_request(clone);
 
+		/* Do not reference clone after this - it
+		 * may be gone already. */
+
 		/* out of memory -> run queues */
 		if (remaining)
-			blk_congestion_wait(bio_data_dir(clone), HZ/100);
+			blk_congestion_wait(WRITE, HZ/100);
 	}
 }
 
@@ -902,10 +873,12 @@ static int crypt_map(struct dm_target *t
 	struct crypt_config *cc = ti->private;
 	struct crypt_io *io;
 
+	if (bio_barrier(bio))
+		return -EOPNOTSUPP;
+
 	io = mempool_alloc(cc->io_pool, GFP_NOIO);
 	io->target = ti;
 	io->base_bio = bio;
-	io->first_clone = NULL;
 	io->error = io->post_process = 0;
 	atomic_set(&io->pending, 0);
 	kcryptd_queue_io(io);