Sophie

Sophie

distrib > Scientific%20Linux > 5x > x86_64 > by-pkgid > 27922b4260f65d317aabda37e42bbbff > files > 1015

kernel-2.6.18-238.el5.src.rpm

From: Eric Sandeen <sandeen@redhat.com>
Date: Tue, 19 May 2009 09:53:56 -0500
Subject: [fs] ext4: corruption fixes
Message-id: 4A12C804.9000700@redhat.com
O-Subject: [PATCH RHEL5.4] ext4: corruption fixes
Bugzilla: 501082
RH-Acked-by: Josef Bacik <josef@redhat.com>
RH-Acked-by: Edward Shishkin <edward@redhat.com>
RH-Acked-by: Rik van Riel <riel@redhat.com>

This is for [Bug 501082] RHEL5.4 ext4: backport corruption fixes from .30

A few last-minute fixes for ext4, all recently committed upstream:

9c1ee184a30394e54165fa4c15923cabd952c106 ext4: Fix sub-block zeroing for writes into preallocated extents
33b9817e2ae097c7b8d256e3510ac6c54fc6d9d0 ext4: Use a fake block number for delayed new buffer_head
2a8964d63d50dd2d65d71d342bc7fb6ef4117614 ext4: Clear the unwritten buffer_head flag after the extent is initialized
2ec0ae3acec47f628179ee95fe2c4da01b5e9fc4 ext4: Fix race in ext4_inode_info.i_cached_extent

The main bugs fixed here are:

1) a partial write into a preallocated block will expose garbage
from in the unwritten portion of the block
2) races between reading & setting the cached extent on the inode
can lead to a corrupt cached extent, and subsequent fs corruption

Thanks,
-Eric

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index a90617c..fe1645d 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -1740,11 +1740,13 @@ ext4_ext_put_in_cache(struct inode *inode, ext4_lblk_t block,
 {
 	struct ext4_ext_cache *cex;
 	BUG_ON(len == 0);
+	spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
 	cex = &EXT4_I(inode)->i_cached_extent;
 	cex->ec_type = type;
 	cex->ec_block = block;
 	cex->ec_len = len;
 	cex->ec_start = start;
+	spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
 }
 
 /*
@@ -1801,12 +1803,17 @@ ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
 			struct ext4_extent *ex)
 {
 	struct ext4_ext_cache *cex;
+	int ret = EXT4_EXT_CACHE_NO;
 
+	/* 
+	 * We borrow i_block_reservation_lock to protect i_cached_extent
+	 */
+	spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
 	cex = &EXT4_I(inode)->i_cached_extent;
 
 	/* has cache valid data? */
 	if (cex->ec_type == EXT4_EXT_CACHE_NO)
-		return EXT4_EXT_CACHE_NO;
+		goto errout;
 
 	BUG_ON(cex->ec_type != EXT4_EXT_CACHE_GAP &&
 			cex->ec_type != EXT4_EXT_CACHE_EXTENT);
@@ -1817,11 +1824,11 @@ ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
 		ext_debug("%u cached by %u:%u:%llu\n",
 				block,
 				cex->ec_block, cex->ec_len, cex->ec_start);
-		return cex->ec_type;
+		ret = cex->ec_type;
 	}
-
-	/* not in cache */
-	return EXT4_EXT_CACHE_NO;
+errout:
+	spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+	return ret;
 }
 
 /*
@@ -2778,6 +2785,8 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 				if (allocated > max_blocks)
 					allocated = max_blocks;
 				set_buffer_unwritten(bh_result);
+				bh_result->b_bdev = inode->i_sb->s_bdev;
+				bh_result->b_blocknr = newblock;
 				goto out2;
 			}
 
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 75546bd..34bcc08 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1069,6 +1069,7 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
 	int retval;
 
 	clear_buffer_mapped(bh);
+	clear_buffer_unwritten(bh);
 
 	/*
 	 * Try to see if we can get  the block without requesting
@@ -1099,6 +1100,18 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
 		return retval;
 
 	/*
+	 * When we call get_blocks without the create flag, the
+	 * BH_Unwritten flag could have gotten set if the blocks
+	 * requested were part of a uninitialized extent.  We need to
+	 * clear this flag now that we are committed to convert all or
+	 * part of the uninitialized extent to be an initialized
+	 * extent.  This is because we need to avoid the combination
+	 * of BH_Unwritten and BH_Mapped flags being simultaneously
+	 * set on the buffer_head.
+	 */
+	clear_buffer_unwritten(bh);
+
+	/*
 	 * New blocks allocate and/or writing to uninitialized extent
 	 * will possibly result in updating i_data, so we take
 	 * the write lock of i_data_sem, and call get_blocks()
@@ -2215,6 +2228,10 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
 				  struct buffer_head *bh_result, int create)
 {
 	int ret = 0;
+	sector_t invalid_block = ~((sector_t) 0xffff);
+
+	if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es))
+		invalid_block = ~0;
 
 	BUG_ON(create == 0);
 	BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize);
@@ -2236,11 +2253,18 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
 			/* not enough space to reserve */
 			return ret;
 
-		map_bh(bh_result, inode->i_sb, 0);
+		map_bh(bh_result, inode->i_sb, invalid_block);
 		set_buffer_new(bh_result);
 		set_buffer_delay(bh_result);
 	} else if (ret > 0) {
 		bh_result->b_size = (ret << inode->i_blkbits);
+		/*
+		 * With sub-block writes into unwritten extents
+		 * we also need to mark the buffer as new so that
+		 * the unwritten parts of the buffer gets correctly zeroed.
+		 */
+		if (buffer_unwritten(bh_result))
+			set_buffer_new(bh_result);
 		ret = 0;
 	}