Sophie: kernel-2.6.18-194.11.1.el5 src

kernel-2.6.18-194.11.1.el5.src.rpm

From: Benjamin Marzinski <bmarzins@redhat.com>
Date: Tue, 12 Aug 2008 13:49:19 -0500
Subject: [gfs2] d_rwdirectempty fails with short read
Message-id: 20080812184919.GB4354@ether.msp.redhat.com
O-Subject: [RHEL 5.3 PATCH] GFS2 - bz #456453: GFS2: d_rwdirectempty fails with short read
Bugzilla: 456453
RH-Acked-by: Steven Whitehouse <swhiteho@redhat.com>
RH-Acked-by: Bob Peterson <rpeterso@redhat.com>

BZ#456453
https://bugzilla.redhat.com/show_bug.cgi?id=456453

When you run GFS2 in clustered mode, the i_size field of an inode does
not reflect the changes caused by writes from another node until you
lock the file for reading.  However, do_generic_mapping_read() checks
the i_size before GFS2 acquires its lock. This causes reads to the
region grown by writes from the other node to fail.

This works on upstream kernels because of a patch that was applied to
fix a read/truncate race.

http://git.kernel.org/gitweb.cgi?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=a32ea1e1f925399e0d81ca3f7394a44a6dafa12c

This patch moves the i_size check in do_generic_mapping_read() until
after it has an uptodate page (which means GFS2 will have acquired its
lock).  However, this patch caused a regression in ntfs which was fixed
by a later patch.

http://git.kernel.org/gitweb.cgi?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=ebab89909e0dc716282d5e7f6e73a3155fe66d4a

That attached patch is a trivial port of both of these to RHEL5.

-Ben

Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
--
 fs/ntfs/aops.c     |   10 ++++++-
 fs/ntfs/attrib.c   |    5 ---
 fs/ntfs/compress.c |   10 +++++++
 mm/filemap.c       |   72 ++++++++++++++++++++---------------------------------
 4 files changed, 48 insertions(+), 49 deletions(-)

diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index bc579bf..8bb058f 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -410,6 +410,15 @@ static int ntfs_readpage(struct file *file, struct page *page)
 
 retry_readpage:
 	BUG_ON(!PageLocked(page));
+	vi = page->mapping->host;
+	i_size = i_size_read(vi);
+	/* Is the page fully outside i_size? (truncate in progress) */
+	if (unlikely(page->index >= (i_size + PAGE_CACHE_SIZE - 1) >>
+			PAGE_CACHE_SHIFT)) {
+		zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0);
+		ntfs_debug("Read outside i_size - truncated?");
+		goto done;
+	}
 	/*
 	 * This can potentially happen because we clear PageUptodate() during
 	 * ntfs_writepage() of MstProtected() attributes.
@@ -418,7 +427,6 @@ retry_readpage:
 		unlock_page(page);
 		return 0;
 	}
-	vi = page->mapping->host;
 	ni = NTFS_I(vi);
 	/*
 	 * Only $DATA attributes can be encrypted and only unnamed $DATA
diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
index 6708e1d..3b540ac 100644
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c
@@ -179,10 +179,7 @@ int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn, ntfs_attr_search_ctx *ctx)
 	 * ntfs_mapping_pairs_decompress() fails.
 	 */
 	end_vcn = sle64_to_cpu(a->data.non_resident.highest_vcn) + 1;
-	if (!a->data.non_resident.lowest_vcn && end_vcn == 1)
-		end_vcn = sle64_to_cpu(a->data.non_resident.allocated_size) >>
-				ni->vol->cluster_size_bits;
-	if (unlikely(vcn >= end_vcn)) {
+	if (unlikely(vcn && vcn >= end_vcn)) {
 		err = -ENOENT;
 		goto err_out;
 	}
diff --git a/fs/ntfs/compress.c b/fs/ntfs/compress.c
index 68a607f..31d2174 100644
--- a/fs/ntfs/compress.c
+++ b/fs/ntfs/compress.c
@@ -561,6 +561,16 @@ int ntfs_read_compressed_block(struct page *page)
 	read_unlock_irqrestore(&ni->size_lock, flags);
 	max_page = ((i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) -
 			offset;
+	/* Is the page fully outside i_size? (truncate in progress) */
+	if (xpage >= max_page) {
+		kfree(bhs);
+		kfree(pages);
+		zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0);
+		ntfs_debug("Compressed read outside i_size - truncated?");
+		SetPageUptodate(page);
+		unlock_page(page);
+		return 0;
+	}
 	if (nr_pages < max_page)
 		max_page = nr_pages;
 	for (i = 0; i < max_page; i++, offset++) {
diff --git a/mm/filemap.c b/mm/filemap.c
index 6557fbe..f5622e5 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -908,12 +908,10 @@ void do_generic_mapping_read(struct address_space *mapping,
 {
 	struct inode *inode = mapping->host;
 	unsigned long index;
-	unsigned long end_index;
 	unsigned long offset;
 	unsigned long last_index;
 	unsigned long next_index;
 	unsigned long prev_index;
-	loff_t isize;
 	struct page *cached_page;
 	int error;
 	struct file_ra_state ra = *_ra;
@@ -925,27 +923,12 @@ void do_generic_mapping_read(struct address_space *mapping,
 	last_index = (*ppos + desc->count + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT;
 	offset = *ppos & ~PAGE_CACHE_MASK;
 
-	isize = i_size_read(inode);
-	if (!isize)
-		goto out;
-
-	end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
 	for (;;) {
 		struct page *page;
+		unsigned long end_index;
+		loff_t isize;
 		unsigned long nr, ret;
 
-		/* nr is the maximum number of bytes to copy from this page */
-		nr = PAGE_CACHE_SIZE;
-		if (index >= end_index) {
-			if (index > end_index)
-				goto out;
-			nr = ((isize - 1) & ~PAGE_CACHE_MASK) + 1;
-			if (nr <= offset) {
-				goto out;
-			}
-		}
-		nr = nr - offset;
-
 		cond_resched();
 		if (index == next_index)
 			next_index = page_cache_readahead(mapping, &ra, filp,
@@ -970,6 +953,32 @@ find_page:
 			goto page_not_up_to_date;
 		}
 page_ok:
+		/*
+		 * i_size must be checked after we know the page is Uptodate.
+		 *
+		 * Checking i_size after the check allows us to calculate
+		 * the correct value for "nr", which means the zero-filled
+		 * part of the page is not copied back to userspace (unless
+		 * another truncate extends the file - this is desired though).
+		 */
+
+		isize = i_size_read(inode);
+		end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
+		if (unlikely(!isize || index > end_index)) {
+			page_cache_release(page);
+			goto out;
+		}
+
+		/* nr is the maximum number of bytes to copy from this page */
+		nr = PAGE_CACHE_SIZE;
+		if (index == end_index) {
+			nr = ((isize - 1) & ~PAGE_CACHE_MASK) + 1;
+			if (nr <= offset) {
+				page_cache_release(page);
+				goto out;
+			}
+		}
+		nr = nr - offset;
 
 		/* If users can be writing to this page using arbitrary
 		 * virtual addresses, take care about potential aliasing
@@ -1054,31 +1063,6 @@ readpage:
 			unlock_page(page);
 		}
 
-		/*
-		 * i_size must be checked after we have done ->readpage.
-		 *
-		 * Checking i_size after the readpage allows us to calculate
-		 * the correct value for "nr", which means the zero-filled
-		 * part of the page is not copied back to userspace (unless
-		 * another truncate extends the file - this is desired though).
-		 */
-		isize = i_size_read(inode);
-		end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
-		if (unlikely(!isize || index > end_index)) {
-			page_cache_release(page);
-			goto out;
-		}
-
-		/* nr is the maximum number of bytes to copy from this page */
-		nr = PAGE_CACHE_SIZE;
-		if (index == end_index) {
-			nr = ((isize - 1) & ~PAGE_CACHE_MASK) + 1;
-			if (nr <= offset) {
-				page_cache_release(page);
-				goto out;
-			}
-		}
-		nr = nr - offset;
 		goto page_ok;
 
 readpage_error: