From: Benjamin Marzinski <bmarzins@redhat.com> Date: Tue, 12 Aug 2008 13:49:19 -0500 Subject: [gfs2] d_rwdirectempty fails with short read Message-id: 20080812184919.GB4354@ether.msp.redhat.com O-Subject: [RHEL 5.3 PATCH] GFS2 - bz #456453: GFS2: d_rwdirectempty fails with short read Bugzilla: 456453 RH-Acked-by: Steven Whitehouse <swhiteho@redhat.com> RH-Acked-by: Bob Peterson <rpeterso@redhat.com> BZ#456453 https://bugzilla.redhat.com/show_bug.cgi?id=456453 When you run GFS2 in clustered mode, the i_size field of an inode does not reflect the changes caused by writes from another node until you lock the file for reading. However, do_generic_mapping_read() checks the i_size before GFS2 acquires its lock. This causes reads to the region grown by writes from the other node to fail. This works on upstream kernels because of a patch that was applied to fix a read/truncate race. http://git.kernel.org/gitweb.cgi?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=a32ea1e1f925399e0d81ca3f7394a44a6dafa12c This patch moves the i_size check in do_generic_mapping_read() until after it has an uptodate page (which means GFS2 will have acquired its lock). However, this patch caused a regression in ntfs which was fixed by a later patch. http://git.kernel.org/gitweb.cgi?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=ebab89909e0dc716282d5e7f6e73a3155fe66d4a That attached patch is a trivial port of both of these to RHEL5. -Ben Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com> -- fs/ntfs/aops.c | 10 ++++++- fs/ntfs/attrib.c | 5 --- fs/ntfs/compress.c | 10 +++++++ mm/filemap.c | 72 ++++++++++++++++++++--------------------------------- 4 files changed, 48 insertions(+), 49 deletions(-) diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c index bc579bf..8bb058f 100644 --- a/fs/ntfs/aops.c +++ b/fs/ntfs/aops.c @@ -410,6 +410,15 @@ static int ntfs_readpage(struct file *file, struct page *page) retry_readpage: BUG_ON(!PageLocked(page)); + vi = page->mapping->host; + i_size = i_size_read(vi); + /* Is the page fully outside i_size? (truncate in progress) */ + if (unlikely(page->index >= (i_size + PAGE_CACHE_SIZE - 1) >> + PAGE_CACHE_SHIFT)) { + zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0); + ntfs_debug("Read outside i_size - truncated?"); + goto done; + } /* * This can potentially happen because we clear PageUptodate() during * ntfs_writepage() of MstProtected() attributes. @@ -418,7 +427,6 @@ retry_readpage: unlock_page(page); return 0; } - vi = page->mapping->host; ni = NTFS_I(vi); /* * Only $DATA attributes can be encrypted and only unnamed $DATA diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c index 6708e1d..3b540ac 100644 --- a/fs/ntfs/attrib.c +++ b/fs/ntfs/attrib.c @@ -179,10 +179,7 @@ int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn, ntfs_attr_search_ctx *ctx) * ntfs_mapping_pairs_decompress() fails. */ end_vcn = sle64_to_cpu(a->data.non_resident.highest_vcn) + 1; - if (!a->data.non_resident.lowest_vcn && end_vcn == 1) - end_vcn = sle64_to_cpu(a->data.non_resident.allocated_size) >> - ni->vol->cluster_size_bits; - if (unlikely(vcn >= end_vcn)) { + if (unlikely(vcn && vcn >= end_vcn)) { err = -ENOENT; goto err_out; } diff --git a/fs/ntfs/compress.c b/fs/ntfs/compress.c index 68a607f..31d2174 100644 --- a/fs/ntfs/compress.c +++ b/fs/ntfs/compress.c @@ -561,6 +561,16 @@ int ntfs_read_compressed_block(struct page *page) read_unlock_irqrestore(&ni->size_lock, flags); max_page = ((i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) - offset; + /* Is the page fully outside i_size? (truncate in progress) */ + if (xpage >= max_page) { + kfree(bhs); + kfree(pages); + zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0); + ntfs_debug("Compressed read outside i_size - truncated?"); + SetPageUptodate(page); + unlock_page(page); + return 0; + } if (nr_pages < max_page) max_page = nr_pages; for (i = 0; i < max_page; i++, offset++) { diff --git a/mm/filemap.c b/mm/filemap.c index 6557fbe..f5622e5 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -908,12 +908,10 @@ void do_generic_mapping_read(struct address_space *mapping, { struct inode *inode = mapping->host; unsigned long index; - unsigned long end_index; unsigned long offset; unsigned long last_index; unsigned long next_index; unsigned long prev_index; - loff_t isize; struct page *cached_page; int error; struct file_ra_state ra = *_ra; @@ -925,27 +923,12 @@ void do_generic_mapping_read(struct address_space *mapping, last_index = (*ppos + desc->count + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT; offset = *ppos & ~PAGE_CACHE_MASK; - isize = i_size_read(inode); - if (!isize) - goto out; - - end_index = (isize - 1) >> PAGE_CACHE_SHIFT; for (;;) { struct page *page; + unsigned long end_index; + loff_t isize; unsigned long nr, ret; - /* nr is the maximum number of bytes to copy from this page */ - nr = PAGE_CACHE_SIZE; - if (index >= end_index) { - if (index > end_index) - goto out; - nr = ((isize - 1) & ~PAGE_CACHE_MASK) + 1; - if (nr <= offset) { - goto out; - } - } - nr = nr - offset; - cond_resched(); if (index == next_index) next_index = page_cache_readahead(mapping, &ra, filp, @@ -970,6 +953,32 @@ find_page: goto page_not_up_to_date; } page_ok: + /* + * i_size must be checked after we know the page is Uptodate. + * + * Checking i_size after the check allows us to calculate + * the correct value for "nr", which means the zero-filled + * part of the page is not copied back to userspace (unless + * another truncate extends the file - this is desired though). + */ + + isize = i_size_read(inode); + end_index = (isize - 1) >> PAGE_CACHE_SHIFT; + if (unlikely(!isize || index > end_index)) { + page_cache_release(page); + goto out; + } + + /* nr is the maximum number of bytes to copy from this page */ + nr = PAGE_CACHE_SIZE; + if (index == end_index) { + nr = ((isize - 1) & ~PAGE_CACHE_MASK) + 1; + if (nr <= offset) { + page_cache_release(page); + goto out; + } + } + nr = nr - offset; /* If users can be writing to this page using arbitrary * virtual addresses, take care about potential aliasing @@ -1054,31 +1063,6 @@ readpage: unlock_page(page); } - /* - * i_size must be checked after we have done ->readpage. - * - * Checking i_size after the readpage allows us to calculate - * the correct value for "nr", which means the zero-filled - * part of the page is not copied back to userspace (unless - * another truncate extends the file - this is desired though). - */ - isize = i_size_read(inode); - end_index = (isize - 1) >> PAGE_CACHE_SHIFT; - if (unlikely(!isize || index > end_index)) { - page_cache_release(page); - goto out; - } - - /* nr is the maximum number of bytes to copy from this page */ - nr = PAGE_CACHE_SIZE; - if (index == end_index) { - nr = ((isize - 1) & ~PAGE_CACHE_MASK) + 1; - if (nr <= offset) { - page_cache_release(page); - goto out; - } - } - nr = nr - offset; goto page_ok; readpage_error: