From: Bob Peterson <rpeterso@redhat.com>
Date: Tue, 29 Apr 2008 17:07:37 -0500
Subject: [gfs2] inode indirect buffer corruption
Message-id: 1209506857.3430.13.camel@technetium.msp.redhat.com
O-Subject: [RHEL5.3 PATCH][GFS2] - bz345401: GFS2: inode indirect buffer corruption
Bugzilla: 345401
RH-Acked-by: Steven Whitehouse <swhiteho@redhat.com>

Hi,

This patch fixes a GFS2 filesystem consistency error reported from function do_strip. The problem was caused by a timing window that allowed two VFS inodes pointing to the same file to be created in memory. The problem is fixed by making GFS2's use of the VFS iget_test/iget_set mechanism check and set a new bit (GIF_USER) in the in-core gfs2_inode structure while the VFS inode spin_lock is held. This replaces the previous check of inode->i_private, which can only be set after VFS inode initialization and therefore opened the timing window.

This patch fixes bug #345401 and was tested on the "bigi" performance testing system. A nearly identical patch was sent upstream for inclusion there as well. See the bz record for more details.
Regards, Bob Peterson Red Hat Clustering & GFS Signed-off-by: Bob Peterson <rpeterso@redhat.com> -- diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 563f026..ee71475 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -306,7 +306,7 @@ static int inode_go_lock(struct gfs2_holder *gh) struct gfs2_inode *ip = gl->gl_object; int error = 0; - if (!ip) + if (!ip || (gh->gh_flags & GL_SKIP)) return 0; if (test_bit(GIF_INVALID, &ip->i_flags)) { diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index a434e90..27cafb6 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -239,6 +239,7 @@ enum { GIF_QD_LOCKED = 1, GIF_PAGED = 2, GIF_SW_PAGED = 3, + GIF_USER = 4, /* user inode, not metadata addr space */ }; struct gfs2_dinode_host { diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 948baeb..fe50e63 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -48,8 +48,7 @@ static int iget_test(struct inode *inode, void *opaque) struct gfs2_inode *ip = GFS2_I(inode); u64 *no_addr = opaque; - if (ip->i_no_addr == *no_addr && - inode->i_private != NULL) + if (ip->i_no_addr == *no_addr && test_bit(GIF_USER, &ip->i_flags)) return 1; return 0; @@ -62,6 +61,7 @@ static int iget_set(struct inode *inode, void *opaque) inode->i_ino = (unsigned long)*no_addr; ip->i_no_addr = *no_addr; + set_bit(GIF_USER, &ip->i_flags); return 0; } @@ -87,7 +87,7 @@ static int iget_skip_test(struct inode *inode, void *opaque) struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_skip_data *data = opaque; - if (ip->i_no_addr == data->no_addr && inode->i_private != NULL){ + if (ip->i_no_addr == data->no_addr && test_bit(GIF_USER, &ip->i_flags)){ if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)){ data->skipped = 1; return 0; @@ -106,6 +106,7 @@ static int iget_skip_set(struct inode *inode, void *opaque) return 1; inode->i_ino = (unsigned long)(data->no_addr); ip->i_no_addr = data->no_addr; + set_bit(GIF_USER, &ip->i_flags); return 0; } @@ -187,7 +188,6 @@ struct inode *gfs2_inode_lookup(struct 
super_block *sb, if (inode->i_state & I_NEW) { struct gfs2_sbd *sdp = GFS2_SB(inode); - inode->i_private = ip; ip->i_no_formal_ino = no_formal_ino; error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl); diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 232c1ff..cdcedf0 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -68,13 +68,15 @@ static const struct address_space_operations aspace_aops = { struct inode *gfs2_aspace_get(struct gfs2_sbd *sdp) { struct inode *aspace; + struct gfs2_inode *ip; aspace = new_inode(sdp->sd_vfs); if (aspace) { mapping_set_gfp_mask(aspace->i_mapping, GFP_NOFS); aspace->i_mapping->a_ops = &aspace_aops; aspace->i_size = ~0ULL; - aspace->i_private = NULL; + ip = GFS2_I(aspace); + clear_bit(GIF_USER, &ip->i_flags); insert_inode_hash(aspace); } return aspace; diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c index c5e1b67..205e068 100644 --- a/fs/gfs2/ops_super.c +++ b/fs/gfs2/ops_super.c @@ -53,7 +53,7 @@ static int gfs2_write_inode(struct inode *inode, int sync) struct gfs2_inode *ip = GFS2_I(inode); /* Check this is a "normal" inode */ - if (inode->i_private) { + if (test_bit(GIF_USER, &ip->i_flags)) { if (current->flags & PF_MEMALLOC) return 0; if (sync) @@ -298,8 +298,9 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data) */ static void gfs2_drop_inode(struct inode *inode) { - if (inode->i_private && inode->i_nlink) { - struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_inode *ip = GFS2_I(inode); + + if (test_bit(GIF_USER, &ip->i_flags) && inode->i_nlink) { struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl; if (gl && test_bit(GLF_DEMOTE, &gl->gl_flags)) inode->i_nlink = 0; @@ -315,12 +316,13 @@ static void gfs2_drop_inode(struct inode *inode) static void gfs2_clear_inode(struct inode *inode) { + struct gfs2_inode *ip = GFS2_I(inode); + /* This tells us its a "real" inode and not one which only * serves to contain an address space (see rgrp.c, meta_io.c) * which therefore 
doesn't have its own glocks. */ - if (inode->i_private) { - struct gfs2_inode *ip = GFS2_I(inode); + if (test_bit(GIF_USER, &ip->i_flags)) { ip->i_gl->gl_object = NULL; gfs2_glock_schedule_for_reclaim(ip->i_gl); gfs2_glock_put(ip->i_gl); @@ -420,7 +422,7 @@ static void gfs2_delete_inode(struct inode *inode) struct gfs2_holder gh; int error; - if (!inode->i_private) + if (!test_bit(GIF_USER, &ip->i_flags)) goto out; error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);