Sophie

Sophie

distrib > Scientific%20Linux > 5x > x86_64 > by-pkgid > 27922b4260f65d317aabda37e42bbbff > files > 1342

kernel-2.6.18-238.el5.src.rpm

From: Steven Whitehouse <swhiteho@redhat.com>
Subject: [RHEL 5.1] [GFS2] Missing lost inode recovery code (bz #201012)
Date: Mon, 11 Jun 2007 09:43:48 +0100
Bugzilla: 201012
Message-Id: <1181551428.25918.295.camel@quoit>
Changelog: [GFS2] Missing lost inode recovery code


Hi,

This is a combination of 5 upstream patches which fix bz #201012. This
should be applied to RHEL 5.1 after all the previously sent patches
(Don: in order words as per the patch ordering email I sent last week).

The various parts of this patch have been in the upstream -nmw git tree
for varying lengths of time and have been tested both separately and as
a whole. We've done some single node postmark runs to verify that the
changes don't cause any performance decrease as they are in a
performance critical path, as well as the clustered tests to ensure that
unlinked inodes are now being disposed of in a timely manner.

We believe that it is reasonable to do testing in the upstream -nmw git
tree since, so far as GFS2 is concerned its nearly identical to the
RHEL-5.1 tree. The differences are 1) the macros for adjusting i_nlink
(which are trivial no-ops anyway) and the first part of the patch for bz
#238162 which will be submitted to RHEL 5.1 as soon as possible.

Steve.

------------------------------------------------------------------------------
diff -Nru linux-rhel-base/fs/gfs2/glock.c linux-2.6.18.noarch/fs/gfs2/glock.c
--- linux-rhel-base/fs/gfs2/glock.c	2007-06-11 09:00:57.000000000 +0100
+++ linux-2.6.18.noarch/fs/gfs2/glock.c	2007-06-11 09:23:32.000000000 +0100
@@ -421,11 +421,11 @@
 static void gfs2_holder_wake(struct gfs2_holder *gh)
 {
 	clear_bit(HIF_WAIT, &gh->gh_iflags);
-	smp_mb();
+	smp_mb__after_clear_bit();
 	wake_up_bit(&gh->gh_iflags, HIF_WAIT);
 }
 
-static int holder_wait(void *word)
+static int just_schedule(void *word)
 {
         schedule();
         return 0;
@@ -434,7 +434,20 @@
 static void wait_on_holder(struct gfs2_holder *gh)
 {
 	might_sleep();
-	wait_on_bit(&gh->gh_iflags, HIF_WAIT, holder_wait, TASK_UNINTERRUPTIBLE);
+	wait_on_bit(&gh->gh_iflags, HIF_WAIT, just_schedule, TASK_UNINTERRUPTIBLE);
+}
+
+static void gfs2_demote_wake(struct gfs2_glock *gl)
+{
+        clear_bit(GLF_DEMOTE, &gl->gl_flags);
+        smp_mb__after_clear_bit();
+        wake_up_bit(&gl->gl_flags, GLF_DEMOTE);
+}
+
+static void wait_on_demote(struct gfs2_glock *gl)
+{
+	might_sleep();
+	wait_on_bit(&gl->gl_flags, GLF_DEMOTE, just_schedule, TASK_UNINTERRUPTIBLE);
 }
 
 /**
@@ -527,7 +540,7 @@
 
 	if (gl->gl_state == gl->gl_demote_state ||
 	    gl->gl_state == LM_ST_UNLOCKED) {
-		clear_bit(GLF_DEMOTE, &gl->gl_flags);
+		gfs2_demote_wake(gl);
 		return 0;
 	}
 	set_bit(GLF_LOCK, &gl->gl_flags);
@@ -665,12 +678,22 @@
  * practise: LM_ST_SHARED and LM_ST_UNLOCKED
  */
 
-static void handle_callback(struct gfs2_glock *gl, unsigned int state)
+static void handle_callback(struct gfs2_glock *gl, unsigned int state, int remote)
 {
 	spin_lock(&gl->gl_spin);
 	if (test_and_set_bit(GLF_DEMOTE, &gl->gl_flags) == 0) {
 		gl->gl_demote_state = state;
 		gl->gl_demote_time = jiffies;
+		if (remote && gl->gl_ops->go_type == LM_TYPE_IOPEN &&
+		    gl->gl_object) {
+			struct inode *inode = igrab(gl->gl_object);
+			spin_unlock(&gl->gl_spin);
+			if (inode) {
+				d_prune_aliases(inode);
+				iput(inode);
+			}
+			return;
+		}
 	} else if (gl->gl_demote_state != LM_ST_UNLOCKED) {
 		gl->gl_demote_state = state;
 	}
@@ -739,7 +762,7 @@
 		if (ret & LM_OUT_CANCELED)
 			op_done = 0;
 		else
-			clear_bit(GLF_DEMOTE, &gl->gl_flags);
+			gfs2_demote_wake(gl);
 	} else {
 		spin_lock(&gl->gl_spin);
 		list_del_init(&gh->gh_list);
@@ -847,7 +870,7 @@
 	gfs2_assert_warn(sdp, !ret);
 
 	state_change(gl, LM_ST_UNLOCKED);
-	clear_bit(GLF_DEMOTE, &gl->gl_flags);
+	gfs2_demote_wake(gl);
 
 	if (glops->go_inval)
 		glops->go_inval(gl, DIO_METADATA);
@@ -1173,7 +1196,7 @@
 	const struct gfs2_glock_operations *glops = gl->gl_ops;
 
 	if (gh->gh_flags & GL_NOCACHE)
-		handle_callback(gl, LM_ST_UNLOCKED);
+		handle_callback(gl, LM_ST_UNLOCKED, 0);
 
 	gfs2_glmutex_lock(gl);
 
@@ -1195,6 +1218,13 @@
 	spin_unlock(&gl->gl_spin);
 }
 
+void gfs2_glock_dq_wait(struct gfs2_holder *gh)
+{
+	struct gfs2_glock *gl = gh->gh_gl;
+	gfs2_glock_dq(gh);
+	wait_on_demote(gl);
+}
+
 /**
  * gfs2_glock_dq_uninit - dequeue a holder from a glock and initialize it
  * @gh: the holder structure
@@ -1455,7 +1485,7 @@
 	if (!gl)
 		return;
 
-	handle_callback(gl, state);
+	handle_callback(gl, state, 1);
 
 	spin_lock(&gl->gl_spin);
 	run_queue(gl);
@@ -1595,7 +1625,7 @@
 	if (gfs2_glmutex_trylock(gl)) {
 		if (list_empty(&gl->gl_holders) &&
 		    gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl))
-			handle_callback(gl, LM_ST_UNLOCKED);
+			handle_callback(gl, LM_ST_UNLOCKED, 0);
 		gfs2_glmutex_unlock(gl);
 	}
 
@@ -1708,7 +1738,7 @@
 	if (gfs2_glmutex_trylock(gl)) {
 		if (list_empty(&gl->gl_holders) &&
 		    gl->gl_state != LM_ST_UNLOCKED)
-			handle_callback(gl, LM_ST_UNLOCKED);
+			handle_callback(gl, LM_ST_UNLOCKED, 0);
 		gfs2_glmutex_unlock(gl);
 	}
 }
diff -Nru linux-rhel-base/fs/gfs2/glock.h linux-2.6.18.noarch/fs/gfs2/glock.h
--- linux-rhel-base/fs/gfs2/glock.h	2007-06-11 09:00:56.000000000 +0100
+++ linux-2.6.18.noarch/fs/gfs2/glock.h	2007-06-11 09:23:32.000000000 +0100
@@ -86,6 +86,7 @@
 int gfs2_glock_poll(struct gfs2_holder *gh);
 int gfs2_glock_wait(struct gfs2_holder *gh);
 void gfs2_glock_dq(struct gfs2_holder *gh);
+void gfs2_glock_dq_wait(struct gfs2_holder *gh);
 
 void gfs2_glock_dq_uninit(struct gfs2_holder *gh);
 int gfs2_glock_nq_num(struct gfs2_sbd *sdp,
diff -Nru linux-rhel-base/fs/gfs2/incore.h linux-2.6.18.noarch/fs/gfs2/incore.h
--- linux-rhel-base/fs/gfs2/incore.h	2007-06-11 09:00:57.000000000 +0100
+++ linux-2.6.18.noarch/fs/gfs2/incore.h	2007-06-11 09:22:33.000000000 +0100
@@ -95,6 +95,8 @@
 	u32 rd_last_alloc_data;
 	u32 rd_last_alloc_meta;
 	struct gfs2_sbd *rd_sbd;
+	unsigned long rd_flags;
+#define GFS2_RDF_CHECK        0x0001          /* Need to check for unlinked inodes */
 };
 
 enum gfs2_state_bits {
diff -Nru linux-rhel-base/fs/gfs2/inode.c linux-2.6.18.noarch/fs/gfs2/inode.c
--- linux-rhel-base/fs/gfs2/inode.c	2007-06-11 09:00:57.000000000 +0100
+++ linux-2.6.18.noarch/fs/gfs2/inode.c	2007-06-11 09:23:32.000000000 +0100
@@ -98,22 +98,8 @@
 
 	if (inode->i_state & I_NEW) {
 		struct gfs2_sbd *sdp = GFS2_SB(inode);
-		umode_t mode = DT2IF(type);
+		umode_t mode;
 		inode->i_private = ip;
-		inode->i_mode = mode;
-
-		if (S_ISREG(mode)) {
-			inode->i_op = &gfs2_file_iops;
-			inode->i_fop = &gfs2_file_fops;
-			inode->i_mapping->a_ops = &gfs2_file_aops;
-		} else if (S_ISDIR(mode)) {
-			inode->i_op = &gfs2_dir_iops;
-			inode->i_fop = &gfs2_dir_fops;
-		} else if (S_ISLNK(mode)) {
-			inode->i_op = &gfs2_symlink_iops;
-		} else {
-			inode->i_op = &gfs2_dev_iops;
-		}
 
 		error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl);
 		if (unlikely(error))
@@ -128,12 +114,47 @@
 		error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh);
 		if (unlikely(error))
 			goto fail_iopen;
+		ip->i_iopen_gh.gh_gl->gl_object = ip;
 
 		gfs2_glock_put(io_gl);
+
+		/*
+		 * We must read the inode in order to work out its type in
+		 * this case. Note that this doesn't happen often as we normally
+		 * know the type beforehand. This code path only occurs during
+		 * unlinked inode recovery (where it is safe to do this glock,
+		 * which is not true in the general case).
+		 */
+		inode->i_mode = mode = DT2IF(type);
+		if (type == DT_UNKNOWN) {
+			struct gfs2_holder gh;
+			error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
+			if (unlikely(error))
+				goto fail_glock;
+			/* Inode is now uptodate */
+			mode = inode->i_mode;
+			gfs2_glock_dq_uninit(&gh);
+		}
+
+		if (S_ISREG(mode)) {
+			inode->i_op = &gfs2_file_iops;
+			inode->i_fop = &gfs2_file_fops;
+			inode->i_mapping->a_ops = &gfs2_file_aops;
+		} else if (S_ISDIR(mode)) {
+			inode->i_op = &gfs2_dir_iops;
+			inode->i_fop = &gfs2_dir_fops;
+		} else if (S_ISLNK(mode)) {
+			inode->i_op = &gfs2_symlink_iops;
+		} else {
+			inode->i_op = &gfs2_dev_iops;
+		}
+
 		unlock_new_inode(inode);
 	}
 
 	return inode;
+fail_glock:
+	gfs2_glock_dq(&ip->i_iopen_gh);
 fail_iopen:
 	gfs2_glock_put(io_gl);
 fail_put:
@@ -369,7 +390,7 @@
 	struct super_block *sb = dir->i_sb;
 	struct gfs2_inode *dip = GFS2_I(dir);
 	struct gfs2_holder d_gh;
-	int error;
+	int error = 0;
 	struct inode *inode = NULL;
 	int unlock = 0;
 
@@ -857,7 +878,7 @@
 struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
 			   unsigned int mode, dev_t dev)
 {
-	struct inode *inode;
+	struct inode *inode = NULL;
 	struct gfs2_inode *dip = ghs->gh_gl->gl_object;
 	struct inode *dir = &dip->i_inode;
 	struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
@@ -900,28 +921,28 @@
 
 	error = gfs2_inode_refresh(GFS2_I(inode));
 	if (error)
-		goto fail_iput;
+		goto fail_gunlock2;
 
 	error = gfs2_acl_create(dip, GFS2_I(inode));
 	if (error)
-		goto fail_iput;
+		goto fail_gunlock2;
 
 	error = gfs2_security_init(dip, GFS2_I(inode));
 	if (error)
-		goto fail_iput;
+		goto fail_gunlock2;
 
 	error = link_dinode(dip, name, GFS2_I(inode));
 	if (error)
-		goto fail_iput;
+		goto fail_gunlock2;
 
 	if (!inode)
 		return ERR_PTR(-ENOMEM);
 	return inode;
 
-fail_iput:
-	iput(inode);
 fail_gunlock2:
 	gfs2_glock_dq_uninit(ghs + 1);
+	if (inode)
+		iput(inode);
 fail_gunlock:
 	gfs2_glock_dq(ghs);
 fail:
diff -Nru linux-rhel-base/fs/gfs2/ops_inode.c linux-2.6.18.noarch/fs/gfs2/ops_inode.c
--- linux-rhel-base/fs/gfs2/ops_inode.c	2007-06-11 09:00:57.000000000 +0100
+++ linux-2.6.18.noarch/fs/gfs2/ops_inode.c	2007-06-11 09:22:25.000000000 +0100
@@ -749,7 +749,7 @@
 		if (error)
 			goto out_end_trans;
 
-		error = gfs2_dir_mvino(ip, &name, nip, DT_DIR);
+		error = gfs2_dir_mvino(ip, &name, ndip, DT_DIR);
 		if (error)
 			goto out_end_trans;
 	} else {
diff -Nru linux-rhel-base/fs/gfs2/ops_super.c linux-2.6.18.noarch/fs/gfs2/ops_super.c
--- linux-rhel-base/fs/gfs2/ops_super.c	2007-06-11 09:00:56.000000000 +0100
+++ linux-2.6.18.noarch/fs/gfs2/ops_super.c	2007-06-11 09:23:32.000000000 +0100
@@ -326,8 +326,10 @@
 		gfs2_glock_schedule_for_reclaim(ip->i_gl);
 		gfs2_glock_put(ip->i_gl);
 		ip->i_gl = NULL;
-		if (ip->i_iopen_gh.gh_gl)
+		if (ip->i_iopen_gh.gh_gl) {
+			ip->i_iopen_gh.gh_gl->gl_object = NULL;
 			gfs2_glock_dq_uninit(&ip->i_iopen_gh);
+		}
 	}
 }
 
@@ -422,13 +424,13 @@
 	if (!inode->i_private)
 		goto out;
 
-	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB, &gh);
+	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
 	if (unlikely(error)) {
 		gfs2_glock_dq_uninit(&ip->i_iopen_gh);
 		goto out;
 	}
 
-	gfs2_glock_dq(&ip->i_iopen_gh);
+	gfs2_glock_dq_wait(&ip->i_iopen_gh);
 	gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, &ip->i_iopen_gh);
 	error = gfs2_glock_nq(&ip->i_iopen_gh);
 	if (error)
diff -Nru linux-rhel-base/fs/gfs2/rgrp.c linux-2.6.18.noarch/fs/gfs2/rgrp.c
--- linux-rhel-base/fs/gfs2/rgrp.c	2007-06-11 09:00:57.000000000 +0100
+++ linux-2.6.18.noarch/fs/gfs2/rgrp.c	2007-06-11 09:22:33.000000000 +0100
@@ -28,6 +28,7 @@
 #include "ops_file.h"
 #include "util.h"
 #include "log.h"
+#include "inode.h"
 
 #define BFITNOENT ((u32)~0)
 
@@ -50,6 +51,9 @@
 	        1, 0, 0, 0
 };
 
+static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
+                        unsigned char old_state, unsigned char new_state);
+
 /**
  * gfs2_setbit - Set a bit in the bitmaps
  * @buffer: the buffer that holds the bitmaps
@@ -531,6 +535,7 @@
 
 	rgd->rd_gl->gl_object = rgd;
 	rgd->rd_rg_vn = rgd->rd_gl->gl_vn - 1;
+	rgd->rd_flags |= GFS2_RDF_CHECK;
 	return error;
 }
 
@@ -846,6 +851,37 @@
 }
 
 /**
+ * try_rgrp_unlink - Look for any unlinked, allocated, but unused inodes
+ * @rgd: The rgrp
+ *
+ * Returns: The inode, if one has been found
+ */
+
+static struct inode *try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked)
+{
+	struct inode *inode;
+	u32 goal = 0;
+	u64 ino;
+
+	for(;;) {
+		goal = rgblk_search(rgd, goal, GFS2_BLKST_UNLINKED,
+				    GFS2_BLKST_UNLINKED);
+		if (goal == 0)
+			return 0;
+		ino = goal + rgd->rd_data0;
+		if (ino <= *last_unlinked)
+			continue;
+		*last_unlinked = ino;
+		inode = gfs2_inode_lookup(rgd->rd_sbd->sd_vfs, ino, DT_UNKNOWN);
+		if (!IS_ERR(inode))
+			return inode;
+	}
+
+	rgd->rd_flags &= ~GFS2_RDF_CHECK;
+	return NULL;
+}
+
+/**
  * recent_rgrp_first - get first RG from "recent" list
  * @sdp: The GFS2 superblock
  * @rglast: address of the rgrp used last
@@ -1006,8 +1042,9 @@
  * Returns: errno
  */
 
-static int get_local_rgrp(struct gfs2_inode *ip)
+static struct inode *get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
 {
+	struct inode *inode = NULL;
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 	struct gfs2_rgrpd *rgd, *begin = NULL;
 	struct gfs2_alloc *al = &ip->i_alloc;
@@ -1027,7 +1064,11 @@
 		case 0:
 			if (try_rgrp_fit(rgd, al))
 				goto out;
+			if (rgd->rd_flags & GFS2_RDF_CHECK)
+				inode = try_rgrp_unlink(rgd, last_unlinked);
 			gfs2_glock_dq_uninit(&al->al_rgd_gh);
+			if (inode)
+				return inode;
 			rgd = recent_rgrp_next(rgd, 1);
 			break;
 
@@ -1036,7 +1077,7 @@
 			break;
 
 		default:
-			return error;
+			return ERR_PTR(error);
 		}
 	}
 
@@ -1051,7 +1092,11 @@
 		case 0:
 			if (try_rgrp_fit(rgd, al))
 				goto out;
+			if (rgd->rd_flags & GFS2_RDF_CHECK)
+				inode = try_rgrp_unlink(rgd, last_unlinked);
 			gfs2_glock_dq_uninit(&al->al_rgd_gh);
+			if (inode)
+				return inode;
 			break;
 
 		case GLR_TRYFAILED:
@@ -1059,7 +1104,7 @@
 			break;
 
 		default:
-			return error;
+			return ERR_PTR(error);
 		}
 
 		rgd = gfs2_rgrpd_get_next(rgd);
@@ -1068,7 +1113,7 @@
 
 		if (rgd == begin) {
 			if (++loops >= 3)
-				return -ENOSPC;
+				return ERR_PTR(-ENOSPC);
 			if (!skipped)
 				loops++;
 			flags = 0;
@@ -1088,7 +1133,7 @@
 		forward_rgrp_set(sdp, rgd);
 	}
 
-	return 0;
+	return NULL;
 }
 
 /**
@@ -1102,11 +1147,14 @@
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 	struct gfs2_alloc *al = &ip->i_alloc;
+	struct inode *inode;
 	int error = 0;
+	u64 last_unlinked = 0;
 
 	if (gfs2_assert_warn(sdp, al->al_requested))
 		return -EINVAL;
 
+try_again:
 	/* We need to hold the rindex unless the inode we're using is
 	   the rindex itself, in which case it's already held. */
 	if (ip != GFS2_I(sdp->sd_rindex))
@@ -1117,11 +1165,15 @@
 	if (error)
 		return error;
 
-	error = get_local_rgrp(ip);
-	if (error) {
+	inode = get_local_rgrp(ip, &last_unlinked);
+	if (inode) {
 		if (ip != GFS2_I(sdp->sd_rindex))
 			gfs2_glock_dq_uninit(&al->al_ri_gh);
-		return error;
+		if (IS_ERR(inode))
+			return PTR_ERR(inode);
+		iput(inode);
+		gfs2_log_flush(sdp, NULL);
+		goto try_again;
 	}
 
 	al->al_file = file;
@@ -1209,7 +1261,7 @@
  */
 
 static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
-			     unsigned char old_state, unsigned char new_state)
+			unsigned char old_state, unsigned char new_state)
 {
 	struct gfs2_bitmap *bi = NULL;
 	u32 length = rgd->rd_length;
@@ -1250,17 +1302,18 @@
 		goal = 0;
 	}
 
-	if (gfs2_assert_withdraw(rgd->rd_sbd, x <= length))
-		blk = 0;
+	if (old_state != new_state) {
+		gfs2_assert_withdraw(rgd->rd_sbd, blk != BFITNOENT);
 
-	gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
-	gfs2_setbit(rgd, bi->bi_bh->b_data + bi->bi_offset,
-		    bi->bi_len, blk, new_state);
-	if (bi->bi_clone)
-		gfs2_setbit(rgd, bi->bi_clone + bi->bi_offset,
+		gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
+		gfs2_setbit(rgd, bi->bi_bh->b_data + bi->bi_offset,
 			    bi->bi_len, blk, new_state);
+		if (bi->bi_clone)
+			gfs2_setbit(rgd, bi->bi_clone + bi->bi_offset,
+				    bi->bi_len, blk, new_state);
+	}
 
-	return bi->bi_start * GFS2_NBBY + blk;
+	return (blk == BFITNOENT) ? 0 : (bi->bi_start * GFS2_NBBY) + blk;
 }
 
 /**