Sophie

Sophie

distrib > Scientific%20Linux > 5x > x86_64 > by-pkgid > 9383e745e23602bc45f9c92184feea59 > files > 108

gfs2-utils-0.1.62-28.el5.src.rpm

commit c5311da33ef0b2558f3a75d1a9ab763e8ac8d365
Author: Bob Peterson <rpeterso@redhat.com>
Date:   Wed Aug 11 11:50:45 2010 -0500

    gfs2_fsck segfault when statfs system file is missing
    
    This patch repairs badly damaged gfs2 file systems.  It can
    rebuild destroyed superblock, master directory, root dinode and
    all of the system dinodes.
    
    rhbz#575968

diff --git a/gfs2/edit/hexedit.c b/gfs2/edit/hexedit.c
index 2394eac..8919cef 100644
--- a/gfs2/edit/hexedit.c
+++ b/gfs2/edit/hexedit.c
@@ -1756,6 +1756,8 @@ static void read_superblock(int fd)
 		sbd.fssize = sbd.device.length;
 		gfs1_rindex_read(&sbd, 0, &count);
 	} else {
+		int sane;
+
 		sbd.sd_inptrs = (sbd.bsize - sizeof(struct gfs2_meta_header)) /
 			sizeof(uint64_t);
 		sbd.sd_diptrs = (sbd.bsize - sizeof(struct gfs2_dinode)) /
@@ -1764,7 +1766,7 @@ static void read_superblock(int fd)
 					    sbd.sd_sb.sb_master_dir.no_addr);
 		gfs2_lookupi(sbd.master_dir, "rindex", 6, &sbd.md.riinode);
 		sbd.fssize = sbd.device.length;
-		rindex_read(&sbd, 0, &count);
+		rindex_read(&sbd, 0, &count, &sane);
 	}
 
 }
diff --git a/gfs2/edit/savemeta.c b/gfs2/edit/savemeta.c
index 5cec3a0..273178f 100644
--- a/gfs2/edit/savemeta.c
+++ b/gfs2/edit/savemeta.c
@@ -568,12 +568,14 @@ void savemeta(char *out_fn, int saveoption)
 		brelse(lbh);
 	}
 	if (!slow) {
+		int sane;
+
 		printf("Reading resource groups...");
 		fflush(stdout);
 		if (gfs1)
 			slow = gfs1_ri_update(&sbd, 0, &rgcount, 0);
 		else
-			slow = ri_update(&sbd, 0, &rgcount);
+			slow = ri_update(&sbd, 0, &rgcount, &sane);
 		printf("Done.\n\n");
 		fflush(stdout);
 	}
diff --git a/gfs2/fsck/fs_recovery.c b/gfs2/fsck/fs_recovery.c
index d5ccb75..61262a2 100644
--- a/gfs2/fsck/fs_recovery.c
+++ b/gfs2/fsck/fs_recovery.c
@@ -553,9 +553,9 @@ out:
 	log_info( _("jid=%u: Failed\n"), j);
 reinit:
 	if (query( _("Do you want to clear the journal instead? (y/n)")))
-		error = write_journal(sdp, sdp->md.journal[j], j,
-				      sdp->md.journal[j]->i_di.di_size /
-				      sdp->sd_sb.sb_bsize);
+		write_journal(sdp, sdp->md.journal[j], j,
+			      sdp->md.journal[j]->i_di.di_size /
+			      sdp->sd_sb.sb_bsize);
 	else
 		log_err( _("jid=%u: journal not cleared.\n"), j);
 	return error;
@@ -583,7 +583,6 @@ int replay_journals(struct gfs2_sbd *sdp, int preen, int force_check,
 	*clean_journals = 0;
 
 	/* Get master dinode */
-	sdp->master_dir = inode_read(sdp, sdp->sd_sb.sb_master_dir.no_addr);
 	gfs2_lookupi(sdp->master_dir, "jindex", 6, &sdp->md.jiinode);
 
 	/* read in the journal index data */
@@ -609,7 +608,6 @@ int replay_journals(struct gfs2_sbd *sdp, int preen, int force_check,
 		}
 		inode_put(&sdp->md.journal[i]);
 	}
-	inode_put(&sdp->master_dir);
 	inode_put(&sdp->md.jiinode);
 	/* Sync the buffers to disk so we get a fresh start. */
 	fsync(sdp->device_fd);
diff --git a/gfs2/fsck/fsck.h b/gfs2/fsck/fsck.h
index ddfee17..d92d0db 100644
--- a/gfs2/fsck/fsck.h
+++ b/gfs2/fsck/fsck.h
@@ -91,10 +91,12 @@ struct inode_with_dups {
 enum rgindex_trust_level { /* how far can we trust our RG index? */
 	blind_faith = 0, /* We'd like to trust the rgindex. We always used to
 			    before bz 179069. This should cover most cases. */
-	open_minded = 1, /* At least 1 RG is corrupt. Try to calculate what it
+	ye_of_little_faith = 1, /* The rindex seems trustworthy but there's
+				   rg damage that needs to be fixed. */
+	open_minded = 2, /* At least 1 RG is corrupt. Try to calculate what it
 			    should be, in a perfect world where our RGs are all
 			    on even boundaries. Blue sky. Chirping birds. */
-	distrust = 2   /* The world isn't perfect, our RGs are not on nice neat
+	distrust = 3   /* The world isn't perfect, our RGs are not on nice neat
 			  boundaries.  The fs must have been messed with by
 			  gfs2_grow or something.  Count the RGs by hand. */
 };
@@ -114,7 +116,8 @@ extern int pass2(struct gfs2_sbd *sbp);
 extern int pass3(struct gfs2_sbd *sbp);
 extern int pass4(struct gfs2_sbd *sbp);
 extern int pass5(struct gfs2_sbd *sbp);
-extern int rg_repair(struct gfs2_sbd *sdp, int trust_lvl, int *rg_count);
+extern int rg_repair(struct gfs2_sbd *sdp, int trust_lvl, int *rg_count,
+		     int *sane);
 extern void gfs2_dup_free(void);
 extern int fsck_query(const char *format, ...)
 	__attribute__((format(printf,1,2)));
diff --git a/gfs2/fsck/initialize.c b/gfs2/fsck/initialize.c
index 64685ed..bd364e4 100644
--- a/gfs2/fsck/initialize.c
+++ b/gfs2/fsck/initialize.c
@@ -38,8 +38,11 @@
 		free(x); \
 		x = NULL; \
 	}
+#define HIGHEST_BLOCK 0xffffffffffffffff
 
 static int was_mounted_ro = 0;
+static uint64_t possible_root = HIGHEST_BLOCK;
+static struct master_dir fix_md;
 
 /**
  * block_mounters
@@ -322,6 +325,87 @@ static int check_rgrps_integrity(struct gfs2_sbd *sdp)
 }
 
 /**
+ * rebuild_master - rebuild a destroyed master directory
+ */
+static int rebuild_master(struct gfs2_sbd *sdp)
+{
+	struct gfs2_inum inum;
+	struct gfs2_buffer_head *bh;
+	/* Returns 0 once rebuilt, -1 if the user declines the rebuild. */
+	log_err(_("The system master directory seems to be destroyed.\n"));
+	if (!query(_("Okay to rebuild it? (y/n)"))) {
+		log_err(_("System master not rebuilt; aborting.\n"));
+		return -1;
+	}
+	log_err(_("Trying to rebuild the master directory.\n"));
+	inum.no_formal_ino = sdp->md.next_inum++;
+	inum.no_addr = sdp->sd_sb.sb_master_dir.no_addr;
+	bh = init_dinode(sdp, &inum, S_IFDIR | 0755, GFS2_DIF_SYSTEM, &inum);
+	sdp->master_dir = inode_get(sdp, bh);
+	sdp->master_dir->bh_owned = 1;
+	/* Re-link each system inode recorded in fix_md; build the rest. */
+	if (fix_md.jiinode) {
+		inum.no_formal_ino = sdp->md.next_inum++;
+		inum.no_addr = fix_md.jiinode->i_di.di_num.no_addr;
+		dir_add(sdp->master_dir, "jindex", 6, &inum,
+			IF2DT(S_IFDIR | 0700));
+		sdp->master_dir->i_di.di_nlink++; /* directories only */
+	} else {
+		build_jindex(sdp);
+	}
+
+	if (fix_md.pinode) {
+		inum.no_formal_ino = sdp->md.next_inum++;
+		inum.no_addr = fix_md.pinode->i_di.di_num.no_addr;
+		dir_add(sdp->master_dir, "per_node", 8, &inum,
+			IF2DT(S_IFDIR | 0700));
+		sdp->master_dir->i_di.di_nlink++; /* directories only */
+	} else {
+		build_per_node(sdp);
+	}
+
+	if (fix_md.inum) {
+		inum.no_formal_ino = sdp->md.next_inum++;
+		inum.no_addr = fix_md.inum->i_di.di_num.no_addr;
+		dir_add(sdp->master_dir, "inum", 4, &inum,
+			IF2DT(S_IFREG | 0600));
+	} else {
+		build_inum(sdp);
+	}
+
+	if (fix_md.statfs) {
+		inum.no_formal_ino = sdp->md.next_inum++;
+		inum.no_addr = fix_md.statfs->i_di.di_num.no_addr;
+		dir_add(sdp->master_dir, "statfs", 6, &inum,
+			IF2DT(S_IFREG | 0600));
+	} else {
+		build_statfs(sdp);
+	}
+
+	if (fix_md.riinode) {
+		inum.no_formal_ino = sdp->md.next_inum++;
+		inum.no_addr = fix_md.riinode->i_di.di_num.no_addr;
+		dir_add(sdp->master_dir, "rindex", 6, &inum,
+			IF2DT(S_IFREG | 0600));
+	} else {
+		build_rindex(sdp);
+	}
+
+	if (fix_md.qinode) {
+		inum.no_formal_ino = sdp->md.next_inum++;
+		inum.no_addr = fix_md.qinode->i_di.di_num.no_addr;
+		dir_add(sdp->master_dir, "quota", 5, &inum,
+			IF2DT(S_IFREG | 0600));
+	} else {
+		build_quota(sdp);
+	}
+
+	log_err(_("Master directory rebuilt.\n"));
+	inode_put(&sdp->master_dir); /* sdp->master_dir is given up here */
+	return 0;
+}
+
+/**
  * init_system_inodes
  *
  * Returns: 0 on success, -1 on failure
@@ -331,7 +415,7 @@ static int init_system_inodes(struct gfs2_sbd *sdp)
 	uint64_t inumbuf;
 	char *buf;
 	struct gfs2_statfs_change sc;
-	int rgcount;
+	int rgcount, sane = 1;
 	enum rgindex_trust_level trust_lvl;
 	uint64_t addl_mem_needed;
 
@@ -341,51 +425,43 @@ static int init_system_inodes(struct gfs2_sbd *sdp)
 
 	log_info( _("Initializing special inodes...\n"));
 
-	/* Get master dinode */
-	sdp->master_dir = inode_read(sdp, sdp->sd_sb.sb_master_dir.no_addr);
 	/* Get root dinode */
 	sdp->md.rooti = inode_read(sdp, sdp->sd_sb.sb_root_dir.no_addr);
 
-	/* Look for "inum" entry in master dinode */
-	gfs2_lookupi(sdp->master_dir, "inum", 4, &sdp->md.inum);
-	/* Read inum entry into buffer */
-	gfs2_readi(sdp->md.inum, &inumbuf, 0, sdp->md.inum->i_di.di_size);
-	/* call gfs2_inum_range_in() to retrieve range */
-	sdp->md.next_inum = be64_to_cpu(inumbuf);
-
-	gfs2_lookupi(sdp->master_dir, "statfs", 6, &sdp->md.statfs);
-	buf = malloc(sdp->md.statfs->i_di.di_size);
-	gfs2_readi(sdp->md.statfs, buf, 0, sdp->md.statfs->i_di.di_size);
-	/* call gfs2_inum_range_in() to retrieve range */
-	gfs2_statfs_change_in(&sc, buf);
-	free(buf);
-
-
-	gfs2_lookupi(sdp->master_dir, "jindex", 6, &sdp->md.jiinode);
-
 	gfs2_lookupi(sdp->master_dir, "rindex", 6, &sdp->md.riinode);
-
-	gfs2_lookupi(sdp->master_dir, "quota", 5, &sdp->md.qinode);
-
-	gfs2_lookupi(sdp->master_dir, "per_node", 8, &sdp->md.pinode);
-
-	/* FIXME fill in per_node structure */
+	if (!sdp->md.riinode) {
+		if (query( _("The gfs2 system rindex inode is missing. "
+			     "Okay to rebuild it? (y/n) ")))
+			build_rindex(sdp);
+	}
 
 	/*******************************************************************
-	 *******  Fill in rgrp and journal indexes and related fields  *****
+	 ******************  Fill in journal information  ******************
 	 *******************************************************************/
 
+	/* rgrepair requires the journals be read in in order to distinguish
+	   "real" rgrps from rgrps that are just copies left in journals. */
+	gfs2_lookupi(sdp->master_dir, "jindex", 6, &sdp->md.jiinode);
+	if (!sdp->md.jiinode) {
+		if (query( _("The gfs2 system jindex inode is missing. "
+			     "Okay to rebuild it? (y/n) ")))
+			build_jindex(sdp);
+	}
+
 	/* read in the ji data */
 	if (ji_update(sdp)){
-		log_err( _("Unable to read in ji inode.\n"));
+		log_err( _("Unable to read in jindex inode.\n"));
 		return -1;
 	}
 
+	/*******************************************************************
+	 ********  Validate and read in resource group information  ********
+	 *******************************************************************/
 	log_warn( _("Validating Resource Group index.\n"));
 	for (trust_lvl = blind_faith; trust_lvl <= distrust; trust_lvl++) {
 		log_warn( _("Level %d RG check.\n"), trust_lvl + 1);
-		if ((rg_repair(sdp, trust_lvl, &rgcount) == 0) &&
-		    (ri_update(sdp, 0, &rgcount) == 0)) {
+		if ((rg_repair(sdp, trust_lvl, &rgcount, &sane) == 0) &&
+		    (ri_update(sdp, 0, &rgcount, &sane) == 0)) {
 			log_warn( _("(level %d passed)\n"), trust_lvl + 1);
 			break;
 		}
@@ -401,6 +477,54 @@ static int init_system_inodes(struct gfs2_sbd *sdp)
 	check_rgrps_integrity(sdp);
 
 	/*******************************************************************
+	 *****************  Initialize more system inodes  *****************
+	 *******************************************************************/
+	/* Look for "inum" entry in master dinode */
+	gfs2_lookupi(sdp->master_dir, "inum", 4, &sdp->md.inum);
+	if (!sdp->md.inum) {
+		if (query( _("The gfs2 system inum inode is missing. "
+			     "Okay to rebuild it? (y/n) ")))
+			build_inum(sdp);
+	}
+	/* Read inum entry into buffer */
+	gfs2_readi(sdp->md.inum, &inumbuf, 0, sdp->md.inum->i_di.di_size);
+	/* call gfs2_inum_range_in() to retrieve range */
+	sdp->md.next_inum = be64_to_cpu(inumbuf);
+
+	gfs2_lookupi(sdp->master_dir, "statfs", 6, &sdp->md.statfs);
+	if (!sdp->md.statfs) {
+		if (query( _("The gfs2 system statfs inode is missing. "
+			     "Okay to rebuild it? (y/n) ")))
+			build_statfs(sdp);
+		else {
+			log_err( _("fsck.gfs2 cannot continue without a "
+				   "valid statfs file; aborting.\n"));
+			return FSCK_ERROR;
+		}
+	}
+	buf = malloc(sdp->md.statfs->i_di.di_size);
+	// FIXME: handle failed malloc
+	gfs2_readi(sdp->md.statfs, buf, 0, sdp->md.statfs->i_di.di_size);
+	/* call gfs2_inum_range_in() to retrieve range */
+	gfs2_statfs_change_in(&sc, buf);
+	free(buf);
+
+	gfs2_lookupi(sdp->master_dir, "quota", 5, &sdp->md.qinode);
+	if (!sdp->md.qinode) {
+		if (query( _("The gfs2 system quota inode is missing. "
+			     "Okay to rebuild it? (y/n) ")))
+			build_quota(sdp);
+	}
+
+	gfs2_lookupi(sdp->master_dir, "per_node", 8, &sdp->md.pinode);
+	if (!sdp->md.pinode) {
+		if (query( _("The gfs2 system per_node directory inode is "
+			     "missing. Okay to rebuild it? (y/n) ")))
+			build_per_node(sdp);
+	}
+
+	/* FIXME fill in per_node structure */
+	/*******************************************************************
 	 *******  Now, set boundary fields in the super block  *************
 	 *******************************************************************/
 	if(set_block_ranges(sdp)){
@@ -424,6 +548,454 @@ static int init_system_inodes(struct gfs2_sbd *sdp)
 	return -1;
 }
 
+static int get_lockproto_table(struct gfs2_sbd *sdp)
+{
+	FILE *fp;
+	char line[PATH_MAX], *p, *p2;
+	const char *fsname;
+
+	memset(sdp->lockproto, 0, sizeof(sdp->lockproto));
+	memset(sdp->locktable, 0, sizeof(sdp->locktable));
+	fp = fopen("/etc/cluster/cluster.conf", "rt");
+	if (!fp) {
+		/* no cluster.conf; must be a stand-alone file system */
+		strcpy(sdp->lockproto, "lock_nolock");
+		log_warn(_("Lock protocol determined to be: lock_nolock\n"));
+		log_warn(_("Stand-alone file system: No need for a lock "
+			   "table.\n"));
+		return 0;
+	}
+	/* We found a cluster.conf so assume it's a clustered file system */
+	log_warn(_("Lock protocol assumed to be: " GFS2_DEFAULT_LOCKPROTO
+		   "\n"));
+	strcpy(sdp->lockproto, GFS2_DEFAULT_LOCKPROTO);
+	while (fgets(line, sizeof(line), fp)) {
+		/* Match the opening quote too, so p + 15 stays in the line */
+		p = strstr(line, "<cluster name=\"");
+		if (!p)
+			continue;
+		p += 15;
+		p2 = strchr(p, '"');
+		/* Skip a name with no closing quote or too long to store */
+		if (p2 && (size_t)(p2 - p) < sizeof(sdp->locktable))
+			memcpy(sdp->locktable, p, p2 - p);
+		break;
+	}
+	if (sdp->locktable[0] == '\0') {
+		log_err(_("Error: Unable to determine cluster name from "
+			  "/etc/cluster/cluster.conf\n"));
+	} else {
+		/* Append ":<fsname>": the device base name, else "repaired" */
+		p = strrchr(opts.device, '/');
+		fsname = p ? p + 1 : "repaired";
+		snprintf(sdp->locktable + strlen(sdp->locktable),
+			 sizeof(sdp->locktable) - strlen(sdp->locktable),
+			 ":%s", fsname);
+		log_warn(_("Lock table determined to be: %s\n"),
+			 sdp->locktable);
+	}
+	fclose(fp);
+	return 0;
+}
+
+/**
+ * is_journal_copy - Is this a "real" dinode or a copy inside a journal?
+ * A real dinode will be located at the block number in its no_addr.
+ * A journal-copy will be at a different block (inside the journal).
+ */
+static int is_journal_copy(struct gfs2_inode *ip, struct gfs2_buffer_head *bh)
+{
+	/* 0 when the dinode sits at the block its own no_addr names; */
+	/* 1 when it was found at some other block (a journal copy).  */
+	return ip->i_di.di_num.no_addr != bh->b_blocknr;
+}
+
+/**
+ * peruse_system_dinode - work out which system dinode this is
+ *
+ * Formal inode 2 is taken to be master and 3 to be jindex.  Otherwise a
+ * directory containing "jindex" is master, and one containing
+ * "statfs_change0" is per_node, whose ".." entry gives the master address.
+ * For files, di_size 8 means inum, 24 means statfs and a multiple of 96
+ * means rindex.  Finds are saved in fix_md or sb_master_dir.no_addr; apart
+ * from the formal inode 2 case, copies inside a journal are skipped.
+ */
+static void peruse_system_dinode(struct gfs2_sbd *sdp, struct gfs2_dinode *di,
+				 struct gfs2_buffer_head *bh)
+{
+	struct gfs2_inode *ip, *child_ip;
+	struct gfs2_inum inum;
+	int error;
+
+	if (di->di_num.no_formal_ino == 2) {
+		if (sdp->sd_sb.sb_master_dir.no_addr)
+			return;
+		log_warn(_("Found system master directory at: 0x%llx.\n"),
+			 di->di_num.no_addr);
+		sdp->sd_sb.sb_master_dir.no_addr = di->di_num.no_addr;
+		return;
+	}
+	ip = inode_read(sdp, di->di_num.no_addr);
+	if (di->di_num.no_formal_ino == 3) {
+		if (fix_md.jiinode || is_journal_copy(ip, bh))
+			return;
+		log_warn(_("Found system jindex file at: 0x%llx\n"),
+			 di->di_num.no_addr);
+		fix_md.jiinode = ip;
+	} else if (S_ISDIR(di->di_mode)) {
+		/* Check for a jindex dir entry. Only one system dir has a
+		   jindex: master */
+		gfs2_lookupi(ip, "jindex", 6, &child_ip);
+		if (child_ip) {
+			if (fix_md.jiinode || is_journal_copy(ip, bh))
+				return;
+			fix_md.jiinode = child_ip;
+			sdp->sd_sb.sb_master_dir.no_addr = di->di_num.no_addr;
+			log_warn(_("Found system master directory at: "
+				   "0x%llx\n"), di->di_num.no_addr);
+			return;
+		}
+
+		/* Check for a statfs_change0 dir entry. Only one system dir
+		   has a statfs_change: per_node, and its .. will be master. */
+		gfs2_lookupi(ip, "statfs_change0", 14, &child_ip);
+		if (child_ip) {
+			if (fix_md.pinode || is_journal_copy(ip, bh))
+				return;
+			log_warn(_("Found system per_node directory at: "
+				   "0x%llx\n"), ip->i_di.di_num.no_addr);
+			fix_md.pinode = ip;
+			error = dir_search(ip, "..", 2, NULL, &inum);
+			if (!error && inum.no_addr) {
+				sdp->sd_sb.sb_master_dir.no_addr =
+					inum.no_addr;
+				log_warn(_("From per_node\'s \'..\' I "
+					   "backtracked the master directory "
+					   "to: 0x%llx\n"), inum.no_addr);
+			}
+			return;
+		}
+		log_debug(_("Unknown system directory at block 0x%llx\n"),
+			  di->di_num.no_addr);
+		inode_put(&ip);
+	} else if (di->di_size == 8) {
+		if (fix_md.inum || is_journal_copy(ip, bh))
+			return;
+		fix_md.inum = ip;
+		log_warn(_("Found system inum file at: 0x%llx\n"),
+			 di->di_num.no_addr);
+	} else if (di->di_size == 24) {
+		if (fix_md.statfs || is_journal_copy(ip, bh))
+			return;
+		fix_md.statfs = ip;
+		log_warn(_("Found system statfs file at: 0x%llx\n"),
+			 di->di_num.no_addr);
+	} else if ((di->di_size % 96) == 0) {
+		if (fix_md.riinode || is_journal_copy(ip, bh))
+			return;
+		fix_md.riinode = ip;
+		log_warn(_("Found system rindex file at: 0x%llx\n"),
+			 di->di_num.no_addr);
+	} else if (!fix_md.qinode && di->di_size >= 176 &&
+		   di->di_num.no_formal_ino >= 12 &&
+		   di->di_num.no_formal_ino <= 100) {
+		if (is_journal_copy(ip, bh))
+			return;
+		fix_md.qinode = ip;
+		log_warn(_("Found system quota file at: 0x%llx\n"),
+			 di->di_num.no_addr);
+	}
+}
+
+/**
+ * peruse_user_dinode - process a user dinode trying to find the root directory
+ * Records the root in sb_root_dir.no_addr, or a fallback in possible_root.
+ */
+static void peruse_user_dinode(struct gfs2_sbd *sdp, struct gfs2_dinode *di,
+			       struct gfs2_buffer_head *bh)
+{
+	struct gfs2_inode *ip, *parent_ip;
+	struct gfs2_inum inum;
+	int error;
+
+	if (sdp->sd_sb.sb_root_dir.no_addr) /* if we know the root dinode */
+		return;             /* we don't need to find the root */
+	if (!S_ISDIR(di->di_mode))  /* if this isn't a directory */
+		return;             /* it can't lead us to the root anyway */
+
+	if (di->di_num.no_formal_ino == 1) { /* treated as the root dinode */
+		struct gfs2_buffer_head *root_bh;
+
+		if (di->di_num.no_addr == bh->b_blocknr) {
+			log_warn(_("Found the root directory at: 0x%llx.\n"),
+				 di->di_num.no_addr);
+			sdp->sd_sb.sb_root_dir.no_addr = di->di_num.no_addr;
+			return;
+		}
+		log_warn(_("The root dinode should be at block 0x%llx but it "
+			   "seems to be destroyed.\n"),
+			 (unsigned long long)di->di_num.no_addr);
+		log_warn(_("Found a copy of the root directory in a journal "
+			   "at block: 0x%llx.\n"),
+			 (unsigned long long)bh->b_blocknr);
+		if (!query(_("Do you want to replace the root dinode from the "
+			     "copy? (y/n)"))) {
+			log_err(_("Damaged root dinode not fixed.\n"));
+			return;
+		}
+		root_bh = bread(sdp, di->di_num.no_addr);
+		memcpy(root_bh->b_data, bh->b_data, sdp->bsize);
+		bmodified(root_bh);
+		brelse(root_bh);
+		log_warn(_("Root directory copied from the journal.\n"));
+		return;
+	}
+	ip = inode_read(sdp, di->di_num.no_addr);
+	while (ip) { /* climb ".." until a directory is its own parent */
+		gfs2_lookupi(ip, "..", 2, &parent_ip);
+		if (parent_ip && parent_ip->i_di.di_num.no_addr ==
+		    ip->i_di.di_num.no_addr) {
+			log_warn(_("fsck found the root inode at: 0x%llx\n"),
+				 ip->i_di.di_num.no_addr);
+			sdp->sd_sb.sb_root_dir.no_addr =
+				ip->i_di.di_num.no_addr;
+			inode_put(&parent_ip);
+			inode_put(&ip);
+			return;
+		}
+		if (!parent_ip) /* the ".." lookup gave no inode: stop here */
+			break;
+		inode_put(&ip);
+		ip = parent_ip;
+	}
+	error = dir_search(ip, "..", 2, NULL, &inum);
+	if (!error && inum.no_addr && inum.no_addr < possible_root) {
+			possible_root = inum.no_addr; /* keep the lowest */
+			log_debug(_("Found a possible root at: 0x%llx\n"),
+				  (unsigned long long)possible_root);
+	}
+	inode_put(&ip);
+}
+
+/**
+ * find_rgs_for_bsize - check a range of blocks for rgrps to determine bsize.
+ * Assumes: device is open.
+ */
+static int find_rgs_for_bsize(struct gfs2_sbd *sdp, uint64_t startblock,
+			      uint32_t *known_bsize)
+{
+	uint64_t blk, max_rg_size, rb_addr;
+	struct gfs2_buffer_head *bh, *rb_bh;
+	uint32_t bsize, bsize2, chk;
+	char *p;
+	int found_rg;
+	struct gfs2_meta_header mh;
+
+	sdp->bsize = GFS2_DEFAULT_BSIZE;
+	max_rg_size = 524288;
+	/* Max RG size is 2GB. Max block size is 4K. 2G / 4K blks = 524288,
+	   So this is traversing 2GB in 4K block increments. */
+	for (blk = startblock; blk < startblock + max_rg_size; blk++) {
+		bh = bread(sdp, blk);
+		found_rg = 0; /* look for an rgrp header at each 512 bytes */
+		for (bsize = 0; bsize < GFS2_DEFAULT_BSIZE;
+		     bsize += GFS2_BASIC_BLOCK) {
+			p = bh->b_data + bsize;
+			chk = ((struct gfs2_meta_header *)p)->mh_magic;
+			if (be32_to_cpu(chk) != GFS2_MAGIC)
+				continue;
+			chk = ((struct gfs2_meta_header *)p)->mh_type;
+			if (be32_to_cpu(chk) == GFS2_METATYPE_RG) {
+				found_rg = 1;
+				break;
+			}
+		}
+		if (!found_rg) {
+			brelse(bh); /* don't leak blocks with no rgrp */
+			continue;
+		}
+		/* Try all the block sizes in 512 byte multiples */
+		for (bsize2 = GFS2_BASIC_BLOCK; bsize2 <= GFS2_DEFAULT_BSIZE;
+		     bsize2 += GFS2_BASIC_BLOCK) {
+			rb_addr = (bh->b_blocknr *
+				   (GFS2_DEFAULT_BSIZE / bsize2)) +
+				(bsize / bsize2) + 1;
+			sdp->bsize = bsize2; /* temporarily */
+			rb_bh = bread(sdp, rb_addr);
+			gfs2_meta_header_in(&mh, rb_bh);
+			brelse(rb_bh);
+			if (mh.mh_magic == GFS2_MAGIC &&
+			    mh.mh_type == GFS2_METATYPE_RB) {
+				log_debug(_("boff:%d bsize2:%d rg:0x%llx, "
+					    "rb:0x%llx\n"), bsize, bsize2,
+					  (unsigned long long)blk,
+					  (unsigned long long)rb_addr);
+				*known_bsize = bsize2;
+				break;
+			}
+		}
+		brelse(bh);
+		if (!(*known_bsize)) { /* no size fit: restore and go on */
+			sdp->bsize = GFS2_DEFAULT_BSIZE;
+			continue;
+		}
+		sdp->bsize = *known_bsize;
+		log_warn(_("Block size determined to be: %d\n"), *known_bsize);
+		return 0;
+	}
+	return 0; /* always 0; *known_bsize tells the caller if one was found */
+}
+
+/**
+ * peruse_metadata - scan up to 2GB of blocks from startblock for dinodes
+ * Assumes: device is open.  Returns 0, or -1 if this looks like gfs1.
+ */
+static int peruse_metadata(struct gfs2_sbd *sdp, uint64_t startblock)
+{
+	uint64_t blk, max_rg_size;
+	struct gfs2_buffer_head *bh;
+	struct gfs2_dinode di;
+	int found_gfs2_dinodes = 0, possible_gfs1_dinodes = 0;
+
+	max_rg_size = 2147483648ull / sdp->bsize;
+	/* Max RG size is 2GB. 2G / bsize. */
+	for (blk = startblock; blk < startblock + max_rg_size; blk++) {
+		bh = bread(sdp, blk);
+		if (gfs2_check_meta(bh, GFS2_METATYPE_DI)) { /* nonzero: skip */
+			brelse(bh);
+			continue;
+		}
+		gfs2_dinode_in(&di, bh);
+		if (!found_gfs2_dinodes &&
+		    di.di_num.no_addr == di.di_num.no_formal_ino) {
+			possible_gfs1_dinodes++; /* addr == formal ino */
+			if (possible_gfs1_dinodes > 5) {
+				log_err(_("Found several gfs (version 1) "
+					  "dinodes; aborting.\n"));
+				brelse(bh);
+				return -1;
+			}
+		} else {
+			found_gfs2_dinodes++;
+		}
+		if (di.di_flags & GFS2_DIF_SYSTEM)
+			peruse_system_dinode(sdp, &di, bh);
+		else
+			peruse_user_dinode(sdp, &di, bh);
+		brelse(bh);
+	}
+	return 0;
+}
+
+/**
+ * sb_repair - repair a damaged superblock
+ * Assumes: device is open.  The biggest RG size is 2GB.
+ * Returns: 0 if the superblock was rebuilt, nonzero if it could not be.
+ */
+static int sb_repair(struct gfs2_sbd *sdp)
+{
+	uint64_t real_device_size, half;
+	uint32_t known_bsize = 0;
+	unsigned char uuid[16];
+	int error = 0;
+
+	memset(&fix_md, 0, sizeof(fix_md)); /* nothing located yet */
+	/* Step 1 - First we need to determine the correct block size. */
+	sdp->bsize = GFS2_DEFAULT_BSIZE;
+	log_warn(_("Gathering information to repair the gfs2 superblock.  "
+		   "This may take some time.\n"));
+	error = find_rgs_for_bsize(sdp, (GFS2_SB_ADDR * GFS2_BASIC_BLOCK) /
+				   GFS2_DEFAULT_BSIZE, &known_bsize);
+	if (error)
+		return error;
+	if (!known_bsize) {
+		log_warn(_("Block size not apparent; checking elsewhere.\n"));
+		/* First, figure out the device size.  We need that so we can
+		   find a suitable start point to determine what's what. */
+		device_size(sdp->device_fd, &real_device_size);
+		half = real_device_size / 2; /* in bytes */
+		half /= sdp->bsize;
+		/* Start looking halfway through the device for gfs2
+		   structures.  If there aren't any at all, forget it. */
+		error = find_rgs_for_bsize(sdp, half, &known_bsize);
+		if (error)
+			return error;
+	}
+	if (!known_bsize) {
+		log_err(_("Unable to determine the block size; this "
+			  "does not look like a gfs2 file system.\n"));
+		return -1;
+	}
+	/* Step 2 - look for the system dinodes */
+	error = peruse_metadata(sdp, (GFS2_SB_ADDR * GFS2_BASIC_BLOCK) /
+				GFS2_DEFAULT_BSIZE);
+	if (error)
+		return error;
+	if (!sdp->sd_sb.sb_master_dir.no_addr) {
+		log_err(_("Unable to locate the system master  directory.\n"));
+		return -1;
+	}
+	if (!sdp->sd_sb.sb_root_dir.no_addr) {
+		struct gfs2_inum inum;
+
+		log_err(_("Unable to locate the root directory.\n"));
+		if (possible_root == HIGHEST_BLOCK) {
+			/* Take advantage of the fact that mkfs.gfs2
+			   creates master immediately after root. */
+			log_err(_("Can't find any dinodes that might "
+				  "be the root; using master - 1.\n"));
+			possible_root = sdp->sd_sb.sb_master_dir.no_addr - 1;
+		}
+		log_err(_("Found a root directory candidate at  0x%llx\n"),
+			(unsigned long long)possible_root);
+		sdp->sd_sb.sb_root_dir.no_addr = possible_root;
+		sdp->md.rooti = inode_read(sdp, possible_root);
+		if (!sdp->md.rooti ||
+		    sdp->md.rooti->i_di.di_header.mh_magic != GFS2_MAGIC) {
+			struct gfs2_buffer_head *bh;
+
+			log_err(_("The root dinode block is destroyed.\n"));
+			log_err(_("At this point I recommend "
+				  "reinitializing it.\n"
+				  "Hopefully everything will later "
+				  "be put into lost+found.\n"));
+			if (!query(_("Okay to reinitialize the root "
+				     "dinode? (y/n)"))) {
+				log_err(_("The root dinode was not "
+					  "reinitialized; aborting.\n"));
+				return -1;
+			}
+			inum.no_formal_ino = 1;
+			inum.no_addr = possible_root;
+			bh = init_dinode(sdp, &inum, S_IFDIR | 0755, 0, &inum);
+			brelse(bh);
+		}
+	}
+	/* Step 3 - Rebuild the lock protocol and file system table name */
+	get_lockproto_table(sdp);
+	if (query(_("Okay to fix the GFS2 superblock? (y/n)"))) {
+		log_info(_("Master system directory found at: 0x%llx\n"),
+			 sdp->sd_sb.sb_master_dir.no_addr);
+		sdp->master_dir = inode_read(sdp,
+					     sdp->sd_sb.sb_master_dir.no_addr);
+		sdp->master_dir->i_di.di_num.no_addr =
+			sdp->sd_sb.sb_master_dir.no_addr;
+		log_info(_("Root directory found at: 0x%llx\n"),
+			 sdp->sd_sb.sb_root_dir.no_addr);
+		/* NOTE(review): may replace the rooti read above - confirm */
+		sdp->md.rooti = inode_read(sdp,
+					   sdp->sd_sb.sb_root_dir.no_addr);
+		get_random_bytes(uuid, sizeof(uuid)); /* new uuid for the sb */
+		build_sb(sdp, uuid);
+		inode_put(&sdp->md.rooti);
+		inode_put(&sdp->master_dir);
+	} else {
+		log_crit(_("GFS2 superblock not fixed; fsck cannot proceed "
+			   "without a valid superblock.\n"));
+		return -1;
+	}
+	return 0;
+}
+
 /**
  * fill_super_block
  * @sdp:
@@ -453,8 +1025,17 @@ static int fill_super_block(struct gfs2_sbd *sdp)
 	}
 
 	compute_constants(sdp);
-	if(read_sb(sdp) < 0){
-		return -1;
+	if (read_sb(sdp) < 0) {
+		/* First, check for a gfs1 (not gfs2) file system */
+		if (sdp->sd_sb.sb_header.mh_magic == GFS2_MAGIC &&
+		    sdp->sd_sb.sb_header.mh_type == GFS2_METATYPE_SB)
+			return -1; /* This is gfs1, don't try to repair */
+		/* It's not a "sane" gfs1 fs so try to repair it */
+		if (sb_repair(sdp) != 0)
+			return -1; /* unrepairable, so exit */
+		/* Now that we've tried to repair it, re-read it. */
+		if (read_sb(sdp) < 0)
+			return -1;
 	}
 
 	return 0;
@@ -515,10 +1096,8 @@ int initialize(struct gfs2_sbd *sbp, int force_check, int preen,
 	}
 
 	/* read in sb from disk */
-	if (fill_super_block(sbp)) {
-		stack;
+	if (fill_super_block(sbp))
 		return FSCK_ERROR;
-	}
 
 	/* Change lock protocol to be fsck_* instead of lock_* */
 	if(!opts.no && preen_is_safe(sbp, preen, force_check)) {
@@ -528,6 +1107,17 @@ int initialize(struct gfs2_sbd *sbp, int force_check, int preen,
 		}
 	}
 
+	/* Get master dinode */
+	sbp->master_dir = inode_read(sbp, sbp->sd_sb.sb_master_dir.no_addr);
+	if (sbp->master_dir->i_di.di_header.mh_magic != GFS2_MAGIC ||
+	    sbp->master_dir->i_di.di_header.mh_type != GFS2_METATYPE_DI ||
+	    !sbp->master_dir->i_di.di_size) {
+		inode_put(&sbp->master_dir);
+		rebuild_master(sbp);
+		sbp->master_dir = inode_read(sbp,
+					     sbp->sd_sb.sb_master_dir.no_addr);
+	}
+
 	/* verify various things */
 
 	if(replay_journals(sbp, preen, force_check, &clean_journals)) {
diff --git a/gfs2/fsck/main.c b/gfs2/fsck/main.c
index ae8d6c0..259fff3 100644
--- a/gfs2/fsck/main.c
+++ b/gfs2/fsck/main.c
@@ -157,117 +157,6 @@ static void interrupt(int sig)
 	}
 }
 
-/* Check system inode and verify it's marked "in use" in the bitmap:       */
-/* Should work for all system inodes: root, master, jindex, per_node, etc. */
-static int check_system_inode(struct gfs2_inode *sysinode, const char *filename,
-		       void builder(struct gfs2_sbd *sbp),
-		       enum gfs2_mark_block mark)
-{
-	uint64_t iblock = 0;
-	struct dir_status ds = {0};
-
-	log_info( _("Checking system inode '%s'\n"), filename);
-	if (sysinode) {
-		/* Read in the system inode, look at its dentries, and start
-		 * reading through them */
-		iblock = sysinode->i_di.di_num.no_addr;
-		log_info( _("System inode for '%s' is located at block %"
-			 PRIu64 " (0x%" PRIx64 ")\n"), filename,
-			 iblock, iblock);
-		
-		/* FIXME: check this block's validity */
-
-		ds.q = block_type(iblock);
-		/* If the inode exists but the block is marked      */
-		/* free, we might be recovering from a corrupt      */
-		/* bitmap.  In that case, don't rebuild the inode.  */
-		/* Just reuse the inode and fix the bitmap.         */
-		if (ds.q == gfs2_block_free) {
-			log_info( _("The inode exists but the block is not "
-				    "marked 'in use'; fixing it.\n"));
-			fsck_blockmap_set(sysinode,
-					  sysinode->i_di.di_num.no_addr,
-					  filename, mark);
-			ds.q = mark;
-			if (mark == gfs2_inode_dir)
-				dirtree_insert(sysinode->i_di.di_num.no_addr);
-		}
-	}
-	else
-		log_info( _("System inode for '%s' is missing.\n"), filename);
-	/* If there are errors with the inode here, we need to
-	 * create a new inode and get it all setup - of course,
-	 * everything will be in lost+found then, but we *need* our
-	 * system inodes before we can do any of that. */
-	if(!sysinode || ds.q != mark) {
-		log_err( _("Invalid or missing %s system inode.\n"), filename);
-		if (query(_("Create new %s system inode? (y/n) "), filename)) {
-			builder(sysinode->i_sbd);
-			fsck_blockmap_set(sysinode,
-					  sysinode->i_di.di_num.no_addr,
-					  filename, mark);
-			ds.q = mark;
-			if (mark == gfs2_inode_dir)
-				dirtree_insert(sysinode->i_di.di_num.no_addr);
-		}
-		else {
-			log_err( _("Cannot continue without valid %s inode\n"),
-				filename);
-			return -1;
-		}
-	}
-
-	return 0;
-}
-
-static int check_system_inodes(struct gfs2_sbd *sdp)
-{
-	/*******************************************************************
-	 *******  Check the system inode integrity             *************
-	 *******************************************************************/
-	if (check_system_inode(sdp->master_dir, "master", build_master,
-			       gfs2_inode_dir)) {
-		stack;
-		return -1;
-	}
-	if (check_system_inode(sdp->md.rooti, "root", build_root,
-			       gfs2_inode_dir)) {
-		stack;
-		return -1;
-	}
-	if (check_system_inode(sdp->md.inum, "inum", build_inum,
-			       gfs2_inode_file)) {
-		stack;
-		return -1;
-	}
-	if (check_system_inode(sdp->md.statfs, "statfs", build_statfs,
-			       gfs2_inode_file)) {
-		stack;
-		return -1;
-	}
-	if (check_system_inode(sdp->md.jiinode, "jindex", build_jindex,
-			       gfs2_inode_dir)) {
-		stack;
-		return -1;
-	}
-	if (check_system_inode(sdp->md.riinode, "rindex", build_rindex,
-			       gfs2_inode_file)) {
-		stack;
-		return -1;
-	}
-	if (check_system_inode(sdp->md.qinode, "quota", build_quota,
-			       gfs2_inode_file)) {
-		stack;
-		return -1;
-	}
-	if (check_system_inode(sdp->md.pinode, "per_node", build_per_node,
-			       gfs2_inode_dir)) {
-		stack;
-		return -1;
-	}
-	return 0;
-}
-
 static void check_statfs(struct gfs2_sbd *sdp)
 {
 	osi_list_t *tmp;
@@ -376,9 +265,6 @@ int main(int argc, char **argv)
 	else
 		log_notice( _("Pass1 complete      \n"));
 
-	/* Make sure the system inodes are okay & represented in the bitmap. */
-	check_system_inodes(sbp);
-
 	if (!fsck_abort) {
 		last_reported_block = 0;
 		pass = "pass 1b";
diff --git a/gfs2/fsck/metawalk.c b/gfs2/fsck/metawalk.c
index 5bcfb02..09305da 100644
--- a/gfs2/fsck/metawalk.c
+++ b/gfs2/fsck/metawalk.c
@@ -186,8 +186,7 @@ struct duptree *dupfind(uint64_t block)
 	return NULL;
 }
 
-static struct gfs2_inode *fsck_system_inode(struct gfs2_sbd *sdp,
-					    uint64_t block)
+struct gfs2_inode *fsck_system_inode(struct gfs2_sbd *sdp, uint64_t block)
 {
 	if (lf_dip && lf_dip->i_di.di_num.no_addr == block)
 		return lf_dip;
diff --git a/gfs2/fsck/metawalk.h b/gfs2/fsck/metawalk.h
index 13f4c70..3ea2991 100644
--- a/gfs2/fsck/metawalk.h
+++ b/gfs2/fsck/metawalk.h
@@ -46,6 +46,8 @@ extern int check_n_fix_bitmap(struct gfs2_sbd *sdp, uint64_t blk,
 		       enum gfs2_mark_block new_blockmap_state);
 extern void reprocess_inode(struct gfs2_inode *ip, const char *desc);
 extern struct duptree *dupfind(uint64_t block);
+extern struct gfs2_inode *fsck_system_inode(struct gfs2_sbd *sdp,
+					    uint64_t block);
 
 #define is_duplicate(dblock) ((dupfind(dblock)) ? 1 : 0)
 
diff --git a/gfs2/fsck/pass1.c b/gfs2/fsck/pass1.c
index 0b8ca09..bea0ef5 100644
--- a/gfs2/fsck/pass1.c
+++ b/gfs2/fsck/pass1.c
@@ -87,6 +87,7 @@ static int invalidate_eattr_indir(struct gfs2_inode *ip, uint64_t block,
 static int invalidate_eattr_leaf(struct gfs2_inode *ip, uint64_t block,
 				 uint64_t parent, struct gfs2_buffer_head **bh,
 				 void *private);
+static int handle_ip(struct gfs2_sbd *sdp, struct gfs2_inode *ip);
 
 struct metawalk_fxns pass1_fxns = {
 	.private = NULL,
@@ -116,6 +117,98 @@ struct metawalk_fxns invalidate_fxns = {
 	.check_eattr_leaf = invalidate_eattr_leaf,
 };
 
+/*
+ * resuscitate_metalist - make sure a system directory entry's metadata blocks
+ *                        are marked "in use" in the bitmap.
+ *
+ * This function makes sure metadata blocks for system and root directories are
+ * marked "in use" by the bitmap.  You don't want root's indirect blocks
+ * deleted, do you? Or worse, reused for lost+found.
+ */
+static int resuscitate_metalist(struct gfs2_inode *ip, uint64_t block,
+				struct gfs2_buffer_head **bh, void *private)
+{
+	struct block_count *bc = (struct block_count *)private;
+
+	*bh = NULL;
+	if (gfs2_check_range(ip->i_sbd, block)){ /* blk outside of FS */
+		fsck_blockmap_set(ip, ip->i_di.di_num.no_addr,
+				  _("itself"), gfs2_bad_block);
+		log_err( _("Bad indirect block pointer (out of range) "
+			   "found in system inode %lld (0x%llx).\n"),
+			 (unsigned long long)ip->i_di.di_num.no_addr,
+			 (unsigned long long)ip->i_di.di_num.no_addr);
+		return 1;
+	}
+	if (fsck_system_inode(ip->i_sbd, block))
+		fsck_blockmap_set(ip, block, _("system file"), gfs2_indir_blk);
+	else
+		check_n_fix_bitmap(ip->i_sbd, block, gfs2_indir_blk);
+	bc->indir_count++;
+	return 0;
+}
+
+/*
+ * resuscitate_dentry - make sure a system directory entry is alive
+ *
+ * This function makes sure directory entries in system directories are
+ * kept alive.  You don't want journal0 deleted from jindex, do you?
+ */
+static int resuscitate_dentry(struct gfs2_inode *ip, struct gfs2_dirent *dent,
+			      struct gfs2_dirent *prev_de,
+			      struct gfs2_buffer_head *bh, char *filename,
+			      uint16_t *count, void *priv)
+{
+	struct gfs2_sbd *sdp = ip->i_sbd;
+	struct gfs2_dirent dentry, *de;
+	char tmp_name[PATH_MAX];
+	uint64_t block;
+	enum gfs2_mark_block dinode_type;
+
+	memset(&dentry, 0, sizeof(struct gfs2_dirent));
+	gfs2_dirent_in(&dentry, (char *)dent);
+	de = &dentry;
+	block = de->de_inum.no_addr;
+	/* Start of checks */
+	memset(tmp_name, 0, sizeof(tmp_name));
+	if(de->de_name_len < sizeof(tmp_name))
+		strncpy(tmp_name, filename, de->de_name_len);
+	else
+		strncpy(tmp_name, filename, sizeof(tmp_name) - 1);
+	if(gfs2_check_range(sdp, block)) {
+		log_err( _("Block # referenced by system directory entry %s "
+			   "in inode %lld (0x%llx) is out of range; "
+			   "ignored.\n"),
+			 tmp_name, (unsigned long long)ip->i_di.di_num.no_addr,
+			 (unsigned long long)ip->i_di.di_num.no_addr);
+		return 0;
+	}
+	if (block == sdp->md.jiinode->i_di.di_num.no_addr ||
+	    block == sdp->md.pinode->i_di.di_num.no_addr ||
+	    block == sdp->master_dir->i_di.di_num.no_addr)
+		dinode_type = gfs2_inode_dir;
+	else
+		dinode_type = gfs2_inode_file;
+	/* If this is a system dinode, we'll handle it later in
+	   check_system_inodes.  If not, it'll be handled by pass1 but
+	   since it's in a system directory we need to make sure it's
+	   represented in the rgrp bitmap. */
+	if (fsck_system_inode(sdp, block))
+		fsck_blockmap_set(ip, block, _("system file"), dinode_type);
+	else
+		check_n_fix_bitmap(sdp, block, dinode_type);
+	/* Return the number of leaf entries so metawalk doesn't flag this
+	   leaf as having none. */
+	*count = be16_to_cpu(((struct gfs2_leaf *)bh->b_data)->lf_entries);
+	return 0;
+}
+
+struct metawalk_fxns sysdir_fxns = {
+	.private = NULL,
+	.check_metalist = resuscitate_metalist,
+	.check_dentry = resuscitate_dentry,
+};
+
 static int leaf(struct gfs2_inode *ip, uint64_t block,
 		struct gfs2_buffer_head *bh, void *private)
 {
@@ -880,40 +973,15 @@ struct metawalk_fxns rangecheck_fxns = {
         .check_eattr_leaf = rangecheck_eattr_leaf,
 };
 
-static int handle_di(struct gfs2_sbd *sdp, struct gfs2_buffer_head *bh)
+/*
+ * handle_ip - process an incore structure representing a dinode.
+ */
+static int handle_ip(struct gfs2_sbd *sdp, struct gfs2_inode *ip)
 {
-	uint8_t q;
-	struct gfs2_inode *ip;
 	int error;
 	struct block_count bc = {0};
-	uint64_t block = bh->b_blocknr;
 	long bad_pointers;
-
-	q = block_type(block);
-	if(q != gfs2_block_free) {
-		log_err( _("Found a duplicate inode block at #%" PRIu64
-			   " (0x%" PRIx64 ") previously marked as a %s\n"),
-			 block, block, block_type_string(q));
-		add_duplicate_ref(ip, block, ref_as_meta, 0, INODE_VALID);
-		return 0;
-	}
-
-	ip = fsck_inode_get(sdp, bh);
-	if (ip->i_di.di_num.no_addr != block) {
-		log_err( _("Inode #%llu (0x%llx): Bad inode address found: %llu "
-			"(0x%llx)\n"), (unsigned long long)block,
-			(unsigned long long)block,
-			(unsigned long long)ip->i_di.di_num.no_addr,
-			(unsigned long long)ip->i_di.di_num.no_addr);
-		if(query( _("Fix address in inode at block #%"
-			    PRIu64 " (0x%" PRIx64 ")? (y/n) "),
-			 block, block)) {
-			ip->i_di.di_num.no_addr = ip->i_di.di_num.no_formal_ino = block;
-			bmodified(ip->i_bh);
-		} else
-			log_err( _("Address in inode at block #%" PRIu64
-				 " (0x%" PRIx64 ") not fixed\n"), block, block);
-	}
+	uint64_t block = ip->i_bh->b_blocknr;
 
 	bad_pointers = 0L;
 
@@ -931,7 +999,6 @@ static int handle_di(struct gfs2_sbd *sdp, struct gfs2_buffer_head *bh)
 			 BAD_POINTER_TOLERANCE);
 		fsck_blockmap_set(ip, ip->i_di.di_num.no_addr,
 				  _("badly corrupt"), gfs2_block_free);
-		fsck_inode_put(&ip);
 		return 0;
 	}
 
@@ -939,64 +1006,40 @@ static int handle_di(struct gfs2_sbd *sdp, struct gfs2_buffer_head *bh)
 
 	case S_IFDIR:
 		if (fsck_blockmap_set(ip, block, _("directory"),
-				      gfs2_inode_dir)) {
-			stack;
-			fsck_inode_put(&ip);
-			return -1;
-		}
-		if(!dirtree_insert(block)) {
-			stack;
-			fsck_inode_put(&ip);
-			return -1;
-		}
+				      gfs2_inode_dir))
+			goto bad_dinode;
+		if(!dirtree_insert(block))
+			goto bad_dinode;
 		break;
 	case S_IFREG:
 		if (fsck_blockmap_set(ip, block, _("file"),
-				      gfs2_inode_file)) {
-			stack;
-			fsck_inode_put(&ip);
-			return -1;
-		}
+				      gfs2_inode_file))
+			goto bad_dinode;
 		break;
 	case S_IFLNK:
 		if (fsck_blockmap_set(ip, block, _("symlink"),
-				      gfs2_inode_lnk)) {
-			stack;
-			fsck_inode_put(&ip);
-			return -1;
-		}
+				      gfs2_inode_lnk))
+			goto bad_dinode;
 		break;
 	case S_IFBLK:
 		if (fsck_blockmap_set(ip, block, _("block device"),
-				      gfs2_inode_blk)) {
-			stack;
-			fsck_inode_put(&ip);
-			return -1;
-		}
+				      gfs2_inode_blk))
+			goto bad_dinode;
 		break;
 	case S_IFCHR:
 		if (fsck_blockmap_set(ip, block, _("character device"),
-				      gfs2_inode_chr)) {
-			stack;
-			fsck_inode_put(&ip);
-			return -1;
-		}
+				      gfs2_inode_chr))
+			goto bad_dinode;
 		break;
 	case S_IFIFO:
 		if (fsck_blockmap_set(ip, block, _("fifo"),
-				      gfs2_inode_fifo)) {
-			stack;
-			fsck_inode_put(&ip);
-			return -1;
-		}
+				      gfs2_inode_fifo))
+			goto bad_dinode;
 		break;
 	case S_IFSOCK:
 		if (fsck_blockmap_set(ip, block, _("socket"),
-				      gfs2_inode_sock)) {
-			stack;
-			fsck_inode_put(&ip);
-			return -1;
-		}
+				      gfs2_inode_sock))
+			goto bad_dinode;
 		break;
 	default:
 		/* We found a dinode that has an invalid mode, so we can't
@@ -1012,19 +1055,12 @@ static int handle_di(struct gfs2_sbd *sdp, struct gfs2_buffer_head *bh)
 		   skip parts that we can't be sure of based on dinode type. */
 		check_metatree(ip, &invalidate_fxns);
 		if (fsck_blockmap_set(ip, block, _("invalid mode"),
-				      gfs2_inode_invalid)) {
-			stack;
-			fsck_inode_put(&ip);
-			return -1;
-		}
-		fsck_inode_put(&ip);
+				      gfs2_inode_invalid))
+			goto bad_dinode;
 		return 0;
 	}
-	if(set_link_count(ip->i_di.di_num.no_addr, ip->i_di.di_nlink)) {
-		stack;
-		fsck_inode_put(&ip);
-		return -1;
-	}
+	if(set_link_count(ip->i_di.di_num.no_addr, ip->i_di.di_nlink))
+		goto bad_dinode;
 
 	if (S_ISDIR(ip->i_di.di_mode) &&
 	    (ip->i_di.di_flags & GFS2_DIF_EXHASH)) {
@@ -1036,22 +1072,16 @@ static int handle_di(struct gfs2_sbd *sdp, struct gfs2_buffer_head *bh)
 				 ip->i_di.di_depth,
 				 (1 >> (ip->i_di.di_size/sizeof(uint64_t))));
 			if(fsck_blockmap_set(ip, block, _("bad depth"),
-					     gfs2_block_free)) {
-				stack;
-				fsck_inode_put(&ip);
-				return -1;
-			}
-			fsck_inode_put(&ip);
+					     gfs2_block_free))
+				goto bad_dinode;
 			return 0;
 		}
 	}
 
 	pass1_fxns.private = &bc;
 	error = check_metatree(ip, &pass1_fxns);
-	if (fsck_abort || error < 0) {
-		fsck_inode_put(&ip);
+	if (fsck_abort || error < 0)
 		return 0;
-	}
 	if (error > 0) {
 		log_err( _("Error: inode %llu (0x%llx) has unrecoverable "
 			   "errors; invalidating.\n"),
@@ -1065,7 +1095,6 @@ static int handle_di(struct gfs2_sbd *sdp, struct gfs2_buffer_head *bh)
 		   Therefore we mark the inode as free space. */
 		fsck_blockmap_set(ip, ip->i_di.di_num.no_addr,
 				  _("corrupt"), gfs2_block_free);
-		fsck_inode_put(&ip);
 		return 0;
 	}
 
@@ -1104,7 +1133,195 @@ static int handle_di(struct gfs2_sbd *sdp, struct gfs2_buffer_head *bh)
 				(unsigned long long)ip->i_di.di_num.no_addr);
 	}
 
+	return 0;
+bad_dinode:
+	stack;
+	return -1;
+}
+
+/*
+ * handle_di - This is now a wrapper function that takes a gfs2_buffer_head
+ *             and calls handle_ip, which takes an in-core dinode structure.
+ */
+static int handle_di(struct gfs2_sbd *sdp, struct gfs2_buffer_head *bh)
+{
+	uint8_t q;
+	int error = 0;
+	uint64_t block = bh->b_blocknr;
+	struct gfs2_inode *ip;
+
+	ip = fsck_inode_get(sdp, bh);
+	q = block_type(block);
+	if(q != gfs2_block_free) {
+		log_err( _("Found a duplicate inode block at #%" PRIu64
+			   " (0x%" PRIx64 ") previously marked as a %s\n"),
+			 block, block, block_type_string(q));
+		add_duplicate_ref(ip, block, ref_as_meta, 0, INODE_VALID);
+		fsck_inode_put(&ip);
+		return 0;
+	}
+
+	if (ip->i_di.di_num.no_addr != block) {
+		log_err( _("Inode #%llu (0x%llx): Bad inode address found: %llu "
+			"(0x%llx)\n"), (unsigned long long)block,
+			(unsigned long long)block,
+			(unsigned long long)ip->i_di.di_num.no_addr,
+			(unsigned long long)ip->i_di.di_num.no_addr);
+		if(query( _("Fix address in inode at block #%"
+			    PRIu64 " (0x%" PRIx64 ")? (y/n) "),
+			  block, block)) {
+			ip->i_di.di_num.no_addr = ip->i_di.di_num.no_formal_ino = block;
+			bmodified(ip->i_bh);
+		} else
+			log_err( _("Address in inode at block #%" PRIu64
+				 " (0x%" PRIx64 ") not fixed\n"), block, block);
+	}
+	error = handle_ip(sdp, ip);
 	fsck_inode_put(&ip);
+	return error;
+}
+
+/* check_system_inode - verify a system inode (root, master, jindex, per_node,
+ * etc.) and make sure it's marked "in use" in the bitmap.
+ * Returns: 0 on success, -1 if the inode is unusable or handle_ip fails. */
+static int check_system_inode(struct gfs2_inode *sysinode, const char *filename,
+		       void builder(struct gfs2_sbd *sbp),
+		       enum gfs2_mark_block mark)
+{
+	uint64_t iblock = 0;
+	struct dir_status ds = {0};
+
+	log_info( _("Checking system inode '%s'\n"), filename);
+	if (sysinode) {
+		/* Read in the system inode, look at its dentries, and start
+		 * reading through them */
+		iblock = sysinode->i_di.di_num.no_addr;
+		log_info( _("System inode for '%s' is located at block %"
+			 PRIu64 " (0x%" PRIx64 ")\n"), filename,
+			 iblock, iblock);
+		/* FIXME: check this block's validity */
+		ds.q = block_type(iblock);
+		/* If the inode exists but the block is marked free, we might
+		   be recovering from a corrupt bitmap.  In that case, don't
+		   rebuild the inode.  Just reuse the inode and fix the
+		   bitmap. */
+		if (ds.q == gfs2_block_free) {
+			log_info( _("The inode exists but the block is not "
+				    "marked 'in use'; fixing it.\n"));
+			fsck_blockmap_set(sysinode,
+					  sysinode->i_di.di_num.no_addr,
+					  filename, mark);
+			ds.q = mark;
+			if (mark == gfs2_inode_dir)
+				dirtree_insert(sysinode->i_di.di_num.no_addr);
+		}
+	} else
+		log_info( _("System inode for '%s' is missing.\n"), filename);
+	/* If there are errors with the inode here, we need to create a new
+	   inode and get it all setup - of course, everything will be in
+	   lost+found then, but we *need* our system inodes before we can
+	   do any of that. */
+	if(!sysinode || ds.q != mark) {
+		log_err( _("Invalid or missing %s system inode (should be %d, "
+			   "is %d).\n"), filename, mark, ds.q);
+		/* A missing inode gives us no sbd to pass to the builder, so
+		   it can't be rebuilt from here; treat that like a "no". */
+		if (sysinode &&
+		    query(_("Create new %s system inode? (y/n) "), filename)) {
+			builder(sysinode->i_sbd);
+			fsck_blockmap_set(sysinode,
+					  sysinode->i_di.di_num.no_addr,
+					  filename, mark);
+			ds.q = mark;
+			if (mark == gfs2_inode_dir)
+				dirtree_insert(sysinode->i_di.di_num.no_addr);
+		} else {
+			log_err( _("Cannot continue without valid %s inode\n"),
+				filename);
+			return -1;
+		}
+	}
+	if (S_ISDIR(sysinode->i_di.di_mode)) {
+		struct block_count bc = {0};
+
+		sysdir_fxns.private = &bc;
+		if (sysinode->i_di.di_flags & GFS2_DIF_EXHASH)
+			check_metatree(sysinode, &sysdir_fxns);
+		else
+			check_linear_dir(sysinode, sysinode->i_bh,
+					 &sysdir_fxns);
+	}
+	return handle_ip(sysinode->i_sbd, sysinode);
+}
+
+static void build_a_journal(struct gfs2_sbd *sdp)
+{
+	build_journal(sdp, sdp->md.journals, sdp->md.jiinode);
+}
+
+static int check_system_inodes(struct gfs2_sbd *sdp)
+{
+	int journal_count;
+
+	/* Check the integrity of every system inode, stopping at the first
+	 * one that check_system_inode() rejects (returning -1).  Returns 0
+	 * if all of them, including each journal, check out. */
+	if (check_system_inode(sdp->master_dir, "master", build_master,
+			       gfs2_inode_dir)) {
+		stack;
+		return -1;
+	}
+	if (check_system_inode(sdp->md.rooti, "root", build_root,
+			       gfs2_inode_dir)) {
+		stack;
+		return -1;
+	}
+	if (check_system_inode(sdp->md.inum, "inum", build_inum,
+			       gfs2_inode_file)) {
+		stack;
+		return -1;
+	}
+	if (check_system_inode(sdp->md.statfs, "statfs", build_statfs,
+			       gfs2_inode_file)) {
+		stack;
+		return -1;
+	}
+	if (check_system_inode(sdp->md.jiinode, "jindex", build_jindex,
+			       gfs2_inode_dir)) {
+		stack;
+		return -1;
+	}
+	if (check_system_inode(sdp->md.riinode, "rindex", build_rindex,
+			       gfs2_inode_file)) {
+		stack;
+		return -1;
+	}
+	if (check_system_inode(sdp->md.qinode, "quota", build_quota,
+			       gfs2_inode_file)) {
+		stack;
+		return -1;
+	}
+	if (check_system_inode(sdp->md.pinode, "per_node", build_per_node,
+			       gfs2_inode_dir)) {
+		stack;
+		return -1;
+	}
+	/* We have to play a trick on build_journal:  We swap md.journals
+	   in order to keep a count of which journal we need to build. */
+	journal_count = sdp->md.journals;
+	for (sdp->md.journals = 0; sdp->md.journals < journal_count;
+	     sdp->md.journals++) {
+		char jname[32];
+
+		snprintf(jname, sizeof(jname), "journal%d", sdp->md.journals);
+		if (check_system_inode(sdp->md.journal[sdp->md.journals],
+				       jname, build_a_journal,
+				       gfs2_inode_file)) {
+			stack;
+			sdp->md.journals = journal_count; /* undo the swap */
+			return -1;
+		}
+	}
 	return 0;
 }
 
@@ -1139,6 +1356,9 @@ int pass1(struct gfs2_sbd *sbp)
 	 * sweep - is there any metadata we need to mark here before
 	 * the sweeps start that we won't find otherwise? */
 
+	/* Make sure the system inodes are okay & represented in the bitmap. */
+	check_system_inodes(sbp);
+
 	/* So, do we do a depth first search starting at the root
 	 * inode, or use the rg bitmaps, or just read every fs block
 	 * to find the inodes?  If we use the depth first search, why
@@ -1205,6 +1425,11 @@ int pass1(struct gfs2_sbd *sbp)
 				}
 				check_n_fix_bitmap(sbp, block,
 						   gfs2_block_free);
+			} else if (fsck_system_inode(sbp, block)) {
+				log_debug(_("Already processed system inode "
+					    "%lld (0x%llx)\n"),
+					  (unsigned long long)block,
+					  (unsigned long long)block);
 			} else if (handle_di(sbp, bh) < 0) {
 				stack;
 				brelse(bh);
diff --git a/gfs2/fsck/rgrepair.c b/gfs2/fsck/rgrepair.c
index 3654356..510426f 100644
--- a/gfs2/fsck/rgrepair.c
+++ b/gfs2/fsck/rgrepair.c
@@ -383,10 +383,11 @@ static int rewrite_rg_block(struct gfs2_sbd *sdp, struct rgrp_list *rg,
 {
 	int x = errblock - rg->ri.ri_addr;
 
-	log_err( _("Block #%"PRIu64" (0x%" PRIx64") (%d of %d) is neither"
+	log_err( _("Block #%lld (0x%llx) (%d of %d) is neither"
 		" GFS2_METATYPE_RB nor GFS2_METATYPE_RG.\n"),
-		rg->bh[x]->b_blocknr, rg->bh[x]->b_blocknr,
-		(int)x+1, (int)rg->ri.ri_length);
+		 (unsigned long long)rg->ri.ri_addr + x,
+		 (unsigned long long)rg->ri.ri_addr + x,
+		 (int)x+1, (int)rg->ri.ri_length);
 	if (query( _("Fix the Resource Group? (y/n)"))) {
 		log_err( _("Attempting to repair the RG.\n"));
 		rg->bh[x] = bread(sdp, rg->ri.ri_addr + x);
@@ -406,12 +407,46 @@ static int rewrite_rg_block(struct gfs2_sbd *sdp, struct rgrp_list *rg,
 			gfs2_rgrp_out(&rg->rg, rg->bh[x]);
 		}
 		brelse(rg->bh[x]);
+		rg->bh[x] = NULL;
 		return 0;
 	}
 	return 1;
 }
 
 /*
+ * expect_rindex_sanity - the rindex file seems trustworthy, so use those
+ *                        values as our expected values and assume the
+ *                        damage is only to the rgrps themselves.
+ */
+static int expect_rindex_sanity(struct gfs2_sbd *sdp, osi_list_t *ret_list,
+				int *num_rgs)
+{
+	osi_list_t *tmp;
+	struct rgrp_list *exp, *rgd; /* expected, actual */
+
+	*num_rgs = sdp->md.riinode->i_di.di_size / sizeof(struct gfs2_rindex);
+	osi_list_init(ret_list);
+	for (tmp = sdp->rglist.next; tmp != &sdp->rglist; tmp = tmp->next) {
+		rgd = osi_list_entry(tmp, struct rgrp_list, list);
+
+		exp = calloc(1, sizeof(struct rgrp_list));
+		if (exp == NULL) {
+			fprintf(stderr, "Out of memory in %s\n", __FUNCTION__);
+			exit(-1);
+		}
+		exp->start = rgd->start;
+		exp->length = rgd->length;
+		memcpy(&exp->ri, &rgd->ri, sizeof(exp->ri));
+		memcpy(&exp->rg, &rgd->rg, sizeof(exp->rg));
+		exp->bits = NULL;
+		gfs2_compute_bitstructs(sdp, exp);
+		osi_list_add_prev(&exp->list, ret_list);
+	}
+	sdp->rgrps = *num_rgs;
+	return 0;
+}
+
+/*
  * rg_repair - try to repair a damaged rg index (rindex)
  * trust_lvl - This is how much we trust the rindex file.
  *             blind_faith means we take the rindex at face value.
@@ -419,7 +454,7 @@ static int rewrite_rg_block(struct gfs2_sbd *sdp, struct rgrp_list *rg,
  *             distrust means it's not to be trusted, so we should go to
  *             greater lengths to build it from scratch.
  */
-int rg_repair(struct gfs2_sbd *sdp, int trust_lvl, int *rg_count)
+int rg_repair(struct gfs2_sbd *sdp, int trust_lvl, int *rg_count, int *sane)
 {
 	int error, discrepancies;
 	osi_list_t expected_rglist;
@@ -429,10 +464,20 @@ int rg_repair(struct gfs2_sbd *sdp, int trust_lvl, int *rg_count)
 
 	if (trust_lvl == blind_faith)
 		return 0;
-	else if (trust_lvl == open_minded) { /* If we can't trust RG index */
+	else if (trust_lvl == ye_of_little_faith) { /* if rindex seems sane */
+		if (!(*sane)) {
+			log_err(_("The rindex file does not meet our "
+				  "expectations.\n"));
+			return -1;
+		}
+		error = expect_rindex_sanity(sdp, &expected_rglist,
+					     &calc_rg_count);
+		if (error)
+			return error;
+	} else if (trust_lvl == open_minded) { /* If we can't trust RG index */
 		/* Calculate our own RG index for comparison */
 		error = gfs2_rindex_calculate(sdp, &expected_rglist,
-					       &calc_rg_count);
+					      &calc_rg_count);
 		if (error) { /* If calculated RGs don't match the fs */
 			gfs2_rgrp_free(&expected_rglist);
 			return -1;
@@ -450,7 +495,7 @@ int rg_repair(struct gfs2_sbd *sdp, int trust_lvl, int *rg_count)
 	}
 	/* Read in the rindex */
 	osi_list_init(&sdp->rglist); /* Just to be safe */
-	rindex_read(sdp, 0, &rgcount_from_index);
+	rindex_read(sdp, 0, &rgcount_from_index, sane);
 	if (sdp->md.riinode->i_di.di_size % sizeof(struct gfs2_rindex)) {
 		log_warn( _("WARNING: rindex file is corrupt.\n"));
 		gfs2_rgrp_free(&expected_rglist);
diff --git a/gfs2/libgfs2/fs_ops.c b/gfs2/libgfs2/fs_ops.c
index 9aeeb23..4beb3c4 100644
--- a/gfs2/libgfs2/fs_ops.c
+++ b/gfs2/libgfs2/fs_ops.c
@@ -1514,7 +1514,7 @@ static int dir_l_search(struct gfs2_inode *dip, const char *filename,
  *
  * Returns: 0 if found, -1 on failure, -ENOENT if not found.
  */
-static int dir_search(struct gfs2_inode *dip, const char *filename, int len,
+int dir_search(struct gfs2_inode *dip, const char *filename, int len,
 		      unsigned int *type, struct gfs2_inum *inum)
 {
 	int error;
diff --git a/gfs2/libgfs2/libgfs2.h b/gfs2/libgfs2/libgfs2.h
index 8b4a135..f6cfc1a 100644
--- a/gfs2/libgfs2/libgfs2.h
+++ b/gfs2/libgfs2/libgfs2.h
@@ -455,6 +455,8 @@ extern struct gfs2_inode *createi(struct gfs2_inode *dip, const char *filename,
 				  unsigned int mode, uint32_t flags);
 extern void dirent2_del(struct gfs2_inode *dip, struct gfs2_buffer_head *bh,
 			struct gfs2_dirent *prev, struct gfs2_dirent *cur);
+extern int dir_search(struct gfs2_inode *dip, const char *filename, int len,
+		      unsigned int *type, struct gfs2_inum *inum);
 extern int gfs2_lookupi(struct gfs2_inode *dip, const char *filename, int len,
 			struct gfs2_inode **ipp);
 extern void dir_add(struct gfs2_inode *dip, const char *filename, int len,
@@ -478,8 +480,8 @@ extern int gfs2_dirent_next(struct gfs2_inode *dip, struct gfs2_buffer_head *bh,
 extern void build_height(struct gfs2_inode *ip, int height);
 extern void unstuff_dinode(struct gfs2_inode *ip);
 extern unsigned int calc_tree_height(struct gfs2_inode *ip, uint64_t size);
-extern int write_journal(struct gfs2_sbd *sdp, struct gfs2_inode *ip,
-			 unsigned int j, unsigned int blocks);
+extern void write_journal(struct gfs2_sbd *sdp, struct gfs2_inode *ip,
+			  unsigned int j, unsigned int blocks);
 
 /**
  * device_size - figure out a device's size
@@ -612,37 +614,36 @@ extern int gfs2_query(int *setonabort, struct gfs2_options *opts,
 		      const char *format, ...)
 	__attribute__((format(printf,3,4)));
 
-char generic_interrupt(const char *caller, const char *where,
-		       const char *progress, const char *question,
-		       const char *answers);
-int gfs2_query(int *setonabort, struct gfs2_options *opts,
-	       const char *format, ...);
-
 /* misc.c */
 #define SYS_BASE "/sys/fs/gfs2"
 
-uint32_t compute_heightsize(struct gfs2_sbd *sdp, uint64_t *heightsize,
-			    uint32_t bsize1, int diptrs, int inptrs);
-void compute_constants(struct gfs2_sbd *sdp);
-int is_pathname_mounted(struct gfs2_sbd *sdp, int *ro_mount);
-int is_gfs2(struct gfs2_sbd *sdp);
-void check_for_gfs2(struct gfs2_sbd *sdp);
-void mount_gfs2_meta(struct gfs2_sbd *sdp);
-void cleanup_metafs(struct gfs2_sbd *sdp);
-char *get_list(void);
-char **str2lines(char *str);
-char *find_debugfs_mount(void);
-char *mp2fsname(char *mp);
-char *mp2fsname2(char *devname);
-char *name2value(char *str, char *name);
-uint32_t name2u32(char *str, char *name);
-uint64_t name2u64(char *str, char *name);
-char *get_sysfs(const char *fsname, const char *filename);
-unsigned int get_sysfs_uint(char *fsname, char *filename);
-void set_sysfs(char *fsname, char *filename, char *val);
-char *do_basename(char *device);
-char *mp2devname(char *mp);
-int is_fsname(char *name);
+extern char *get_list(void);
+extern char **str2lines(char *str);
+extern char *find_debugfs_mount(void);
+extern char *mp2fsname2(char *devname);
+extern char *name2value(char *str, char *name);
+extern uint32_t name2u32(char *str, char *name);
+extern uint64_t name2u64(char *str, char *name);
+extern char *do_basename(char *device);
+extern char *mp2devname(char *mp);
+
+extern int compute_heightsize(struct gfs2_sbd *sdp, uint64_t *heightsize,
+			      uint32_t bsize1, int diptrs, int inptrs);
+extern void compute_constants(struct gfs2_sbd *sdp);
+extern int is_pathname_mounted(struct gfs2_sbd *sdp, int *ro_mount);
+extern int is_gfs2(struct gfs2_sbd *sdp);
+extern int find_gfs2_meta(struct gfs2_sbd *sdp);
+extern int dir_exists(const char *dir);
+extern void check_for_gfs2(struct gfs2_sbd *sdp);
+extern void mount_gfs2_meta(struct gfs2_sbd *sdp);
+extern void cleanup_metafs(struct gfs2_sbd *sdp);
+extern char *find_debugfs_mount(void);
+extern char *mp2fsname(char *mp);
+extern char *get_sysfs(const char *fsname, const char *filename);
+extern unsigned int get_sysfs_uint(char *fsname, char *filename);
+extern void set_sysfs(const char *fsname, const char *filename, const char *val);
+extern int is_fsname(char *name);
+extern void get_random_bytes(void *buf, int nbytes);
 
 /* recovery.c */
 void gfs2_replay_incr_blk(struct gfs2_inode *ip, unsigned int *blk);
@@ -670,6 +671,8 @@ extern void gfs2_rgrp_free(osi_list_t *rglist);
 /* structures.c */
 extern void build_master(struct gfs2_sbd *sdp);
 extern void build_sb(struct gfs2_sbd *sdp, const unsigned char *uuid);
+extern void build_journal(struct gfs2_sbd *sdp, int j,
+			 struct gfs2_inode *jindex);
 extern void build_jindex(struct gfs2_sbd *sdp);
 extern void build_per_node(struct gfs2_sbd *sdp);
 extern void build_inum(struct gfs2_sbd *sdp);
@@ -688,8 +691,8 @@ extern int gfs2_next_rg_metatype(struct gfs2_sbd *sdp, struct rgrp_list *rgd,
 extern int check_sb(struct gfs2_sb *sb);
 extern int read_sb(struct gfs2_sbd *sdp);
 extern int ji_update(struct gfs2_sbd *sdp);
-extern int rindex_read(struct gfs2_sbd *sdp, int fd, int *count1);
-extern int ri_update(struct gfs2_sbd *sdp, int fd, int *rgcount);
+extern int rindex_read(struct gfs2_sbd *sdp, int fd, int *count1, int *sane);
+extern int ri_update(struct gfs2_sbd *sdp, int fd, int *rgcount, int *sane);
 extern int write_sb(struct gfs2_sbd *sdp);
 
 /* ondisk.c */
diff --git a/gfs2/libgfs2/misc.c b/gfs2/libgfs2/misc.c
index 81c7e2e..98ffb60 100644
--- a/gfs2/libgfs2/misc.c
+++ b/gfs2/libgfs2/misc.c
@@ -29,6 +29,7 @@
 #include <linux/kdev_t.h>
 #include <sys/statfs.h>
 #include <mntent.h>
+#include <sys/time.h>
 
 #include "libgfs2.h"
 
@@ -36,8 +37,8 @@
 
 static char sysfs_buf[PAGE_SIZE];
 
-uint32_t compute_heightsize(struct gfs2_sbd *sdp, uint64_t *heightsize,
-			    uint32_t bsize1, int diptrs, int inptrs)
+int compute_heightsize(struct gfs2_sbd *sdp, uint64_t *heightsize,
+		       uint32_t bsize1, int diptrs, int inptrs)
 {
 	int x;
 
@@ -295,7 +296,7 @@ get_sysfs_uint(char *fsname, char *filename)
 }
 
 void
-set_sysfs(char *fsname, char *filename, char *val)
+set_sysfs(const char *fsname, const char *filename, const char *val)
 {
 	char path[PATH_MAX];
 	int fd, rv, len;
@@ -690,74 +691,46 @@ mp2fsname2(char *mp)
 	return fsname;
 }
 
-/**
- * name2value - find the value of a name-value pair in a string
- * @str_in:
- * @name:
+/*
+ * get_random_bytes - Generate a series of random bytes using /dev/urandom.
  *
- * Returns: the value string in a static buffer
+ * Modified from original code in gen_uuid.c in e2fsprogs/lib
  */
-
-char *
-name2value(char *str_in, char *name)
+void get_random_bytes(void *buf, int nbytes)
 {
-	char str[strlen(str_in) + 1];
-	static char value[PATH_MAX];
-	char **lines;
-	unsigned int x;
-	unsigned int len = strlen(name);
-
-	strcpy(str, str_in);
-	value[0] = 0;
-
-	lines = str2lines(str);
-
-	for (x = 0; *lines[x]; x++)
-		if (memcmp(lines[x], name, len) == 0 &&
-		    lines[x][len] == ' ') {
-			strcpy(value, lines[x] + len + 1);
-			break;
+	int i, n = nbytes, fd;
+	int lose_counter = 0;
+	unsigned char *cp = (unsigned char *) buf;
+	struct timeval	tv;
+
+	gettimeofday(&tv, 0);
+	fd = open("/dev/urandom", O_RDONLY);
+	srand((getpid() << 16) ^ getuid() ^ tv.tv_sec ^ tv.tv_usec);
+	/* Crank the random number generator a few times */
+	gettimeofday(&tv, 0);
+	for (i = (tv.tv_sec ^ tv.tv_usec) & 0x1F; i > 0; i--)
+		rand();
+	if (fd >= 0) {
+		while (n > 0) {
+			i = read(fd, cp, n);
+			if (i <= 0) {
+				if (lose_counter++ > 16)
+					break;
+				continue;
+			}
+			n -= i;
+			cp += i;
+			lose_counter = 0;
 		}
+		close(fd);
+	}
 
-	free(lines);
-
-	return value;
-}
-
-/**
- * name2u32 - find the value of a name-value pair in a string
- * @str_in:
- * @name:
- *
- * Returns: the value uint32
- */
-
-uint32_t
-name2u32(char *str, char *name)
-{
-	char *value = name2value(str, name);
-	uint32_t x = 0;
-
-	sscanf(value, "%u", &x);
-
-	return x;
-}
-
-/**
- * name2u64 - find the value of a name-value pair in a string
- * @str_in:
- * @name:
- *
- * Returns: the value uint64
- */
-
-uint64_t
-name2u64(char *str, char *name)
-{
-	char *value = name2value(str, name);
-	uint64_t x = 0;
-
-	sscanf(value, "%"SCNu64, &x);
+	/*
+	 * We do this all the time, but this is the only source of
+	 * randomness if /dev/random/urandom is out to lunch.
+	 */
+	for (cp = buf, i = 0; i < nbytes; i++)
+		*cp++ ^= (rand() >> 7) & 0xFF;
 
-	return x;
+	return;
 }
diff --git a/gfs2/libgfs2/rgrp.c b/gfs2/libgfs2/rgrp.c
index 2c3187d..bd36038 100644
--- a/gfs2/libgfs2/rgrp.c
+++ b/gfs2/libgfs2/rgrp.c
@@ -146,8 +146,10 @@ uint64_t gfs2_rgrp_read(struct gfs2_sbd *sdp, struct rgrp_list *rgd)
 			uint64_t error;
 
 			error = rgd->ri.ri_addr + x;
-			for (; x >= 0; x--)
+			for (; x >= 0; x--) {
 				brelse(rgd->bh[x]);
+				rgd->bh[x] = NULL;
+			}
 			return error;
 		}
 	}
diff --git a/gfs2/libgfs2/structures.c b/gfs2/libgfs2/structures.c
index ce12174..bbde7ba 100644
--- a/gfs2/libgfs2/structures.c
+++ b/gfs2/libgfs2/structures.c
@@ -86,12 +86,12 @@ build_sb(struct gfs2_sbd *sdp, const unsigned char *uuid)
 	}
 }
 
-int write_journal(struct gfs2_sbd *sdp, struct gfs2_inode *ip, unsigned int j,
-		  unsigned int blocks)
+void write_journal(struct gfs2_sbd *sdp, struct gfs2_inode *ip, unsigned int j,
+				   unsigned int blocks)
 {
 	struct gfs2_log_header lh;
 	unsigned int x;
-	uint64_t seq = ((blocks) * (random() / (RAND_MAX + 1.0)));
+	uint64_t seq = RANDOM(blocks);
 	uint32_t hash;
 	unsigned int height;
 
@@ -109,14 +109,14 @@ int write_journal(struct gfs2_sbd *sdp, struct gfs2_inode *ip, unsigned int j,
 	for (x = 0; x < blocks; x++) {
 		struct gfs2_buffer_head *bh = get_file_buf(ip, x, TRUE);
 		if (!bh)
-			return -1;
+			die("write_journal\n");
 		bmodified(bh);
 		brelse(bh);
 	}
 	for (x = 0; x < blocks; x++) {
 		struct gfs2_buffer_head *bh = get_file_buf(ip, x, FALSE);
 		if (!bh)
-			return -1;
+			die("write_journal\n");
 
 		memset(bh->b_data, 0, sdp->bsize);
 		lh.lh_sequence = seq;
@@ -136,11 +136,21 @@ int write_journal(struct gfs2_sbd *sdp, struct gfs2_inode *ip, unsigned int j,
 		printf("\nJournal %u:\n", j);
 		gfs2_dinode_print(&ip->i_di);
 	}
-	return 0;
 }
 
-void
-build_jindex(struct gfs2_sbd *sdp)
+void build_journal(struct gfs2_sbd *sdp, int j, struct gfs2_inode *jindex)
+{
+	char name[256];		/* receives "journal" followed by the index j */
+	struct gfs2_inode *ip;
+	/* Create regular system file "journal<j>" in @jindex, hand it to write_journal(). */
+	sprintf(name, "journal%u", j);	/* NOTE(review): j is int, format is %u */
+	ip = createi(jindex, name, S_IFREG | 0600, GFS2_DIF_SYSTEM); /* result not checked */
+	write_journal(sdp, ip, j,	/* block count; assumes jsize is in MB - TODO confirm */
+		      sdp->jsize << 20 >> sdp->sd_sb.sb_bsize_shift);
+	inode_put(&ip);
+}
+
+void build_jindex(struct gfs2_sbd *sdp)
 {
 	struct gfs2_inode *jindex;
 	unsigned int j;
@@ -148,17 +158,8 @@ build_jindex(struct gfs2_sbd *sdp)
 	jindex = createi(sdp->master_dir, "jindex", S_IFDIR | 0700,
 			 GFS2_DIF_SYSTEM);
 
-	for (j = 0; j < sdp->md.journals; j++) {
-		char name[256];
-		struct gfs2_inode *ip;
-
-		sprintf(name, "journal%u", j);
-		ip = createi(jindex, name, S_IFREG | 0600, GFS2_DIF_SYSTEM);
-		write_journal(sdp, ip, j,
-			      sdp->jsize << 20 >> sdp->sd_sb.sb_bsize_shift);
-		inode_put(&ip);
-	}
-
+	for (j = 0; j < sdp->md.journals; j++)
+		build_journal(sdp, j, jindex);
 	if (sdp->debug) {
 		printf("\nJindex:\n");
 		gfs2_dinode_print(&jindex->i_di);
diff --git a/gfs2/libgfs2/super.c b/gfs2/libgfs2/super.c
index 072f681..c40bd38 100644
--- a/gfs2/libgfs2/super.c
+++ b/gfs2/libgfs2/super.c
@@ -172,10 +172,11 @@ int ji_update(struct gfs2_sbd *sdp)
  * fd: optional file handle for rindex file (if meta_fs file system is mounted)
  *     (if fd is <= zero, it will read from raw device)
  * @count1: return count of the rgs.
+ * @sane: return whether rindex is consistent
  *
  * Returns: 0 on success, -1 on failure
  */
-int rindex_read(struct gfs2_sbd *sdp, int fd, int *count1)
+int rindex_read(struct gfs2_sbd *sdp, int fd, int *count1, int *sane)
 {
 	unsigned int rg;
 	int error;
@@ -183,8 +184,11 @@ int rindex_read(struct gfs2_sbd *sdp, int fd, int *count1)
 	struct rgrp_list *rgd, *prev_rgd;
 	uint64_t prev_length = 0;
 
+	*sane = 1;
 	*count1 = 0;
 	prev_rgd = NULL;
+	if (!fd && sdp->md.riinode->i_di.di_size % sizeof(struct gfs2_rindex))
+		*sane = 0; /* rindex file size must be a multiple of 96 */
 	for (rg = 0; ; rg++) {
 		if (fd > 0)
 			error = read(fd, &buf, sizeof(struct gfs2_rindex));
@@ -209,12 +213,27 @@ int rindex_read(struct gfs2_sbd *sdp, int fd, int *count1)
 
 		rgd->start = rgd->ri.ri_addr;
 		if (prev_rgd) {
+			/* If rg addresses go backwards, it's not sane
+			   (or it's converted from gfs1). */
+			if (prev_rgd->start >= rgd->start)
+				*sane = 0;
+			/* If rg lengths are not consistent, it's not sane
+			   (or it's converted from gfs1).  The first RG will
+			   be a different length due to space allocated for
+			   the superblock, so we can't detect this until
+			   we check rgrp 3, when we can compare the distance
+			   between rgrp 1 and rgrp 2. */
+			if (rg > 2 && prev_length &&
+			    prev_length != rgd->start - prev_rgd->start)
+				*sane = 0;
 			prev_length = rgd->start - prev_rgd->start;
 			prev_rgd->length = prev_length;
 		}
 
-		if(gfs2_compute_bitstructs(sdp, rgd))
+		if(gfs2_compute_bitstructs(sdp, rgd)) {
+			*sane = 0;
 			return -1;
+		}
 
 		(*count1)++;
 		prev_rgd = rgd;
@@ -235,7 +254,7 @@ int rindex_read(struct gfs2_sbd *sdp, int fd, int *count1)
  *
  * Returns: 0 on success, -1 on failure.
  */
-int ri_update(struct gfs2_sbd *sdp, int fd, int *rgcount)
+int ri_update(struct gfs2_sbd *sdp, int fd, int *rgcount, int *sane)
 {
 	struct rgrp_list *rgd;
 	struct gfs2_rindex *ri;
@@ -244,7 +263,7 @@ int ri_update(struct gfs2_sbd *sdp, int fd, int *rgcount)
 	uint64_t errblock = 0;
 	uint64_t rmax = 0;
 
-	if (rindex_read(sdp, fd, &count1))
+	if (rindex_read(sdp, fd, &count1, sane))
 	    goto fail;
 	for (tmp = sdp->rglist.next; tmp != &sdp->rglist; tmp = tmp->next) {
 		rgd = osi_list_entry(tmp, struct rgrp_list, list);
diff --git a/gfs2/mkfs/main_grow.c b/gfs2/mkfs/main_grow.c
index a6fac0a..75b6715 100644
--- a/gfs2/mkfs/main_grow.c
+++ b/gfs2/mkfs/main_grow.c
@@ -269,6 +269,8 @@ main_grow(int argc, char *argv[])
 	decode_arguments(argc, argv, sdp);
 	
 	while ((argc - optind) > 0) {
+		int sane;
+
 		sdp->path_name = argv[optind++];
 		sdp->path_fd = open(sdp->path_name, O_RDONLY);
 		if (sdp->path_fd < 0)
@@ -309,7 +311,7 @@ main_grow(int argc, char *argv[])
 		/* and therefore out of date.  It shouldn't matter because  */
 		/* we're only going to write out new RG information after   */
 		/* the existing RGs, and only write to the index at EOF.    */
-		ri_update(sdp, rindex_fd, &rgcount);
+		ri_update(sdp, rindex_fd, &rgcount, &sane);
 		fssize = filesystem_size(sdp);
 		figure_out_rgsize(sdp, &rgsize);
 		fsgrowth = ((sdp->device.length - fssize) * sdp->bsize);
diff --git a/gfs2/mkfs/main_mkfs.c b/gfs2/mkfs/main_mkfs.c
index 3dc3337..80126d1 100644
--- a/gfs2/mkfs/main_mkfs.c
+++ b/gfs2/mkfs/main_mkfs.c
@@ -324,50 +324,6 @@ static void check_mount(char *device)
 	return;
 }
 
-/*
- * get_random_bytes - Generate a series of random bytes using /dev/urandom.
- *
- * Modified from original code in gen_uuid.c in e2fsprogs/lib
- */
-static void get_random_bytes(void *buf, int nbytes)
-{
-	int i, n = nbytes, fd;
-	int lose_counter = 0;
-	unsigned char *cp = (unsigned char *) buf;
-	struct timeval	tv;
-
-	gettimeofday(&tv, 0);
-	fd = open("/dev/urandom", O_RDONLY);
-	srand((getpid() << 16) ^ getuid() ^ tv.tv_sec ^ tv.tv_usec);
-	/* Crank the random number generator a few times */
-	gettimeofday(&tv, 0);
-	for (i = (tv.tv_sec ^ tv.tv_usec) & 0x1F; i > 0; i--)
-		rand();
-	if (fd >= 0) {
-		while (n > 0) {
-			i = read(fd, cp, n);
-			if (i <= 0) {
-				if (lose_counter++ > 16)
-					break;
-				continue;
-			}
-			n -= i;
-			cp += i;
-			lose_counter = 0;
-		}
-		close(fd);
-	}
-
-	/*
-	 * We do this all the time, but this is the only source of
-	 * randomness if /dev/random/urandom is out to lunch.
-	 */
-	for (cp = buf, i = 0; i < nbytes; i++)
-		*cp++ ^= (rand() >> 7) & 0xFF;
-
-	return;
-}
-
 /**
  * print_results - print out summary information
  * @sdp: the command line