Sophie

Sophie

distrib > Scientific%20Linux > 5x > x86_64 > by-pkgid > 89877e42827f16fa5f86b1df0c2860b1 > files > 680

kernel-2.6.18-128.1.10.el5.src.rpm

From: Benjamin Marzinski <bmarzins@redhat.com>
Subject: [RHEL 5.1 PATCH] GFS2 - bz #248480: GFS2: distributed mmap test cases 	deadlock
Date: Fri, 24 Aug 2007 13:24:38 -0500
Bugzilla: 248480
Message-Id: <20070824182437.GP24772@ether.msp.redhat.com>
Changelog: [GFS2] distributed mmap test cases deadlock


BZ#248480
https://bugzilla.redhat.com/bugzilla/show_bug.cgi?id=248480

Description:
When a lot of IO, with some distributed mmap IO, is run on a GFS2 filesystem in
a cluster, it will deadlock. The reason is that do_no_page() will repeatedly
call gfs2_sharewrite_nopage(), because each node keeps giving up the glock too
early, and is forced to call unmap_mapping_range(). This bumps the
mapping->truncate_count sequence count, forcing do_no_page() to retry. This
patch institutes a minimum glock hold time of a tenth a second, by putting the
demote on a work queue with the appropriate delay. This insures that even in
heavy contention cases, the node has enough time to get some useful work done
before it gives up the glock.

A second issue is that when gfs2_glock_dq() is called from within a page fault
to demote a lock, and the associated page needs to be written out, it will try
to acqire a lock on it, but it has already been locked at a higher level This
patch makes gfs2_glock_dq() use the work queue as well, to avoid this issue.

This patch is upstream:
http://git.kernel.org/?p=linux/kernel/git/steve/gfs2-2.6-nmw.git;a=commit;h=478781d55a347853c66c4f32df7bd3642280a458

Signed-off-by: Benjamin E. Marzinski <bmarzins@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>


diff -uprN --exclude-from=linux-2.6.18-43.gfs2abhi.001_clean/Documentation/dontdiff linux-2.6.18-43.gfs2abhi.001_clean/fs/gfs2/glock.c linux-2.6.18-43.gfs2abhi.001_test/fs/gfs2/glock.c
--- linux-2.6.18-43.gfs2abhi.001_clean/fs/gfs2/glock.c	2007-08-23 04:53:34.000000000 -0500
+++ linux-2.6.18-43.gfs2abhi.001_test/fs/gfs2/glock.c	2007-08-23 05:01:59.000000000 -0500
@@ -28,6 +28,8 @@
 #include <linux/module.h>
 #include <linux/kallsyms.h>
 #include <linux/kthread.h>
+#include <linux/workqueue.h>
+#include <linux/jiffies.h>
 
 #include "gfs2.h"
 #include "incore.h"
@@ -59,6 +61,7 @@ static int gfs2_dump_lockstate(struct gf
 static int dump_glock(struct glock_iter *gi, struct gfs2_glock *gl);
 static void gfs2_glock_xmote_th(struct gfs2_glock *gl, struct gfs2_holder *gh);
 static void gfs2_glock_drop_th(struct gfs2_glock *gl);
+static void run_queue(struct gfs2_glock *gl);
 static DECLARE_RWSEM(gfs2_umount_flush_sem);
 
 #define GFS2_GL_HASH_SHIFT      15
@@ -69,6 +72,7 @@ static struct gfs2_gl_hash_bucket gl_has
 static struct dentry *gfs2_root;
 static struct task_struct *scand_process;
 static unsigned int scand_secs = 5;
+static struct workqueue_struct *glock_workqueue;
 
 /*
  * Despite what you might think, the numbers below are not arbitrary :-)
@@ -277,6 +281,18 @@ static struct gfs2_glock *gfs2_glock_fin
 	return gl;
 }
 
+static void glock_work_func(void *data)
+{
+	struct gfs2_glock *gl = (struct gfs2_glock *)data;
+
+	spin_lock(&gl->gl_spin);
+	if (test_and_clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags))
+		set_bit(GLF_DEMOTE, &gl->gl_flags);
+	run_queue(gl);
+	spin_unlock(&gl->gl_spin);
+	gfs2_glock_put(gl);
+}
+
 /**
  * gfs2_glock_get() - Get a glock, or create one if one doesn't exist
  * @sdp: The GFS2 superblock
@@ -316,6 +332,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp,
 	gl->gl_name = name;
 	atomic_set(&gl->gl_ref, 1);
 	gl->gl_state = LM_ST_UNLOCKED;
+	gl->gl_demote_state = LM_ST_EXCLUSIVE;
 	gl->gl_hash = hash;
 	gl->gl_owner_pid = 0;
 	gl->gl_ip = 0;
@@ -324,10 +341,12 @@ int gfs2_glock_get(struct gfs2_sbd *sdp,
 	gl->gl_req_bh = NULL;
 	gl->gl_vn = 0;
 	gl->gl_stamp = jiffies;
+	gl->gl_tchange = jiffies;
 	gl->gl_object = NULL;
 	gl->gl_sbd = sdp;
 	gl->gl_aspace = NULL;
 	lops_init_le(&gl->gl_le, &gfs2_glock_lops);
+	INIT_WORK(&gl->gl_work, glock_work_func, (void *)gl);
 
 	/* If this glock protects actual on-disk data or metadata blocks,
 	   create a VFS inode to manage the pages/buffers holding them. */
@@ -441,6 +460,8 @@ static void wait_on_holder(struct gfs2_h
 
 static void gfs2_demote_wake(struct gfs2_glock *gl)
 {
+	BUG_ON(!spin_is_locked(&gl->gl_spin));
+	gl->gl_demote_state = LM_ST_EXCLUSIVE;
         clear_bit(GLF_DEMOTE, &gl->gl_flags);
         smp_mb__after_clear_bit();
         wake_up_bit(&gl->gl_flags, GLF_DEMOTE);
@@ -682,10 +703,14 @@ static void gfs2_glmutex_unlock(struct g
  * practise: LM_ST_SHARED and LM_ST_UNLOCKED
  */
 
-static void handle_callback(struct gfs2_glock *gl, unsigned int state, int remote)
+static void handle_callback(struct gfs2_glock *gl, unsigned int state,
+			    int remote, unsigned long delay)
 {
+	int bit = delay ? GLF_PENDING_DEMOTE : GLF_DEMOTE;
+
 	spin_lock(&gl->gl_spin);
-	if (test_and_set_bit(GLF_DEMOTE, &gl->gl_flags) == 0) {
+	set_bit(bit, &gl->gl_flags);
+	if (gl->gl_demote_state == LM_ST_EXCLUSIVE) {
 		gl->gl_demote_state = state;
 		gl->gl_demote_time = jiffies;
 		if (remote && gl->gl_ops->go_type == LM_TYPE_IOPEN &&
@@ -727,6 +752,7 @@ static void state_change(struct gfs2_glo
 	}
 
 	gl->gl_state = new_state;
+	gl->gl_tchange = jiffies;
 }
 
 /**
@@ -813,7 +839,6 @@ out:
 		gl->gl_req_gh = NULL;
 		gl->gl_req_bh = NULL;
 		clear_bit(GLF_LOCK, &gl->gl_flags);
-		run_queue(gl);
 		spin_unlock(&gl->gl_spin);
 	}
 
@@ -885,7 +910,6 @@ static void drop_bh(struct gfs2_glock *g
 	gfs2_assert_warn(sdp, !ret);
 
 	state_change(gl, LM_ST_UNLOCKED);
-	gfs2_demote_wake(gl);
 
 	if (glops->go_inval)
 		glops->go_inval(gl, DIO_METADATA);
@@ -898,10 +922,10 @@ static void drop_bh(struct gfs2_glock *g
 	}
 
 	spin_lock(&gl->gl_spin);
+	gfs2_demote_wake(gl);
 	gl->gl_req_gh = NULL;
 	gl->gl_req_bh = NULL;
 	clear_bit(GLF_LOCK, &gl->gl_flags);
-	run_queue(gl);
 	spin_unlock(&gl->gl_spin);
 
 	gfs2_glock_put(gl);
@@ -1209,9 +1233,10 @@ void gfs2_glock_dq(struct gfs2_holder *g
 {
 	struct gfs2_glock *gl = gh->gh_gl;
 	const struct gfs2_glock_operations *glops = gl->gl_ops;
+	unsigned delay = 0;
 
 	if (gh->gh_flags & GL_NOCACHE)
-		handle_callback(gl, LM_ST_UNLOCKED, 0);
+		handle_callback(gl, LM_ST_UNLOCKED, 0, 0);
 
 	gfs2_glmutex_lock(gl);
 
@@ -1229,8 +1254,14 @@ void gfs2_glock_dq(struct gfs2_holder *g
 	}
 
 	clear_bit(GLF_LOCK, &gl->gl_flags);
-	run_queue(gl);
 	spin_unlock(&gl->gl_spin);
+
+	gfs2_glock_hold(gl);
+	if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
+	    !test_bit(GLF_DEMOTE, &gl->gl_flags))
+		delay = gl->gl_ops->go_min_hold_time;
+	if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0)
+		gfs2_glock_put(gl);
 }
 
 void gfs2_glock_dq_wait(struct gfs2_holder *gh)
@@ -1457,18 +1488,21 @@ static void blocking_cb(struct gfs2_sbd 
 			unsigned int state)
 {
 	struct gfs2_glock *gl;
+	unsigned long delay = 0;
+	unsigned long holdtime;
+	unsigned long now = jiffies;
 
 	gl = gfs2_glock_find(sdp, name);
 	if (!gl)
 		return;
 
-	handle_callback(gl, state, 1);
-
-	spin_lock(&gl->gl_spin);
-	run_queue(gl);
-	spin_unlock(&gl->gl_spin);
+	holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time;
+	if (time_before(now, holdtime))
+		delay = holdtime - now;
 
-	gfs2_glock_put(gl);
+	handle_callback(gl, state, 1, delay);
+	if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0)
+		gfs2_glock_put(gl);
 }
 
 /**
@@ -1509,7 +1543,8 @@ void gfs2_glock_cb(void *cb_data, unsign
 			return;
 		if (!gfs2_assert_warn(sdp, gl->gl_req_bh))
 			gl->gl_req_bh(gl, async->lc_ret);
-		gfs2_glock_put(gl);
+		if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
+			gfs2_glock_put(gl);
 		up_read(&gfs2_umount_flush_sem);
 		return;
 	}
@@ -1602,7 +1637,7 @@ void gfs2_reclaim_glock(struct gfs2_sbd 
 	if (gfs2_glmutex_trylock(gl)) {
 		if (list_empty(&gl->gl_holders) &&
 		    gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl))
-			handle_callback(gl, LM_ST_UNLOCKED, 0);
+			handle_callback(gl, LM_ST_UNLOCKED, 0, 0);
 		gfs2_glmutex_unlock(gl);
 	}
 
@@ -1702,7 +1737,7 @@ static void clear_glock(struct gfs2_gloc
 	if (gfs2_glmutex_trylock(gl)) {
 		if (list_empty(&gl->gl_holders) &&
 		    gl->gl_state != LM_ST_UNLOCKED)
-			handle_callback(gl, LM_ST_UNLOCKED, 0);
+			handle_callback(gl, LM_ST_UNLOCKED, 0, 0);
 		gfs2_glmutex_unlock(gl);
 	}
 }
@@ -2009,11 +2044,18 @@ int __init gfs2_glock_init(void)
 	if (IS_ERR(scand_process))
 		return PTR_ERR(scand_process);
 
+	glock_workqueue = create_workqueue("glock_workqueue");
+	if (IS_ERR(glock_workqueue)) {
+		kthread_stop(scand_process);
+		return PTR_ERR(glock_workqueue);
+	}
+
 	return 0;
 }
 
 void gfs2_glock_exit(void)
 {
+	destroy_workqueue(glock_workqueue);
 	kthread_stop(scand_process);
 }
 
diff -uprN --exclude-from=linux-2.6.18-43.gfs2abhi.001_clean/Documentation/dontdiff linux-2.6.18-43.gfs2abhi.001_clean/fs/gfs2/glops.c linux-2.6.18-43.gfs2abhi.001_test/fs/gfs2/glops.c
--- linux-2.6.18-43.gfs2abhi.001_clean/fs/gfs2/glops.c	2007-08-23 04:53:34.000000000 -0500
+++ linux-2.6.18-43.gfs2abhi.001_test/fs/gfs2/glops.c	2007-08-23 04:58:29.000000000 -0500
@@ -457,6 +457,7 @@ const struct gfs2_glock_operations gfs2_
 	.go_lock = inode_go_lock,
 	.go_unlock = inode_go_unlock,
 	.go_type = LM_TYPE_INODE,
+	.go_min_hold_time = HZ / 10,
 };
 
 const struct gfs2_glock_operations gfs2_rgrp_glops = {
@@ -467,6 +468,7 @@ const struct gfs2_glock_operations gfs2_
 	.go_lock = rgrp_go_lock,
 	.go_unlock = rgrp_go_unlock,
 	.go_type = LM_TYPE_RGRP,
+	.go_min_hold_time = HZ / 10,
 };
 
 const struct gfs2_glock_operations gfs2_trans_glops = {
diff -uprN --exclude-from=linux-2.6.18-43.gfs2abhi.001_clean/Documentation/dontdiff linux-2.6.18-43.gfs2abhi.001_clean/fs/gfs2/incore.h linux-2.6.18-43.gfs2abhi.001_test/fs/gfs2/incore.h
--- linux-2.6.18-43.gfs2abhi.001_clean/fs/gfs2/incore.h	2007-08-23 04:53:34.000000000 -0500
+++ linux-2.6.18-43.gfs2abhi.001_test/fs/gfs2/incore.h	2007-08-23 04:58:29.000000000 -0500
@@ -11,6 +11,7 @@
 #define __INCORE_DOT_H__
 
 #include <linux/fs.h>
+#include <linux/workqueue.h>
 
 #define DIO_WAIT	0x00000010
 #define DIO_METADATA	0x00000020
@@ -130,6 +131,7 @@ struct gfs2_glock_operations {
 	int (*go_lock) (struct gfs2_holder *gh);
 	void (*go_unlock) (struct gfs2_holder *gh);
 	const int go_type;
+	const unsigned long go_min_hold_time;
 };
 
 enum {
@@ -161,6 +163,7 @@ enum {
 	GLF_LOCK		= 1,
 	GLF_STICKY		= 2,
 	GLF_DEMOTE		= 3,
+	GLF_PENDING_DEMOTE	= 4,
 	GLF_DIRTY		= 5,
 };
 
@@ -193,6 +196,7 @@ struct gfs2_glock {
 
 	u64 gl_vn;
 	unsigned long gl_stamp;
+	unsigned long gl_tchange;
 	void *gl_object;
 
 	struct list_head gl_reclaim;
@@ -203,6 +207,7 @@ struct gfs2_glock {
 	struct gfs2_log_element gl_le;
 	struct list_head gl_ail_list;
 	atomic_t gl_ail_count;
+	struct work_struct gl_work;
 };
 
 struct gfs2_alloc {