Sophie

Sophie

distrib > Scientific%20Linux > 5x > x86_64 > by-pkgid > d236c5da97a239a1b6991cfba2865b66 > files > 97

cman-2.0.115-68.el5_6.1.src.rpm

From 0fc5da5fdc190ed50e94640136fea177a900ea57 Mon Sep 17 00:00:00 2001
From: David Teigland <teigland@redhat.com>
Date: Thu, 5 Aug 2010 17:05:26 -0500
Subject: [PATCH] fenced: use post_join_delay after cluster join

When the cluster has lost quorum due to a node failure,
the next event is generally a cluster node join, which
gives the cluster quorum again.  With quorum, fenced
begins fencing any failed nodes, applying post_fail_delay
because the last cpg event was a node failure.  In this
case, however, post_join_delay is more appropriate, since
chances are good that the nodes being fenced will be joining.
Detect the case where a node join gives the cluster
quorum, and use post_join_delay.

bz 575952

Signed-off-by: David Teigland <teigland@redhat.com>
---
 fence/fenced/member_cman.c |   70 ++++++++++++++++++++++++++++++++++++++++++--
 fence/fenced/recover.c     |    6 +++-
 2 files changed, 72 insertions(+), 4 deletions(-)

diff --git a/fence/fenced/member_cman.c b/fence/fenced/member_cman.c
index 9e22ece..6ef74c3 100644
--- a/fence/fenced/member_cman.c
+++ b/fence/fenced/member_cman.c
@@ -17,9 +17,14 @@
 #define BUFLEN		128
 
 static cman_handle_t	ch;
-static int		cman_quorate;
+static cman_node_t	old_nodes[MAX_NODES];
+static int		old_node_count;
+static int		old_quorate;
 static cman_node_t	cman_nodes[MAX_NODES];
 static int		cman_node_count;
+static int		cman_quorate;
+int			cman_quorate_from_last_change;
+
 static char		name_buf[CMAN_MAX_NODENAME_LEN+1];
 
 extern struct list_head domains;
@@ -27,6 +32,26 @@ extern struct list_head domains;
 char			*our_name;
 int			our_nodeid;
 
+static int _is_member(cman_node_t *node_list, int count, int nodeid)
+{
+	int i;
+
+	for (i = 0; i < count; i++) {
+		if (node_list[i].cn_nodeid == nodeid)
+			return node_list[i].cn_member;
+	}
+	return 0;
+}
+
+static int is_old_member(int nodeid)
+{
+	return _is_member(old_nodes, old_node_count, nodeid);
+}
+
+static int is_cman_member(int nodeid)
+{
+	return _is_member(cman_nodes, cman_node_count, nodeid);
+}
 
 static int name_equal(char *name1, char *name2)
 {
@@ -89,15 +114,54 @@ static cman_node_t *find_cluster_node_name(char *name)
 
 static void statechange(void)
 {
-	int rv;
+	int i, rv;
+	int removed = 0, added = 0;
+
+	old_quorate = cman_quorate;
+	old_node_count = cman_node_count;
+	memcpy(&old_nodes, &cman_nodes, sizeof(old_nodes));
 
 	cman_quorate = cman_is_quorate(ch);
 	cman_node_count = 0;
 	memset(&cman_nodes, 0, sizeof(cman_nodes));
-
 	rv = cman_get_nodes(ch, MAX_NODES, &cman_node_count, cman_nodes);
 	if (rv < 0)
 		log_error("cman_get_nodes error %d %d", rv, errno);
+
+	/* Never allow node ID 0 to be considered a member #315711 */
+	for (i = 0; i < cman_node_count; i++) {
+		if (cman_nodes[i].cn_nodeid == 0) {
+			cman_nodes[i].cn_member = 0;
+			break;
+		}
+	}
+
+	for (i = 0; i < old_node_count; i++) {
+		if (old_nodes[i].cn_member &&
+		    !is_cman_member(old_nodes[i].cn_nodeid)) {
+			removed++;
+			log_debug("cman: node %d removed",
+				  old_nodes[i].cn_nodeid);
+		}
+	}
+
+	for (i = 0; i < cman_node_count; i++) {
+		if (cman_nodes[i].cn_member &&
+		    !is_old_member(cman_nodes[i].cn_nodeid)) {
+			added++;
+			log_debug("cman: node %d added",
+				  cman_nodes[i].cn_nodeid);
+		}
+	}
+
+	if (removed) {
+		cman_quorate_from_last_change = 0;
+	} else if (added) {
+		if (!old_quorate && cman_quorate)
+			cman_quorate_from_last_change = 1;
+		else
+			cman_quorate_from_last_change = 0;
+	}
 }
 
 static void cman_callback(cman_handle_t h, void *private, int reason, int arg)
diff --git a/fence/fenced/recover.c b/fence/fenced/recover.c
index 7f8aace..732ec81 100644
--- a/fence/fenced/recover.c
+++ b/fence/fenced/recover.c
@@ -18,6 +18,7 @@
 
 extern int our_nodeid;
 extern commandline_t comline;
+extern int cman_quorate_from_last_change;
 
 /* Fencing recovery algorithm
 
@@ -302,7 +303,7 @@ static void delay_fencing(fd_t *fd, int start_type)
 	fd_node_t *node;
 	char *delay_type;
 
-	if (start_type == GROUP_NODE_JOIN) {
+	if ((start_type == GROUP_NODE_JOIN) || cman_quorate_from_last_change) {
 		delay = comline.post_join_delay;
 		delay_type = "post_join_delay";
 	} else {
@@ -310,6 +311,9 @@ static void delay_fencing(fd_t *fd, int start_type)
 		delay_type = "post_fail_delay";
 	}
 
+	log_debug("delay_fencing %s %d quorate_from_last_change %d",
+		  delay_type, delay, cman_quorate_from_last_change);
+
 	if (delay == 0)
 		goto out;
 
-- 
1.7.1.1