Sophie

Sophie

distrib > Scientific%20Linux > 5x > x86_64 > media > main-src > by-pkgid > e536fc0c6270ec1d92a0fd41bb1f8360 > files > 49

rgmanager-2.0.52-28.el5_8.2.src.rpm

From fec58c0da35b360c9454bacd28371a22fd7fb146 Mon Sep 17 00:00:00 2001
From: Lon Hohberger <lhh@redhat.com>
Date: Wed, 16 Dec 2009 12:56:43 -0500
Subject: [PATCH] rgmanager: Make VF timeout scale with token timeout

Rgmanager did not wait long enough to account for failures
in the middle of a state transition, so services could
erroneously enter the 'failed' state.

Resolves: rhbz#548133

Signed-off-by: Lon Hohberger <lhh@redhat.com>
---
 rgmanager/include/vf.h       |    2 +-
 rgmanager/src/clulib/vft.c   |   10 +++++++---
 rgmanager/src/daemons/main.c |   22 +++++++++++++++++-----
 3 files changed, 25 insertions(+), 9 deletions(-)

diff --git a/rgmanager/include/vf.h b/rgmanager/include/vf.h
index abcca1b..3be7e15 100644
--- a/rgmanager/include/vf.h
+++ b/rgmanager/include/vf.h
@@ -170,7 +170,7 @@ typedef struct _key_node {
 /* 
  * VF Stuff.  VF only talks to peers.
  */
-int vf_init(int, uint16_t, vf_vote_cb_t, vf_commit_cb_t);
+int vf_init(int, uint16_t, vf_vote_cb_t, vf_commit_cb_t, int);
 int vf_invalidate(void);
 int vf_shutdown(void);
 
diff --git a/rgmanager/src/clulib/vft.c b/rgmanager/src/clulib/vft.c
index aed1d30..26191b5 100644
--- a/rgmanager/src/clulib/vft.c
+++ b/rgmanager/src/clulib/vft.c
@@ -45,6 +45,7 @@
 static key_node_t *key_list = NULL;	/** List of key nodes. */
 static int _node_id = (int)-1;/** Our node ID, set with vf_init. */
 static uint16_t _port = 0;		/** Our daemon ID, set with vf_init. */
+static int _vf_timeout = 10;
 
 /*
  * TODO: We could make it thread safe, but this might be unnecessary work
@@ -104,7 +105,8 @@ static int tv_cmp(struct timeval *left, struct timeval *right);
 static uint32_t vf_try_commit(key_node_t *key_node);
 
 int vf_init(int my_node_id, uint16_t my_port,
-	    vf_vote_cb_t vote_cb, vf_commit_cb_t commit_cb);
+	    vf_vote_cb_t vote_cb, vf_commit_cb_t commit_cb,
+	    int cluster_timeout);
 int vf_key_init(char *keyid, int timeout, vf_vote_cb_t vote_cb,
 		vf_commit_cb_t commit_cb);
 static int vf_key_init_nt(char *keyid, int timeout, vf_vote_cb_t vote_cb,
@@ -910,7 +912,7 @@ vf_server(void *arg)
  */
 int
 vf_init(int my_node_id, uint16_t my_port, vf_vote_cb_t vcb,
-	vf_commit_cb_t ccb)
+	vf_commit_cb_t ccb, int cluster_timeout)
 {
 	struct vf_args *args;
 	msgctx_t *ctx;
@@ -937,6 +939,8 @@ vf_init(int my_node_id, uint16_t my_port, vf_vote_cb_t vcb,
 	pthread_mutex_lock(&vf_mutex);
 	_port = my_port;
 	_node_id = my_node_id;
+	if (cluster_timeout)
+		_vf_timeout = cluster_timeout;
 	default_vote_cb = vcb;
 	default_commit_cb = ccb;
 	pthread_mutex_unlock(&vf_mutex);
@@ -1248,7 +1252,7 @@ vf_write(cluster_member_list_t *membership, uint32_t flags, char *keyid,
 	 * See if we have a consensus =)
 	 */
 	if ((rv = (vf_unanimous(&everyone, trans, remain,
-				5))) == VFR_OK) {
+				_vf_timeout))) == VFR_OK) {
 		vf_send_commit(&everyone, trans);
 #ifdef DEBUG
 		printf("VF: Consensus reached!\n");
diff --git a/rgmanager/src/daemons/main.c b/rgmanager/src/daemons/main.c
index 7f12f08..601e7d0 100644
--- a/rgmanager/src/daemons/main.c
+++ b/rgmanager/src/daemons/main.c
@@ -45,7 +45,7 @@
 #ifdef WRAP_THREADS
 void dump_thread_states(FILE *);
 #endif
-int configure_rgmanager(int ccsfd, int debug);
+int configure_rgmanager(int ccsfd, int debug, int *cluster_timeout);
 
 void node_event(int, int, int, int);
 void node_event_q(int, int, int, int);
@@ -792,7 +792,7 @@ event_loop(msgctx_t *localctx, msgctx_t *clusterctx)
 
 	if (need_reconfigure || check_config_update(&oldver, &newver)) {
 		need_reconfigure = 0;
-		configure_rgmanager(-1, 0);
+		configure_rgmanager(-1, 0, NULL);
 		config_event_q(oldver, newver);
 		return 0;
 	}
@@ -848,11 +848,12 @@ statedump(int __attribute__ ((unused)) sig)
  * Configure logging based on data in cluster.conf
  */
 int
-configure_rgmanager(int ccsfd, int dbg)
+configure_rgmanager(int ccsfd, int dbg, int *token_secs)
 {
 	char *v;
 	char internal = 0;
 	int status_child_max = 0;
+	int tmp;
 
 	if (ccsfd == -1) {
 		internal = 1;
@@ -861,6 +862,16 @@ configure_rgmanager(int ccsfd, int dbg)
 			return -1;
 	}
 
+	if (token_secs && ccs_get(ccsfd, "/cluster/totem/@token", &v) == 0) {
+		tmp = atoi(v);
+		if (tmp >= 1000) {
+			*token_secs = tmp / 1000;
+			if (tmp % 1000)
+				++(*token_secs);
+		}
+		free(v);
+	}
+
 	if (ccs_get(ccsfd, "/cluster/rm/@log_facility", &v) == 0) {
 		clu_set_facility(v);
 		free(v);
@@ -1011,6 +1022,7 @@ main(int argc, char **argv)
 	msgctx_t *local_ctx;
 	pthread_t th;
 	cman_handle_t clu = NULL;
+	int cluster_timeout = 10;
 
 	while ((rv = getopt(argc, argv, "wfdN")) != EOF) {
 		switch (rv) {
@@ -1089,7 +1101,7 @@ main(int argc, char **argv)
 	   We know we're quorate.  At this point, we need to
 	   read the resource group trees from ccsd.
 	 */
-	configure_rgmanager(-1, debug);
+	configure_rgmanager(-1, debug, &cluster_timeout);
 	clulog(LOG_NOTICE, "Resource Group Manager Starting\n");
 
 	if (init_resource_groups(0, do_init, 0) != 0) {
@@ -1132,7 +1144,7 @@ main(int argc, char **argv)
 
 	ds_key_init("rg_lockdown", 32, 10);
 #else
-	if (vf_init(me.cn_nodeid, port, NULL, NULL) != 0) {
+	if (vf_init(me.cn_nodeid, port, NULL, NULL, cluster_timeout) != 0) {
 		clulog(LOG_CRIT, "#11: Couldn't set up VF listen socket\n");
 		return -1;
 	}
-- 
1.6.2.5