Sophie

Sophie

distrib > Scientific%20Linux > 5x > x86_64 > by-pkgid > d236c5da97a239a1b6991cfba2865b66 > files > 122

cman-2.0.115-68.el5_6.1.src.rpm

commit fe46f6b6e9ed9a40c37fa60966fafc1cf07e36d2
Author: Eduardo Damato <edamato@redhat.com>
Date:   Tue Sep 29 10:06:26 2009 -0400

    qdisk: Implement I/O timeout for read
    
    This patch creates a timer for last successful read and
    reboots the system if last successful read was more than
    interval*tko ago.
    
    Resolves: rhbz#511113
    
    Part 3/4
    
    Signed-off-by: Eduardo Damato <edamato@redhat.com>
    Signed-off-by: Lon Hohberger <lhh@redhat.com>

diff --git a/cman/qdisk/main.c b/cman/qdisk/main.c
index b698f2c..250406a 100644
--- a/cman/qdisk/main.c
+++ b/cman/qdisk/main.c
@@ -867,7 +867,7 @@ quorum_loop(qd_ctx *ctx, node_info_t *ni, int max)
 	int low_id, bid_pending = 0, score, score_max, score_req,
 	    upgrade = 0, count, errors, error_cycles = 0;
 	memb_mask_t mask, master_mask;
-	struct timeval maxtime, oldtime, newtime, diff, sleeptime, interval, lastok;
+	struct timeval maxtime, oldtime, newtime, diff, sleeptime, interval, rd_lastok, wr_lastok;
 
 	ctx->qc_status = S_NONE;
 	
@@ -877,8 +877,11 @@ quorum_loop(qd_ctx *ctx, node_info_t *ni, int max)
 	interval.tv_usec = 0;
 	interval.tv_sec = ctx->qc_interval;
 	
-	lastok.tv_usec = 0;
-	lastok.tv_sec = 0;
+	rd_lastok.tv_usec = 0;
+	rd_lastok.tv_sec = 0;
+	
+	wr_lastok.tv_usec = 0;
+	wr_lastok.tv_sec = 0;
 	
 	get_my_score(&score, &score_max);
 	if (score_max < ctx->qc_scoremin) {
@@ -893,7 +896,8 @@ quorum_loop(qd_ctx *ctx, node_info_t *ni, int max)
 		get_time(&oldtime, (ctx->qc_flags&RF_UPTIME));
 		
 		/* Read everyone else's status */
-		errors = read_node_blocks(ctx, ni, max);
+		if ( (errors = read_node_blocks(ctx, ni, max) == 0 )) 
+			get_time(&rd_lastok, ctx->qc_flags&RF_UPTIME);
 
 		/* Check for node transitions */
 		check_transitions(ctx, ni, max, mask);
@@ -1069,7 +1073,7 @@ quorum_loop(qd_ctx *ctx, node_info_t *ni, int max)
 			errors++; /* this value isn't really used 
 				     at this point */
  		} else {
- 			get_time(&lastok, ctx->qc_flags&RF_UPTIME);
+ 			get_time(&wr_lastok, ctx->qc_flags&RF_UPTIME);
 		}
 
 		/* write out our local status */
@@ -1082,7 +1086,7 @@ quorum_loop(qd_ctx *ctx, node_info_t *ni, int max)
  		/*
 		 * Reboot if the last successful hearbeat was longer ago than interval*TKO_COUNT
 		 */
-		_diff_tv(&diff, &lastok, &newtime);
+		_diff_tv(&diff, &wr_lastok, &newtime);
 		if (_cmp_tv(&maxtime, &diff) == 1 &&
 		    ctx->qc_flags & RF_IOTIMEOUT) {
 			clulog(LOG_EMERG, "Failed to send a heartbeat within "
@@ -1094,6 +1098,22 @@ quorum_loop(qd_ctx *ctx, node_info_t *ni, int max)
 			if (!(ctx->qc_flags & RF_DEBUG)) 
 				reboot(RB_AUTOBOOT);
 		}
+
+ 		/*
+		 * Reboot if the last successful read was longer ago than interval*TKO_COUNT
+		 */
+		_diff_tv(&diff, &rd_lastok, &newtime);
+		if (_cmp_tv(&maxtime, &diff) == 1 &&
+		    ctx->qc_flags & RF_IOTIMEOUT) {
+			clulog(LOG_EMERG, "Failed to read from qdisk within "
+			       "%d second%s (%d.%06d) - REBOOTING\n",
+			       (int)maxtime.tv_sec,
+			       maxtime.tv_sec==1?"":"s",
+			       (int)diff.tv_sec,
+			       (int)diff.tv_usec);
+			if (!(ctx->qc_flags & RF_DEBUG)) 
+				reboot(RB_AUTOBOOT);
+		}
 	
 		/*
 		 * Reboot if we didn't send a heartbeat in interval*TKO_COUNT