Sophie

Sophie

distrib > Scientific%20Linux > 5x > x86_64 > by-pkgid > 27922b4260f65d317aabda37e42bbbff > files > 675

kernel-2.6.18-238.el5.src.rpm

From: David Teigland <teigland@redhat.com>
Subject: [RHEL5 PATCH] dlm: use recovery seq number to discard old replies
Date: Thu, 30 Nov 2006 16:06:30 -0600
Bugzilla: 215596
Message-Id: <20061130220630.GB31429@redhat.com>
Changelog: dlm: use recovery seq number to discard old replies


bz 215596
patch 2 of 3

A reply to a recovery message will often be received after the relevant
recovery sequence has aborted and the next recovery sequence has begun.
We need to ignore replies to these old messages from the previous
recovery.  There's already a way to do this for synchronous recovery
requests using the rc_id number, but not for async.

Each recovery sequence already has a locally unique sequence number
associated with it.  This patch adds a field to the rcom (recovery
message) structure where this recovery sequence number can be placed,
rc_seq.  When a node sends a reply to a recovery request, it copies the
rc_seq number it received into rc_seq_reply.  When the first node receives
the reply to its recovery message, it will check whether rc_seq_reply
matches the current recovery sequence number, ls_recover_seq, and if not
then it ignores the old reply.

An old, inadequate approach to filtering out old replies (checking if the
current stage of recovery has moved back to the start) has been removed
from two spots.

The protocol version number is changed to reflect the different rcom
structures.

The previous patch in this sequence (1 of 3 for bz 215596) makes this
filtering of old async replies especially necessary.


Index: latest/fs/dlm/dlm_internal.h
===================================================================
--- latest.orig/fs/dlm/dlm_internal.h
+++ latest/fs/dlm/dlm_internal.h
@@ -309,8 +309,8 @@ static inline int rsb_flag(struct dlm_rs
 
 /* dlm_header is first element of all structs sent between nodes */
 
-#define DLM_HEADER_MAJOR	0x00020000
-#define DLM_HEADER_MINOR	0x00000001
+#define DLM_HEADER_MAJOR	0x00030000
+#define DLM_HEADER_MINOR	0x00000000
 
 #define DLM_MSG			1
 #define DLM_RCOM		2
@@ -386,6 +386,8 @@ struct dlm_rcom {
 	uint32_t		rc_type;	/* DLM_RCOM_ */
 	int			rc_result;	/* multi-purpose */
 	uint64_t		rc_id;		/* match reply with request */
+	uint64_t		rc_seq;		/* sender's ls_recover_seq */
+	uint64_t		rc_seq_reply;	/* remote ls_recover_seq */
 	char			rc_buf[0];
 };
 
Index: latest/fs/dlm/rcom.c
===================================================================
--- latest.orig/fs/dlm/rcom.c
+++ latest/fs/dlm/rcom.c
@@ -56,6 +56,10 @@ static int create_rcom(struct dlm_ls *ls
 
 	rc->rc_type = type;
 
+	spin_lock(&ls->ls_recover_lock);
+	rc->rc_seq = ls->ls_recover_seq;
+	spin_unlock(&ls->ls_recover_lock);
+
 	*mh_ret = mh;
 	*rc_ret = rc;
 	return 0;
@@ -150,6 +154,7 @@ static void receive_rcom_status(struct d
 	if (error)
 		return;
 	rc->rc_id = rc_in->rc_id;
+	rc->rc_seq_reply = rc_in->rc_seq;
 	rc->rc_result = dlm_recover_status(ls);
 	make_config(ls, (struct rcom_config *) rc->rc_buf);
 
@@ -207,21 +212,7 @@ static void receive_rcom_names(struct dl
 {
 	struct dlm_rcom *rc;
 	struct dlm_mhandle *mh;
-	int error, inlen, outlen;
-	int nodeid = rc_in->rc_header.h_nodeid;
-	uint32_t status = dlm_recover_status(ls);
-
-	/*
-	 * We can't run dlm_dir_rebuild_send (which uses ls_nodes) while
-	 * dlm_recoverd is running ls_nodes_reconfig (which changes ls_nodes).
-	 * It could only happen in rare cases where we get a late NAMES
-	 * message from a previous instance of recovery.
-	 */
-
-	if (!(status & DLM_RS_NODES)) {
-		log_debug(ls, "ignoring RCOM_NAMES from %u", nodeid);
-		return;
-	}
+	int error, inlen, outlen, nodeid;
 
 	nodeid = rc_in->rc_header.h_nodeid;
 	inlen = rc_in->rc_header.h_length - sizeof(struct dlm_rcom);
@@ -231,6 +222,7 @@ static void receive_rcom_names(struct dl
 	if (error)
 		return;
 	rc->rc_id = rc_in->rc_id;
+	rc->rc_seq_reply = rc_in->rc_seq;
 
 	dlm_copy_master_names(ls, rc_in->rc_buf, inlen, rc->rc_buf, outlen,
 			      nodeid);
@@ -277,6 +269,7 @@ static void receive_rcom_lookup(struct d
 		ret_nodeid = error;
 	rc->rc_result = ret_nodeid;
 	rc->rc_id = rc_in->rc_id;
+	rc->rc_seq_reply = rc_in->rc_seq;
 
 	send_rcom(ls, mh, rc);
 }
@@ -358,20 +351,13 @@ static void receive_rcom_lock(struct dlm
 
 	memcpy(rc->rc_buf, rc_in->rc_buf, sizeof(struct rcom_lock));
 	rc->rc_id = rc_in->rc_id;
+	rc->rc_seq_reply = rc_in->rc_seq;
 
 	send_rcom(ls, mh, rc);
 }
 
 static void receive_rcom_lock_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in)
 {
-	uint32_t status = dlm_recover_status(ls);
-
-	if (!(status & DLM_RS_DIR)) {
-		log_debug(ls, "ignoring RCOM_LOCK_REPLY from %u",
-			  rc_in->rc_header.h_nodeid);
-		return;
-	}
-
 	dlm_recover_process_copy(ls, rc_in);
 }
 
@@ -397,6 +383,7 @@ static int send_ls_not_ready(int nodeid,
 
 	rc->rc_type = DLM_RCOM_STATUS_REPLY;
 	rc->rc_id = rc_in->rc_id;
+	rc->rc_seq_reply = rc_in->rc_seq;
 	rc->rc_result = -ESRCH;
 
 	dlm_rcom_out(rc);
@@ -405,6 +392,30 @@ static int send_ls_not_ready(int nodeid,
 	return 0;
 }
 
+static int is_old_reply(struct dlm_ls *ls, struct dlm_rcom *rc)
+{
+	uint64_t seq;
+	int rv = 0;
+
+	switch (rc->rc_type) {
+	case DLM_RCOM_STATUS_REPLY:
+	case DLM_RCOM_NAMES_REPLY:
+	case DLM_RCOM_LOOKUP_REPLY:
+	case DLM_RCOM_LOCK_REPLY:
+		spin_lock(&ls->ls_recover_lock);
+		seq = ls->ls_recover_seq;
+		spin_unlock(&ls->ls_recover_lock);
+		if (rc->rc_seq_reply != seq) {
+			log_error(ls, "ignoring old reply %x from %d "
+				      "seq_reply %llx expect %llx",
+				      rc->rc_type, rc->rc_header.h_nodeid,
+				      rc->rc_seq_reply, seq);
+			rv = 1;
+		}
+	}
+	return rv;
+}
+
 /* Called by dlm_recvd; corresponds to dlm_receive_message() but special
    recovery-only comms are sent through here. */
 
@@ -433,6 +444,9 @@ void dlm_receive_rcom(struct dlm_header 
 		goto out;
 	}
 
+	if (is_old_reply(ls, rc))
+		goto out;
+
 	if (nodeid != rc->rc_header.h_nodeid) {
 		log_error(ls, "bad rcom nodeid %d from %d",
 			  rc->rc_header.h_nodeid, nodeid);
Index: latest/fs/dlm/util.c
===================================================================
--- latest.orig/fs/dlm/util.c
+++ latest/fs/dlm/util.c
@@ -134,6 +134,8 @@ void dlm_rcom_out(struct dlm_rcom *rc)
 	rc->rc_type		= cpu_to_le32(rc->rc_type);
 	rc->rc_result		= cpu_to_le32(rc->rc_result);
 	rc->rc_id		= cpu_to_le64(rc->rc_id);
+	rc->rc_seq		= cpu_to_le64(rc->rc_seq);
+	rc->rc_seq_reply	= cpu_to_le64(rc->rc_seq_reply);
 
 	if (type == DLM_RCOM_LOCK)
 		rcom_lock_out((struct rcom_lock *) rc->rc_buf);
@@ -151,6 +153,8 @@ void dlm_rcom_in(struct dlm_rcom *rc)
 	rc->rc_type		= le32_to_cpu(rc->rc_type);
 	rc->rc_result		= le32_to_cpu(rc->rc_result);
 	rc->rc_id		= le64_to_cpu(rc->rc_id);
+	rc->rc_seq		= le64_to_cpu(rc->rc_seq);
+	rc->rc_seq_reply	= le64_to_cpu(rc->rc_seq_reply);
 
 	if (rc->rc_type == DLM_RCOM_LOCK)
 		rcom_lock_in((struct rcom_lock *) rc->rc_buf);