Sophie

Sophie

distrib > Scientific%20Linux > 5x > x86_64 > by-pkgid > fc11cd6e1c513a17304da94a5390f3cd > files > 2310

kernel-2.6.18-194.11.1.el5.src.rpm

From: Neil Horman <nhorman@redhat.com>
Date: Thu, 22 Oct 2009 10:33:21 -0400
Subject: [net] lvs: adjust sync protocol handling for ipvsadm -2
Message-id: 20091022143321.GB25788@shamino.rdu.redhat.com
O-Subject: Re: [RHEL 5.5 PATCH] lvs: adjust sync protocol handling so timeout values are taken correctly from ipvsadm (bz 524129)
Bugzilla: 524129
RH-Acked-by: Thomas Graf <tgraf@redhat.com>
RH-Acked-by: Jiri Pirko <jpirko@redhat.com>
RH-Acked-by: Cong Wang <amwang@redhat.com>
RH-Acked-by: Danny Feng <dfeng@redhat.com>

Ok, in response to congs concern over the check on that state variable, I've
gone and backported the entirety of this LVS upstream commit.  I've built it in
brew and booted it, and that all works fine. I need to point out two things:

1) The functionality of this patch is completely untested.  I'm posting it here
for early review to save time, but we're expecting test results from NASA next
week, so we need to wait until at least then to incorporate the patch.  Early
reviews would be appreciated however

2) The extra bits that this patch takes in constitute an ABI break.  The
affected symbols aren't on the whitelist, so from a strict ABI standpoint, we're
ok, but because of how REGISTER_PROTOCOL works in LVS, any 3rd party modules out
there that register LVS protocol support are going to start scribbling over
memory with this patch.  If anyone knows of any partners/vendors/users doing
this sort of thing, please let me know now, so we can give them an early heads
up.

Thanks
Neil

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 3b57b15..6f0c2ea 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -436,7 +436,8 @@ struct sk_buff;
 struct ip_vs_protocol {
 	struct ip_vs_protocol	*next;
 	char			*name;
-	__u16			protocol;
+	u16			protocol;
+	u16			num_states;
 	int			dont_defrag;
 	atomic_t		appcnt;		/* counter of proto app incs */
 	int			*timeout_table;	/* protocol timeout table */
diff --git a/net/ipv4/ipvs/ip_vs_proto.c b/net/ipv4/ipvs/ip_vs_proto.c
index 867d4e9..4bd7dd9 100644
--- a/net/ipv4/ipvs/ip_vs_proto.c
+++ b/net/ipv4/ipvs/ip_vs_proto.c
@@ -154,7 +154,7 @@ const char * ip_vs_state_name(__u16 proto, int state)
 	struct ip_vs_protocol *pp = ip_vs_proto_get(proto);
 
 	if (pp == NULL || pp->state_name == NULL)
-		return "ERR!";
+		return (IPPROTO_IP == proto) ? "NONE" : "ERR!";
 	return pp->state_name(state);
 }
 
diff --git a/net/ipv4/ipvs/ip_vs_proto_ah.c b/net/ipv4/ipvs/ip_vs_proto_ah.c
index 8b0505b..5a90372 100644
--- a/net/ipv4/ipvs/ip_vs_proto_ah.c
+++ b/net/ipv4/ipvs/ip_vs_proto_ah.c
@@ -160,6 +160,7 @@ static void ah_exit(struct ip_vs_protocol *pp)
 struct ip_vs_protocol ip_vs_protocol_ah = {
 	.name =			"AH",
 	.protocol =		IPPROTO_AH,
+	.num_states =		1,
 	.dont_defrag =		1,
 	.init =			ah_init,
 	.exit =			ah_exit,
diff --git a/net/ipv4/ipvs/ip_vs_proto_esp.c b/net/ipv4/ipvs/ip_vs_proto_esp.c
index c36ccf0..2cdc6a5 100644
--- a/net/ipv4/ipvs/ip_vs_proto_esp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_esp.c
@@ -159,6 +159,7 @@ static void esp_exit(struct ip_vs_protocol *pp)
 struct ip_vs_protocol ip_vs_protocol_esp = {
 	.name =			"ESP",
 	.protocol =		IPPROTO_ESP,
+	.num_states =		1,
 	.dont_defrag =		1,
 	.init =			esp_init,
 	.exit =			esp_exit,
diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c
index bc28b11..7e8364b 100644
--- a/net/ipv4/ipvs/ip_vs_proto_tcp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c
@@ -597,6 +597,7 @@ static void ip_vs_tcp_exit(struct ip_vs_protocol *pp)
 struct ip_vs_protocol ip_vs_protocol_tcp = {
 	.name =			"TCP",
 	.protocol =		IPPROTO_TCP,
+	.num_states =		IP_VS_TCP_S_LAST,
 	.dont_defrag =		0,
 	.appcnt =		ATOMIC_INIT(0),
 	.init =			ip_vs_tcp_init,
diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c
index 89d9175..c96ae99 100644
--- a/net/ipv4/ipvs/ip_vs_proto_udp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_udp.c
@@ -410,6 +410,7 @@ static void udp_exit(struct ip_vs_protocol *pp)
 struct ip_vs_protocol ip_vs_protocol_udp = {
 	.name =			"UDP",
 	.protocol =		IPPROTO_UDP,
+	.num_states =		IP_VS_UDP_S_LAST,
 	.dont_defrag =		0,
 	.init =			udp_init,
 	.exit =			udp_exit,
diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c
index 07da8e4..03fcde8 100644
--- a/net/ipv4/ipvs/ip_vs_sync.c
+++ b/net/ipv4/ipvs/ip_vs_sync.c
@@ -278,15 +278,21 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
 	struct ip_vs_sync_mesg *m = (struct ip_vs_sync_mesg *)buffer;
 	struct ip_vs_sync_conn *s;
 	struct ip_vs_sync_conn_options *opt;
+	struct ip_vs_protocol *pp;
 	struct ip_vs_conn *cp;
 	char *p;
 	int i;
 
+	if (buflen < sizeof(struct ip_vs_sync_mesg)) {
+		IP_VS_ERR_RL("sync message header too short\n");
+		return;
+	}
+
 	/* Convert size back to host byte order */
 	m->size = ntohs(m->size);
 
 	if (buflen != m->size) {
-		IP_VS_ERR("bogus message\n");
+		IP_VS_ERR_RL("bogus sync message size\n");
 		return;
 	}
 
@@ -299,10 +305,52 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
 
 	p = (char *)buffer + sizeof(struct ip_vs_sync_mesg);
 	for (i=0; i<m->nr_conns; i++) {
-		unsigned flags;
+		unsigned flags, state;
+
+		if (p + SIMPLE_CONN_SIZE > buffer+buflen) {
+			IP_VS_ERR_RL("bogus conn in sync message\n");
+			return;
+		}
+		s = (struct ip_vs_sync_conn *) p;
+
+		flags = ntohs(s->flags) | IP_VS_CONN_F_SYNC;
+		flags &= ~IP_VS_CONN_F_HASHED;
+		if (flags & IP_VS_CONN_F_SEQ_MASK) {
+			opt = (struct ip_vs_sync_conn_options *)&s[1];
+			p += FULL_CONN_SIZE;
+			if (p > buffer+buflen) {
+				IP_VS_ERR_RL("bogus conn options in sync message\n");
+				return;
+			}
+		} else {
+			opt = NULL;
+			p += SIMPLE_CONN_SIZE;
+		}
 
-		s = (struct ip_vs_sync_conn *)p;
-		flags = ntohs(s->flags);
+		state = ntohs(s->state);
+
+		if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
+			pp = ip_vs_proto_get(s->protocol);
+			if (!pp) {
+				IP_VS_ERR_RL("Unsupported protocol %u in sync msg\n",
+					s->protocol);
+				continue;
+			}
+			if (state >= pp->num_states) {
+				IP_VS_DBG(2, "Invalid %s state %u in sync msg\n",
+					pp->name, state);
+				continue;
+			}
+		} else {
+			/* protocol in templates is not used for state/timeout */
+			pp = NULL;
+			if (state > 0) {
+				IP_VS_DBG(2, "Invalid template state %u in sync msg\n",
+					state);
+				state = 0;
+			}
+		}
+  
 		if (!(flags & IP_VS_CONN_F_TEMPLATE))
 			cp = ip_vs_conn_in_get(s->protocol,
 					       s->caddr, s->cport,
@@ -321,7 +369,6 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
 				IP_VS_ERR("ip_vs_conn_new failed\n");
 				return;
 			}
-			cp->state = ntohs(s->state);
 		} else if (!cp->dest) {
 			/* it is an entry created by the synchronization */
 			cp->state = ntohs(s->state);
@@ -329,21 +376,23 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
 		}	/* Note that we don't touch its state and flags
 			   if it is a normal entry. */
 
-		if (flags & IP_VS_CONN_F_SEQ_MASK) {
-			opt = (struct ip_vs_sync_conn_options *)&s[1];
+		if (opt)
 			memcpy(&cp->in_seq, opt, sizeof(*opt));
-			p += FULL_CONN_SIZE;
-		} else
-			p += SIMPLE_CONN_SIZE;
 
 		atomic_set(&cp->in_pkts, sysctl_ip_vs_sync_threshold[0]);
-		cp->timeout = IP_VS_SYNC_CONN_TIMEOUT;
+		cp->state = state;
+		/*
+		 * We can not recover the right timeout for templates
+		 * in all cases, we can not find the right fwmark
+		 * virtual service. If needed, we can do it for
+		 * non-fwmark persistent services.
+		 */
+		if (!(flags & IP_VS_CONN_F_TEMPLATE) && pp->timeout_table)
+			cp->timeout = pp->timeout_table[state];
+		else
+			cp->timeout = (3*60*HZ);
 		ip_vs_conn_put(cp);
 
-		if (p > buffer+buflen) {
-			IP_VS_ERR("bogus message\n");
-			return;
-		}
 	}
 }