Sophie

Sophie

distrib > Scientific%20Linux > 5x > x86_64 > by-pkgid > 27922b4260f65d317aabda37e42bbbff > files > 2487

kernel-2.6.18-238.el5.src.rpm

From: Flavio Leitner <fbl@redhat.com>
Date: Wed, 6 Oct 2010 18:41:24 -0400
Subject: [net] bonding: fix IGMP report on slave during failover
Message-id: <20101006184124.GB2730@redhat.com>
Patchwork-id: 28631
O-Subject: [PATCH RHEL5.6 v2] bonding: interface doesn't issue IGMP report on
	slave interface during failover
Bugzilla: 637764
RH-Acked-by: David S. Miller <davem@redhat.com>
RH-Acked-by: Andy Gospodarek <gospo@redhat.com>
RH-Acked-by: Amerigo Wang <amwang@redhat.com>
RH-Acked-by: Jiri Pirko <jpirko@redhat.com>

RHBZ: 637764 Bonded interface doesn't issue IGMP report (join)
      on slave interface during failover

This is a backport of three commits applied on net-next-2.6
to fix the issue:

commit: 5a37e8ca8536c47871d46c82211f399adf06fd44

   bonding: rejoin multicast groups on VLANs

   During a failover, the IGMP membership is sent to update
   the switch restoring the traffic, but it misses groups added
   to VLAN devices running on top of bonding devices.

   This patch changes it to iterate over all VLAN devices
   on top of it sending IGMP memberships too.

   Signed-off-by: Flavio Leitner <fleitner@redhat.com>
   Signed-off-by: David S. Miller <davem@davemloft.net>

commit: e12b453904c54bbdc515778ff664d87a7f9473af

   bonding: fix to rejoin multicast groups immediately

   The IGMP spec states that if the system receives a
   membership report, it shouldn't send another for the
   next minute. However, if a link failure happens right
   after that, the backup slave and the switch connected
   to this slave will not know about the multicast and
   the traffic will hang for about a minute.

   This patch fixes it to rejoin multicast groups immediately
   after a failover restoring the multicast traffic.

   Signed-off-by: Flavio Leitner <fleitner@redhat.com>
   Signed-off-by: David S. Miller <davem@davemloft.net>

commit: c2952c314b4fe61820ba8fd6c949eed636140d52

   bonding: add retransmit membership reports tunable

   Allow sysadmins to configure the number of multicast
   membership reports sent on a link failure event.

   Signed-off-by: Flavio Leitner <fleitner@redhat.com>
   Signed-off-by: David S. Miller <davem@davemloft.net>

Test Build:
https://brewweb.devel.redhat.com/taskinfo?taskID=2804683

Testing Status:
Successfully tested by myself while reproducing the issues.

The proposed patch below is based on 2.6.18-225.el5

Signed-off-by: Flavio Leitner <fleitner@redhat.com>


diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt
index a4237e0..f945a30 100644
--- a/Documentation/networking/bonding.txt
+++ b/Documentation/networking/bonding.txt
@@ -684,6 +684,14 @@ xmit_hash_policy
 	does not exist, and the layer2 policy is the only policy.  The
 	layer2+3 value was added for bonding version 3.2.2.
 
+resend_igmp
+
+	Specifies the number of IGMP membership reports to be issued after
+	a failover event. One membership report is issued immediately after
+	the failover; subsequent reports are sent at 200ms intervals.
+
+	The valid range is 0 - 255; the default value is 1. This option
+	was added for bonding version 3.7.0.
 
 3. Configuring Bonding Devices
 ==============================
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index cddd517..e2c5c1d 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -102,6 +102,7 @@ static char *arp_ip_target[BOND_MAX_ARP_TARGETS] = { NULL, };
 static char *arp_validate = NULL;
 static char *fail_over_mac = NULL;
 struct bond_params bonding_defaults;
+static int resend_igmp = BOND_DEFAULT_RESEND_IGMP;
 int debug = 0;
 
 module_param(max_bonds, int, 0);
@@ -149,6 +150,8 @@ module_param(arp_validate, charp, 0);
 MODULE_PARM_DESC(arp_validate, "validate src/dst of ARP probes: none (default), active, backup or all");
 module_param(fail_over_mac, charp, 0);
 MODULE_PARM_DESC(fail_over_mac, "For active-backup, do not set all slaves to the same MAC.  none (default), active or follow");
+module_param(resend_igmp, int, 0);
+MODULE_PARM_DESC(resend_igmp, "Number of IGMP membership reports to send on link failure");
 module_param(debug, int, 0);
 MODULE_PARM_DESC(debug, "Print debug messages; 0 for off (default), 1 for on");
 
@@ -901,18 +904,13 @@ static void bond_mc_delete(struct bonding *bond, void *addr, int alen)
 }
 
 
-/*
- * Retrieve the list of registered multicast addresses for the bonding
- * device and retransmit an IGMP JOIN request to the current active
- * slave.
- */
-static void bond_resend_igmp_join_requests(struct bonding *bond)
+static void __bond_resend_igmp_join_requests(struct net_device *dev)
 {
 	struct in_device *in_dev;
 	struct ip_mc_list *im;
 
 	rcu_read_lock();
-	in_dev = __in_dev_get_rcu(bond->dev);
+	in_dev = __in_dev_get_rcu(dev);
 	if (in_dev) {
 		for (im = in_dev->mc_list; im; im = im->next) {
 			ip_mc_rejoin_group(im);
@@ -922,6 +920,44 @@ static void bond_resend_igmp_join_requests(struct bonding *bond)
 	rcu_read_unlock();
 }
 
+
+/*
+ * Resend IGMP membership reports for all groups joined on the bonding
+ * device and on every VLAN device in bond->vlan_list, then decrement
+ * igmp_retrans and re-queue mcast_work after HZ/5 while it stays above zero.
+ */
+static void bond_resend_igmp_join_requests(struct bonding *bond)
+{
+	struct net_device *vlan_dev;
+	struct vlan_entry *vlan;
+
+	read_lock(&bond->lock);
+
+	/* rejoin all groups on bond device */
+	__bond_resend_igmp_join_requests(bond->dev);
+
+	/* rejoin all groups on vlan devices */
+	if (bond->vlgrp) {
+		list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
+			vlan_dev = vlan_group_get_device(bond->vlgrp,
+							 vlan->vlan_id);
+			if (vlan_dev)
+				__bond_resend_igmp_join_requests(vlan_dev);
+		}
+	}
+
+	if (--bond->igmp_retrans > 0)
+		queue_delayed_work(bond->wq, &bond->mcast_work, HZ/5);
+
+	read_unlock(&bond->lock);
+}
+
+void bond_resend_igmp_join_requests_delayed(void *work_data)
+{
+	struct bonding *bond = work_data;
+	bond_resend_igmp_join_requests(bond);
+}
+
 /*
  * Totally destroys the mc_list in bond
  */
@@ -1029,7 +1065,6 @@ static void bond_mc_swap(struct bonding *bond, struct slave *new_active, struct
 		for (dmi = bond->dev->mc_list; dmi; dmi = dmi->next) {
 			dev_mc_add(new_active->dev, dmi->dmi_addr, dmi->dmi_addrlen, 0);
 		}
-		bond_resend_igmp_join_requests(bond);
 	}
 }
 
@@ -1276,10 +1311,13 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
 		}
 	}
 
-	/* resend IGMP joins since all were sent on curr_active_slave */
-	if (bond->params.mode == BOND_MODE_ROUNDROBIN) {
-		bond_resend_igmp_join_requests(bond);
-	}
+	/* resend IGMP joins since active slave has changed or
+	 * all were sent on curr_active_slave */
+	if ((USES_PRIMARY(bond->params.mode) && new_active) ||
+	    bond->params.mode == BOND_MODE_ROUNDROBIN) {
+		bond->igmp_retrans = bond->params.resend_igmp;
+		queue_delayed_work(bond->wq, &bond->mcast_work, 0);
+ 	}
 }
 
 /**
@@ -3866,6 +3904,8 @@ static int bond_open(struct net_device *bond_dev)
 
 	bond->kill_timers = 0;
 
+	INIT_WORK(&bond->mcast_work, bond_resend_igmp_join_requests_delayed, (void *)bond);
+
 	if ((bond->params.mode == BOND_MODE_TLB) ||
 	    (bond->params.mode == BOND_MODE_ALB)) {
 		/* bond_alb_initialize must be called before the timer
@@ -3952,6 +3992,8 @@ static int bond_close(struct net_device *bond_dev)
 		break;
 	}
 
+	if (delayed_work_pending(&bond->mcast_work))
+		cancel_delayed_work(&bond->mcast_work);
 
 	if ((bond->params.mode == BOND_MODE_TLB) ||
 	    (bond->params.mode == BOND_MODE_ALB)) {
@@ -4774,6 +4816,9 @@ static void bond_work_cancel_all(struct bonding *bond)
 	if (bond->params.mode == BOND_MODE_8023AD &&
 	    delayed_work_pending(&bond->ad_work))
 		cancel_delayed_work(&bond->ad_work);
+
+	if (delayed_work_pending(&bond->mcast_work))
+		cancel_delayed_work(&bond->mcast_work);
 }
 
 /* De-initialize device specific data.
@@ -4967,6 +5012,13 @@ static int bond_check_params(struct bond_params *params)
 		}
 	}
 
+	if (resend_igmp < 0 || resend_igmp > 255) {
+		pr_warning("Warning: resend_igmp (%d) should be between "
+			   "0 and 255, resetting to %d\n",
+			   resend_igmp, BOND_DEFAULT_RESEND_IGMP);
+		resend_igmp = BOND_DEFAULT_RESEND_IGMP;
+	}
+
 	/* reset values for TLB/ALB */
 	if ((bond_mode == BOND_MODE_TLB) ||
 	    (bond_mode == BOND_MODE_ALB)) {
@@ -5179,6 +5231,7 @@ static int bond_check_params(struct bond_params *params)
 	params->primary[0] = 0;
 	params->primary_reselect = primary_reselect_value;
 	params->fail_over_mac = fail_over_mac_value;
+	params->resend_igmp = resend_igmp;
 
 	if (primary) {
 		strncpy(params->primary, primary, IFNAMSIZ);
diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
index 977ae5a..8512082 100644
--- a/drivers/net/bonding/bond_sysfs.c
+++ b/drivers/net/bonding/bond_sysfs.c
@@ -1467,6 +1467,46 @@ static CLASS_DEVICE_ATTR(mark_steering, S_IWUSR | S_IRUGO, bonding_show_mark_ste
 			 bonding_store_mark_steering);
 
 
+/*
+ * Show and set the number of IGMP membership reports to send on link failure
+ */
+static ssize_t bonding_show_resend_igmp(struct class_device *d, char *buf)
+{
+	struct bonding *bond = to_bond(d);
+
+	return sprintf(buf, "%d\n", bond->params.resend_igmp);
+}
+
+static ssize_t bonding_store_resend_igmp(struct class_device *d,
+					  const char *buf, size_t count)
+{
+	int new_value, ret = count;
+	struct bonding *bond = to_bond(d);
+
+	if (sscanf(buf, "%d", &new_value) != 1) {
+		pr_err("%s: no resend_igmp value specified.\n",
+		       bond->dev->name);
+		ret = -EINVAL;
+		goto out;
+	}
+
+	/* Enforce both ends of the documented 0-255 range. */
+	if (new_value < 0 || new_value > 255) {
+		pr_err("%s: Invalid resend_igmp value %d not in range 0-255; rejected.\n",
+		       bond->dev->name, new_value);
+		ret = -EINVAL;
+		goto out;
+	}
+
+	pr_info("%s: Setting resend_igmp to %d.\n", bond->dev->name, new_value);
+	bond->params.resend_igmp = new_value;
+out:
+	return ret;
+}
+
+static CLASS_DEVICE_ATTR(resend_igmp, S_IRUGO | S_IWUSR,
+		   bonding_show_resend_igmp, bonding_store_resend_igmp);
+
 static struct attribute *per_bond_attrs[] = {
 	&class_device_attr_slaves.attr,
 	&class_device_attr_mode.attr,
@@ -1492,6 +1532,7 @@ static struct attribute *per_bond_attrs[] = {
 	&class_device_attr_ad_partner_key.attr,
 	&class_device_attr_ad_partner_mac.attr,
 	&class_device_attr_mark_steering.attr,
+	&class_device_attr_resend_igmp.attr,
 	NULL,
 };
 
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index cddc6d4..64425ea 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -140,6 +140,7 @@ struct bond_params {
 	char primary[IFNAMSIZ];
 	int primary_reselect;
 	__be32 arp_targets[BOND_MAX_ARP_TARGETS];
+	int resend_igmp;
 };
 
 struct bond_parm_tbl {
@@ -208,6 +209,7 @@ struct bonding {
 	s32      slave_cnt; /* never change this value outside the attach/detach wrappers */
 	rwlock_t lock;
 	rwlock_t curr_slave_lock;
+	s8       igmp_retrans;
 	s8       kill_timers;
 	s8	 send_grat_arp;
 	s8	 send_unsol_na;
@@ -234,6 +236,7 @@ struct bonding {
 	struct   work_struct arp_work;
 	struct   work_struct alb_work;
 	struct   work_struct ad_work;
+	struct   work_struct mcast_work;
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 	struct   in6_addr master_ipv6;
 #endif
diff --git a/include/linux/if_bonding.h b/include/linux/if_bonding.h
index 65c2d24..eea8a25 100644
--- a/include/linux/if_bonding.h
+++ b/include/linux/if_bonding.h
@@ -83,6 +83,8 @@
 
 #define BOND_DEFAULT_MAX_BONDS  1   /* Default maximum number of devices to support */
 
+#define BOND_DEFAULT_RESEND_IGMP	1 /* Default number of IGMP membership reports */
+
 /* hashing types */
 #define BOND_XMIT_POLICY_LAYER2		0 /* layer 2 (MAC only), default */
 #define BOND_XMIT_POLICY_LAYER34	1 /* layer 3+4 (IP ^ (TCP || UDP)) */
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 15f9b10..9224b8a 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1261,14 +1261,14 @@ void ip_mc_rejoin_group(struct ip_mc_list *im)
 	if (im->multiaddr == IGMP_ALL_HOSTS)
 		return;
 
-	if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev)) {
-		igmp_mod_timer(im, IGMP_Initial_Report_Delay);
-		return;
-	}
-	/* else, v3 */
-	im->crcount = in_dev->mr_qrv ? in_dev->mr_qrv :
-		IGMP_Unsolicited_Report_Count;
-	igmp_ifc_event(in_dev);
+	/* a failover is happening and switches
+	 * must be notified immediately */
+	if (IGMP_V1_SEEN(in_dev))
+		igmp_send_report(in_dev, im, IGMP_HOST_MEMBERSHIP_REPORT);
+	else if (IGMP_V2_SEEN(in_dev))
+		igmp_send_report(in_dev, im, IGMPV2_HOST_MEMBERSHIP_REPORT);
+	else
+		igmp_send_report(in_dev, im, IGMPV3_HOST_MEMBERSHIP_REPORT);
 #endif
 }