Sophie

Sophie

distrib > Scientific%20Linux > 5x > x86_64 > media > main-src > by-pkgid > d0a35cd31c1125e2132804d68547073d > files > 2066

kernel-2.6.18-194.26.1.el5.src.rpm

From: Flavio Leitner <fbl@redhat.com>
Date: Thu, 7 Oct 2010 19:23:05 -0300
Subject: [net] bonding: fix IGMP report on slave during failover
Message-id: <20101007222305.GG2730@redhat.com>
O-Subject: [RHEL5.5.z patch v2] BZ640973 [net] bonding: fix IGMP report on slave during failover
Bugzilla: 640973
RH-Acked-by: Cong Wang <amwang@redhat.com>
RH-Acked-by: Andy Gospodarek <gospo@redhat.com>
RH-Acked-by: David S. Miller <davem@redhat.com>
RH-Acked-by: Jiri Pirko <jpirko@redhat.com>

This is pretty much the same patch, but the z-stream tree
doesn't provide pr_warning/pr_err or vlan_group_get_device().

I have retested this one too.

RHBZ: 640973 Bonded interface doesn't issue IGMP report (join)
      on slave interface during failover

This is a backport of three commits applied on net-next-2.6
to fix the issue:

commit: 5a37e8ca8536c47871d46c82211f399adf06fd44

   bonding: rejoin multicast groups on VLANs
   
   During a failover, the IGMP membership is sent to update
   the switch restoring the traffic, but it misses groups added
   to VLAN devices running on top of bonding devices.
   
   This patch changes it to iterate over all VLAN devices
   on top of it sending IGMP memberships too.
   
   Signed-off-by: Flavio Leitner <fleitner@redhat.com>
   Signed-off-by: David S. Miller <davem@davemloft.net>
   
commit: e12b453904c54bbdc515778ff664d87a7f9473af
   
   bonding: fix to rejoin multicast groups immediately
   
   The IGMP spec states that if the system receives a
   membership report, it shouldn't send another for the
   next minute. However, if a link failure happens right
   after that, the backup slave and the switch connected
   to this slave will not know about the multicast and
   the traffic will hang for about a minute.
   
   This patch fixes it to rejoin multicast groups immediately
   after a failover restoring the multicast traffic.
   
   Signed-off-by: Flavio Leitner <fleitner@redhat.com>
   Signed-off-by: David S. Miller <davem@davemloft.net>
   
commit: c2952c314b4fe61820ba8fd6c949eed636140d52

   bonding: add retransmit membership reports tunable
   
   Allow sysadmins to configure the number of multicast
   membership reports sent on a link failure event.
   
   Signed-off-by: Flavio Leitner <fleitner@redhat.com>
   Signed-off-by: David S. Miller <davem@davemloft.net>

Test Build:
http://brewweb.devel.redhat.com/brew/taskinfo?taskID=2809482

Testing Status:
Successfully tested by myself while reproducing the issues.

The proposed patch below is based on 2.6.18-194.20.1.el5

Signed-off-by: Flavio Leitner <fleitner@redhat.com>
---
 Documentation/networking/bonding.txt |    8 +++
 drivers/net/bonding/bond_main.c      |   77 +++++++++++++++++++++++++++++------
 drivers/net/bonding/bond_sysfs.c     |   43 +++++++++++++++++++
 drivers/net/bonding/bonding.h        |    3 +
 include/linux/if_bonding.h           |    2 
 net/ipv4/igmp.c                      |   16 +++----
 6 files changed, 129 insertions(+), 20 deletions(-)

Index: linux-2.6.18.x86_64/drivers/net/bonding/bond_main.c
===================================================================
--- linux-2.6.18.x86_64.orig/drivers/net/bonding/bond_main.c
+++ linux-2.6.18.x86_64/drivers/net/bonding/bond_main.c
@@ -102,6 +102,7 @@ static char *arp_ip_target[BOND_MAX_ARP_
 static char *arp_validate = NULL;
 static char *fail_over_mac = NULL;
 struct bond_params bonding_defaults;
+static int resend_igmp = BOND_DEFAULT_RESEND_IGMP;
 int debug = 0;
 
 module_param(max_bonds, int, 0);
@@ -149,6 +150,8 @@ module_param(arp_validate, charp, 0);
 MODULE_PARM_DESC(arp_validate, "validate src/dst of ARP probes: none (default), active, backup or all");
 module_param(fail_over_mac, charp, 0);
 MODULE_PARM_DESC(fail_over_mac, "For active-backup, do not set all slaves to the same MAC.  none (default), active or follow");
+module_param(resend_igmp, int, 0);
+MODULE_PARM_DESC(resend_igmp, "Number of IGMP membership reports to send on link failure");
 module_param(debug, int, 0);
 MODULE_PARM_DESC(debug, "Print debug messages; 0 for off (default), 1 for on");
 
@@ -899,18 +902,13 @@ static void bond_mc_delete(struct bondin
 }
 
 
-/*
- * Retrieve the list of registered multicast addresses for the bonding
- * device and retransmit an IGMP JOIN request to the current active
- * slave.
- */
-static void bond_resend_igmp_join_requests(struct bonding *bond)
+static void __bond_resend_igmp_join_requests(struct net_device *dev)
 {
 	struct in_device *in_dev;
 	struct ip_mc_list *im;
 
 	rcu_read_lock();
-	in_dev = __in_dev_get_rcu(bond->dev);
+	in_dev = __in_dev_get_rcu(dev);
 	if (in_dev) {
 		for (im = in_dev->mc_list; im; im = im->next) {
 			ip_mc_rejoin_group(im);
@@ -920,6 +918,43 @@ static void bond_resend_igmp_join_reques
 	rcu_read_unlock();
 }
 
+
+/*
+ * Resend IGMP joins for every multicast group registered on the bonding
+ * device and on each VLAN device in bond->vlan_list, holding bond->lock
+ * for reading. Re-queues mcast_work (HZ/5) while igmp_retrans stays > 0.
+ */
+static void bond_resend_igmp_join_requests(struct bonding *bond)
+{
+	struct net_device *vlan_dev;
+	struct vlan_entry *vlan;
+
+	read_lock(&bond->lock);
+
+	/* rejoin all groups on bond device */
+	__bond_resend_igmp_join_requests(bond->dev);
+
+	/* rejoin all groups on vlan devices */
+	if (bond->vlgrp) {
+		list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
+			vlan_dev = bond->vlgrp->vlan_devices[vlan->vlan_id];
+			if (vlan_dev)
+				__bond_resend_igmp_join_requests(vlan_dev);
+		}
+	}
+
+	if (--bond->igmp_retrans > 0)	/* reports still owed: run again */
+		queue_delayed_work(bond->wq, &bond->mcast_work, HZ/5);
+
+	read_unlock(&bond->lock);
+}
+
+void bond_resend_igmp_join_requests_delayed(void *work_data)
+{
+	struct bonding *bond = work_data;	/* data arg given to INIT_WORK() */
+	bond_resend_igmp_join_requests(bond);
+}
+
 /*
  * Totally destroys the mc_list in bond
  */
@@ -1027,7 +1062,6 @@ static void bond_mc_swap(struct bonding 
 		for (dmi = bond->dev->mc_list; dmi; dmi = dmi->next) {
 			dev_mc_add(new_active->dev, dmi->dmi_addr, dmi->dmi_addrlen, 0);
 		}
-		bond_resend_igmp_join_requests(bond);
 	}
 }
 
@@ -1274,10 +1308,13 @@ void bond_change_active_slave(struct bon
 		}
 	}
 
-	/* resend IGMP joins since all were sent on curr_active_slave */
-	if (bond->params.mode == BOND_MODE_ROUNDROBIN) {
-		bond_resend_igmp_join_requests(bond);
-	}
+	/* resend IGMP joins since active slave has changed or
+	 * all were sent on curr_active_slave */
+	if ((USES_PRIMARY(bond->params.mode) && new_active) ||
+	    bond->params.mode == BOND_MODE_ROUNDROBIN) {
+		bond->igmp_retrans = bond->params.resend_igmp;
+		queue_delayed_work(bond->wq, &bond->mcast_work, 0);
+ 	}
 }
 
 /**
@@ -3846,6 +3883,8 @@ static int bond_open(struct net_device *
 
 	bond->kill_timers = 0;
 
+	INIT_WORK(&bond->mcast_work, bond_resend_igmp_join_requests_delayed, (void *)bond);
+
 	if ((bond->params.mode == BOND_MODE_TLB) ||
 	    (bond->params.mode == BOND_MODE_ALB)) {
 		/* bond_alb_initialize must be called before the timer
@@ -3932,6 +3971,8 @@ static int bond_close(struct net_device 
 		break;
 	}
 
+	if (delayed_work_pending(&bond->mcast_work))
+		cancel_delayed_work(&bond->mcast_work);
 
 	if ((bond->params.mode == BOND_MODE_TLB) ||
 	    (bond->params.mode == BOND_MODE_ALB)) {
@@ -4709,6 +4750,9 @@ static void bond_work_cancel_all(struct 
 	if (bond->params.mode == BOND_MODE_8023AD &&
 	    delayed_work_pending(&bond->ad_work))
 		cancel_delayed_work(&bond->ad_work);
+
+	if (delayed_work_pending(&bond->mcast_work))
+		cancel_delayed_work(&bond->mcast_work);
 }
 
 /* De-initialize device specific data.
@@ -4902,6 +4946,14 @@ static int bond_check_params(struct bond
 		}
 	}
 
+	if (resend_igmp < 0 || resend_igmp > 255) {
+		printk(KERN_WARNING DRV_NAME
+			   ": Warning: resend_igmp (%d) should be between "
+			   "0 and 255, resetting to %d\n",
+			   resend_igmp, BOND_DEFAULT_RESEND_IGMP);
+		resend_igmp = BOND_DEFAULT_RESEND_IGMP;
+	}
+
 	/* reset values for TLB/ALB */
 	if ((bond_mode == BOND_MODE_TLB) ||
 	    (bond_mode == BOND_MODE_ALB)) {
@@ -5114,6 +5166,7 @@ static int bond_check_params(struct bond
 	params->primary[0] = 0;
 	params->primary_reselect = primary_reselect_value;
 	params->fail_over_mac = fail_over_mac_value;
+	params->resend_igmp = resend_igmp;
 
 	if (primary) {
 		strncpy(params->primary, primary, IFNAMSIZ);
Index: linux-2.6.18.x86_64/drivers/net/bonding/bond_sysfs.c
===================================================================
--- linux-2.6.18.x86_64.orig/drivers/net/bonding/bond_sysfs.c
+++ linux-2.6.18.x86_64/drivers/net/bonding/bond_sysfs.c
@@ -1439,6 +1439,48 @@ static CLASS_DEVICE_ATTR(ad_partner_mac,
 
 
 
+/*
+ * Show and set the number of IGMP membership reports to send on link failure
+ */
+static ssize_t bonding_show_resend_igmp(struct class_device *d, char *buf)
+{
+	struct bonding *bond = to_bond(d);
+
+	return sprintf(buf, "%d\n", bond->params.resend_igmp);	/* decimal + newline */
+}
+
+static ssize_t bonding_store_resend_igmp(struct class_device *d,
+					  const char *buf, size_t count)
+{
+	int new_value, ret = count;	/* ret: count on success, -EINVAL on error */
+	struct bonding *bond = to_bond(d);
+
+	if (sscanf(buf, "%d", &new_value) != 1) {
+		printk(KERN_INFO DRV_NAME ": %s: no resend_igmp value specified.\n",
+		       bond->dev->name);
+		ret = -EINVAL;
+		goto out;
+	}
+
+	/* enforce both bounds, matching bond_check_params() */
+	if (new_value < 0 || new_value > 255) {
+		printk(KERN_INFO DRV_NAME
+		       ": %s: Invalid resend_igmp value %d not in range 0-255; rejected.\n",
+		       bond->dev->name, new_value);
+		ret = -EINVAL;
+		goto out;
+	}
+
+	printk(KERN_INFO DRV_NAME ": %s: Setting resend_igmp to %d.\n",
+		bond->dev->name, new_value);
+	bond->params.resend_igmp = new_value;
+out:
+	return ret;
+}
+
+static CLASS_DEVICE_ATTR(resend_igmp, S_IRUGO | S_IWUSR,	/* read: all, write: owner */
+		   bonding_show_resend_igmp, bonding_store_resend_igmp);
+
 static struct attribute *per_bond_attrs[] = {
 	&class_device_attr_slaves.attr,
 	&class_device_attr_mode.attr,
@@ -1463,6 +1505,7 @@ static struct attribute *per_bond_attrs[
 	&class_device_attr_ad_actor_key.attr,
 	&class_device_attr_ad_partner_key.attr,
 	&class_device_attr_ad_partner_mac.attr,
+	&class_device_attr_resend_igmp.attr,
 	NULL,
 };
 
Index: linux-2.6.18.x86_64/drivers/net/bonding/bonding.h
===================================================================
--- linux-2.6.18.x86_64.orig/drivers/net/bonding/bonding.h
+++ linux-2.6.18.x86_64/drivers/net/bonding/bonding.h
@@ -138,6 +138,7 @@ struct bond_params {
 	char primary[IFNAMSIZ];
 	int primary_reselect;
 	__be32 arp_targets[BOND_MAX_ARP_TARGETS];
+	int resend_igmp;
 };
 
 struct bond_parm_tbl {
@@ -205,6 +206,7 @@ struct bonding {
 	s32      slave_cnt; /* never change this value outside the attach/detach wrappers */
 	rwlock_t lock;
 	rwlock_t curr_slave_lock;
+	s8       igmp_retrans;
 	s8       kill_timers;
 	s8	 send_grat_arp;
 	s8	 send_unsol_na;
@@ -231,6 +233,7 @@ struct bonding {
 	struct   work_struct arp_work;
 	struct   work_struct alb_work;
 	struct   work_struct ad_work;
+	struct   work_struct mcast_work;
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 	struct   in6_addr master_ipv6;
 #endif
Index: linux-2.6.18.x86_64/include/linux/if_bonding.h
===================================================================
--- linux-2.6.18.x86_64.orig/include/linux/if_bonding.h
+++ linux-2.6.18.x86_64/include/linux/if_bonding.h
@@ -83,6 +83,8 @@
 
 #define BOND_DEFAULT_MAX_BONDS  1   /* Default maximum number of devices to support */
 
+#define BOND_DEFAULT_RESEND_IGMP	1 /* Default number of IGMP membership reports */
+
 /* hashing types */
 #define BOND_XMIT_POLICY_LAYER2		0 /* layer 2 (MAC only), default */
 #define BOND_XMIT_POLICY_LAYER34	1 /* layer 3+4 (IP ^ (TCP || UDP)) */
Index: linux-2.6.18.x86_64/net/ipv4/igmp.c
===================================================================
--- linux-2.6.18.x86_64.orig/net/ipv4/igmp.c
+++ linux-2.6.18.x86_64/net/ipv4/igmp.c
@@ -1261,14 +1261,14 @@ void ip_mc_rejoin_group(struct ip_mc_lis
 	if (im->multiaddr == IGMP_ALL_HOSTS)
 		return;
 
-	if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev)) {
-		igmp_mod_timer(im, IGMP_Initial_Report_Delay);
-		return;
-	}
-	/* else, v3 */
-	im->crcount = in_dev->mr_qrv ? in_dev->mr_qrv :
-		IGMP_Unsolicited_Report_Count;
-	igmp_ifc_event(in_dev);
+	/* a failover is happening and switches
+	 * must be notified immediately */
+	if (IGMP_V1_SEEN(in_dev))
+		igmp_send_report(in_dev, im, IGMP_HOST_MEMBERSHIP_REPORT);
+	else if (IGMP_V2_SEEN(in_dev))
+		igmp_send_report(in_dev, im, IGMPV2_HOST_MEMBERSHIP_REPORT);
+	else
+		igmp_send_report(in_dev, im, IGMPV3_HOST_MEMBERSHIP_REPORT);
 #endif
 }
 
Index: linux-2.6.18.x86_64/Documentation/networking/bonding.txt
===================================================================
--- linux-2.6.18.x86_64.orig/Documentation/networking/bonding.txt
+++ linux-2.6.18.x86_64/Documentation/networking/bonding.txt
@@ -684,6 +684,14 @@ xmit_hash_policy
 	does not exist, and the layer2 policy is the only policy.  The
 	layer2+3 value was added for bonding version 3.2.2.
 
+resend_igmp
+
+	Specifies the number of IGMP membership reports to be issued after
+	a failover event. One membership report is issued immediately after
+	the failover; subsequent reports are sent at 200ms intervals.
+
+	The valid range is 0 - 255; the default value is 1. This option
+	was added for bonding version 3.7.0.
 
 3. Configuring Bonding Devices
 ==============================