Sophie

Sophie

distrib > Scientific%20Linux > 5x > x86_64 > by-pkgid > fc11cd6e1c513a17304da94a5390f3cd > files > 2035

kernel-2.6.18-194.11.1.el5.src.rpm

From: Andy Gospodarek <gospo@redhat.com>
Date: Thu, 23 Apr 2009 14:44:45 -0400
Subject: [net] bonding: support for bonding of IPoIB interfaces
Message-id: 20090423184444.GB26634@gospo.rdu.redhat.com
O-Subject: [RHEL5.4 PATCH 2/2] bonding: add support for bonding of IPoIB interfaces
Bugzilla: 430758
RH-Acked-by: John W. Linville <linville@redhat.com>
RH-Acked-by: Neil Horman <nhorman@redhat.com>
RH-Acked-by: Prarit Bhargava <prarit@redhat.com>
RH-Acked-by: Thomas Graf <tgraf@redhat.com>

The original patches that added these features were:

    commit 872254dd6b1f80cb95ee9e2e22980888533fc293
    Author: Moni Shoua <monis@voltaire.com>
    Date:   Tue Oct 9 19:43:38 2007 -0700

        net/bonding: Enable bonding to enslave non ARPHRD_ETHER

    commit 6b1bf096508c870889c2be63c7757a04d72116fe
    Author: Moni Shoua <monis@voltaire.com>
    Date:   Tue Oct 9 19:43:40 2007 -0700

        net/bonding: Enable IP multicast for bonding IPoIB devices

    commit 3158bf7d414b69fdc0c715d0a4d82e12b74ef974
    Author: Moni Shoua <monis@voltaire.com>
    Date:   Tue Oct 9 19:43:41 2007 -0700

        net/bonding: Handlle wrong assumptions that slave is always an Ethernet device

The following patches were also important to ensure reliable failover,
so they were also added:

    commit c1da4ac752b8b0411791d26c678fcf23d2eed242
    Author: Or Gerlitz <ogerlitz@voltaire.com>
    Date:   Fri Jun 13 18:12:00 2008 -0700

        net/core: add NETDEV_BONDING_FAILOVER event

    commit 01f3109de49a889db8adf9116449727547ee497e
    Author: Or Gerlitz <ogerlitz@voltaire.com>
    Date:   Fri Jun 13 18:12:02 2008 -0700

        bonding: deliver netdev event for fail-over under the active-backup mode

Doug Ledford recently posted the following commit as part of his
InfiniBand update, so we will be able to take advantage of the two
patches above as well.

    commit dd5bdff83b19d9174126e0398b47117c3a80e22d
    Author: Or Gerlitz <ogerlitz@voltaire.com>
    Date:   Tue Jul 22 14:14:22 2008 -0700

        RDMA/cma: Add RDMA_CM_EVENT_ADDR_CHANGE event

This has been tested exclusively by customers/partners with positive
feedback.

This will resolve RHBZ 430758 and RHBZ 475663.

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 089e658..d7bb5a6 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1268,6 +1268,14 @@ void bond_select_active_slave(struct bonding *bond)
 			printk(KERN_INFO DRV_NAME ": %s: "
 			       "now running without any active interface !\n",
 			       bond->dev->name);
+
+			write_unlock_bh(&bond->curr_slave_lock);
+			read_unlock(&bond->lock);
+
+			netdev_bonding_change(bond->dev);
+
+			read_lock(&bond->lock);
+			write_lock_bh(&bond->curr_slave_lock);
 		}
 	}
 }
@@ -1354,7 +1362,8 @@ static int bond_compute_features(struct bonding *bond)
 	struct slave *slave;
 	struct net_device *bond_dev = bond->dev;
 	unsigned long features = bond_dev->features;
-	unsigned short max_hard_header_len = ETH_HLEN;
+	unsigned short max_hard_header_len = max((u16)ETH_HLEN,
+						bond_dev->hard_header_len);
 	int i;
 
 	features &= ~(NETIF_F_ALL_CSUM | BOND_VLAN_FEATURES);
@@ -1381,6 +1390,29 @@ done:
 	return 0;
 }
 
+/* Copy the slave's header ops, neigh_setup, type, hard_header_len, addr_len
+ * and broadcast address onto the bond, then set bond->setup_by_slave. */
+static void bond_setup_by_slave(struct net_device *bond_dev,
+				struct net_device *slave_dev)
+{
+	struct bonding *bond = bond_dev->priv;
+	bond_dev->hard_header	        = slave_dev->hard_header;
+	bond_dev->rebuild_header        = slave_dev->rebuild_header;
+	bond_dev->hard_header_cache	= slave_dev->hard_header_cache;
+	bond_dev->header_cache_update   = slave_dev->header_cache_update;
+	bond_dev->hard_header_parse	= slave_dev->hard_header_parse;
+
+	bond_dev->neigh_setup           = slave_dev->neigh_setup;
+
+	bond_dev->type		    = slave_dev->type;
+	bond_dev->hard_header_len   = slave_dev->hard_header_len;
+	bond_dev->addr_len	    = slave_dev->addr_len;
+
+	memcpy(bond_dev->broadcast, slave_dev->broadcast,
+		slave_dev->addr_len);
+	bond->setup_by_slave = 1;
+}
+
 /* enslave device <slave> to bond device <master> */
 int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 {
@@ -1401,8 +1433,9 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 
 	/* bond must be initialized by bond_open() before enslaving */
 	if (!(bond_dev->flags & IFF_UP)) {
-		dprintk("Error, master_dev is not up\n");
-		return -EPERM;
+		printk(KERN_WARNING DRV_NAME
+			" %s: master_dev is not up in bond_enslave\n",
+			bond_dev->name);
 	}
 
 	/* already enslaved */
@@ -1455,6 +1488,25 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 		goto err_undo_flags;
 	}
 
+	/* set bonding device ether type by slave - bonding netdevices are
+	 * created with ether_setup, so when the slave type is not ARPHRD_ETHER
+	 * there is a need to override some of the type dependent attribs/funcs.
+	 *
+	 * bond ether type mutual exclusion - don't allow slaves of dissimilar
+	 * ether type (eg ARPHRD_ETHER and ARPHRD_INFINIBAND) share the same bond
+	 */
+	if (bond->slave_cnt == 0) {
+		if (slave_dev->type != ARPHRD_ETHER)
+			bond_setup_by_slave(bond_dev, slave_dev);
+	} else if (bond_dev->type != slave_dev->type) {
+		printk(KERN_ERR DRV_NAME ": %s ether type (%d) is different "
+			"from other slaves (%d), can not enslave it.\n",
+			slave_dev->name,
+			slave_dev->type, bond_dev->type);
+			res = -EINVAL;
+			goto err_undo_flags;
+	}
+
 	if (slave_dev->set_mac_address == NULL) {
 		if (bond->slave_cnt == 0) {
 			printk(KERN_WARNING DRV_NAME
@@ -3535,7 +3587,10 @@ static int bond_slave_netdev_event(unsigned long event, struct net_device *slave
 	switch (event) {
 	case NETDEV_UNREGISTER:
 		if (bond_dev) {
-			bond_release(bond_dev, slave_dev);
+			if (bond->setup_by_slave)
+				bond_release_and_destroy(bond_dev, slave_dev);
+			else
+				bond_release(bond_dev, slave_dev);
 		}
 		break;
 	case NETDEV_CHANGE:
@@ -3550,11 +3605,6 @@ static int bond_slave_netdev_event(unsigned long event, struct net_device *slave
 		 * ... Or is it this?
 		 */
 		break;
-	case NETDEV_GOING_DOWN:
-		dprintk("slave %s is going down\n", slave_dev->name);
-		if (bond->setup_by_slave)
-			bond_release_and_destroy(bond_dev, slave_dev);
-		break;
 	case NETDEV_CHANGEMTU:
 		/*
 		 * TODO: Should slaves be allowed to
diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
index 4f4ce7b..a39b61d 100644
--- a/drivers/net/bonding/bond_sysfs.c
+++ b/drivers/net/bonding/bond_sysfs.c
@@ -244,17 +244,16 @@ static ssize_t bonding_store_slaves(struct class_device *cd, const char *buffer,
 	char command[IFNAMSIZ + 1] = { 0, };
 	char *ifname;
 	int i, res, found, ret = count;
+	u32 original_mtu;
 	struct slave *slave;
 	struct net_device *dev = NULL;
 	struct bonding *bond = to_bond(cd);
 
 	/* Quick sanity check -- is the bond interface up? */
 	if (!(bond->dev->flags & IFF_UP)) {
-		printk(KERN_ERR DRV_NAME
-		       ": %s: Unable to update slaves because interface is down.\n",
+		printk(KERN_WARNING DRV_NAME
+		       ": %s: doing slave updates when interface is down.\n",
 		       bond->dev->name);
-		/* early return before rtnl_lock() */
-		return -EPERM;
 	}
 
 	/* Note:  We can't hold bond->lock here, as bond_create grabs it. */
@@ -314,6 +313,7 @@ static ssize_t bonding_store_slaves(struct class_device *cd, const char *buffer,
 		}
 
 		/* Set the slave's MTU to match the bond */
+		original_mtu = dev->mtu;
 		if (dev->mtu != bond->dev->mtu) {
 			if (dev->change_mtu) {
 				res = dev->change_mtu(dev,
@@ -327,6 +327,9 @@ static ssize_t bonding_store_slaves(struct class_device *cd, const char *buffer,
 			}
 		}
 		res = bond_enslave(bond->dev, dev);
+		bond_for_each_slave(bond, slave, i)
+			if (strnicmp(slave->dev->name, ifname, IFNAMSIZ) == 0)
+				slave->original_mtu = original_mtu;
 		if (res) {
 			ret = res;
 		}
@@ -335,9 +338,11 @@ static ssize_t bonding_store_slaves(struct class_device *cd, const char *buffer,
 
 	if (command[0] == '-') {
 		dev = NULL;
+		original_mtu = 0;
 		bond_for_each_slave(bond, slave, i)
 			if (strnicmp(slave->dev->name, ifname, IFNAMSIZ) == 0) {
 				dev = slave->dev;
+				original_mtu = slave->original_mtu;
 				break;
 			}
 		if (dev) {
@@ -350,9 +355,9 @@ static ssize_t bonding_store_slaves(struct class_device *cd, const char *buffer,
 			}
 			/* set the slave MTU to the default */
 			if (dev->change_mtu) {
-				dev->change_mtu(dev, 1500);
+				dev->change_mtu(dev, original_mtu);
 			} else {
-				dev->mtu = 1500;
+				dev->mtu = original_mtu;
 			}
 		}
 		else {
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index 8a9c376..027dc08 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -168,6 +168,7 @@ struct slave {
 	s8     new_link;
 	s8     state;   /* one of BOND_STATE_XXXX */
 	u32    original_flags;
+	u32    original_mtu;
 	u32    link_failure_count;
 	u16    speed;
 	u8     duplex;
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 41a0c78..4e3fd45 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1012,6 +1012,7 @@ extern void		dev_mc_discard(struct net_device *dev);
 extern void		dev_set_promiscuity(struct net_device *dev, int inc);
 extern void		dev_set_allmulti(struct net_device *dev, int inc);
 extern void		netdev_state_change(struct net_device *dev);
+extern void		netdev_bonding_change(struct net_device *dev);
 extern void		netdev_features_change(struct net_device *dev);
 /* Load a device via the kmod */
 extern void		dev_load(const char *name);
diff --git a/include/linux/notifier.h b/include/linux/notifier.h
index 6b4360e..1ec5e05 100644
--- a/include/linux/notifier.h
+++ b/include/linux/notifier.h
@@ -139,6 +139,7 @@ extern int raw_notifier_call_chain(struct raw_notifier_head *,
 #define NETDEV_GOING_DOWN	0x0009
 #define NETDEV_CHANGENAME	0x000A
 #define NETDEV_FEAT_CHANGE	0x000B
+#define NETDEV_BONDING_FAILOVER 0x000C
 
 #define SYS_DOWN	0x0001	/* Notify of system down */
 #define SYS_RESTART	SYS_DOWN
diff --git a/net/core/dev.c b/net/core/dev.c
index 16ac4df..dffc3e3 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -811,6 +811,12 @@ void netdev_state_change(struct net_device *dev)
 	}
 }
 
+void netdev_bonding_change(struct net_device *dev)
+{
+	call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, dev);
+}
+EXPORT_SYMBOL(netdev_bonding_change);
+
 /**
  *	dev_load 	- load a network module
  *	@name: name of interface