Sophie

Sophie

distrib > Scientific%20Linux > 5x > x86_64 > by-pkgid > 27922b4260f65d317aabda37e42bbbff > files > 2641

kernel-2.6.18-238.el5.src.rpm

From: Shyam Iyer <shiyer@redhat.com>
Date: Thu, 15 Jul 2010 13:30:05 -0400
Subject: [net] fix lockups and dupe addresses w/bonding and ipv6
Message-id: <1279200605.20326.55.camel@shyamiyer>
Patchwork-id: 26901
O-Subject: [PATCH] [RHEL5.6] Kernel lockups with  bonding and IPV6 and
	'kernel: bond0: duplicate address detected!' messages]
Bugzilla: 516985
RH-Acked-by: David S. Miller <davem@redhat.com>

Kernel lockups with bonding and IPV6 and 'kernel: bond0: duplicate address detected!' messages]

Problem:
The kernel lockup observed when running multiple nics in a bonded IPv6
configuration.
Sometimes these panics occur when there is not much network activity and
sometimes when there is some amount of network stress.
The timing of the panics really depend on the router advertisements in
the network confusing the addrconf logic resulting in miscalculated
freeups in the dst cache.

A simulation tool that floods the network with router advertisements can
reproduce this problem quickly.

Redhat BZ:
https://bugzilla.redhat.com/show_bug.cgi?id=516985

Upstream Commits:
e9d3e084975869754d16f639378675c353560be9 ipv6: Replace
inet6_ifaddr->dead with state
4c5ff6a6fe794f102479db998c69054319279e3c ipv6: Use state_lock to protect
ifa state
f2344a131bccdbfc5338e17fa71a807dee7944fa ipv6: Use POSTDAD state
622ccdf107bcb49c4d8fb65512652566d4c8928a ipv6: Never schedule DAD timer
on dead address

KABI:
The patch was modified to not affect KABI by introducing a global lock
ifa_state_lock instead of modifying the inet6_ifaddr structure.
No KABI symbols were affected by the patch with this approach.

Testing
Done Testing on this using the v6blast simulator tool that floods the
network with router advertisements with positive results.
http://rhts.redhat.com/cgi-bin/rhts/jobs.cgi?id=166457 gave positive
results. There are a few errors which I don't believe is due to this
patch.

Shyam Iyer
Dell Onsite Engineer
Problem:
The kernel lockup observed when running multiple nics in a bonded IPv6 configuration.
Sometimes these panics occur when there is not much network activity and sometimes when there is some amount of network stress.
The timing of the panics really depend on the router advertisements in the network confusing the addrconf logic resulting in miscalculated freeups in the dst cache.

A simulation tool that floods the network with router advertisements can reproduce this problem quickly.

Redhat BZ:
https://bugzilla.redhat.com/show_bug.cgi?id=516985

Upstream Commits:
e9d3e084975869754d16f639378675c353560be9 ipv6: Replace inet6_ifaddr->dead with state
4c5ff6a6fe794f102479db998c69054319279e3c ipv6: Use state_lock to protect ifa state
f2344a131bccdbfc5338e17fa71a807dee7944fa ipv6: Use POSTDAD state
622ccdf107bcb49c4d8fb65512652566d4c8928a ipv6: Never schedule DAD timer on dead address

KABI:
The patch was modified to not affect KABI by introducing a global lock ifa_state_lock instead of modifying the inet6_ifaddr structure.
No KABI symbols were affected by the patch with this approach.

Testing
Done Testing on this using the v6blast simulator tool that floods the network with router advertisements with positive results.
http://rhts.redhat.com/cgi-bin/rhts/jobs.cgi?id=166457 gave positive results. There are a few errors which I don't believe is due to this patch.

Shyam Iyer
Dell Onsite Engineer

diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index 2ee8a96..93be582 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -30,6 +30,13 @@
 #define IF_PREFIX_ONLINK	0x01
 #define IF_PREFIX_AUTOCONF	0x02
 
+enum {
+	INET6_IFADDR_STATE_DAD,
+	INET6_IFADDR_STATE_POSTDAD,
+	INET6_IFADDR_STATE_UP,
+	INET6_IFADDR_STATE_DEAD,
+};
+
 #ifdef __KERNEL__
 
 struct inet6_ifaddr 
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 7490e97..4c2f6c3 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -538,7 +538,7 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)
 	if (del_timer(&ifp->timer))
 		printk("Timer is still running, when freeing ifa=%p\n", ifp);
 
-	if (!ifp->dead) {
+	if (ifp->dead != INET6_IFADDR_STATE_DEAD) {
 		printk("Freeing alive inet6 address %p\n", ifp);
 		return;
 	}
@@ -567,6 +567,8 @@ ipv6_link_dev_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp)
 	*ifap = ifp;
 }
 
+DEFINE_SPINLOCK(ifa_state_lock);
+
 /* On success it returns ifp with increased reference count */
 
 static struct inet6_ifaddr *
@@ -684,13 +686,21 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
 {
 	struct inet6_ifaddr *ifa, **ifap;
 	struct inet6_dev *idev = ifp->idev;
+	int state;
 	int hash;
 	int deleted = 0, onlink = 0;
 	unsigned long expires = jiffies;
 
 	hash = ipv6_addr_hash(&ifp->addr);
 
-	ifp->dead = 1;
+	spin_lock_bh(&ifa_state_lock);
+	state = ifp->dead;
+	ifp->dead = INET6_IFADDR_STATE_DEAD;
+	spin_unlock_bh(&ifa_state_lock);
+
+	if (state == INET6_IFADDR_STATE_DEAD)
+		goto out;
+
 
 	write_lock_bh(&addrconf_hash_lock);
 	for (ifap = &inet6_addr_lst[hash]; (ifa=*ifap) != NULL;
@@ -799,6 +809,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
 		dst_release(&rt->u.dst);
 	}
 
+out:
 	in6_ifa_put(ifp);
 }
 
@@ -1340,11 +1351,28 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp)
 		ipv6_del_addr(ifp);
 }
 
+static int addrconf_dad_end(struct inet6_ifaddr *ifp)
+{
+	int err = -ENOENT;
+
+	spin_lock(&ifa_state_lock);
+	if (ifp->dead == INET6_IFADDR_STATE_DAD) {
+		ifp->dead = INET6_IFADDR_STATE_POSTDAD;
+		err = 0;
+	}
+	spin_unlock(&ifa_state_lock);
+
+	return err;
+}
+
 void addrconf_dad_failure(struct inet6_ifaddr *ifp)
 {
 	struct inet6_dev *idev = ifp->idev;
 	struct net_device_extended *ext;
 
+	if (addrconf_dad_end(ifp))
+		return;
+
 	if (net_ratelimit())
 		printk(KERN_INFO "%s: IPv6 duplicate address detected!\n",
 			ifp->idev->dev->name);
@@ -2416,6 +2444,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 {
 	struct inet6_dev *idev;
 	struct inet6_ifaddr *ifa, **bifa;
+	int state;
 	int i;
 
 	ASSERT_RTNL();
@@ -2477,7 +2506,6 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 	while ((ifa = idev->tempaddr_list) != NULL) {
 		idev->tempaddr_list = ifa->tmp_next;
 		ifa->tmp_next = NULL;
-		ifa->dead = 1;
 		write_unlock_bh(&idev->lock);
 		spin_lock_bh(&ifa->lock);
 
@@ -2493,11 +2521,19 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 	while ((ifa = idev->addr_list) != NULL) {
 		idev->addr_list = ifa->if_next;
 		ifa->if_next = NULL;
-		ifa->dead = 1;
 		addrconf_del_timer(ifa);
 		write_unlock_bh(&idev->lock);
+		spin_lock_bh(&ifa_state_lock);
+		state = ifa->dead;
+		ifa->dead = INET6_IFADDR_STATE_DEAD;
+		spin_unlock_bh(&ifa_state_lock);
 
+		if (state == INET6_IFADDR_STATE_DEAD)
+			goto put_ifa;
 		__ipv6_ifa_notify(RTM_DELADDR, ifa);
+		atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifa);
+
+put_ifa:
 		in6_ifa_put(ifa);
 
 		write_lock_bh(&idev->lock);
@@ -2603,9 +2639,9 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags)
 	net_srandom(ifp->addr.s6_addr32[3]);
 
 	read_lock_bh(&idev->lock);
-	if (ifp->dead)
-		goto out;
 	spin_lock_bh(&ifp->lock);
+	if (ifp->dead == INET6_IFADDR_STATE_DEAD)
+		goto out;
 
 	if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) ||
 	    !(dev->flags&IFF_MULTICAST) ||
@@ -2639,8 +2675,8 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags)
 		ip6_ins_rt(ifp->rt, NULL, NULL, NULL);
 
 	addrconf_dad_kick(ifp);
-	spin_unlock_bh(&ifp->lock);
 out:
+	spin_unlock_bh(&ifp->lock);
 	read_unlock_bh(&idev->lock);
 }
 
@@ -2651,6 +2687,9 @@ static void addrconf_dad_timer(unsigned long data)
 	struct in6_addr unspec;
 	struct in6_addr mcaddr;
 
+	if (!ifp->probes && addrconf_dad_end(ifp))
+		goto out;
+
 	read_lock_bh(&idev->lock);
 	if (idev->dead) {
 		read_unlock_bh(&idev->lock);
@@ -2658,6 +2697,12 @@ static void addrconf_dad_timer(unsigned long data)
 	}
 
 	spin_lock_bh(&ifp->lock);
+	if (ifp->dead == INET6_IFADDR_STATE_DEAD) {
+		spin_unlock_bh(&ifp->lock);
+		read_unlock_bh(&idev->lock);
+		goto out;
+	}
+
 	if (ifp->probes == 0) {
 		/*
 		 * DAD was successful
@@ -2729,12 +2774,10 @@ static void addrconf_dad_run(struct inet6_dev *idev) {
 	read_lock_bh(&idev->lock);
 	for (ifp = idev->addr_list; ifp; ifp = ifp->if_next) {
 		spin_lock_bh(&ifp->lock);
-		if (!(ifp->flags & IFA_F_TENTATIVE)) {
-			spin_unlock_bh(&ifp->lock);
-			continue;
-		}
+		if (ifp->flags & IFA_F_TENTATIVE &&
+		    ifp->dead == INET6_IFADDR_STATE_DAD)
+			addrconf_dad_kick(ifp);
 		spin_unlock_bh(&ifp->lock);
-		addrconf_dad_kick(ifp);
 	}
 	read_unlock_bh(&idev->lock);
 }