From: Shyam Iyer <shiyer@redhat.com> Date: Thu, 15 Jul 2010 13:30:05 -0400 Subject: [net] fix lockups and dupe addresses w/bonding and ipv6 Message-id: <1279200605.20326.55.camel@shyamiyer> Patchwork-id: 26901 O-Subject: [PATCH] [RHEL5.6] Kernel lockups with bonding and IPV6 and 'kernel: bond0: duplicate address detected!' messages] Bugzilla: 516985 RH-Acked-by: David S. Miller <davem@redhat.com> Kernel lockups with bonding and IPV6 and 'kernel: bond0: duplicate address detected!' messages] Problem: The kernel lockup observed when running multiple nics in a bonded IPv6 configuration. Sometimes these panics occur when there is not much network activity and sometimes when there is some amount of network stress. The timing of the panics really depend on the router advertisements in the network confusing the addrconf logic resulting in miscalculated freeups in the dst cache. A simulation tool that floods the network with router advertisements can reproduce this problem quickly. Redhat BZ: https://bugzilla.redhat.com/show_bug.cgi?id=516985 Upstream Commits: e9d3e084975869754d16f639378675c353560be9 ipv6: Replace inet6_ifaddr->dead with state 4c5ff6a6fe794f102479db998c69054319279e3c ipv6: Use state_lock to protect ifa state f2344a131bccdbfc5338e17fa71a807dee7944fa ipv6: Use POSTDAD state 622ccdf107bcb49c4d8fb65512652566d4c8928a ipv6: Never schedule DAD timer on dead address KABI: The patch was modified to not affect KABI by introducing a global lock ifa_state_lock instead of modifying the inet6_ifaddr structure. No KABI symbols were affected by the patch with this approach. Testing Done Testing on this using the v6blast simulator tool that floods the network with router advertisements with positive results. http://rhts.redhat.com/cgi-bin/rhts/jobs.cgi?id=166457 gave positive results. There are a few errors which I don't believe is due to this patch. Shyam Iyer Dell Onsite Engineer Problem: The kernel lockup observed when running multiple nics in a bonded IPv6 configuration. Sometimes these panics occur when there is not much network activity and sometimes when there is some amount of network stress. The timing of the panics really depend on the router advertisements in the network confusing the addrconf logic resulting in miscalculated freeups in the dst cache. A simulation tool that floods the network with router advertisements can reproduce this problem quickly. Redhat BZ: https://bugzilla.redhat.com/show_bug.cgi?id=516985 Upstream Commits: e9d3e084975869754d16f639378675c353560be9 ipv6: Replace inet6_ifaddr->dead with state 4c5ff6a6fe794f102479db998c69054319279e3c ipv6: Use state_lock to protect ifa state f2344a131bccdbfc5338e17fa71a807dee7944fa ipv6: Use POSTDAD state 622ccdf107bcb49c4d8fb65512652566d4c8928a ipv6: Never schedule DAD timer on dead address KABI: The patch was modified to not affect KABI by introducing a global lock ifa_state_lock instead of modifying the inet6_ifaddr structure. No KABI symbols were affected by the patch with this approach. Testing Done Testing on this using the v6blast simulator tool that floods the network with router advertisements with positive results. http://rhts.redhat.com/cgi-bin/rhts/jobs.cgi?id=166457 gave positive results. There are a few errors which I don't believe is due to this patch. Shyam Iyer Dell Onsite Engineer diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index 2ee8a96..93be582 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -30,6 +30,13 @@ #define IF_PREFIX_ONLINK 0x01 #define IF_PREFIX_AUTOCONF 0x02 +enum { + INET6_IFADDR_STATE_DAD, + INET6_IFADDR_STATE_POSTDAD, + INET6_IFADDR_STATE_UP, + INET6_IFADDR_STATE_DEAD, +}; + #ifdef __KERNEL__ struct inet6_ifaddr diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 7490e97..4c2f6c3 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -538,7 +538,7 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp) if (del_timer(&ifp->timer)) printk("Timer is still running, when freeing ifa=%p\n", ifp); - if (!ifp->dead) { + if (ifp->dead != INET6_IFADDR_STATE_DEAD) { printk("Freeing alive inet6 address %p\n", ifp); return; } @@ -567,6 +567,8 @@ ipv6_link_dev_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp) *ifap = ifp; } +DEFINE_SPINLOCK(ifa_state_lock); + /* On success it returns ifp with increased reference count */ static struct inet6_ifaddr * @@ -684,13 +686,21 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) { struct inet6_ifaddr *ifa, **ifap; struct inet6_dev *idev = ifp->idev; + int state; int hash; int deleted = 0, onlink = 0; unsigned long expires = jiffies; hash = ipv6_addr_hash(&ifp->addr); - ifp->dead = 1; + spin_lock_bh(&ifa_state_lock); + state = ifp->dead; + ifp->dead = INET6_IFADDR_STATE_DEAD; + spin_unlock_bh(&ifa_state_lock); + + if (state == INET6_IFADDR_STATE_DEAD) + goto out; + write_lock_bh(&addrconf_hash_lock); for (ifap = &inet6_addr_lst[hash]; (ifa=*ifap) != NULL; @@ -799,6 +809,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) dst_release(&rt->u.dst); } +out: in6_ifa_put(ifp); } @@ -1340,11 +1351,28 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp) ipv6_del_addr(ifp); } +static int addrconf_dad_end(struct inet6_ifaddr *ifp) +{ + int err = -ENOENT; + + spin_lock(&ifa_state_lock); + if (ifp->dead == INET6_IFADDR_STATE_DAD) { + ifp->dead = INET6_IFADDR_STATE_POSTDAD; + err = 0; + } + spin_unlock(&ifa_state_lock); + + return err; +} + void addrconf_dad_failure(struct inet6_ifaddr *ifp) { struct inet6_dev *idev = ifp->idev; struct net_device_extended *ext; + if (addrconf_dad_end(ifp)) + return; + if (net_ratelimit()) printk(KERN_INFO "%s: IPv6 duplicate address detected!\n", ifp->idev->dev->name); @@ -2416,6 +2444,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) { struct inet6_dev *idev; struct inet6_ifaddr *ifa, **bifa; + int state; int i; ASSERT_RTNL(); @@ -2477,7 +2506,6 @@ static int addrconf_ifdown(struct net_device *dev, int how) while ((ifa = idev->tempaddr_list) != NULL) { idev->tempaddr_list = ifa->tmp_next; ifa->tmp_next = NULL; - ifa->dead = 1; write_unlock_bh(&idev->lock); spin_lock_bh(&ifa->lock); @@ -2493,11 +2521,19 @@ static int addrconf_ifdown(struct net_device *dev, int how) while ((ifa = idev->addr_list) != NULL) { idev->addr_list = ifa->if_next; ifa->if_next = NULL; - ifa->dead = 1; addrconf_del_timer(ifa); write_unlock_bh(&idev->lock); + spin_lock_bh(&ifa_state_lock); + state = ifa->dead; + ifa->dead = INET6_IFADDR_STATE_DEAD; + spin_unlock_bh(&ifa_state_lock); + if (state == INET6_IFADDR_STATE_DEAD) + goto put_ifa; __ipv6_ifa_notify(RTM_DELADDR, ifa); + atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifa); + +put_ifa: in6_ifa_put(ifa); write_lock_bh(&idev->lock); @@ -2603,9 +2639,9 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags) net_srandom(ifp->addr.s6_addr32[3]); read_lock_bh(&idev->lock); - if (ifp->dead) - goto out; spin_lock_bh(&ifp->lock); + if (ifp->dead == INET6_IFADDR_STATE_DEAD) + goto out; if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) || !(dev->flags&IFF_MULTICAST) || @@ -2639,8 +2675,8 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags) ip6_ins_rt(ifp->rt, NULL, NULL, NULL); addrconf_dad_kick(ifp); - spin_unlock_bh(&ifp->lock); out: + spin_unlock_bh(&ifp->lock); read_unlock_bh(&idev->lock); } @@ -2651,6 +2687,9 @@ static void addrconf_dad_timer(unsigned long data) struct in6_addr unspec; struct in6_addr mcaddr; + if (!ifp->probes && addrconf_dad_end(ifp)) + goto out; + read_lock_bh(&idev->lock); if (idev->dead) { read_unlock_bh(&idev->lock); @@ -2658,6 +2697,12 @@ static void addrconf_dad_timer(unsigned long data) } spin_lock_bh(&ifp->lock); + if (ifp->dead == INET6_IFADDR_STATE_DEAD) { + spin_unlock_bh(&ifp->lock); + read_unlock_bh(&idev->lock); + goto out; + } + if (ifp->probes == 0) { /* * DAD was successful @@ -2729,12 +2774,10 @@ static void addrconf_dad_run(struct inet6_dev *idev) { read_lock_bh(&idev->lock); for (ifp = idev->addr_list; ifp; ifp = ifp->if_next) { spin_lock_bh(&ifp->lock); - if (!(ifp->flags & IFA_F_TENTATIVE)) { - spin_unlock_bh(&ifp->lock); - continue; - } + if (ifp->flags & IFA_F_TENTATIVE && + ifp->dead == INET6_IFADDR_STATE_DAD) + addrconf_dad_kick(ifp); spin_unlock_bh(&ifp->lock); - addrconf_dad_kick(ifp); } read_unlock_bh(&idev->lock); }