Sophie

Sophie

distrib > Scientific%20Linux > 5x > x86_64 > by-pkgid > 27922b4260f65d317aabda37e42bbbff > files > 2775

kernel-2.6.18-238.el5.src.rpm

From: Neil Horman <nhorman@redhat.com>
Date: Fri, 12 Sep 2008 09:53:21 -0400
Subject: [net] ipv6: extra sysctls for additional TAHI tests
Message-id: 20080912135321.GE29710@hmsendeavour.rdu.redhat.com
O-Subject: [RHEL 5.3 PATCH]: ipv6: add extra ipv6 sysctls to support additional TAHI tests (bz 458270)
Bugzilla: 458270
RH-Acked-by: Thomas Graf <tgraf@redhat.com>

Hey all-
	This is a patch to add the accept_dad and disable_ipv6 per-interface
sysctls to the RHEL5 kernel.  We need them to pass a few TAHI ipv6 certification
tests.  Unfortunately, since these sysctls live in the ipv6_devconf structure,
we can't directly backport them from upstream, since that's a big ABI breaker.
So I've had to shuffle the stuff off into an extension on the net_device
structure like we did in RHEL4 for netpoll.  I'm not a big fan of doing that,
but there's no other way without really messing up the user space interface that
I can see.  Plus, I think we have a few other potential patches that may want to make
use of an extended net_device struct.  Tested by our TAHI test group
extensively, satisfies bz 458270.  This is flagged as an exception so reviews
ASAP appreciated.

Regards Neil

diff --git a/include/linux/if.h b/include/linux/if.h
index 92faf44..554e1a8 100644
--- a/include/linux/if.h
+++ b/include/linux/if.h
@@ -61,6 +61,7 @@
 #define IFF_MASTER_ALB	0x10		/* bonding master, balance-alb.	*/
 #define IFF_BONDING	0x20		/* bonding master or slave	*/
 #define IFF_SLAVE_NEEDARP 0x40		/* need ARPs for validation	*/
+#define IFF_EXTENDED	0x80		/* Extended data available	*/
 
 #define IF_GET_IFACE	0x0001		/* for querying only */
 #define IF_GET_PROTO	0x0002
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 254c6a7..073ad8e 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -182,6 +182,8 @@ enum {
 	DEVCONF_ACCEPT_RA_RTR_PREF,
 	DEVCONF_RTR_PROBE_INTERVAL,
 	DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN,
+	DEVCONF_DISABLE_IPV6,
+	DEVCONF_ACCEPT_DAD,
 	DEVCONF_MAX
 };
 
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d8e82c9..8d93f17 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -535,6 +535,23 @@ struct net_device
 	struct class_device	class_dev;
 	/* space for optional statistics and wireless sysfs groups */
 	struct attribute_group  *sysfs_groups[3];
+#ifndef __GENKSYMS__
+	unsigned short priv_len;
+#endif
+};
+
+/*
+ * struct net_device can't be modified without breaking ABI, so we 
+ * add net_device_extended to the end in alloc_netdev.  Anything that 
+ * needs to be added to a net_device can be appended here
+ */
+struct ipv6_devconf_extensions {
+	s32 disable_ipv6;
+	s32 accept_dad;
+};
+
+struct net_device_extended {
+	struct ipv6_devconf_extensions ipv6_devconf_ext;
 };
 
 #define	NETDEV_ALIGN		32
@@ -547,6 +564,14 @@ static inline void *netdev_priv(struct net_device *dev)
 				& ~NETDEV_ALIGN_CONST);
 }
 
+static inline struct net_device_extended *dev_extended(struct net_device *dev)
+{
+	if (!(dev->priv_flags & IFF_EXTENDED))
+		return NULL;
+	return (struct net_device_extended *) ((char *) netdev_priv(dev) +
+		((dev->priv_len + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST));
+}
+
 #define SET_MODULE_OWNER(dev) do { } while (0)
 /* Set the sysfs physical device reference for the network logical device
  * if set prior to registration will cause a symlink during initialization.
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 24d477b..d4b00d4 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -585,6 +585,9 @@ enum {
 	NET_IPV6_ACCEPT_RA_RTR_PREF=20,
 	NET_IPV6_RTR_PROBE_INTERVAL=21,
 	NET_IPV6_ACCEPT_RA_RT_INFO_MAX_PLEN=22,
+#define NET_IPV6_SYSCTL_EXTENDED_BOUNDARY NET_IPV6_ACCEPT_RA_RT_INFO_MAX_PLEN
+	NET_IPV6_DISABLE=23,
+	NET_IPV6_ACCEPT_DAD=24,
 	__NET_IPV6_MAX
 };
 
diff --git a/net/core/dev.c b/net/core/dev.c
index 7cb219c..a63500a 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3257,7 +3257,8 @@ struct net_device *alloc_netdev(int sizeof_priv, const char *name,
 
 	/* ensure 32-byte alignment of both the device and private area */
 	alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
-	alloc_size += sizeof_priv + NETDEV_ALIGN_CONST;
+	alloc_size += (sizeof_priv + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
+	alloc_size += sizeof(struct net_device_extended) + NETDEV_ALIGN_CONST;
 
 	p = kzalloc(alloc_size, GFP_KERNEL);
 	if (!p) {
@@ -3274,6 +3275,8 @@ struct net_device *alloc_netdev(int sizeof_priv, const char *name,
 
 	setup(dev);
 	strcpy(dev->name, name);
+	dev->priv_flags |= IFF_EXTENDED;
+	dev->priv_len = sizeof_priv;
 	return dev;
 }
 EXPORT_SYMBOL(alloc_netdev);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 2db3e94..3744762 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -131,6 +131,7 @@ static void addrconf_leave_anycast(struct inet6_ifaddr *ifp);
 
 static int addrconf_ifdown(struct net_device *dev, int how);
 
+static int ipv6_generate_eui64(u8 *eui, struct net_device *dev);
 static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags);
 static void addrconf_dad_timer(unsigned long data);
 static void addrconf_dad_completed(struct inet6_ifaddr *ifp);
@@ -176,6 +177,11 @@ struct ipv6_devconf ipv6_devconf = {
 #endif
 };
 
+struct ipv6_devconf_extensions ipv6_devconf_extensions = {
+	.disable_ipv6 = 0,
+	.accept_dad = 1,
+};
+
 static struct ipv6_devconf ipv6_devconf_dflt = {
 	.forwarding		= 0,
 	.hop_limit		= IPV6_DEFAULT_HOPLIMIT,
@@ -206,6 +212,11 @@ static struct ipv6_devconf ipv6_devconf_dflt = {
 #endif
 };
 
+static struct ipv6_devconf_extensions ipv6_devconf_extensions_dflt = {
+	.disable_ipv6		= 0,
+	.accept_dad		= 1,
+};
+
 int sysctl_ip6_odad = 0;
 
 /* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */
@@ -345,6 +356,7 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
 {
 	struct inet6_dev *ndev;
 	struct in6_addr maddr;
+	struct net_device_extended *ext = dev_extended(dev);
 
 	ASSERT_RTNL();
 
@@ -359,6 +371,10 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
 	rwlock_init(&ndev->lock);
 	ndev->dev = dev;
 	memcpy(&ndev->cnf, &ipv6_devconf_dflt, sizeof(ndev->cnf));
+	if (ext != NULL) {
+		memcpy(&ext->ipv6_devconf_ext, &ipv6_devconf_extensions_dflt,
+			sizeof(ext->ipv6_devconf_ext));
+	}
 	ndev->cnf.mtu6 = dev->mtu;
 	ndev->cnf.sysctl = NULL;
 	ndev->nd_parms = neigh_parms_alloc(dev, &nd_tbl);
@@ -394,6 +410,9 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
 	 */
 	in6_dev_hold(ndev);
 
+	if (ext && (dev->flags & (IFF_NOARP | IFF_LOOPBACK)))
+		ext->ipv6_devconf_ext.accept_dad = -1;
+
 #ifdef CONFIG_IPV6_PRIVACY
 	init_timer(&ndev->regen_timer);
 	ndev->regen_timer.function = ipv6_regen_rndid;
@@ -1313,6 +1332,25 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp)
 
 void addrconf_dad_failure(struct inet6_ifaddr *ifp)
 {
+	struct inet6_dev *idev = ifp->idev;
+	struct net_device_extended *ext;
+
+	ext = (idev->dev == NULL) ? NULL : dev_extended(idev->dev);
+
+	if (ext && ext->ipv6_devconf_ext.accept_dad > 1 && 
+	    !ext->ipv6_devconf_ext.disable_ipv6) {
+		struct in6_addr addr;
+
+		addr.s6_addr32[0] = htonl(0xfe800000);
+		addr.s6_addr32[1] = 0;
+
+		if (!ipv6_generate_eui64(addr.s6_addr + 8, idev->dev) &&
+		    ipv6_addr_equal(&ifp->addr, &addr)) {
+			/* DAD failed for link-local based on MAC address */
+			ext->ipv6_devconf_ext.disable_ipv6 = 1;
+		}
+	}
+
 	if (net_ratelimit())
 		printk(KERN_INFO "%s: duplicate address detected!\n", ifp->idev->dev->name);
 	addrconf_dad_stop(ifp);
@@ -2597,12 +2635,21 @@ static void addrconf_dad_timer(unsigned long data)
 	struct inet6_dev *idev = ifp->idev;
 	struct in6_addr unspec;
 	struct in6_addr mcaddr;
+	struct net_device_extended *ext = (idev == NULL) ? NULL : dev_extended(idev->dev);
 
 	read_lock_bh(&idev->lock);
 	if (idev->dead) {
 		read_unlock_bh(&idev->lock);
 		goto out;
 	}
+
+	if (ext && ext->ipv6_devconf_ext.accept_dad > 1 &&
+	    ext->ipv6_devconf_ext.disable_ipv6) {
+		read_unlock_bh(&idev->lock);
+		addrconf_dad_failure(ifp);
+		return;
+	}
+
 	spin_lock_bh(&ifp->lock);
 	if (ifp->probes == 0) {
 		/*
@@ -3348,7 +3395,7 @@ static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa)
 	netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_IFADDR, GFP_ATOMIC);
 }
 
-static void inline ipv6_store_devconf(struct ipv6_devconf *cnf,
+static void inline ipv6_store_devconf(struct ipv6_devconf *cnf, struct ipv6_devconf_extensions *xcnf,
 				__s32 *array, int bytes)
 {
 	memset(array, 0, bytes);
@@ -3380,6 +3427,10 @@ static void inline ipv6_store_devconf(struct ipv6_devconf *cnf,
 	array[DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN] = cnf->accept_ra_rt_info_max_plen;
 #endif
 #endif
+	if (xcnf) {
+		array[DEVCONF_DISABLE_IPV6] = xcnf->disable_ipv6;
+		array[DEVCONF_ACCEPT_DAD] = xcnf->accept_dad;
+	}
 }
 
 /* Maximum length of ifinfomsg attributes */
@@ -3404,6 +3455,8 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
 	struct rtattr		*subattr;
 	__u32			mtu = dev->mtu;
 	struct ifla_cacheinfo	ci;
+	struct net_device_extended *ext = (dev == NULL) ? NULL : dev_extended(dev);
+	struct ipv6_devconf_extensions *xcnf = (ext == NULL) ? NULL : &ext->ipv6_devconf_ext;
 
 	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*r), flags);
 	r = NLMSG_DATA(nlh);
@@ -3441,7 +3494,7 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
 	/* return the device sysctl params */
 	if ((array = kmalloc(DEVCONF_MAX * sizeof(*array), GFP_ATOMIC)) == NULL)
 		goto rtattr_failure;
-	ipv6_store_devconf(&idev->cnf, array, DEVCONF_MAX * sizeof(*array));
+	ipv6_store_devconf(&idev->cnf, xcnf, array, DEVCONF_MAX * sizeof(*array));
 	RTA_PUT(skb, IFLA_INET6_CONF, DEVCONF_MAX * sizeof(*array), array);
 
 	/* XXX - Statistics/MC not implemented */
@@ -3813,6 +3866,22 @@ static struct addrconf_sysctl_table
 			.mode		=	0644,
          		.proc_handler	=	&proc_dointvec,
 		},
+		{
+			.ctl_name	=	NET_IPV6_DISABLE,
+			.procname	=	"disable_ipv6",
+			.data		=	&ipv6_devconf_extensions.disable_ipv6,
+			.maxlen		=	sizeof(int),
+			.mode		=	0644,
+			.proc_handler	=	&proc_dointvec,
+		},
+		{
+			.ctl_name	=	NET_IPV6_ACCEPT_DAD,
+			.procname	=	"accept_dad",
+			.data		=	&ipv6_devconf_extensions.accept_dad,
+			.maxlen		=	sizeof(int),
+			.mode		=	0644,
+			.proc_handler	=	&proc_dointvec,
+		},
 #ifdef CONFIG_IPV6_PRIVACY
 		{
 			.ctl_name	=	NET_IPV6_USE_TEMPADDR,
@@ -3963,6 +4032,8 @@ static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf
 	int i;
 	struct net_device *dev = idev ? idev->dev : NULL;
 	struct addrconf_sysctl_table *t;
+	struct net_device_extended *ext;
+	struct ipv6_devconf_extensions *dext;
 	char *dev_name = NULL;
 
 	t = kmalloc(sizeof(*t), GFP_KERNEL);
@@ -3970,9 +4041,44 @@ static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf
 		return;
 	memcpy(t, &addrconf_sysctl, sizeof(*t));
 	for (i=0; t->addrconf_vars[i].data; i++) {
-		t->addrconf_vars[i].data += (char*)p - (char*)&ipv6_devconf;
-		t->addrconf_vars[i].de = NULL;
-		t->addrconf_vars[i].extra1 = idev; /* embedded; no ref */
+		if (t->addrconf_vars[i].ctl_name <= NET_IPV6_SYSCTL_EXTENDED_BOUNDARY) {
+			t->addrconf_vars[i].data += (char*)p - (char*)&ipv6_devconf;
+			t->addrconf_vars[i].de = NULL;
+			t->addrconf_vars[i].extra1 = idev; /* embedded; no ref */
+		} else {
+			ext = (dev == NULL) ? NULL : dev_extended(dev);
+			if (ext == NULL) {
+				dext = &ipv6_devconf_extensions_dflt;
+				if (p != &ipv6_devconf_dflt)
+					/*
+					 * We're registering an interface here that 
+					 * doesn't have an extended segment, so we
+					 * register the extra sysctls, but we give them
+					 * no permissions so that we can't write to them
+					 */
+					t->addrconf_vars[i].mode = 0000;
+			} else
+				dext = &ext->ipv6_devconf_ext;
+
+			if (dext == NULL)
+				continue;
+			/*
+			 * Common settings for the extensions
+			 */
+			t->addrconf_vars[i].de = NULL;
+			t->addrconf_vars[i].extra1 = idev;
+			switch (t->addrconf_vars[i].ctl_name) {
+			case NET_IPV6_DISABLE:
+				t->addrconf_vars[i].data = &dext->disable_ipv6;
+				break;
+			case NET_IPV6_ACCEPT_DAD:
+				t->addrconf_vars[i].data = &dext->accept_dad;
+				break;
+			default:
+				BUG();
+			}
+		}
+
 	}
 	if (dev) {
 		dev_name = dev->name; 
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 48bf675..7c02a9c 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -61,6 +61,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
 	struct ipv6hdr *hdr;
 	u32 		pkt_len;
 	struct inet6_dev *idev;
+	struct net_device_extended *ext = dev_extended(dev);
 
 	if (skb->pkt_type == PACKET_OTHERHOST) {
 		kfree_skb(skb);
@@ -73,7 +74,8 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
 
 	IP6_INC_STATS_BH(idev, IPSTATS_MIB_INRECEIVES);
 
-	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) {
+	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL ||
+	    (ext && unlikely(ext->ipv6_devconf_ext.disable_ipv6))) {
 		IP6_INC_STATS_BH(idev, IPSTATS_MIB_INDISCARDS);
 		rcu_read_unlock();
 		goto out;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 920625f..94cf058 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -149,6 +149,15 @@ static int ip6_output2(struct sk_buff *skb)
 
 int ip6_output(struct sk_buff *skb)
 {
+	struct inet6_dev *idev = ip6_dst_idev(skb->dst);
+	struct net_device_extended *ext = dev_extended(idev->dev);
+
+	if (unlikely(ext && ext->ipv6_devconf_ext.disable_ipv6)) {
+		IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS);
+		kfree_skb(skb);
+		return 0;
+	}
+
 	if ((skb->len > dst_mtu(skb->dst) && !skb_is_gso(skb)) ||
 				dst_allfrag(skb->dst))
 		return ip6_fragment(skb, ip6_output2);