Sophie: kernel-2.6.18-238.el5 src

kernel-2.6.18-238.el5.src.rpm

From: Ivan Vecera <ivecera@redhat.com>
Date: Tue, 27 Jan 2009 09:50:09 +0100
Subject: [net] ipt_REJECT: properly handle IP options
Message-id: 1233046209-10378-1-git-send-email-ivecera@redhat.com
O-Subject: [RHEL5.4 PATCH] [net] ipt_REJECT: properly handle IP options
Bugzilla: 473504
RH-Acked-by: Neil Horman <nhorman@redhat.com>
RH-Acked-by: David Miller <davem@redhat.com>

BZs:
#473504 - kernel panic

Description:
When REJECT target are sending reset packet the crash can
occur later in tcp_tso_segment. The problem is the TCP
reset packet is copied from the original and so the GSO
bits are copied also. They could be cleared but as Neil
mentioned the better solution is the creating the reset
packet from scratch - this is implemented b upstream commit
9ba99b0d3f45d0aedeafce1cfa4f720b19d04477.

Testing status:
Tested successfully

Upstream status:
http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff;h=9ba99b0d3f45d0aedeafce1cfa4f720b19d04477

Signed-off-by: Ivan Vecera <ivecera@redhat.com>

diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index 7f905bf..3b87f2f 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -22,6 +22,7 @@
 #include <net/tcp.h>
 #include <net/route.h>
 #include <net/dst.h>
+#include <net/xfrm.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
 #include <linux/netfilter_ipv4/ipt_REJECT.h>
 #ifdef CONFIG_BRIDGE_NETFILTER
@@ -38,82 +39,84 @@ MODULE_DESCRIPTION("iptables REJECT target module");
 #define DEBUGP(format, args...)
 #endif
 
-static inline struct rtable *route_reverse(struct sk_buff *skb, 
-					   struct tcphdr *tcph, int hook)
+/* Backported 'ip_route_me_harder' that allows to use explicit address type */
+static int ip_route_me_harder_compat(struct sk_buff *skb, unsigned addr_type)
 {
-	struct iphdr *iph = skb->nh.iph;
-	struct dst_entry *odst;
-	struct flowi fl = {};
+	const struct iphdr *iph = ip_hdr(skb);
 	struct rtable *rt;
+	struct flowi fl = {};
+	struct dst_entry *odst;
+	unsigned int hh_len;
 
-	/* We don't require ip forwarding to be enabled to be able to
-	 * send a RST reply for bridged traffic. */
-	if (hook != NF_IP_FORWARD
-#ifdef CONFIG_BRIDGE_NETFILTER
-	    || (skb->nf_bridge && skb->nf_bridge->mask & BRNF_BRIDGED)
-#endif
-	   ) {
-		fl.nl_u.ip4_u.daddr = iph->saddr;
-		if (hook == NF_IP_LOCAL_IN)
-			fl.nl_u.ip4_u.saddr = iph->daddr;
-		fl.nl_u.ip4_u.tos = RT_TOS(iph->tos);
+	if (addr_type == RTN_UNSPEC)
+		addr_type = inet_addr_type(iph->saddr);
 
+	/* some non-standard hacks like ipt_REJECT.c:send_reset() can cause
+	 * packets with foreign saddr to appear on the NF_INET_LOCAL_OUT hook.
+	 */
+	if (addr_type == RTN_LOCAL) {
+		fl.nl_u.ip4_u.daddr = iph->daddr;
+		fl.nl_u.ip4_u.saddr = iph->saddr;
+		fl.nl_u.ip4_u.tos = RT_TOS(iph->tos);
+		fl.oif = skb->sk ? skb->sk->sk_bound_dev_if : 0;
+#ifdef CONFIG_IP_ROUTE_FWMARK
+		fl.nl_u.ip4_u.fwmark = skb->nfmark;
+#endif
 		if (ip_route_output_key(&rt, &fl) != 0)
-			return NULL;
+			return -1;
+
+		/* Drop old route. */
+		dst_release(skb->dst);
+		skb->dst = &rt->u.dst;
 	} else {
 		/* non-local src, find valid iif to satisfy
 		 * rp-filter when calling ip_route_input. */
-		fl.nl_u.ip4_u.daddr = iph->daddr;
+		fl.nl_u.ip4_u.daddr = iph->saddr;
 		if (ip_route_output_key(&rt, &fl) != 0)
-			return NULL;
+			return -1;
 
 		odst = skb->dst;
-		if (ip_route_input(skb, iph->saddr, iph->daddr,
-		                   RT_TOS(iph->tos), rt->u.dst.dev) != 0) {
+		if (ip_route_input(skb, iph->daddr, iph->saddr,
+				   RT_TOS(iph->tos), rt->u.dst.dev) != 0) {
 			dst_release(&rt->u.dst);
-			return NULL;
+			return -1;
 		}
 		dst_release(&rt->u.dst);
-		rt = (struct rtable *)skb->dst;
-		skb->dst = odst;
-
-		fl.nl_u.ip4_u.daddr = iph->saddr;
-		fl.nl_u.ip4_u.saddr = iph->daddr;
-		fl.nl_u.ip4_u.tos = RT_TOS(iph->tos);
+		dst_release(odst);
 	}
 
-	if (rt->u.dst.error) {
-		dst_release(&rt->u.dst);
-		return NULL;
-	}
+	if (skb->dst->error)
+		return -1;
 
-	fl.proto = IPPROTO_TCP;
-	fl.fl_ip_sport = tcph->dest;
-	fl.fl_ip_dport = tcph->source;
-	security_skb_classify_flow(skb, &fl);
+#ifdef CONFIG_XFRM
+	if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
+	    xfrm_decode_session(skb, &fl, AF_INET) == 0)
+		if (xfrm_lookup(&skb->dst, &fl, skb->sk, 0))
+			return -1;
+#endif
 
-	xfrm_lookup((struct dst_entry **)&rt, &fl, NULL, 0);
+	/* Change in oif may mean change in hh_len. */
+	hh_len = skb->dst->dev->hard_header_len;
+	if (skb_headroom(skb) < hh_len &&
+	    pskb_expand_head(skb, hh_len - skb_headroom(skb), 0, GFP_ATOMIC))
+		return -1;
 
-	return rt;
+	return 0;
 }
 
 /* Send RST reply */
 static void send_reset(struct sk_buff *oldskb, int hook)
 {
 	struct sk_buff *nskb;
-	struct iphdr *iph = oldskb->nh.iph;
+	struct iphdr *oiph, *niph;
 	struct tcphdr _otcph, *oth, *tcph;
-	struct rtable *rt;
-	u_int16_t tmp_port;
-	u_int32_t tmp_addr;
-	int needs_ack;
-	int hh_len;
+	unsigned int addr_type;
 
 	/* IP header checks: fragment. */
-	if (oldskb->nh.iph->frag_off & htons(IP_OFFSET))
+	if (ip_hdr(oldskb)->frag_off & htons(IP_OFFSET))
 		return;
 
-	oth = skb_header_pointer(oldskb, oldskb->nh.iph->ihl * 4,
+	oth = skb_header_pointer(oldskb, ip_hdrlen(oldskb),
 				 sizeof(_otcph), &_otcph);
 	if (oth == NULL)
  		return;
@@ -123,85 +126,68 @@ static void send_reset(struct sk_buff *oldskb, int hook)
 		return;
 
 	/* Check checksum */
-	if (nf_ip_checksum(oldskb, hook, iph->ihl * 4, IPPROTO_TCP))
+	if (nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), IPPROTO_TCP))
 		return;
+	oiph = ip_hdr(oldskb);
 
-	if ((rt = route_reverse(oldskb, oth, hook)) == NULL)
+	nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct tcphdr) +
+			 LL_MAX_HEADER, GFP_ATOMIC);
+	if (!nskb)
 		return;
 
-	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
-
-	/* We need a linear, writeable skb.  We also need to expand
-	   headroom in case hh_len of incoming interface < hh_len of
-	   outgoing interface */
-	nskb = skb_copy_expand(oldskb, hh_len, skb_tailroom(oldskb),
-			       GFP_ATOMIC);
-	if (!nskb) {
-		dst_release(&rt->u.dst);
-		return;
-	}
-
-	dst_release(nskb->dst);
-	nskb->dst = &rt->u.dst;
-
 	/* This packet will not be the same as the other: clear nf fields */
-	nf_reset(nskb);
-	nskb->nfmark = 0;
-	skb_init_secmark(nskb);
-
-	tcph = (struct tcphdr *)((u_int32_t*)nskb->nh.iph + nskb->nh.iph->ihl);
-
-	/* Swap source and dest */
-	tmp_addr = nskb->nh.iph->saddr;
-	nskb->nh.iph->saddr = nskb->nh.iph->daddr;
-	nskb->nh.iph->daddr = tmp_addr;
-	tmp_port = tcph->source;
-	tcph->source = tcph->dest;
-	tcph->dest = tmp_port;
-
-	/* Truncate to length (no data) */
-	tcph->doff = sizeof(struct tcphdr)/4;
-	skb_trim(nskb, nskb->nh.iph->ihl*4 + sizeof(struct tcphdr));
-	nskb->nh.iph->tot_len = htons(nskb->len);
-
-	if (tcph->ack) {
-		needs_ack = 0;
+	skb_reserve(nskb, LL_MAX_HEADER);
+
+	skb_reset_network_header(nskb);
+	niph = (struct iphdr *)skb_put(nskb, sizeof(struct iphdr));
+	niph->version	= 4;
+	niph->ihl	= sizeof(struct iphdr) / 4;
+	niph->tos	= 0;
+	niph->id	= 0;
+	niph->frag_off	= htons(IP_DF);
+	niph->protocol	= IPPROTO_TCP;
+	niph->check	= 0;
+	niph->saddr	= oiph->daddr;
+	niph->daddr	= oiph->saddr;
+
+	tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr));
+	memset(tcph, 0, sizeof(*tcph));
+	tcph->source	= oth->dest;
+	tcph->dest	= oth->source;
+	tcph->doff	= sizeof(struct tcphdr) / 4;
+
+	if (oth->ack)
 		tcph->seq = oth->ack_seq;
-		tcph->ack_seq = 0;
-	} else {
-		needs_ack = 1;
-		tcph->ack_seq = htonl(ntohl(oth->seq) + oth->syn + oth->fin
-				      + oldskb->len - oldskb->nh.iph->ihl*4
-				      - (oth->doff<<2));
-		tcph->seq = 0;
+	else {
+		tcph->ack_seq = htonl(ntohl(oth->seq) + oth->syn + oth->fin +
+				      oldskb->len - ip_hdrlen(oldskb) -
+				      (oth->doff << 2));
+		tcph->ack = 1;
 	}
 
-	/* Reset flags */
-	((u_int8_t *)tcph)[13] = 0;
-	tcph->rst = 1;
-	tcph->ack = needs_ack;
-
-	tcph->window = 0;
-	tcph->urg_ptr = 0;
-
-	/* Adjust TCP checksum */
-	tcph->check = 0;
-	tcph->check = tcp_v4_check(tcph, sizeof(struct tcphdr),
-				   nskb->nh.iph->saddr,
-				   nskb->nh.iph->daddr,
-				   csum_partial((char *)tcph,
-						sizeof(struct tcphdr), 0));
-
-	/* Adjust IP TTL, DF */
-	nskb->nh.iph->ttl = dst_metric(nskb->dst, RTAX_HOPLIMIT);
-	/* Set DF, id = 0 */
-	nskb->nh.iph->frag_off = htons(IP_DF);
-	nskb->nh.iph->id = 0;
-
-	/* Adjust IP checksum */
-	nskb->nh.iph->check = 0;
-	nskb->nh.iph->check = ip_fast_csum((unsigned char *)nskb->nh.iph, 
-					   nskb->nh.iph->ihl);
+	tcph->rst	= 1;
+	tcph->check	= tcp_v4_check(tcph, sizeof(struct tcphdr),
+				       niph->saddr, niph->daddr,
+				       csum_partial((char *)tcph,
+						    sizeof(struct tcphdr), 0));
+
+	addr_type = RTN_UNSPEC;
+	if (hook != NF_IP_FORWARD
+#ifdef CONFIG_BRIDGE_NETFILTER
+	    || (nskb->nf_bridge && nskb->nf_bridge->mask & BRNF_BRIDGED)
+#endif
+	   )
+		addr_type = RTN_LOCAL;
+
+	/* ip_route_me_harder expects skb->dst to be set */
+	dst_hold(oldskb->dst);
+	nskb->dst = oldskb->dst;
+
+	if (ip_route_me_harder_compat(nskb, addr_type))
+		goto free_nskb;
+
+	niph->ttl	= dst_metric(nskb->dst, RTAX_HOPLIMIT);
+	nskb->ip_summed	= CHECKSUM_NONE;
 
 	/* "Never happens" */
 	if (nskb->len > dst_mtu(nskb->dst))
@@ -209,6 +195,8 @@ static void send_reset(struct sk_buff *oldskb, int hook)
 
 	nf_ct_attach(nskb, oldskb);
 
+	niph->tot_len = htons(nskb->len);
+	ip_send_check(niph);
 	NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, nskb, NULL, nskb->dst->dev,
 		dst_output);
 	return;
@@ -234,7 +222,7 @@ static unsigned int reject(struct sk_buff **pskb,
 
 	/* Our naive response construction doesn't deal with IP
            options, and probably shouldn't try. */
-	if ((*pskb)->nh.iph->ihl<<2 != sizeof(struct iphdr))
+	if (ip_hdrlen(*pskb) != sizeof(struct iphdr))
 		return NF_DROP;
 
 	/* WARNING: This code causes reentry within iptables.