From: Jiri Pirko <jpirko@redhat.com> Date: Wed, 18 Feb 2009 18:54:02 +0100 Subject: [net] netfilter: x_tables: add connlimit match Message-id: 20090218175401.GC16181@psychotron.englab.brq.redhat.com O-Subject: [RHEL5.5 patch] BZ483588 netfilter: x_tables: add connlimit match Bugzilla: 483588 RH-Acked-by: Thomas Graf <tgraf@redhat.com> BZ483588 https://bugzilla.redhat.com/show_bug.cgi?id=483588 Description: The iptables package shipped with RHEL 5.3 already has support for the connlimit match, so I backported the connlimit match to the kernel too. I needed to adjust the code to use the old conntrack code, and because of that I had to drop IPv6 support (IPv6 connlimit is not supported by our iptables anyway). I also adjusted xt_connlimit_info to match the one used in iptables. Note that this patch changes the default config file. Upstream: http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff;h=370786f9cfd430cb424f00ce4110e75bb1b95a19 and relevant parts of following: http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff;h=8b6f3f62fea7b85fce8f7d12aabba7b191bf60d2 http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff;h=ba5dc2756cc305c055dbb253b8fcdc459f0f8e73 http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff;h=d3c5ee6d545b5372fd525ebe16988a5b6efeceb0 http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff;h=d2ee3f2c4b1db1320c1efb4dcaceeaf6c7e6c2d3 http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff;h=e2a3123fbe58da9fd3f35cd242087896ace6049f Brew: https://brewweb.devel.redhat.com/taskinfo?taskID=1697463 Test: Booted on x86_64 and tested with self-made test applications (one of them is attached to the bz). Works fine.
Jirka

diff --git a/include/linux/netfilter/xt_connlimit.h b/include/linux/netfilter/xt_connlimit.h
new file mode 100644
index 0000000..f40ea17
--- /dev/null
+++ b/include/linux/netfilter/xt_connlimit.h
@@ -0,0 +1,20 @@
+#ifndef _XT_CONNLIMIT_H
+#define _XT_CONNLIMIT_H
+
+struct xt_connlimit_data;
+
+/*
+ * Parameters of one "connlimit" rule.  The layout was adjusted to match
+ * the structure used by the iptables userspace, so keep it as it is.
+ */
+struct xt_connlimit_info {
+	/* limit: connection-count threshold; inverse: XORed into the verdict */
+	unsigned int limit, inverse;
+	/* netmask ANDed with the source address before counting */
+	u_int32_t mask;
+
+	/* Used internally by the kernel */
+	struct xt_connlimit_data *data __attribute__((aligned(8)));
+};
+
+#endif /* _XT_CONNLIMIT_H */
diff --git a/include/linux/netfilter_ipv4/ip_conntrack_core.h b/include/linux/netfilter_ipv4/ip_conntrack_core.h
index 907d4f5..ed0dd29 100644
--- a/include/linux/netfilter_ipv4/ip_conntrack_core.h
+++ b/include/linux/netfilter_ipv4/ip_conntrack_core.h
@@ -27,6 +27,11 @@ ip_ct_get_tuple(const struct iphdr *iph,
 		const struct ip_conntrack_protocol *protocol);
 
 extern int
+ip_ct_get_tuplepr(const struct sk_buff *skb,
+		  unsigned int nhoff,
+		  struct ip_conntrack_tuple *tuple);
+
+extern int
 ip_ct_invert_tuple(struct ip_conntrack_tuple *inverse,
 		   const struct ip_conntrack_tuple *orig,
 		   const struct ip_conntrack_protocol *protocol);
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
index 7b3e374..caeb391 100644
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ b/net/ipv4/netfilter/ip_conntrack_core.c
@@ -186,6 +186,43 @@ ip_ct_get_tuple(const struct iphdr *iph,
 }
 
+/*
+ * Build the conntrack tuple for the IPv4 packet whose header starts at
+ * offset nhoff in skb.
+ *
+ * Returns 0 if the IP header cannot be read or the packet is a non-first
+ * fragment; otherwise returns the result of ip_ct_get_tuple().
+ */
+int
+ip_ct_get_tuplepr(const struct sk_buff *skb,
+		  unsigned int nhoff,
+		  struct ip_conntrack_tuple *tuple)
+{
+	struct ip_conntrack_protocol *proto;
+	unsigned int dataoff;
+	const struct iphdr *iph;
+	struct iphdr _iph;
+	int ret;
+
+	iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph);
+	if (iph == NULL)
+		return 0;
+	/* only the first fragment carries the transport header */
+	if (iph->frag_off & htons(IP_OFFSET))
+		return 0;
+	dataoff = nhoff + (iph->ihl << 2);
+
+	/*
+	 * ip_conntrack_proto_find_get() returns the protocol handler with a
+	 * reference held; drop it again once the tuple has been built,
+	 * otherwise every call leaks one reference.
+	 */
+	proto = ip_conntrack_proto_find_get(iph->protocol);
+	ret = ip_ct_get_tuple(iph, skb, dataoff, tuple, proto);
+	ip_conntrack_proto_put(proto);
+	return ret;
+}
+
 int
 ip_ct_invert_tuple(struct ip_conntrack_tuple *inverse,
 		   const struct ip_conntrack_tuple *orig,
 		   const struct ip_conntrack_protocol *protocol)
diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c
index 7a9fa04..f75f573 100644
--- a/net/ipv4/netfilter/ip_conntrack_standalone.c
+++ b/net/ipv4/netfilter/ip_conntrack_standalone.c
@@ -915,6 +915,7 @@ EXPORT_PER_CPU_SYMBOL_GPL(ip_conntrack_ecache);
 EXPORT_SYMBOL(ip_conntrack_protocol_register);
 EXPORT_SYMBOL(ip_conntrack_protocol_unregister);
 EXPORT_SYMBOL(ip_ct_get_tuple);
+EXPORT_SYMBOL(ip_ct_get_tuplepr);
 EXPORT_SYMBOL(invert_tuplepr);
 EXPORT_SYMBOL(ip_conntrack_alter_reply);
 EXPORT_SYMBOL(ip_conntrack_destroyed);
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index a9894dd..71a532d 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -227,6 +227,13 @@ config NETFILTER_XT_MATCH_CONNBYTES
 	  If you want to compile it as a module, say M here and read
 	  <file:Documentation/modules.txt>.  If unsure, say `N'.
 
+config NETFILTER_XT_MATCH_CONNLIMIT
+	tristate '"connlimit" match support'
+	depends on NETFILTER_XTABLES && IP_NF_CONNTRACK
+	---help---
+	  This match allows you to match against the number of parallel
+	  connections to a server per client IP address (or address block).
+
 config NETFILTER_XT_MATCH_CONNMARK
 	tristate '"connmark" connection mark match support'
 	depends on NETFILTER_XTABLES
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 6fa4b75..82054a4 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -34,6 +34,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o
 # matches
 obj-$(CONFIG_NETFILTER_XT_MATCH_COMMENT) += xt_comment.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_CONNBYTES) += xt_connbytes.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_CONNLIMIT) += xt_connlimit.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_CONNMARK) += xt_connmark.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_CONNTRACK) += xt_conntrack.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_DCCP) += xt_dccp.o
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
new file mode 100644
index 0000000..839f6ce
--- /dev/null
+++ b/net/netfilter/xt_connlimit.c
@@ -0,0 +1,304 @@
+/*
+ * netfilter module to limit the number of parallel tcp
+ * connections per IP address.
+ *   (c) 2000 Gerd Knorr <kraxel@bytesex.org>
+ *   Nov 2002: Martin Bene <martin.bene@icomedias.com>:
+ *		only ignore TIME_WAIT or gone connections
+ *   (C) CC Computer Consultants GmbH, 2007
+ *   Contact: <jengelh@computergmbh.de>
+ *
+ * based on ...
+ *
+ * Kernel module to match connection tracking information.
+ * GPL (C) 1999  Rusty Russell (rusty@rustcorp.com.au).
+ */
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/jhash.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/random.h>
+#include <linux/skbuff.h>
+#include <linux/spinlock.h>
+#include <linux/netfilter/nf_conntrack_tcp.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_connlimit.h>
+#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <linux/netfilter_ipv4/ip_conntrack_core.h>
+#include <linux/netfilter_ipv4/ip_conntrack_tuple.h>
+
+/* we will save the tuples of all connections we care about */
+struct xt_connlimit_conn {
+	struct list_head list;
+	struct ip_conntrack_tuple tuple;
+};
+
+/*
+ * Per-rule state: the saved connections, bucketed by
+ * connlimit_iphash() of the masked source address.
+ */
+struct xt_connlimit_data {
+	struct list_head iphash[256];
+	spinlock_t lock;	/* held around count_them() */
+};
+
+/* Hash seed; written once in connlimit_mt_init(), read-only afterwards. */
+static u_int32_t connlimit_rnd __read_mostly;
+
+/* Map an address to an index into xt_connlimit_data.iphash (0..255). */
+static inline unsigned int connlimit_iphash(__be32 addr)
+{
+	return jhash_1word((__force __u32)addr, connlimit_rnd) & 0xFF;
+}
+
+/*
+ * A TCP connection in TIME_WAIT or CLOSE state is treated as closed;
+ * connections of any other protocol never are.
+ */
+static inline bool already_closed(const struct ip_conntrack *conn)
+{
+	u_int16_t proto = conn->tuplehash[0].tuple.dst.protonum;
+
+	if (proto == IPPROTO_TCP)
+		return conn->proto.tcp.state == TCP_CONNTRACK_TIME_WAIT ||
+		       conn->proto.tcp.state == TCP_CONNTRACK_CLOSE;
+	else
+		return 0;
+}
+
+/* Non-zero if addr and addr2 agree in every bit set in mask (family unused). */
+static inline unsigned int
+same_source_net(const __be32 addr, const __be32 mask, const __be32 addr2,
+		unsigned int family)
+{
+	return (addr & mask) == (addr2 & mask);
+}
+
+/*
+ * Prune and count the saved connections in the bucket for (addr & mask),
+ * then remember the current tuple unless a live entry for it exists.
+ *
+ * The caller holds data->lock (see connlimit_mt()).
+ *
+ * Returns the number of saved connections from the same source network,
+ * counting a newly added entry, or -ENOMEM if that entry could not be
+ * allocated.
+ */
+static int count_them(struct xt_connlimit_data *data,
+		      const struct ip_conntrack_tuple *tuple, const __be32 addr,
+		      const __be32 mask, const struct xt_match *match)
+{
+	struct ip_conntrack_tuple_hash *found;
+	struct xt_connlimit_conn *conn;
+	struct xt_connlimit_conn *tmp;
+	struct ip_conntrack *found_ct;
+	struct list_head *hash;
+	bool addit = true;
+	int matches = 0;
+
+	hash = &data->iphash[connlimit_iphash(addr & mask)];
+
+	read_lock_bh(&ip_conntrack_lock);
+
+	/* check the saved connections */
+	list_for_each_entry_safe(conn, tmp, hash, list) {
+		found = __ip_conntrack_find(&conn->tuple, NULL);
+		found_ct = NULL;
+
+		if (found != NULL)
+			found_ct = tuplehash_to_ctrack(found);
+
+		if (found_ct != NULL &&
+		    ip_ct_tuple_equal(&conn->tuple, tuple) &&
+		    !already_closed(found_ct))
+			/*
+			 * Just to be sure we have it only once in the list.
+			 * We should not see tuples twice unless someone hooks
+			 * this into a table without "-p tcp --syn".
+			 */
+			addit = false;
+
+		if (found == NULL) {
+			/* this one is gone */
+			list_del(&conn->list);
+			kfree(conn);
+			continue;
+		}
+
+		if (already_closed(found_ct)) {
+			/*
+			 * we do not care about connections which are
+			 * closed already -> ditch it
+			 */
+			list_del(&conn->list);
+			kfree(conn);
+			continue;
+		}
+
+		if (same_source_net(addr, mask, conn->tuple.src.ip,
+				    match->family))
+			/* same source network -> be counted! */
+			++matches;
+	}
+
+	read_unlock_bh(&ip_conntrack_lock);
+
+	if (addit) {
+		/* save the new connection in our list */
+		conn = kzalloc(sizeof(*conn), GFP_ATOMIC);
+		if (conn == NULL)
+			return -ENOMEM;
+		conn->tuple = *tuple;
+		list_add(&conn->list, hash);
+		++matches;
+	}
+
+	return matches;
+}
+
+/*
+ * Match function: count the connections from the packet's source network
+ * and compare the count against info->limit.
+ *
+ * Returns (count > limit) XOR info->inverse.  If no tuple can be obtained
+ * for the packet, or the bookkeeping allocation fails, *hotdrop is set
+ * and 0 is returned.
+ */
+static int
+connlimit_mt(const struct sk_buff *skb, const struct net_device *in,
+	     const struct net_device *out, const struct xt_match *match,
+	     const void *matchinfo, int offset, unsigned int protoff,
+	     int *hotdrop)
+{
+	const struct xt_connlimit_info *info = matchinfo;
+	__be32 addr;
+	struct ip_conntrack_tuple tuple;
+	const struct ip_conntrack_tuple *tuple_ptr = &tuple;
+	enum ip_conntrack_info ctinfo;
+	const struct ip_conntrack *ct;
+	int connections;
+	struct iphdr *iph;
+
+	/* prefer the tracked connection's tuple; else derive one from the packet */
+	ct = ip_conntrack_get(skb, &ctinfo);
+	if (ct != NULL)
+		tuple_ptr = &ct->tuplehash[0].tuple;
+	else if (!ip_ct_get_tuplepr(skb, skb_network_offset(skb), &tuple))
+		goto hotdrop;
+
+	iph = ip_hdr(skb);
+	addr = iph->saddr;
+
+	spin_lock_bh(&info->data->lock);
+	connections = count_them(info->data, tuple_ptr, addr,
+				 info->mask, match);
+	spin_unlock_bh(&info->data->lock);
+
+	if (connections < 0) {
+		/* kmalloc failed, drop it entirely */
+		*hotdrop = 1;
+		return 0;
+	}
+
+	return (connections > info->limit) ^ info->inverse;
+
+ hotdrop:
+	*hotdrop = 1;
+	return 0;
+}
+
+/*
+ * checkentry hook: allocate and initialise the per-rule connection table.
+ * Returns 1 on success, 0 on failure.
+ */
+static int
+connlimit_mt_check(const char *tablename, const void *ip,
+		   const struct xt_match *match, void *matchinfo,
+		   unsigned int matchinfosize, unsigned int hook_mask)
+{
+	struct xt_connlimit_info *info = matchinfo;
+	unsigned int i;
+
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+	if (nf_ct_l3proto_try_module_get(match->family) < 0) {
+		printk(KERN_WARNING "cannot load conntrack support for "
+		       "address family %u\n", match->family);
+		return 0;
+	}
+#endif
+
+	/* init private data */
+	info->data = kmalloc(sizeof(struct xt_connlimit_data), GFP_KERNEL);
+	if (info->data == NULL) {
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+		nf_ct_l3proto_module_put(match->family);
+#endif
+		return 0;
+	}
+
+	spin_lock_init(&info->data->lock);
+	for (i = 0; i < ARRAY_SIZE(info->data->iphash); ++i)
+		INIT_LIST_HEAD(&info->data->iphash[i]);
+
+	return 1;
+}
+
+/* destroy hook: free every saved connection and the per-rule table itself. */
+static void
+connlimit_mt_destroy(const struct xt_match *match, void *matchinfo,
+		     unsigned int matchinfosize)
+{
+	struct xt_connlimit_info *info = matchinfo;
+	struct xt_connlimit_conn *conn;
+	struct xt_connlimit_conn *tmp;
+	struct list_head *hash = info->data->iphash;
+	unsigned int i;
+
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+	nf_ct_l3proto_module_put(match->family);
+#endif
+
+	for (i = 0; i < ARRAY_SIZE(info->data->iphash); ++i) {
+		list_for_each_entry_safe(conn, tmp, &hash[i], list) {
+			list_del(&conn->list);
+			kfree(conn);
+		}
+	}
+
+	kfree(info->data);
+}
+
+static struct xt_match connlimit_mt_reg __read_mostly =
+{
+	.name       = "connlimit",
+	.family     = AF_INET,
+	.checkentry = connlimit_mt_check,
+	.match      = connlimit_mt,
+	.matchsize  = sizeof(struct xt_connlimit_info),
+	.destroy    = connlimit_mt_destroy,
+	.me         = THIS_MODULE,
+};
+
+static int __init connlimit_mt_init(void)
+{
+	/*
+	 * Seed the bucket hash before the match is registered, so that the
+	 * packet path only ever reads connlimit_rnd.  Seeding lazily from
+	 * connlimit_iphash() let two CPUs handling packets for different
+	 * rules race on the seed and hash entries into mismatched buckets.
+	 */
+	get_random_bytes(&connlimit_rnd, sizeof(connlimit_rnd));
+	return xt_register_match(&connlimit_mt_reg);
+}
+
+static void __exit connlimit_mt_exit(void)
+{
+	xt_unregister_match(&connlimit_mt_reg);
+}
+
+module_init(connlimit_mt_init);
+module_exit(connlimit_mt_exit);
+MODULE_AUTHOR("Jan Engelhardt <jengelh@computergmbh.de>");
+MODULE_DESCRIPTION("netfilter xt_connlimit match module");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_connlimit");