Let NAT evict TPROXY conntracks that are sitting in TIME_WAIT when their
tuple is needed for a new mapping or reservation.

Summary of the changes below:
  * ip_conntrack.h: adds the status bit IPS_MAY_DELETE (bit 6), changes
    ip_conntrack_tuple_taken() to return the matching tuple hash entry
    (NULL when the tuple is free) instead of an int, and declares
    __death_by_timeout().
  * ip_conntrack_proto_tcp.c: the TCP_CONNTRACK_TIME_WAIT case sets
    IPS_MAY_DELETE on conntracks that carry IPS_TPROXY.
  * ip_conntrack_core.c: adds __death_by_timeout() and
    __destroy_conntrack(), variants that assert ip_conntrack_lock is
    already write-held.
  * ip_conntrack_standalone.c: exports __death_by_timeout().
  * ip_nat_core.c: the tuple-in-use check and the reservation code
    (__ip_nat_reserved_new_hash, per its debug messages) evict a clashing
    conntrack marked IPS_MAY_DELETE instead of reporting a clash. The NAT
    info is unlinked by hand and info.initialized is cleared before the
    conntrack is destroyed.

NOTE(review): ip_conntrack_tuple_taken() hands back the entry after
READ_UNLOCK without taking a reference; callers dereference h->ctrack
afterwards. Confirm that the callers' locking makes this safe.

Index: linux-2.4.33-tproxy2/include/linux/netfilter_ipv4/ip_conntrack.h
===================================================================
--- linux-2.4.33-tproxy2.orig/include/linux/netfilter_ipv4/ip_conntrack.h	2006-09-12 13:05:54.000000000 +0200
+++ linux-2.4.33-tproxy2/include/linux/netfilter_ipv4/ip_conntrack.h	2006-09-12 13:06:03.000000000 +0200
@@ -54,6 +54,10 @@
 
 	IPS_TPROXY_RELATED_BIT = 5,
 	IPS_TPROXY_RELATED = (1 << IPS_TPROXY_RELATED_BIT),
+
+	/* NAT may delete this conntrack if it needs the tuple */
+	IPS_MAY_DELETE_BIT = 6,
+	IPS_MAY_DELETE = (1 << IPS_MAY_DELETE_BIT),
 #endif
 };
 
@@ -242,7 +246,7 @@
 
 /* Is this tuple taken? (ignoring any belonging to the given
    conntrack). */
-extern int
+extern struct ip_conntrack_tuple_hash *
 ip_conntrack_tuple_taken(const struct ip_conntrack_tuple *tuple,
 			 const struct ip_conntrack *ignored_conntrack);
 
@@ -267,6 +271,8 @@
 extern void ip_ct_refresh(struct ip_conntrack *ct,
 			  unsigned long extra_jiffies);
 
+extern void __death_by_timeout(unsigned long ul_conntrack);
+
 /* These are for NAT. Icky. */
 /* Update TCP window tracking data after NAT successfully mangled the packet */
 extern int ip_conntrack_tcp_update(struct sk_buff *skb,
Index: linux-2.4.33-tproxy2/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
===================================================================
--- linux-2.4.33-tproxy2.orig/net/ipv4/netfilter/ip_conntrack_proto_tcp.c	2006-09-12 13:05:21.000000000 +0200
+++ linux-2.4.33-tproxy2/net/ipv4/netfilter/ip_conntrack_proto_tcp.c	2006-09-12 13:06:03.000000000 +0200
@@ -907,6 +907,15 @@
 					   "ip_ct_tcp: invalid SYN ");
 			return -NF_ACCEPT;
 		}
+#if defined(CONFIG_IP_NF_TPROXY) || defined (CONFIG_IP_NF_TPROXY_MODULE)
+	case TCP_CONNTRACK_TIME_WAIT:
+		/* Let NAT evict this TPROXY conntrack if its tuple clashes with a new one */
+		if (test_bit(IPS_TPROXY_BIT, &conntrack->status)) {
+			DEBUGP(KERN_DEBUG "Marking TPROXY-related TIME_WAIT conntrack entry MAY_DELETE\n");
+			set_bit(IPS_MAY_DELETE_BIT, &conntrack->status);
+		}
+		break;
+#endif
 	case TCP_CONNTRACK_CLOSE:
 		if (index == TCP_RST_SET
 		    && test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)
Index: linux-2.4.33-tproxy2/net/ipv4/netfilter/ip_conntrack_standalone.c
===================================================================
--- linux-2.4.33-tproxy2.orig/net/ipv4/netfilter/ip_conntrack_standalone.c	2006-09-12 13:05:54.000000000 +0200
+++ linux-2.4.33-tproxy2/net/ipv4/netfilter/ip_conntrack_standalone.c	2006-09-12 13:06:03.000000000 +0200
@@ -511,3 +511,4 @@
 EXPORT_SYMBOL(ip_conntrack_hash);
 EXPORT_SYMBOL_GPL(ip_conntrack_find_get);
 EXPORT_SYMBOL_GPL(ip_conntrack_put);
+EXPORT_SYMBOL_GPL(__death_by_timeout);
Index: linux-2.4.33-tproxy2/net/ipv4/netfilter/ip_nat_core.c
===================================================================
--- linux-2.4.33-tproxy2.orig/net/ipv4/netfilter/ip_nat_core.c	2006-09-12 13:05:45.000000000 +0200
+++ linux-2.4.33-tproxy2/net/ipv4/netfilter/ip_nat_core.c	2006-09-12 13:06:03.000000000 +0200
@@ -109,6 +109,33 @@
 	WRITE_UNLOCK(&ip_nat_lock);
 }
 
+/* Unhash conn's NAT info (no-op if uninitialized); caller holds ip_nat_lock for writing. */
+static void __ip_nat_cleanup_conntrack(struct ip_conntrack *conn)
+{
+	struct ip_nat_info *info = &conn->nat.info;
+	unsigned int hs, hp;
+
+	if (!info->initialized)
+		return;
+
+	IP_NF_ASSERT(info->bysource.conntrack);
+	IP_NF_ASSERT(info->byipsproto.conntrack);
+
+	MUST_BE_WRITE_LOCKED(&ip_nat_lock);
+
+	hs = hash_by_src(&conn->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src,
+			 conn->tuplehash[IP_CT_DIR_ORIGINAL]
+			 .tuple.dst.protonum);
+
+	hp = hash_by_ipsproto(conn->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip,
+			      conn->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip,
+			      conn->tuplehash[IP_CT_DIR_REPLY]
+			      .tuple.dst.protonum);
+
+	LIST_DELETE(&bysource[hs], &info->bysource);
+	LIST_DELETE(&byipsproto[hp], &info->byipsproto);
+}
+
 /* We do checksum mangling, so if they were wrong before they're still
  * wrong. Also works for incomplete packets (eg. ICMP dest
  * unreachables.) */
@@ -258,7 +285,8 @@
 			   const struct ip_conntrack_manip *peer)
 {
 	struct ip_nat_reserved *res;
-	unsigned int h;
+	struct ip_conntrack_tuple_hash *h = NULL;
+	unsigned int hash;
 
 	MUST_BE_WRITE_LOCKED(&ip_nat_lock);
 
@@ -293,24 +321,56 @@
 						       .ip = manip->ip,
 						       .u = {.all = manip->u.all}}};
 
-		if (ip_conntrack_tuple_taken(&t, NULL)) {
+		h = ip_conntrack_tuple_taken(&t, NULL);
+
+#if defined(CONFIG_IP_NF_TPROXY) || defined (CONFIG_IP_NF_TPROXY_MODULE)
+		if ((h != NULL) && test_bit(IPS_MAY_DELETE_BIT, &h->ctrack->status)) {
+			DEBUGP("Deleting old conntrack entry for NAT\n");
+			__ip_nat_cleanup_conntrack(h->ctrack);
+			h->ctrack->nat.info.initialized = 0;
+			if (del_timer(&h->ctrack->timeout))
+				h->ctrack->timeout.function((unsigned long)h->ctrack);
+			h = NULL;
+		}
+#endif
+
+		if (h) {
 			DEBUGP("__ip_nat_reserved_new_hash: manip clashes with an already existing connection\n");
 			return NULL;
 		}
 	} else {
 		/* Strong check: we have only a manip, unfortunately we scan the whole conntrack
 		 * hash for possible clashing connections... */
-		struct ip_conntrack_tuple_hash *h = NULL;
 		unsigned int i;
+		int repeat;
-		READ_LOCK(&ip_conntrack_lock);
+		WRITE_LOCK(&ip_conntrack_lock);
 		for (i = 0; !h && i < ip_conntrack_htable_size; i++) {
-			h = LIST_FIND(&ip_conntrack_hash[i], clashing_ct_cmp,
-				      struct ip_conntrack_tuple_hash *, manip);
+			do {
+				repeat = 0;
+				h = LIST_FIND(&ip_conntrack_hash[i], clashing_ct_cmp,
+					      struct ip_conntrack_tuple_hash *, manip);
+#if defined(CONFIG_IP_NF_TPROXY) || defined (CONFIG_IP_NF_TPROXY_MODULE)
+				/* Evict only if del_timer() succeeds.  Otherwise nothing
+				 * here unhashes the entry, the next LIST_FIND would return
+				 * it again and we would spin with ip_conntrack_lock held;
+				 * keep h set so the clash is reported instead. */
+				if ((h != NULL) && test_bit(IPS_MAY_DELETE_BIT, &h->ctrack->status)
+				    && del_timer(&h->ctrack->timeout)) {
+					DEBUGP("Deleting old conntrack entry for NAT\n");
+					__ip_nat_cleanup_conntrack(h->ctrack);
+					h->ctrack->nat.info.initialized = 0;
+					__death_by_timeout((unsigned long)h->ctrack);
+					h = NULL;
+					repeat = 1;
+				}
+#endif
+			} while (repeat);
+
 			/* there's a clashing connection, break */
 			if (h)
 				break;
 		}
-		READ_UNLOCK(&ip_conntrack_lock);
+		WRITE_UNLOCK(&ip_conntrack_lock);
 		if (h) {
 			DEBUGP("__ip_nat_reserved_new_hash: manip clashes with an already existing connection\n");
 			return NULL;
@@ -329,9 +389,9 @@
 	res->peer = *peer;
 
 	/* put it into the hash */
-	h = hash_nat_reserved(manip, peer, proto);
+	hash = hash_nat_reserved(manip, peer, proto);
 	atomic_inc(&ip_nat_reserved_count);
-	list_prepend(&natreserved[h], &res->hash);
+	list_prepend(&natreserved[hash], &res->hash);
 	DEBUGP("__ip_nat_reserved_new_hash: hashed manip proto %u %u.%u.%u.%u:%u\n",
 			proto, NIPQUAD(manip->ip), ntohs(manip->u.all));
 
@@ -506,6 +566,7 @@
 
 	   We could keep a separate hash if this proves too slow. */
 	struct ip_conntrack_tuple reply;
+	struct ip_conntrack_tuple_hash *h;
 #ifdef CONFIG_IP_NF_NAT_NRES
 	struct ip_nat_reserved *res;
 
@@ -519,8 +580,23 @@
 	}
 #endif
 
+	/* check if it's taken by an existing connection */
 	invert_tuplepr(&reply, tuple);
-	return ip_conntrack_tuple_taken(&reply, ignored_conntrack);
+	h = ip_conntrack_tuple_taken(&reply, ignored_conntrack);
+
+#if defined(CONFIG_IP_NF_TPROXY) || defined (CONFIG_IP_NF_TPROXY_MODULE)
+	/* a clashing conntrack marked MAY_DELETE is evicted rather than reported as taken */
+	if ((h != NULL) && test_bit(IPS_MAY_DELETE_BIT, &h->ctrack->status)) {
+		DEBUGP("Deleting old conntrack entry for NAT\n");
+		__ip_nat_cleanup_conntrack(h->ctrack);
+		h->ctrack->nat.info.initialized = 0;
+		if (del_timer(&h->ctrack->timeout))
+			h->ctrack->timeout.function((unsigned long)h->ctrack);
+		h = NULL;
+	}
+#endif
+
+	return h != NULL;
 }
 
 /* Does tuple + the source manip come within the range mr */
Index: linux-2.4.33-tproxy2/net/ipv4/netfilter/ip_conntrack_core.c
===================================================================
--- linux-2.4.33-tproxy2.orig/net/ipv4/netfilter/ip_conntrack_core.c	2006-09-12 13:05:54.000000000 +0200
+++ linux-2.4.33-tproxy2/net/ipv4/netfilter/ip_conntrack_core.c	2006-09-12 13:06:03.000000000 +0200
@@ -367,6 +367,50 @@
 	atomic_dec(&ip_conntrack_count);
 }
 
+static void
+__destroy_conntrack(struct nf_conntrack *nfct)
+{
+	struct ip_conntrack *ct = (struct ip_conntrack *)nfct, *master = NULL;
+	struct ip_conntrack_protocol *proto;
+
+	DEBUGP("__destroy_conntrack(%p)\n", ct);
+	IP_NF_ASSERT(!timer_pending(&ct->timeout));
+
+	MUST_BE_WRITE_LOCKED(&ip_conntrack_lock);
+
+	/* Locked variant of destroy_conntrack(): ip_conntrack_lock is
+	 * write-held here, so nothing below may try to take it again.
+	 * NOTE(review): confirm for the destroy hooks and the master put. */
+	proto = __ip_ct_find_proto(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum);
+	if (proto && proto->destroy)
+		proto->destroy(ct);
+
+	if (ip_conntrack_destroyed)
+		ip_conntrack_destroyed(ct);
+
+	/* Make sure don't leave any orphaned expectations lying around */
+	if (ct->expecting)
+		remove_expectations(ct, 1);
+
+	/* Delete our master expectation */
+	if (ct->master) {
+		if (ct->master->expectant) {
+			/* can't call __unexpect_related here,
+			 * since it would screw up expect_list */
+			list_del(&ct->master->expected_list);
+			master = ct->master->expectant;
+		}
+		kfree(ct->master);
+	}
+
+	if (master)
+		ip_conntrack_put(master);
+
+	DEBUGP("__destroy_conntrack: returning ct=%p to slab\n", ct);
+	kmem_cache_free(ip_conntrack_cachep, ct);
+	atomic_dec(&ip_conntrack_count);
+}
+
 static void death_by_timeout(unsigned long ul_conntrack)
 {
 	struct ip_conntrack *ct = (void *)ul_conntrack;
@@ -377,6 +421,21 @@
 	ip_conntrack_put(ct);
 }
 
+/* For callers that already hold ip_conntrack_lock for writing:
+ * run clean_from_lists() on the conntrack, drop one reference and,
+ * if that was the last one, destroy it in place. */
+void __death_by_timeout(unsigned long ul_conntrack)
+{
+	struct ip_conntrack *ct = (void *)ul_conntrack;
+
+	MUST_BE_WRITE_LOCKED(&ip_conntrack_lock);
+
+	clean_from_lists(ct);
+
+	if (atomic_dec_and_test(&ct->ct_general.use))
+		__destroy_conntrack((struct nf_conntrack *)ct);
+}
+
 static inline int
 conntrack_tuple_cmp(const struct ip_conntrack_tuple_hash *i,
 		    const struct ip_conntrack_tuple *tuple,
@@ -512,7 +571,7 @@
 
 /* Returns true if a connection correspondings to the tuple (required
    for NAT). */
-int
+struct ip_conntrack_tuple_hash *
 ip_conntrack_tuple_taken(const struct ip_conntrack_tuple *tuple,
 			 const struct ip_conntrack *ignored_conntrack)
 {
@@ -522,7 +581,7 @@
 	h = __ip_conntrack_find(tuple, ignored_conntrack);
 	READ_UNLOCK(&ip_conntrack_lock);
 
-	return h != NULL;
+	return h;
 }
 
 /* Returns conntrack if it dealt with ICMP, and filled in skb fields */