diff -urN linux-2.4.30-pre2-20050304-pomng/include/linux/netfilter_ipv4/ip_conntrack_tcp.h linux-2.4.30-pre2-20050304-pomng-tcp/include/linux/netfilter_ipv4/ip_conntrack_tcp.h --- linux-2.4.30-pre2-20050304-pomng/include/linux/netfilter_ipv4/ip_conntrack_tcp.h Sun Mar 6 15:44:32 2005 +++ linux-2.4.30-pre2-20050304-pomng-tcp/include/linux/netfilter_ipv4/ip_conntrack_tcp.h Sun Mar 6 15:47:15 2005 @@ -23,13 +23,16 @@ /* SACK is permitted by the sender */ #define IP_CT_TCP_FLAG_SACK_PERM 0x02 +/* This sender sent FIN first */ +#define IP_CT_TCP_FLAG_CLOSE_INIT 0x04 + struct ip_ct_tcp_state { u_int32_t td_end; /* max of seq + len */ u_int32_t td_maxend; /* max of ack + max(win, 1) */ u_int32_t td_maxwin; /* max(win) */ u_int8_t td_scale; /* window scale factor */ u_int8_t loose; /* used when connection picked up from the middle */ - u_int8_t flags; /* per direction state flags */ + u_int8_t flags; /* per direction options */ }; struct ip_ct_tcp diff -urN linux-2.4.30-pre2-20050304-pomng/net/ipv4/netfilter/ip_conntrack_proto_tcp.c linux-2.4.30-pre2-20050304-pomng-tcp/net/ipv4/netfilter/ip_conntrack_proto_tcp.c --- linux-2.4.30-pre2-20050304-pomng/net/ipv4/netfilter/ip_conntrack_proto_tcp.c Sun Mar 6 15:44:32 2005 +++ linux-2.4.30-pre2-20050304-pomng-tcp/net/ipv4/netfilter/ip_conntrack_proto_tcp.c Sun Mar 6 16:35:09 2005 @@ -258,7 +258,7 @@ * sSS -> sSR Standard open. * sSR -> sSR Retransmitted SYN/ACK. * sES -> sIG Late retransmitted SYN/ACK? - * sFW -> sIG + * sFW -> sIG Might be SYN/ACK answering ignored SYN * sCW -> sIG * sLA -> sIG * sTW -> sIG @@ -277,10 +277,10 @@ * sCL -> sCL */ /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ -/*ack*/ { sIV, sIG, sIV, sES, sCW, sCW, sTW, sTW, sCL, sIV }, +/*ack*/ { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIV }, /* * sSS -> sIG Might be a half-open connection - * sSR -> sIV Simultaneous open. + * sSR -> sSR Might answer late resent SYN. * sES -> sES :-) * sFW -> sCW Normal close request answered by ACK. 
* sCW -> sCW @@ -350,14 +350,19 @@ http://www.nluug.nl/events/sane2000/papers.html http://www.iae.nl/users/guido/papers/tcp_filtering.ps.gz - The boundaries and the conditions are slightly changed: - + The boundaries and the conditions are changed according to RFC793: + the packet must intersect the window (i.e. segments may be + after the right or before the left edge) and thus receivers may ACK + segments after the right edge of the window. + td_maxend = max(sack + max(win,1)) seen in reply packets td_maxwin = max(max(win, 1)) + (sack - ack) seen in sent packets + td_maxwin += seq + len - sender.td_maxend + if seq + len > sender.td_maxend td_end = max(seq + len) seen in sent packets - I. Upper bound for valid data: seq + len <= sender.td_maxend - II. Lower bound for valid data: seq >= sender.td_end - receiver.td_maxwin + I. Upper bound for valid data: seq <= sender.td_maxend + II. Lower bound for valid data: seq + len >= sender.td_end - receiver.td_maxwin III. Upper bound for valid ack: sack <= receiver.td_end IV. Lower bound for valid ack: ack >= receiver.td_end - MAXACKWINDOW @@ -571,10 +576,15 @@ ack = sack = receiver->td_end; } - if (seq == end) + if (seq == end + && (!tcph->rst + || (seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT))) /* * Packets contains no data: we assume it is valid * and check the ack value only. + * However RST segments are always validated by their + * SEQ number, except when seq == 0 (reset sent answering + * SYN). */ seq = end = sender->td_end; @@ -586,24 +596,15 @@ sender->td_end, sender->td_maxend, sender->td_maxwin, sender->td_scale, receiver->td_end, receiver->td_maxend, receiver->td_maxwin, receiver->td_scale); - /* Ignore data over the right edge of the receiver's window. 
*/ - if (after(end, sender->td_maxend) && - before(seq, sender->td_maxend)) { - end = sender->td_maxend; - if (state->stored_seq == TCP_FIN_SET) - state->stored_seq = TCP_ACK_SET; - } DEBUGP("tcp_in_window: I=%i II=%i III=%i IV=%i\n", - before(end, sender->td_maxend + 1) - || before(seq, sender->td_maxend + 1), - after(seq, sender->td_end - receiver->td_maxwin - 1) - || after(end, sender->td_end - receiver->td_maxwin - 1), + before(seq, sender->td_maxend + 1), + after(end, sender->td_end - receiver->td_maxwin - 1), before(sack, receiver->td_end + 1), after(ack, receiver->td_end - MAXACKWINDOW(sender))); if (sender->loose || receiver->loose || - (before(end, sender->td_maxend + 1) && - after(seq, sender->td_end - receiver->td_maxwin - 1) && + (before(seq, sender->td_maxend + 1) && + after(end, sender->td_end - receiver->td_maxwin - 1) && before(sack, receiver->td_end + 1) && after(ack, receiver->td_end - MAXACKWINDOW(sender)))) { /* @@ -620,6 +621,11 @@ sender->td_maxwin = swin; if (after(end, sender->td_end)) sender->td_end = end; + /* + * Update receiver data. + */ + if (after(end, sender->td_maxend)) + receiver->td_maxwin += end - sender->td_maxend; if (after(sack + win, receiver->td_maxend - 1)) { receiver->td_maxend = sack + win; if (win == 0) @@ -652,16 +658,16 @@ if (NET_RATELIMIT(ip_ct_tcp_log_invalid)) nf_log_ip((char *)iph, len, "ip_conntrack_tcp: IGNORED: Out of window data; %s\n", - before(end, sender->td_maxend + 1) ? - after(seq, sender->td_end - receiver->td_maxwin - 1) ? - before(sack, receiver->td_end + 1) ? - after(ack, receiver->td_end - MAXACKWINDOW(sender)) ? "BUG" - : "ACK is under the lower bound (possibly overly delayed ACK)" - : "ACK is over the upper bound (ACKed data has never seen yet)" - : "SEQ is under the lower bound (retransmitted already ACKed data)" + before(seq, sender->td_maxend + 1) ? + after(end, sender->td_end - receiver->td_maxwin - 1) ? + before(sack, receiver->td_end + 1) ? 
+ after(ack, receiver->td_end - MAXACKWINDOW(sender)) ? "BUG" + : "ACK is under the lower bound (possible overly delayed ACK)" + : "ACK is over the upper bound (ACKed data not seen yet)" + : "SEQ is under the lower bound (already ACKed data retransmitted)" : "SEQ is over the upper bound (over the window of the receiver)"); - res = ip_ct_tcp_be_liberal && !tcph->rst; + res = ip_ct_tcp_be_liberal; } DEBUGP("tcp_in_window: res=%i sender end=%u maxend=%u maxwin=%u receiver end=%u maxend=%u maxwin=%u\n", @@ -790,12 +796,11 @@ switch (new_state) { case TCP_CONNTRACK_IGNORE: /* Either SYN in ORIGINAL, - * or SYN/ACK in REPLY - * or ACK in REPLY direction (half-open connection). */ + * or SYN/ACK in REPLY. */ if (index == TCP_SYNACK_SET && conntrack->proto.tcp.stored_seq == TCP_SYN_SET && conntrack->proto.tcp.last_dir != dir - && after(ntohl(tcph->ack_seq), conntrack->proto.tcp.last_seq)) { + && ntohl(tcph->ack_seq) == conntrack->proto.tcp.last_end) { /* This SYN/ACK acknowledges a SYN that we earlier ignored * as invalid. This means that the client and the server * are both in sync, while the firewall is not. We kill @@ -814,6 +819,8 @@ conntrack->proto.tcp.stored_seq = index; conntrack->proto.tcp.last_dir = dir; conntrack->proto.tcp.last_seq = ntohl(tcph->seq); + conntrack->proto.tcp.last_end = + segment_seq_plus_len(ntohl(tcph->seq), len, iph, tcph); WRITE_UNLOCK(&tcp_lock); if (NET_RATELIMIT(ip_ct_tcp_log_invalid)) @@ -831,30 +838,36 @@ "ip_conntrack_tcp: INVALID: invalid state "); return -NF_ACCEPT; case TCP_CONNTRACK_SYN_SENT: - if (old_state >= TCP_CONNTRACK_TIME_WAIT) { + if (old_state < TCP_CONNTRACK_TIME_WAIT) + break; + if ((conntrack->proto.tcp.seen[dir].flags & + IP_CT_TCP_FLAG_CLOSE_INIT) + || after(ntohl(tcph->seq), + conntrack->proto.tcp.seen[dir].td_end)) { /* Attempt to reopen a closed connection. * Delete this connection and look up again. 
*/ WRITE_UNLOCK(&tcp_lock); if (del_timer(&conntrack->timeout)) conntrack->timeout.function((unsigned long)conntrack); return -NF_REPEAT; + } else { + WRITE_UNLOCK(&tcp_lock); + if (NET_RATELIMIT(ip_ct_tcp_log_invalid)) + nf_log_ip((char *)iph, len, + "ip_conntrack_tcp: invalid SYN (ignored)"); + return -NF_ACCEPT; } break; case TCP_CONNTRACK_CLOSE: if (index == TCP_RST_SET - && ((test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status) - && conntrack->proto.tcp.stored_seq <= TCP_SYNACK_SET) - || (!test_bit(IPS_ASSURED_BIT, &conntrack->status) - && conntrack->proto.tcp.stored_seq == TCP_ACK_SET)) - && after(ntohl(tcph->ack_seq), - conntrack->proto.tcp.last_seq)) { - /* Ignore RST closing down invalid SYN or ACK - we had let trough. */ - WRITE_UNLOCK(&tcp_lock); - if (NET_RATELIMIT(ip_ct_tcp_log_invalid)) - nf_log_ip((char *)iph, len, - "ip_conntrack_tcp: INVALID: invalid RST (ignored) "); - return NF_ACCEPT; + && test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status) + && conntrack->proto.tcp.stored_seq == TCP_SYN_SET + && ntohl(tcph->ack_seq) == conntrack->proto.tcp.last_end) { + /* RST sent to invalid SYN we had let through. + * SYN was in window then, tear down connection. + * We skip window checking, because packet might ACK + * segments we ignored in the SYN. */ + goto in_window; } /* Just fall trough */ default: @@ -862,16 +875,15 @@ break; } - old_index = conntrack->proto.tcp.stored_seq; - conntrack->proto.tcp.stored_seq = index; if (!tcp_in_window(&conntrack->proto.tcp, dir, iph, len, tcph)) { - /* Invalid packet, restore previous state */ - conntrack->proto.tcp.stored_seq = old_index; + /* Invalid packet: stored_seq is only updated once the + * window check has passed, so there is nothing to undo. */ WRITE_UNLOCK(&tcp_lock); return -NF_ACCEPT; } - /* If FIN was trimmed off, don't change state. 
*/ - new_state = tcp_conntracks[dir][conntrack->proto.tcp.stored_seq][old_state]; + in_window: + /* From now on we have got in-window packets */ + conntrack->proto.tcp.stored_seq = index; DEBUGP("tcp_conntracks: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n", NIPQUAD(iph->saddr), ntohs(tcph->source), NIPQUAD(iph->daddr), ntohs(tcph->dest), @@ -879,6 +891,10 @@ old_state, new_state); conntrack->proto.tcp.state = new_state; + if (old_state != new_state + && (new_state == TCP_CONNTRACK_FIN_WAIT + || new_state == TCP_CONNTRACK_CLOSE)) + conntrack->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT; timeout = conntrack->proto.tcp.retrans >= ip_ct_tcp_max_retrans && *tcp_timeouts[new_state] > ip_ct_tcp_timeout_max_retrans ? ip_ct_tcp_timeout_max_retrans : *tcp_timeouts[new_state];