对于TCP协议的报文,由连接跟踪的函数nf_conntrack_tcp_packet进行处理。关于报文的错误检查由函数tcp_error实现,参见:连接跟踪TCP报文错误检查。
int nf_conntrack_tcp_packet(struct nf_conn *ct,
struct sk_buff *skb,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
const struct nf_hook_state *state)
{
struct net *net = nf_ct_net(ct);
struct nf_tcp_net *tn = nf_tcp_pernet(net);
const struct tcphdr *th;
struct tcphdr _tcph;
th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
if (th == NULL)
return -NF_ACCEPT;
if (tcp_error(th, skb, dataoff, state))
return -NF_ACCEPT;
if (!nf_ct_is_confirmed(ct) && !tcp_new(ct, skb, dataoff, th))
return -NF_ACCEPT;
如果此连接还没有确认(IPS_CONFIRMED_BIT),作为一个新的TCP连接进行处理。
/*
 * Report whether IPS_CONFIRMED_BIT is set in ct->status.
 * A conntrack counts as confirmed if it is, or has been, in the hash table.
 * Returns the result of test_bit(): nonzero when the bit is set, 0 otherwise.
 */
static inline int nf_ct_is_confirmed(const struct nf_conn *ct)
{
	int confirmed = test_bit(IPS_CONFIRMED_BIT, &ct->status);

	return confirmed;
}
在函数tcp_new中,对于TCP新连接,第一个报文一定是原始方向,即dir=0;TCP连接的状态为TCP_CONNTRACK_NONE;索引由get_conntrack_index函数计算。根据以上三个参数,由三维数组tcp_conntracks得到连接的新状态new_state。
static noinline bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
unsigned int dataoff, const struct tcphdr *th)
{
enum tcp_conntrack new_state;
struct net *net = nf_ct_net(ct);
const struct nf_tcp_net *tn = nf_tcp_pernet(net);
const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[0];
const struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[1];
/* Don't need lock here: this conntrack not in circulation yet */
new_state = tcp_conntracks[0][get_conntrack_index(th)][TCP_CONNTRACK_NONE];
/* Invalid: delete conntrack */
if (new_state >= TCP_CONNTRACK_MAX) {
pr_debug("nf_ct_tcp: invalid new deleting.\n");
return false;
}
如果新状态为SYN_SENT,即SYN报文,记录下结束序号(TCP起始序号加上数据长度,再加上SYN占用的一个序号)。对于SYN报文,如果开启了fastopen,有可能携带数据。另外记录下窗口值,最小为1。最大序号值td_maxend等于以上计算的结束序号td_end。
tcp_options获取TCP的窗口系数和SACK能力信息。参见:连接跟踪TCP序号检查。
if (new_state == TCP_CONNTRACK_SYN_SENT) {
memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
/* SYN packet */
ct->proto.tcp.seen[0].td_end =
segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th);
ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
if (ct->proto.tcp.seen[0].td_maxwin == 0)
ct->proto.tcp.seen[0].td_maxwin = 1;
ct->proto.tcp.seen[0].td_maxend = ct->proto.tcp.seen[0].td_end;
tcp_options(skb, dataoff, th, &ct->proto.tcp.seen[0]);
反之,如果新状态不是SYN_SENT,并且PROC文件:/proc/sys/net/netfilter/nf_conntrack_tcp_loose值为零(默认为1),不处理此连接。
} else if (tn->tcp_loose == 0) {
/* Don't try to pick up connections. */
return false;
如果nf_conntrack_tcp_loose值为1,在没有接收到SYN报文的(没有看到完整的握手过程)情况下,仍然尝试建立连接信息。
1)td_end记录当前报文的结束序号; 2)td_maxwin记录TCP头部的窗口值; 3)td_maxend记录以上两者之和。
最后,假定连接的两端都使能了SACK能力。由于此种情况下连接信息可能有错误,设置IP_CT_TCP_FLAG_BE_LIBERAL标志,这样在序号检查时,不在窗口内的报文并不丢弃。
} else {
memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
/*
* We are in the middle of a connection,
* its history is lost for us.
* Let's try to use the data from the packet.
*/
ct->proto.tcp.seen[0].td_end =
segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th);
ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
if (ct->proto.tcp.seen[0].td_maxwin == 0)
ct->proto.tcp.seen[0].td_maxwin = 1;
ct->proto.tcp.seen[0].td_maxend =
ct->proto.tcp.seen[0].td_end + ct->proto.tcp.seen[0].td_maxwin;
/* We assume SACK and liberal window checking to handle
* window scaling */
ct->proto.tcp.seen[0].flags =
ct->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM |
IP_CT_TCP_FLAG_BE_LIBERAL;
}
/* tcp_packet will set them */
ct->proto.tcp.last_index = TCP_NONE_SET;
pr_debug("%s: sender end=%u maxend=%u maxwin=%u scale=%i "
"receiver end=%u maxend=%u maxwin=%u scale=%i\n",
__func__,
sender->td_end, sender->td_maxend, sender->td_maxwin,
sender->td_scale,
receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
receiver->td_scale);
return true;
如下,在函数tcp_in_window中,如果报文不符合序号要求,但连接中的报文发送者设置了IP_CT_TCP_FLAG_BE_LIBERAL标志,或者tn->tcp_be_liberal不为零,仍然返回true。
static bool tcp_in_window(const struct nf_conn *ct,
...
const struct tcphdr *tcph)
{
if (before(seq, sender->td_maxend + 1) &&
in_recv_win &&
before(sack, receiver->td_end + 1) &&
after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)) {
...
res = true;
} else {
res = false;
if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL || tn->tcp_be_liberal)
res = true;
if (!res) {
nf_ct_l4proto_log_invalid(skb, ct,
"%s",
before(seq, sender->td_maxend + 1) ?
in_recv_win ?
before(sack, receiver->td_end + 1) ?
after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1) ? "BUG"
: "ACK is under the lower bound (possible overly delayed ACK)"
: "ACK is over the upper bound (ACKed data not seen yet)"
: "SEQ is under the lower bound (already ACKed data retransmitted)"
: "SEQ is over the upper bound (over the window of the receiver)");
内核版本 5.10