nfnetlink_log日志内容参见:tcpdump抓取nflog日志报文
定义的日志结构为nfulnl_logger,类型为NF_LOG_TYPE_ULOG,处理函数为nfulnl_log_packet。
/*
 * Logger descriptor for nfnetlink_log: a ULOG-type logger whose packet
 * callback is nfulnl_log_packet and which is owned by this module.
 */
static struct nf_logger nfulnl_logger __read_mostly = {
	.me	= THIS_MODULE,
	.logfn	= nfulnl_log_packet,
	.type	= NF_LOG_TYPE_ULOG,
	.name	= "nfnetlink_log",
};
默认的日志信息结构如下。报文拷贝长度为0xffff(最大IP报文长度),netlink组为0,队列阈值(qthreshold)为1。
/*
 * Fallback log parameters of type NF_LOG_TYPE_ULOG: copy length 0xffff,
 * netlink group 0, queue threshold 1. Members not named here are
 * zero-initialized.
 */
static const struct nf_loginfo default_loginfo = {
	.u.ulog = {
		.qthreshold	= 1,
		.group		= 0,
		.copy_len	= 0xffff,
	},
	.type = NF_LOG_TYPE_ULOG,
};
由nf_log_register将以上日志结构nfulnl_logger添加到全局loggers数组中。注意这里注册的协议为NFPROTO_UNSPEC(0),对于此协议类型,函数nf_log_register将loggers数组中所有协议的类型为NF_LOG_TYPE_ULOG的日志记录器全部设置为nfulnl_logger(即nfnetlink_log)。
static int __init nfnetlink_log_init(void)
{
status = nfnetlink_subsys_register(&nfulnl_subsys);
if (status < 0) {
pr_err("failed to create netlink socket\n");
goto cleanup_netlink_notifier;
}
status = nf_log_register(NFPROTO_UNSPEC, &nfulnl_logger);
日志处理
首先,如果参数中指定的日志信息是有效的,使用指定信息;否则,使用默认的default_loginfo。如果没有对应组的实例(没有接收者),返回,不生成日志。
static void
nfulnl_log_packet(struct net *net,
u_int8_t pf,
unsigned int hooknum,
const struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
const struct nf_loginfo *li_user,
const char *prefix)
{
struct nfulnl_instance *inst;
const struct nf_loginfo *li;
struct nfnl_log_net *log = nfnl_log_pernet(net);
const struct nfnl_ct_hook *nfnl_ct = NULL;
struct nf_conn *ct = NULL;
enum ip_conntrack_info ctinfo;
if (li_user && li_user->type == NF_LOG_TYPE_ULOG)
li = li_user;
else
li = &default_loginfo;
inst = instance_lookup_get(log, li->u.ulog.group);
if (!inst)
return;
以下计算所有的信息所占的长度。
if (prefix)
plen = strlen(prefix) + 1;
/* FIXME: do we want to make the size calculation conditional based on
* what is actually present? way more branches and checks, but more
* memory efficient... */
size = nlmsg_total_size(sizeof(struct nfgenmsg))
+ nla_total_size(sizeof(struct nfulnl_msg_packet_hdr))
+ nla_total_size(sizeof(u_int32_t)) /* ifindex */
+ nla_total_size(sizeof(u_int32_t)) /* ifindex */
确定队列阈值:默认使用实例的qthreshold;如果参数日志信息中指定了非零的qthreshold,并且其小于实例的值,则改用参数中指定的值(即取两者中的较小者)。其中默认default_loginfo中指定的阈值为1。
qthreshold = inst->qthreshold;
/* per-rule qthreshold overrides per-instance */
if (li->u.ulog.qthreshold)
if (qthreshold > li->u.ulog.qthreshold)
qthreshold = li->u.ulog.qthreshold;
对于实例的模式NFULNL_COPY_PACKET,还将拷贝报文数据的一部分,长度由实例的成员copy_range指定;如果参数日志信息中设置了NF_LOG_F_COPY_LEN标志,则不能超过其中copy_len指定的值;当然,也不能超过报文自身的长度。
switch (inst->copy_mode) {
case NFULNL_COPY_META:
case NFULNL_COPY_NONE:
data_len = 0;
break;
case NFULNL_COPY_PACKET:
data_len = inst->copy_range;
if ((li->u.ulog.flags & NF_LOG_F_COPY_LEN) &&
(li->u.ulog.copy_len < data_len))
data_len = li->u.ulog.copy_len;
if (data_len > skb->len)
data_len = skb->len;
size += nla_total_size(data_len);
break;
case NFULNL_COPY_DISABLED:
default:
goto unlock_and_release;
}
如果实例中已有skb,并且其尾部不能容纳当前的报文,将此skb执行发送操作。
if (inst->skb && size > skb_tailroom(inst->skb)) {
/* either the queue len is too high or we don't have
* enough room in the skb left. flush to userspace. */
__nfulnl_flush(inst);
}
如果实例中没有skb结构,重新分配。增加实例的队列长度,由函数__build_packet_message组建日志报文。
if (!inst->skb) {
inst->skb = nfulnl_alloc_skb(net, inst->peer_portid,
inst->nlbufsiz, size);
if (!inst->skb)
goto alloc_failure;
}
inst->qlen++;
__build_packet_message(log, inst, skb, data_len, pf,
hooknum, in, out, prefix, plen,
nfnl_ct, ct, ctinfo);
如果实例的队列长度达到或超过阈值,执行发送操作;否则,如果定时器尚未启动,启动定时器,超时时发送。
if (inst->qlen >= qthreshold)
__nfulnl_flush(inst);
/* timer_pending always called within inst->lock, so there
* is no chance of a race here */
else if (!timer_pending(&inst->timer)) {
instance_get(inst);
inst->timer.expires = jiffies + (inst->flushtimeout*HZ/100);
add_timer(&inst->timer);
}
日志报文组建
首先,初始化netlink消息头部信息nfgenmsg。
static inline int
__build_packet_message(struct nfnl_log_net *log,...)
{
struct nfulnl_msg_packet_hdr pmsg;
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
sk_buff_data_t old_tail = inst->skb->tail;
nlh = nlmsg_put(inst->skb, 0, 0,
nfnl_msg_type(NFNL_SUBSYS_ULOG, NFULNL_MSG_PACKET),
sizeof(struct nfgenmsg), 0);
if (!nlh)
return -1;
nfmsg = nlmsg_data(nlh);
nfmsg->nfgen_family = pf;
nfmsg->version = NFNETLINK_V0;
nfmsg->res_id = htons(inst->group_num);
设置nfulnl_msg_packet_hdr信息,包括协议和触发的hook点。如果指定了日志前缀字符串,一并记录。
memset(&pmsg, 0, sizeof(pmsg));
pmsg.hw_protocol = skb->protocol;
pmsg.hook = hooknum;
if (nla_put(inst->skb, NFULA_PACKET_HDR, sizeof(pmsg), &pmsg))
goto nla_put_failure;
if (prefix && nla_put(inst->skb, NFULA_PREFIX, plen, prefix))
goto nla_put_failure;
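如果输入设备有效,在没有使能网桥netfilter(CONFIG_BRIDGE_NETFILTER)的时候,直接记录其接口索引值(NFULA_IFINDEX_INDEV)。如果使能了网桥netfilter功能,并且pf等于PF_BRIDGE,即日志是在BRIDGE中生成(ebtables规则),此时indev为物理设备,将其索引记录为NFULA_IFINDEX_PHYSINDEV,并将其所属网桥设备的索引记录为NFULA_IFINDEX_INDEV。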
对于pf不等于PF_BRIDGE的情况,如iptables设置的nflog规则,位于IP层,可见的有可能就是网桥设备,由函数nf_bridge_get_physindev获得物理设备,记录物理设备的索引。
if (indev) {
#if !IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
if (nla_put_be32(inst->skb, NFULA_IFINDEX_INDEV, htonl(indev->ifindex)))
goto nla_put_failure;
#else
if (pf == PF_BRIDGE) {
/* Case 1: outdev is physical input device, we need to
* look for bridge group (when called from netfilter_bridge) */
if (nla_put_be32(inst->skb, NFULA_IFINDEX_PHYSINDEV,
htonl(indev->ifindex)) ||
/* this is the bridge group "brX" */
/* rcu_read_lock()ed by nf_hook_thresh or nf_log_packet. */
nla_put_be32(inst->skb, NFULA_IFINDEX_INDEV,
htonl(br_port_get_rcu(indev)->br->dev->ifindex)))
goto nla_put_failure;
} else {
struct net_device *physindev;
/* Case 2: indev is bridge group, we need to look for
* physical device (when called from ipv4) */
if (nla_put_be32(inst->skb, NFULA_IFINDEX_INDEV, htonl(indev->ifindex)))
goto nla_put_failure;
physindev = nf_bridge_get_physindev(skb);
if (physindev && nla_put_be32(inst->skb, NFULA_IFINDEX_PHYSINDEV,
htonl(physindev->ifindex)))
goto nla_put_failure;
}
#endif
}
对于输出设备有效的情况,与以上处理相同。
if (outdev) {
#if !IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
if (nla_put_be32(inst->skb, NFULA_IFINDEX_OUTDEV,
htonl(outdev->ifindex)))
goto nla_put_failure;
#else
if (pf == PF_BRIDGE) {
/* Case 1: outdev is physical output device, we need to
* look for bridge group (when called from netfilter_bridge) */
if (nla_put_be32(inst->skb, NFULA_IFINDEX_PHYSOUTDEV,
htonl(outdev->ifindex)) ||
/* this is the bridge group "brX" */
/* rcu_read_lock()ed by nf_hook_thresh or nf_log_packet.
*/
nla_put_be32(inst->skb, NFULA_IFINDEX_OUTDEV,
htonl(br_port_get_rcu(outdev)->br->dev->ifindex)))
goto nla_put_failure;
} else {
struct net_device *physoutdev;
/* Case 2: indev is a bridge group, we need to look
* for physical device (when called from ipv4) */
if (nla_put_be32(inst->skb, NFULA_IFINDEX_OUTDEV,
htonl(outdev->ifindex)))
goto nla_put_failure;
physoutdev = nf_bridge_get_physoutdev(skb);
if (physoutdev && nla_put_be32(inst->skb, NFULA_IFINDEX_PHYSOUTDEV,
htonl(physoutdev->ifindex)))
goto nla_put_failure;
}
#endif
}
记录mark值和硬件地址字段值。
if (skb->mark &&
nla_put_be32(inst->skb, NFULA_MARK, htonl(skb->mark)))
goto nla_put_failure;
if (indev && skb->dev && skb->mac_header != skb->network_header) {
struct nfulnl_msg_packet_hw phw;
int len;
memset(&phw, 0, sizeof(phw));
len = dev_parse_header(skb, phw.hw_addr);
if (len > 0) {
phw.hw_addrlen = htons(len);
if (nla_put(inst->skb, NFULA_HWADDR, sizeof(phw), &phw))
goto nla_put_failure;
}
}
记录设备类型、硬件头部长度以及硬件头部数据。
if (indev && skb_mac_header_was_set(skb)) {
if (nla_put_be16(inst->skb, NFULA_HWTYPE, htons(skb->dev->type)) ||
nla_put_be16(inst->skb, NFULA_HWLEN,
htons(skb->dev->hard_header_len)))
goto nla_put_failure;
hwhdrp = skb_mac_header(skb);
if (skb->dev->type == ARPHRD_SIT)
hwhdrp -= ETH_HLEN;
if (hwhdrp >= skb->head && nla_put(inst->skb, NFULA_HWHEADER,
skb->dev->hard_header_len, hwhdrp))
goto nla_put_failure;
}
如果hook点小于等于NF_INET_FORWARD(即NF_INET_PRE_ROUTING、NF_INET_LOCAL_IN和NF_INET_FORWARD),并且报文带有时间戳(skb->tstamp非零),记录时间戳。
if (hooknum <= NF_INET_FORWARD && skb->tstamp) {
struct nfulnl_msg_packet_timestamp ts;
struct timespec64 kts = ktime_to_timespec64(skb->tstamp);
ts.sec = cpu_to_be64(kts.tv_sec);
ts.usec = cpu_to_be64(kts.tv_nsec / NSEC_PER_USEC);
if (nla_put(inst->skb, NFULA_TIMESTAMP, sizeof(ts), &ts))
goto nla_put_failure;
}
记录UID和GID。
/* UID */
sk = skb->sk;
if (sk && sk_fullsock(sk)) {
read_lock_bh(&sk->sk_callback_lock);
if (sk->sk_socket && sk->sk_socket->file) {
struct file *file = sk->sk_socket->file;
const struct cred *cred = file->f_cred;
struct user_namespace *user_ns = inst->peer_user_ns;
__be32 uid = htonl(from_kuid_munged(user_ns, cred->fsuid));
__be32 gid = htonl(from_kgid_munged(user_ns, cred->fsgid));
read_unlock_bh(&sk->sk_callback_lock);
if (nla_put_be32(inst->skb, NFULA_UID, uid) ||
nla_put_be32(inst->skb, NFULA_GID, gid))
goto nla_put_failure;
} else
read_unlock_bh(&sk->sk_callback_lock);
}
根据标志位,记录本地和全局的序号值。
/* local sequence number */
if ((inst->flags & NFULNL_CFG_F_SEQ) &&
nla_put_be32(inst->skb, NFULA_SEQ, htonl(inst->seq++)))
goto nla_put_failure;
/* global sequence number */
if ((inst->flags & NFULNL_CFG_F_SEQ_GLOBAL) &&
nla_put_be32(inst->skb, NFULA_SEQ_GLOBAL,
htonl(atomic_inc_return(&log->global_seq))))
goto nla_put_failure;
添加连接跟踪信息,参见函数ctnetlink_glue_build。对于pf为NFPROTO_NETDEV或者NFPROTO_BRIDGE,添加VLAN信息,及MAC层信息。
if (ct && nfnl_ct->build(inst->skb, ct, ctinfo,
NFULA_CT, NFULA_CT_INFO) < 0)
goto nla_put_failure;
if ((pf == NFPROTO_NETDEV || pf == NFPROTO_BRIDGE) &&
nfulnl_put_bridge(inst, skb) < 0)
goto nla_put_failure;
最后,拷贝报文数据。
if (data_len) {
struct nlattr *nla;
int size = nla_attr_size(data_len);
if (skb_tailroom(inst->skb) < nla_total_size(data_len))
goto nla_put_failure;
nla = skb_put(inst->skb, nla_total_size(data_len));
nla->nla_type = NFULA_PAYLOAD;
nla->nla_len = size;
if (skb_copy_bits(skb, 0, nla_data(nla), data_len))
BUG();
}
nlh->nlmsg_len = inst->skb->tail - old_tail;
return 0;
内核版本 5.10
|