netfilter連線跟蹤初始化
連線跟蹤的初始化主要有三個地方
(1)連線跟蹤本身初始化。
(2)在對應的Hook上註冊連線跟蹤的處理函式。
(3)初始化連線跟蹤和三層協議、四層協議相關的函式。
1、連線跟蹤本身初始化
連線跟蹤本身的初始化函式是nf_conntrack_net_init,主要做兩件事情:為連線跟蹤分配slab緩衝、初始化proc檔案系統。
1.1、nf_conntrack_net_init
nf_conntrack_net_init是連線跟蹤初始化的入口函式,主要呼叫nf_conntrack_init函式分配slab緩衝,呼叫nf_conntrack_standalone_init_proc初始化proc檔案系統。
/*
 * Per-namespace conntrack setup: core initialisation first, then the proc
 * interface, then the sysctl interface.  When a step fails, the steps that
 * already succeeded are undone in reverse order and the error is returned.
 * Returns 0 on success.
 */
static int nf_conntrack_net_init(struct net *net)
{
	int err;

	/* Core conntrack initialisation for this namespace. */
	err = nf_conntrack_init(net);
	if (err < 0)
		return err;

	/* proc file system initialisation. */
	err = nf_conntrack_standalone_init_proc(net);
	if (err < 0)
		goto undo_core;

	/* Default values: checksum on, invalid-packet logging off. */
	net->ct.sysctl_checksum = 1;
	net->ct.sysctl_log_invalid = 0;

	err = nf_conntrack_standalone_init_sysctl(net);
	if (err < 0)
		goto undo_proc;

	return 0;

undo_proc:
	nf_conntrack_standalone_fini_proc(net);
undo_core:
	nf_conntrack_cleanup(net);
	return err;
}
1.2、nf_conntrack_init
nf_conntrack_init主要呼叫nf_conntrack_init_init_net(僅在net為init_net時呼叫)和nf_conntrack_init_net。
int nf_conntrack_init(struct net *net) { int ret; if (net_eq(net, &init_net)) { ret = nf_conntrack_init_init_net(); if (ret < 0) goto out_init_net; } ret = nf_conntrack_init_net(net); if (ret < 0) goto out_net; if (net_eq(net, &init_net)) { /* For use by REJECT target */ rcu_assign_pointer(ip_ct_attach, nf_conntrack_attach); rcu_assign_pointer(nf_ct_destroy, destroy_conntrack); /* Howto get NAT offsets */ rcu_assign_pointer(nf_ct_nat_offset, NULL); } return 0; out_net: if (net_eq(net, &init_net)) nf_conntrack_cleanup_init_net(); out_init_net: return ret; }
1.3、nf_conntrack_init_init_net
/*
 * Global part of conntrack initialisation.
 *
 * Picks a hash table size when none has been set, derives nf_conntrack_max
 * from it, initialises the protocol and helper subsystems (plus the zone
 * extension when CONFIG_NF_CONNTRACK_ZONES is set) and prepares the fake
 * "untracked" conntrack entry.
 *
 * Returns 0 on success; on failure the steps already completed are undone
 * and the negative error value is returned.
 */
static int nf_conntrack_init_init_net(void)
{
	/* Multiplier used when the table size was supplied by the user. */
	int factor = 8;
	int err;

	if (nf_conntrack_htable_size == 0) {
		/*
		 * Idea from tcp.c: use 1/16384 of memory.  On i386: 32MB
		 * machine has 512 buckets. >= 1GB machines have 16384 buckets.
		 */
		nf_conntrack_htable_size =
			((totalram_pages << PAGE_SHIFT) / 16384) /
			sizeof(struct hlist_head);

		if (totalram_pages > (1024 * 1024 * 1024 / PAGE_SIZE)) {
			/* More than 1GB of memory: use 16384 buckets. */
			nf_conntrack_htable_size = 16384;
		} else if (nf_conntrack_htable_size < 32) {
			/* Lower bound of 32 buckets. */
			nf_conntrack_htable_size = 32;
		}

		/*
		 * Use a max. factor of four by default to get the same max as
		 * with the old struct list_heads. When a table size is given
		 * we use the old value of 8 to avoid reducing the max.
		 * entries.
		 */
		factor = 4;
	}

	nf_conntrack_max = factor * nf_conntrack_htable_size;

	printk(KERN_INFO "nf_conntrack version %s (%u buckets, %d max)\n",
	       NF_CONNTRACK_VERSION, nf_conntrack_htable_size,
	       nf_conntrack_max);

	/* Protocol tracker initialisation. */
	err = nf_conntrack_proto_init();
	if (err < 0)
		goto fail_proto;

	err = nf_conntrack_helper_init();
	if (err < 0)
		goto fail_helper;

#ifdef CONFIG_NF_CONNTRACK_ZONES
	err = nf_ct_extend_register(&nf_ct_zone_extend);
	if (err < 0)
		goto fail_extend;
#endif

	/* Set up fake conntrack: to never be deleted, not in any hashes */
#ifdef CONFIG_NET_NS
	nf_conntrack_untracked.ct_net = &init_net;
#endif
	atomic_set(&nf_conntrack_untracked.ct_general.use, 1);
	/* ... and mark it so that it looks like a confirmed connection. */
	set_bit(IPS_CONFIRMED_BIT, &nf_conntrack_untracked.status);

	return 0;

#ifdef CONFIG_NF_CONNTRACK_ZONES
fail_extend:
	nf_conntrack_helper_fini();
#endif
fail_helper:
	nf_conntrack_proto_fini();
fail_proto:
	return err;
}
2、註冊連線跟蹤的hook函式
2.1、ipv4_defrag_ops
在PREROUTING和OUT鏈上註冊ipv4_conntrack_defrag,這個函式主要是對分片的資料包進行重組(defrag)。PREROUTING和OUT鏈是netfilter框架的兩個入口,一個是接受外界資料包的入口,一個是本機產生資料包的入口。
/*
 * Hook table for ipv4_conntrack_defrag: the same handler is attached to
 * the PRE_ROUTING and LOCAL_OUT hooks of PF_INET, both at priority
 * NF_IP_PRI_CONNTRACK_DEFRAG.
 */
static struct nf_hook_ops ipv4_defrag_ops[] = {
	{
		/* PRE_ROUTING entry. */
		.pf       = PF_INET,
		.hooknum  = NF_INET_PRE_ROUTING,
		.priority = NF_IP_PRI_CONNTRACK_DEFRAG,
		.hook     = ipv4_conntrack_defrag,
		.owner    = THIS_MODULE,
	},
	{
		/* LOCAL_OUT entry. */
		.pf       = PF_INET,
		.hooknum  = NF_INET_LOCAL_OUT,
		.priority = NF_IP_PRI_CONNTRACK_DEFRAG,
		.hook     = ipv4_conntrack_defrag,
		.owner    = THIS_MODULE,
	},
};
2.2、ipv4_conntrack_ops
netfilter框架有兩個入口(PREROUTING鏈、OUT鏈),兩個出口(LOCAL_IN鏈、POSTROUTING鏈)。PREROUTING是接受外界資料包進入的第一個鏈,OUT鏈是本機產生資料包進入的第一個鏈;LOCAL_IN是本機接受資料包的最後一個鏈,POSTROUTING是資料包發送出去前的最後一個鏈。所以在PREROUTING鏈上註冊ipv4_conntrack_in、在OUT鏈上註冊ipv4_conntrack_local來建立連線跟蹤,在LOCAL_IN和POSTROUTING鏈上註冊ipv4_confirm確認一條連線跟蹤。
/*
 * Conntrack hook table for IPv4.  The two entries at priority
 * NF_IP_PRI_CONNTRACK sit on PRE_ROUTING and LOCAL_OUT; the two
 * ipv4_confirm entries at NF_IP_PRI_CONNTRACK_CONFIRM sit on POST_ROUTING
 * and LOCAL_IN.
 */
static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = {
	{
		/* PRE_ROUTING: conntrack handler for incoming packets. */
		.pf       = NFPROTO_IPV4,
		.hooknum  = NF_INET_PRE_ROUTING,
		.priority = NF_IP_PRI_CONNTRACK,
		.hook     = ipv4_conntrack_in,
		.owner    = THIS_MODULE,
	},
	{
		/* LOCAL_OUT: conntrack handler for locally generated packets. */
		.pf       = NFPROTO_IPV4,
		.hooknum  = NF_INET_LOCAL_OUT,
		.priority = NF_IP_PRI_CONNTRACK,
		.hook     = ipv4_conntrack_local,
		.owner    = THIS_MODULE,
	},
	{
		/* POST_ROUTING: confirm the connection as the packet leaves. */
		.pf       = NFPROTO_IPV4,
		.hooknum  = NF_INET_POST_ROUTING,
		.priority = NF_IP_PRI_CONNTRACK_CONFIRM,
		.hook     = ipv4_confirm,
		.owner    = THIS_MODULE,
	},
	{
		/* LOCAL_IN: confirm the connection for packets delivered locally. */
		.pf       = NFPROTO_IPV4,
		.hooknum  = NF_INET_LOCAL_IN,
		.priority = NF_IP_PRI_CONNTRACK_CONFIRM,
		.hook     = ipv4_confirm,
		.owner    = THIS_MODULE,
	},
};
2.3、註冊hook
呼叫nf_register_hooks註冊連線跟蹤的hook函式
/*
 * Excerpt of the module init function; the parts marked "..." are elided
 * in this listing.  Shown here: registration of the ipv4_conntrack_ops
 * hook table.
 */
static int __init nf_conntrack_l3proto_ipv4_init(void)
{
...
/* Register every entry of ipv4_conntrack_ops as a netfilter hook. */
ret = nf_register_hooks(ipv4_conntrack_ops,
ARRAY_SIZE(ipv4_conntrack_ops));
if (ret < 0) {
pr_err("nf_conntrack_ipv4: can't register hooks.\n");
/* NOTE(review): the cleanup_ipv4 label is in the elided part. */
goto cleanup_ipv4;
}
...
}
nf_register_hooks函式
int nf_register_hooks(struct nf_hook_ops *reg, unsigned int n)
{
unsigned int i;
int err = 0;
for (i = 0; i < n; i++) {
err = nf_register_hook(®[i]);
if (err)
goto err;
}
return err;
err:
if (i > 0)
nf_unregister_hooks(reg, i);
return err;
}
nf_register_hook函式
/*
 * Insert @reg into the hook list selected by its pf and hooknum, keeping
 * the list ordered by ascending priority value.
 *
 * Returns 0 on success, or the negative value from
 * mutex_lock_interruptible() if taking nf_hook_mutex was interrupted.
 */
int nf_register_hook(struct nf_hook_ops *reg)
{
	struct nf_hook_ops *elem;
	int err;

	err = mutex_lock_interruptible(&nf_hook_mutex);
	if (err < 0)
		return err;

	/* nf_hooks is indexed by protocol family first, hook number second. */
	list_for_each_entry(elem, &nf_hooks[reg->pf][reg->hooknum], list) {
		/* Stop at the first entry whose priority value is larger. */
		if (reg->priority < elem->priority)
			break;
	}
	/*
	 * Link @reg in just before elem (was "®->list" in the listing, a
	 * mis-decoded "&reg->list").  If the loop ran to the end, elem->list
	 * is the list head, so @reg goes to the tail.
	 */
	list_add_rcu(&reg->list, elem->list.prev);
	mutex_unlock(&nf_hook_mutex);
	return 0;
}
nf_hooks是一個由連結串列頭(list_head)組成的二維陣列
/*
 * Table of hook lists: the first index is the protocol family
 * (NFPROTO_NUMPROTO entries), the second is the hook number
 * (NF_MAX_HOOKS entries).  Each element is the head of one list.
 */
extern struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
3、註冊三層、四層相關的處理函式
ip、icmp、udp協議都一樣這裡就舉例tcp協議
呼叫nf_conntrack_l4proto_register函式註冊nf_conntrack_l4proto_tcp4到全域性陣列nf_ct_protos中
/*
 * Excerpt of the module init function; the parts marked "..." are elided
 * in this listing.  Shown here: registration of the TCP, UDP and ICMP
 * layer-4 trackers followed by the IPv4 layer-3 tracker.  Each failure
 * jumps to a cleanup label (defined in the elided part) for the step
 * registered before it.
 */
static int __init nf_conntrack_l3proto_ipv4_init(void)
{
...
/* Register the TCP layer-4 protocol tracker. */
ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_tcp4);
if (ret < 0) {
pr_err("nf_conntrack_ipv4: can't register tcp.\n");
goto cleanup_sockopt;
}
/* Register the UDP layer-4 protocol tracker. */
ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_udp4);
if (ret < 0) {
pr_err("nf_conntrack_ipv4: can't register udp.\n");
goto cleanup_tcp;
}
/* Register the ICMP layer-4 protocol tracker. */
ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_icmp);
if (ret < 0) {
pr_err("nf_conntrack_ipv4: can't register icmp.\n");
goto cleanup_udp;
}
/* Register the IPv4 layer-3 protocol tracker. */
ret = nf_conntrack_l3proto_register(&nf_conntrack_l3proto_ipv4);
if (ret < 0) {
pr_err("nf_conntrack_ipv4: can't register ipv4\n");
goto cleanup_icmp;
}
...
}
nf_conntrack_l4proto_register
/*
 * Excerpt of nf_conntrack_l4proto_register(); the part marked "..." is
 * elided in this listing.  Shown here: the step that stores @l4proto in
 * the global nf_ct_protos table, and the unlock/return tail.
 */
int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto)
{
...
/* Store l4proto in nf_ct_protos, indexed by its l3proto and l4proto. */
rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
l4proto);
/* NOTE(review): the matching lock and the assignment of ret are in the elided part. */
out_unlock:
mutex_unlock(&nf_ct_proto_mutex);
return ret;
}
nf_conntrack_l4proto_tcp4
/*
 * Layer-4 protocol descriptor for TCP over IPv4 (PF_INET / IPPROTO_TCP).
 * The netlink callbacks are only present when CONFIG_NF_CT_NETLINK is
 * built in or as a module; the sysctl tables only with CONFIG_SYSCTL.
 */
struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly =
{
	.l3proto		= PF_INET,
	.l4proto		= IPPROTO_TCP,
	.name			= "tcp",
	/* Reads the source and destination ports from the packet into a tuple. */
	.pkt_to_tuple		= tcp_pkt_to_tuple,
	/* Builds the tuple with source and destination ports exchanged. */
	.invert_tuple		= tcp_invert_tuple,
	/* Output callbacks for the tuple and for the conntrack entry. */
	.print_tuple		= tcp_print_tuple,
	.print_conntrack	= tcp_print_conntrack,
	/* Packet, new-connection and error callbacks. */
	.packet			= tcp_packet,
	.new			= tcp_new,
	.error			= tcp_error,
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
	/* Netlink attribute conversion callbacks and policy. */
	.to_nlattr		= tcp_to_nlattr,
	.nlattr_size		= tcp_nlattr_size,
	.from_nlattr		= nlattr_to_tcp,
	.tuple_to_nlattr	= nf_ct_port_tuple_to_nlattr,
	.nlattr_to_tuple	= nf_ct_port_nlattr_to_tuple,
	.nlattr_tuple_size	= tcp_nlattr_tuple_size,
	.nla_policy		= nf_ct_port_nla_policy,
#endif
#ifdef CONFIG_SYSCTL
	/* sysctl tables for this protocol. */
	.ctl_table_users	= &tcp_sysctl_table_users,
	.ctl_table_header	= &tcp_sysctl_header,
	.ctl_table		= tcp_sysctl_table,
#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
	.ctl_compat_table	= tcp_compat_sysctl_table,
#endif
#endif
};
tcp_pkt_to_tuple從資料包skb中獲取源埠、目的埠
static bool tcp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
struct nf_conntrack_tuple *tuple)
{
const struct tcphdr *hp;
struct tcphdr _hdr;
/* Actually only need first 8 bytes. */
hp = skb_header_pointer(skb, dataoff, 8, &_hdr);
if (hp == NULL)
return false;
/*獲取源埠*/
tuple->src.u.tcp.port = hp->source;
/*獲取目的埠*/
tuple->dst.u.tcp.port = hp->dest;
return true;
}
tcp_invert_tuple將orig方向的源埠、目的埠對調後賦值給reply方向
/*
 * Fill the TCP ports of @tuple with those of @orig exchanged: the source
 * port of @tuple is the destination port of @orig, and the other way round.
 * Always returns true.
 */
static bool tcp_invert_tuple(struct nf_conntrack_tuple *tuple,
			     const struct nf_conntrack_tuple *orig)
{
	/* Source is written first, then destination; keep this order. */
	tuple->src.u.tcp.port = orig->dst.u.tcp.port;
	tuple->dst.u.tcp.port = orig->src.u.tcp.port;

	return true;
}