netfilter連結跟蹤實現之nf_conntrack_in函式
1、資料包方向
要分析連線跟蹤的實現,我們就要先分析資料包在協議棧中的方向。總的來說主要分為三個方向:本機轉發的資料包、本機接收的資料包、本機產生的資料包。我們之前分析了連線跟蹤只在四個鏈上註冊了鉤子函式,分別是PRE_ROUTING鏈、OUT鏈、LOCAL_IN鏈、POST_ROUTING鏈。PRE_ROUTING鏈上註冊的是ipv4_conntrack_in,OUT鏈上註冊的是ipv4_conntrack_local。LOCAL_IN鏈、POST_ROUTING鏈上註冊的是ipv4_confirm。其中PRE_ROUTING鏈、OUT鏈是連線跟蹤的入口,LOCAL_IN鏈、POST_ROUTING鏈是連線跟蹤的出口。
/*
 * IPv4 connection-tracking hooks: two entry hooks (PRE_ROUTING, LOCAL_OUT)
 * and two confirm hooks (POST_ROUTING, LOCAL_IN).
 */
static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = {
	{
		/* Packets entering the netfilter framework: conntrack is set up on PRE_ROUTING. */
		.hook		= ipv4_conntrack_in,
		.owner		= THIS_MODULE,
		.pf		= NFPROTO_IPV4,
		.hooknum	= NF_INET_PRE_ROUTING,
		.priority	= NF_IP_PRI_CONNTRACK,
	},
	{
		/* Locally generated packets: conntrack is set up on LOCAL_OUT. */
		.hook		= ipv4_conntrack_local,
		.owner		= THIS_MODULE,
		.pf		= NFPROTO_IPV4,
		.hooknum	= NF_INET_LOCAL_OUT,
		.priority	= NF_IP_PRI_CONNTRACK,
	},
	{
		/* Packets leaving the host: conntrack is confirmed on POST_ROUTING. */
		.hook		= ipv4_confirm,
		.owner		= THIS_MODULE,
		.pf		= NFPROTO_IPV4,
		.hooknum	= NF_INET_POST_ROUTING,
		.priority	= NF_IP_PRI_CONNTRACK_CONFIRM,
	},
	{
		/* Packets delivered to this host: conntrack is confirmed on LOCAL_IN. */
		.hook		= ipv4_confirm,
		.owner		= THIS_MODULE,
		.pf		= NFPROTO_IPV4,
		.hooknum	= NF_INET_LOCAL_IN,
		.priority	= NF_IP_PRI_CONNTRACK_CONFIRM,
	},
};
ip分片重組的hook函式
/*
 * IPv4 defragmentation hooks: ipv4_conntrack_defrag is registered on both
 * PRE_ROUTING and LOCAL_OUT at NF_IP_PRI_CONNTRACK_DEFRAG priority.
 */
static struct nf_hook_ops ipv4_defrag_ops[] = {
	{
		/* Reassemble fragmented packets on PRE_ROUTING. */
		.hook		= ipv4_conntrack_defrag,
		.owner		= THIS_MODULE,
		.pf		= PF_INET,
		.hooknum	= NF_INET_PRE_ROUTING,
		.priority	= NF_IP_PRI_CONNTRACK_DEFRAG,
	},
	{
		/* Same hook for the LOCAL_OUT chain. */
		.hook		= ipv4_conntrack_defrag,
		.owner		= THIS_MODULE,
		.pf		= PF_INET,
		.hooknum	= NF_INET_LOCAL_OUT,
		.priority	= NF_IP_PRI_CONNTRACK_DEFRAG,
	},
};
1.1、本機轉發資料包
本機轉發的資料包要經過PRE_ROUTING鏈、POST_ROUTING鏈,本機轉發要經過的hook函式是ipv4_conntrack_defrag->ipv4_conntrack_in->ipv4_confirm。ipv4_conntrack_defrag主要做資料包分片重組,ipv4_conntrack_in建立一條連線跟蹤項,
ipv4_confirm確認一條連線跟蹤項。
1.2、發往本機的資料包
發往本機的資料包經過了PRE_ROUTING鏈、LOCAL_IN鏈,經過的hook函式是ipv4_conntrack_defrag->ipv4_conntrack_in->ipv4_confirm。
1.3、本機產生的資料包
本機產生的資料包經過了OUT鏈、POST_ROUTING鏈,經過的hook函式是ipv4_conntrack_defrag->ipv4_conntrack_local->ipv4_confirm。
2、ipv4_conntrack_defrag
ipv4_conntrack_defrag主要是做ip分片重組。先檢查skb->nfct是否為空:如果不為空而且不是模板(nf_ct_is_template返回0),就說明該資料包的連線跟蹤項已經建立,直接返回;否則,如果資料包是一個分片,就呼叫nf_ct_ipv4_gather_frags做ip分片重組。
/*
 * Netfilter hook that reassembles fragmented IPv4 packets.
 *
 * Returns NF_ACCEPT when the packet already carries a non-template conntrack,
 * when it is not a fragment, or when nf_ct_ipv4_gather_frags() returns zero;
 * returns NF_STOLEN when nf_ct_ipv4_gather_frags() returns non-zero.
 *
 * NOTE: this is an excerpt. "..." marks elided code, which presumably holds
 * the #if lines matching the two #endif lines below.
 */
static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
...
/* A non-template conntrack is already attached to this packet: return right away. */
if (skb->nfct && !nf_ct_is_template((struct nf_conn *)skb->nfct))
return NF_ACCEPT;
#endif
#endif
/* Gather fragments. */
if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
enum ip_defrag_users user = nf_ct_defrag_user(hooknum, skb);
/* Pass the fragment to the reassembly code; a non-zero result makes the hook return NF_STOLEN. */
if (nf_ct_ipv4_gather_frags(skb, user))
return NF_STOLEN;
}
return NF_ACCEPT;
}
nf_ct_ipv4_gather_frags最終呼叫ip_defrag做分片重組
/*
 * Hands the skb to ip_defrag() for fragment reassembly on behalf of `user`.
 *
 * NOTE(review): the previous comment here read "Returns new sk_buff, or NULL",
 * but the signature returns int and the caller treats non-zero as "stop
 * processing". Confirm the exact return contract against the elided body.
 */
static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user)
{
...
/* Reassemble the fragments of this packet. */
err = ip_defrag(skb, user);
...
}
3、ipv4_conntrack_in
ipv4_conntrack_in實際是呼叫的nf_conntrack_in,下面我們來分析nf_conntrack_in。ipv4_conntrack_local函式也是呼叫nf_conntrack_in,所以這個函式是在PRE_ROUTING鏈和OUT鏈呼叫。PRE_ROUTING鏈、OUT鏈是netfilter連線跟蹤的兩個入口鏈,呼叫這個函式主要是初始化一條連線、更新連線狀態。
3.1、判斷skb->nfct
首先判斷skb->nfct:如果不為NULL而且nf_ct_is_template返回0(不是模板),說明資料包已經建立了連線跟蹤項,就直接返回
/*
 * Excerpt of nf_conntrack_in(): handling of a conntrack pointer that is
 * already attached to the skb. "..." marks elided code.
 */
unsigned int
nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
struct sk_buff *skb)
{
...
/* A non-NULL nfct means a conntrack object is already attached to the packet. */
if (skb->nfct) {
/* Previously seen (loopback or untracked)? Ignore. */
tmpl = (struct nf_conn *)skb->nfct;
if (!nf_ct_is_template(tmpl)) {
NF_CT_STAT_INC_ATOMIC(net, ignore);
return NF_ACCEPT;
}
/* It is only a template: keep it in tmpl and detach it from the skb. */
skb->nfct = NULL;
}
...
}
行內函數nf_ct_is_template實際就是判斷ct->status中是否設定了IPS_TEMPLATE_BIT標誌位
/*
 * Report whether IPS_TEMPLATE_BIT is set in the status word of @ct.
 * Returns the result of test_bit(): non-zero when the bit is set.
 */
static inline int nf_ct_is_template(const struct nf_conn *ct)
{
	const int is_tmpl = test_bit(IPS_TEMPLATE_BIT, &ct->status);

	return is_tmpl;
}
3.2、獲取協議號
首先呼叫__nf_ct_l3proto_find根據三層協議號在nf_ct_l3protos獲取之前註冊的struct nf_conntrack_l3proto例項,然後呼叫struct nf_conntrack_l3proto結構體中的get_l4proto函式獲取四層協議號。最後根據三層協議pf、四層協議號protonum在nf_ct_protos獲取之前註冊的struct nf_conntrack_l4proto例項
/*
 * Excerpt of nf_conntrack_in(): resolving the layer-3 and layer-4 protocol
 * trackers for the packet. "..." marks elided code.
 */
unsigned int
nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
struct sk_buff *skb)
{
...
/* rcu_read_lock()ed by nf_hook_slow */
/* Look up the layer-3 struct nf_conntrack_l3proto instance for protocol family pf. */
l3proto = __nf_ct_l3proto_find(pf);
/* Ask the layer-3 tracker for the layer-4 protocol number and the data offset. */
ret = l3proto->get_l4proto(skb, skb_network_offset(skb),
&dataoff, &protonum);
if (ret <= 0) {
pr_debug("not prepared to track yet or error occured\n");
NF_CT_STAT_INC_ATOMIC(net, error);
NF_CT_STAT_INC_ATOMIC(net, invalid);
/* The negated return code becomes the value handed back at "out". */
ret = -ret;
goto out;
}
/* Look up the layer-4 struct nf_conntrack_l4proto instance for (pf, protonum). */
l4proto = __nf_ct_l4proto_find(pf, protonum);
...
}
3.3 resolve_normal_ct
呼叫resolve_normal_ct獲取struct nf_conn結構體、資料包的連線狀態ctinfo,以及資料包是否處於reply方向的標誌set_reply
...
/* Fetch the struct nf_conn for this packet; set_reply and ctinfo are filled in by the callee. */
ct = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum,
l3proto, l4proto, &set_reply, &ctinfo);
...
resolve_normal_ct函式做以下事情
(1)獲取tuple
呼叫nf_ct_get_tuple獲取資料包的tuple,然後在hash表中查詢這個tuple如果沒有找到就新建一個並加入到unconfirmed連結串列中
/*
 * Excerpt of resolve_normal_ct(): building the tuple for the packet.
 * "..." marks elided parameters and code.
 */
static inline struct nf_conn *
resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
...)
{
...
/* Build the tuple for this packet; give up with NULL when that fails. */
if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
dataoff, l3num, protonum, &tuple, l3proto,
l4proto)) {
pr_debug("resolve_normal_ct: Can't get tuple\n");
return NULL;
}
...
}
nf_ct_get_tuple函式主要根據協議號呼叫pkt_to_tuple生成一個tuple,tcp/udp協議就是生成五元組(源ip、目的ip、源埠、目的埠、協議號),icmp協議就是(id、code、type)。
/*
 * Fill *tuple from the packet: the tuple is zeroed, the layer-3 tracker fills
 * its part, then protonum and the ORIGINAL direction are recorded, and finally
 * the layer-4 tracker fills its part.
 * Returns false when the layer-3 pkt_to_tuple() returns 0; otherwise returns
 * the result of the layer-4 pkt_to_tuple().
 * "..." marks the elided parameter list.
 */
bool
nf_ct_get_tuple(const struct sk_buff *skb,
...)
{
memset(tuple, 0, sizeof(*tuple));
tuple->src.l3num = l3num;
/* Layer 3: per the article, stores the source and destination IP taken from the skb. */
if (l3proto->pkt_to_tuple(skb, nhoff, tuple) == 0)
return false;
tuple->dst.protonum = protonum;
/* A tuple built from a packet is always marked as original direction. */
tuple->dst.dir = IP_CT_DIR_ORIGINAL;
/* Layer 4: per the article, TCP/UDP store the source and destination ports,
   ICMP stores type, code and id. */
return l4proto->pkt_to_tuple(skb, dataoff, tuple);
}
(3)判斷tuple是否存在
呼叫nf_conntrack_find_get在hash表中查詢前面的tuple是否存在,如果不存在就呼叫init_conntrack新建一個連線跟蹤項
/*
 * Excerpt of resolve_normal_ct(): looking the tuple up and creating a new
 * entry when it is missing. "..." marks elided parameters and code.
 */
static inline struct nf_conn *
resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
...)
{...
/* Search the hash table for the tuple. */
/* look for tuple match */
h = nf_conntrack_find_get(net, zone, &tuple);
if (!h) {
/* Not found: create a new entry. init_conntrack() may return NULL or an ERR_PTR value. */
h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto,
skb, dataoff);
if (!h)
return NULL;
if (IS_ERR(h))
return (void *)h;
}
/* Convert the tuple-hash node into its enclosing connection-tracking structure. */
ct = nf_ct_tuplehash_to_ctrack(h);
...
}
nf_conntrack_find_get
/*
 * Excerpt of nf_conntrack_find_get(): the lookup itself is delegated to
 * __nf_conntrack_find(). "..." marks elided code.
 */
struct nf_conntrack_tuple_hash *
nf_conntrack_find_get(struct net *net, u16 zone,
const struct nf_conntrack_tuple *tuple)
{
...
begin:
/* Look the tuple up. */
h = __nf_conntrack_find(net, zone, tuple);
...
}
__nf_conntrack_find
/*
 * Excerpt of __nf_conntrack_find(): walks one hash bucket looking for an
 * entry whose tuple and zone both match. "..." marks elided code.
 */
struct nf_conntrack_tuple_hash *
__nf_conntrack_find(struct net *net, u16 zone,
const struct nf_conntrack_tuple *tuple)
{
...
/* Compute the bucket index from net, zone and the tuple. */
unsigned int hash = hash_conntrack(net, zone, tuple);
...
begin:
/* Walk the bucket's list looking for the tuple. */
hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnnode) {
if (nf_ct_tuple_equal(tuple, &h->tuple) &&
nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)) == zone) {
NF_CT_STAT_INC(net, found);
/* NOTE(review): presumably pairs with a local_bh_disable() in the elided code — confirm. */
local_bh_enable();
return h;
}
NF_CT_STAT_INC(net, searched);
}
...
}
init_conntrack
/*
 * Create a new conntrack for `tuple` and put it on the unconfirmed list.
 *
 * Returns a pointer to the ORIGINAL-direction tuplehash of the new conntrack,
 * NULL when the tuple cannot be inverted or the layer-4 tracker rejects the
 * packet, or the ERR_PTR value produced by nf_conntrack_alloc().
 *
 * NOTE: this is an excerpt. The return-type line is not shown and "..." marks
 * elided code (local declarations among it).
 */
init_conntrack(struct net *net, struct nf_conn *tmpl,
const struct nf_conntrack_tuple *tuple,
struct nf_conntrack_l3proto *l3proto,
struct nf_conntrack_l4proto *l4proto,
struct sk_buff *skb,
unsigned int dataoff)
{
...
/* Build the reply-direction tuple, i.e. the inverse of the original one. */
if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, l4proto)) {
pr_debug("Can't invert tuple.\n");
return NULL;
}
/* Allocate a struct nf_conn for the (original, reply) tuple pair. */
ct = nf_conntrack_alloc(net, zone, tuple, &repl_tuple, GFP_ATOMIC);
if (IS_ERR(ct)) {
pr_debug("Can't allocate conntrack.\n");
return (struct nf_conntrack_tuple_hash *)ct;
}
/* Let the layer-4 tracker initialise the new conntrack; free it on refusal. */
if (!l4proto->new(ct, skb, dataoff)) {
nf_conntrack_free(ct);
pr_debug("init conntrack: can't track with proto module\n");
return NULL;
}
nf_ct_acct_ext_add(ct, GFP_ATOMIC);
/* The event-cache masks come from the template when it has one, else 0. */
ecache = tmpl ? nf_ct_ecache_find(tmpl) : NULL;
nf_ct_ecache_ext_add(ct, ecache ? ecache->ctmask : 0,
ecache ? ecache->expmask : 0,
GFP_ATOMIC);
spin_lock_bh(&nf_conntrack_lock);
/* Check whether this tuple was expected by an existing connection. */
exp = nf_ct_find_expectation(net, zone, tuple);
if (exp) {
pr_debug("conntrack: expectation arrives ct=%p exp=%p\n",
ct, exp);
/* Welcome, Mr. Bond. We've been expecting you... */
/* Expected connection: set IPS_EXPECTED_BIT and link ct->master
   to the expectation's master. */
__set_bit(IPS_EXPECTED_BIT, &ct->status);
ct->master = exp->master;
if (exp->helper) {
help = nf_ct_helper_ext_add(ct, GFP_ATOMIC);
if (help)
rcu_assign_pointer(help->helper, exp->helper);
}
#ifdef CONFIG_NF_CONNTRACK_MARK
ct->mark = exp->master->mark;
#endif
#ifdef CONFIG_NF_CONNTRACK_SECMARK
ct->secmark = exp->master->secmark;
#endif
/* Take a reference on the master conntrack now pointed to by ct->master. */
nf_conntrack_get(&ct->master->ct_general);
NF_CT_STAT_INC(net, expect_new);
} else {
__nf_ct_try_assign_helper(ct, tmpl, GFP_ATOMIC);
NF_CT_STAT_INC(net, new);
}
/* Overload tuple linked list to put us in unconfirmed list. */
/* Add the ORIGINAL-direction tuplehash of the new conntrack to the unconfirmed list. */
hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
&net->ct.unconfirmed);
spin_unlock_bh(&nf_conntrack_lock);
/* With the lock released, run the expectation callback (if any) and drop the expectation reference. */
if (exp) {
if (exp->expectfn)
exp->expectfn(ct, exp);
nf_ct_expect_put(exp);
}
return &ct->tuplehash[IP_CT_DIR_ORIGINAL];
}
init_conntrack首先呼叫nf_ct_invert_tuple初始化tuplehash的reply方向的tuple。呼叫nf_conntrack_alloc分配一個struct nf_conn,然後判斷是否是已建立連線的期望連線,如果是就設定連線狀態標誌IPS_EXPECTED_BIT,然後把nf_conn加入到unconfirmed表中。
nf_ct_invert_tuple呼叫invert_tuple初始化reply方向的tuple。就是orig方向的反過來
/*
 * Build in *inverse the reply-direction counterpart of *orig.
 *
 * The result is zeroed first; the layer-3 and layer-4 trackers each invert
 * their own part. Returns false when the layer-3 inversion reports 0,
 * otherwise the result of the layer-4 inversion.
 */
bool
nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
		   const struct nf_conntrack_tuple *orig,
		   const struct nf_conntrack_l3proto *l3proto,
		   const struct nf_conntrack_l4proto *l4proto)
{
	memset(inverse, 0, sizeof(*inverse));

	/* Layer 3: same family, addresses inverted by the l3 tracker. */
	inverse->src.l3num = orig->src.l3num;
	if (!l3proto->invert_tuple(inverse, orig))
		return false;

	/* Same protocol number, opposite direction. */
	inverse->dst.protonum = orig->dst.protonum;
	inverse->dst.dir = !orig->dst.dir;

	/* Layer 4: the l4 tracker inverts its part and decides the outcome. */
	return l4proto->invert_tuple(inverse, orig);
}
nf_conntrack_alloc分配struct nf_conn時要判斷當前連線總數是否大於最大值nf_conntrack_max。如果大於,則根據tuple算出hash值,呼叫early_drop嘗試強制刪除status的IPS_ASSURED_BIT位沒有被置位的連線跟蹤項;如果early_drop失敗,就列印"table full"警告並返回ERR_PTR(-ENOMEM),該資料包被丟棄。
/*
 * Excerpt of nf_conntrack_alloc(): table-full handling and allocation of the
 * struct nf_conn. Both failure paths shown return ERR_PTR(-ENOMEM).
 * "..." marks elided code.
 */
struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone,
const struct nf_conntrack_tuple *orig,
const struct nf_conntrack_tuple *repl,
gfp_t gfp)
{
...
/* The table holds more than nf_conntrack_max entries (a limit of 0 skips
   this check): hash the original tuple and ask early_drop() to evict an
   entry from that bucket. Per the article, early_drop() removes entries
   whose IPS_ASSURED_BIT is not set. */
if (nf_conntrack_max &&
unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) {
unsigned int hash = hash_conntrack(net, zone, orig);
if (!early_drop(net, hash)) {
/* NOTE(review): presumably undoes an increment of ct.count made in the elided code above — confirm. */
atomic_dec(&net->ct.count);
if (net_ratelimit())
printk(KERN_WARNING
"nf_conntrack: table full, dropping"
" packet.\n");
return ERR_PTR(-ENOMEM);
}
}
/* Allocate the struct nf_conn from this namespace's conntrack cache. */
ct = kmem_cache_alloc(net->ct.nf_conntrack_cachep, gfp);
if (ct == NULL) {
pr_debug("nf_conntrack_alloc: Can't alloc conntrack.\n");
atomic_dec(&net->ct.count);
return ERR_PTR(-ENOMEM);
}
...
}
(4)、獲取struct nf_conn
/*
 * Excerpt of resolve_normal_ct(): converting the tuple-hash node into its
 * struct nf_conn. "..." marks elided parameters and code.
 */
static inline struct nf_conn *
resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
...)
{...
/* Get the nf_conn that contains the tuple-hash node h. */
ct = nf_ct_tuplehash_to_ctrack(h);
...
}
根據tuple hash節點(struct nf_conntrack_tuple_hash),透過container_of獲取包含它的struct nf_conn
/*
 * Map a tuple-hash node back to the struct nf_conn that embeds it.
 *
 * @hash: one element of some nf_conn's tuplehash[] array; its
 *        tuple.dst.dir field selects which array slot it occupies.
 *
 * Returns the enclosing struct nf_conn, computed with container_of().
 */
static inline struct nf_conn *
nf_ct_tuplehash_to_ctrack(const struct nf_conntrack_tuple_hash *hash)
{
	return container_of(hash, struct nf_conn,
			    tuplehash[hash->tuple.dst.dir]);
}
(5)設定連結狀態
連結狀態要分以下四種情況:
a)、如果資料包的方向是reply,說明連結的兩個方向都有資料包,就設定資料包狀態為IP_CT_ESTABLISHED + IP_CT_IS_REPLY,並且將set_reply設定為1,表示reply方向有資料了
b)、資料包的方向是orig,但已經收到了reply方向的資料包就將設定資料包狀態為IP_CT_ESTABLISHED。
c)、資料包的方向是orig,還沒有收到reply方向的資料,但該連線是一個期望連線(連線狀態中設定了IPS_EXPECTED_BIT),就設定資料包狀態為IP_CT_RELATED
d)、資料包方向是orig,還沒有收到reply方向的資料,而且不是一個期望連結,就設定資料包狀態為IP_CT_NEW
/*
 * Excerpt of resolve_normal_ct(): deriving the per-packet state (*ctinfo) and
 * the *set_reply flag from the packet direction and the conntrack status bits.
 * "..." marks elided parameters and code.
 */
static inline struct nf_conn *
resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
...)
{
...
/* It exists; we have (non-exclusive) reference. */
/* The packet travels in the reply direction, so traffic has been seen both
   ways: the packet state is IP_CT_ESTABLISHED + IP_CT_IS_REPLY. */
if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) {
*ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY;
/* Please set reply bit if this packet OK */
*set_reply = 1;
} else {
/* Once we've had two way comms, always ESTABLISHED. */
/* Original direction and a reply has already been seen:
   the packet state is IP_CT_ESTABLISHED. */
if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
pr_debug("nf_conntrack_in: normal packet for %p\n", ct);
/* Both directions have carried traffic. */
*ctinfo = IP_CT_ESTABLISHED;
/* No reply seen yet, but the connection was expected:
   the packet state is IP_CT_RELATED. */
} else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) {
pr_debug("nf_conntrack_in: related packet for %p\n",
ct);
*ctinfo = IP_CT_RELATED;
} else {
pr_debug("nf_conntrack_in: new packet for %p\n", ct);
/* No reply seen yet and not an expected connection:
   the packet state is IP_CT_NEW. */
*ctinfo = IP_CT_NEW;
}
*set_reply = 0;
}
...
}
resolve_normal_ct函式程式碼:
/*
 * Find (or create) the conntrack that a packet belongs to and classify the
 * packet relative to it.
 *
 * On success the conntrack is attached to the skb (skb->nfct, skb->nfctinfo),
 * *ctinfo holds the per-packet state and *set_reply is 1 for a
 * reply-direction packet, 0 otherwise.
 *
 * Returns the conntrack, NULL when no tuple can be extracted or
 * init_conntrack() returns NULL, or the ERR_PTR value handed back by
 * init_conntrack().
 */
static inline struct nf_conn *
resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
		  struct sk_buff *skb,
		  unsigned int dataoff,
		  u_int16_t l3num,
		  u_int8_t protonum,
		  struct nf_conntrack_l3proto *l3proto,
		  struct nf_conntrack_l4proto *l4proto,
		  int *set_reply,
		  enum ip_conntrack_info *ctinfo)
{
	struct nf_conntrack_tuple_hash *entry;
	struct nf_conntrack_tuple pkt_tuple;
	struct nf_conn *conn;
	u16 zone_id;

	/* The zone comes from the template when one is supplied. */
	if (tmpl)
		zone_id = nf_ct_zone(tmpl);
	else
		zone_id = NF_CT_DEFAULT_ZONE;

	/* Step 1: describe the packet as a tuple. */
	if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num,
			     protonum, &pkt_tuple, l3proto, l4proto)) {
		pr_debug("resolve_normal_ct: Can't get tuple\n");
		return NULL;
	}

	/* Step 2: look the tuple up; create a fresh conntrack on a miss. */
	entry = nf_conntrack_find_get(net, zone_id, &pkt_tuple);
	if (entry == NULL) {
		entry = init_conntrack(net, tmpl, &pkt_tuple, l3proto,
				       l4proto, skb, dataoff);
		if (entry == NULL)
			return NULL;
		if (IS_ERR(entry))
			return (void *)entry;
	}

	/* Step 3: step from the tuple-hash node to its enclosing nf_conn. */
	conn = nf_ct_tuplehash_to_ctrack(entry);

	/*
	 * Step 4: classify the packet.
	 *   reply direction                      -> ESTABLISHED + IS_REPLY
	 *   original, reply already seen         -> ESTABLISHED
	 *   original, no reply yet, expected     -> RELATED
	 *   original, no reply yet, not expected -> NEW
	 */
	if (NF_CT_DIRECTION(entry) == IP_CT_DIR_REPLY) {
		/* The caller sets the seen-reply bit if the packet is accepted. */
		*set_reply = 1;
		*ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY;
	} else {
		if (test_bit(IPS_SEEN_REPLY_BIT, &conn->status)) {
			pr_debug("nf_conntrack_in: normal packet for %p\n", conn);
			*ctinfo = IP_CT_ESTABLISHED;
		} else if (test_bit(IPS_EXPECTED_BIT, &conn->status)) {
			pr_debug("nf_conntrack_in: related packet for %p\n",
				 conn);
			*ctinfo = IP_CT_RELATED;
		} else {
			pr_debug("nf_conntrack_in: new packet for %p\n", conn);
			*ctinfo = IP_CT_NEW;
		}
		*set_reply = 0;
	}

	/* Step 5: attach the conntrack and the packet state to the skb. */
	skb->nfct = &conn->ct_general;
	skb->nfctinfo = *ctinfo;

	return conn;
}
3.4 reply方向資料包處理
呼叫resolve_normal_ct可以得到該資料包是否是reply方向的:如果是,set_reply就會被設定為1。此時nf_conntrack_in在連線狀態中設定IPS_SEEN_REPLY_BIT標誌位,如果該標誌位是第一次被設定,就呼叫nf_conntrack_event_cache處理IPCT_REPLY狀態改變事件
/*
 * Excerpt of nf_conntrack_in(): recording the first reply-direction packet.
 * "..." marks elided code.
 */
unsigned int
nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
struct sk_buff *skb)
{
...
/* For a reply-direction packet, set IPS_SEEN_REPLY_BIT in the conntrack
   status. When the bit was not set before, cache an IPCT_REPLY event
   (per the article, handled by the nfnetlink module). */
if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
nf_conntrack_event_cache(IPCT_REPLY, ct);
}
nf_conntrack_in程式碼:
/*
 * Connection-tracking entry point for a packet.
 *
 * Steps: skip packets that already carry a non-template conntrack; resolve
 * the layer-3 and layer-4 trackers; run the optional layer-4 error check;
 * find or create the conntrack; let the layer-4 tracker process the packet;
 * finally record the first reply-direction packet.
 *
 * Returns a netfilter verdict. When a tracker callback returns a value <= 0,
 * the negated value is returned as the verdict. A template found on the skb
 * is released with nf_ct_put() before returning through the exit labels.
 */
unsigned int
nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
		struct sk_buff *skb)
{
	struct nf_conntrack_l3proto *l3proto;
	struct nf_conntrack_l4proto *l4proto;
	struct nf_conn *tmpl = NULL;
	struct nf_conn *ct;
	enum ip_conntrack_info ctinfo;
	unsigned int dataoff;
	u_int8_t protonum;
	int reply_seen = 0;
	int verdict;

	if (skb->nfct) {
		/* Previously seen (loopback or untracked)? Ignore. */
		tmpl = (struct nf_conn *)skb->nfct;
		if (!nf_ct_is_template(tmpl)) {
			NF_CT_STAT_INC_ATOMIC(net, ignore);
			return NF_ACCEPT;
		}
		/* Only a template: keep it in tmpl, detach it from the skb. */
		skb->nfct = NULL;
	}

	/* rcu_read_lock()ed by nf_hook_slow */
	l3proto = __nf_ct_l3proto_find(pf);

	/* The layer-3 tracker reports the layer-4 protocol and data offset. */
	verdict = l3proto->get_l4proto(skb, skb_network_offset(skb),
				       &dataoff, &protonum);
	if (verdict <= 0) {
		pr_debug("not prepared to track yet or error occured\n");
		goto bad_packet;
	}

	l4proto = __nf_ct_l4proto_find(pf, protonum);

	/*
	 * It may be a special packet, error, unclean...
	 * The inverse of the return code tells the netfilter core what to
	 * do with the packet.
	 */
	if (l4proto->error) {
		verdict = l4proto->error(net, tmpl, skb, dataoff, &ctinfo,
					 pf, hooknum);
		if (verdict <= 0)
			goto bad_packet;
	}

	/* Find or create the conntrack; fills in reply_seen and ctinfo. */
	ct = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum,
			       l3proto, l4proto, &reply_seen, &ctinfo);
	if (!ct) {
		/* Not valid part of a connection */
		NF_CT_STAT_INC_ATOMIC(net, invalid);
		verdict = NF_ACCEPT;
		goto out;
	}
	if (IS_ERR(ct)) {
		/* Too stressed to deal. */
		NF_CT_STAT_INC_ATOMIC(net, drop);
		verdict = NF_DROP;
		goto out;
	}

	NF_CT_ASSERT(skb->nfct);

	/* Hand the packet to the layer-4 tracker's per-packet handler. */
	verdict = l4proto->packet(ct, skb, dataoff, ctinfo, pf, hooknum);
	if (verdict <= 0) {
		/* Invalid: inverse of the return code tells
		 * the netfilter core what to do */
		pr_debug("nf_conntrack_in: Can't track with proto module\n");
		nf_conntrack_put(skb->nfct);
		skb->nfct = NULL;
		NF_CT_STAT_INC_ATOMIC(net, invalid);
		if (verdict == -NF_DROP)
			NF_CT_STAT_INC_ATOMIC(net, drop);
		verdict = -verdict;
		goto out;
	}

	/*
	 * First reply-direction packet: set IPS_SEEN_REPLY_BIT and cache an
	 * IPCT_REPLY event. Later replies find the bit already set.
	 */
	if (reply_seen && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
		nf_conntrack_event_cache(IPCT_REPLY, ct);
	goto out;

bad_packet:
	/* Shared exit for the two early tracker failures. */
	NF_CT_STAT_INC_ATOMIC(net, error);
	NF_CT_STAT_INC_ATOMIC(net, invalid);
	verdict = -verdict;
out:
	if (tmpl)
		nf_ct_put(tmpl);

	return verdict;
}