Tcpdump抓包核心程式碼分析
註冊pf_packet協議
.create函式是在PF_PACKET型別socket建立時呼叫的,呼叫時註冊了鉤子函式;具體實現請看packet_create函式。
static const struct net_proto_familypacket_family_ops = {
.family= PF_PACKET,
.create= packet_create,
.owner = THIS_MODULE,
};
/* Module init: registers the PF_PACKET family so socket(AF_PACKET, ...)
 * reaches packet_create().  Unrelated init code was elided in the
 * original excerpt. */
static int __init packet_init(void)
{
	/* ... */
	sock_register(&packet_family_ops);
	/* ... */
}
建立SOCK_PACKET sock時註冊回撥函式
/*
* Create a packet of type SOCK_PACKET.
*/
static int packet_create(struct net *net,struct socket *sock, int protocol,
int kern)
{
structsock *sk;
structpacket_sock *po;
__be16proto = (__force __be16)protocol; /* weird, but documented */
interr;
if(!ns_capable(net->user_ns, CAP_NET_RAW))
return-EPERM;
if(sock->type != SOCK_DGRAM && sock->type != SOCK_RAW &&
sock->type != SOCK_PACKET)
return-ESOCKTNOSUPPORT;
sock->state= SS_UNCONNECTED;
err= -ENOBUFS;
sk= sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto);
if(sk == NULL)
gotoout;
sock->ops= &packet_ops;
if(sock->type == SOCK_PACKET)
sock->ops= &packet_ops_spkt;
sock_init_data(sock,sk);
po= pkt_sk(sk);
sk->sk_family= PF_PACKET;
po->num= proto;
err= packet_alloc_pending(po);
if(err)
gotoout2;
packet_cached_dev_reset(po);
sk->sk_destruct= packet_sock_destruct;
sk_refcnt_debug_inc(sk);
/*
* Attacha protocol block
*/
spin_lock_init(&po->bind_lock);
mutex_init(&po->pg_vec_lock);
po->prot_hook.func= packet_rcv;
//註冊處理函式
if (sock->type == SOCK_PACKET)
po->prot_hook.func =packet_rcv_spkt;
po->prot_hook.af_packet_priv= sk;
if (proto) {
po->prot_hook.type =proto;
將這個socket掛載到ptype_all連結串列上
register_prot_hook(sk);
}
mutex_lock(&net->packet.sklist_lock);
sk_add_node_rcu(sk,&net->packet.sklist);
mutex_unlock(&net->packet.sklist_lock);
preempt_disable();
sock_prot_inuse_add(net,&packet_proto, 1);
preempt_enable();
return0;
out2:
sk_free(sk);
out:
returnerr;
}
接收方向核心抓包函式
兩個呼叫場景,一個是網絡卡啟用NAPI,在輪詢流程中呼叫process_backlog;另外一個是非NAPI場景,直接netif_receive_skb接收資料報文,遞交給網路層。
/* Core ingress path: delivers skb to taps (ptype_all), VLAN/rx_handler
 * processing, then the exact L3 protocol handler (e.g. ip_rcv). */
static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
{
	struct packet_type *ptype, *pt_prev;
	rx_handler_func_t *rx_handler;
	struct net_device *orig_dev;
	struct net_device *null_or_dev;
	bool deliver_exact = false;
	int ret = NET_RX_DROP;
	__be16 type;

	net_timestamp_check(!netdev_tstamp_prequeue, skb);

	trace_netif_receive_skb(skb);

	orig_dev = skb->dev;

	skb_reset_network_header(skb);
	if (!skb_transport_header_was_set(skb))
		skb_reset_transport_header(skb);
	skb_reset_mac_len(skb);

	pt_prev = NULL;

another_round:
	skb->skb_iif = skb->dev->ifindex;

	__this_cpu_inc(softnet_data.processed);

	if (skb->protocol == cpu_to_be16(ETH_P_8021Q) ||
	    skb->protocol == cpu_to_be16(ETH_P_8021AD)) {
		skb = vlan_untag(skb);
		if (unlikely(!skb))
			goto out;
	}

#ifdef CONFIG_NET_CLS_ACT
	if (skb->tc_verd & TC_NCLS) {
		skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
		goto ncls;
	}
#endif

	if (pfmemalloc)
		goto skip_taps;

	/* Walk the taps registered when a tcpdump (PF_PACKET) socket was
	 * created; each one gets its own copy of the packet. */
	list_for_each_entry_rcu(ptype, &ptype_all, list) {
		if (!ptype->dev || ptype->dev == skb->dev) {
			if (pt_prev)
				/* copy the packet to the previous tap */
				ret = deliver_skb(skb, pt_prev, orig_dev);
			pt_prev = ptype;
		}
	}

skip_taps:
#ifdef CONFIG_NET_CLS_ACT
	skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
	if (!skb)
		goto out;
ncls:
#endif
	if (pfmemalloc && !skb_pfmemalloc_protocol(skb))
		goto drop;

	if (skb_vlan_tag_present(skb)) {
		if (pt_prev) {
			ret = deliver_skb(skb, pt_prev, orig_dev);
			pt_prev = NULL;
		}
		if (vlan_do_receive(&skb))
			goto another_round;
		else if (unlikely(!skb))
			goto out;
	}

	rx_handler = rcu_dereference(skb->dev->rx_handler);
	if (rx_handler) {
		if (pt_prev) {
			ret = deliver_skb(skb, pt_prev, orig_dev);
			pt_prev = NULL;
		}
		switch (rx_handler(&skb)) {
		case RX_HANDLER_CONSUMED:
			ret = NET_RX_SUCCESS;
			goto out;
		case RX_HANDLER_ANOTHER:
			goto another_round;
		case RX_HANDLER_EXACT:
			deliver_exact = true;
			/* fall through */
		case RX_HANDLER_PASS:
			break;
		default:
			BUG();
		}
	}

	if (unlikely(skb_vlan_tag_present(skb))) {
		if (skb_vlan_tag_get_id(skb))
			skb->pkt_type = PACKET_OTHERHOST;
		/* Note: we might in the future use prio bits
		 * and set skb->priority like in vlan_do_receive()
		 * For the time being, just ignore Priority Code Point
		 */
		skb->vlan_tci = 0;
	}

	/* deliver only exact match when indicated */
	null_or_dev = deliver_exact ? skb->dev : NULL;

	type = skb->protocol;
	/* Real protocol delivery: for IPv4 frames this ends up calling
	 * ip_rcv() via the matching ptype_base entry. */
	list_for_each_entry_rcu(ptype,
			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
		if (ptype->type == type &&
		    (ptype->dev == null_or_dev || ptype->dev == skb->dev ||
		     ptype->dev == orig_dev)) {
			if (pt_prev)
				ret = deliver_skb(skb, pt_prev, orig_dev);
			pt_prev = ptype;
		}
	}

	if (pt_prev) {
		if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
			goto drop;
		else
			ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
	} else {
drop:
		atomic_long_inc(&skb->dev->rx_dropped);
		kfree_skb(skb);
		/* Jamal, now you will not able to escape explaining
		 * me how you were going to use this. :-)
		 */
		ret = NET_RX_DROP;
	}

out:
	return ret;
}
傳送方向核心抓包函式
資料傳送也存在兩個分支,一個是呼叫dev_queue_xmit直接將資料遞交到網絡卡(沒有配置qdisc);另外一個分支是如果配置了qdisc,dev_queue_xmit流程檢查是否配置了queue,如果配置了將呼叫__dev_xmit_skb函式將資料放入到了qdisc佇列中,然後等待發送中斷函式net_tx_action輪詢呼叫,進而觸發拷貝呼叫流程。
/*
* Support routine. Sends outgoing frames toany network
* taps currently in use.
*/
static void dev_queue_xmit_nit(structsk_buff *skb, struct net_device *dev)
{
structpacket_type *ptype;
structsk_buff *skb2 = NULL;
structpacket_type *pt_prev = NULL;
rcu_read_lock();
//遍歷tcpdumpsocket建立時掛載的鉤子
list_for_each_entry_rcu(ptype,&ptype_all, list) {
/* Never send packets back tothe socket
* they originated from - MvS([email protected])
*/
if ((ptype->dev == dev ||!ptype->dev) &&
(!skb_loop_sk(ptype, skb))) {
if (pt_prev) {
//拷貝資料報文
deliver_skb(skb2,pt_prev, skb->dev);
pt_prev =ptype;
continue;
}
skb2 =skb_clone(skb, GFP_ATOMIC);
if (!skb2)
break;
net_timestamp_set(skb2);
/* skb->nh shouldbe correctly
set by sender, so that the second statementis
just protection against buggy protocols.
*/
skb_reset_mac_header(skb2);
if(skb_network_header(skb2) < skb2->data ||
skb_network_header(skb2) >skb_tail_pointer(skb2)) {
net_crit_ratelimited("protocol%04x is buggy, dev %s\n",
ntohs(skb2->protocol),
dev->name);
skb_reset_network_header(skb2);
}
skb2->transport_header= skb2->network_header;
skb2->pkt_type =PACKET_OUTGOING;
pt_prev = ptype;
}
}
if(pt_prev)
pt_prev->func(skb2,skb->dev, pt_prev, skb->dev);
rcu_read_unlock();
}
銷燬SOCK_PACKET sock時註冊回撥
當sock_packet型別 socket 關閉時會呼叫release函式,這時候會摘掉之前的註冊函式
static int packet_release(struct socket*sock)
{
structsock *sk = sock->sk;
structpacket_sock *po;
structnet *net;
uniontpacket_req_u req_u;
if(!sk)
return0;
net= sock_net(sk);
po= pkt_sk(sk);
mutex_lock(&net->packet.sklist_lock);
sk_del_node_init_rcu(sk);
mutex_unlock(&net->packet.sklist_lock);
preempt_disable();
sock_prot_inuse_add(net,sk->sk_prot, -1);
preempt_enable();
spin_lock(&po->bind_lock);
//從ptype_all函式中摘掉註冊的鉤子函式
unregister_prot_hook(sk, false);
packet_cached_dev_reset(po);
if(po->prot_hook.dev) {
dev_put(po->prot_hook.dev);
po->prot_hook.dev= NULL;
}
spin_unlock(&po->bind_lock);
packet_flush_mclist(sk);
if(po->rx_ring.pg_vec) {
memset(&req_u,0, sizeof(req_u));
packet_set_ring(sk,&req_u, 1, 0);
}
if(po->tx_ring.pg_vec) {
memset(&req_u,0, sizeof(req_u));
packet_set_ring(sk,&req_u, 1, 1);
}
fanout_release(sk);
synchronize_net();
/*
* Nowthe socket is dead. No more input will appear.
*/
sock_orphan(sk);
sock->sk= NULL;
/*Purge queues */
skb_queue_purge(&sk->sk_receive_queue);
packet_free_pending(po);
sk_refcnt_debug_release(sk);
sock_put(sk);
return0;
}
總結
Tcpdump抓包時建立SOCK_PACKET型別的socket,在socket建立流程中呼叫了packet_family_ops中註冊的packet_create函式,進而將抓包的鉤子函式註冊到ptype_all連結串列。在資料接收方向,__netif_receive_skb_core函式呼叫註冊的鉤子函式,將資料報文拷貝並遞交到af_packet.c檔案中的具體處理函式;同樣,在傳送方向的dev_queue_xmit_nit函式中呼叫鉤子函式實現資料報文拷貝。