1. 程式人生 > >linux網橋--接受資料包

linux網橋--接受資料包

 版權宣告:如有需要,可供轉載,但請註明出處:https://blog.csdn.net/City_of_skey/article/details/85254786

目錄

1、netif_receive_skb

2、handle_bridge

3、br_handle_frame

4、br_handle_frame_finish

5、br_forward

6、br_flood_forward

7、br_pass_frame_up


1、netif_receive_skb

當一個網絡卡收到資料包後就會觸發硬體中斷,然後呼叫網絡卡驅動程式生成skb,並把skb拷貝到CPU輸入佇列,再觸發軟中斷呼叫netif_receive_skb來處理資料包。netif_receive_skb主要呼叫了__netif_receive_skb,這個函式主要做以下四件事:

(1)、遍歷嗅探器ptype_all,有註冊函式就拷貝一份資料包,比如tcpdump抓包就是在ptype_all中註冊了一個函式。

(2)、呼叫handle_bridge處理網橋

(3)、呼叫handle_macvlan處理macvlan

(4)、根據協議號在ptype_base中查詢三層處理函式,如果是IP協議就呼叫ip_rcv,把資料包送上三層

二三層簡單互動圖

__netif_receive_skb:

/*
 * __netif_receive_skb - core receive path for a single skb.
 *
 * In order, the body below:
 *   1. offers the skb to every matching handler on ptype_all,
 *   2. calls handle_bridge(),
 *   3. calls handle_macvlan(),
 *   4. offers the skb to the ptype_base handlers whose type equals
 *      skb->protocol.
 *
 * Delivery is deferred by one handler: a matching handler is remembered in
 * pt_prev and only invoked once the next match is found, so the last match
 * is called directly through pt_prev->func() at the end.
 *
 * Returns NET_RX_SUCCESS / NET_RX_DROP on the early exits, otherwise the
 * value produced by the last delivery (ret starts out as NET_RX_DROP).
 */
static int __netif_receive_skb(struct sk_buff *skb)
{
	struct packet_type *ptype, *pt_prev;
	struct net_device *orig_dev;
	struct net_device *master;
	struct net_device *null_or_orig;
	struct net_device *orig_or_bond;
	int ret = NET_RX_DROP;
	__be16 type;

	if (!netdev_tstamp_prequeue)
		net_timestamp_check(skb);

	if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb))
		return NET_RX_SUCCESS;

	/* if we've gotten here through NAPI, check netpoll */
	if (netpoll_receive_skb(skb))
		return NET_RX_DROP;

	/* Record the receiving interface index the first time through */
	if (!skb->skb_iif)
		skb->skb_iif = skb->dev->ifindex;

	/*
	 * bonding note: skbs received on inactive slaves should only
	 * be delivered to pkt handlers that are exact matches.  Also
	 * the deliver_no_wcard flag will be set.  If packet handlers
	 * are sensitive to duplicate packets these skbs will need to
	 * be dropped at the handler.  The vlan accel path may have
	 * already set the deliver_no_wcard flag.
	 */
	null_or_orig = NULL;
	orig_dev = skb->dev;
	master = ACCESS_ONCE(orig_dev->master);
	if (skb->deliver_no_wcard)
		null_or_orig = orig_dev;
	else if (master) {
		if (skb_bond_should_drop(skb, master)) {
			skb->deliver_no_wcard = 1;
			null_or_orig = orig_dev; /* deliver only exact match */
		} else
			skb->dev = master;
	}

	__get_cpu_var(softnet_data).processed++;

	skb_reset_network_header(skb);
	skb_reset_transport_header(skb);
	skb->mac_len = skb->network_header - skb->mac_header;

	pt_prev = NULL;

	rcu_read_lock();

#ifdef CONFIG_NET_CLS_ACT
	if (skb->tc_verd & TC_NCLS) {
		skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
		goto ncls;
	}
#endif

	/*
	 * Packet sniffers (e.g. the tcpdump capture tool) register a handler
	 * on ptype_all; every handler whose device matches is given the skb
	 * through deliver_skb().
	 */
	list_for_each_entry_rcu(ptype, &ptype_all, list) {
		if (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
		    ptype->dev == orig_dev) {
			if (pt_prev)
				ret = deliver_skb(skb, pt_prev, orig_dev);
			pt_prev = ptype;
		}
	}

#ifdef CONFIG_NET_CLS_ACT
	skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
	if (!skb)
		goto out;
ncls:
#endif
	/* Bridge handling; a NULL return means there is nothing left to do here */
	skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
	if (!skb)
		goto out;
	/* macvlan handling; a NULL return likewise ends processing */
	skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev);
	if (!skb)
		goto out;

	/*
	 * Make sure frames received on VLAN interfaces stacked on
	 * bonding interfaces still make their way to any base bonding
	 * device that may have registered for a specific ptype.  The
	 * handler may have to adjust skb->dev and orig_dev.
	 */
	orig_or_bond = orig_dev;
	if ((skb->dev->priv_flags & IFF_802_1Q_VLAN) &&
	    (vlan_dev_real_dev(skb->dev)->priv_flags & IFF_BONDING)) {
		orig_or_bond = vlan_dev_real_dev(skb->dev);
	}

	/* Look up the layer-3 protocol handlers in ptype_base, bucketed by skb->protocol */
	type = skb->protocol;
	list_for_each_entry_rcu(ptype,
			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
		if (ptype->type == type && (ptype->dev == null_or_orig ||
		     ptype->dev == skb->dev || ptype->dev == orig_dev ||
		     ptype->dev == orig_or_bond)) {
			if (pt_prev)
				ret = deliver_skb(skb, pt_prev, orig_dev);
			pt_prev = ptype;
		}
	}

	/* Pass the packet up to layer 3 through the last matching handler */
	if (pt_prev) {
		/* Layer-3 receive function, e.g. ip_rcv for IP */
		ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
	} else {
		/* No handler claimed the packet: free it and report a drop */
		kfree_skb(skb);
		/* Jamal, now you will not able to escape explaining
		 * me how you were going to use this. :-)
		 */
		ret = NET_RX_DROP;
	}

out:
	rcu_read_unlock();
	return ret;
}

2、handle_bridge

handle_bridge首先判斷資料包型別,如果是迴環(loopback)資料包,或者接收裝置不屬於任何網橋,就直接返回。否則呼叫br_handle_frame_hook進入網橋處理流程,br_handle_frame_hook在網橋初始化時被賦值為函式br_handle_frame。

/*
 * handle_bridge - divert a received skb into the bridge code when its
 * device is a bridge port.
 *
 * Returns @skb untouched when the packet is a loopback packet or the
 * receiving device has no bridge port attached; otherwise returns whatever
 * br_handle_frame_hook() returns.  Any handler still pending in *@pt_prev is
 * delivered first and *@pt_prev is cleared.
 */
static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
					    struct packet_type **pt_prev, int *ret,
					    struct net_device *orig_dev)
{
	struct net_bridge_port *br_port;

	/* Loopback packets never enter the bridge */
	if (skb->pkt_type == PACKET_LOOPBACK)
		return skb;

	/* The device does not belong to any bridge: nothing to do */
	br_port = rcu_dereference(skb->dev->br_port);
	if (br_port == NULL)
		return skb;

	/* Flush the deferred handler before the bridge takes the skb */
	if (*pt_prev) {
		*ret = deliver_skb(skb, *pt_prev, orig_dev);
		*pt_prev = NULL;
	}

	/* Bridge entry point; the article notes it is set to br_handle_frame at bridge init */
	return br_handle_frame_hook(br_port, skb);
}

3、br_handle_frame

netfilter框架在二層也維持了一個過濾系統。br_handle_frame首先判斷資料包的目的地址是否為鏈路本地(link-local)地址,據此決定資料包是交給本機處理還是進入轉發流程。

1、目的地址為鏈路本地地址的資料包(is_link_local為真)首先要經過二層netfilter的NF_BR_LOCAL_IN鏈上的處理函式,最後呼叫br_handle_local_finish;如果資料包沒有被丟棄,就返回繼續上層協議處理。

2、如果是轉發資料包,且埠處於BR_STATE_FORWARDING狀態,首先查詢ebtables註冊的路由hook(br_should_route_hook);如果找到了hook並且hook返回非零值,就直接返回,由上層協議繼續處理。如果沒有找到hook(或hook返回0),就進入二層netfilter框架的NF_BR_PRE_ROUTING鏈處理,處理完畢呼叫br_handle_frame_finish。

/*
 * br_handle_frame - bridge entry point for a frame received on port @p.
 *
 * Returns the skb when the caller should keep processing it (upper-layer
 * delivery), or NULL when the frame was dropped, consumed by a netfilter
 * hook, or handed to the NF_BR_PRE_ROUTING hook.
 */
struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb)
{
	const unsigned char *dest = eth_hdr(skb)->h_dest;
	int (*rhook)(struct sk_buff *skb);

	/* Drop the frame when its source MAC fails is_valid_ether_addr() */
	if (!is_valid_ether_addr(eth_hdr(skb)->h_source))
		goto drop;

	/*
	 * Make sure we are not working on a shared skb; skb_share_check()
	 * may hand back a different skb, or NULL when nothing is left to process.
	 */
	skb = skb_share_check(skb, GFP_ATOMIC);
	if (!skb)
		return NULL;

	/* Destination is a link-local address */
	if (unlikely(is_link_local(dest))) {
		/* Pause frames shouldn't be passed up by driver anyway */
		if (skb->protocol == htons(ETH_P_PAUSE))
			goto drop;

		/* If STP is turned off, then forward */
		if (p->br->stp_enabled == BR_NO_STP && dest[5] == 0)
			goto forward;

		/* Run the layer-2 netfilter NF_BR_LOCAL_IN hook, finishing in br_handle_local_finish */
		if (NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev,
			    NULL, br_handle_local_finish))
			return NULL;	/* frame consumed by filter */
		else
			/* Passed the layer-2 filter: let the upper layers handle it */
			return skb;	/* continue processing */
	}
/* Forwarding path */
forward:
	switch (p->state) {
	case BR_STATE_FORWARDING:
		/* ebtables routing hook, if one is registered */
		rhook = rcu_dereference(br_should_route_hook);
		if (rhook != NULL) {
			/* Non-zero from the hook: return the skb for upper-layer processing */
			if (rhook(skb))
				return skb;
			/* Re-read the destination pointer after the hook has run */
			dest = eth_hdr(skb)->h_dest;
		}
		/* fall through */
	case BR_STATE_LEARNING:
		/* Frame addressed to the bridge device itself is marked PACKET_HOST */
		if (!compare_ether_addr(p->br->dev->dev_addr, dest))
			skb->pkt_type = PACKET_HOST;

		/* After the NF_BR_PRE_ROUTING hook, processing continues in br_handle_frame_finish */
		NF_HOOK(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL,
			br_handle_frame_finish);
		break;
	default:
		/* Any other port state, and every "goto drop" above, frees the skb */
drop:
		kfree_skb(skb);
	}
	return NULL;
}

4、br_handle_frame_finish

br_handle_frame_finish是經過netfilter的NF_BR_PRE_ROUTING鏈處理後呼叫,主要做以下事情:

(1)判斷橋埠是否是開啟的,如果沒有開啟就直接返回

(2)呼叫br_fdb_update做mac地址學習

(3)判斷目的地址型別如果是多播地址就呼叫br_multicast_forward轉發

(4)根據目的地址查詢埠的轉發表,如果找到了就呼叫br_forward從此埠轉發出去,如果沒有找到對應的埠就呼叫br_flood_forward每個埠轉發一份

/*
 * br_handle_frame_finish - continuation invoked after the NF_BR_PRE_ROUTING
 * hook (see the NF_HOOK call in br_handle_frame).
 *
 * Learns the source address, then decides where the frame goes:
 *   - skb  is the frame to forward out of bridge ports (NULL = do not forward)
 *   - skb2 is the frame to hand to the local host   (NULL = do not deliver)
 *
 * Returns the result of br_pass_frame_up() when a local delivery happens,
 * 0 otherwise.
 *
 * note: already called with rcu_read_lock
 */
int br_handle_frame_finish(struct sk_buff *skb)
{
	const unsigned char *dest = eth_hdr(skb)->h_dest;
	struct net_bridge_port *p = rcu_dereference(skb->dev->br_port);
	struct net_bridge *br;
	struct net_bridge_fdb_entry *dst;
	struct net_bridge_mdb_entry *mdst;
	struct sk_buff *skb2;

	/* Drop when the device has no bridge port or the port is disabled */
	if (!p || p->state == BR_STATE_DISABLED)
		goto drop;

	/* insert into forwarding database after filtering to avoid spoofing */
	br = p->br;
	/* MAC learning: update the forwarding database with the source address */
	br_fdb_update(br, p, eth_hdr(skb)->h_source);

	/* Multicast destination: drop when br_multicast_rcv() returns non-zero */
	if (is_multicast_ether_addr(dest) &&
	    br_multicast_rcv(br, p, skb))
		goto drop;

	/* A port in the learning state learns addresses but forwards nothing */
	if (p->state == BR_STATE_LEARNING)
		goto drop;

	BR_INPUT_SKB_CB(skb)->brdev = br->dev;

	/* The packet skb2 goes to the local host (NULL to skip). */
	skb2 = NULL;

	/* A promiscuous bridge device also receives the frame locally */
	if (br->dev->flags & IFF_PROMISC)
		skb2 = skb;

	dst = NULL;

	/* Destination is a multicast address */
	if (is_multicast_ether_addr(dest)) {
		mdst = br_mdb_get(br, skb);
		if (mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) {
			if ((mdst && !hlist_unhashed(&mdst->mglist)) ||
			    br_multicast_is_router(br))
				skb2 = skb;
			/* Multicast forwarding */
			br_multicast_forward(mdst, skb, skb2);
			/* Already forwarded above; skip the forwarding step below */
			skb = NULL;
			if (!skb2)
				goto out;
		} else
			skb2 = skb;

		br->dev->stats.multicast++;
		/* Otherwise look the destination MAC up in the forwarding database */
	} else if ((dst = __br_fdb_get(br, dest)) && dst->is_local) {
		skb2 = skb;
		/* Do not forward the packet since it's local. */
		skb = NULL;
	}

	if (skb) {
		if (dst)
			/* A forwarding entry exists for this MAC: send out of its port */
			br_forward(dst->dst, skb, skb2);
		else
			/* No forwarding entry for this MAC: flood to the bridge ports */
			br_flood_forward(br, skb, skb2);
	}

	/* Hand the local copy up the stack (layer-3 processing) */
	if (skb2)
		return br_pass_frame_up(skb2);

out:
	return 0;
drop:
	kfree_skb(skb);
	goto out;
}

5、br_forward

網橋轉發處理走br_forward,最終處理的是__br_forward函式

/*
 * br_forward - forward @skb out of bridge port @to.
 *
 * @skb0 non-NULL means the caller still needs the skb afterwards: the frame
 * is then sent through deliver_clone() and is never freed here.  With @skb0
 * NULL the skb is passed straight to __br_forward(), or freed when
 * should_deliver() rejects the port.
 *
 * called with rcu_read_lock
 */
void br_forward(const struct net_bridge_port *to, struct sk_buff *skb, struct sk_buff *skb0)
{
	/* Port not eligible: free the skb unless the caller keeps using it */
	if (!should_deliver(to, skb)) {
		if (!skb0)
			kfree_skb(skb);
		return;
	}

	if (skb0)
		deliver_clone(to, skb, __br_forward);
	else
		__br_forward(to, skb);
}

__br_forward函式資料包先通過二層netfilter框架的NF_BR_FORWARD鏈處理再呼叫br_forward_finish函式繼續。

__br_forward:

/*
 * __br_forward - retarget @skb at port @to and run the NF_BR_FORWARD hook.
 *
 * The skb is freed instead when skb_warn_if_lro() flags it.  Otherwise
 * skb->dev becomes the egress device and the previous device is passed to
 * the hook as the input device; br_forward_finish is the hook continuation.
 */
static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb)
{
	struct net_device *ingress;

	if (!skb_warn_if_lro(skb)) {
		/* Remember where the frame came from before switching devices */
		ingress = skb->dev;
		skb->dev = to->dev;
		skb_forward_csum(skb);

		NF_HOOK(NFPROTO_BRIDGE, NF_BR_FORWARD, skb, ingress, skb->dev,
			br_forward_finish);
		return;
	}

	kfree_skb(skb);
}

br_forward_finish函式要通過NF_BR_POST_ROUTING鏈的過濾處理,再呼叫br_dev_queue_push_xmit繼續處理。

br_forward_finish:

/*
 * br_forward_finish - continuation of the NF_BR_FORWARD hook.
 *
 * Runs the bridge NF_BR_POST_ROUTING hook with skb->dev as the output
 * device and br_dev_queue_push_xmit as the continuation, and returns the
 * hook's result.
 */
int br_forward_finish(struct sk_buff *skb)
{
	return NF_HOOK(NFPROTO_BRIDGE, NF_BR_POST_ROUTING, skb, NULL, skb->dev,
		       br_dev_queue_push_xmit);

}

br_dev_queue_push_xmit函式最終呼叫dev_queue_xmit通過網絡卡驅動將資料包傳送出去。

br_dev_queue_push_xmit:

/*
 * br_dev_queue_push_xmit - final transmit step of the bridge forward path.
 *
 * Frees the skb when it is larger than the device MTU (GSO packets are
 * exempt) or when nf_bridge_maybe_copy_header() returns non-zero; otherwise
 * queues it on skb->dev with dev_queue_xmit().  Always returns 0.
 */
int br_dev_queue_push_xmit(struct sk_buff *skb)
{
	/* drop mtu oversized packets except gso */
	if (packet_length(skb) > skb->dev->mtu && !skb_is_gso(skb)) {
		kfree_skb(skb);
		return 0;
	}

	/* ip_fragment doesn't copy the MAC header */
	if (nf_bridge_maybe_copy_header(skb)) {
		kfree_skb(skb);
		return 0;
	}

	/* Push ETH_HLEN bytes back in front of the data (presumably the Ethernet header) */
	skb_push(skb, ETH_HLEN);
	/* Hand the frame to the device so the driver sends it out */
	dev_queue_xmit(skb);

	return 0;
}

6、br_flood_forward

br_flood_forward處理方法是遍歷網橋下面的所有埠,每個埠呼叫__br_forward傳送一次。

/*
 * br_flood_forward - flood @skb to the ports of bridge @br.
 *
 * Thin wrapper: calls br_flood() with __br_forward as the per-port send
 * routine.  @skb2 is passed through as br_flood()'s skb0 argument.
 *
 * called under bridge lock
 */
void br_flood_forward(struct net_bridge *br, struct sk_buff *skb,
		      struct sk_buff *skb2)
{
	br_flood(br, skb, skb2, __br_forward);
}

br_flood:

/* called under bridge lock */
static void br_flood(struct net_bridge *br, struct sk_buff *skb,
		     struct sk_buff *skb0,
		     void (*__packet_hook)(const struct net_bridge_port *p,
					   struct sk_buff *skb))
{
	struct net_bridge_port *p;
	struct net_bridge_port *prev;

	prev = NULL;

	/*遍歷網橋下埠連結串列port_list每個埠呼叫__br_forward傳送一次*/
	list_for_each_entry_rcu(p, &br->port_list, list) {
		prev = maybe_deliver(prev, p, skb, __packet_hook);
		if (IS_ERR(prev))
			goto out;
	}

	if (!prev)
		goto out;

	if (skb0)
		deliver_clone(prev, skb, __packet_hook);
	else
		__packet_hook(prev, skb);
	return;

out:
	if (!skb0)
		kfree_skb(skb);
}

7、br_pass_frame_up

br_pass_frame_up把送往本機的那份資料包的skb->dev改成網橋裝置,經過NF_BR_LOCAL_IN鏈後再次進入netif_receive_skb函式走三層處理,不過這次不會再進入handle_bridge的網橋流程,因為skb->dev是網橋裝置而不是一個埠裝置。

/*
 * br_pass_frame_up - deliver @skb to the local host via the bridge device.
 *
 * Bumps the bridge's per-CPU rx counters, rewrites skb->dev to the bridge
 * device stored in the skb control block, then runs the NF_BR_LOCAL_IN hook
 * with netif_receive_skb as the continuation.  Returns the hook's result.
 */
static int br_pass_frame_up(struct sk_buff *skb)
{
	struct net_device *bridge_dev = BR_INPUT_SKB_CB(skb)->brdev;
	struct net_bridge *bridge = netdev_priv(bridge_dev);
	struct br_cpu_netstats *cpu_stats = this_cpu_ptr(bridge->stats);
	struct net_device *ingress_dev;

	/* Account the frame as received by the bridge */
	cpu_stats->rx_packets++;
	cpu_stats->rx_bytes += skb->len;

	/* Keep the original device for the hook, then switch to the bridge device */
	ingress_dev = skb->dev;
	skb->dev = bridge_dev;

	/* After NF_BR_LOCAL_IN the frame re-enters netif_receive_skb */
	return NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, skb, ingress_dev, NULL,
		       netif_receive_skb);
}