1. 程式人生 > >linux網橋--簡介

linux網橋--簡介

版權宣告:如有需要,可供轉載,但請註明出處:https://blog.csdn.net/City_of_skey/article/details/85240141

目錄

 

1、網橋簡介

 

2、網橋初始化

2.1 struct net_bridge

2.2 struct net_bridge_port

2.3 struct net_bridge_fdb_entry

2.4 br_init

3、新建網橋

3.1、br_add_bridge

3.2、new_bridge_dev

3.3、br_dev_setup

4、新增埠

4.1、add_del_if

4.2、br_add_if


1、網橋簡介

linux下的網橋是一個虛擬裝置,本質上說是一個高層次的二層虛擬裝置,它把其他的從裝置虛擬為一個port。當把一個網絡卡裝置加入到網橋後,網橋的mac地址就是下面所有網絡卡中最小的那個mac地址,然後所有的網絡卡共享網橋的ip。網絡卡接收、傳送資料包都交給網橋決策。網橋工作在鏈路層。

網橋發包流程

如下圖所示網橋收包流程圖:

網橋收包流程

 

2、網橋初始化

2.1 struct net_bridge

struct net_bridge是描述網橋的結構體,其中比較重要的元素是hash:這個雜湊表儲存了mac地址對應的網路埠,也就是mac地址學習的結果;網橋轉發時要根據mac地址在這個雜湊表中查詢該從哪個埠轉發出去。

/*
 * Per-bridge state.
 *
 * new_bridge_dev() allocates this as the private area of the bridge's
 * net_device (alloc_netdev(sizeof(struct net_bridge), ...)) and reaches it
 * through netdev_priv().
 */
struct net_bridge
{
	spinlock_t			lock;		// spinlock
	struct list_head		port_list;		// list of the ports (devices) attached to this bridge
	struct net_device		*dev;		// net_device instance of the bridge itself

	struct br_cpu_netstats __percpu *stats;	// per-CPU counters, allocated with alloc_percpu()
	spinlock_t			hash_lock;
	struct hlist_head		hash[BR_HASH_SIZE]; // MAC learning table: buckets of struct net_bridge_fdb_entry
	unsigned long			feature_mask;
#ifdef CONFIG_BRIDGE_NETFILTER
	struct rtable 			fake_rtable;
#endif
	unsigned long			flags;
#define BR_SET_MAC_ADDR		0x00000001

	/* STP */
	bridge_id			designated_root;
	bridge_id			bridge_id;
	u32				root_path_cost;
	unsigned long			max_age;
	unsigned long			hello_time;
	unsigned long			forward_delay;
	unsigned long			bridge_max_age;
	unsigned long			ageing_time;
	unsigned long			bridge_hello_time;
	unsigned long			bridge_forward_delay;

	u8				group_addr[ETH_ALEN];
	u16				root_port;

	/* STP mode */
	enum {
		BR_NO_STP, 		/* no spanning tree */
		BR_KERNEL_STP,		/* old STP in kernel */
		BR_USER_STP,		/* new RSTP in userspace */
	} stp_enabled;

	unsigned char			topology_change;
	unsigned char			topology_change_detected;

#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
	unsigned char			multicast_router;

	u8				multicast_disabled:1;

	u32				hash_elasticity;
	u32				hash_max;

	u32				multicast_last_member_count;
	u32				multicast_startup_queries_sent;
	u32				multicast_startup_query_count;

	unsigned long			multicast_last_member_interval;
	unsigned long			multicast_membership_interval;
	unsigned long			multicast_querier_interval;
	unsigned long			multicast_query_interval;
	unsigned long			multicast_query_response_interval;
	unsigned long			multicast_startup_query_interval;

	spinlock_t			multicast_lock;
	struct net_bridge_mdb_htable	*mdb;
	struct hlist_head		router_list;
	struct hlist_head		mglist;

	/* multicast timers */
	struct timer_list		multicast_router_timer;
	struct timer_list		multicast_querier_timer;
	struct timer_list		multicast_query_timer;
#endif

	struct timer_list		hello_timer;
	struct timer_list		tcn_timer;
	struct timer_list		topology_change_timer;
	struct timer_list		gc_timer;
	struct kobject			*ifobj;
};

2.2 struct net_bridge_port

struct net_bridge_port描述網橋下的網絡卡埠。

/*
 * One port of a bridge, i.e. one network device attached to it.
 * br_add_if() obtains an instance from new_nbp() and links it into the
 * bridge's port_list.
 */
struct net_bridge_port
{
	struct net_bridge		*br;			// bridge this port belongs to
	struct net_device		*dev;		// net_device of the port
	struct list_head		list;			// entry in the bridge's port list

	/* STP */
	u8				priority;
	u8				state;
	u16				port_no;
	unsigned char			topology_change_ack;
	unsigned char			config_pending;
	port_id				port_id;
	port_id				designated_port;
	bridge_id			designated_root;
	bridge_id			designated_bridge;
	u32				path_cost;
	u32				designated_cost;

	/* per-port timers */
	struct timer_list		forward_delay_timer;
	struct timer_list		hold_timer;
	struct timer_list		message_age_timer;
	struct kobject			kobj;
	struct rcu_head			rcu;

	unsigned long 			flags;
#define BR_HAIRPIN_MODE		0x00000001

#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
	u32				multicast_startup_queries_sent;
	unsigned char			multicast_router;
	struct timer_list		multicast_router_timer;
	struct timer_list		multicast_query_timer;
	struct hlist_head		mglist;
	struct hlist_node		rlist;
#endif

#ifdef CONFIG_SYSFS
	char				sysfs_name[IFNAMSIZ];
#endif
};

/*
 * Per-CPU counters of a bridge; struct net_bridge holds them through its
 * __percpu 'stats' pointer, allocated with alloc_percpu() in new_bridge_dev().
 * Going by the field names these count received/transmitted packets and
 * bytes -- confirm against the code that updates them.
 */
struct br_cpu_netstats {
	unsigned long	rx_packets;
	unsigned long	rx_bytes;
	unsigned long	tx_packets;
	unsigned long	tx_bytes;
};

2.3 struct net_bridge_fdb_entry

struct net_bridge_fdb_entry是一個mac地址和網路埠的對應表項,最主要的兩個元素是dst、addr,也就是這個mac地址要從哪個網路埠出去。

/*
 * One forwarding-database entry: maps a MAC address (addr) to the bridge
 * port (dst) it is associated with.
 */
struct net_bridge_fdb_entry
{
	struct hlist_node		hlist;		// hash-list linkage
	struct net_bridge_port		*dst;	// bridge port for this address

	struct rcu_head			rcu;
	unsigned long			ageing_timer;
	mac_addr			addr;				// MAC address
	unsigned char			is_local;			// whether this is a MAC address of the local machine
	unsigned char			is_static;			// whether this is a static MAC address
};

2.4 br_init

網橋在核心中是以模組的形式實現的,該模組的初始化函式是br_init,位於net/bridge/br.c檔案中。

br_init主要做以下幾件事情:

(1)STP協議註冊

(2)MAC學習表的初始化

(3)網橋在netfilter上鉤子函式註冊

(4)網橋的ioctl設定,提供給應用層的操作介面

/*
 * Module initialisation for the bridge.
 *
 * Brings the sub-parts up one after another; if any step fails, everything
 * that was already set up is torn down again in reverse order and the
 * failing step's error code is returned. Returns 0 on success.
 */
static int __init br_init(void)
{
	int err;

	/* Register the STP protocol handler. */
	err = stp_proto_register(&br_stp_proto);
	if (err < 0) {
		pr_err("bridge: can't register sap for STP\n");
		return err;
	}

	/* Forwarding database (port <-> MAC table). */
	err = br_fdb_init();
	if (err)
		goto undo_stp;

	/* Per-network-namespace operations. */
	err = register_pernet_subsys(&br_net_ops);
	if (err)
		goto undo_fdb;

	/* Netfilter hooks of the bridge. */
	err = br_netfilter_init();
	if (err)
		goto undo_pernet;

	/* Subscribe to the netdevice notifier chain. */
	err = register_netdevice_notifier(&br_device_notifier);
	if (err)
		goto undo_netfilter;

	/* Netlink interface. */
	err = br_netlink_init();
	if (err)
		goto undo_notifier;

	/* Install the bridge ioctl entry point offered to user space. */
	brioctl_set(br_ioctl_deviceless_stub);

	/* Install the bridge frame handler. */
	br_handle_frame_hook = br_handle_frame;

#if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)
	br_fdb_test_addr_hook = br_fdb_test_addr;
#endif

	return 0;

undo_notifier:
	unregister_netdevice_notifier(&br_device_notifier);
undo_netfilter:
	br_netfilter_fini();
undo_pernet:
	unregister_pernet_subsys(&br_net_ops);
undo_fdb:
	br_fdb_fini();
undo_stp:
	stp_proto_unregister(&br_stp_proto);
	return err;
}

網橋在netfilter框架中主要註冊了7個鉤子函式,接下來會詳細介紹。

/*
 * Netfilter hooks of the bridge: five hooks in the PF_BRIDGE family
 * (pre-routing, local-in, two forward hooks for IP and ARP, post-routing)
 * plus ip_sabotage_in on the PRE_ROUTING hook of PF_INET and PF_INET6.
 *
 * For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because
 * br_dev_queue_push_xmit is called afterwards */
static struct nf_hook_ops br_nf_ops[] __read_mostly = {
	{
		.hook = br_nf_pre_routing,
		.owner = THIS_MODULE,
		.pf = PF_BRIDGE,
		.hooknum = NF_BR_PRE_ROUTING,
		.priority = NF_BR_PRI_BRNF,
	},
	{
		.hook = br_nf_local_in,
		.owner = THIS_MODULE,
		.pf = PF_BRIDGE,
		.hooknum = NF_BR_LOCAL_IN,
		.priority = NF_BR_PRI_BRNF,
	},
	{
		/* priority value is one below the ARP forward hook's */
		.hook = br_nf_forward_ip,
		.owner = THIS_MODULE,
		.pf = PF_BRIDGE,
		.hooknum = NF_BR_FORWARD,
		.priority = NF_BR_PRI_BRNF - 1,
	},
	{
		.hook = br_nf_forward_arp,
		.owner = THIS_MODULE,
		.pf = PF_BRIDGE,
		.hooknum = NF_BR_FORWARD,
		.priority = NF_BR_PRI_BRNF,
	},
	{
		.hook = br_nf_post_routing,
		.owner = THIS_MODULE,
		.pf = PF_BRIDGE,
		.hooknum = NF_BR_POST_ROUTING,
		.priority = NF_BR_PRI_LAST,
	},
	{
		/* IPv4 pre-routing */
		.hook = ip_sabotage_in,
		.owner = THIS_MODULE,
		.pf = PF_INET,
		.hooknum = NF_INET_PRE_ROUTING,
		.priority = NF_IP_PRI_FIRST,
	},
	{
		/* IPv6 pre-routing, same handler as for IPv4 */
		.hook = ip_sabotage_in,
		.owner = THIS_MODULE,
		.pf = PF_INET6,
		.hooknum = NF_INET_PRE_ROUTING,
		.priority = NF_IP6_PRI_FIRST,
	},
};

3、新建網橋

我們通過brctl addbr br0命令新建一個網橋br0,核心提供的ioctl介面由函式br_ioctl_deviceless_stub實現,新建網橋呼叫br_add_bridge,刪除網橋呼叫br_del_bridge。

/*
 * ioctl entry point for bridge requests that do not target an existing
 * bridge device.
 *
 * @net:  network namespace the request applies to
 * @cmd:  ioctl command
 * @uarg: user-space argument; for SIOCBRADDBR/SIOCBRDELBR it is the bridge
 *        name (IFNAMSIZ bytes are copied in)
 *
 * Returns the result of the selected handler, -EPERM without
 * CAP_NET_ADMIN, -EFAULT if the name cannot be copied from user space,
 * or -EOPNOTSUPP for any other command.
 */
int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd, void __user *uarg)
{
	char ifname[IFNAMSIZ];

	if (cmd == SIOCGIFBR || cmd == SIOCSIFBR)
		return old_deviceless(net, uarg);

	if (cmd != SIOCBRADDBR && cmd != SIOCBRDELBR)
		return -EOPNOTSUPP;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	/* Fetch the bridge name from the caller's address space. */
	if (copy_from_user(ifname, uarg, IFNAMSIZ))
		return -EFAULT;

	/* Force NUL termination whatever user space handed in. */
	ifname[IFNAMSIZ-1] = 0;

	/* Create the bridge on SIOCBRADDBR, otherwise delete it. */
	return (cmd == SIOCBRADDBR) ? br_add_bridge(net, ifname)
				    : br_del_bridge(net, ifname);
}

3.1、br_add_bridge

br_add_bridge主要做三件事情:

1、呼叫new_bridge_dev根據網橋名字新建一個網橋裝置

2、網橋裝置也是一個以太裝置所以要呼叫register_netdevice註冊這個網橋裝置的struct net_device

3、初始化網橋裝置在sysfs中的相關資訊,便於管理查詢

/*
 * Create and register a new bridge device.
 *
 * @net:  network namespace the bridge is created in
 * @name: requested device name; if it contains '%', dev_alloc_name() is
 *        used to expand it
 *
 * Takes and releases the RTNL lock itself.
 *
 * Returns 0 on success, -ENOMEM if the device cannot be allocated, or the
 * error from dev_alloc_name(), register_netdevice() or br_sysfs_addbr().
 *
 * NOTE(review): on the free_netdev() paths nothing visible here releases
 * the per-CPU stats that new_bridge_dev() allocated -- confirm whether
 * that leaks.
 */
int br_add_bridge(struct net *net, const char *name)
{
	struct net_device *dev;
	int ret;

	/* Allocate the bridge device under the requested name. */
	dev = new_bridge_dev(net, name);
	if (!dev)
		return -ENOMEM;

	rtnl_lock();

	if (strchr(dev->name, '%')) {
		ret = dev_alloc_name(dev, dev->name);
		if (ret < 0)
			goto free_dev;
	}

	SET_NETDEV_DEVTYPE(dev, &br_type);

	/* Register the virtual bridge device. */
	ret = register_netdevice(dev);
	if (ret)
		goto free_dev;

	/* Publish the bridge in sysfs; undo the registration if that fails. */
	ret = br_sysfs_addbr(dev);
	if (ret)
		unregister_netdevice(dev);
	goto unlock;

free_dev:
	/* Not (successfully) registered: release the device directly. */
	free_netdev(dev);
unlock:
	rtnl_unlock();
	return ret;
}

3.2、new_bridge_dev

網橋裝置也是一個虛擬的以太裝置,所以呼叫alloc_netdev新建一個以太裝置,並執行網橋初始化函式br_dev_setup,net_device的私有資料指向網橋。

static struct net_device *new_bridge_dev(struct net *net, const char *name)
{
	struct net_bridge *br;
	struct net_device *dev;

	/*建立一個虛擬dev,執行網橋初始化函式br_dev_setup*/
	dev = alloc_netdev(sizeof(struct net_bridge), name,
			   br_dev_setup);

	if (!dev)
		return NULL;
	dev_net_set(dev, net);

	/*struct net_device私有資料指向網橋*/
	br = netdev_priv(dev);
	br->dev = dev;

	br->stats = alloc_percpu(struct br_cpu_netstats);
	if (!br->stats) {
		free_netdev(dev);
		return NULL;
	}

	spin_lock_init(&br->lock);
	INIT_LIST_HEAD(&br->port_list);
	spin_lock_init(&br->hash_lock);

	br->bridge_id.prio[0] = 0x80;
	br->bridge_id.prio[1] = 0x00;

	memcpy(br->group_addr, br_group_address, ETH_ALEN);

	br->feature_mask = dev->features;
	/*預設不開啟STP功能*/
	br->stp_enabled = BR_NO_STP;
	br->designated_root = br->bridge_id;
	br->root_path_cost = 0;
	br->root_port = 0;
	br->bridge_max_age = br->max_age = 20 * HZ;
	br->bridge_hello_time = br->hello_time = 2 * HZ;
	br->bridge_forward_delay = br->forward_delay = 15 * HZ;
	br->topology_change = 0;
	br->topology_change_detected = 0;
	br->ageing_time = 300 * HZ;

	/*路由表相關初始化*/
	br_netfilter_rtable_init(br);
	/*相關定時器初始化*/
	br_stp_timer_init(br);
	br_multicast_init(br);

	return dev;
}

3.3、br_dev_setup

這個函式主要是初始化網橋

(1)初始化網橋的MAC

(2)對網橋進行乙太網初始化

(3)設定以太裝置的操作函式指標br_netdev_ops

(4)設定以太裝置管理函式指向br_ethtool_ops。

/*
 * Setup callback for a bridge net_device (passed to alloc_netdev() by
 * new_bridge_dev()).
 *
 * Gives the device a random MAC address, applies the generic Ethernet
 * defaults and then installs the bridge-specific operations, flags and
 * feature bits.
 */
void br_dev_setup(struct net_device *dev)
{
	/* Start with a random MAC address. */
	random_ether_addr(dev->dev_addr);
	/* A bridge is an Ethernet device, so apply the Ethernet defaults. */
	ether_setup(dev);

	/* Device operations, destructor and ethtool operations. */
	dev->netdev_ops = &br_netdev_ops;
	dev->destructor = br_dev_free;
	SET_ETHTOOL_OPS(dev, &br_ethtool_ops);

	dev->tx_queue_len = 0;

	/* Mark the device as a bridge. */
	dev->priv_flags = IFF_EBRIDGE;

	dev->features = NETIF_F_SG
		      | NETIF_F_FRAGLIST
		      | NETIF_F_HIGHDMA
		      | NETIF_F_GSO_MASK
		      | NETIF_F_NO_CSUM
		      | NETIF_F_LLTX
		      | NETIF_F_NETNS_LOCAL
		      | NETIF_F_GSO;
}

br_netdev_ops:

/* net_device operations of a bridge device; installed by br_dev_setup(). */
static const struct net_device_ops br_netdev_ops = {
	.ndo_open		 = br_dev_open,			// open the device
	.ndo_stop		 = br_dev_stop,			// stop the device
	.ndo_start_xmit		 = br_dev_xmit,		// transmit a packet
	.ndo_get_stats		 = br_get_stats,
	.ndo_set_mac_address	 = br_set_mac_address,	// set the MAC address
	.ndo_set_multicast_list	 = br_dev_set_multicast_list,
	.ndo_change_mtu		 = br_change_mtu,		// change the MTU
	.ndo_do_ioctl		 = br_dev_ioctl,				// ioctl handler; adds and removes ports
#ifdef CONFIG_NET_POLL_CONTROLLER
	.ndo_netpoll_cleanup	 = br_netpoll_cleanup,
#endif
};

br_ethtool_ops:

/*
 * ethtool operations of a bridge device; installed by br_dev_setup().
 * The setters for tx checksum, scatter-gather and TSO are bridge-specific
 * (br_set_*); all getters and the UFO setter use the generic ethtool_op_*
 * helpers.
 */
static const struct ethtool_ops br_ethtool_ops = {
	.get_drvinfo    = br_getinfo,
	.get_link	= ethtool_op_get_link,
	.get_tx_csum	= ethtool_op_get_tx_csum,
	.set_tx_csum 	= br_set_tx_csum,
	.get_sg		= ethtool_op_get_sg,
	.set_sg		= br_set_sg,
	.get_tso	= ethtool_op_get_tso,
	.set_tso	= br_set_tso,
	.get_ufo	= ethtool_op_get_ufo,
	.set_ufo	= ethtool_op_set_ufo,
	.get_flags	= ethtool_op_get_flags,
};

4、新增埠

應用層通過brctl addif br0 eth0將eth0新增到網橋br0下面,核心的實現函式是br_dev_ioctl,它依據cmd呼叫add_del_if新增或者刪除埠。

/*
 * ioctl handler of a bridge device.
 *
 * @dev: the bridge device
 * @rq:  request from user space; ifr_ifindex names the port device for
 *       SIOCBRADDIF/SIOCBRDELIF
 * @cmd: ioctl command
 *
 * SIOCDEVPRIVATE is passed to old_dev_ioctl(), SIOCBRADDIF/SIOCBRDELIF add
 * or remove a port through add_del_if(). Any other command is logged and
 * rejected with -EOPNOTSUPP.
 */
int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
{
	struct net_bridge *br = netdev_priv(dev);

	if (cmd == SIOCDEVPRIVATE)
		return old_dev_ioctl(dev, rq, cmd);

	/* Add or remove a port, depending on the command. */
	if (cmd == SIOCBRADDIF || cmd == SIOCBRDELIF)
		return add_del_if(br, rq->ifr_ifindex, cmd == SIOCBRADDIF);

	br_debug(br, "Bridge does not support ioctl 0x%x\n", cmd);
	return -EOPNOTSUPP;
}

4.1、add_del_if

/*
 * Add a device to, or remove it from, a bridge. Called with RTNL.
 *
 * @br:      the bridge
 * @ifindex: interface index of the device to add or remove; looked up in
 *           the bridge device's network namespace
 * @isadd:   non-zero to add the port, zero to remove it
 *
 * Returns -EPERM without CAP_NET_ADMIN, -EINVAL if no device has that
 * index, otherwise the result of br_add_if() / br_del_if().
 */
static int add_del_if(struct net_bridge *br, int ifindex, int isadd)
{
	struct net_device *port_dev;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	/* Resolve the interface index; bail out if there is no such device. */
	port_dev = __dev_get_by_index(dev_net(br->dev), ifindex);
	if (!port_dev)
		return -EINVAL;

	return isadd ? br_add_if(br, port_dev) : br_del_if(br, port_dev);
}

4.2、br_add_if

這個函式是網橋新增埠的關鍵,主要做以下事情

(1)、首先對要新增的裝置進行檢查:迴環(loopback)裝置、非乙太網裝置直接返回錯誤;裝置本身是網橋、已經被新增到某個網橋下面,或者帶有IFF_DONT_BRIDGE標誌時也直接返回錯誤。

(2)、呼叫new_nbp新建一個網橋埠結構體struct net_bridge_port

(3)、將裝置設定為混雜模式

(4)、將新增的埠新增到埠-MAC表中

(5)、將新增的埠p新增到sysfs檔案系統中

(6)、將新增的埠p新增到埠連結串列port_list中

/*
 * br_add_if - attach a network device to a bridge as a new port.
 * @br:  bridge the port is added to
 * @dev: device that becomes the port
 *
 * Called with RTNL.
 *
 * Returns 0 on success, or a negative errno:
 *   -EINVAL      @dev is a loopback device or not Ethernet-like
 *   -ELOOP       @dev is itself a bridge
 *   -EBUSY       @dev already is a bridge port
 *   -EOPNOTSUPP  @dev has IFF_DONT_BRIDGE set
 * or the error reported by new_nbp(), dev_set_promiscuity(),
 * kobject_init_and_add(), br_fdb_insert() or br_sysfs_addif().
 */
int br_add_if(struct net_bridge *br, struct net_device *dev)
{
	struct net_bridge_port *p;
	int err = 0;

	/* Don't allow bridging non-ethernet like devices */
	if ((dev->flags & IFF_LOOPBACK) ||
	    dev->type != ARPHRD_ETHER || dev->addr_len != ETH_ALEN)
		return -EINVAL;

	/* No bridging of bridges */
	if (dev->netdev_ops->ndo_start_xmit == br_dev_xmit)
		return -ELOOP;

	/* Device is already being bridged */
	if (dev->br_port != NULL)
		return -EBUSY;

	/* No bridging devices that dislike that (e.g. wireless) */
	if (dev->priv_flags & IFF_DONT_BRIDGE)
		return -EOPNOTSUPP;

	/* Allocate the port structure for this device. */
	p = new_nbp(br, dev);
	if (IS_ERR(p))
		return PTR_ERR(p);

	/* Put the device into promiscuous mode. */
	err = dev_set_promiscuity(dev, 1);
	if (err)
		goto put_back;

	/*
	 * Even when kobject_init_and_add() fails, the kobject has been
	 * initialised and must be released with kobject_put() so that the
	 * memory attached to it is cleaned up; a bare kfree(p) is not enough.
	 * Hence the failure unwinds through err1, not straight to kfree().
	 */
	err = kobject_init_and_add(&p->kobj, &brport_ktype, &(dev->dev.kobj),
				   SYSFS_BRIDGE_PORT_ATTR);
	if (err)
		goto err1;

	/* Enter the port's own MAC address into the forwarding database. */
	err = br_fdb_insert(br, p, dev->dev_addr);
	if (err)
		goto err1;

	/* Create the sysfs entries of the port. */
	err = br_sysfs_addif(p);
	if (err)
		goto err2;

	rcu_assign_pointer(dev->br_port, p);
	dev_disable_lro(dev);

	/* Link the port into the bridge's port list. */
	list_add_rcu(&p->list, &br->port_list);

	spin_lock_bh(&br->lock);
	br_stp_recalculate_bridge_id(br);
	br_features_recompute(br);

	/* Enable the port for STP only if both it and the bridge are up. */
	if ((dev->flags & IFF_UP) && netif_carrier_ok(dev) &&
	    (br->dev->flags & IFF_UP))
		br_stp_enable_port(p);
	spin_unlock_bh(&br->lock);

	br_ifinfo_notify(RTM_NEWLINK, p);

	dev_set_mtu(br->dev, br_min_mtu(br));

	kobject_uevent(&p->kobj, KOBJ_ADD);

	br_netpoll_enable(br, dev);

	return 0;

err2:
	br_fdb_delete_by_port(br, p, 1);
err1:
	kobject_put(&p->kobj);
	p = NULL; /* kobject_put frees */
	dev_set_promiscuity(dev, -1);
put_back:
	/* presumably balances a reference taken in new_nbp() -- verify there */
	dev_put(dev);
	kfree(p);	/* no-op when kobject_put() already released p */
	return err;
}