linux網橋--簡介
版權宣告:如有需要,可供轉載,但請註明出處:https://blog.csdn.net/City_of_skey/article/details/85240141
目錄
2.3 struct net_bridge_fdb_entry
1、網橋簡介
Linux下的網橋是一個虛擬裝置,本質上說是一個高層次的二層虛擬裝置,它把其他的從裝置虛擬為一個port。當把一個網絡卡裝置加入到網橋後,網橋的mac地址就是下面所有網絡卡中最小的那個mac地址,然後所有的網絡卡共享網橋的ip。網絡卡接收、傳送資料包都交給網橋決策。網橋工作在鏈路層。
如下圖所示網橋收包流程圖:
2、網橋初始化
2.1 struct net_bridge
struct net_bridge是描述網橋的結構體,其中比較重要的成員是hash。hash這個雜湊表儲存了mac地址與網路埠的對應關係,也就是mac地址學習的結果;網橋轉發時就要根據mac地址在這個雜湊表中查詢應該從哪個埠轉發出去。
struct net_bridge { spinlock_t lock; //自旋鎖 struct list_head port_list; //網橋下裝置連結串列 struct net_device *dev; //網橋的裝置例項結構體 struct br_cpu_netstats __percpu *stats; spinlock_t hash_lock; struct hlist_head hash[BR_HASH_SIZE]; //儲存mac地址學習也就是net_bridge_fsb_entry結構體 unsigned long feature_mask; #ifdef CONFIG_BRIDGE_NETFILTER struct rtable fake_rtable; #endif unsigned long flags; #define BR_SET_MAC_ADDR 0x00000001 /* STP */ bridge_id designated_root; bridge_id bridge_id; u32 root_path_cost; unsigned long max_age; unsigned long hello_time; unsigned long forward_delay; unsigned long bridge_max_age; unsigned long ageing_time; unsigned long bridge_hello_time; unsigned long bridge_forward_delay; u8 group_addr[ETH_ALEN]; u16 root_port; /*STP協議型別*/ enum { BR_NO_STP, /* no spanning tree */ BR_KERNEL_STP, /* old STP in kernel */ BR_USER_STP, /* new RSTP in userspace */ } stp_enabled; unsigned char topology_change; unsigned char topology_change_detected; #ifdef CONFIG_BRIDGE_IGMP_SNOOPING unsigned char multicast_router; u8 multicast_disabled:1; u32 hash_elasticity; u32 hash_max; u32 multicast_last_member_count; u32 multicast_startup_queries_sent; u32 multicast_startup_query_count; unsigned long multicast_last_member_interval; unsigned long multicast_membership_interval; unsigned long multicast_querier_interval; unsigned long multicast_query_interval; unsigned long multicast_query_response_interval; unsigned long multicast_startup_query_interval; spinlock_t multicast_lock; struct net_bridge_mdb_htable *mdb; struct hlist_head router_list; struct hlist_head mglist; /*一些定時器*/ struct timer_list multicast_router_timer; struct timer_list multicast_querier_timer; struct timer_list multicast_query_timer; #endif struct timer_list hello_timer; struct timer_list tcn_timer; struct timer_list topology_change_timer; struct timer_list gc_timer; struct kobject *ifobj; };
2.2 struct net_bridge_port
struct net_bridge_port描述網橋下的網絡卡埠。
/*
 * struct net_bridge_port - one NIC port attached to a bridge.
 *
 * Links the underlying net_device to its owning bridge and carries the
 * per-port STP fields, timers and (optionally) IGMP-snooping fields.
 */
struct net_bridge_port
{
struct net_bridge *br; // bridge this port belongs to
struct net_device *dev; // net_device structure of the port device
struct list_head list; // node in the bridge's list of ports
/* STP */
u8 priority;
u8 state;
u16 port_no;
unsigned char topology_change_ack;
unsigned char config_pending;
port_id port_id;
port_id designated_port;
bridge_id designated_root;
bridge_id designated_bridge;
u32 path_cost;
u32 designated_cost;
/* per-port timers */
struct timer_list forward_delay_timer;
struct timer_list hold_timer;
struct timer_list message_age_timer;
struct kobject kobj;
struct rcu_head rcu;
unsigned long flags;
/* NOTE(review): presumably a bit value for the flags member above - confirm */
#define BR_HAIRPIN_MODE 0x00000001
/* members below exist only when IGMP snooping is configured */
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
u32 multicast_startup_queries_sent;
unsigned char multicast_router;
struct timer_list multicast_router_timer;
struct timer_list multicast_query_timer;
struct hlist_head mglist;
struct hlist_node rlist;
#endif
#ifdef CONFIG_SYSFS
char sysfs_name[IFNAMSIZ];
#endif
};
/*
 * struct br_cpu_netstats - bridge traffic counters.
 *
 * struct net_bridge holds a __percpu pointer to this type (stats), which
 * new_bridge_dev() allocates with alloc_percpu().
 * NOTE(review): field meanings (received/transmitted packet and byte
 * counts) are inferred from the names - confirm against the users.
 */
struct br_cpu_netstats {
unsigned long rx_packets;
unsigned long rx_bytes;
unsigned long tx_packets;
unsigned long tx_bytes;
};
2.3 struct net_bridge_fdb_entry
struct net_bridge_fdb_entry是mac地址和網路埠對應表中的一個表項,最主要的兩個元素是dst、addr,也就是這個mac地址對應的資料包要從哪個網路埠出去。
/*
 * struct net_bridge_fdb_entry - one MAC-address-to-port mapping.
 *
 * The two key members are addr (the MAC address) and dst (the port that
 * address is reached through).
 */
struct net_bridge_fdb_entry
{
struct hlist_node hlist; // hash-list linkage
struct net_bridge_port *dst; // port associated with this address
struct rcu_head rcu;
unsigned long ageing_timer;
mac_addr addr; // MAC address
unsigned char is_local; // whether this is a MAC address of the local machine
unsigned char is_static; // whether this is a static MAC entry
};
2.4 br_init
網橋在核心中是以模組的形式實現的,模組的初始化函式是br_init,位於net/bridge/br.c檔案中。
br_init主要做以下幾件事情:
(1)STP協議註冊
(2)MAC學習表的初始化
(3)網橋在netfilter上鉤子函式註冊
(4)網橋的ioctl設定,提供給應用層的操作介面
/*
 * br_init - bridge module initialisation.
 *
 * Registers, in order: the STP protocol, the port/MAC table, the per-net
 * subsystem ops, the netfilter hooks, the netdevice notifier and netlink.
 * Once all of those succeed it installs the ioctl stub and the frame
 * handling hook.
 *
 * Returns 0 on success.  On failure every step that already succeeded is
 * undone in reverse order and the failing step's error code is returned.
 */
static int __init br_init(void)
{
	int err;

	/* STP protocol registration; nothing to unwind if this fails. */
	err = stp_proto_register(&br_stp_proto);
	if (err < 0) {
		pr_err("bridge: can't register sap for STP\n");
		return err;
	}

	/* Port/MAC table. */
	err = br_fdb_init();
	if (err != 0)
		goto undo_stp;

	err = register_pernet_subsys(&br_net_ops);
	if (err != 0)
		goto undo_fdb;

	/* Netfilter hook functions. */
	err = br_netfilter_init();
	if (err != 0)
		goto undo_pernet;

	/* Join the netdevice notifier chain. */
	err = register_netdevice_notifier(&br_device_notifier);
	if (err != 0)
		goto undo_netfilter;

	/* Netlink. */
	err = br_netlink_init();
	if (err != 0)
		goto undo_notifier;

	/* ioctl entry point offered to user space. */
	brioctl_set(br_ioctl_deviceless_stub);

	/* Frame handling entry point. */
	br_handle_frame_hook = br_handle_frame;

#if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)
	br_fdb_test_addr_hook = br_fdb_test_addr;
#endif

	return 0;

undo_notifier:
	unregister_netdevice_notifier(&br_device_notifier);
undo_netfilter:
	br_netfilter_fini();
undo_pernet:
	unregister_pernet_subsys(&br_net_ops);
undo_fdb:
	br_fdb_fini();
undo_stp:
	stp_proto_unregister(&br_stp_proto);
	return err;
}
網橋在netfilter框架中主要註冊了7個鉤子函式,接下來會詳細介紹。
/*
 * Netfilter hooks registered by the bridge: five on PF_BRIDGE
 * (PRE_ROUTING, LOCAL_IN, two on FORWARD, POST_ROUTING) plus
 * ip_sabotage_in on PRE_ROUTING for both PF_INET and PF_INET6.
 *
 * For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because
 * br_dev_queue_push_xmit is called afterwards
 */
static struct nf_hook_ops br_nf_ops[] __read_mostly = {
{
.hook = br_nf_pre_routing,
.owner = THIS_MODULE,
.pf = PF_BRIDGE,
.hooknum = NF_BR_PRE_ROUTING,
.priority = NF_BR_PRI_BRNF,
},
{
.hook = br_nf_local_in,
.owner = THIS_MODULE,
.pf = PF_BRIDGE,
.hooknum = NF_BR_LOCAL_IN,
.priority = NF_BR_PRI_BRNF,
},
/*
 * Two hooks share NF_BR_FORWARD; the IP one uses NF_BR_PRI_BRNF - 1.
 * NOTE(review): presumably so it runs before the ARP one - confirm.
 */
{
.hook = br_nf_forward_ip,
.owner = THIS_MODULE,
.pf = PF_BRIDGE,
.hooknum = NF_BR_FORWARD,
.priority = NF_BR_PRI_BRNF - 1,
},
{
.hook = br_nf_forward_arp,
.owner = THIS_MODULE,
.pf = PF_BRIDGE,
.hooknum = NF_BR_FORWARD,
.priority = NF_BR_PRI_BRNF,
},
{
.hook = br_nf_post_routing,
.owner = THIS_MODULE,
.pf = PF_BRIDGE,
.hooknum = NF_BR_POST_ROUTING,
.priority = NF_BR_PRI_LAST,
},
/* Same handler registered once for IPv4 and once for IPv6. */
{
.hook = ip_sabotage_in,
.owner = THIS_MODULE,
.pf = PF_INET,
.hooknum = NF_INET_PRE_ROUTING,
.priority = NF_IP_PRI_FIRST,
},
{
.hook = ip_sabotage_in,
.owner = THIS_MODULE,
.pf = PF_INET6,
.hooknum = NF_INET_PRE_ROUTING,
.priority = NF_IP6_PRI_FIRST,
},
};
3、新建網橋
我們通過brctl addbr br0命令新建一個網橋br0,核心提供的ioctl介面由函式br_ioctl_deviceless_stub實現,新建網橋呼叫br_add_bridge,刪除網橋呼叫br_del_bridge。
/*
 * br_ioctl_deviceless_stub - bridge ioctls that are not tied to a device.
 * @net:  network namespace the request applies to
 * @cmd:  ioctl command
 * @uarg: user-space argument pointer
 *
 * SIOCGIFBR / SIOCSIFBR are passed to old_deviceless().
 * SIOCBRADDBR / SIOCBRDELBR create or delete the bridge whose name is
 * read from @uarg; they require CAP_NET_ADMIN.
 *
 * Returns the handler's result, -EPERM without the capability, -EFAULT
 * if the name cannot be copied in, or -EOPNOTSUPP for any other command.
 */
int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd, void __user *uarg)
{
	char name[IFNAMSIZ];

	if (cmd == SIOCGIFBR || cmd == SIOCSIFBR)
		return old_deviceless(net, uarg);

	if (cmd != SIOCBRADDBR && cmd != SIOCBRDELBR)
		return -EOPNOTSUPP;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	/* Fetch the bridge name from the caller's address space. */
	if (copy_from_user(name, uarg, IFNAMSIZ))
		return -EFAULT;

	/* The user buffer is not trusted to be NUL-terminated. */
	name[IFNAMSIZ - 1] = 0;

	return (cmd == SIOCBRADDBR) ? br_add_bridge(net, name)
				    : br_del_bridge(net, name);
}
3.1、br_add_bridge
br_add_bridge主要做三件事情:
1、呼叫new_bridge_dev根據網橋名字新建一個網橋裝置
2、網橋裝置也是一個以太裝置所以要呼叫register_netdevice註冊這個網橋裝置的struct net_device
3、初始化網橋裝置在sysfs中的相關資訊,便於管理查詢
/*
 * br_add_bridge - create and register a new bridge device.
 * @net:  network namespace for the new device
 * @name: requested device name; if it contains '%' the final name is
 *        chosen by dev_alloc_name()
 *
 * Allocates the device with new_bridge_dev(), then, under the RTNL lock,
 * registers it and adds its sysfs entries.
 *
 * Returns 0 on success, -ENOMEM if allocation fails, or the error from
 * dev_alloc_name(), register_netdevice() or br_sysfs_addbr().
 */
int br_add_bridge(struct net *net, const char *name)
{
	struct net_device *dev;
	int ret;

	/* Allocate the bridge device named @name. */
	dev = new_bridge_dev(net, name);
	if (!dev)
		return -ENOMEM;

	rtnl_lock();

	if (strchr(dev->name, '%')) {
		ret = dev_alloc_name(dev, dev->name);
		if (ret < 0)
			goto free_dev;
	}

	SET_NETDEV_DEVTYPE(dev, &br_type);

	/* Register the virtual bridge device. */
	ret = register_netdevice(dev);
	if (ret)
		goto free_dev;

	/* Expose the bridge in sysfs; back out the registration on failure. */
	ret = br_sysfs_addbr(dev);
	if (ret)
		unregister_netdevice(dev);
	goto unlock;

free_dev:
	/*
	 * NOTE(review): new_bridge_dev() allocates br->stats with
	 * alloc_percpu(); only free_netdev() is called here - confirm
	 * that the per-cpu stats are not leaked on this path.
	 */
	free_netdev(dev);
unlock:
	rtnl_unlock();
	return ret;
}
3.2、new_bridge_dev
網橋裝置也是一個虛擬的以太裝置,所以呼叫alloc_netdev新建一個以太裝置,並執行網橋初始化函式br_dev_setup,net_device的私有資料指向網橋。
/*
 * new_bridge_dev - allocate a net_device with a struct net_bridge as its
 * private area and fill in the bridge defaults.
 * @net:  network namespace to place the device in
 * @name: device name passed to alloc_netdev()
 *
 * br_dev_setup() is the setup callback given to alloc_netdev().  STP
 * starts disabled (BR_NO_STP).  Default timers: max age 20 * HZ,
 * hello 2 * HZ, forward delay 15 * HZ, ageing 300 * HZ.
 *
 * Returns the new device, or NULL if the device or its per-cpu
 * statistics cannot be allocated.
 */
static struct net_device *new_bridge_dev(struct net *net, const char *name)
{
	struct net_device *ndev;
	struct net_bridge *bridge;

	/* Create the virtual device; br_dev_setup performs the basic setup. */
	ndev = alloc_netdev(sizeof(*bridge), name, br_dev_setup);
	if (ndev == NULL)
		return NULL;

	dev_net_set(ndev, net);

	/* The bridge state is the net_device's private data. */
	bridge = netdev_priv(ndev);
	bridge->dev = ndev;

	bridge->stats = alloc_percpu(struct br_cpu_netstats);
	if (bridge->stats == NULL) {
		free_netdev(ndev);
		return NULL;
	}

	spin_lock_init(&bridge->lock);
	INIT_LIST_HEAD(&bridge->port_list);
	spin_lock_init(&bridge->hash_lock);

	bridge->bridge_id.prio[0] = 0x80;
	bridge->bridge_id.prio[1] = 0x00;

	memcpy(bridge->group_addr, br_group_address, ETH_ALEN);

	bridge->feature_mask = ndev->features;

	/* STP is off by default. */
	bridge->stp_enabled = BR_NO_STP;
	bridge->designated_root = bridge->bridge_id;
	bridge->root_path_cost = 0;
	bridge->root_port = 0;

	bridge->max_age = 20 * HZ;
	bridge->bridge_max_age = bridge->max_age;
	bridge->hello_time = 2 * HZ;
	bridge->bridge_hello_time = bridge->hello_time;
	bridge->forward_delay = 15 * HZ;
	bridge->bridge_forward_delay = bridge->forward_delay;

	bridge->topology_change = 0;
	bridge->topology_change_detected = 0;
	bridge->ageing_time = 300 * HZ;

	/* Routing-table related initialisation. */
	br_netfilter_rtable_init(bridge);

	/* Timer initialisation. */
	br_stp_timer_init(bridge);
	br_multicast_init(bridge);

	return ndev;
}
3.3、br_dev_setup
這個函式主要是初始化網橋
(1)初始化網橋的MAC
(2)對網橋進行乙太網初始化
(3)設定以太裝置的操作函式指標br_netdev_ops
(4)設定以太裝置管理函式指向br_ethtool_ops。
/*
 * br_dev_setup - setup callback for a bridge net_device.
 * @dev: device being set up
 *
 * Gives the device a random MAC address, applies the generic Ethernet
 * setup, then installs the bridge's netdev/ethtool operation tables,
 * its destructor, the IFF_EBRIDGE private flag and its feature set.
 */
void br_dev_setup(struct net_device *dev)
{
	/* Initial MAC address. */
	random_ether_addr(dev->dev_addr);

	/* A bridge is an Ethernet device, so run the Ethernet setup. */
	ether_setup(dev);

	/* Operation tables and destructor. */
	dev->netdev_ops = &br_netdev_ops;
	SET_ETHTOOL_OPS(dev, &br_ethtool_ops);
	dev->destructor = br_dev_free;

	/* Mark the device as a bridge. */
	dev->priv_flags = IFF_EBRIDGE;
	dev->tx_queue_len = 0;

	dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA
		      | NETIF_F_GSO | NETIF_F_GSO_MASK
		      | NETIF_F_NO_CSUM
		      | NETIF_F_LLTX | NETIF_F_NETNS_LOCAL;
}
br_netdev_ops:
/*
 * net_device operations of the bridge device; installed on the device by
 * br_dev_setup().
 */
static const struct net_device_ops br_netdev_ops = {
.ndo_open = br_dev_open, // open the device
.ndo_stop = br_dev_stop, // close the device
.ndo_start_xmit = br_dev_xmit, // transmit data
.ndo_get_stats = br_get_stats,
.ndo_set_mac_address = br_set_mac_address, // set the MAC address
.ndo_set_multicast_list = br_dev_set_multicast_list,
.ndo_change_mtu = br_change_mtu, // set the MTU
.ndo_do_ioctl = br_dev_ioctl, // ioctl handling: add / delete ports
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_netpoll_cleanup = br_netpoll_cleanup,
#endif
};
br_ethtool_ops:
/*
 * ethtool operations of the bridge device; installed on the device by
 * br_dev_setup().  Entries named br_* are bridge-specific handlers, the
 * ethtool_op_* entries are not defined in this file.
 */
static const struct ethtool_ops br_ethtool_ops = {
.get_drvinfo = br_getinfo,
.get_link = ethtool_op_get_link,
.get_tx_csum = ethtool_op_get_tx_csum,
.set_tx_csum = br_set_tx_csum,
.get_sg = ethtool_op_get_sg,
.set_sg = br_set_sg,
.get_tso = ethtool_op_get_tso,
.set_tso = br_set_tso,
.get_ufo = ethtool_op_get_ufo,
.set_ufo = ethtool_op_set_ufo,
.get_flags = ethtool_op_get_flags,
};
4、新增埠
應用層通過brctl addif br0 eth0將eth0新增到網橋br0下面,核心的實現函式是br_dev_ioctl,它依據cmd呼叫add_del_if來新增或者刪除埠。
/*
 * br_dev_ioctl - ioctl handler of a bridge device.
 * @dev: the bridge device
 * @rq:  request; ifr_ifindex names the port device for add/delete
 * @cmd: ioctl command
 *
 * SIOCDEVPRIVATE is passed to old_dev_ioctl(); SIOCBRADDIF and
 * SIOCBRDELIF add or remove the port through add_del_if().
 *
 * Returns the handler's result, or -EOPNOTSUPP (after a debug message)
 * for any other command.
 */
int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
{
	struct net_bridge *br = netdev_priv(dev);

	if (cmd == SIOCDEVPRIVATE)
		return old_dev_ioctl(dev, rq, cmd);

	/* The third argument is non-zero for "add", zero for "delete". */
	if (cmd == SIOCBRADDIF || cmd == SIOCBRDELIF)
		return add_del_if(br, rq->ifr_ifindex, cmd == SIOCBRADDIF);

	br_debug(br, "Bridge does not support ioctl 0x%x\n", cmd);
	return -EOPNOTSUPP;
}
4.1、add_del_if
/*
 * add_del_if - add a device to, or remove it from, a bridge.
 * @br:      bridge to operate on
 * @ifindex: interface index of the port device
 * @isadd:   non-zero to add the port, zero to remove it
 *
 * The device is looked up by @ifindex in the bridge device's namespace.
 *
 * Returns -EPERM without CAP_NET_ADMIN, -EINVAL if no device has that
 * index, otherwise the result of br_add_if() / br_del_if().
 *
 * called with RTNL
 */
static int add_del_if(struct net_bridge *br, int ifindex, int isadd)
{
	struct net_device *port_dev;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	/* Find the device to attach or detach; give up if it does not exist. */
	port_dev = __dev_get_by_index(dev_net(br->dev), ifindex);
	if (!port_dev)
		return -EINVAL;

	return isadd ? br_add_if(br, port_dev) : br_del_if(br, port_dev);
}
4.2、br_add_if
這個函式是網橋新增埠的關鍵,主要做以下事情
(1)、首先對要新增的裝置進行檢查:迴環裝置、非乙太網裝置直接返回錯誤;裝置本身是網橋、已經新增到其他網橋下面、或者帶有IFF_DONT_BRIDGE標誌時也直接返回錯誤。
(2)、呼叫new_nbp新建一個網橋埠結構體struct net_bridge_port
(3)、將裝置設定為混雜模式
(4)、將新增的埠新增到埠-MAC表中
(5)、將新增的埠p新增到sysfs檔案系統中
(6)、將新增的埠p新增到埠連結串列port_list中
/*
 * br_add_if - attach a network device to a bridge as a new port.
 * @br:  bridge to attach to
 * @dev: device to be added
 *
 * Validates the device, creates the port with new_nbp(), switches the
 * device to promiscuous mode, registers the port kobject, inserts
 * dev->dev_addr into the port/MAC table, adds the sysfs entries and then
 * publishes the port on br->port_list.
 *
 * Returns 0 on success or a negative errno.  On failure the steps already
 * taken are unwound through the error labels at the end.
 *
 * called with RTNL
 */
int br_add_if(struct net_bridge *br, struct net_device *dev)
{
struct net_bridge_port *p;
int err = 0;
/* Loopback and non-ethernet ports are rejected outright. */
/* Don't allow bridging non-ethernet like devices */
if ((dev->flags & IFF_LOOPBACK) ||
dev->type != ARPHRD_ETHER || dev->addr_len != ETH_ALEN)
return -EINVAL;
/* The candidate port is itself a bridge device (it transmits via br_dev_xmit). */
/* No bridging of bridges */
if (dev->netdev_ops->ndo_start_xmit == br_dev_xmit)
return -ELOOP;
/* The device already has a bridge port attached. */
/* Device is already being bridged */
if (dev->br_port != NULL)
return -EBUSY;
/* The device is flagged as not to be bridged. */
/* No bridging devices that dislike that (e.g. wireless) */
if (dev->priv_flags & IFF_DONT_BRIDGE)
return -EOPNOTSUPP;
/* Create the port object. */
p = new_nbp(br, dev);
if (IS_ERR(p))
return PTR_ERR(p);
/* Put the device into promiscuous mode. */
err = dev_set_promiscuity(dev, 1);
if (err)
goto put_back;
err = kobject_init_and_add(&p->kobj, &brport_ktype, &(dev->dev.kobj),
SYSFS_BRIDGE_PORT_ATTR);
if (err)
goto err0;
/* Insert the device's own address into the port/MAC table. */
err = br_fdb_insert(br, p, dev->dev_addr);
if (err)
goto err1;
/* Add the port's sysfs entries. */
err = br_sysfs_addif(p);
if (err)
goto err2;
/* From here on the port is visible through dev->br_port. */
rcu_assign_pointer(dev->br_port, p);
dev_disable_lro(dev);
/* Link the port into the bridge's port list. */
list_add_rcu(&p->list, &br->port_list);
/* Bridge id / features are recomputed, and STP enabled, under br->lock. */
spin_lock_bh(&br->lock);
br_stp_recalculate_bridge_id(br);
br_features_recompute(br);
/* Enable STP on the port only if port and bridge are up and the port has carrier. */
if ((dev->flags & IFF_UP) && netif_carrier_ok(dev) &&
(br->dev->flags & IFF_UP))
br_stp_enable_port(p);
spin_unlock_bh(&br->lock);
br_ifinfo_notify(RTM_NEWLINK, p);
/* Set the bridge MTU to the value computed by br_min_mtu(). */
dev_set_mtu(br->dev, br_min_mtu(br));
kobject_uevent(&p->kobj, KOBJ_ADD);
br_netpoll_enable(br, dev);
return 0;
/* Error unwinding: each label undoes the step before the one that failed. */
err2:
br_fdb_delete_by_port(br, p, 1);
err1:
kobject_put(&p->kobj);
/* p is cleared so that the kfree(p) below does not free it a second time. */
p = NULL; /* kobject_put frees */
err0:
dev_set_promiscuity(dev, -1);
put_back:
/* NOTE(review): presumably drops a device reference taken in new_nbp() - confirm */
dev_put(dev);
kfree(p);
return err;
}