1. 程式人生 > >學習Linux-4.12核心網路協議棧(1.5)——協議棧的初始化(inet_init主要資料結構)





int socket(int domain, int type, int protocol);

socket函式對應於普通檔案的開啟操作。普通檔案的開啟操作返回一個檔案描述字,而socket()用於建立一個socket描述符(socket descriptor),它唯一標識一個socket。這個socket描述字跟檔案描述字一樣,後續的操作都有用到它,把它作為引數,通過它來進行一些讀寫操作。


  • domain:即協議域,又稱為協議族(family)。常用的協議族有,AF_INET、AF_INET6、AF_LOCAL(或稱AF_UNIX,Unix域socket)、AF_ROUTE等等。協議族決定了socket的地址型別,在通訊中必須採用對應的地址,如AF_INET決定了要用ipv4地址(32位的)與埠號(16位的)的組合、AF_UNIX決定了要用一個絕對路徑名作為地址。
  • type:指定socket型別。常用的socket型別有,SOCK_STREAM、SOCK_DGRAM、SOCK_RAW、SOCK_PACKET、SOCK_SEQPACKET等等。
  • protocol:顧名思義,就是指定協議。常用的協議有,IPPROTO_TCP、IPPROTO_UDP、IPPROTO_SCTP、IPPROTO_TIPC等,它們分別對應TCP傳輸協議、UDP傳輸協議、SCTP傳輸協議、TIPC傳輸協議(這個協議我將會單獨開篇討論!)。



建立一個socket時,返回的socket描述字它存在於協議族(address family,AF_XXX)空間中,但沒有一個具體的地址。如果想要給它賦值一個地址,就必須呼叫bind()函式,否則就當呼叫connect()、listen()時系統會自動隨機分配一個埠。

1. static struct net_proto_family *net_families[NPROTO]

200 struct net_proto_family {
201     int     family;  //地址族型別
202     int     (*create)(struct net *net, struct socket *sock, //套接字的建立方法
203                   int protocol, int kern);
204     struct module   *owner;
205 };

210 #define AF_MAX      44  /* For now.. */
24 #define NPROTO      AF_MAX
163 static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
第一個重要的結構體是 net_proto_family。

在這之前必須知道 一些概念——地址族和套接字型別。 大家都知道所謂的套接字都有地址族,實際就是套接字介面的種類, 每種套接字種類有自己的通訊定址方法。 Linux 將不同的地址族抽象統一為 BSD 套接字介面,應用程式 關心的只是 BSD 套接字介面,通過引數來指定所使用的套接字地址族。

Linux 核心中為了支援多個地址族,定義了這麼一個變數:static struct net_proto_family *net_families[NPROTO],NPROTO 等於 44,也就是說 Linux 核心支援最多 44 種地址族。不過目前已經夠用了,我們常用的不外乎就是 PF_UNIX(1)、PF_INET(2)、PF_NETLINK(16),Linux 還有一個自有的 PF_PACKET(17),即對網絡卡進行操作的選項。這個陣列以應用層socket()的第一個引數(地址族)作為下標,它決定了這個引數可以取哪些值。當系統呼叫socket轉到核心處理的時候,它首先會用第一個引數查詢需要在哪個域裡面建立套接字。


在inet_init()中會呼叫這個函式註冊地址族:(void)sock_register(&inet_family_ops); 其中inet_family_ops是struct net_proto_family的結構體物件

1014 static const struct net_proto_family inet_family_ops = {
1015     .family = PF_INET,
1016     .create = inet_create,
1017     .owner  = THIS_MODULE,
1018 };
2490 int sock_register(const struct net_proto_family *ops)
2491 {
2492     int err;
2494     if (ops->family >= NPROTO) {
2495         pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
2496         return -ENOBUFS;
2497     }
2499     spin_lock(&net_family_lock);
2500     if (rcu_dereference_protected(net_families[ops->family],
2501                       lockdep_is_held(&net_family_lock)))
2502         err = -EEXIST;
2503     else {
2504         rcu_assign_pointer(net_families[ops->family], ops);  //將inet_family_ops物件新增到net_families全域性數組裡面,就完成了初始化
2505         err = 0;
2506     }
2507     spin_unlock(&net_family_lock);
2509     pr_info("NET: Registered protocol family %d\n", ops->family);
2510     return err;
2511 }
2512 EXPORT_SYMBOL(sock_register);

2. static struct inet_protosw inetsw_array[]

 79 /* This is used to register socket interfaces for IP protocols.  */
 80 struct inet_protosw {
 81     struct list_head list;
 83         /* These two fields form the lookup key.  */
 84     unsigned short   type;     /* This is the 2nd argument to socket(2). */
 85     unsigned short   protocol; /* This is the L4 protocol number.  */
 87     struct proto     *prot;
 88     const struct proto_ops *ops;
 90     unsigned char    flags;      /* See INET_PROTOSW_* below.  */
 91 };
 92 #define INET_PROTOSW_REUSE 0x01      /* Are ports automatically reusable? */
 93 #define INET_PROTOSW_PERMANENT 0x02  /* Permanent protocols are unremovable. */
 94 #define INET_PROTOSW_ICSK      0x04  /* Is this an inet_connection_sock? */

前面一直疑惑inet_protosw中的sw表示什麼意思,後來才知道,原來是switch的縮寫,表示inet層的協議切換,inetsw串聯著PF_INET地址族所支援的協議型別,比如tcp, udp等。



1. type和protocol兩個域組成了socket的查詢key,他們分別對應著應用層socket函式的第二和第三個引數,通過這兩個引數來確定使用的是哪種socket型別。查詢的時候主要是查詢type,protocol只是用來防止初始化好的協議被再次初始化,比如開機的時候已經初始化好了TCP協議,如果應用層又呼叫了該協議的初始化函式,將直接退出。

2. prot表示的是該協議相關的處理函式,比如tcp_v4_connect和tcp_v4_init_sock等相關的協議具體處理函式

3. ops表示的是該socket型別的套接字的管理函式,比如處理inet_bind和inet_accept等對inet層的套接字呼叫


4. flags用來標識該協議的一些特性,比如TCP協議的埠是否可自動重用,該協議是否可以被移除,也就是說該協議是否可以從inetsw_array全域性陣列中刪除,當然對於TCP,UDP這些必備的協議是不能被移除的

static struct list_head inetsw[SOCK_MAX];
1020 /* Upon startup we insert all the elements in inetsw_array[] into
1021  * the linked list inetsw.
1022  */
1023 static struct inet_protosw inetsw_array[] =
1024 {
1025     {
1026         .type =       SOCK_STREAM,
1027         .protocol =   IPPROTO_TCP,
1028         .prot =       &tcp_prot,
1029         .ops =        &inet_stream_ops,
1030         .flags =      INET_PROTOSW_PERMANENT |
1031                   INET_PROTOSW_ICSK,
1032     },
1034     {
1035         .type =       SOCK_DGRAM,
1036         .protocol =   IPPROTO_UDP,
1037         .prot =       &udp_prot,
1038         .ops =        &inet_dgram_ops,
1039         .flags =      INET_PROTOSW_PERMANENT,
1040        },
1042        {
1043         .type =       SOCK_DGRAM,
1044         .protocol =   IPPROTO_ICMP,
1045         .prot =       &ping_prot,
1046         .ops =        &inet_sockraw_ops,
1047         .flags =      INET_PROTOSW_REUSE,
1048        },
1050        {
1051            .type =       SOCK_RAW,
1052            .protocol =   IPPROTO_IP,    /* wild card */
1053            .prot =       &raw_prot,
1054            .ops =        &inet_sockraw_ops,
1055            .flags =      INET_PROTOSW_REUSE,
1056        }
1057 };
1059 #define INETSW_ARRAY_LEN ARRAY_SIZE(inetsw_array)


1845     /* Register the socket-side information for inet_create. */
1846     for (r = &inetsw[0]; r < &inetsw[SOCK_MAX]; ++r)
1847         INIT_LIST_HEAD(r);
1849     for (q = inetsw_array; q < &inetsw_array[INETSW_ARRAY_LEN]; ++q)
1850         inet_register_protosw(q);

3. struct proto_ops


 926 const struct proto_ops inet_stream_ops = {
 927     .family        = PF_INET,
 928     .owner         = THIS_MODULE,
 929     .release       = inet_release,
 930     .bind          = inet_bind,
 931     .connect       = inet_stream_connect,
 932     .socketpair    = sock_no_socketpair,
 933     .accept        = inet_accept,
 934     .getname       = inet_getname,
 935     .poll          = tcp_poll,
 936     .ioctl         = inet_ioctl,
 937     .listen        = inet_listen,
 938     .shutdown      = inet_shutdown,
 939     .setsockopt    = sock_common_setsockopt,
 940     .getsockopt    = sock_common_getsockopt,
 941     .sendmsg       = inet_sendmsg,
 942     .recvmsg       = inet_recvmsg,
 943     .mmap          = sock_no_mmap,
 944     .sendpage      = inet_sendpage,
 945     .splice_read       = tcp_splice_read,
 946     .read_sock     = tcp_read_sock,
 947     .peek_len      = tcp_peek_len,
 948 #ifdef CONFIG_COMPAT
 949     .compat_setsockopt = compat_sock_common_setsockopt,
 950     .compat_getsockopt = compat_sock_common_getsockopt,
 951     .compat_ioctl      = inet_compat_ioctl,
 952 #endif
 953 };
 954 EXPORT_SYMBOL(inet_stream_ops);


 15 /** sock_type - Socket types
 16  *
 17  * Please notice that for binary compat reasons MIPS has to
 18  * override the enum sock_type in include/linux/net.h, so
 19  * we define ARCH_HAS_SOCKET_TYPES here.
 20  *
 21  * @SOCK_DGRAM - datagram (conn.less) socket
 22  * @SOCK_STREAM - stream (connection) socket
 23  * @SOCK_RAW - raw socket
 24  * @SOCK_RDM - reliably-delivered message
 25  * @SOCK_SEQPACKET - sequential packet socket
 26  * @SOCK_PACKET - linux specific way of getting packets at the dev level.
 27  *        For writing rarp and other similar things on the user level.
 28  */
 29 enum sock_type {
 30     SOCK_DGRAM  = 1,
 31     SOCK_STREAM = 2,
 32     SOCK_RAW    = 3,
 33     SOCK_RDM    = 4,
 34     SOCK_SEQPACKET  = 5,
 35     SOCK_DCCP   = 6,
 36     SOCK_PACKET = 10,
 37 };

1023 static struct inet_protosw inetsw_array[] =
1024 {
1025     {
1026         .type =       SOCK_STREAM,
1027         .protocol =   IPPROTO_TCP,
1028         .prot =       &tcp_prot,
1029         .ops =        &inet_stream_ops,
1030         .flags =      INET_PROTOSW_PERMANENT |
1031                   INET_PROTOSW_ICSK,
1032     },

4.  struct proto

這個結構體裡面存放的是不同協議的操作函式集,也就是具體協議的實現。這裡需要注意和proto_ops的區分,struct proto_ops是根據套接字的型別(引數二type: SOCK_STREAM, SOCK_DGRAM...)不同而組成不同的套接字管理函式集,它面向的是套接字的管理; struct proto是根據套接字的協議型別(引數三protocol: IPPROTO_TCP, IPPROTO_UDP,)不同而組成的不同協議管理函式集,它面向的是具體協議的實現。

 27 enum {
 28   IPPROTO_IP = 0,       /* Dummy protocol for TCP       */
 29 #define IPPROTO_IP      IPPROTO_IP
 30   IPPROTO_ICMP = 1,     /* Internet Control Message Protocol    */
 31 #define IPPROTO_ICMP        IPPROTO_ICMP
 32   IPPROTO_IGMP = 2,     /* Internet Group Management Protocol   */
 33 #define IPPROTO_IGMP        IPPROTO_IGMP
 34   IPPROTO_IPIP = 4,     /* IPIP tunnels (older KA9Q tunnels use 94) */
 35 #define IPPROTO_IPIP        IPPROTO_IPIP
 36   IPPROTO_TCP = 6,      /* Transmission Control Protocol    */
 38   IPPROTO_EGP = 8,      /* Exterior Gateway Protocol        */
 40   IPPROTO_PUP = 12,     /* PUP protocol             */
 42   IPPROTO_UDP = 17,     /* User Datagram Protocol       */

2365 struct proto tcp_prot = {
2366     .name           = "TCP",
2367     .owner          = THIS_MODULE,
2368     .close          = tcp_close,
2369     .connect        = tcp_v4_connect,
2370     .disconnect     = tcp_disconnect,
2371     .accept         = inet_csk_accept,
2372     .ioctl          = tcp_ioctl,
2373     .init           = tcp_v4_init_sock,
2374     .destroy        = tcp_v4_destroy_sock,
2375     .shutdown       = tcp_shutdown,
2376     .setsockopt     = tcp_setsockopt,
2377     .getsockopt     = tcp_getsockopt,
2378     .keepalive      = tcp_set_keepalive,
2379     .recvmsg        = tcp_recvmsg,
2380     .sendmsg        = tcp_sendmsg,
2381     .sendpage       = tcp_sendpage,
2382     .backlog_rcv        = tcp_v4_do_rcv,
2383     .release_cb     = tcp_release_cb,
2384     .hash           = inet_hash,
2385     .unhash         = inet_unhash,
2386     .get_port       = inet_csk_get_port,
2387     .enter_memory_pressure  = tcp_enter_memory_pressure,
2388     .stream_memory_free = tcp_stream_memory_free,
2389     .sockets_allocated  = &tcp_sockets_allocated,
2390     .orphan_count       = &tcp_orphan_count,
2391     .memory_allocated   = &tcp_memory_allocated,
2392     .memory_pressure    = &tcp_memory_pressure,
2393     .sysctl_mem     = sysctl_tcp_mem,
2394     .sysctl_wmem        = sysctl_tcp_wmem,
2395     .sysctl_rmem        = sysctl_tcp_rmem,
2396     .max_header     = MAX_TCP_HEADER,
2397     .obj_size       = sizeof(struct tcp_sock),
2398     .slab_flags     = SLAB_TYPESAFE_BY_RCU,
2399     .twsk_prot      = &tcp_timewait_sock_ops,
2400     .rsk_prot       = &tcp_request_sock_ops,
2401     .h.hashinfo     = &tcp_hashinfo,
2402     .no_autobind        = true,
2403 #ifdef CONFIG_COMPAT
2404     .compat_setsockopt  = compat_tcp_setsockopt,
2405     .compat_getsockopt  = compat_tcp_getsockopt,
2406 #endif
2407     .diag_destroy       = tcp_abort,
2408 };
2409 EXPORT_SYMBOL(tcp_prot);

2354 struct proto udp_prot = {
2355     .name          = "UDP",
2356     .owner         = THIS_MODULE,
2357     .close         = udp_lib_close,
2358     .connect       = ip4_datagram_connect,
2359     .disconnect    = udp_disconnect,
2360     .ioctl         = udp_ioctl,
2361     .init          = udp_init_sock,
2362     .destroy       = udp_destroy_sock,
2363     .setsockopt    = udp_setsockopt,
2364     .getsockopt    = udp_getsockopt,
2365     .sendmsg       = udp_sendmsg,
2366     .recvmsg       = udp_recvmsg,
2367     .sendpage      = udp_sendpage,
2368     .release_cb    = ip4_datagram_release_cb,
2369     .hash          = udp_lib_hash,
2370     .unhash        = udp_lib_unhash,
2371     .rehash        = udp_v4_rehash,
2372     .get_port      = udp_v4_get_port,
2373     .memory_allocated  = &udp_memory_allocated,
2374     .sysctl_mem    = sysctl_udp_mem,
2375     .sysctl_wmem       = &sysctl_udp_wmem_min,
2376     .sysctl_rmem       = &sysctl_udp_rmem_min,
2377     .obj_size      = sizeof(struct udp_sock),
2378     .h.udp_table       = &udp_table,
2379 #ifdef CONFIG_COMPAT
2380     .compat_setsockopt = compat_udp_setsockopt,
2381     .compat_getsockopt = compat_udp_getsockopt,
2382 #endif
2383     .diag_destroy      = udp_abort,
2384 };
2385 EXPORT_SYMBOL(udp_prot);

1023 static struct inet_protosw inetsw_array[] =
1024 {
1025     {
1026         .type =       SOCK_STREAM,
1027         .protocol =   IPPROTO_TCP,
1028         .prot =       &tcp_prot,
1029         .ops =        &inet_stream_ops,
1030         .flags =      INET_PROTOSW_PERMANENT |
1031                   INET_PROTOSW_ICSK,
1032     },

不僅如此, struct proto物件自己也維護了一個連結串列,串連在proto_list全域性連結串列後面, 下面來看看它是怎麼做的

 146 static LIST_HEAD(proto_list);
1796 static int __init inet_init(void)
1797 {
1798     struct inet_protosw *q;
1799     struct list_head *r;
1800     int rc = -EINVAL;
1802     sock_skb_cb_check_size(sizeof(struct inet_skb_parm));
1804     rc = proto_register(&tcp_prot, 1);
1805     if (rc)
1806         goto out;

3049 int proto_register(struct proto *prot, int alloc_slab)
3050 {
3051     if (alloc_slab) {
3052         prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
3053                     SLAB_HWCACHE_ALIGN | prot->slab_flags,
3054                     NULL);
3056         if (prot->slab == NULL) {
3057             pr_crit("%s: Can't create sock SLAB cache!\n",
3058                 prot->name);
3059             goto out;
3060         }
3062         if (req_prot_init(prot))
3063             goto out_free_request_sock_slab;
3065         if (prot->twsk_prot != NULL) {
3066             prot->twsk_prot->twsk_slab_name = kasprintf(GFP_KERNEL, "tw_sock_%s", prot->name);
3068             if (prot->twsk_prot->twsk_slab_name == NULL)
3069                 goto out_free_request_sock_slab;
3071             prot->twsk_prot->twsk_slab =
3072                 kmem_cache_create(prot->twsk_prot->twsk_slab_name,
3073                           ....
3080     }
3082     mutex_lock(&proto_list_mutex);
3083     list_add(&prot->node, &proto_list);
3084     assign_proto_idx(prot);
3085     mutex_unlock(&proto_list_mutex);
3086     return 0;
3087     .....
3097 }
3098 EXPORT_SYMBOL(proto_register);

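我們可以看到,在inet_init函式剛開始就呼叫了proto_register註冊struct proto物件,第一個引數傳的是協議,第二個引數alloc_slab表示是否為該協議建立專用的slab快取:如果是1,就會呼叫kmem_cache_create()按prot->obj_size為該協議的sock物件建立專用的slab快取(同時建立request sock和timewait sock對應的slab),如果是0則不建立專用的slab快取。因為tcp這些協議的sock物件會被頻繁地分配和釋放,所以為它們建立專用的slab快取比較合適。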


5. struct net_protocol

 40 /* This is used to register protocols. */
 41 struct net_protocol {
 42     void            (*early_demux)(struct sk_buff *skb);
 43     void                    (*early_demux_handler)(struct sk_buff *skb);
 44     int         (*handler)(struct sk_buff *skb);
 45     void            (*err_handler)(struct sk_buff *skb, u32 info);
 46     unsigned int        no_policy:1,
 47                 netns_ok:1,
 48                 /* does the protocol do more stringent
 49                  * icmp tag validation than simple
 50                  * socket lookup?
 51                  */
 52                 icmp_strict_tag_validation:1;
 53 };


1. 第一個和第二個成員(early_demux和early_demux_handler)用於提前解複用(early demux),即在IP層做路由查詢之前,先嚐試查詢這個包所屬的socket。對於需要大量轉發包的裝置,還是需要關閉這個功能,可以檢視這裡瞭解它

2. handler表示對應協議包的收包處理函式,當收到一個包的時候,在IP層將會判斷這個包的協議,然後根據協議型別呼叫該結構中的收包函式,進而將包傳給傳輸層處理

3. netns_ok表示該協議是否支援網路名稱空間(network namespace)。從下面的inet_add_protocol()可以看到,如果netns_ok為0,註冊時會列印"is not namespace aware"並返回-EINVAL


1599 static const struct net_protocol igmp_protocol = {
1600     .handler =  igmp_rcv,
1601     .netns_ok = 1,
1602 };
1603 #endif
1605 static struct net_protocol tcp_protocol = {
1606     .early_demux    =   tcp_v4_early_demux,
1607     .early_demux_handler =  tcp_v4_early_demux,
1608     .handler    =   tcp_v4_rcv,
1609     .err_handler    =   tcp_v4_err,
1610     .no_policy  =   1,
1611     .netns_ok   =   1,
1612     .icmp_strict_tag_validation = 1,
1613 };
1615 static struct net_protocol udp_protocol = {
1616     .early_demux =  udp_v4_early_demux,
1617     .early_demux_handler =  udp_v4_early_demux,
1618     .handler =  udp_rcv,
1619     .err_handler =  udp_err,
1620     .no_policy =    1,
1621     .netns_ok = 1,
1622 };
1624 static const struct net_protocol icmp_protocol = {
1625     .handler =  icmp_rcv,
1626     .err_handler =  icmp_err,
1627     .no_policy =    1,
1628     .netns_ok = 1,
1629 };

這些例項在inet_init函式中,通過以下程式碼將不同的協議接收函式新增到inet_protos[protocol] 全域性陣列中,從而完成IP層和傳輸層的銜接
1830     /*
1831      *  Add all the base protocols.
1832      */
1834     if (inet_add_protocol(&icmp_protocol, IPPROTO_ICMP) < 0)
1835         pr_crit("%s: Cannot add ICMP protocol\n", __func__);
1836     if (inet_add_protocol(&udp_protocol, IPPROTO_UDP) < 0)
1837         pr_crit("%s: Cannot add UDP protocol\n", __func__);
1838     if (inet_add_protocol(&tcp_protocol, IPPROTO_TCP) < 0)
1839         pr_crit("%s: Cannot add TCP protocol\n", __func__);
1841     if (inet_add_protocol(&igmp_protocol, IPPROTO_IGMP) < 0)
1842         pr_crit("%s: Cannot add IGMP protocol\n", __func__);
1843 #endif

 31 struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS] __read_mostly;
 32 const struct net_offload __rcu *inet_offloads[MAX_INET_PROTOS] __read_mostly;
 33 EXPORT_SYMBOL(inet_offloads);
 35 int inet_add_protocol(const struct net_protocol *prot, unsigned char protocol)
 36 {
 37     if (!prot->netns_ok) {
 38         pr_err("Protocol %u is not namespace aware, cannot register.\n",
 39             protocol);
 40         return -EINVAL;
 41     }
 43     return !cmpxchg((const struct net_protocol **)&inet_protos[protocol],
 44             NULL, prot) ? 0 : -1;
 45 }
 46 EXPORT_SYMBOL(inet_add_protocol);

6. struct packet_type ptype_base[]

以上是關於協議棧框架的搭建,對於傳輸層以上的協議實現來說,已經差不多初始化好了,但是對於 IP 層接收流程,則還不夠。因為對於傳送過程,直接呼叫的是 IP 層函式;而對於核心接收過程則分為 2 層:上層需要有一個接收函式解複用傳輸協議報文,我們已經介紹了,而下層需要一個接收函式解複用網路層報文。對報文感興趣的底層(IP層)協議目前有兩個,一個是 ARP,一個是 IP,報文從裝置層送到上層之前,必須區分是 IP 報文還是 ARP 報文,然後才能往上層送。這個過程由一個資料結構來抽象,叫 packet_type{},定義在 linux/netdevice.h

2204 struct packet_type {
2205     __be16          type;   /* This is really htons(ether_type). */
2206     struct net_device   *dev;   /* NULL is wildcarded here       */
2207     int         (*func) (struct sk_buff *,
2208                      struct net_device *,
2209                      struct packet_type *,
2210                      struct net_device *);
2211     bool            (*id_match)(struct packet_type *ptype,
2212                         struct sock *sk);
2213     void            *af_packet_priv;
2214     struct list_head    list;
2215 };

1. type:網路層的報文型別,目前主要是IP和ARP

2. dev:指向我們希望接收到包的那個介面的 net device 結構。如果是 NULL,則我們會從任何一個網路介面上收到包

3. af_packet_priv:供 AF_PACKET 套接字使用的私有資料指標

4. list:如果某個 packet_type{}被註冊到系統中,那麼它就被掛接到全域性連結串列中(有 2 個,見下面的解說),list 就是代表連結串列節點

inet_init 函式最後呼叫了一個 dev_add_pack 函式。不僅是 inet_init 函式呼叫,有一個很重要的模組也呼叫了它,就是 ARP 模組,我們會在後面的章節看到它是如何呼叫 dev_add_pack 函式的。也就是說在網路棧初始化的時候,會新增IP協議到連結串列中,在ARP初始化的時候,會將ARP協議也新增到這個連結串列中


 45 #define ETH_P_LOOP  0x0060      /* Ethernet Loopback packet */
 46 #define ETH_P_PUP   0x0200      /* Xerox PUP packet     */
 47 #define ETH_P_PUPAT 0x0201      /* Xerox PUP Addr Trans packet  */
 48 #define ETH_P_TSN   0x22F0      /* TSN (IEEE 1722) packet   */
 49 #define ETH_P_IP    0x0800      /* Internet Protocol packet */ 
 50 #define ETH_P_X25   0x0805      /* CCITT X.25           */
 51 #define ETH_P_ARP   0x0806      /* Address Resolution packet    */
117 #define ETH_P_ALL   0x0003      /* Every packet (be careful!!!) */ 抓包模式

1791 static struct packet_type ip_packet_type __read_mostly = {
1792     .type = cpu_to_be16(ETH_P_IP),
1793     .func = ip_rcv,
1794 };

1903     dev_add_pack(&ip_packet_type);

 364 /*
 365  *  Add a protocol ID to the list. Now that the input handler is
 366  *  smarter we can dispense with all the messy stuff that used to be
 367  *  here.
 368  *
 369  *  BEWARE!!! Protocol handlers, mangling input packets,
 370  *  MUST BE last in hash buckets and checking protocol handlers
 371  *  MUST start from promiscuous ptype_all chain in net_bh.
 372  *  It is true now, do not change it.
 373  *  Explanation follows: if protocol handler, mangling packet, will
 374  *  be the first on list, it is not able to sense, that packet
 375  *  is cloned and should be copied-on-write, so that it will
 376  *  change it and subsequent readers will get broken packet.
 377  *                          --ANK (980803)
 378  */
 380 static inline struct list_head *ptype_head(const struct packet_type *pt)
 381 {
 382     if (pt->type == htons(ETH_P_ALL))
 383         return pt->dev ? &pt->dev->ptype_all : &ptype_all;
 384     else
 385         return pt->dev ? &pt->dev->ptype_specific :
 386                  &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
 387 }

 389 /**
 390  *  dev_add_pack - add packet handler
 391  *  @pt: packet type declaration
 392  *
 393  *  Add a protocol handler to the networking stack. The passed &packet_type
 394  *  is linked into kernel lists and may not be freed until it has been
 395  *  removed from the kernel lists.
 396  *
 397  *  This call does not sleep therefore it can not
 398  *  guarantee all CPU's that are in middle of receiving packets
 399  *  will see the new packet type (until the next received packet).
 400  */
 402 void dev_add_pack(struct packet_type *pt)
 403 {
 404     struct list_head *head = ptype_head(pt);
 406     spin_lock(&ptype_lock);
 407     list_add_rcu(&pt->list, head);
 408     spin_unlock(&ptype_lock);
 409 }
 410 EXPORT_SYMBOL(dev_add_pack);



1791 static struct packet_type ip_packet_type __read_mostly = {
1792     .type = cpu_to_be16(ETH_P_IP),
1793     .func = ip_rcv,
1794 };
inet_init函式通過呼叫dev_add_pack(&ip_packet_type)將IP協議新增到ptype_base[]陣列中而成為一員,而且對應的處理函式是ip_rcv. 當網路層收到一個包時,它檢查完包的合理性後,檢視這個包的網路層協議是哪個,匹配到是ETH_P_IP後,就呼叫ip_rcv函式進行處理,這樣一個包就從裝置介面層進入到了IP層。

