1. 程式人生 > >socket系統調用

socket系統調用

ifdef done block from rip ptr unlock backlog sca

 1 /*
 2  * socket() system call: split the flag bits out of @type, create the
 3  * socket and bind it to a new file descriptor.
 4  *
 5  * Returns the value produced by sock_map_fd() (the descriptor) on success,
 6  * -EINVAL for unknown flag bits, or the negative error of the failing step.
 7  */
 8 SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
 9 {
10     struct socket *sock;
11     int flags;
12     int err;
13     int fd;
14 
15     /* Compile-time consistency checks on the SOCK_* constants. */
16     BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
17     BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
18     BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
19     BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
20 
21     /* Everything outside the type mask is a flag bit. */
22     flags = type & ~SOCK_TYPE_MASK;
23 
24     /* Only SOCK_CLOEXEC and SOCK_NONBLOCK are accepted as flags. */
25     if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
26         return -EINVAL;
27 
28     /* What remains is the bare socket type. */
29     type &= SOCK_TYPE_MASK;
30 
31     /* Where the two constants differ, express non-blocking as O_NONBLOCK. */
32     if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) {
33         flags &= ~SOCK_NONBLOCK;
34         flags |= O_NONBLOCK;
35     }
36 
37     /* Create the socket; nothing to undo if this fails. */
38     err = sock_create(family, type, protocol, &sock);
39     if (err < 0)
40         return err;
41 
42     /* Create the socket file and bind it to a descriptor. */
43     fd = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
44     if (fd < 0)
45         sock_release(sock);
46 
47     /* It may be already another descriptor 8) Not kernel problem. */
48     return fd;
49 }

 1 /*
 2  * Bind @sock to a newly reserved file descriptor.
 3  *
 4  * Returns the descriptor on success.  On failure returns the negative value
 5  * from get_unused_fd_flags(), or PTR_ERR() of the file allocation error.
 6  */
 7 static int sock_map_fd(struct socket *sock, int flags)
 8 {
 9     struct file *sockfile;
10     int fd;
11 
12     /* Reserve an unused descriptor number. */
13     fd = get_unused_fd_flags(flags);
14     if (unlikely(fd < 0))
15         return fd;
16 
17     /* Allocate the file for the socket. */
18     sockfile = sock_alloc_file(sock, flags, NULL);
19     if (unlikely(IS_ERR(sockfile))) {
20         /* No file to install: hand the reserved descriptor back. */
21         put_unused_fd(fd);
22         return PTR_ERR(sockfile);
23     }
24 
25     /* Tie the descriptor to the file. */
26     fd_install(fd, sockfile);
27     return fd;
28 }

1 /* Create a socket in the network namespace of `current`, with kern = 0. */
2 int sock_create(int family, int type, int protocol, struct socket **res)
3 {
4     struct net *net = current->nsproxy->net_ns;
5 
6     return __sock_create(net, family, type, protocol, res, 0);
7 }

  1 /*
     * Create a socket of the requested family/type/protocol in namespace @net
     * and hand it back through @res.  @kern is passed through unchanged to the
     * security hooks and to the family's ->create() callback.
     *
     * Returns 0 on success or a negative errno.  Once the socket has been
     * allocated, every failure path releases it before returning.
     */
  2 int __sock_create(struct net *net, int family, int type, int protocol,
  3              struct socket **res, int kern)
  4 {
  5     int err;
  6     struct socket *sock;
  7     const struct net_proto_family *pf;
  8 
  9     /*
 10      *      Check protocol is in range
 11      */
 12     /* Validate the protocol family */
 13     if (family < 0 || family >= NPROTO)
 14         return -EAFNOSUPPORT;
 15 
 16     /* Validate the socket type */
 17     if (type < 0 || type >= SOCK_MAX)
 18         return -EINVAL;
 19 
 20     /* Compatibility.
 21 
 22        This uglymoron is moved from INET layer to here to avoid
 23        deadlock in module load.
 24      */
 25     /* (PF_INET, SOCK_PACKET) is obsolete: log once and switch to PF_PACKET */
 26     if (family == PF_INET && type == SOCK_PACKET) {
 27         pr_info_once("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
 28                  current->comm);
 29         family = PF_PACKET;
 30     }
 31 
 32     /* Let the security module vet the request */
 33     err = security_socket_create(family, type, protocol, kern);
 34     if (err)
 35         return err;
 36 
 37     /*
 38      *    Allocate the socket and allow the family to set things up. if
 39      *    the protocol is 0, the family is instructed to select an appropriate
 40      *    default.
 41      */
 42     /* Allocate the socket (original note: sock_alloc() also binds it to an inode) */
 43     sock = sock_alloc();
 44     if (!sock) {
 45         net_warn_ratelimited("socket: no more sockets\n");
 46         return -ENFILE;    /* Not exactly a match, but its the
 47                    closest posix thing */
 48     }
 49 
 50     /* Record the socket type */
 51     sock->type = type;
 52 
 53 #ifdef CONFIG_MODULES
 54     /* Attempt to load a protocol module if the find failed.
 55      *
 56      * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
 57      * requested real, full-featured networking support upon configuration.
 58      * Otherwise module support will break!
 59      */
 60     if (rcu_access_pointer(net_families[family]) == NULL)
 61         request_module("net-pf-%d", family);
 62 #endif
 63 
 64     rcu_read_lock();
 65     /* Look up the protocol family entry */
 66     pf = rcu_dereference(net_families[family]);
 67     err = -EAFNOSUPPORT; /* also the result if try_module_get() fails below */
 68     if (!pf)
 69         goto out_release;
 70 
 71     /*
 72      * We will call the ->create function, that possibly is in a loadable
 73      * module, so we have to bump that loadable module refcnt first.
 74      */
 75      /* Take a reference on the family's module */
 76     if (!try_module_get(pf->owner))
 77         goto out_release;
 78 
 79     /* Now protected by module ref count */
 80     rcu_read_unlock();
 81 
 82     /* Call the family's create handler */
 83     err = pf->create(net, sock, protocol, kern);
 84     if (err < 0)
 85         goto out_module_put;
 86 
 87     /*
 88      * Now to bump the refcnt of the [loadable] module that owns this
 89      * socket at sock_release time we decrement its refcnt.
 90      */
 91     if (!try_module_get(sock->ops->owner))
 92         goto out_module_busy;
 93 
 94     /*
 95      * Now that we're done with the ->create function, the [loadable]
 96      * module can have its refcnt decremented
 97      */
 98     module_put(pf->owner);
 99     err = security_socket_post_create(sock, family, type, protocol, kern);
100     if (err)
101         goto out_sock_release;
102     *res = sock;
103 
104     return 0;
105 
106 out_module_busy: /* the ops owner module could not be pinned */
107     err = -EAFNOSUPPORT;
108 out_module_put: /* clear ops, then drop the family module reference */
109     sock->ops = NULL;
110     module_put(pf->owner);
111 out_sock_release:
112     sock_release(sock);
113     return err;
114 
115 out_release: /* failure while still inside the RCU read section */
116     rcu_read_unlock();
117     goto out_sock_release;
118 }
119 EXPORT_SYMBOL(__sock_create);

上述 __sock_create 中調用 pf->create 之處(原文以紅色標示),以 PF_INET 協議族為例,實際調用的是下面結構中的 inet_create

1 /* PF_INET protocol family descriptor; its create hook is inet_create(). */
2 static const struct net_proto_family inet_family_ops = {
3     .owner  = THIS_MODULE,
4     .create = inet_create,
5     .family = PF_INET,
6 };

  1 /*
  2  *    Create an inet socket.
  3  */
  4 
  5 /*
     * Create the transport control block (struct sock) that belongs to @sock
     * and attach it.  The inetsw[] list for sock->type is searched for an entry
     * matching @protocol; if none is found, module loading is attempted up to
     * twice before giving up.  Returns 0 on success or a negative errno.
     */
  6 static int inet_create(struct net *net, struct socket *sock, int protocol,
  7                int kern)
  8 {
  9     struct sock *sk;
 10     struct inet_protosw *answer;
 11     struct inet_sock *inet;
 12     struct proto *answer_prot;
 13     unsigned char answer_flags;
 14     int try_loading_module = 0;
 15     int err;
 16 
 17     /* Validate the protocol number */
 18     if (protocol < 0 || protocol >= IPPROTO_MAX)
 19         return -EINVAL;
 20 
 21     /* Start the socket in the unconnected state */
 22     sock->state = SS_UNCONNECTED;
 23 
 24     /* Look for the requested type/protocol pair. */
 25 lookup_protocol:
 26     err = -ESOCKTNOSUPPORT;
 27     rcu_read_lock();
 28     list_for_each_entry_rcu(answer, &inetsw[sock->type], list) {
 29 
 30         err = 0;
 31         /* Check the non-wild match. */
 32         /* Protocol numbers are equal */
 33         if (protocol == answer->protocol) {
 34             /* A specific protocol was requested: exact match, done */
 35             if (protocol != IPPROTO_IP)
 36                 break;
 37             
 38             /* Wildcard request (IPPROTO_IP) met a wildcard entry: keep looking */
 39             
 40         } 
 41         /* Protocol numbers differ */
 42         else {
 43             /* Check for the two wild cases. */
 44             /* The caller left the protocol unspecified */
 45             if (IPPROTO_IP == protocol) {
 46                 /* Adopt this entry's protocol: done */
 47                 protocol = answer->protocol;
 48                 break;
 49             }
 50 
 51             /* A specific protocol was requested but this entry differs */
 52 
 53             /* This entry is the wildcard protocol: accept it */
 54             if (IPPROTO_IP == answer->protocol)
 55                 break;
 56         }
 57 
 58         /* No match on this entry */
 59         /* If the list ends without a break, this is the error that remains */
 60         err = -EPROTONOSUPPORT;
 61     }
 62 
 63     /* 
 64         No matching inet_protosw entry was found:
 65         try to load a protocol module and look again
 66     */
 67     if (unlikely(err)) {
 68         /* Attempt module loading at most twice */
 69         if (try_loading_module < 2) {
 70             rcu_read_unlock();
 71             /*
 72              * Be more specific, e.g. net-pf-2-proto-132-type-1
 73              * (net-pf-PF_INET-proto-IPPROTO_SCTP-type-SOCK_STREAM)
 74              */
 75             /* First attempt: module named by family, protocol and type */
 76             if (++try_loading_module == 1)
 77                 request_module("net-pf-%d-proto-%d-type-%d",
 78                            PF_INET, protocol, sock->type);
 79             /*
 80              * Fall back to generic, e.g. net-pf-2-proto-132
 81              * (net-pf-PF_INET-proto-IPPROTO_SCTP)
 82              */
 83             /* Second attempt: module named by family and protocol only */ 
 84             else
 85                 request_module("net-pf-%d-proto-%d",
 86                            PF_INET, protocol);
 87             goto lookup_protocol;
 88         }
 89         /* Both attempts used up: the lookup fails */
 90         else
 91             goto out_rcu_unlock;
 92     }
 93 
 94     err = -EPERM;
 95 
 96     /* Non-kernel SOCK_RAW sockets require CAP_NET_RAW in the netns' user namespace */
 97     if (sock->type == SOCK_RAW && !kern &&
 98         !ns_capable(net->user_ns, CAP_NET_RAW))
 99         goto out_rcu_unlock;
100 
101     /* Install the socket operations */
102     sock->ops = answer->ops;
103     /* Copy the proto and flags out of the entry before leaving the RCU section */
104     answer_prot = answer->prot;
105     answer_flags = answer->flags;
106     rcu_read_unlock();
107 
108     WARN_ON(!answer_prot->slab);
109 
110     err = -ENOBUFS;
111     /* Allocate the transport control block (struct sock) */
112     sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot, kern);
113     if (!sk)
114         goto out;
115 
116     err = 0;
117     /* Set the reuse flag if the protosw entry carries INET_PROTOSW_REUSE */
118     if (INET_PROTOSW_REUSE & answer_flags)
119         sk->sk_reuse = SK_CAN_REUSE;
120 
121     inet = inet_sk(sk);
122 
123     /* Note whether the entry is flagged INET_PROTOSW_ICSK */
124     inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0;
125 
126     inet->nodefrag = 0;
127 
128     /* Raw socket */
129     if (SOCK_RAW == sock->type) {
130         /* The protocol number doubles as the local port number */
131         inet->inet_num = protocol;
132 
133         /* Protocol is IPPROTO_RAW */
134         if (IPPROTO_RAW == protocol)
135             /* Set hdrincl (original note: the user builds the IP header itself) */
136             inet->hdrincl = 1;
137     }
138 
139     /* Pick the PMTU discovery mode from the netns sysctl */
140     if (net->ipv4.sysctl_ip_no_pmtu_disc)
141         inet->pmtudisc = IP_PMTUDISC_DONT;
142     else
143         inet->pmtudisc = IP_PMTUDISC_WANT;
144 
145     /* Start inet_id at zero */
146     inet->inet_id = 0;
147 
148     /* Initialise the sock, passing the socket it belongs to */
149     sock_init_data(sock, sk);
150 
151     sk->sk_destruct       = inet_sock_destruct;
152     sk->sk_protocol       = protocol;
153     sk->sk_backlog_rcv = sk->sk_prot->backlog_rcv;
154 
155     inet->uc_ttl    = -1;
156     inet->mc_loop    = 1;
157     inet->mc_ttl    = 1;
158     inet->mc_all    = 1;
159     inet->mc_index    = 0;
160     inet->mc_list    = NULL;
161     inet->rcv_tos    = 0;
162 
163     sk_refcnt_debug_inc(sk);
164 
165     /* A local port number has been set */
166     if (inet->inet_num) {
167         /* It assumes that any protocol which allows
168          * the user to assign a number at socket
169          * creation time automatically
170          * shares.
171          */
172          
173         /* Store the source port in network byte order */ 
174         inet->inet_sport = htons(inet->inet_num);
175         /* Add to protocol hash chains. */
176         /* Insert into the protocol's hash */
177         err = sk->sk_prot->hash(sk);
178         if (err) {
179             sk_common_release(sk);
180             goto out;
181         }
182     }
183 
184     /* Run the protocol's init hook, if it has one */
185     if (sk->sk_prot->init) {
186         err = sk->sk_prot->init(sk);
187         if (err) {
188             sk_common_release(sk);
189             goto out;
190         }
191     }
192 
193     if (!kern) {
194         err = BPF_CGROUP_RUN_PROG_INET_SOCK(sk);
195         if (err) {
196             sk_common_release(sk);
197             goto out;
198         }
199     }
200 out:
201     return err;
202 out_rcu_unlock:
203     rcu_read_unlock();
204     goto out;
205 }

socket系統調用