socket系統調用
阿新 • • 發佈:2017-09-29
ifdef done block from rip ptr unlock backlog sca
1 SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol) 2 { 3 int retval; 4 struct socket *sock; 5 int flags; 6 7 /* Check the SOCK_* constants for consistency. */ 8 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC); 9 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);10 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK); 11 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK); 12 13 /* 取得標誌 */ 14 flags = type & ~SOCK_TYPE_MASK; 15 16 /* 除此標記之外還有標記,錯誤 */ 17 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) 18 return -EINVAL; 19 20/* 取得類型 */ 21 type &= SOCK_TYPE_MASK; 22 23 24 /* 標記以O_NONBLOCK為準 */ 25 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) 26 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; 27 28 /* 創建socket */ 29 retval = sock_create(family, type, protocol, &sock);30 if (retval < 0) 31 goto out; 32 33 /* 創建socket文件並綁定描述符 */ 34 retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK)); 35 if (retval < 0) 36 goto out_release; 37 38 out: 39 /* It may be already another descriptor 8) Not kernel problem. */ 40 return retval; 41 42 out_release: 43 sock_release(sock); 44 return retval; 45 }
1 /* 套接口與文件描述符綁定 */ 2 static int sock_map_fd(struct socket *sock, int flags) 3 { 4 struct file *newfile; 5 /* 獲取未使用的文件描述符 */ 6 int fd = get_unused_fd_flags(flags); 7 if (unlikely(fd < 0)) 8 return fd; 9 10 /* 分配socket文件 */ 11 newfile = sock_alloc_file(sock, flags, NULL); 12 if (likely(!IS_ERR(newfile))) { 13 /* fd和文件進行綁定 */ 14 fd_install(fd, newfile); 15 return fd; 16 } 17 18 /* 釋放fd */ 19 put_unused_fd(fd); 20 return PTR_ERR(newfile); 21 }
1 int sock_create(int family, int type, int protocol, struct socket **res) 2 { 3 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0); 4 }
1 /* 創建socket */ 2 int __sock_create(struct net *net, int family, int type, int protocol, 3 struct socket **res, int kern) 4 { 5 int err; 6 struct socket *sock; 7 const struct net_proto_family *pf; 8 9 /* 10 * Check protocol is in range 11 */ 12 /* 檢查協議族 */ 13 if (family < 0 || family >= NPROTO) 14 return -EAFNOSUPPORT; 15 16 /* 檢查類型 */ 17 if (type < 0 || type >= SOCK_MAX) 18 return -EINVAL; 19 20 /* Compatibility. 21 22 This uglymoron is moved from INET layer to here to avoid 23 deadlock in module load. 24 */ 25 /* ipv4協議族的packet已經廢除,檢測到,則替換成packet協議族 */ 26 if (family == PF_INET && type == SOCK_PACKET) { 27 pr_info_once("%s uses obsolete (PF_INET,SOCK_PACKET)\n", 28 current->comm); 29 family = PF_PACKET; 30 } 31 32 /* 安全模塊檢查套接口 */ 33 err = security_socket_create(family, type, protocol, kern); 34 if (err) 35 return err; 36 37 /* 38 * Allocate the socket and allow the family to set things up. if 39 * the protocol is 0, the family is instructed to select an appropriate 40 * default. 41 */ 42 /* 分配socket,內部和inode已經綁定 */ 43 sock = sock_alloc(); 44 if (!sock) { 45 net_warn_ratelimited("socket: no more sockets\n"); 46 return -ENFILE; /* Not exactly a match, but its the 47 closest posix thing */ 48 } 49 50 /* 設定類型 */ 51 sock->type = type; 52 53 #ifdef CONFIG_MODULES 54 /* Attempt to load a protocol module if the find failed. 55 * 56 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user 57 * requested real, full-featured networking support upon configuration. 58 * Otherwise module support will break! 59 */ 60 if (rcu_access_pointer(net_families[family]) == NULL) 61 request_module("net-pf-%d", family); 62 #endif 63 64 rcu_read_lock(); 65 /* 找到協議族 */ 66 pf = rcu_dereference(net_families[family]); 67 err = -EAFNOSUPPORT; 68 if (!pf) 69 goto out_release; 70 71 /* 72 * We will call the ->create function, that possibly is in a loadable 73 * module, so we have to bump that loadable module refcnt first. 
74 */ 75 /* 增加模塊的引用計數 */ 76 if (!try_module_get(pf->owner)) 77 goto out_release; 78 79 /* Now protected by module ref count */ 80 rcu_read_unlock(); 81 82 /* 調用協議族的創建函數 */ 83 err = pf->create(net, sock, protocol, kern); 84 if (err < 0) 85 goto out_module_put; 86 87 /* 88 * Now to bump the refcnt of the [loadable] module that owns this 89 * socket at sock_release time we decrement its refcnt. 90 */ 91 if (!try_module_get(sock->ops->owner)) 92 goto out_module_busy; 93 94 /* 95 * Now that we‘re done with the ->create function, the [loadable] 96 * module can have its refcnt decremented 97 */ 98 module_put(pf->owner); 99 err = security_socket_post_create(sock, family, type, protocol, kern); 100 if (err) 101 goto out_sock_release; 102 *res = sock; 103 104 return 0; 105 106 out_module_busy: 107 err = -EAFNOSUPPORT; 108 out_module_put: 109 sock->ops = NULL; 110 module_put(pf->owner); 111 out_sock_release: 112 sock_release(sock); 113 return err; 114 115 out_release: 116 rcu_read_unlock(); 117 goto out_sock_release; 118 } 119 EXPORT_SYMBOL(__sock_create);
上述 __sock_create 中調用 pf->create(net, sock, protocol, kern) 之處,以 PF_INET 協議族為例,實際調用的是下面 inet_family_ops 結構中的 inet_create:
1 static const struct net_proto_family inet_family_ops = { 2 .family = PF_INET, 3 .create = inet_create, 4 .owner = THIS_MODULE, 5 };
1 /* 2 * Create an inet socket. 3 */ 4 5 /* 創建與該接口對應的傳輸控制塊並關聯 */ 6 static int inet_create(struct net *net, struct socket *sock, int protocol, 7 int kern) 8 { 9 struct sock *sk; 10 struct inet_protosw *answer; 11 struct inet_sock *inet; 12 struct proto *answer_prot; 13 unsigned char answer_flags; 14 int try_loading_module = 0; 15 int err; 16 17 /* 檢查協議 */ 18 if (protocol < 0 || protocol >= IPPROTO_MAX) 19 return -EINVAL; 20 21 /* 設置接口的狀態為未連接 */ 22 sock->state = SS_UNCONNECTED; 23 24 /* Look for the requested type/protocol pair. */ 25 lookup_protocol: 26 err = -ESOCKTNOSUPPORT; 27 rcu_read_lock(); 28 list_for_each_entry_rcu(answer, &inetsw[sock->type], list) { 29 30 err = 0; 31 /* Check the non-wild match. */ 32 /* 匹配協議成功 */ 33 if (protocol == answer->protocol) { 34 /* 傳入為某指定協議,成功*/ 35 if (protocol != IPPROTO_IP) 36 break; 37 38 /* 未指定協議,繼續查找 */ 39 40 } 41 /* 未指定協議或者未匹配成功的分支 */ 42 else { 43 /* Check for the two wild cases. */ 44 /* 如果傳入為未指定協議 */ 45 if (IPPROTO_IP == protocol) { 46 /* 則指定為當前協議,成功 */ 47 protocol = answer->protocol; 48 break; 49 } 50 51 /* 指定了傳入協議,但是均未匹配成功 */ 52 53 /* 當前正在匹配的協議通用協議,則使用之 */ 54 if (IPPROTO_IP == answer->protocol) 55 break; 56 } 57 58 /* 循環查找結束了,還未找到 */ 59 /* 傳入了某指定協議,未找到匹配,並且沒有通用協議 */ 60 err = -EPROTONOSUPPORT; 61 } 62 63 /* 64 未找到對應inet_protosw實例 65 加載對應的協議模塊,重新查找 66 */ 67 if (unlikely(err)) { 68 /* 嘗試加載的模塊不超過2次 */ 69 if (try_loading_module < 2) { 70 rcu_read_unlock(); 71 /* 72 * Be more specific, e.g. net-pf-2-proto-132-type-1 73 * (net-pf-PF_INET-proto-IPPROTO_SCTP-type-SOCK_STREAM) 74 */ 75 /* 第一次,加載指定協議和類型的模塊 */ 76 if (++try_loading_module == 1) 77 request_module("net-pf-%d-proto-%d-type-%d", 78 PF_INET, protocol, sock->type); 79 /* 80 * Fall back to generic, e.g. 
net-pf-2-proto-132 81 * (net-pf-PF_INET-proto-IPPROTO_SCTP) 82 */ 83 /* 第二次,加載只指定協議的模塊 */ 84 else 85 request_module("net-pf-%d-proto-%d", 86 PF_INET, protocol); 87 goto lookup_protocol; 88 } 89 /* 超過2次,則查找失敗 */ 90 else 91 goto out_rcu_unlock; 92 } 93 94 err = -EPERM; 95 96 /* 判斷是否允許創建sock-raw套接口 */ 97 if (sock->type == SOCK_RAW && !kern && 98 !ns_capable(net->user_ns, CAP_NET_RAW)) 99 goto out_rcu_unlock; 100 101 /* 設置套接口操作 */ 102 sock->ops = answer->ops; 103 /* 臨時存儲協議的操作和標誌 */ 104 answer_prot = answer->prot; 105 answer_flags = answer->flags; 106 rcu_read_unlock(); 107 108 WARN_ON(!answer_prot->slab); 109 110 err = -ENOBUFS; 111 /* 分配傳輸控制塊 */ 112 sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot, kern); 113 if (!sk) 114 goto out; 115 116 err = 0; 117 /* 設置重用地址和端口標記 */ 118 if (INET_PROTOSW_REUSE & answer_flags) 119 sk->sk_reuse = SK_CAN_REUSE; 120 121 inet = inet_sk(sk); 122 123 /* 設置是否為面向連接的控制塊 */ 124 inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0; 125 126 inet->nodefrag = 0; 127 128 /* 如果類型是原始套接字 */ 129 if (SOCK_RAW == sock->type) { 130 /* 設置本地端口為協議號 */ 131 inet->inet_num = protocol; 132 133 /* 協議為ipproto_raw */ 134 if (IPPROTO_RAW == protocol) 135 /* 標記需要自己構建ip首部 */ 136 inet->hdrincl = 1; 137 } 138 139 /* 設置是否支持pmtu */ 140 if (net->ipv4.sysctl_ip_no_pmtu_disc) 141 inet->pmtudisc = IP_PMTUDISC_DONT; 142 else 143 inet->pmtudisc = IP_PMTUDISC_WANT; 144 145 /* 出事連接控制塊 */ 146 inet->inet_id = 0; 147 148 /* 連接控制塊的初始化 */ 149 sock_init_data(sock, sk); 150 151 sk->sk_destruct = inet_sock_destruct; 152 sk->sk_protocol = protocol; 153 sk->sk_backlog_rcv = sk->sk_prot->backlog_rcv; 154 155 inet->uc_ttl = -1; 156 inet->mc_loop = 1; 157 inet->mc_ttl = 1; 158 inet->mc_all = 1; 159 inet->mc_index = 0; 160 inet->mc_list = NULL; 161 inet->rcv_tos = 0; 162 163 sk_refcnt_debug_inc(sk); 164 165 /* 設置了本地端口 */ 166 if (inet->inet_num) { 167 /* It assumes that any protocol which allows 168 * the user to assign a number at socket 169 * creation time automatically 170 * shares. 
171 */ 172 173 /* 設置網絡序的源端口 */ 174 inet->inet_sport = htons(inet->inet_num); 175 /* Add to protocol hash chains. */ 176 /* 加入到hash */ 177 err = sk->sk_prot->hash(sk); 178 if (err) { 179 sk_common_release(sk); 180 goto out; 181 } 182 } 183 184 /* 如果有init則調用init初始化 */ 185 if (sk->sk_prot->init) { 186 err = sk->sk_prot->init(sk); 187 if (err) { 188 sk_common_release(sk); 189 goto out; 190 } 191 } 192 193 if (!kern) { 194 err = BPF_CGROUP_RUN_PROG_INET_SOCK(sk); 195 if (err) { 196 sk_common_release(sk); 197 goto out; 198 } 199 } 200 out: 201 return err; 202 out_rcu_unlock: 203 rcu_read_unlock(); 204 goto out; 205 }
socket系統調用