1. 程式人生 > >TCP/IP詳解V2(一)之UDP協議

TCP/IP詳解V2(一)之UDP協議

listen point reflect con 協議 提取 高級數據結構 don size

UDP

UDP是一個面向數據報的簡單運輸層協議。

數據結構

/*
 * UDP protocol header: four 16-bit fields laid out in wire order.
 */
struct udphdr {
    u_short uh_sport;       /* source port */
    u_short uh_dport;       /* destination port */
    short   uh_ulen;        /* UDP length; udp_output stores payload length + sizeof(struct udphdr) in the matching ui_ulen field, udp_input reads it with ntohs() */
    u_short uh_sum;     /* UDP checksum; udp_input skips verification when this is 0 */
};
/*
 * Combined IP-overlay + UDP header. udp_output prepends exactly one of these
 * in front of the payload and later reuses the same memory as a struct ip.
 * NOTE(review): struct ipovly must be a complete type at this point for the
 * definition to compile; in this listing it is defined further down.
 */
struct udpiphdr {
    struct ipovly ui_i;     /* overlaid IP header image (mirrors the IP header layout) */
    struct udphdr ui_u;     /* udp header */
};
/*
 * Overlay for the IP header used while checksumming upper-layer protocols.
 * udp_input casts a struct ip pointer to this type, zeroes ih_next/ih_prev/
 * ih_x1 and stores the UDP length in ih_len before calling in_cksum().
 */
struct ipovly {
    caddr_t ih_next, ih_prev;   /* for protocol sequence q's */
    u_char  ih_x1;          /* (unused) */
    u_char  ih_pr;                  /* protocol number (udp_output sets the matching ui_pr to IPPROTO_UDP) */
    short   ih_len;                 /* protocol length: udp_input stores uh_ulen (UDP header + data) here, not the total IP length */
    struct  in_addr ih_src;     /* source internet address */
    struct  in_addr ih_dst;     /* destination internet address */
};

udp_init

/*
 * udp_init: initialise the head of the UDP PCB list.
 *
 * Both links of the head entry `udb` are pointed back at `udb` itself, which
 * yields an empty circular doubly-linked list.
 */
void
udp_init()
{
    udb.inp_prev = &udb;
    udb.inp_next = &udb;
}

udp_output

int
udp_output(inp, m, addr, control)
    register struct inpcb *inp;    //輸出的Internet PCB
    register struct mbuf *m;    //數據mbuf
    struct mbuf *addr, *control;    //地址與控制信息mbuf
{
    register struct udpiphdr *ui;
    register int len = m->m_pkthdr.len;    //獲取發送數據的長度
    struct in_addr laddr;
    int s, error = 0;

    if (control)    //丟棄控制信息。UDP不適用任何控制信息
        m_freem(control);       /* XXX */

    if (addr) {
        laddr = inp->inp_laddr;    //獲取本地信息
        if (inp->inp_faddr.s_addr != INADDR_ANY) {    //如果這個PCB已經被綁定(是UDP啊),返回錯誤
            error = EISCONN;
            goto release;
        }
        /*
         * Must block input while temporarily connected.
         */
        s = splnet();    //通過調整優先級來達到鎖的目的
        error = in_pcbconnect(inp, addr);    //暫時的連接,填充遠程地址與端口
        if (error) {
            splx(s);    //如果在綁定遠程地址的過程中出現錯誤,釋放數據
            goto release;
        }
    } else {
        if (inp->inp_faddr.s_addr == INADDR_ANY) {    //顯式的關聯遠程地址之後仍然沒有地址的話,放棄數據mbuf
            error = ENOTCONN;
            goto release;
        }
    }
    /*
     * Calculate data length and get a mbuf
     * for UDP and IP headers.
     */
    M_PREPEND(m, sizeof(struct udpiphdr), M_DONTWAIT);    //在數據mbuf前面分配空間以存儲udp/ip header
    if (m == 0) {    //分配失敗的話,釋放資源
        error = ENOBUFS;
        goto release;
    }

    /*
     * Fill in mbuf with extended UDP header
     * and addresses and length put into network format.
     */
    ui = mtod(m, struct udpiphdr *);    //已經在mbuf的首部為udp/ip header分配好了資源,填充這些數據
    ui->ui_next = ui->ui_prev = 0;
    ui->ui_x1 = 0;
    ui->ui_pr = IPPROTO_UDP;
    ui->ui_len = htons((u_short)len + sizeof (struct udphdr));
    ui->ui_src = inp->inp_laddr;
    ui->ui_dst = inp->inp_faddr;
    ui->ui_sport = inp->inp_lport;
    ui->ui_dport = inp->inp_fport;
    ui->ui_ulen = ui->ui_len;    //數據長度

    /*
     * Stuff checksum and output datagram.
     */
    ui->ui_sum = 0;    //計算校驗和
    if (udpcksum) {
        if ((ui->ui_sum = in_cksum(m, sizeof (struct udpiphdr) + len)) == 0)
        ui->ui_sum = 0xffff;
    }
    ((struct ip *)ui)->ip_len = sizeof (struct udpiphdr) + len;    //IP數據報中的len = IP header + udp header + data 
    ((struct ip *)ui)->ip_ttl = inp->inp_ip.ip_ttl; /* XXX */
    ((struct ip *)ui)->ip_tos = inp->inp_ip.ip_tos; /* XXX */
    udpstat.udps_opackets++;
    error = ip_output(m, inp->inp_options, &inp->inp_route,
        inp->inp_socket->so_options & (SO_DONTROUTE | SO_BROADCAST),
        inp->inp_moptions);    //計算結束之後,將數據包交由ip層進行處理

    if (addr) {    //如果提供了addr,以為著在發送前調用connect將PCB與遠程地址關聯起來了
        in_pcbdisconnect(inp);
        inp->inp_laddr = laddr;
        splx(s);
    }
    return (error);

release:
    m_freem(m);    //釋放數據資源
    return (error);
}

udp_input

  • 功能A:將UDP數據報放置到合適的插口緩存內,喚醒該插口上因輸入阻塞的所有進程。不重點關注多播與廣播的情況。

    /*
     * udp_input: process a received UDP datagram.
     *
     *   m       mbuf chain that starts with the IP header.
     *   iphlen  length of the IP header, including any options.
     *
     * The datagram is validated (length, optional checksum) and then appended
     * to the receive buffer of every matching socket (broadcast/multicast) or
     * of the single matching socket (unicast), and the receivers are woken up.
     * If no unicast socket matches, an ICMP port-unreachable error is sent.
     * The mbuf chain is always consumed.
     *
     * Fixes relative to the previous version:
     *  - the SO_REUSEPORT/SO_REUSEADDR test was written as
     *    `opts & (mask) == 0`, which parses as `opts & ((mask) == 0)` and is
     *    therefore always false; the early exit from the PCB scan never ran.
     *  - a UDP length smaller than the UDP header itself is now rejected
     *    instead of being used to trim the mbuf chain.
     */
    void
    udp_input(m, iphlen)
        register struct mbuf *m;
        int iphlen;
    {
        register struct ip *ip;
        register struct udphdr *uh;
        register struct inpcb *inp;
        struct mbuf *opts = 0;
        int len;
        struct ip save_ip;

        udpstat.udps_ipackets++;

        /*
         * Strip IP options, if any; should skip this,
         * make available to user, and use on returned packets,
         * but we don't yet have a way to check the checksum
         * with options still present.
         */
        if (iphlen > sizeof (struct ip)) {
            ip_stripoptions(m, (struct mbuf *)0);
            iphlen = sizeof(struct ip);
        }

        /*
         * Get IP and UDP header together in first mbuf.
         */
        ip = mtod(m, struct ip *);
        if (m->m_len < iphlen + sizeof(struct udphdr)) {
            if ((m = m_pullup(m, iphlen + sizeof(struct udphdr))) == 0) {
                udpstat.udps_hdrops++;
                return;
            }
            /* m_pullup may have returned a different mbuf; re-fetch the header. */
            ip = mtod(m, struct ip *);
        }
        uh = (struct udphdr *)((caddr_t)ip + iphlen);

        /*
         * Make mbuf data length reflect UDP length.
         * If not enough data to reflect UDP length, drop.
         */
        len = ntohs((u_short)uh->uh_ulen);
        if (ip->ip_len != len) {
            /*
             * Reject a UDP length that exceeds what IP delivered, and one
             * that is too small to even contain the UDP header.
             */
            if (len > ip->ip_len || len < (int)sizeof(struct udphdr)) {
                udpstat.udps_badlen++;
                goto bad;
            }
            /* Negative count: trim the excess bytes. */
            m_adj(m, len - ip->ip_len);
            /* ip->ip_len = len; */
        }
        /*
         * Save a copy of the IP header in case we want restore it
         * for sending an ICMP error message in response.
         */
        save_ip = *ip;

        /*
         * Checksum extended UDP header and data.
         * Skipped when checksumming is disabled or the sender sent 0.
         */
        if (udpcksum && uh->uh_sum) {
            ((struct ipovly *)ip)->ih_next = 0;
            ((struct ipovly *)ip)->ih_prev = 0;
            ((struct ipovly *)ip)->ih_x1 = 0;
            ((struct ipovly *)ip)->ih_len = uh->uh_ulen;
            if ((uh->uh_sum = in_cksum(m, len + sizeof (struct ip))) != 0) {
                udpstat.udps_badsum++;
                m_freem(m);
                return;
            }
        }

        if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
            in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif)) {
            struct socket *last;
            /*
             * Deliver a multicast or broadcast datagram to *all* sockets
             * for which the local and remote addresses and ports match
             * those of the incoming datagram.  This allows more than
             * one process to receive multi/broadcasts on the same port.
             * (This really ought to be done for unicast datagrams as
             * well, but that would cause problems with existing
             * applications that open both address-specific sockets and
             * a wildcard socket listening to the same port -- they would
             * end up receiving duplicates of every unicast datagram.
             * Those applications open the multiple sockets to overcome an
             * inadequacy of the UDP socket interface, but for backwards
             * compatibility we avoid the problem here rather than
             * fixing the interface.  Maybe 4.5BSD will remedy this?)
             */

            /*
             * Construct sockaddr format source address.
             */
            udp_in.sin_port = uh->uh_sport;
            udp_in.sin_addr = ip->ip_src;
            /* Step the mbuf past the IP and UDP headers. */
            m->m_len -= sizeof (struct udpiphdr);
            m->m_data += sizeof (struct udpiphdr);
            /*
             * Locate pcb(s) for datagram.
             * (Algorithm copied from raw_intr().)
             */
            last = NULL;
            for (inp = udb.inp_next; inp != &udb; inp = inp->inp_next) {
                /* Local port must match. */
                if (inp->inp_lport != uh->uh_dport)
                    continue;
                /* A bound local address must match the destination. */
                if (inp->inp_laddr.s_addr != INADDR_ANY) {
                    if (inp->inp_laddr.s_addr !=
                        ip->ip_dst.s_addr)
                        continue;
                }
                /* A set foreign address/port must match the source. */
                if (inp->inp_faddr.s_addr != INADDR_ANY) {
                    if (inp->inp_faddr.s_addr !=
                        ip->ip_src.s_addr ||
                        inp->inp_fport != uh->uh_sport)
                        continue;
                }

                /*
                 * Delivery runs one match behind: the previous match gets
                 * a copy, so the final match can take the original chain.
                 */
                if (last != NULL) {
                    struct mbuf *n;

                    if ((n = m_copy(m, 0, M_COPYALL)) != NULL) {
                        if (sbappendaddr(&last->so_rcv,
                            (struct sockaddr *)&udp_in,
                            n, (struct mbuf *)0) == 0) {
                            m_freem(n);
                            udpstat.udps_fullsock++;
                        } else
                            sorwakeup(last);
                    }
                }
                last = inp->inp_socket;
                /*
                 * Don't look for additional matches if this one does
                 * not have either the SO_REUSEPORT or SO_REUSEADDR
                 * socket options set.  This heuristic avoids searching
                 * through all pcbs in the common case of a non-shared
                 * port.  It assumes that an application will never
                 * clear these options after setting them.
                 */
                if ((last->so_options & (SO_REUSEPORT|SO_REUSEADDR)) == 0)
                    break;
            }

            if (last == NULL) {
                /*
                 * No matching pcb found; discard datagram.
                 * (No need to send an ICMP Port Unreachable
                 * for a broadcast or multicast datagram.)
                 */
                udpstat.udps_noportbcast++;
                goto bad;
            }
            /* Hand the original chain to the last matching socket. */
            if (sbappendaddr(&last->so_rcv, (struct sockaddr *)&udp_in,
                 m, (struct mbuf *)0) == 0) {
                udpstat.udps_fullsock++;
                goto bad;
            }
            sorwakeup(last);
            return;
        }
        /*
         * Locate pcb for datagram.
         * Try the one-entry cache first; on a mismatch search the PCB list
         * and refresh the cache when a match is found.
         */
        inp = udp_last_inpcb;
        if (inp->inp_lport != uh->uh_dport ||
            inp->inp_fport != uh->uh_sport ||
            inp->inp_faddr.s_addr != ip->ip_src.s_addr ||
            inp->inp_laddr.s_addr != ip->ip_dst.s_addr) {
            inp = in_pcblookup(&udb, ip->ip_src, uh->uh_sport,
                ip->ip_dst, uh->uh_dport, INPLOOKUP_WILDCARD);
            if (inp)
                udp_last_inpcb = inp;
            udpstat.udpps_pcbcachemiss++;
        }
        if (inp == 0) {
            udpstat.udps_noport++;
            /* Never answer a link-level broadcast/multicast with ICMP. */
            if (m->m_flags & (M_BCAST | M_MCAST)) {
                udpstat.udps_noportbcast++;
                goto bad;
            }
            /* Restore the saved IP header and report port unreachable. */
            *ip = save_ip;
            ip->ip_len += iphlen;
            icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PORT, 0, 0);
            return;
        }

        /*
         * Construct sockaddr format source address.
         * Stuff source address and datagram in user buffer.
         */
        udp_in.sin_port = uh->uh_sport;
        udp_in.sin_addr = ip->ip_src;
        if (inp->inp_flags & INP_CONTROLOPTS) {
            struct mbuf **mp = &opts;

            /* Pass the destination address up as control data if requested. */
            if (inp->inp_flags & INP_RECVDSTADDR) {
                *mp = udp_saveopt((caddr_t) &ip->ip_dst,
                    sizeof(struct in_addr), IP_RECVDSTADDR);
                if (*mp)
                    mp = &(*mp)->m_next;
            }
        }
        /* Step the mbuf past the IP and UDP headers. */
        iphlen += sizeof(struct udphdr);
        m->m_len -= iphlen;
        m->m_pkthdr.len -= iphlen;
        m->m_data += iphlen;
        if (sbappendaddr(&inp->inp_socket->so_rcv, (struct sockaddr *)&udp_in,
            m, opts) == 0) {
            /* Append failed; counted as a full socket buffer. */
            udpstat.udps_fullsock++;
            goto bad;
        }
        sorwakeup(inp->inp_socket);
        return;
    bad:
        /* Release the datagram and any control mbuf built above. */
        m_freem(m);
        if (opts)
            m_freem(opts);
    }

udp_detach

/*
 * udp_detach: release the PCB `inp`.
 *
 * If `inp` is the entry held in the one-element lookup cache, the cache is
 * reset to the list head `udb` before in_pcbdetach() is called.  The whole
 * update is bracketed by splnet()/splx().
 */
static void
udp_detach(inp)
    struct inpcb *inp;
{
    int saved_spl;

    saved_spl = splnet();
    if (udp_last_inpcb == inp)
        udp_last_inpcb = &udb;
    in_pcbdetach(inp);
    splx(saved_spl);
}

udp_usrreq

int
udp_usrreq(so, req, m, addr, control)
    struct socket *so;
    int req;
    struct mbuf *m, *addr, *control;
{
    struct inpcb *inp = sotoinpcb(so);    //從socket中獲取PCB
    int error = 0;
    int s;

    if (req == PRU_CONTROL)    //如果是控制選項,轉接調用in_control函數進行處理
        return (in_control(so, (int)m, (caddr_t)addr,
            (struct ifnet *)control));
    if (inp == NULL && req != PRU_ATTACH) {    //如果參數不正確,直接返回
        error = EINVAL;
        goto release;
    }
    /*
     * Note: need to block udp_input while changing
     * the udp pcb queue and/or pcb addresses.
     */
    switch (req) {

    case PRU_ATTACH:        //這是來自socket的系統調用
        if (inp != NULL) {
            error = EINVAL;
            break;
        }
        s = splnet();
        error = in_pcballoc(so, &udb);    //為UDP SOCKET分配一個PCB
        splx(s);
        if (error)
            break;
        error = soreserve(so, udp_sendspace, udp_recvspace);    //為UDP SOCKET分配緩存空間。默認情況下,SendSpace=9216,RecvSpace=41600
        if (error)
            break;
        ((struct inpcb *) so->so_pcb)->inp_ip.ip_ttl = ip_defttl;    //設置默認的TTL
        break;

    case PRU_DETACH:        //close系統調用
        udp_detach(inp);    //稍後觀察
        break;

    case PRU_BIND:        //bind系統調用,關聯本地地址與本地端口
        s = splnet();
        error = in_pcbbind(inp, addr);
        splx(s);
        break;

    case PRU_LISTEN:        //listen系統調用
        error = EOPNOTSUPP;    //UDP SOCKET沒有listen操作
        break;

    case PRU_CONNECT:        //connect系統調用
        if (inp->inp_faddr.s_addr != INADDR_ANY) {        //關聯遠程地址,如果初始化部位INADDR_ANY,那麽就返回錯誤
            error = EISCONN;
            break;
        }
        s = splnet();
        error = in_pcbconnect(inp, addr);
        splx(s);
        if (error == 0)
            soisconnected(so);        //將socket標記為已連接
        break;

    case PRU_CONNECT2:        //socketpair系統調用,僅用於UNIX域協議
        error = EOPNOTSUPP;
        break;

    case PRU_ACCEPT:    //accept系統調用,僅用於TCP協議
        error = EOPNOTSUPP;
        break;

    case PRU_DISCONNECT:        //銷毀與遠程地址之間的關聯,並將遠程地址設置為INADDR_ANY
        if (inp->inp_faddr.s_addr == INADDR_ANY) {
            error = ENOTCONN;
            break;
        }
        s = splnet();
        in_pcbdisconnect(inp);
        inp->inp_laddr.s_addr = INADDR_ANY;
        splx(s);
        so->so_state &= ~SS_ISCONNECTED;        //將socket標記為未連接
        break;

    case PRU_SHUTDOWN:        //shutdown系統調用,UDP很少使用
        socantsendmore(so);
        break;

    case PRU_SEND:    //發送數據請求
        return (udp_output(inp, m, addr, control));

    case PRU_ABORT:    //異常請求,UDP從不使用
        soisdisconnected(so);        //先將UDP SOCKET標記為未連接
        udp_detach(inp);    //然後銷毀PCB
        break;

    case PRU_SOCKADDR:    //設置本地地址
        in_setsockaddr(inp, addr);
        break;

    case PRU_PEERADDR:    //設置遠程地址
        in_setpeeraddr(inp, addr);
        break;

    case PRU_SENSE:
        /*
         * stat: don‘t bother with a blocksize.
         */
        return (0);

    case PRU_SENDOOB:
    case PRU_FASTTIMO:
    case PRU_SLOWTIMO:
    case PRU_PROTORCV:
    case PRU_PROTOSEND:
        error =  EOPNOTSUPP;
        break;

    case PRU_RCVD:
    case PRU_RCVOOB:
        return (EOPNOTSUPP);    /* do not free mbuf‘s */

    default:
        panic("udp_usrreq");
    }

release:
    if (control) {        //釋放控制mbuf
        printf("udp control data unexpectedly retained\n");
        m_freem(control);
    }
    if (m)    //釋放數據mbuf
        m_freem(m);
    return (error);
}

總結:

  • 問題1:IP數據報中和UDP數據報中length的表達意義?
    • IP數據報:len = IP header length + UDP header length + data length
    • UDP數據報:len = UDP header length + data length(udp_output 中為 len + sizeof(struct udphdr),並非僅有 data length)
  • 問題2:UDP的校驗和
    UDP數據報計算偽首部(源/目的IP地址、協議、UDP長度)+ UDP首部 + data的校驗和,IP僅僅計算IP頭部的校驗和
  • 問題3:UDP的優化措施
    • 在copy數據的時候順便計算校驗和
    • 使用其他高級數據結構進行PCB的查找

TCP/IP詳解V2(一)之UDP協議