Linux rndis_host 驅動的一個BUG與解決方案
阿新 • • 發佈:2020-07-31
關鍵字
rndis_host, linux, kernel, modem
綜述
rndis 是微軟定義的一套通訊方案。類似的協議還有 qmi/mbim/ecm/ncm。
rndis 協議足夠簡單,可靠。所以最近在使用一款 quectel 公司模組時採用的就是 rndis 模式。在linux 下 對應驅動是 rndis_host 驅動。windows 10下自帶rndis 驅動!
拿到模組首先測速度!發現模組的下行速度在 Windows 上比 Linux 高很多,而上行速度兩者則差不多!單獨看 Linux,上行又比下行高很多……問題很奇怪!
分析
分析上行發包邏輯:
Linux rndis_host 發包函式程式碼
/*
 * Prepend an RNDIS data-packet header to an outgoing frame.
 *
 * The header is written in place when the skb is not cloned and has (or can
 * be shuffled to have) sizeof(struct rndis_data_hdr) bytes of headroom;
 * otherwise the frame is copied into a new, larger skb and the original is
 * freed.
 *
 * Returns the skb to transmit, or NULL if the copy could not be allocated
 * (the original skb has already been freed in that case).
 */
struct sk_buff *
rndis_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags)
{
	struct rndis_data_hdr	*hdr;
	struct sk_buff		*skb2;
	unsigned		len = skb->len;	/* payload length before the header is added */

	if (likely(!skb_cloned(skb))) {
		int	room = skb_headroom(skb);

		/* enough head room as-is? */
		if (unlikely((sizeof *hdr) <= room))
			goto fill;

		/* enough room, but needs to be readjusted? */
		room += skb_tailroom(skb);
		if (likely((sizeof *hdr) <= room)) {
			/* slide the payload towards the tail so that exactly
			 * sizeof *hdr bytes of headroom open up in front of it
			 */
			skb->data = memmove(skb->head + sizeof *hdr,
					    skb->data, len);
			skb_set_tail_pointer(skb, len);
			goto fill;
		}
	}

	/* create a new skb, with the correct size (and tailpad) */
	skb2 = skb_copy_expand(skb, sizeof *hdr, 1, flags);
	dev_kfree_skb_any(skb);
	if (unlikely(!skb2))
		return skb2;
	skb = skb2;

	/* fill out the RNDIS header.  we won't bother trying to batch
	 * packets; Linux minimizes wasted bandwidth through tx queues.
	 */
fill:
	hdr = __skb_push(skb, sizeof *hdr);
	memset(hdr, 0, sizeof *hdr);
	hdr->msg_type = cpu_to_le32(RNDIS_MSG_PACKET);
	/* skb->len now includes the header that was just pushed */
	hdr->msg_len = cpu_to_le32(skb->len);
	/* data_offset is written as the header size minus 8 */
	hdr->data_offset = cpu_to_le32(sizeof(*hdr) - 8);
	hdr->data_len = cpu_to_le32(len);

	/* FIXME make the last packet always be short ... */
	return skb;
}
EXPORT_SYMBOL_GPL(rndis_tx_fixup);
上述函式很短,可以看到發包函式就是把上層傳過來的資料包加上 rndis 協議報文頭後發出去,並沒有別的處理!需要注意的是,rndis 是支援報文聚合的!意思就是一次 USB bulk 傳輸(BULK OUT 傳送 / BULK IN 接收)可以攜帶多個 IP 報文!
所以可以看出,即使在上行未發生聚合的情況下,下行還比上行低,再結合Windows 下下行速度比較高那麼問題就很明顯了,一定是驅動收包有問題!
分析下行收包邏輯:
/*
 * DATA -- host must not write zlps
 *
 * Walk the RNDIS data messages contained in one received buffer.  Each
 * message but the last is cloned, trimmed to its payload and passed to
 * usbnet_skb_return(); the last one is left in @skb for the caller.
 *
 * Returns 0 if the buffer is shorter than hard_header_len or a message fails
 * validation, 1 otherwise.
 */
int rndis_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
{
	int tm = 0;	/* not referenced anywhere else in this function */

	/* This check is no longer done by usbnet */
	if (skb->len < dev->net->hard_header_len)
		return 0;

	/* peripheral may have batched packets to us... */
	while (likely(skb->len)) {
		struct rndis_data_hdr *hdr = (void *)skb->data;
		struct sk_buff *skb2;
		u32 msg_type, msg_len, data_offset, data_len;

		msg_type = le32_to_cpu(hdr->msg_type);
		msg_len = le32_to_cpu(hdr->msg_len);
		data_offset = le32_to_cpu(hdr->data_offset);
		data_len = le32_to_cpu(hdr->data_len);

		/* don't choke if we see oob, per-packet data, etc */
		if (unlikely(msg_type != RNDIS_MSG_PACKET || skb->len < msg_len
				|| (data_offset + data_len + 8) > msg_len)) {
			dev->net->stats.rx_frame_errors++;
			netdev_dbg(dev->net, "bad rndis message %d/%d/%d/%d, len %d\n",
				   le32_to_cpu(hdr->msg_type),
				   msg_len, data_offset, data_len, skb->len);
			return 0;
		}

		/* drop everything in front of the payload: the payload starts
		 * 8 + data_offset bytes into the message
		 */
		skb_pull(skb, 8 + data_offset);

		/* at most one packet left? */
		if (likely((data_len - skb->len) <= sizeof *hdr)) {
			skb_trim(skb, data_len);
			break;
		}

		/* try to return all the packets in the batch */
		skb2 = skb_clone(skb, GFP_ATOMIC);
		if (unlikely(!skb2))
			break;

		/* NOTE: 8 + data_offset bytes were already pulled above, so
		 * pulling msg_len - sizeof *hdr more lands exactly on the next
		 * message only when 8 + data_offset == sizeof *hdr.
		 */
		skb_pull(skb, msg_len - sizeof *hdr);
		skb_trim(skb2, data_len);
		usbnet_skb_return(dev, skb2);
	}

	/* caller will usbnet_skb_return the remaining packet */
	return 1;
}
EXPORT_SYMBOL_GPL(rndis_rx_fixup);
收包程式碼稍微複雜點,因為收包需要考慮到聚合報文的情況!因此起了一個while迴圈判斷。while 裡面就是剝離rndis 報文頭,並呼叫網絡卡收包函式的過程!
這裡對skb 有兩次偏移操作:
- skb_pull(skb, 8 + data_offset); 這一步從skb 去除當前訊息的 rndis 報文頭!
- skb_pull(skb, msg_len - sizeof *hdr); 因為skb payload 部分已經在skb2 有了一份clone,那麼skb 當前的payload 就不重要了。因此,這裡實際要做的是繼續從skb剝離當前rndis 報文的資料部分(報文頭已經剝離掉了)。這一步操作後,skb 將指向下一個rndis 報文的 rndis 報文頭!
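但是這裡第2步邏輯錯了,這裡直接減去 rndis 報文頭的長度(sizeof *hdr)是錯的!因為 rndis 報文的 payload 之前並不一定只有協議頭,payload 的起始位置是由報文頭中的 data_offset 欄位定義的。第1步已經剝離了 8 + data_offset 個位元組,所以當前報文剩餘的長度應為 msg_len - data_offset - 8;只有在 8 + data_offset 恰好等於 sizeof *hdr 時,原來的寫法才正確。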
解決方案
方案很簡單,修改偏移計算邏輯!
/*
 * DATA -- host must not write zlps
 *
 * Walk the RNDIS data messages contained in one received buffer.  Each
 * message but the last is cloned, trimmed to its payload and passed to
 * usbnet_skb_return(); the last one is left in @skb for the caller.
 *
 * Returns 0 if the buffer is shorter than hard_header_len or a message fails
 * validation, 1 otherwise.
 */
int rndis_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
{
	/* This check is no longer done by usbnet */
	if (skb->len < dev->net->hard_header_len)
		return 0;

	/* peripheral may have batched packets to us... */
	while (likely(skb->len)) {
		struct rndis_data_hdr	*hdr = (void *)skb->data;
		struct sk_buff		*skb2;
		u32			msg_type, msg_len, data_offset, data_len;

		msg_type = le32_to_cpu(hdr->msg_type);
		msg_len = le32_to_cpu(hdr->msg_len);
		data_offset = le32_to_cpu(hdr->data_offset);
		data_len = le32_to_cpu(hdr->data_len);

		/* don't choke if we see oob, per-packet data, etc.
		 *
		 * The payload bounds (8 + data_offset + data_len <= msg_len)
		 * are checked by subtraction: the fields come from the device
		 * and a sum of them could wrap around in u32 arithmetic and
		 * slip past the comparison.
		 */
		if (unlikely(msg_type != RNDIS_MSG_PACKET ||
			     skb->len < msg_len ||
			     msg_len < 8 ||
			     data_offset > msg_len - 8 ||
			     data_len > msg_len - 8 - data_offset)) {
			dev->net->stats.rx_frame_errors++;
			netdev_dbg(dev->net, "bad rndis message %d/%d/%d/%d, len %d\n",
				   le32_to_cpu(hdr->msg_type),
				   msg_len, data_offset, data_len, skb->len);
			return 0;
		}

		/* drop everything in front of the payload: the payload starts
		 * 8 + data_offset bytes into the message
		 */
		skb_pull(skb, 8 + data_offset);

		/* at most one packet left?
		 *
		 * After the validation and pull above skb->len >= data_len,
		 * so this unsigned difference is 0 when nothing follows the
		 * payload and wraps to a large value otherwise.
		 */
		if (likely((data_len - skb->len) <= sizeof *hdr)) {
			skb_trim(skb, data_len);
			break;
		}

		/* try to return all the packets in the batch */
		skb2 = skb_clone(skb, GFP_ATOMIC);
		if (unlikely(!skb2))
			break;

		/* The fix: 8 + data_offset bytes of this message are already
		 * gone, so the rest of it is msg_len - data_offset - 8 bytes.
		 * Skipping that lands on the next message header regardless
		 * of where the device placed the payload.
		 */
		skb_pull(skb, msg_len - data_offset - 8);
		skb_trim(skb2, data_len);
		usbnet_skb_return(dev, skb2);
	}

	/* caller will usbnet_skb_return the remaining packet */
	return 1;
}
EXPORT_SYMBOL_GPL(rndis_rx_fixup);