1. 程式人生 > 實用技巧 >Linux rndis_host 驅動的一個BUG與解決方案

Linux rndis_host 驅動的一個BUG與解決方案

關鍵字

rndis_host, linux, kernel, modem

綜述

rndis 是微軟定義的一套通訊方案。類似的協議還有 qmi/mbim/ecm/ncm。
rndis 協議足夠簡單,可靠。所以最近在使用一款 quectel 公司模組時採用的就是 rndis 模式。在linux 下 對應驅動是 rndis_host 驅動。windows 10下自帶rndis 驅動!
拿到模組首先測速度!發現模組的下行速度在 Windows 上比在 Linux 上高很多,而上行速度兩者則差不多!單獨看 Linux,又發現上行比下行高很多……問題很奇怪!

分析

分析上行發包邏輯:
Linux rndis_host 發包函式程式碼


/*
 * rndis_tx_fixup - prepend an RNDIS data-packet header to an outgoing skb.
 * @dev:   usbnet device (not referenced by this function)
 * @skb:   frame to send; its current length is recorded as the payload length
 * @flags: allocation flags handed to skb_copy_expand() if a copy is needed
 *
 * Space for the header is found in one of three ways, tried in order:
 *   1. the skb is not cloned and already has enough headroom;
 *   2. the skb is not cloned and headroom + tailroom is enough, in which
 *      case the payload is slid to skb->head + sizeof(*hdr);
 *   3. otherwise a fresh copy with sizeof(*hdr) headroom and 1 byte of
 *      tailroom is made and the original skb is freed.
 *
 * Returns the skb carrying header + payload, or the NULL result of a failed
 * skb_copy_expand() (the original skb has already been freed in that case).
 */
struct sk_buff *
rndis_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags)
{
	const unsigned payload_len = skb->len;
	struct rndis_data_hdr *hdr;
	int must_copy = 1;

	if (likely(!skb_cloned(skb))) {
		int avail = skb_headroom(skb);

		if (unlikely(sizeof(*hdr) <= avail)) {
			/* headroom alone is sufficient; use the skb untouched */
			must_copy = 0;
		} else {
			/* maybe head + tail room together are sufficient */
			avail += skb_tailroom(skb);
			if (likely(sizeof(*hdr) <= avail)) {
				/* shift the payload so exactly one header fits in front */
				skb->data = memmove(skb->head + sizeof(*hdr),
						    skb->data, payload_len);
				skb_set_tail_pointer(skb, payload_len);
				must_copy = 0;
			}
		}
	}

	if (must_copy) {
		/* build a replacement skb with the right headroom (and tailpad) */
		struct sk_buff *bigger;

		bigger = skb_copy_expand(skb, sizeof(*hdr), 1, flags);
		dev_kfree_skb_any(skb);
		if (unlikely(!bigger))
			return bigger;
		skb = bigger;
	}

	/*
	 * Write the RNDIS header.  No attempt is made to batch several
	 * packets into one transfer; Linux minimizes wasted bandwidth
	 * through tx queues.
	 */
	hdr = __skb_push(skb, sizeof(*hdr));
	memset(hdr, 0, sizeof(*hdr));
	hdr->msg_type = cpu_to_le32(RNDIS_MSG_PACKET);
	/* skb->len now includes the header that was just pushed */
	hdr->msg_len = cpu_to_le32(skb->len);
	hdr->data_offset = cpu_to_le32(sizeof(*hdr) - 8);
	hdr->data_len = cpu_to_le32(payload_len);

	/* FIXME make the last packet always be short ... */
	return skb;
}
EXPORT_SYMBOL_GPL(rndis_tx_fixup);

上述函式很短,可以看到發包函式就是把上層傳過來的資料包加上 rndis 協議報文頭後發出去,並沒有別的處理!需要注意的是,rndis 是支援報文聚合的!意思就是呼叫一次 USB BULK OUT/IN 可以傳送/接收多個 IP 報文!
所以可以看出,即使在上行未發生聚合的情況下,下行還比上行低,再結合Windows 下下行速度比較高那麼問題就很明顯了,一定是驅動收包有問題!

分析下行收包邏輯:

/*
 * DATA -- host must not write zlps
 *
 * rndis_rx_fixup - strip RNDIS framing from a received skb.
 *
 * Walks the RNDIS data messages stored back to back in @skb.  Every message
 * except the one the loop stops on is cloned, trimmed to its payload and
 * handed to usbnet_skb_return(); @skb itself is left positioned on the
 * payload it stopped at.
 *
 * Returns 0 if @skb is shorter than dev->net->hard_header_len or if a
 * message header fails validation (rx_frame_errors is incremented in the
 * latter case); returns 1 otherwise.
 */
int rndis_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
{
	/* NOTE(review): tm is never read or written again in this function */
	int tm = 0;
	/* This check is no longer done by usbnet */
	if (skb->len < dev->net->hard_header_len)
		return 0;

	/* peripheral may have batched packets to us... */
	while (likely(skb->len)) {
		/* interpret the bytes at the current position as a message header */
		struct rndis_data_hdr	*hdr = (void *)skb->data;
		struct sk_buff		*skb2;
		u32			msg_type, msg_len, data_offset, data_len;

		/* header fields are little-endian; convert to CPU order */
		msg_type = le32_to_cpu(hdr->msg_type);
		msg_len = le32_to_cpu(hdr->msg_len);
		data_offset = le32_to_cpu(hdr->data_offset);
		data_len = le32_to_cpu(hdr->data_len);

		/* don't choke if we see oob, per-packet data, etc */
		/*
		 * Reject anything that is not a data packet, claims more bytes
		 * than the skb holds, or whose payload would end past msg_len.
		 * NOTE(review): the sum is computed in u32 and could wrap for
		 * very large field values -- confirm whether that matters here.
		 */
		if (unlikely(msg_type != RNDIS_MSG_PACKET || skb->len < msg_len
				|| (data_offset + data_len + 8) > msg_len)) {
			dev->net->stats.rx_frame_errors++;
			netdev_dbg(dev->net, "bad rndis message %d/%d/%d/%d, len %d\n",
				   le32_to_cpu(hdr->msg_type),
				   msg_len, data_offset, data_len, skb->len);
			return 0;
		}
		/* skip 8 + data_offset bytes so skb->data is at the payload */
		skb_pull(skb, 8 + data_offset);

		/* at most one packet left? */
		/*
		 * NOTE(review): data_len is u32; if skb->len is unsigned too, the
		 * difference wraps whenever skb->len > data_len -- confirm intent.
		 */
		if (likely((data_len - skb->len) <= sizeof *hdr)) {
			skb_trim(skb, data_len);
			break;
		}

		/* try to return all the packets in the batch */
		skb2 = skb_clone(skb, GFP_ATOMIC);
		/* on clone failure stop here; skb is left untrimmed */
		if (unlikely(!skb2))
			break;
		/*
		 * Advance skb towards the next message.
		 * NOTE(review): 8 + data_offset bytes were already pulled above,
		 * so the bytes left in this message are msg_len - data_offset - 8.
		 * The amount pulled here equals that only when
		 * data_offset == sizeof *hdr - 8.
		 */
		skb_pull(skb, msg_len - sizeof *hdr);
		/* the clone keeps only this message's payload and goes up the stack */
		skb_trim(skb2, data_len);
		usbnet_skb_return(dev, skb2);
	}

	/* caller will usbnet_skb_return the remaining packet */
	return 1;
}
EXPORT_SYMBOL_GPL(rndis_rx_fixup);

收包程式碼稍微複雜點,因為收包需要考慮到聚合報文的情況!因此用了一個 while 迴圈逐個處理。while 裡面就是剝離 rndis 報文頭,並呼叫網卡收包函式(usbnet_skb_return)的過程!
這裡對skb 有兩次偏移操作:

  1. skb_pull(skb, 8 + data_offset); 這一步從skb 去除當前訊息的 rndis 報文頭!
  2. skb_pull(skb, msg_len - sizeof *hdr); 因為skb payload 部分已經在skb2 有了一份clone,那麼skb 當前的payload 就不重要了。因此,這裡實際要做的是繼續從skb剝離當前rndis 報文的資料部分(報文頭已經剝離掉了)。這一步操作後,skb 將指向下一個rndis 報文的 rndis 報文頭!
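    但是這裡第2步邏輯錯了,直接減去 rndis 報文頭長度(sizeof *hdr)是錯的!因為 rndis 報文的 payload 之前並不一定只有固定長度的協議頭,payload 的起始位置是由頭部的 data_offset 欄位定義的。第1步已經從 skb 剝離了 8 + data_offset 位元組,所以當前報文剩餘的長度是 msg_len - data_offset - 8;只有當 data_offset 恰好等於 sizeof *hdr - 8 時,原本的寫法才剛好正確。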

解決方案

方案很簡單,修改偏移計算邏輯!

/*
 * DATA -- host must not write zlps
 *
 * rndis_rx_fixup - strip RNDIS framing from a received skb.
 *
 * Walks the RNDIS data messages stored back to back in @skb.  Every message
 * except the one the loop stops on is cloned, trimmed to its payload and
 * handed to usbnet_skb_return(); @skb itself is left positioned on the
 * payload it stopped at.
 *
 * Returns 0 if @skb is shorter than dev->net->hard_header_len or if a
 * message header fails validation (rx_frame_errors is incremented in the
 * latter case); returns 1 otherwise.
 */
int rndis_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
{
	/* This check is no longer done by usbnet */
	if (skb->len < dev->net->hard_header_len)
		return 0;

	/* peripheral may have batched packets to us... */
	while (likely(skb->len)) {
		/* interpret the bytes at the current position as a message header */
		struct rndis_data_hdr	*hdr = (void *)skb->data;
		struct sk_buff		*skb2;
		u32			msg_type, msg_len, data_offset, data_len;

		/* header fields are little-endian; convert to CPU order */
		msg_type = le32_to_cpu(hdr->msg_type);
		msg_len = le32_to_cpu(hdr->msg_len);
		data_offset = le32_to_cpu(hdr->data_offset);
		data_len = le32_to_cpu(hdr->data_len);

		/* don't choke if we see oob, per-packet data, etc
		 *
		 * The payload must lie inside the message:
		 *	8 + data_offset + data_len <= msg_len <= skb->len
		 * The comparison is written with subtractions that cannot
		 * underflow instead of a u32 sum that could wrap.  A wrapped sum
		 * would let a malformed header through, and a header with
		 * 8 + data_offset == 0 (mod 2^32) would then consume no bytes
		 * and keep this loop spinning forever.
		 */
		if (unlikely(msg_type != RNDIS_MSG_PACKET || skb->len < msg_len
				|| msg_len < 8
				|| data_offset > msg_len - 8
				|| data_len > msg_len - 8 - data_offset)) {
			dev->net->stats.rx_frame_errors++;
			netdev_dbg(dev->net, "bad rndis message %d/%d/%d/%d, len %d\n",
				   le32_to_cpu(hdr->msg_type),
				   msg_len, data_offset, data_len, skb->len);
			return 0;
		}
		/* skip 8 + data_offset bytes so skb->data is at the payload */
		skb_pull(skb, 8 + data_offset);

		/* at most one packet left? */
		if (likely((data_len - skb->len) <= sizeof *hdr)) {
			skb_trim(skb, data_len);
			break;
		}

		/* try to return all the packets in the batch */
		skb2 = skb_clone(skb, GFP_ATOMIC);
		/* on clone failure stop here; skb is left untrimmed */
		if (unlikely(!skb2))
			break;
		/*
		 * Advance skb to the next message.  8 + data_offset bytes of
		 * this message were pulled above, so what remains of it is
		 * msg_len - data_offset - 8 (not msg_len - sizeof *hdr, which
		 * is only right when data_offset == sizeof *hdr - 8).
		 */
		skb_pull(skb, msg_len - data_offset - 8);
		/* the clone keeps only this message's payload and goes up the stack */
		skb_trim(skb2, data_len);
		usbnet_skb_return(dev, skb2);
	}

	/* caller will usbnet_skb_return the remaining packet */
	return 1;
}
EXPORT_SYMBOL_GPL(rndis_rx_fixup);