[dpdk-dev] [PATCH 5/5] net/virtio: fix TSO when mbuf is shared

Olivier Matz olivier.matz at 6wind.com
Thu Nov 24 09:56:38 CET 2016


With virtio, doing TSO requires modifying the network
packet data:
- the DPDK API requires the L4 checksum to be set to an
  Intel-NIC-like pseudo header checksum that does
  not include the IP length
- the virtio peer expects the L4 checksum to be
  a standard pseudo header checksum.

This is a problem with shared packets, because they
should not be modified.

This patch fixes the issue by copying the headers into
a linear buffer in that case. This buffer is located in
the virtio_tx_region, right after the virtio header.
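
Since the copy sits right after the virtio header, the descriptor
that already points to the virtio header is simply extended to also
cover the copied headers, and the descriptors mapping the mbuf data
skip that many bytes. A simplified illustration of the layout, using
stand-in types rather than the real driver structures:

  #include <stddef.h>
  #include <stdint.h>
  #include <stdio.h>

  struct net_hdr { uint8_t b[12]; };  /* stand-in for the virtio header */
  struct tx_region {                  /* stand-in for virtio_tx_region  */
  	struct net_hdr tx_hdr;
  	char net_headers[256];      /* linear copy of the packet headers */
  };

  int main(void)
  {
  	/* the copy lands just after the virtio header, so a single
  	 * contiguous descriptor can cover both */
  	printf("copy starts at offset %zu\n",
  	       offsetof(struct tx_region, net_headers));
  	return 0;
  }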

The size of this buffer is set to 256, which should
be enough in all cases:
  sizeof(ethernet) + sizeof(vlan) * 2 + sizeof(ip6)
    + sizeof(ip6-ext) + sizeof(tcp) + sizeof(tcp-opts)
  = 14 + 8 + 40 + sizeof(ip6-ext) + 20 + sizeof(tcp-opts)
  = 82 + sizeof(ip6-ext) + sizeof(tcp-opts)
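
As a quick sanity check of the fixed part of this bound against the
standard DPDK header definitions (the snippet is only illustrative
and not part of the patch):

  #include <rte_common.h>   /* RTE_BUILD_BUG_ON */
  #include <rte_ether.h>    /* struct ether_hdr, struct vlan_hdr */
  #include <rte_ip.h>       /* struct ipv6_hdr */
  #include <rte_tcp.h>      /* struct tcp_hdr */

  static inline void
  check_net_headers_budget(void)
  {
  	/* fixed part: 14 + 2 * 4 + 40 + 20 = 82, which leaves 174
  	 * bytes for IPv6 extension headers and TCP options */
  	RTE_BUILD_BUG_ON(sizeof(struct ether_hdr) +
  			 2 * sizeof(struct vlan_hdr) +
  			 sizeof(struct ipv6_hdr) +
  			 sizeof(struct tcp_hdr) > 256);
  }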

Fixes: 696573046e9e ("net/virtio: support TSO")

Signed-off-by: Olivier Matz <olivier.matz at 6wind.com>
---
 drivers/net/virtio/virtio_rxtx.c | 119 +++++++++++++++++++++++++++------------
 drivers/net/virtio/virtqueue.h   |   2 +
 2 files changed, 85 insertions(+), 36 deletions(-)

diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index 22d97a4..577c775 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -211,43 +211,73 @@ virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf *cookie)
 
 /* When doing TSO, the IP length is not included in the pseudo header
  * checksum of the packet given to the PMD, but for virtio it is
- * expected.
+ * expected. Fix the mbuf, or a copy of its headers if the mbuf is shared.
  */
-static void
-virtio_tso_fix_cksum(struct rte_mbuf *m)
+static unsigned int
+virtio_tso_fix_cksum(struct rte_mbuf *m, char *hdr, size_t hdr_sz)
 {
-	/* common case: header is not fragmented */
-	if (likely(rte_pktmbuf_data_len(m) >= m->l2_len + m->l3_len +
-			m->l4_len)) {
-		struct ipv4_hdr *iph;
-		struct ipv6_hdr *ip6h;
-		struct tcp_hdr *th;
-		uint16_t prev_cksum, new_cksum, ip_len, ip_paylen;
-		uint32_t tmp;
-
-		iph = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len);
-		th = RTE_PTR_ADD(iph, m->l3_len);
-		if ((iph->version_ihl >> 4) == 4) {
-			iph->hdr_checksum = 0;
-			iph->hdr_checksum = rte_ipv4_cksum(iph);
-			ip_len = iph->total_length;
-			ip_paylen = rte_cpu_to_be_16(rte_be_to_cpu_16(ip_len) -
-				m->l3_len);
-		} else {
-			ip6h = (struct ipv6_hdr *)iph;
-			ip_paylen = ip6h->payload_len;
+	struct ipv4_hdr *iph, iph_copy;
+	struct ipv6_hdr *ip6h = NULL, ip6h_copy;
+	struct tcp_hdr *th, th_copy;
+	size_t hdrlen = m->l2_len + m->l3_len + m->l4_len;
+	uint16_t prev_cksum, new_cksum, ip_len, ip_paylen;
+	uint32_t tmp;
+	int shared = 0;
+
+	/* mbuf is read-only, we need to copy the headers in a linear buffer */
+	if (unlikely(rte_pktmbuf_data_is_shared(m, 0, hdrlen))) {
+		shared = 1;
+
+		/* network headers are too big, there's nothing we can do */
+		if (hdrlen > hdr_sz)
+			return 0;
+
+		rte_pktmbuf_read_copy(m, 0, hdrlen, hdr);
+		iph = (struct ipv4_hdr *)(hdr + m->l2_len);
+		ip6h = (struct ipv6_hdr *)(hdr + m->l2_len);
+		th = (struct tcp_hdr *)(hdr + m->l2_len + m->l3_len);
+	} else {
+		iph = rte_pktmbuf_read(m, m->l2_len, sizeof(*iph), &iph_copy);
+		th = rte_pktmbuf_read(m, m->l2_len + m->l3_len, sizeof(*th),
+			&th_copy);
+	}
+
+	if ((iph->version_ihl >> 4) == 4) {
+		iph->hdr_checksum = 0;
+		iph->hdr_checksum = rte_ipv4_cksum(iph);
+		ip_len = iph->total_length;
+		ip_paylen = rte_cpu_to_be_16(rte_be_to_cpu_16(ip_len) -
+			m->l3_len);
+	} else {
+		if (!shared) {
+			ip6h = rte_pktmbuf_read(m, m->l2_len, sizeof(*ip6h),
+				&ip6h_copy);
 		}
+		ip_paylen = ip6h->payload_len;
+	}
 
-		/* calculate the new phdr checksum not including ip_paylen */
-		prev_cksum = th->cksum;
-		tmp = prev_cksum;
-		tmp += ip_paylen;
-		tmp = (tmp & 0xffff) + (tmp >> 16);
-		new_cksum = tmp;
+	/* calculate the new phdr checksum not including ip_paylen */
+	prev_cksum = th->cksum;
+	tmp = prev_cksum;
+	tmp += ip_paylen;
+	tmp = (tmp & 0xffff) + (tmp >> 16);
+	new_cksum = tmp;
 
-		/* replace it in the packet */
-		th->cksum = new_cksum;
-	}
+	/* replace it in the header */
+	th->cksum = new_cksum;
+
+	/* the update was done in the linear buffer, return */
+	if (shared)
+		return hdrlen;
+
+	/* copy from local buffer into mbuf if required */
+	if ((iph->version_ihl >> 4) == 4)
+		rte_pktmbuf_write(m, m->l2_len, sizeof(*iph), iph);
+	else
+		rte_pktmbuf_write(m, m->l2_len, sizeof(*ip6h), ip6h);
+	rte_pktmbuf_write(m, m->l2_len + m->l3_len, sizeof(*th), th);
+
+	return 0;
 }
 
 static inline int
@@ -268,7 +298,9 @@ virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
 	struct vring_desc *start_dp;
 	uint16_t seg_num = cookie->nb_segs;
 	uint16_t head_idx, idx;
+	uint16_t hdr_idx = 0;
 	uint16_t head_size = vq->hw->vtnet_hdr_size;
+	unsigned int offset = 0;
 	struct virtio_net_hdr *hdr;
 	int offload;
 
@@ -303,6 +335,8 @@ virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
 
 		/* loop below will fill in rest of the indirect elements */
 		start_dp = txr[idx].tx_indir;
+		hdr_idx = 0;
+		start_dp[hdr_idx].len = vq->hw->vtnet_hdr_size;
 		idx = 1;
 	} else {
 		/* setup first tx ring slot to point to header
@@ -313,7 +347,7 @@ virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
 		start_dp[idx].len   = vq->hw->vtnet_hdr_size;
 		start_dp[idx].flags = VRING_DESC_F_NEXT;
 		hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
-
+		hdr_idx = idx;
 		idx = start_dp[idx].next;
 	}
 	}
 
@@ -345,7 +379,14 @@ virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
 
 		/* TCP Segmentation Offload */
 		if (cookie->ol_flags & PKT_TX_TCP_SEG) {
-			virtio_tso_fix_cksum(cookie);
+			offset = virtio_tso_fix_cksum(cookie,
+				RTE_PTR_ADD(hdr, start_dp[hdr_idx].len),
+				VIRTIO_MAX_HDR_SZ);
+			if (offset > 0) {
+				RTE_ASSERT(can_push == 0);
+				start_dp[hdr_idx].len += offset;
+			}
+
 			hdr->gso_type = (cookie->ol_flags & PKT_TX_IPV6) ?
 				VIRTIO_NET_HDR_GSO_TCPV6 :
 				VIRTIO_NET_HDR_GSO_TCPV4;
@@ -362,10 +403,16 @@ virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
 	}
 
 	do {
-		start_dp[idx].addr  = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
-		start_dp[idx].len   = cookie->data_len;
+		if (offset > cookie->data_len) {
+			offset -= cookie->data_len;
+			continue;
+		}
+		start_dp[idx].addr  = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq) +
+			offset;
+		start_dp[idx].len   = cookie->data_len - offset;
 		start_dp[idx].flags = cookie->next ? VRING_DESC_F_NEXT : 0;
 		idx = start_dp[idx].next;
+		offset = 0;
 	} while ((cookie = cookie->next) != NULL);
 
 	if (use_indirect)
diff --git a/drivers/net/virtio/virtqueue.h b/drivers/net/virtio/virtqueue.h
index f0bb089..edfe0dd 100644
--- a/drivers/net/virtio/virtqueue.h
+++ b/drivers/net/virtio/virtqueue.h
@@ -254,8 +254,10 @@ struct virtio_net_hdr_mrg_rxbuf {
 
 /* Region reserved to allow for transmit header and indirect ring */
 #define VIRTIO_MAX_TX_INDIRECT 8
+#define VIRTIO_MAX_HDR_SZ 256
 struct virtio_tx_region {
 	struct virtio_net_hdr_mrg_rxbuf tx_hdr;
+	char net_headers[VIRTIO_MAX_HDR_SZ]; /* for offload if mbuf is RO */
 	struct vring_desc tx_indir[VIRTIO_MAX_TX_INDIRECT]
 			   __attribute__((__aligned__(16)));
 };
-- 
2.8.1
