[dpdk-stable] patch 'vhost: fix offload flags in Rx path' has been queued to stable release 20.11.2

Xueming Li xuemingl at nvidia.com
Sat Jun 12 01:02:46 CEST 2021


Hi,

FYI, your patch has been queued to stable release 20.11.2

Note it hasn't been pushed to http://dpdk.org/browse/dpdk-stable yet.
It will be pushed if I get no objections before 06/14/21. So please
shout if anyone has objections.

Also note that after the patch there's a diff of the upstream commit vs the
patch applied to the branch. This will indicate if there was any rebasing
needed to apply to the stable branch. If there were code changes for rebasing
(ie: not only metadata diffs), please double check that the rebase was
correctly done.

Queued patches are on a temporary branch at:
https://github.com/steevenlee/dpdk

This queued commit can be viewed at:
https://github.com/steevenlee/dpdk/commit/ca60f8482369ee84266041f381fa4863c828785a

Thanks.

Xueming Li <xuemingl at nvidia.com>

---
>From ca60f8482369ee84266041f381fa4863c828785a Mon Sep 17 00:00:00 2001
From: David Marchand <david.marchand at redhat.com>
Date: Mon, 3 May 2021 18:43:44 +0200
Subject: [PATCH] vhost: fix offload flags in Rx path
Cc: Luca Boccassi <bluca at debian.org>

[ upstream commit ca7036b4af3a82d258cca914e71171434b3d0320 ]

The vhost library currently configures Tx offloading (PKT_TX_*) on any
packet received from a guest virtio device which asks for some offloading.

This is problematic, as Tx offloading is something that the application
must ask for: the application needs to configure devices
to support every used offloads (ip, tcp checksumming, tso..), and the
various l2/l3/l4 lengths must be set following any processing that
happened in the application itself.

On the other hand, the received packets are not marked wrt current
packet l3/l4 checksumming info.

Copy virtio rx processing to fix those offload flags with some
differences:
- accept VIRTIO_NET_HDR_GSO_ECN and VIRTIO_NET_HDR_GSO_UDP,
- ignore anything but the VIRTIO_NET_HDR_F_NEEDS_CSUM flag (to comply with
  the virtio spec),

Some applications might rely on the current behavior, so it is left
untouched by default.
A new RTE_VHOST_USER_NET_COMPLIANT_OL_FLAGS flag is added to enable the
new behavior.

The vhost example has been updated for the new behavior: TSO is applied to
any packet marked LRO.

Fixes: 859b480d5afd ("vhost: add guest offload setting")

Signed-off-by: David Marchand <david.marchand at redhat.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin at redhat.com>
---
 doc/guides/prog_guide/vhost_lib.rst |  12 ++
 drivers/net/vhost/rte_eth_vhost.c   |   2 +-
 examples/vhost/main.c               |  44 +++----
 lib/librte_vhost/rte_vhost.h        |   1 +
 lib/librte_vhost/socket.c           |   5 +-
 lib/librte_vhost/vhost.c            |   6 +-
 lib/librte_vhost/vhost.h            |  14 ++-
 lib/librte_vhost/virtio_net.c       | 185 ++++++++++++++++++++++++----
 8 files changed, 216 insertions(+), 53 deletions(-)

diff --git a/doc/guides/prog_guide/vhost_lib.rst b/doc/guides/prog_guide/vhost_lib.rst
index ba4c62aeb8..493818bcf9 100644
--- a/doc/guides/prog_guide/vhost_lib.rst
+++ b/doc/guides/prog_guide/vhost_lib.rst
@@ -118,6 +118,18 @@ The following is an overview of some key Vhost API functions:
 
     It is disabled by default.
 
+  - ``RTE_VHOST_USER_NET_COMPLIANT_OL_FLAGS``
+
+    Since v16.04, the vhost library forwards checksum and gso requests for
+    packets received from a virtio driver by filling Tx offload metadata in
+    the mbuf. This behavior is inconsistent with other drivers but it is left
+    untouched for existing applications that might rely on it.
+
+    This flag disables the legacy behavior and instead ask vhost to simply
+    populate Rx offload metadata in the mbuf.
+
+    It is disabled by default.
+
 * ``rte_vhost_driver_set_features(path, features)``
 
   This function sets the feature bits the vhost-user driver supports. The
diff --git a/drivers/net/vhost/rte_eth_vhost.c b/drivers/net/vhost/rte_eth_vhost.c
index 5845bb15f3..fe36fc8824 100644
--- a/drivers/net/vhost/rte_eth_vhost.c
+++ b/drivers/net/vhost/rte_eth_vhost.c
@@ -1505,7 +1505,7 @@ rte_pmd_vhost_probe(struct rte_vdev_device *dev)
 	int ret = 0;
 	char *iface_name;
 	uint16_t queues;
-	uint64_t flags = 0;
+	uint64_t flags = RTE_VHOST_USER_NET_COMPLIANT_OL_FLAGS;
 	uint64_t disable_flags = 0;
 	int client_mode = 0;
 	int iommu_support = 0;
diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index dd1a936f23..b7e1abffd1 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -20,6 +20,7 @@
 #include <rte_log.h>
 #include <rte_string_fns.h>
 #include <rte_malloc.h>
+#include <rte_net.h>
 #include <rte_vhost.h>
 #include <rte_ip.h>
 #include <rte_tcp.h>
@@ -911,33 +912,34 @@ find_local_dest(struct vhost_dev *vdev, struct rte_mbuf *m,
 	return 0;
 }
 
-static uint16_t
-get_psd_sum(void *l3_hdr, uint64_t ol_flags)
-{
-	if (ol_flags & PKT_TX_IPV4)
-		return rte_ipv4_phdr_cksum(l3_hdr, ol_flags);
-	else /* assume ethertype == RTE_ETHER_TYPE_IPV6 */
-		return rte_ipv6_phdr_cksum(l3_hdr, ol_flags);
-}
-
 static void virtio_tx_offload(struct rte_mbuf *m)
 {
+	struct rte_net_hdr_lens hdr_lens;
+	struct rte_ipv4_hdr *ipv4_hdr;
+	struct rte_tcp_hdr *tcp_hdr;
+	uint32_t ptype;
 	void *l3_hdr;
-	struct rte_ipv4_hdr *ipv4_hdr = NULL;
-	struct rte_tcp_hdr *tcp_hdr = NULL;
-	struct rte_ether_hdr *eth_hdr =
-		rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
 
-	l3_hdr = (char *)eth_hdr + m->l2_len;
+	ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK);
+	m->l2_len = hdr_lens.l2_len;
+	m->l3_len = hdr_lens.l3_len;
+	m->l4_len = hdr_lens.l4_len;
 
-	if (m->ol_flags & PKT_TX_IPV4) {
+	l3_hdr = rte_pktmbuf_mtod_offset(m, void *, m->l2_len);
+	tcp_hdr = rte_pktmbuf_mtod_offset(m, struct rte_tcp_hdr *,
+		m->l2_len + m->l3_len);
+
+	m->ol_flags |= PKT_TX_TCP_SEG;
+	if ((ptype & RTE_PTYPE_L3_MASK) == RTE_PTYPE_L3_IPV4) {
+		m->ol_flags |= PKT_TX_IPV4;
+		m->ol_flags |= PKT_TX_IP_CKSUM;
 		ipv4_hdr = l3_hdr;
 		ipv4_hdr->hdr_checksum = 0;
-		m->ol_flags |= PKT_TX_IP_CKSUM;
+		tcp_hdr->cksum = rte_ipv4_phdr_cksum(l3_hdr, m->ol_flags);
+	} else { /* assume ethertype == RTE_ETHER_TYPE_IPV6 */
+		m->ol_flags |= PKT_TX_IPV6;
+		tcp_hdr->cksum = rte_ipv6_phdr_cksum(l3_hdr, m->ol_flags);
 	}
-
-	tcp_hdr = (struct rte_tcp_hdr *)((char *)l3_hdr + m->l3_len);
-	tcp_hdr->cksum = get_psd_sum(l3_hdr, m->ol_flags);
 }
 
 static inline void
@@ -1039,7 +1041,7 @@ queue2nic:
 		m->vlan_tci = vlan_tag;
 	}
 
-	if (m->ol_flags & PKT_TX_TCP_SEG)
+	if (m->ol_flags & PKT_RX_LRO)
 		virtio_tx_offload(m);
 
 	tx_q->m_table[tx_q->len++] = m;
@@ -1503,7 +1505,7 @@ main(int argc, char *argv[])
 	int ret, i;
 	uint16_t portid;
 	static pthread_t tid;
-	uint64_t flags = 0;
+	uint64_t flags = RTE_VHOST_USER_NET_COMPLIANT_OL_FLAGS;
 
 	signal(SIGINT, sigint_handler);
 
diff --git a/lib/librte_vhost/rte_vhost.h b/lib/librte_vhost/rte_vhost.h
index 010f160869..fe910a2a7a 100644
--- a/lib/librte_vhost/rte_vhost.h
+++ b/lib/librte_vhost/rte_vhost.h
@@ -36,6 +36,7 @@ extern "C" {
 /* support only linear buffers (no chained mbufs) */
 #define RTE_VHOST_USER_LINEARBUF_SUPPORT	(1ULL << 6)
 #define RTE_VHOST_USER_ASYNC_COPY	(1ULL << 7)
+#define RTE_VHOST_USER_NET_COMPLIANT_OL_FLAGS	(1ULL << 8)
 
 /* Features. */
 #ifndef VIRTIO_NET_F_GUEST_ANNOUNCE
diff --git a/lib/librte_vhost/socket.c b/lib/librte_vhost/socket.c
index 0169d36481..5d0d728d52 100644
--- a/lib/librte_vhost/socket.c
+++ b/lib/librte_vhost/socket.c
@@ -42,6 +42,7 @@ struct vhost_user_socket {
 	bool extbuf;
 	bool linearbuf;
 	bool async_copy;
+	bool net_compliant_ol_flags;
 
 	/*
 	 * The "supported_features" indicates the feature bits the
@@ -224,7 +225,8 @@ vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket)
 	size = strnlen(vsocket->path, PATH_MAX);
 	vhost_set_ifname(vid, vsocket->path, size);
 
-	vhost_set_builtin_virtio_net(vid, vsocket->use_builtin_virtio_net);
+	vhost_setup_virtio_net(vid, vsocket->use_builtin_virtio_net,
+		vsocket->net_compliant_ol_flags);
 
 	vhost_attach_vdpa_device(vid, vsocket->vdpa_dev);
 
@@ -877,6 +879,7 @@ rte_vhost_driver_register(const char *path, uint64_t flags)
 	vsocket->extbuf = flags & RTE_VHOST_USER_EXTBUF_SUPPORT;
 	vsocket->linearbuf = flags & RTE_VHOST_USER_LINEARBUF_SUPPORT;
 	vsocket->async_copy = flags & RTE_VHOST_USER_ASYNC_COPY;
+	vsocket->net_compliant_ol_flags = flags & RTE_VHOST_USER_NET_COMPLIANT_OL_FLAGS;
 
 	if (vsocket->async_copy &&
 		(flags & (RTE_VHOST_USER_IOMMU_SUPPORT |
diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
index 90cad7fffd..b97dcf6b69 100644
--- a/lib/librte_vhost/vhost.c
+++ b/lib/librte_vhost/vhost.c
@@ -742,7 +742,7 @@ vhost_set_ifname(int vid, const char *if_name, unsigned int if_len)
 }
 
 void
-vhost_set_builtin_virtio_net(int vid, bool enable)
+vhost_setup_virtio_net(int vid, bool enable, bool compliant_ol_flags)
 {
 	struct virtio_net *dev = get_device(vid);
 
@@ -753,6 +753,10 @@ vhost_set_builtin_virtio_net(int vid, bool enable)
 		dev->flags |= VIRTIO_DEV_BUILTIN_VIRTIO_NET;
 	else
 		dev->flags &= ~VIRTIO_DEV_BUILTIN_VIRTIO_NET;
+	if (!compliant_ol_flags)
+		dev->flags |= VIRTIO_DEV_LEGACY_OL_FLAGS;
+	else
+		dev->flags &= ~VIRTIO_DEV_LEGACY_OL_FLAGS;
 }
 
 void
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index 361c9f79b3..984c7a6f5f 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -27,15 +27,17 @@
 #include "rte_vhost_async.h"
 
 /* Used to indicate that the device is running on a data core */
-#define VIRTIO_DEV_RUNNING 1
+#define VIRTIO_DEV_RUNNING ((uint32_t)1 << 0)
 /* Used to indicate that the device is ready to operate */
-#define VIRTIO_DEV_READY 2
+#define VIRTIO_DEV_READY ((uint32_t)1 << 1)
 /* Used to indicate that the built-in vhost net device backend is enabled */
-#define VIRTIO_DEV_BUILTIN_VIRTIO_NET 4
+#define VIRTIO_DEV_BUILTIN_VIRTIO_NET ((uint32_t)1 << 2)
 /* Used to indicate that the device has its own data path and configured */
-#define VIRTIO_DEV_VDPA_CONFIGURED 8
+#define VIRTIO_DEV_VDPA_CONFIGURED ((uint32_t)1 << 3)
 /* Used to indicate that the feature negotiation failed */
-#define VIRTIO_DEV_FEATURES_FAILED 16
+#define VIRTIO_DEV_FEATURES_FAILED ((uint32_t)1 << 4)
+/* Used to indicate that the virtio_net tx code should fill TX ol_flags */
+#define VIRTIO_DEV_LEGACY_OL_FLAGS ((uint32_t)1 << 5)
 
 /* Backend value set by guest. */
 #define VIRTIO_DEV_STOPPED -1
@@ -672,7 +674,7 @@ int alloc_vring_queue(struct virtio_net *dev, uint32_t vring_idx);
 void vhost_attach_vdpa_device(int vid, struct rte_vdpa_device *dev);
 
 void vhost_set_ifname(int, const char *if_name, unsigned int if_len);
-void vhost_set_builtin_virtio_net(int vid, bool enable);
+void vhost_setup_virtio_net(int vid, bool enable, bool legacy_ol_flags);
 void vhost_enable_extbuf(int vid);
 void vhost_enable_linearbuf(int vid);
 int vhost_enable_guest_notification(struct virtio_net *dev,
diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index f2392e77eb..6c7c0b0f0e 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -8,6 +8,7 @@
 
 #include <rte_mbuf.h>
 #include <rte_memcpy.h>
+#include <rte_net.h>
 #include <rte_ether.h>
 #include <rte_ip.h>
 #include <rte_vhost.h>
@@ -1844,15 +1845,12 @@ parse_ethernet(struct rte_mbuf *m, uint16_t *l4_proto, void **l4_hdr)
 }
 
 static __rte_always_inline void
-vhost_dequeue_offload(struct virtio_net_hdr *hdr, struct rte_mbuf *m)
+vhost_dequeue_offload_legacy(struct virtio_net_hdr *hdr, struct rte_mbuf *m)
 {
 	uint16_t l4_proto = 0;
 	void *l4_hdr = NULL;
 	struct rte_tcp_hdr *tcp_hdr = NULL;
 
-	if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE)
-		return;
-
 	parse_ethernet(m, &l4_proto, &l4_hdr);
 	if (hdr->flags == VIRTIO_NET_HDR_F_NEEDS_CSUM) {
 		if (hdr->csum_start == (m->l2_len + m->l3_len)) {
@@ -1897,6 +1895,94 @@ vhost_dequeue_offload(struct virtio_net_hdr *hdr, struct rte_mbuf *m)
 	}
 }
 
+static __rte_always_inline void
+vhost_dequeue_offload(struct virtio_net_hdr *hdr, struct rte_mbuf *m,
+	bool legacy_ol_flags)
+{
+	struct rte_net_hdr_lens hdr_lens;
+	int l4_supported = 0;
+	uint32_t ptype;
+
+	if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE)
+		return;
+
+	if (legacy_ol_flags) {
+		vhost_dequeue_offload_legacy(hdr, m);
+		return;
+	}
+
+	m->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN;
+
+	ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK);
+	m->packet_type = ptype;
+	if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP ||
+	    (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP ||
+	    (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP)
+		l4_supported = 1;
+
+	/* According to Virtio 1.1 spec, the device only needs to look at
+	 * VIRTIO_NET_HDR_F_NEEDS_CSUM in the packet transmission path.
+	 * This differs from the processing incoming packets path where the
+	 * driver could rely on VIRTIO_NET_HDR_F_DATA_VALID flag set by the
+	 * device.
+	 *
+	 * 5.1.6.2.1 Driver Requirements: Packet Transmission
+	 * The driver MUST NOT set the VIRTIO_NET_HDR_F_DATA_VALID and
+	 * VIRTIO_NET_HDR_F_RSC_INFO bits in flags.
+	 *
+	 * 5.1.6.2.2 Device Requirements: Packet Transmission
+	 * The device MUST ignore flag bits that it does not recognize.
+	 */
+	if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
+		uint32_t hdrlen;
+
+		hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len;
+		if (hdr->csum_start <= hdrlen && l4_supported != 0) {
+			m->ol_flags |= PKT_RX_L4_CKSUM_NONE;
+		} else {
+			/* Unknown proto or tunnel, do sw cksum. We can assume
+			 * the cksum field is in the first segment since the
+			 * buffers we provided to the host are large enough.
+			 * In case of SCTP, this will be wrong since it's a CRC
+			 * but there's nothing we can do.
+			 */
+			uint16_t csum = 0, off;
+
+			if (rte_raw_cksum_mbuf(m, hdr->csum_start,
+					rte_pktmbuf_pkt_len(m) - hdr->csum_start, &csum) < 0)
+				return;
+			if (likely(csum != 0xffff))
+				csum = ~csum;
+			off = hdr->csum_offset + hdr->csum_start;
+			if (rte_pktmbuf_data_len(m) >= off + 1)
+				*rte_pktmbuf_mtod_offset(m, uint16_t *, off) = csum;
+		}
+	}
+
+	if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
+		if (hdr->gso_size == 0)
+			return;
+
+		switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
+		case VIRTIO_NET_HDR_GSO_TCPV4:
+		case VIRTIO_NET_HDR_GSO_TCPV6:
+			if ((ptype & RTE_PTYPE_L4_MASK) != RTE_PTYPE_L4_TCP)
+				break;
+			m->ol_flags |= PKT_RX_LRO | PKT_RX_L4_CKSUM_NONE;
+			m->tso_segsz = hdr->gso_size;
+			break;
+		case VIRTIO_NET_HDR_GSO_UDP:
+			if ((ptype & RTE_PTYPE_L4_MASK) != RTE_PTYPE_L4_UDP)
+				break;
+			m->ol_flags |= PKT_RX_LRO | PKT_RX_L4_CKSUM_NONE;
+			m->tso_segsz = hdr->gso_size;
+			break;
+		default:
+			break;
+		}
+	}
+}
+
 static __rte_noinline void
 copy_vnet_hdr_from_desc(struct virtio_net_hdr *hdr,
 		struct buf_vector *buf_vec)
@@ -1921,7 +2007,8 @@ copy_vnet_hdr_from_desc(struct virtio_net_hdr *hdr,
 static __rte_always_inline int
 copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
 		  struct buf_vector *buf_vec, uint16_t nr_vec,
-		  struct rte_mbuf *m, struct rte_mempool *mbuf_pool)
+		  struct rte_mbuf *m, struct rte_mempool *mbuf_pool,
+		  bool legacy_ol_flags)
 {
 	uint32_t buf_avail, buf_offset;
 	uint64_t buf_addr, buf_len;
@@ -2054,7 +2141,7 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
 	m->pkt_len    += mbuf_offset;
 
 	if (hdr)
-		vhost_dequeue_offload(hdr, m);
+		vhost_dequeue_offload(hdr, m, legacy_ol_flags);
 
 out:
 
@@ -2137,9 +2224,11 @@ virtio_dev_pktmbuf_alloc(struct virtio_net *dev, struct rte_mempool *mp,
 	return NULL;
 }
 
-static __rte_noinline uint16_t
+__rte_always_inline
+static uint16_t
 virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
-	struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)
+	struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count,
+	bool legacy_ol_flags)
 {
 	uint16_t i;
 	uint16_t free_entries;
@@ -2199,7 +2288,7 @@ virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
 		}
 
 		err = copy_desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts[i],
-				mbuf_pool);
+				mbuf_pool, legacy_ol_flags);
 		if (unlikely(err)) {
 			rte_pktmbuf_free(pkts[i]);
 			if (!allocerr_warned) {
@@ -2227,6 +2316,24 @@ virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
 	return (i - dropped);
 }
 
+__rte_noinline
+static uint16_t
+virtio_dev_tx_split_legacy(struct virtio_net *dev,
+	struct vhost_virtqueue *vq, struct rte_mempool *mbuf_pool,
+	struct rte_mbuf **pkts, uint16_t count)
+{
+	return virtio_dev_tx_split(dev, vq, mbuf_pool, pkts, count, true);
+}
+
+__rte_noinline
+static uint16_t
+virtio_dev_tx_split_compliant(struct virtio_net *dev,
+	struct vhost_virtqueue *vq, struct rte_mempool *mbuf_pool,
+	struct rte_mbuf **pkts, uint16_t count)
+{
+	return virtio_dev_tx_split(dev, vq, mbuf_pool, pkts, count, false);
+}
+
 static __rte_always_inline int
 vhost_reserve_avail_batch_packed(struct virtio_net *dev,
 				 struct vhost_virtqueue *vq,
@@ -2307,7 +2414,8 @@ static __rte_always_inline int
 virtio_dev_tx_batch_packed(struct virtio_net *dev,
 			   struct vhost_virtqueue *vq,
 			   struct rte_mempool *mbuf_pool,
-			   struct rte_mbuf **pkts)
+			   struct rte_mbuf **pkts,
+			   bool legacy_ol_flags)
 {
 	uint16_t avail_idx = vq->last_avail_idx;
 	uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf);
@@ -2331,7 +2439,7 @@ virtio_dev_tx_batch_packed(struct virtio_net *dev,
 	if (virtio_net_with_host_offload(dev)) {
 		vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
 			hdr = (struct virtio_net_hdr *)(desc_addrs[i]);
-			vhost_dequeue_offload(hdr, pkts[i]);
+			vhost_dequeue_offload(hdr, pkts[i], legacy_ol_flags);
 		}
 	}
 
@@ -2352,7 +2460,8 @@ vhost_dequeue_single_packed(struct virtio_net *dev,
 			    struct rte_mempool *mbuf_pool,
 			    struct rte_mbuf **pkts,
 			    uint16_t *buf_id,
-			    uint16_t *desc_count)
+			    uint16_t *desc_count,
+			    bool legacy_ol_flags)
 {
 	struct buf_vector buf_vec[BUF_VECTOR_MAX];
 	uint32_t buf_len;
@@ -2379,7 +2488,7 @@ vhost_dequeue_single_packed(struct virtio_net *dev,
 	}
 
 	err = copy_desc_to_mbuf(dev, vq, buf_vec, nr_vec, *pkts,
-				mbuf_pool);
+				mbuf_pool, legacy_ol_flags);
 	if (unlikely(err)) {
 		if (!allocerr_warned) {
 			VHOST_LOG_DATA(ERR,
@@ -2398,14 +2507,15 @@ static __rte_always_inline int
 virtio_dev_tx_single_packed(struct virtio_net *dev,
 			    struct vhost_virtqueue *vq,
 			    struct rte_mempool *mbuf_pool,
-			    struct rte_mbuf **pkts)
+			    struct rte_mbuf **pkts,
+			    bool legacy_ol_flags)
 {
 
 	uint16_t buf_id, desc_count = 0;
 	int ret;
 
 	ret = vhost_dequeue_single_packed(dev, vq, mbuf_pool, pkts, &buf_id,
-					&desc_count);
+					&desc_count, legacy_ol_flags);
 
 	if (likely(desc_count > 0)) {
 		if (virtio_net_is_inorder(dev))
@@ -2421,12 +2531,14 @@ virtio_dev_tx_single_packed(struct virtio_net *dev,
 	return ret;
 }
 
-static __rte_noinline uint16_t
+__rte_always_inline
+static uint16_t
 virtio_dev_tx_packed(struct virtio_net *dev,
 		     struct vhost_virtqueue *__rte_restrict vq,
 		     struct rte_mempool *mbuf_pool,
 		     struct rte_mbuf **__rte_restrict pkts,
-		     uint32_t count)
+		     uint32_t count,
+		     bool legacy_ol_flags)
 {
 	uint32_t pkt_idx = 0;
 	uint32_t remained = count;
@@ -2436,7 +2548,8 @@ virtio_dev_tx_packed(struct virtio_net *dev,
 
 		if (remained >= PACKED_BATCH_SIZE) {
 			if (!virtio_dev_tx_batch_packed(dev, vq, mbuf_pool,
-							&pkts[pkt_idx])) {
+							&pkts[pkt_idx],
+							legacy_ol_flags)) {
 				pkt_idx += PACKED_BATCH_SIZE;
 				remained -= PACKED_BATCH_SIZE;
 				continue;
@@ -2444,7 +2557,8 @@ virtio_dev_tx_packed(struct virtio_net *dev,
 		}
 
 		if (virtio_dev_tx_single_packed(dev, vq, mbuf_pool,
-						&pkts[pkt_idx]))
+						&pkts[pkt_idx],
+						legacy_ol_flags))
 			break;
 		pkt_idx++;
 		remained--;
@@ -2461,6 +2575,24 @@ virtio_dev_tx_packed(struct virtio_net *dev,
 	return pkt_idx;
 }
 
+__rte_noinline
+static uint16_t
+virtio_dev_tx_packed_legacy(struct virtio_net *dev,
+	struct vhost_virtqueue *__rte_restrict vq, struct rte_mempool *mbuf_pool,
+	struct rte_mbuf **__rte_restrict pkts, uint32_t count)
+{
+	return virtio_dev_tx_packed(dev, vq, mbuf_pool, pkts, count, true);
+}
+
+__rte_noinline
+static uint16_t
+virtio_dev_tx_packed_compliant(struct virtio_net *dev,
+	struct vhost_virtqueue *__rte_restrict vq, struct rte_mempool *mbuf_pool,
+	struct rte_mbuf **__rte_restrict pkts, uint32_t count)
+{
+	return virtio_dev_tx_packed(dev, vq, mbuf_pool, pkts, count, false);
+}
+
 uint16_t
 rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 	struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)
@@ -2536,10 +2668,17 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 		count -= 1;
 	}
 
-	if (vq_is_packed(dev))
-		count = virtio_dev_tx_packed(dev, vq, mbuf_pool, pkts, count);
-	else
-		count = virtio_dev_tx_split(dev, vq, mbuf_pool, pkts, count);
+	if (vq_is_packed(dev)) {
+		if (dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS)
+			count = virtio_dev_tx_packed_legacy(dev, vq, mbuf_pool, pkts, count);
+		else
+			count = virtio_dev_tx_packed_compliant(dev, vq, mbuf_pool, pkts, count);
+	} else {
+		if (dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS)
+			count = virtio_dev_tx_split_legacy(dev, vq, mbuf_pool, pkts, count);
+		else
+			count = virtio_dev_tx_split_compliant(dev, vq, mbuf_pool, pkts, count);
+	}
 
 out:
 	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
-- 
2.25.1

---
  Diff of the applied patch vs upstream commit (please double-check if non-empty:
---
--- -	2021-06-12 06:53:58.470345500 +0800
+++ 0072-vhost-fix-offload-flags-in-Rx-path.patch	2021-06-12 06:53:56.340000000 +0800
@@ -1 +1 @@
-From ca7036b4af3a82d258cca914e71171434b3d0320 Mon Sep 17 00:00:00 2001
+From ca60f8482369ee84266041f381fa4863c828785a Mon Sep 17 00:00:00 2001
@@ -4,0 +5,3 @@
+Cc: Luca Boccassi <bluca at debian.org>
+
+[ upstream commit ca7036b4af3a82d258cca914e71171434b3d0320 ]
@@ -33 +35,0 @@
-Cc: stable at dpdk.org
@@ -38,10 +40,9 @@
- doc/guides/prog_guide/vhost_lib.rst    |  12 ++
- doc/guides/rel_notes/release_21_05.rst |   6 +
- drivers/net/vhost/rte_eth_vhost.c      |   2 +-
- examples/vhost/main.c                  |  44 +++---
- lib/vhost/rte_vhost.h                  |   1 +
- lib/vhost/socket.c                     |   5 +-
- lib/vhost/vhost.c                      |   6 +-
- lib/vhost/vhost.h                      |  14 +-
- lib/vhost/virtio_net.c                 | 185 ++++++++++++++++++++++---
- 9 files changed, 222 insertions(+), 53 deletions(-)
+ doc/guides/prog_guide/vhost_lib.rst |  12 ++
+ drivers/net/vhost/rte_eth_vhost.c   |   2 +-
+ examples/vhost/main.c               |  44 +++----
+ lib/librte_vhost/rte_vhost.h        |   1 +
+ lib/librte_vhost/socket.c           |   5 +-
+ lib/librte_vhost/vhost.c            |   6 +-
+ lib/librte_vhost/vhost.h            |  14 ++-
+ lib/librte_vhost/virtio_net.c       | 185 ++++++++++++++++++++++++----
+ 8 files changed, 216 insertions(+), 53 deletions(-)
@@ -50 +51 @@
-index 7afa351675..d18fb98910 100644
+index ba4c62aeb8..493818bcf9 100644
@@ -72,17 +72,0 @@
-diff --git a/doc/guides/rel_notes/release_21_05.rst b/doc/guides/rel_notes/release_21_05.rst
-index 885af4ab7c..bcd4dd84fb 100644
---- a/doc/guides/rel_notes/release_21_05.rst
-+++ b/doc/guides/rel_notes/release_21_05.rst
-@@ -343,6 +343,12 @@ API Changes
-   ``policer_action_recolor_supported`` and ``policer_action_drop_supported``
-   have been removed.
- 
-+* vhost: The vhost library currently populates received mbufs from a virtio
-+  driver with Tx offload flags while not filling Rx offload flags.
-+  While this behavior is arguable, it is kept untouched.
-+  A new flag ``RTE_VHOST_USER_NET_COMPLIANT_OL_FLAGS`` has been added to ask
-+  for a behavior compliant with the mbuf offload API.
-+
- * stack: Lock-free ``rte_stack`` no longer silently ignores push and pop when
-   it's not supported on the current platform. Instead ``rte_stack_create()``
-   fails and ``rte_errno`` is set to ``ENOTSUP``.
@@ -90 +74 @@
-index d198fc8a8e..281379d6a3 100644
+index 5845bb15f3..fe36fc8824 100644
@@ -103 +87 @@
-index 0bee1f3321..d2179eadb9 100644
+index dd1a936f23..b7e1abffd1 100644
@@ -106 +90 @@
-@@ -19,6 +19,7 @@
+@@ -20,6 +20,7 @@
@@ -114 +98 @@
-@@ -1029,33 +1030,34 @@ find_local_dest(struct vhost_dev *vdev, struct rte_mbuf *m,
+@@ -911,33 +912,34 @@ find_local_dest(struct vhost_dev *vdev, struct rte_mbuf *m,
@@ -167,2 +151,2 @@
- static __rte_always_inline void
-@@ -1148,7 +1150,7 @@ queue2nic:
+ static inline void
+@@ -1039,7 +1041,7 @@ queue2nic:
@@ -177 +161 @@
-@@ -1633,7 +1635,7 @@ main(int argc, char *argv[])
+@@ -1503,7 +1505,7 @@ main(int argc, char *argv[])
@@ -186,4 +170,4 @@
-diff --git a/lib/vhost/rte_vhost.h b/lib/vhost/rte_vhost.h
-index d0a8ae31f2..8d875e9322 100644
---- a/lib/vhost/rte_vhost.h
-+++ b/lib/vhost/rte_vhost.h
+diff --git a/lib/librte_vhost/rte_vhost.h b/lib/librte_vhost/rte_vhost.h
+index 010f160869..fe910a2a7a 100644
+--- a/lib/librte_vhost/rte_vhost.h
++++ b/lib/librte_vhost/rte_vhost.h
@@ -198 +182 @@
-diff --git a/lib/vhost/socket.c b/lib/vhost/socket.c
+diff --git a/lib/librte_vhost/socket.c b/lib/librte_vhost/socket.c
@@ -200,2 +184,2 @@
---- a/lib/vhost/socket.c
-+++ b/lib/vhost/socket.c
+--- a/lib/librte_vhost/socket.c
++++ b/lib/librte_vhost/socket.c
@@ -228,5 +212,5 @@
-diff --git a/lib/vhost/vhost.c b/lib/vhost/vhost.c
-index c9b6379f73..9abfc0bfe7 100644
---- a/lib/vhost/vhost.c
-+++ b/lib/vhost/vhost.c
-@@ -752,7 +752,7 @@ vhost_set_ifname(int vid, const char *if_name, unsigned int if_len)
+diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
+index 90cad7fffd..b97dcf6b69 100644
+--- a/lib/librte_vhost/vhost.c
++++ b/lib/librte_vhost/vhost.c
+@@ -742,7 +742,7 @@ vhost_set_ifname(int vid, const char *if_name, unsigned int if_len)
@@ -241 +225 @@
-@@ -763,6 +763,10 @@ vhost_set_builtin_virtio_net(int vid, bool enable)
+@@ -753,6 +753,10 @@ vhost_set_builtin_virtio_net(int vid, bool enable)
@@ -252,4 +236,4 @@
-diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h
-index b303635645..8078ddff79 100644
---- a/lib/vhost/vhost.h
-+++ b/lib/vhost/vhost.h
+diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
+index 361c9f79b3..984c7a6f5f 100644
+--- a/lib/librte_vhost/vhost.h
++++ b/lib/librte_vhost/vhost.h
@@ -279 +263 @@
-@@ -683,7 +685,7 @@ int alloc_vring_queue(struct virtio_net *dev, uint32_t vring_idx);
+@@ -672,7 +674,7 @@ int alloc_vring_queue(struct virtio_net *dev, uint32_t vring_idx);
@@ -288,4 +272,4 @@
-diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c
-index 1a34867f3c..8e36f4c340 100644
---- a/lib/vhost/virtio_net.c
-+++ b/lib/vhost/virtio_net.c
+diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
+index f2392e77eb..6c7c0b0f0e 100644
+--- a/lib/librte_vhost/virtio_net.c
++++ b/lib/librte_vhost/virtio_net.c
@@ -300 +284 @@
-@@ -2303,15 +2304,12 @@ parse_ethernet(struct rte_mbuf *m, uint16_t *l4_proto, void **l4_hdr)
+@@ -1844,15 +1845,12 @@ parse_ethernet(struct rte_mbuf *m, uint16_t *l4_proto, void **l4_hdr)
@@ -317 +301 @@
-@@ -2356,6 +2354,94 @@ vhost_dequeue_offload(struct virtio_net_hdr *hdr, struct rte_mbuf *m)
+@@ -1897,6 +1895,94 @@ vhost_dequeue_offload(struct virtio_net_hdr *hdr, struct rte_mbuf *m)
@@ -412 +396 @@
-@@ -2380,7 +2466,8 @@ copy_vnet_hdr_from_desc(struct virtio_net_hdr *hdr,
+@@ -1921,7 +2007,8 @@ copy_vnet_hdr_from_desc(struct virtio_net_hdr *hdr,
@@ -422 +406 @@
-@@ -2513,7 +2600,7 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
+@@ -2054,7 +2141,7 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
@@ -431,2 +415,2 @@
-@@ -2606,9 +2693,11 @@ virtio_dev_pktmbuf_alloc(struct virtio_net *dev, struct rte_mempool *mp,
- 	return pkt;
+@@ -2137,9 +2224,11 @@ virtio_dev_pktmbuf_alloc(struct virtio_net *dev, struct rte_mempool *mp,
+ 	return NULL;
@@ -445 +429 @@
-@@ -2668,7 +2757,7 @@ virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
+@@ -2199,7 +2288,7 @@ virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
@@ -454 +438 @@
-@@ -2696,6 +2785,24 @@ virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
+@@ -2227,6 +2316,24 @@ virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
@@ -479,2 +463 @@
-@@ -2770,7 +2877,8 @@ err:
- static __rte_always_inline int
+@@ -2307,7 +2414,8 @@ static __rte_always_inline int
@@ -482,0 +466 @@
+ 			   struct rte_mempool *mbuf_pool,
@@ -489 +473 @@
-@@ -2794,7 +2902,7 @@ virtio_dev_tx_batch_packed(struct virtio_net *dev,
+@@ -2331,7 +2439,7 @@ virtio_dev_tx_batch_packed(struct virtio_net *dev,
@@ -498 +482 @@
-@@ -2815,7 +2923,8 @@ vhost_dequeue_single_packed(struct virtio_net *dev,
+@@ -2352,7 +2460,8 @@ vhost_dequeue_single_packed(struct virtio_net *dev,
@@ -500 +484 @@
- 			    struct rte_mbuf *pkts,
+ 			    struct rte_mbuf **pkts,
@@ -508 +492 @@
-@@ -2841,7 +2950,7 @@ vhost_dequeue_single_packed(struct virtio_net *dev,
+@@ -2379,7 +2488,7 @@ vhost_dequeue_single_packed(struct virtio_net *dev,
@@ -511 +495 @@
- 	err = copy_desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts,
+ 	err = copy_desc_to_mbuf(dev, vq, buf_vec, nr_vec, *pkts,
@@ -517 +501 @@
-@@ -2859,14 +2968,15 @@ static __rte_always_inline int
+@@ -2398,14 +2507,15 @@ static __rte_always_inline int
@@ -521,2 +505,2 @@
--			    struct rte_mbuf *pkts)
-+			    struct rte_mbuf *pkts,
+-			    struct rte_mbuf **pkts)
++			    struct rte_mbuf **pkts,
@@ -535 +519 @@
-@@ -2882,12 +2992,14 @@ virtio_dev_tx_single_packed(struct virtio_net *dev,
+@@ -2421,12 +2531,14 @@ virtio_dev_tx_single_packed(struct virtio_net *dev,
@@ -550,0 +535,2 @@
+ 	uint32_t remained = count;
+@@ -2436,7 +2548,8 @@ virtio_dev_tx_packed(struct virtio_net *dev,
@@ -552,4 +538,2 @@
-@@ -2899,14 +3011,16 @@ virtio_dev_tx_packed(struct virtio_net *dev,
- 
- 		if (count - pkt_idx >= PACKED_BATCH_SIZE) {
- 			if (!virtio_dev_tx_batch_packed(dev, vq,
+ 		if (remained >= PACKED_BATCH_SIZE) {
+ 			if (!virtio_dev_tx_batch_packed(dev, vq, mbuf_pool,
@@ -559,0 +544 @@
+ 				remained -= PACKED_BATCH_SIZE;
@@ -561 +546 @@
- 			}
+@@ -2444,7 +2557,8 @@ virtio_dev_tx_packed(struct virtio_net *dev,
@@ -565,2 +550,2 @@
--						pkts[pkt_idx]))
-+						pkts[pkt_idx],
+-						&pkts[pkt_idx]))
++						&pkts[pkt_idx],
@@ -570,2 +555,2 @@
- 	} while (pkt_idx < count);
-@@ -2924,6 +3038,24 @@ virtio_dev_tx_packed(struct virtio_net *dev,
+ 		remained--;
+@@ -2461,6 +2575,24 @@ virtio_dev_tx_packed(struct virtio_net *dev,
@@ -596 +581 @@
-@@ -2999,10 +3131,17 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
+@@ -2536,10 +2668,17 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,


More information about the stable mailing list