[v2,6/6] net/vhost: perform SW checksum in Tx path

Message ID 20220608124946.102623-7-maxime.coquelin@redhat.com (mailing list archive)
State Accepted, archived
Delegated to: Maxime Coquelin
Headers
Series Vhost checksum offload improvements |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK
ci/intel-Testing success Testing PASS
ci/iol-mellanox-Performance success Performance Testing PASS
ci/github-robot: build success github build: passed
ci/iol-aarch64-unit-testing success Testing PASS
ci/iol-intel-Functional success Functional Testing PASS
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-aarch64-compile-testing success Testing PASS
ci/iol-x86_64-compile-testing success Testing PASS
ci/iol-x86_64-unit-testing success Testing PASS
ci/iol-abi-testing success Testing PASS

Commit Message

Maxime Coquelin June 8, 2022, 12:49 p.m. UTC
  Virtio specification supports guest checksum offloading
for L4, which is enabled with VIRTIO_NET_F_GUEST_CSUM
feature negotiation. However, the Vhost PMD does not
advertise Tx checksum offload capabilities.

Advertising these offload capabilities at the ethdev level
is not enough, because we could still end up with the
application enabling these offloads while the guest has not
negotiated them.

This patch advertises the Tx checksum offload capabilities,
and introduces a compatibility layer to cover the case where
VIRTIO_NET_F_GUEST_CSUM has not been negotiated but the
application does configure the Tx checksum offloads. This
function performs the L4 Tx checksum in SW for UDP and TCP.
Compared to Rx SW checksum, the Tx SW checksum function
needs to compute the pseudo-header checksum, as we cannot
know whether it was done before.

This patch does not advertise SCTP checksum offloading
capability for now, but it could be handled later if the
need arises.

Reported-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 drivers/net/vhost/rte_eth_vhost.c | 62 +++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)
  

Comments

Chenbo Xia June 9, 2022, 2:26 a.m. UTC | #1
Hi Maxime,

> -----Original Message-----
> From: Maxime Coquelin <maxime.coquelin@redhat.com>
> Sent: Wednesday, June 8, 2022 8:50 PM
> To: dev@dpdk.org; jasowang@redhat.com; Xia, Chenbo <chenbo.xia@intel.com>;
> david.marchand@redhat.com; Matz, Olivier <olivier.matz@6wind.com>; Ma,
> WenwuX <wenwux.ma@intel.com>; Zhang, Yuying <yuying.zhang@intel.com>;
> Singh, Aman Deep <aman.deep.singh@intel.com>
> Cc: Maxime Coquelin <maxime.coquelin@redhat.com>
> Subject: [PATCH v2 6/6] net/vhost: perform SW checksum in Tx path
> 
> Virtio specification supports guest checksum offloading
> for L4, which is enabled with VIRTIO_NET_F_GUEST_CSUM
> feature negotiation. However, the Vhost PMD does not
> advertise Tx checksum offload capabilities.
> 
> Advertising these offload capabilities at the ethdev level
> is not enough, because we could still end-up with the
> application enabling these offloads while the guest not
> negotiating it.
> 
> This patch advertises the Tx checksum offload capabilities,
> and introduces a compatibility layer to cover the case
> VIRTIO_NET_F_GUEST_CSUM has not been negotiated but the
> application does configure the Tx checksum offloads. This
> function performs the L4 Tx checksum in SW for UDP and TCP.
> Compared to Rx SW checksum, the Tx SW checksum function
> needs to compute the pseudo-header checksum, as we cannot
> know whether it was done before.
> 
> This patch does not advertise SCTP checksum offloading
> capability for now, but it could be handled later if the
> need arises.
> 
> Reported-by: Jason Wang <jasowang@redhat.com>
> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> ---
>  drivers/net/vhost/rte_eth_vhost.c | 62 +++++++++++++++++++++++++++++++
>  1 file changed, 62 insertions(+)
> 
> diff --git a/drivers/net/vhost/rte_eth_vhost.c
> b/drivers/net/vhost/rte_eth_vhost.c
> index 42f0d52ebc..d75d256040 100644
> --- a/drivers/net/vhost/rte_eth_vhost.c
> +++ b/drivers/net/vhost/rte_eth_vhost.c
> @@ -92,6 +92,7 @@ struct pmd_internal {
>  	rte_atomic32_t started;
>  	bool vlan_strip;
>  	bool rx_sw_csum;
> +	bool tx_sw_csum;
>  };
> 
>  struct internal_list {
> @@ -283,8 +284,10 @@ vhost_dev_csum_configure(struct rte_eth_dev *eth_dev)
>  {
>  	struct pmd_internal *internal = eth_dev->data->dev_private;
>  	const struct rte_eth_rxmode *rxmode = &eth_dev->data-
> >dev_conf.rxmode;
> +	const struct rte_eth_txmode *txmode = &eth_dev->data-
> >dev_conf.txmode;
> 
>  	internal->rx_sw_csum = false;
> +	internal->tx_sw_csum = false;
> 
>  	/* SW checksum is not compatible with legacy mode */
>  	if (!(internal->flags & RTE_VHOST_USER_NET_COMPLIANT_OL_FLAGS))
> @@ -297,6 +300,56 @@ vhost_dev_csum_configure(struct rte_eth_dev *eth_dev)
>  			internal->rx_sw_csum = true;
>  		}
>  	}
> +
> +	if (!(internal->features & (1ULL << VIRTIO_NET_F_GUEST_CSUM))) {
> +		if (txmode->offloads &
> +				(RTE_ETH_TX_OFFLOAD_UDP_CKSUM |
> RTE_ETH_TX_OFFLOAD_TCP_CKSUM)) {
> +			VHOST_LOG(NOTICE, "Tx csum will be done in SW, may
> impact performance.");

Missing \n

With above fixed:

Reviewed-by: Chenbo Xia <chenbo.xia@intel.com>

> +			internal->tx_sw_csum = true;
> +		}
> +	}
> +}
> +
> +static void
> +vhost_dev_tx_sw_csum(struct rte_mbuf *mbuf)
> +{
> +	uint32_t hdr_len;
> +	uint16_t csum = 0, csum_offset;
> +
> +	switch (mbuf->ol_flags & RTE_MBUF_F_TX_L4_MASK) {
> +	case RTE_MBUF_F_TX_L4_NO_CKSUM:
> +		return;
> +	case RTE_MBUF_F_TX_TCP_CKSUM:
> +		csum_offset = offsetof(struct rte_tcp_hdr, cksum);
> +		break;
> +	case RTE_MBUF_F_TX_UDP_CKSUM:
> +		csum_offset = offsetof(struct rte_udp_hdr, dgram_cksum);
> +		break;
> +	default:
> +		/* Unsupported packet type. */
> +		return;
> +	}
> +
> +	hdr_len = mbuf->l2_len + mbuf->l3_len;
> +	csum_offset += hdr_len;
> +
> +	/* Prepare the pseudo-header checksum */
> +	if (rte_net_intel_cksum_prepare(mbuf) < 0)
> +		return;
> +
> +	if (rte_raw_cksum_mbuf(mbuf, hdr_len, rte_pktmbuf_pkt_len(mbuf) -
> hdr_len, &csum) < 0)
> +		return;
> +
> +	csum = ~csum;
> +	/* See RFC768 */
> +	if (unlikely((mbuf->packet_type & RTE_PTYPE_L4_UDP) && csum == 0))
> +		csum = 0xffff;
> +
> +	if (rte_pktmbuf_data_len(mbuf) >= csum_offset + 1)
> +		*rte_pktmbuf_mtod_offset(mbuf, uint16_t *, csum_offset) = csum;
> +
> +	mbuf->ol_flags &= ~RTE_MBUF_F_TX_L4_MASK;
> +	mbuf->ol_flags |= RTE_MBUF_F_TX_L4_NO_CKSUM;
>  }
> 
>  static void
> @@ -423,6 +476,10 @@ eth_vhost_tx(void *q, struct rte_mbuf **bufs,
> uint16_t nb_bufs)
>  			}
>  		}
> 
> +		if (r->internal->tx_sw_csum)
> +			vhost_dev_tx_sw_csum(m);
> +
> +
>  		bufs[nb_send] = m;
>  		++nb_send;
>  	}
> @@ -1267,6 +1324,11 @@ eth_dev_info(struct rte_eth_dev *dev,
> 
>  	dev_info->tx_offload_capa = RTE_ETH_TX_OFFLOAD_MULTI_SEGS |
>  				RTE_ETH_TX_OFFLOAD_VLAN_INSERT;
> +	if (internal->flags & RTE_VHOST_USER_NET_COMPLIANT_OL_FLAGS) {
> +		dev_info->tx_offload_capa |= RTE_ETH_TX_OFFLOAD_UDP_CKSUM |
> +			RTE_ETH_TX_OFFLOAD_TCP_CKSUM;
> +	}
> +
>  	dev_info->rx_offload_capa = RTE_ETH_RX_OFFLOAD_VLAN_STRIP;
>  	if (internal->flags & RTE_VHOST_USER_NET_COMPLIANT_OL_FLAGS) {
>  		dev_info->rx_offload_capa |= RTE_ETH_RX_OFFLOAD_UDP_CKSUM |
> --
> 2.35.3
  
Chenbo Xia June 10, 2022, 3:49 a.m. UTC | #2
+Cheng for review

> -----Original Message-----
> From: Maxime Coquelin <maxime.coquelin@redhat.com>
> Sent: Wednesday, June 8, 2022 8:50 PM
> To: dev@dpdk.org; jasowang@redhat.com; Xia, Chenbo <chenbo.xia@intel.com>;
> david.marchand@redhat.com; Matz, Olivier <olivier.matz@6wind.com>; Ma,
> WenwuX <wenwux.ma@intel.com>; Zhang, Yuying <yuying.zhang@intel.com>;
> Singh, Aman Deep <aman.deep.singh@intel.com>
> Cc: Maxime Coquelin <maxime.coquelin@redhat.com>
> Subject: [PATCH v2 6/6] net/vhost: perform SW checksum in Tx path
> 
> Virtio specification supports guest checksum offloading
> for L4, which is enabled with VIRTIO_NET_F_GUEST_CSUM
> feature negotiation. However, the Vhost PMD does not
> advertise Tx checksum offload capabilities.
> 
> Advertising these offload capabilities at the ethdev level
> is not enough, because we could still end-up with the
> application enabling these offloads while the guest not
> negotiating it.
> 
> This patch advertises the Tx checksum offload capabilities,
> and introduces a compatibility layer to cover the case
> VIRTIO_NET_F_GUEST_CSUM has not been negotiated but the
> application does configure the Tx checksum offloads. This
> function performs the L4 Tx checksum in SW for UDP and TCP.
> Compared to Rx SW checksum, the Tx SW checksum function
> needs to compute the pseudo-header checksum, as we cannot
> know whether it was done before.
> 
> This patch does not advertise SCTP checksum offloading
> capability for now, but it could be handled later if the
> need arises.
> 
> Reported-by: Jason Wang <jasowang@redhat.com>
> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> ---
>  drivers/net/vhost/rte_eth_vhost.c | 62 +++++++++++++++++++++++++++++++
>  1 file changed, 62 insertions(+)
> 
> diff --git a/drivers/net/vhost/rte_eth_vhost.c
> b/drivers/net/vhost/rte_eth_vhost.c
> index 42f0d52ebc..d75d256040 100644
> --- a/drivers/net/vhost/rte_eth_vhost.c
> +++ b/drivers/net/vhost/rte_eth_vhost.c
> @@ -92,6 +92,7 @@ struct pmd_internal {
>  	rte_atomic32_t started;
>  	bool vlan_strip;
>  	bool rx_sw_csum;
> +	bool tx_sw_csum;
>  };
> 
>  struct internal_list {
> @@ -283,8 +284,10 @@ vhost_dev_csum_configure(struct rte_eth_dev *eth_dev)
>  {
>  	struct pmd_internal *internal = eth_dev->data->dev_private;
>  	const struct rte_eth_rxmode *rxmode = &eth_dev->data-
> >dev_conf.rxmode;
> +	const struct rte_eth_txmode *txmode = &eth_dev->data-
> >dev_conf.txmode;
> 
>  	internal->rx_sw_csum = false;
> +	internal->tx_sw_csum = false;
> 
>  	/* SW checksum is not compatible with legacy mode */
>  	if (!(internal->flags & RTE_VHOST_USER_NET_COMPLIANT_OL_FLAGS))
> @@ -297,6 +300,56 @@ vhost_dev_csum_configure(struct rte_eth_dev *eth_dev)
>  			internal->rx_sw_csum = true;
>  		}
>  	}
> +
> +	if (!(internal->features & (1ULL << VIRTIO_NET_F_GUEST_CSUM))) {
> +		if (txmode->offloads &
> +				(RTE_ETH_TX_OFFLOAD_UDP_CKSUM |
> RTE_ETH_TX_OFFLOAD_TCP_CKSUM)) {
> +			VHOST_LOG(NOTICE, "Tx csum will be done in SW, may
> impact performance.");
> +			internal->tx_sw_csum = true;
> +		}
> +	}
> +}
> +
> +static void
> +vhost_dev_tx_sw_csum(struct rte_mbuf *mbuf)
> +{
> +	uint32_t hdr_len;
> +	uint16_t csum = 0, csum_offset;
> +
> +	switch (mbuf->ol_flags & RTE_MBUF_F_TX_L4_MASK) {
> +	case RTE_MBUF_F_TX_L4_NO_CKSUM:
> +		return;
> +	case RTE_MBUF_F_TX_TCP_CKSUM:
> +		csum_offset = offsetof(struct rte_tcp_hdr, cksum);
> +		break;
> +	case RTE_MBUF_F_TX_UDP_CKSUM:
> +		csum_offset = offsetof(struct rte_udp_hdr, dgram_cksum);
> +		break;
> +	default:
> +		/* Unsupported packet type. */
> +		return;
> +	}
> +
> +	hdr_len = mbuf->l2_len + mbuf->l3_len;
> +	csum_offset += hdr_len;
> +
> +	/* Prepare the pseudo-header checksum */
> +	if (rte_net_intel_cksum_prepare(mbuf) < 0)
> +		return;
> +
> +	if (rte_raw_cksum_mbuf(mbuf, hdr_len, rte_pktmbuf_pkt_len(mbuf) -
> hdr_len, &csum) < 0)
> +		return;
> +
> +	csum = ~csum;
> +	/* See RFC768 */
> +	if (unlikely((mbuf->packet_type & RTE_PTYPE_L4_UDP) && csum == 0))
> +		csum = 0xffff;
> +
> +	if (rte_pktmbuf_data_len(mbuf) >= csum_offset + 1)
> +		*rte_pktmbuf_mtod_offset(mbuf, uint16_t *, csum_offset) = csum;
> +
> +	mbuf->ol_flags &= ~RTE_MBUF_F_TX_L4_MASK;
> +	mbuf->ol_flags |= RTE_MBUF_F_TX_L4_NO_CKSUM;
>  }
> 
>  static void
> @@ -423,6 +476,10 @@ eth_vhost_tx(void *q, struct rte_mbuf **bufs,
> uint16_t nb_bufs)
>  			}
>  		}
> 
> +		if (r->internal->tx_sw_csum)
> +			vhost_dev_tx_sw_csum(m);
> +
> +
>  		bufs[nb_send] = m;
>  		++nb_send;
>  	}
> @@ -1267,6 +1324,11 @@ eth_dev_info(struct rte_eth_dev *dev,
> 
>  	dev_info->tx_offload_capa = RTE_ETH_TX_OFFLOAD_MULTI_SEGS |
>  				RTE_ETH_TX_OFFLOAD_VLAN_INSERT;
> +	if (internal->flags & RTE_VHOST_USER_NET_COMPLIANT_OL_FLAGS) {
> +		dev_info->tx_offload_capa |= RTE_ETH_TX_OFFLOAD_UDP_CKSUM |
> +			RTE_ETH_TX_OFFLOAD_TCP_CKSUM;
> +	}
> +
>  	dev_info->rx_offload_capa = RTE_ETH_RX_OFFLOAD_VLAN_STRIP;
>  	if (internal->flags & RTE_VHOST_USER_NET_COMPLIANT_OL_FLAGS) {
>  		dev_info->rx_offload_capa |= RTE_ETH_RX_OFFLOAD_UDP_CKSUM |
> --
> 2.35.3
  
Chenbo Xia June 10, 2022, 3:50 a.m. UTC | #3
+Cheng

> -----Original Message-----
> From: Maxime Coquelin <maxime.coquelin@redhat.com>
> Sent: Wednesday, June 8, 2022 8:50 PM
> To: dev@dpdk.org; jasowang@redhat.com; Xia, Chenbo <chenbo.xia@intel.com>;
> david.marchand@redhat.com; Matz, Olivier <olivier.matz@6wind.com>; Ma,
> WenwuX <wenwux.ma@intel.com>; Zhang, Yuying <yuying.zhang@intel.com>;
> Singh, Aman Deep <aman.deep.singh@intel.com>
> Cc: Maxime Coquelin <maxime.coquelin@redhat.com>
> Subject: [PATCH v2 6/6] net/vhost: perform SW checksum in Tx path
> 
> Virtio specification supports guest checksum offloading
> for L4, which is enabled with VIRTIO_NET_F_GUEST_CSUM
> feature negotiation. However, the Vhost PMD does not
> advertise Tx checksum offload capabilities.
> 
> Advertising these offload capabilities at the ethdev level
> is not enough, because we could still end-up with the
> application enabling these offloads while the guest not
> negotiating it.
> 
> This patch advertises the Tx checksum offload capabilities,
> and introduces a compatibility layer to cover the case
> VIRTIO_NET_F_GUEST_CSUM has not been negotiated but the
> application does configure the Tx checksum offloads. This
> function performs the L4 Tx checksum in SW for UDP and TCP.
> Compared to Rx SW checksum, the Tx SW checksum function
> needs to compute the pseudo-header checksum, as we cannot
> know whether it was done before.
> 
> This patch does not advertise SCTP checksum offloading
> capability for now, but it could be handled later if the
> need arises.
> 
> Reported-by: Jason Wang <jasowang@redhat.com>
> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> ---
>  drivers/net/vhost/rte_eth_vhost.c | 62 +++++++++++++++++++++++++++++++
>  1 file changed, 62 insertions(+)
> 
> diff --git a/drivers/net/vhost/rte_eth_vhost.c
> b/drivers/net/vhost/rte_eth_vhost.c
> index 42f0d52ebc..d75d256040 100644
> --- a/drivers/net/vhost/rte_eth_vhost.c
> +++ b/drivers/net/vhost/rte_eth_vhost.c
> @@ -92,6 +92,7 @@ struct pmd_internal {
>  	rte_atomic32_t started;
>  	bool vlan_strip;
>  	bool rx_sw_csum;
> +	bool tx_sw_csum;
>  };
> 
>  struct internal_list {
> @@ -283,8 +284,10 @@ vhost_dev_csum_configure(struct rte_eth_dev *eth_dev)
>  {
>  	struct pmd_internal *internal = eth_dev->data->dev_private;
>  	const struct rte_eth_rxmode *rxmode = &eth_dev->data-
> >dev_conf.rxmode;
> +	const struct rte_eth_txmode *txmode = &eth_dev->data-
> >dev_conf.txmode;
> 
>  	internal->rx_sw_csum = false;
> +	internal->tx_sw_csum = false;
> 
>  	/* SW checksum is not compatible with legacy mode */
>  	if (!(internal->flags & RTE_VHOST_USER_NET_COMPLIANT_OL_FLAGS))
> @@ -297,6 +300,56 @@ vhost_dev_csum_configure(struct rte_eth_dev *eth_dev)
>  			internal->rx_sw_csum = true;
>  		}
>  	}
> +
> +	if (!(internal->features & (1ULL << VIRTIO_NET_F_GUEST_CSUM))) {
> +		if (txmode->offloads &
> +				(RTE_ETH_TX_OFFLOAD_UDP_CKSUM |
> RTE_ETH_TX_OFFLOAD_TCP_CKSUM)) {
> +			VHOST_LOG(NOTICE, "Tx csum will be done in SW, may
> impact performance.");
> +			internal->tx_sw_csum = true;
> +		}
> +	}
> +}
> +
> +static void
> +vhost_dev_tx_sw_csum(struct rte_mbuf *mbuf)
> +{
> +	uint32_t hdr_len;
> +	uint16_t csum = 0, csum_offset;
> +
> +	switch (mbuf->ol_flags & RTE_MBUF_F_TX_L4_MASK) {
> +	case RTE_MBUF_F_TX_L4_NO_CKSUM:
> +		return;
> +	case RTE_MBUF_F_TX_TCP_CKSUM:
> +		csum_offset = offsetof(struct rte_tcp_hdr, cksum);
> +		break;
> +	case RTE_MBUF_F_TX_UDP_CKSUM:
> +		csum_offset = offsetof(struct rte_udp_hdr, dgram_cksum);
> +		break;
> +	default:
> +		/* Unsupported packet type. */
> +		return;
> +	}
> +
> +	hdr_len = mbuf->l2_len + mbuf->l3_len;
> +	csum_offset += hdr_len;
> +
> +	/* Prepare the pseudo-header checksum */
> +	if (rte_net_intel_cksum_prepare(mbuf) < 0)
> +		return;
> +
> +	if (rte_raw_cksum_mbuf(mbuf, hdr_len, rte_pktmbuf_pkt_len(mbuf) -
> hdr_len, &csum) < 0)
> +		return;
> +
> +	csum = ~csum;
> +	/* See RFC768 */
> +	if (unlikely((mbuf->packet_type & RTE_PTYPE_L4_UDP) && csum == 0))
> +		csum = 0xffff;
> +
> +	if (rte_pktmbuf_data_len(mbuf) >= csum_offset + 1)
> +		*rte_pktmbuf_mtod_offset(mbuf, uint16_t *, csum_offset) = csum;
> +
> +	mbuf->ol_flags &= ~RTE_MBUF_F_TX_L4_MASK;
> +	mbuf->ol_flags |= RTE_MBUF_F_TX_L4_NO_CKSUM;
>  }
> 
>  static void
> @@ -423,6 +476,10 @@ eth_vhost_tx(void *q, struct rte_mbuf **bufs,
> uint16_t nb_bufs)
>  			}
>  		}
> 
> +		if (r->internal->tx_sw_csum)
> +			vhost_dev_tx_sw_csum(m);
> +
> +
>  		bufs[nb_send] = m;
>  		++nb_send;
>  	}
> @@ -1267,6 +1324,11 @@ eth_dev_info(struct rte_eth_dev *dev,
> 
>  	dev_info->tx_offload_capa = RTE_ETH_TX_OFFLOAD_MULTI_SEGS |
>  				RTE_ETH_TX_OFFLOAD_VLAN_INSERT;
> +	if (internal->flags & RTE_VHOST_USER_NET_COMPLIANT_OL_FLAGS) {
> +		dev_info->tx_offload_capa |= RTE_ETH_TX_OFFLOAD_UDP_CKSUM |
> +			RTE_ETH_TX_OFFLOAD_TCP_CKSUM;
> +	}
> +
>  	dev_info->rx_offload_capa = RTE_ETH_RX_OFFLOAD_VLAN_STRIP;
>  	if (internal->flags & RTE_VHOST_USER_NET_COMPLIANT_OL_FLAGS) {
>  		dev_info->rx_offload_capa |= RTE_ETH_RX_OFFLOAD_UDP_CKSUM |
> --
> 2.35.3
  
Jiang, Cheng1 June 10, 2022, 7:31 a.m. UTC | #4
Hi Maxime,

> -----Original Message-----
> From: Xia, Chenbo <chenbo.xia@intel.com>
> Sent: Friday, June 10, 2022 11:50 AM
> To: Maxime Coquelin <maxime.coquelin@redhat.com>; dev@dpdk.org;
> jasowang@redhat.com; david.marchand@redhat.com; Matz, Olivier
> <olivier.matz@6wind.com>; Ma, WenwuX <wenwux.ma@intel.com>; Zhang,
> Yuying <yuying.zhang@intel.com>; Singh, Aman Deep
> <aman.deep.singh@intel.com>
> Subject: RE: [PATCH v2 6/6] net/vhost: perform SW checksum in Tx path
> 
> +Cheng for review
> 
> > -----Original Message-----
> > From: Maxime Coquelin <maxime.coquelin@redhat.com>
> > Sent: Wednesday, June 8, 2022 8:50 PM
> > To: dev@dpdk.org; jasowang@redhat.com; Xia, Chenbo
> > <chenbo.xia@intel.com>; david.marchand@redhat.com; Matz, Olivier
> > <olivier.matz@6wind.com>; Ma, WenwuX <wenwux.ma@intel.com>;
> Zhang,
> > Yuying <yuying.zhang@intel.com>; Singh, Aman Deep
> > <aman.deep.singh@intel.com>
> > Cc: Maxime Coquelin <maxime.coquelin@redhat.com>
> > Subject: [PATCH v2 6/6] net/vhost: perform SW checksum in Tx path
> >
> > Virtio specification supports guest checksum offloading for L4, which
> > is enabled with VIRTIO_NET_F_GUEST_CSUM feature negotiation.
> However,
> > the Vhost PMD does not advertise Tx checksum offload capabilities.
> >
> > Advertising these offload capabilities at the ethdev level is not
> > enough, because we could still end-up with the application enabling
> > these offloads while the guest not negotiating it.
> >
> > This patch advertises the Tx checksum offload capabilities, and
> > introduces a compatibility layer to cover the case
> > VIRTIO_NET_F_GUEST_CSUM has not been negotiated but the application
> > does configure the Tx checksum offloads. This function performs the L4
> > Tx checksum in SW for UDP and TCP.
> > Compared to Rx SW checksum, the Tx SW checksum function needs to
> > compute the pseudo-header checksum, as we cannot know whether it was
> > done before.
> >
> > This patch does not advertise SCTP checksum offloading capability for
> > now, but it could be handled later if the need arises.
> >
> > Reported-by: Jason Wang <jasowang@redhat.com>
> > Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> > ---

Reviewed-by: Cheng Jiang <cheng1.jiang@intel.com>
  

Patch

diff --git a/drivers/net/vhost/rte_eth_vhost.c b/drivers/net/vhost/rte_eth_vhost.c
index 42f0d52ebc..d75d256040 100644
--- a/drivers/net/vhost/rte_eth_vhost.c
+++ b/drivers/net/vhost/rte_eth_vhost.c
@@ -92,6 +92,7 @@  struct pmd_internal {
 	rte_atomic32_t started;
 	bool vlan_strip;
 	bool rx_sw_csum;
+	bool tx_sw_csum;
 };
 
 struct internal_list {
@@ -283,8 +284,10 @@  vhost_dev_csum_configure(struct rte_eth_dev *eth_dev)
 {
 	struct pmd_internal *internal = eth_dev->data->dev_private;
 	const struct rte_eth_rxmode *rxmode = &eth_dev->data->dev_conf.rxmode;
+	const struct rte_eth_txmode *txmode = &eth_dev->data->dev_conf.txmode;
 
 	internal->rx_sw_csum = false;
+	internal->tx_sw_csum = false;
 
 	/* SW checksum is not compatible with legacy mode */
 	if (!(internal->flags & RTE_VHOST_USER_NET_COMPLIANT_OL_FLAGS))
@@ -297,6 +300,56 @@  vhost_dev_csum_configure(struct rte_eth_dev *eth_dev)
 			internal->rx_sw_csum = true;
 		}
 	}
+
+	if (!(internal->features & (1ULL << VIRTIO_NET_F_GUEST_CSUM))) {
+		if (txmode->offloads &
+				(RTE_ETH_TX_OFFLOAD_UDP_CKSUM | RTE_ETH_TX_OFFLOAD_TCP_CKSUM)) {
+			VHOST_LOG(NOTICE, "Tx csum will be done in SW, may impact performance.");
+			internal->tx_sw_csum = true;
+		}
+	}
+}
+
+static void
+vhost_dev_tx_sw_csum(struct rte_mbuf *mbuf)
+{
+	uint32_t hdr_len;
+	uint16_t csum = 0, csum_offset;
+
+	switch (mbuf->ol_flags & RTE_MBUF_F_TX_L4_MASK) {
+	case RTE_MBUF_F_TX_L4_NO_CKSUM:
+		return;
+	case RTE_MBUF_F_TX_TCP_CKSUM:
+		csum_offset = offsetof(struct rte_tcp_hdr, cksum);
+		break;
+	case RTE_MBUF_F_TX_UDP_CKSUM:
+		csum_offset = offsetof(struct rte_udp_hdr, dgram_cksum);
+		break;
+	default:
+		/* Unsupported packet type. */
+		return;
+	}
+
+	hdr_len = mbuf->l2_len + mbuf->l3_len;
+	csum_offset += hdr_len;
+
+	/* Prepare the pseudo-header checksum */
+	if (rte_net_intel_cksum_prepare(mbuf) < 0)
+		return;
+
+	if (rte_raw_cksum_mbuf(mbuf, hdr_len, rte_pktmbuf_pkt_len(mbuf) - hdr_len, &csum) < 0)
+		return;
+
+	csum = ~csum;
+	/* See RFC768 */
+	if (unlikely((mbuf->packet_type & RTE_PTYPE_L4_UDP) && csum == 0))
+		csum = 0xffff;
+
+	if (rte_pktmbuf_data_len(mbuf) >= csum_offset + 1)
+		*rte_pktmbuf_mtod_offset(mbuf, uint16_t *, csum_offset) = csum;
+
+	mbuf->ol_flags &= ~RTE_MBUF_F_TX_L4_MASK;
+	mbuf->ol_flags |= RTE_MBUF_F_TX_L4_NO_CKSUM;
 }
 
 static void
@@ -423,6 +476,10 @@  eth_vhost_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
 			}
 		}
 
+		if (r->internal->tx_sw_csum)
+			vhost_dev_tx_sw_csum(m);
+
+
 		bufs[nb_send] = m;
 		++nb_send;
 	}
@@ -1267,6 +1324,11 @@  eth_dev_info(struct rte_eth_dev *dev,
 
 	dev_info->tx_offload_capa = RTE_ETH_TX_OFFLOAD_MULTI_SEGS |
 				RTE_ETH_TX_OFFLOAD_VLAN_INSERT;
+	if (internal->flags & RTE_VHOST_USER_NET_COMPLIANT_OL_FLAGS) {
+		dev_info->tx_offload_capa |= RTE_ETH_TX_OFFLOAD_UDP_CKSUM |
+			RTE_ETH_TX_OFFLOAD_TCP_CKSUM;
+	}
+
 	dev_info->rx_offload_capa = RTE_ETH_RX_OFFLOAD_VLAN_STRIP;
 	if (internal->flags & RTE_VHOST_USER_NET_COMPLIANT_OL_FLAGS) {
 		dev_info->rx_offload_capa |= RTE_ETH_RX_OFFLOAD_UDP_CKSUM |