[PATCH v2] net/iavf: fix TSO offload for tunnel case

Zhang, Qi Z qi.z.zhang at intel.com
Tue Sep 27 04:33:57 CEST 2022



> -----Original Message-----
> From: Zeng, ZhichaoX <zhichaox.zeng at intel.com>
> Sent: Monday, September 26, 2022 1:17 PM
> To: dev at dpdk.org
> Cc: Yang, Qiming <qiming.yang at intel.com>; Zhou, YidingX
> <yidingx.zhou at intel.com>; Zhang, Qi Z <qi.z.zhang at intel.com>; Zeng,
> ZhichaoX <zhichaox.zeng at intel.com>; Wu, Jingjing <jingjing.wu at intel.com>;
> Xing, Beilei <beilei.xing at intel.com>; Sinha, Abhijit <abhijit.sinha at intel.com>;
> Doherty, Declan <declan.doherty at intel.com>; Nicolau, Radu
> <radu.nicolau at intel.com>
> Subject: [PATCH v2] net/iavf: fix TSO offload for tunnel case
> 
> This patch fixes the issue of tunnel TSO not being enabled, simplifies the
> logic for calculating the 'Tx Buffer Size' of the data descriptor with IPSec,
> and fixes the handling of mbufs whose size exceeds the TX descriptor hardware
> limit (1B-16KB), which causes malicious behavior to the NIC.
> 
> Fixes: 1e728b01120c ("net/iavf: rework Tx path")
> 
> ---
> v2: rework patch
> 
> Signed-off-by: Zhichao Zeng <zhichaox.zeng at intel.com>
> ---
>  drivers/common/iavf/iavf_osdep.h |  2 +
>  drivers/net/iavf/iavf_rxtx.c     | 95 +++++++++++++++++++-------------
>  2 files changed, 59 insertions(+), 38 deletions(-)
> 
> diff --git a/drivers/common/iavf/iavf_osdep.h
> b/drivers/common/iavf/iavf_osdep.h
> index 31d3d809f9..bf1436dfc6 100644
> --- a/drivers/common/iavf/iavf_osdep.h
> +++ b/drivers/common/iavf/iavf_osdep.h
> @@ -126,6 +126,8 @@ writeq(uint64_t value, volatile void *addr)  #define
> iavf_memset(a, b, c, d) memset((a), (b), (c))  #define iavf_memcpy(a, b, c, d)
> rte_memcpy((a), (b), (c))
> 
> +#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
> +

It does not look necessary to add this to osdep.h.
Can we simply make it local, or put it in a header file under net/iavf, so that the patch does not need to cross modules?

>  #define iavf_usec_delay(x) rte_delay_us_sleep(x)  #define
> iavf_msec_delay(x) iavf_usec_delay(1000 * (x))
> 
> diff --git a/drivers/net/iavf/iavf_rxtx.c b/drivers/net/iavf/iavf_rxtx.c index
> 109ba756f8..a06d9d3da6 100644
> --- a/drivers/net/iavf/iavf_rxtx.c
> +++ b/drivers/net/iavf/iavf_rxtx.c
> @@ -2417,7 +2417,7 @@ iavf_fill_ctx_desc_segmentation_field(volatile
> uint64_t *field,
>  		total_length = m->pkt_len - (m->l2_len + m->l3_len + m-
> >l4_len);
> 
>  		if (m->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK)
> -			total_length -= m->outer_l3_len;
> +			total_length -= m->outer_l3_len + m->outer_l2_len;
>  	}
> 
>  #ifdef RTE_LIBRTE_IAVF_DEBUG_TX
> @@ -2581,50 +2581,39 @@ iavf_build_data_desc_cmd_offset_fields(volatile
> uint64_t *qw1,
>  		((uint64_t)l2tag1 <<
> IAVF_TXD_DATA_QW1_L2TAG1_SHIFT));  }
> 
> +/* HW requires that TX buffer size ranges from 1B up to (16K-1)B. */
> +#define IAVF_MAX_DATA_PER_TXD \
> +	(IAVF_TXD_QW1_TX_BUF_SZ_MASK >>
> IAVF_TXD_QW1_TX_BUF_SZ_SHIFT)
> +
> +/* Calculate the number of TX descriptors needed for each pkt */ static
> +inline uint16_t iavf_calc_pkt_desc(struct rte_mbuf *tx_pkt) {
> +	struct rte_mbuf *txd = tx_pkt;
> +	uint16_t count = 0;
> +
> +	while (txd != NULL) {
> +		count += DIV_ROUND_UP(txd->data_len,
> IAVF_MAX_DATA_PER_TXD);
> +		txd = txd->next;
> +	}
> +
> +	return count;
> +}
> +
>  static inline void
>  iavf_fill_data_desc(volatile struct iavf_tx_desc *desc,
> -	struct rte_mbuf *m, uint64_t desc_template,
> -	uint16_t tlen, uint16_t ipseclen)
> +	uint64_t desc_template,	uint16_t buffsz,
> +	uint64_t buffer_addr)
>  {
> -	uint32_t hdrlen = m->l2_len;
> -	uint32_t bufsz = 0;
> -
>  	/* fill data descriptor qw1 from template */
>  	desc->cmd_type_offset_bsz = desc_template;
> 
> -	/* set data buffer address */
> -	desc->buffer_addr = rte_mbuf_data_iova(m);
> -
> -	/* calculate data buffer size less set header lengths */
> -	if ((m->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) &&
> -			(m->ol_flags & (RTE_MBUF_F_TX_TCP_SEG |
> -					RTE_MBUF_F_TX_UDP_SEG))) {
> -		hdrlen += m->outer_l3_len;
> -		if (m->ol_flags & RTE_MBUF_F_TX_L4_MASK)
> -			hdrlen += m->l3_len + m->l4_len;
> -		else
> -			hdrlen += m->l3_len;
> -		if (m->ol_flags & RTE_MBUF_F_TX_SEC_OFFLOAD)
> -			hdrlen += ipseclen;
> -		bufsz = hdrlen + tlen;
> -	} else if ((m->ol_flags & RTE_MBUF_F_TX_SEC_OFFLOAD) &&
> -			(m->ol_flags & (RTE_MBUF_F_TX_TCP_SEG |
> -					RTE_MBUF_F_TX_UDP_SEG))) {
> -		hdrlen += m->outer_l3_len + m->l3_len + ipseclen;
> -		if (m->ol_flags & RTE_MBUF_F_TX_L4_MASK)
> -			hdrlen += m->l4_len;
> -		bufsz = hdrlen + tlen;
> -
> -	} else {
> -		bufsz = m->data_len;
> -	}
> -
>  	/* set data buffer size */
>  	desc->cmd_type_offset_bsz |=
> -		(((uint64_t)bufsz <<
> IAVF_TXD_DATA_QW1_TX_BUF_SZ_SHIFT) &
> +		(((uint64_t)buffsz <<
> IAVF_TXD_DATA_QW1_TX_BUF_SZ_SHIFT) &
>  		IAVF_TXD_DATA_QW1_TX_BUF_SZ_MASK);
> 
> -	desc->buffer_addr = rte_cpu_to_le_64(desc->buffer_addr);
> +	desc->buffer_addr = rte_cpu_to_le_64(buffer_addr);
>  	desc->cmd_type_offset_bsz = rte_cpu_to_le_64(desc-
> >cmd_type_offset_bsz);
>  }
> 
> @@ -2649,8 +2638,10 @@ iavf_xmit_pkts(void *tx_queue, struct rte_mbuf
> **tx_pkts, uint16_t nb_pkts)
>  	struct iavf_tx_entry *txe_ring = txq->sw_ring;
>  	struct iavf_tx_entry *txe, *txn;
>  	struct rte_mbuf *mb, *mb_seg;
> +	uint64_t buf_dma_addr;
>  	uint16_t desc_idx, desc_idx_last;
>  	uint16_t idx;
> +	uint16_t slen;
> 
> 
>  	/* Check if the descriptor ring needs to be cleaned. */ @@ -2689,8
> +2680,14 @@ iavf_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
> uint16_t nb_pkts)
>  		 * The number of descriptors that must be allocated for
>  		 * a packet equals to the number of the segments of that
>  		 * packet plus the context and ipsec descriptors if needed.
> +		 * Recalculate the needed tx descs when TSO enabled in case
> +		 * the mbuf data size exceeds max data size that hw allows
> +		 * per tx desc.
>  		 */
> -		nb_desc_required = nb_desc_data + nb_desc_ctx +
> nb_desc_ipsec;
> +		if (mb->ol_flags & RTE_MBUF_F_TX_TCP_SEG)
> +			nb_desc_required = iavf_calc_pkt_desc(mb) +
> nb_desc_ctx + nb_desc_ipsec;
> +		else
> +			nb_desc_required = nb_desc_data + nb_desc_ctx +
> nb_desc_ipsec;
> 
>  		desc_idx_last = (uint16_t)(desc_idx + nb_desc_required - 1);
> 
> @@ -2786,8 +2783,30 @@ iavf_xmit_pkts(void *tx_queue, struct rte_mbuf
> **tx_pkts, uint16_t nb_pkts)
>  				rte_pktmbuf_free_seg(txe->mbuf);
> 
>  			txe->mbuf = mb_seg;
> -			iavf_fill_data_desc(ddesc, mb_seg,
> -					ddesc_template, tlen, ipseclen);
> +			slen = mb_seg->data_len;
> +			if (mb_seg->ol_flags &
> RTE_MBUF_F_TX_SEC_OFFLOAD)
> +				slen += ipseclen;
> +			buf_dma_addr = rte_mbuf_data_iova(mb_seg);
> +			while ((mb_seg->ol_flags &
> (RTE_MBUF_F_TX_TCP_SEG |
> +					RTE_MBUF_F_TX_UDP_SEG)) &&
> +					unlikely(slen >
> IAVF_MAX_DATA_PER_TXD)) {
> +				iavf_fill_data_desc(ddesc, ddesc_template,
> +					IAVF_MAX_DATA_PER_TXD,
> buf_dma_addr);
> +
> +				IAVF_DUMP_TX_DESC(txq, ddesc, desc_idx);
> +
> +				buf_dma_addr +=
> IAVF_MAX_DATA_PER_TXD;
> +				slen -= IAVF_MAX_DATA_PER_TXD;
> +
> +				txe->last_id = desc_idx_last;
> +				desc_idx = txe->next_id;
> +				txe = txn;
> +				ddesc = &txr[desc_idx];
> +				txn = &txe_ring[txe->next_id];
> +			}
> +
> +			iavf_fill_data_desc(ddesc, ddesc_template,
> +					slen, buf_dma_addr);
> 
>  			IAVF_DUMP_TX_DESC(txq, ddesc, desc_idx);
> 
> --
> 2.25.1



More information about the dev mailing list