[PATCH v3 2/3] net/i40e: enable direct rearm with separate API

Feifei Wang feifei.wang2 at arm.com
Wed Jan 4 08:30:42 CET 2023


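Add internal APIs that split the direct rearm operation into a Tx part
(filling the Rx software ring with completed Tx mbufs) and an Rx part
(flushing the rearmed descriptors to the NIC).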

Suggested-by: Honnappa Nagarahalli <honnappa.nagarahalli at arm.com>
Signed-off-by: Feifei Wang <feifei.wang2 at arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang at arm.com>
Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli at arm.com>
---
 drivers/net/i40e/i40e_ethdev.c          |  1 +
 drivers/net/i40e/i40e_ethdev.h          |  2 +
 drivers/net/i40e/i40e_rxtx.c            | 19 +++++++++
 drivers/net/i40e/i40e_rxtx.h            |  4 ++
 drivers/net/i40e/i40e_rxtx_vec_common.h | 54 +++++++++++++++++++++++++
 drivers/net/i40e/i40e_rxtx_vec_neon.c   | 42 +++++++++++++++++++
 6 files changed, 122 insertions(+)

diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
index 7726a89d99..29c1ce2470 100644
--- a/drivers/net/i40e/i40e_ethdev.c
+++ b/drivers/net/i40e/i40e_ethdev.c
@@ -497,6 +497,7 @@ static const struct eth_dev_ops i40e_eth_dev_ops = {
 	.flow_ops_get                 = i40e_dev_flow_ops_get,
 	.rxq_info_get                 = i40e_rxq_info_get,
 	.txq_info_get                 = i40e_txq_info_get,
+	.rxq_rearm_data_get           = i40e_rxq_rearm_data_get,
 	.rx_burst_mode_get            = i40e_rx_burst_mode_get,
 	.tx_burst_mode_get            = i40e_tx_burst_mode_get,
 	.timesync_enable              = i40e_timesync_enable,
diff --git a/drivers/net/i40e/i40e_ethdev.h b/drivers/net/i40e/i40e_ethdev.h
index fe943a45ff..6a6a2a6d3c 100644
--- a/drivers/net/i40e/i40e_ethdev.h
+++ b/drivers/net/i40e/i40e_ethdev.h
@@ -1352,6 +1352,8 @@ void i40e_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
 	struct rte_eth_rxq_info *qinfo);
 void i40e_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
 	struct rte_eth_txq_info *qinfo);
+void i40e_rxq_rearm_data_get(struct rte_eth_dev *dev, uint16_t queue_id,
+	struct rte_eth_rxq_rearm_data *rxq_rearm_data);
 int i40e_rx_burst_mode_get(struct rte_eth_dev *dev, uint16_t queue_id,
 			   struct rte_eth_burst_mode *mode);
 int i40e_tx_burst_mode_get(struct rte_eth_dev *dev, uint16_t queue_id,
diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
index 788ffb51c2..d8d801acaf 100644
--- a/drivers/net/i40e/i40e_rxtx.c
+++ b/drivers/net/i40e/i40e_rxtx.c
@@ -3197,6 +3197,19 @@ i40e_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
 	qinfo->conf.offloads = txq->offloads;
 }
 
+/* Expose the Rx queue's SW ring and rearm counters for direct rearm. */
+void
+i40e_rxq_rearm_data_get(struct rte_eth_dev *dev, uint16_t queue_id,
+	struct rte_eth_rxq_rearm_data *rxq_rearm_data)
+{
+	struct i40e_rx_queue *rxq = dev->data->rx_queues[queue_id];
+
+	/* addresses, not copies: the reader follows the queue's live state */
+	rxq_rearm_data->rearm_nb = &rxq->rxrearm_nb;
+	rxq_rearm_data->rearm_start = &rxq->rxrearm_start;
+	rxq_rearm_data->rx_sw_ring = rxq->sw_ring;
+}
+
 #ifdef RTE_ARCH_X86
 static inline bool
 get_avx_supported(bool request_avx512)
@@ -3321,6 +3334,9 @@ i40e_set_rx_function(struct rte_eth_dev *dev)
 			PMD_INIT_LOG(DEBUG, "Using Vector Rx (port %d).",
 				     dev->data->port_id);
 			dev->rx_pkt_burst = i40e_recv_pkts_vec;
+#ifdef RTE_ARCH_ARM64
+			dev->rx_flush_descriptor = i40e_rx_flush_descriptor_vec;
+#endif
 		}
 #endif /* RTE_ARCH_X86 */
 	} else if (!dev->data->scattered_rx && ad->rx_bulk_alloc_allowed) {
@@ -3484,6 +3500,9 @@ i40e_set_tx_function(struct rte_eth_dev *dev)
 			PMD_INIT_LOG(DEBUG, "Using Vector Tx (port %d).",
 				     dev->data->port_id);
 			dev->tx_pkt_burst = i40e_xmit_pkts_vec;
+#ifdef RTE_ARCH_ARM64
+			dev->tx_fill_sw_ring = i40e_tx_fill_sw_ring;
+#endif
 #endif /* RTE_ARCH_X86 */
 		} else {
 			PMD_INIT_LOG(DEBUG, "Simple tx finally be used.");
diff --git a/drivers/net/i40e/i40e_rxtx.h b/drivers/net/i40e/i40e_rxtx.h
index 5e6eecc501..8a29bd89df 100644
--- a/drivers/net/i40e/i40e_rxtx.h
+++ b/drivers/net/i40e/i40e_rxtx.h
@@ -233,6 +233,10 @@ uint32_t i40e_dev_rx_queue_count(void *rx_queue);
 int i40e_dev_rx_descriptor_status(void *rx_queue, uint16_t offset);
 int i40e_dev_tx_descriptor_status(void *tx_queue, uint16_t offset);
 
+int i40e_tx_fill_sw_ring(void *tx_queue,
+		struct rte_eth_rxq_rearm_data *rxq_rearm_data);
+int i40e_rx_flush_descriptor_vec(void *rx_queue, uint16_t nb_rearm);
+
 uint16_t i40e_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 			    uint16_t nb_pkts);
 uint16_t i40e_recv_scattered_pkts_vec(void *rx_queue,
diff --git a/drivers/net/i40e/i40e_rxtx_vec_common.h b/drivers/net/i40e/i40e_rxtx_vec_common.h
index fe1a6ec75e..eb96301a43 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_common.h
+++ b/drivers/net/i40e/i40e_rxtx_vec_common.h
@@ -146,6 +146,60 @@ i40e_tx_free_bufs(struct i40e_tx_queue *txq)
 	return txq->tx_rs_thresh;
 }
 
+/* Move one tx_rs_thresh batch of completed Tx mbufs into the Rx SW ring,
+ * starting at *rearm_start. Returns the number of consecutive Rx entries
+ * filled; 0 when the batch is not ready or no mbuf could be reused.
+ */
+int
+i40e_tx_fill_sw_ring(void *tx_queue,
+		struct rte_eth_rxq_rearm_data *rxq_rearm_data)
+{
+	struct i40e_tx_queue *txq = tx_queue;
+	struct i40e_tx_entry *txep;
+	void **rxep;
+	struct rte_mbuf *m;
+	int i, n, nb_rearm = 0;
+
+	/* skip unless Rx can take a full batch and Tx is at/below free thresh */
+	if (*rxq_rearm_data->rearm_nb < txq->tx_rs_thresh ||
+			txq->nb_tx_free > txq->tx_free_thresh)
+		return 0;
+
+	/* check DD bits on threshold descriptor */
+	if ((txq->tx_ring[txq->tx_next_dd].cmd_type_offset_bsz &
+			rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) !=
+			rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE))
+		return 0;
+
+	n = txq->tx_rs_thresh;
+	/* first buffer to free is at index tx_next_dd - (tx_rs_thresh - 1) */
+	txep = &txq->sw_ring[txq->tx_next_dd - (n - 1)];
+	rxep = rxq_rearm_data->rx_sw_ring;
+	rxep += *rxq_rearm_data->rearm_start;
+
+	if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
+		/* directly put mbufs from Tx to Rx */
+		for (i = 0; i < n; i++, rxep++, txep++)
+			*rxep = txep[0].mbuf;
+	} else {
+		/* pack densely: a NULL from prefree must not leave a stale Rx slot */
+		for (i = 0; i < n; i++) {
+			m = rte_pktmbuf_prefree_seg(txep[i].mbuf);
+			if (m != NULL)
+				rxep[nb_rearm++] = m;
+		}
+		n = nb_rearm;
+	}
+
+	/* update counters for Tx */
+	txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
+	txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
+	if (txq->tx_next_dd >= txq->nb_tx_desc)
+		txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
+
+	return n;
+}
+
 static __rte_always_inline void
 tx_backlog_entry(struct i40e_tx_entry *txep,
 		 struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
diff --git a/drivers/net/i40e/i40e_rxtx_vec_neon.c b/drivers/net/i40e/i40e_rxtx_vec_neon.c
index 12e6f1cbcb..1509d3223b 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_neon.c
+++ b/drivers/net/i40e/i40e_rxtx_vec_neon.c
@@ -739,6 +739,48 @@ i40e_xmit_fixed_burst_vec(void *__rte_restrict tx_queue,
 	return nb_pkts;
 }
 
+/* Write the buffer addresses of nb_rearm mbufs (from rxrearm_start on) into
+ * their Rx descriptors, advance the rearm cursor and NIC tail; returns 0.
+ */
+int
+i40e_rx_flush_descriptor_vec(void *rx_queue, uint16_t nb_rearm)
+{
+	struct i40e_rx_queue *rxq = rx_queue;
+	struct i40e_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
+	volatile union i40e_rx_desc *rxdp = rxq->rx_ring + rxq->rxrearm_start;
+	uint16_t rx_id, i;
+	uint64_t paddr;
+
+	/* nothing to flush; also avoids a bogus tail when rxrearm_start is 0 */
+	if (nb_rearm == 0)
+		return 0;
+
+	/* NOTE(review): no wrap handling in this loop; presumably the caller
+	 * keeps rxrearm_start + nb_rearm within the ring - confirm.
+	 */
+	for (i = 0; i < nb_rearm; i++) {
+		paddr = (rxep[i].mbuf)->buf_iova + RTE_PKTMBUF_HEADROOM;
+		/* store the address into both 64-bit lanes of the descriptor */
+		vst1q_u64((uint64_t *)&rxdp++->read, vdupq_n_u64(paddr));
+	}
+
+	/* Update the descriptor initializer index, wrapping at ring end */
+	rxq->rxrearm_start += nb_rearm;
+	if (unlikely(rxq->rxrearm_start >= rxq->nb_rx_desc))
+		rxq->rxrearm_start -= rxq->nb_rx_desc;
+	rxq->rxrearm_nb -= nb_rearm;
+
+	/* tail is the descriptor just before the new rearm start */
+	rx_id = rxq->rxrearm_start ? rxq->rxrearm_start - 1 :
+			rxq->nb_rx_desc - 1;
+
+	rte_io_wmb();
+	/* Update the tail pointer on the NIC */
+	I40E_PCI_REG_WRITE_RELAXED(rxq->qrx_tail, rx_id);
+
+	return 0;
+}
+
 void __rte_cold
 i40e_rx_queue_release_mbufs_vec(struct i40e_rx_queue *rxq)
 {
-- 
2.25.1



More information about the dev mailing list