[v1,2/2] net/i40e: use movdiri to update queue tail registers

Message ID 1591870283-7776-2-git-send-email-radu.nicolau@intel.com (mailing list archive)
State Superseded, archived
Headers
Series [v1,1/2] eal/x86: add WC store function |

Checks

Context Check Description
ci/checkpatch warning coding style issues
ci/travis-robot success Travis build: passed
ci/Intel-compilation success Compilation OK

Commit Message

Radu Nicolau June 11, 2020, 10:11 a.m. UTC
  If available, use the MOVDIRI instruction instead of a regular MMIO write
to update the queue tail registers.

Signed-off-by: Radu Nicolau <radu.nicolau@intel.com>
---
 drivers/net/i40e/base/i40e_osdep.h    | 20 ++++++++++++++++++++
 drivers/net/i40e/i40e_ethdev_vf.c     | 10 ++++++++++
 drivers/net/i40e/i40e_fdir.c          |  4 ++++
 drivers/net/i40e/i40e_rxtx.c          | 19 +++++++++++++++----
 drivers/net/i40e/i40e_rxtx.h          |  2 ++
 drivers/net/i40e/i40e_rxtx_vec_avx2.c |  4 ++--
 drivers/net/i40e/i40e_rxtx_vec_sse.c  |  4 ++--
 7 files changed, 55 insertions(+), 8 deletions(-)
  

Patch

diff --git a/drivers/net/i40e/base/i40e_osdep.h b/drivers/net/i40e/base/i40e_osdep.h
index 58be396..b642c6f 100644
--- a/drivers/net/i40e/base/i40e_osdep.h
+++ b/drivers/net/i40e/base/i40e_osdep.h
@@ -138,6 +138,34 @@  static inline uint32_t i40e_read_addr(volatile void *addr)
 #define I40E_PCI_REG_WRITE_RELAXED(reg, value)	\
 	rte_write32_relaxed((rte_cpu_to_le_32(value)), reg)
 
+/*
+ * Write a queue tail register. On x86, when the queue's use_movdiri flag is
+ * set, the store is issued through rte_write32_wc(); otherwise it falls back
+ * to the regular rte_write32()/rte_write32_relaxed() MMIO write. On other
+ * architectures these are plain aliases of I40E_PCI_REG_WRITE{,_RELAXED}.
+ * The x86 variant captures reg and value in block-local temporaries first.
+ */
+#if defined(RTE_ARCH_X86)
+#define I40E_PCI_REG_WC_WRITE_IMPL(queue, reg, value, write_fn)		\
+	do {								\
+		uint32_t wc_val_ = rte_cpu_to_le_32(value);		\
+		volatile void *wc_addr_ = (reg);			\
+		if ((queue)->use_movdiri)				\
+			rte_write32_wc(wc_val_, wc_addr_);		\
+		else							\
+			write_fn(wc_val_, wc_addr_);			\
+	} while (0)
+#define I40E_PCI_REG_WC_WRITE(queue, reg, value)			\
+	I40E_PCI_REG_WC_WRITE_IMPL(queue, reg, value, rte_write32)
+#define I40E_PCI_REG_WC_WRITE_RELAXED(queue, reg, value)		\
+	I40E_PCI_REG_WC_WRITE_IMPL(queue, reg, value, rte_write32_relaxed)
+#else
+#define I40E_PCI_REG_WC_WRITE(queue, reg, value)			\
+	I40E_PCI_REG_WRITE(reg, value)
+#define I40E_PCI_REG_WC_WRITE_RELAXED(queue, reg, value)		\
+	I40E_PCI_REG_WRITE_RELAXED(reg, value)
+#endif
+
 #define I40E_WRITE_FLUSH(a) I40E_READ_REG(a, I40E_GLGEN_STAT)
 #define I40EVF_WRITE_FLUSH(a) I40E_READ_REG(a, I40E_VFGEN_RSTAT)
 
diff --git a/drivers/net/i40e/i40e_ethdev_vf.c b/drivers/net/i40e/i40e_ethdev_vf.c
index eca716a..6a82b7b 100644
--- a/drivers/net/i40e/i40e_ethdev_vf.c
+++ b/drivers/net/i40e/i40e_ethdev_vf.c
@@ -1790,6 +1790,11 @@  i40evf_rxq_init(struct rte_eth_dev *dev, struct i40e_rx_queue *rxq)
 	rxq->max_pkt_len = RTE_MIN(len,
 		dev_data->dev_conf.rxmode.max_rx_pkt_len);
 
+#if defined(RTE_ARCH_X86)
+	/* use MOVDIRI if supported*/
+	rxq->use_movdiri = rte_cpu_get_flag_enabled(RTE_CPUFLAG_MOVDIRI);
+#endif
+
 	/**
 	 * Check if the jumbo frame and maximum packet length are set correctly
 	 */
@@ -1855,6 +1860,12 @@  i40evf_tx_init(struct rte_eth_dev *dev)
-	for (i = 0; i < dev->data->nb_tx_queues; i++)
+	for (i = 0; i < dev->data->nb_tx_queues; i++) {
 		txq[i]->qtx_tail = hw->hw_addr + I40E_QTX_TAIL1(i);
+#if defined(RTE_ARCH_X86)
+		/* per-queue flag: use MOVDIRI if the CPU supports it */
+		txq[i]->use_movdiri =
+			rte_cpu_get_flag_enabled(RTE_CPUFLAG_MOVDIRI);
+#endif
+	}
 
 	i40e_set_tx_function(dev);
 }
 
diff --git a/drivers/net/i40e/i40e_fdir.c b/drivers/net/i40e/i40e_fdir.c
index d59399a..6f1bc86 100644
--- a/drivers/net/i40e/i40e_fdir.c
+++ b/drivers/net/i40e/i40e_fdir.c
@@ -142,6 +142,10 @@  i40e_fdir_rx_queue_init(struct i40e_rx_queue *rxq)
 	}
 	rxq->qrx_tail = hw->hw_addr +
 		I40E_QRX_TAIL(rxq->vsi->base_queue);
+#if defined(RTE_ARCH_X86)
+	/* use MOVDIRI if supported*/
+	rxq->use_movdiri = rte_cpu_get_flag_enabled(RTE_CPUFLAG_MOVDIRI);
+#endif
 
 	rte_wmb();
 	/* Init the RX tail regieter. */
diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
index 840b6f3..44bba68 100644
--- a/drivers/net/i40e/i40e_rxtx.c
+++ b/drivers/net/i40e/i40e_rxtx.c
@@ -760,7 +760,7 @@  i40e_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 	if (nb_hold > rxq->rx_free_thresh) {
 		rx_id = (uint16_t) ((rx_id == 0) ?
 			(rxq->nb_rx_desc - 1) : (rx_id - 1));
-		I40E_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
+		I40E_PCI_REG_WC_WRITE(rxq, rxq->qrx_tail, rx_id);
 		nb_hold = 0;
 	}
 	rxq->nb_rx_hold = nb_hold;
@@ -938,7 +938,7 @@  i40e_recv_scattered_pkts(void *rx_queue,
 	if (nb_hold > rxq->rx_free_thresh) {
 		rx_id = (uint16_t)(rx_id == 0 ?
 			(rxq->nb_rx_desc - 1) : (rx_id - 1));
-		I40E_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
+		I40E_PCI_REG_WC_WRITE(rxq, rxq->qrx_tail, rx_id);
 		nb_hold = 0;
 	}
 	rxq->nb_rx_hold = nb_hold;
@@ -1249,7 +1249,7 @@  i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 		   (unsigned) tx_id, (unsigned) nb_tx);
 
 	rte_cio_wmb();
-	I40E_PCI_REG_WRITE_RELAXED(txq->qtx_tail, tx_id);
+	I40E_PCI_REG_WC_WRITE_RELAXED(txq, txq->qtx_tail, tx_id);
 	txq->tx_tail = tx_id;
 
 	return nb_tx;
@@ -1400,7 +1400,7 @@  tx_xmit_pkts(struct i40e_tx_queue *txq,
 		txq->tx_tail = 0;
 
 	/* Update the tx tail register */
-	I40E_PCI_REG_WRITE(txq->qtx_tail, txq->tx_tail);
+	I40E_PCI_REG_WC_WRITE(txq, txq->qtx_tail, txq->tx_tail);
 
 	return nb_pkts;
 }
@@ -2717,6 +2717,12 @@  i40e_tx_queue_init(struct i40e_tx_queue *txq)
 
 	txq->qtx_tail = hw->hw_addr + I40E_QTX_TAIL(pf_q);
 
+#if defined(RTE_ARCH_X86)
+	/* use MOVDIRI if supported */
+	txq->use_movdiri = rte_cpu_get_flag_enabled(RTE_CPUFLAG_MOVDIRI);
+#endif
+
+
 	return err;
 }
 
@@ -2881,6 +2887,11 @@  i40e_rx_queue_init(struct i40e_rx_queue *rxq)
 
 	rxq->qrx_tail = hw->hw_addr + I40E_QRX_TAIL(pf_q);
 
+#if defined(RTE_ARCH_X86)
+	/* use MOVDIRI WC store if supported*/
+	rxq->use_movdiri = rte_cpu_get_flag_enabled(RTE_CPUFLAG_MOVDIRI);
+#endif
+
 	buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mp) -
 		RTE_PKTMBUF_HEADROOM);
 
diff --git a/drivers/net/i40e/i40e_rxtx.h b/drivers/net/i40e/i40e_rxtx.h
index 8f11f01..9c9d676 100644
--- a/drivers/net/i40e/i40e_rxtx.h
+++ b/drivers/net/i40e/i40e_rxtx.h
@@ -118,6 +118,7 @@  struct i40e_rx_queue {
 	uint16_t rx_using_sse; /**<flag indicate the usage of vPMD for rx */
 	uint8_t dcb_tc;         /**< Traffic class of rx queue */
 	uint64_t offloads; /**< Rx offload flags of DEV_RX_OFFLOAD_* */
+	uint8_t use_movdiri; /**< use MOVDIRI if supported */
 };
 
 struct i40e_tx_entry {
@@ -159,6 +160,7 @@  struct i40e_tx_queue {
 	bool tx_deferred_start; /**< don't start this queue in dev start */
 	uint8_t dcb_tc;         /**< Traffic class of tx queue */
 	uint64_t offloads; /**< Tx offload flags of DEV_RX_OFFLOAD_* */
+	uint8_t use_movdiri; /**< use MOVDIRI if supported */
 };
 
 /** Offload features */
diff --git a/drivers/net/i40e/i40e_rxtx_vec_avx2.c b/drivers/net/i40e/i40e_rxtx_vec_avx2.c
index 3bcef13..294c1c4 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_avx2.c
+++ b/drivers/net/i40e/i40e_rxtx_vec_avx2.c
@@ -134,7 +134,7 @@  i40e_rxq_rearm(struct i40e_rx_queue *rxq)
 			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
 
 	/* Update the tail pointer on the NIC */
-	I40E_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
+	I40E_PCI_REG_WC_WRITE(rxq, rxq->qrx_tail, rx_id);
 }
 
 #ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
@@ -921,7 +921,7 @@  i40e_xmit_fixed_burst_vec_avx2(void *tx_queue, struct rte_mbuf **tx_pkts,
 
 	txq->tx_tail = tx_id;
 
-	I40E_PCI_REG_WRITE(txq->qtx_tail, txq->tx_tail);
+	I40E_PCI_REG_WC_WRITE(txq, txq->qtx_tail, txq->tx_tail);
 
 	return nb_pkts;
 }
diff --git a/drivers/net/i40e/i40e_rxtx_vec_sse.c b/drivers/net/i40e/i40e_rxtx_vec_sse.c
index 6985183..a4635e0 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_sse.c
+++ b/drivers/net/i40e/i40e_rxtx_vec_sse.c
@@ -86,7 +86,7 @@  i40e_rxq_rearm(struct i40e_rx_queue *rxq)
 			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
 
 	/* Update the tail pointer on the NIC */
-	I40E_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
+	I40E_PCI_REG_WC_WRITE(rxq, rxq->qrx_tail, rx_id);
 }
 
 #ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
@@ -733,7 +733,7 @@  i40e_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 
 	txq->tx_tail = tx_id;
 
-	I40E_PCI_REG_WRITE(txq->qtx_tail, txq->tx_tail);
+	I40E_PCI_REG_WC_WRITE(txq, txq->qtx_tail, txq->tx_tail);
 
 	return nb_pkts;
 }