[v3] eal: remove deprecated coherent IO memory barriers

Message ID 1600852597-9003-2-git-send-email-phil.yang@arm.com (mailing list archive)
State Accepted, archived
Delegated to: David Marchand
Headers
Series [v3] eal: remove deprecated coherent IO memory barriers |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK
ci/travis-robot success Travis build: passed

Commit Message

Phil Yang Sept. 23, 2020, 9:16 a.m. UTC
  Since the 20.08 release deprecated rte_cio_*mb APIs because these APIs
provide the same functionality as rte_io_*mb APIs on all platforms, so
remove them and use rte_io_*mb instead.

Signed-off-by: Phil Yang <phil.yang@arm.com>
Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
---
 doc/guides/rel_notes/deprecation.rst          |  6 -----
 doc/guides/rel_notes/release_20_11.rst        |  3 +++
 drivers/common/mlx5/mlx5_common.h             |  2 +-
 drivers/crypto/octeontx2/otx2_cryptodev_ops.c |  2 +-
 drivers/crypto/octeontx2/otx2_cryptodev_sec.c |  4 +--
 drivers/event/octeontx/ssovf_worker.c         |  6 ++---
 drivers/event/octeontx2/otx2_worker.h         |  2 +-
 drivers/net/bnxt/bnxt_hwrm.c                  |  2 +-
 drivers/net/bnxt/bnxt_ring.h                  |  6 ++---
 drivers/net/bnxt/bnxt_rxtx_vec_neon.c         |  8 +++---
 drivers/net/e1000/em_rxtx.c                   |  2 +-
 drivers/net/i40e/i40e_rxtx.c                  |  2 +-
 drivers/net/i40e/i40e_rxtx_vec_neon.c         |  4 +--
 drivers/net/mlx5/mlx5_flow.c                  |  2 +-
 drivers/net/mlx5/mlx5_flow_dv.c               |  2 +-
 drivers/net/mlx5/mlx5_rxq.c                   | 14 +++++-----
 drivers/net/mlx5/mlx5_rxtx.c                  | 16 +++++------
 drivers/net/mlx5/mlx5_rxtx.h                  |  2 +-
 drivers/net/mlx5/mlx5_rxtx_vec.h              |  2 +-
 drivers/net/mlx5/mlx5_rxtx_vec_altivec.h      |  2 +-
 drivers/net/mlx5/mlx5_rxtx_vec_neon.h         |  4 +--
 drivers/net/mlx5/mlx5_rxtx_vec_sse.h          |  2 +-
 drivers/net/mlx5/mlx5_txq.c                   |  4 +--
 drivers/net/octeontx/octeontx_rxtx.h          |  2 +-
 drivers/net/octeontx2/otx2_ethdev_sec.c       |  2 +-
 drivers/net/octeontx2/otx2_ethdev_sec_tx.h    |  2 +-
 drivers/net/octeontx2/otx2_rx.c               |  2 +-
 drivers/net/octeontx2/otx2_tx.c               |  6 ++---
 drivers/net/virtio/virtio_rxtx.c              |  2 +-
 drivers/net/virtio/virtio_rxtx_simple_neon.c  |  2 +-
 drivers/net/virtio/virtqueue.h                | 26 +++++++++---------
 drivers/raw/octeontx2_ep/otx2_ep_enqdeq.c     |  4 +--
 drivers/regex/mlx5/mlx5_regex_fastpath.c      |  4 +--
 lib/librte_eal/arm/include/rte_atomic_32.h    |  4 ---
 lib/librte_eal/arm/include/rte_atomic_64.h    |  4 ---
 lib/librte_eal/include/generic/rte_atomic.h   | 39 ---------------------------
 lib/librte_eal/ppc/include/rte_atomic.h       |  4 ---
 lib/librte_eal/x86/include/rte_atomic.h       |  4 ---
 38 files changed, 74 insertions(+), 132 deletions(-)
  

Comments

David Marchand Sept. 23, 2020, 12:20 p.m. UTC | #1
On Wed, Sep 23, 2020 at 11:17 AM Phil Yang <phil.yang@arm.com> wrote:
>
> Since the 20.08 release deprecated rte_cio_*mb APIs because these APIs
> provide the same functionality as rte_io_*mb APIs on all platforms, so
> remove them and use rte_io_*mb instead.
>
> Signed-off-by: Phil Yang <phil.yang@arm.com>
> Signed-off-by: Joyce Kong <joyce.kong@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>

UNH had a hiccup when trying to apply this patch on 0c762e81da9b:
maybe UNH lab scripts pulled the main repository at the wrong time.

Anyway, the change is mechanical and my checks passed.
Acked-by: David Marchand <david.marchand@redhat.com>

Applied, thanks Phil.
  

Patch

diff --git a/doc/guides/rel_notes/deprecation.rst b/doc/guides/rel_notes/deprecation.rst
index 95a31c7..67caedb 100644
--- a/doc/guides/rel_notes/deprecation.rst
+++ b/doc/guides/rel_notes/deprecation.rst
@@ -83,12 +83,6 @@  Deprecation Notices
   These wrappers must be used for patches that need to be merged in 20.08
   onwards. This change will not introduce any performance degradation.
 
-* rte_cio_*mb: Since the IO barriers for ARMv8 platforms are relaxed from DSB
-  to DMB, rte_cio_*mb APIs provide the same functionality as rte_io_*mb
-  APIs (taking all platforms into consideration). rte_io_*mb APIs should be
-  used in the place of rte_cio_*mb APIs. The rte_cio_*mb APIs will be
-  deprecated in 20.11 release.
-
 * igb_uio: In the view of reducing the kernel dependency from the main tree,
   as a first step, the Technical Board decided to move ``igb_uio``
   kernel module to the dpdk-kmods repository in the /linux/igb_uio/ directory
diff --git a/doc/guides/rel_notes/release_20_11.rst b/doc/guides/rel_notes/release_20_11.rst
index c6642f5..57041f6 100644
--- a/doc/guides/rel_notes/release_20_11.rst
+++ b/doc/guides/rel_notes/release_20_11.rst
@@ -165,6 +165,9 @@  API Changes
 
 * bpf: ``RTE_BPF_XTYPE_NUM`` has been dropped from ``rte_bpf_xtype``.
 
+* eal: ``rte_cio_rmb()`` and ``rte_cio_wmb()`` were deprecated since 20.08
+  and are removed in this release.
+
 
 ABI Changes
 -----------
diff --git a/drivers/common/mlx5/mlx5_common.h b/drivers/common/mlx5/mlx5_common.h
index 2cdb226..ed44a45 100644
--- a/drivers/common/mlx5/mlx5_common.h
+++ b/drivers/common/mlx5/mlx5_common.h
@@ -193,7 +193,7 @@  check_cqe(volatile struct mlx5_cqe *cqe, const uint16_t cqes_n,
 
 	if (unlikely((op_owner != (!!(idx))) || (op_code == MLX5_CQE_INVALID)))
 		return MLX5_CQE_STATUS_HW_OWN;
-	rte_cio_rmb();
+	rte_io_rmb();
 	if (unlikely(op_code == MLX5_CQE_RESP_ERR ||
 		     op_code == MLX5_CQE_REQ_ERR))
 		return MLX5_CQE_STATUS_ERR;
diff --git a/drivers/crypto/octeontx2/otx2_cryptodev_ops.c b/drivers/crypto/octeontx2/otx2_cryptodev_ops.c
index 9d51b17..df39cde 100644
--- a/drivers/crypto/octeontx2/otx2_cryptodev_ops.c
+++ b/drivers/crypto/octeontx2/otx2_cryptodev_ops.c
@@ -469,7 +469,7 @@  otx2_cpt_enqueue_req(const struct otx2_cpt_qp *qp,
 		 * buffer immediately, a DMB is not required to push out
 		 * LMTSTs.
 		 */
-		rte_cio_wmb();
+		rte_io_wmb();
 		lmt_status = otx2_lmt_submit(qp->lf_nq_reg);
 	} while (lmt_status == 0);
 
diff --git a/drivers/crypto/octeontx2/otx2_cryptodev_sec.c b/drivers/crypto/octeontx2/otx2_cryptodev_sec.c
index 0741a59..72e6c41 100644
--- a/drivers/crypto/octeontx2/otx2_cryptodev_sec.c
+++ b/drivers/crypto/octeontx2/otx2_cryptodev_sec.c
@@ -107,7 +107,7 @@  otx2_cpt_enq_sa_write(struct otx2_sec_session_ipsec_lp *lp,
 	inst.u64[3] = 0;
 	inst.res_addr = rte_mempool_virt2iova(res);
 
-	rte_cio_wmb();
+	rte_io_wmb();
 
 	do {
 		/* Copy CPT command to LMTLINE */
@@ -124,7 +124,7 @@  otx2_cpt_enq_sa_write(struct otx2_sec_session_ipsec_lp *lp,
 			otx2_err("Request timed out");
 			return -ETIMEDOUT;
 		}
-	    rte_cio_rmb();
+	    rte_io_rmb();
 	}
 
 	if (unlikely(res->compcode != CPT_9X_COMP_E_GOOD)) {
diff --git a/drivers/event/octeontx/ssovf_worker.c b/drivers/event/octeontx/ssovf_worker.c
index 18b7926..3dfe665 100644
--- a/drivers/event/octeontx/ssovf_worker.c
+++ b/drivers/event/octeontx/ssovf_worker.c
@@ -286,17 +286,17 @@  __sso_event_tx_adapter_enqueue(void *port, struct rte_event ev[],
 	switch (ev->sched_type) {
 	case SSO_SYNC_ORDERED:
 		ssows_swtag_norm(ws, ev->event, SSO_SYNC_ATOMIC);
-		rte_cio_wmb();
+		rte_io_wmb();
 		ssows_swtag_wait(ws);
 		break;
 	case SSO_SYNC_UNTAGGED:
 		ssows_swtag_full(ws, ev->u64, ev->event, SSO_SYNC_ATOMIC,
 				ev->queue_id);
-		rte_cio_wmb();
+		rte_io_wmb();
 		ssows_swtag_wait(ws);
 		break;
 	case SSO_SYNC_ATOMIC:
-		rte_cio_wmb();
+		rte_io_wmb();
 		break;
 	}
 
diff --git a/drivers/event/octeontx2/otx2_worker.h b/drivers/event/octeontx2/otx2_worker.h
index 924ff7f..cde1288 100644
--- a/drivers/event/octeontx2/otx2_worker.h
+++ b/drivers/event/octeontx2/otx2_worker.h
@@ -256,7 +256,7 @@  otx2_ssogws_order(struct otx2_ssogws *ws, const uint8_t wait_flag)
 	if (wait_flag)
 		otx2_ssogws_head_wait(ws);
 
-	rte_cio_wmb();
+	rte_io_wmb();
 }
 
 static __rte_always_inline const struct otx2_eth_txq *
diff --git a/drivers/net/bnxt/bnxt_hwrm.c b/drivers/net/bnxt/bnxt_hwrm.c
index 57d1026..d0b820f 100644
--- a/drivers/net/bnxt/bnxt_hwrm.c
+++ b/drivers/net/bnxt/bnxt_hwrm.c
@@ -148,7 +148,7 @@  static int bnxt_hwrm_send_message(struct bnxt *bp, void *msg,
 	/* Poll for the valid bit */
 	for (i = 0; i < timeout; i++) {
 		/* Sanity check on the resp->resp_len */
-		rte_cio_rmb();
+		rte_io_rmb();
 		if (resp->resp_len && resp->resp_len <= bp->max_resp_len) {
 			/* Last byte of resp contains the valid key */
 			valid = (uint8_t *)resp + resp->resp_len - 1;
diff --git a/drivers/net/bnxt/bnxt_ring.h b/drivers/net/bnxt/bnxt_ring.h
index 9913aed..daf9804 100644
--- a/drivers/net/bnxt/bnxt_ring.h
+++ b/drivers/net/bnxt/bnxt_ring.h
@@ -82,7 +82,7 @@  void bnxt_free_rxtx_nq_ring(struct bnxt *bp);
 
 static inline void bnxt_db_write(struct bnxt_db_info *db, uint32_t idx)
 {
-	rte_cio_wmb();
+	rte_io_wmb();
 
 	if (db->db_64)
 		rte_write64_relaxed(db->db_key64 | idx, db->doorbell);
@@ -96,7 +96,7 @@  static inline void bnxt_db_nq(struct bnxt_cp_ring_info *cpr)
 	if (unlikely(!cpr->cp_db.db_64))
 		return;
 
-	rte_cio_wmb();
+	rte_io_wmb();
 	rte_write64_relaxed(cpr->cp_db.db_key64 | DBR_TYPE_NQ |
 			    RING_CMP(cpr->cp_ring_struct, cpr->cp_raw_cons),
 			    cpr->cp_db.doorbell);
@@ -108,7 +108,7 @@  static inline void bnxt_db_nq_arm(struct bnxt_cp_ring_info *cpr)
 	if (unlikely(!cpr->cp_db.db_64))
 		return;
 
-	rte_cio_wmb();
+	rte_io_wmb();
 	rte_write64_relaxed(cpr->cp_db.db_key64 | DBR_TYPE_NQ_ARM |
 			    RING_CMP(cpr->cp_ring_struct, cpr->cp_raw_cons),
 			    cpr->cp_db.doorbell);
diff --git a/drivers/net/bnxt/bnxt_rxtx_vec_neon.c b/drivers/net/bnxt/bnxt_rxtx_vec_neon.c
index 4075669..299b6b8 100644
--- a/drivers/net/bnxt/bnxt_rxtx_vec_neon.c
+++ b/drivers/net/bnxt/bnxt_rxtx_vec_neon.c
@@ -258,21 +258,21 @@  bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 		 * reverse order to ensure consistent state.
 		 */
 		rxcmp1[3] = vld1q_u32((void *)&cpr->cp_desc_ring[cons + 7]);
-		rte_cio_rmb();
+		rte_io_rmb();
 		rxcmp[3] = vld1q_u32((void *)&cpr->cp_desc_ring[cons + 6]);
 
 		rxcmp1[2] = vld1q_u32((void *)&cpr->cp_desc_ring[cons + 5]);
-		rte_cio_rmb();
+		rte_io_rmb();
 		rxcmp[2] = vld1q_u32((void *)&cpr->cp_desc_ring[cons + 4]);
 
 		t1 = vreinterpretq_u64_u32(vzip2q_u32(rxcmp1[2], rxcmp1[3]));
 
 		rxcmp1[1] = vld1q_u32((void *)&cpr->cp_desc_ring[cons + 3]);
-		rte_cio_rmb();
+		rte_io_rmb();
 		rxcmp[1] = vld1q_u32((void *)&cpr->cp_desc_ring[cons + 2]);
 
 		rxcmp1[0] = vld1q_u32((void *)&cpr->cp_desc_ring[cons + 1]);
-		rte_cio_rmb();
+		rte_io_rmb();
 		rxcmp[0] = vld1q_u32((void *)&cpr->cp_desc_ring[cons + 0]);
 
 		t0 = vreinterpretq_u64_u32(vzip2q_u32(rxcmp1[0], rxcmp1[1]));
diff --git a/drivers/net/e1000/em_rxtx.c b/drivers/net/e1000/em_rxtx.c
index 67a271e..19e3bff 100644
--- a/drivers/net/e1000/em_rxtx.c
+++ b/drivers/net/e1000/em_rxtx.c
@@ -2051,7 +2051,7 @@  e1000_flush_tx_ring(struct rte_eth_dev *dev)
 		tx_desc->lower.data = rte_cpu_to_le_32(txd_lower | size);
 		tx_desc->upper.data = 0;
 
-		rte_cio_wmb();
+		rte_io_wmb();
 		txq->tx_tail++;
 		if (txq->tx_tail == txq->nb_tx_desc)
 			txq->tx_tail = 0;
diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
index 60b33d2..322fc1e 100644
--- a/drivers/net/i40e/i40e_rxtx.c
+++ b/drivers/net/i40e/i40e_rxtx.c
@@ -1248,7 +1248,7 @@  i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 		   (unsigned) txq->port_id, (unsigned) txq->queue_id,
 		   (unsigned) tx_id, (unsigned) nb_tx);
 
-	rte_cio_wmb();
+	rte_io_wmb();
 	I40E_PCI_REG_WRITE_RELAXED(txq->qtx_tail, tx_id);
 	txq->tx_tail = tx_id;
 
diff --git a/drivers/net/i40e/i40e_rxtx_vec_neon.c b/drivers/net/i40e/i40e_rxtx_vec_neon.c
index 6f874e4..543ecad 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_neon.c
+++ b/drivers/net/i40e/i40e_rxtx_vec_neon.c
@@ -72,7 +72,7 @@  i40e_rxq_rearm(struct i40e_rx_queue *rxq)
 	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
 			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
 
-	rte_cio_wmb();
+	rte_io_wmb();
 	/* Update the tail pointer on the NIC */
 	I40E_PCI_REG_WRITE_RELAXED(rxq->qrx_tail, rx_id);
 }
@@ -566,7 +566,7 @@  i40e_xmit_fixed_burst_vec(void *__rte_restrict tx_queue,
 
 	txq->tx_tail = tx_id;
 
-	rte_cio_wmb();
+	rte_io_wmb();
 	I40E_PCI_REG_WRITE_RELAXED(txq->qtx_tail, tx_id);
 
 	return nb_pkts;
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 416505f..ffa7646 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -6118,7 +6118,7 @@  mlx5_flow_async_pool_query_handle(struct mlx5_dev_ctx_shared *sh,
 		pool->raw = pool->raw_hw;
 		rte_spinlock_unlock(&pool->sl);
 		/* Be sure the new raw counters data is updated in memory. */
-		rte_cio_wmb();
+		rte_io_wmb();
 		if (!TAILQ_EMPTY(&pool->counters[query_gen])) {
 			rte_spinlock_lock(&cont->csl);
 			TAILQ_CONCAT(&cont->counters,
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 56529c8..ca1f39f 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -4452,7 +4452,7 @@  flow_dv_pool_create(struct rte_eth_dev *dev, struct mlx5_devx_obj *dcs,
 		cont->last_pool_idx = pool->index;
 	}
 	/* Pool initialization must be updated before host thread access. */
-	rte_cio_wmb();
+	rte_io_wmb();
 	rte_atomic16_add(&cont->n_valid, 1);
 	return pool;
 }
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 487f997..9f68a5c 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -484,11 +484,11 @@  rxq_sync_cq(struct mlx5_rxq_data *rxq)
 		cqe->op_own = MLX5_CQE_INVALIDATE;
 	}
 	/* Resync CQE and WQE (WQ in RESET state). */
-	rte_cio_wmb();
+	rte_io_wmb();
 	*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
-	rte_cio_wmb();
+	rte_io_wmb();
 	*rxq->rq_db = rte_cpu_to_be_32(0);
-	rte_cio_wmb();
+	rte_io_wmb();
 }
 
 /**
@@ -606,12 +606,12 @@  mlx5_rx_queue_start_primary(struct rte_eth_dev *dev, uint16_t idx)
 		rte_errno = errno;
 		return ret;
 	}
-	rte_cio_wmb();
+	rte_io_wmb();
 	*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
-	rte_cio_wmb();
-	/* Reset RQ consumer before moving queue to READY state. */
+	rte_io_wmb();
+	/* Reset RQ consumer before moving queue ro READY state. */
 	*rxq->rq_db = rte_cpu_to_be_32(0);
-	rte_cio_wmb();
+	rte_io_wmb();
 	ret = priv->obj_ops.rxq_obj_modify(rxq_ctrl->obj, true);
 	if (ret) {
 		DRV_LOG(ERR, "Cannot change Rx WQ state to READY:  %s",
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 1b71e94..101555e 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -873,7 +873,7 @@  mlx5_rxq_initialize(struct mlx5_rxq_data *rxq)
 	};
 	/* Update doorbell counter. */
 	rxq->rq_ci = wqe_n >> rxq->sges_n;
-	rte_cio_wmb();
+	rte_io_wmb();
 	*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
 }
 
@@ -1113,15 +1113,15 @@  mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec)
 	case MLX5_RXQ_ERR_STATE_NEED_READY:
 		ret = check_cqe(u.cqe, cqe_n, rxq->cq_ci);
 		if (ret == MLX5_CQE_STATUS_HW_OWN) {
-			rte_cio_wmb();
+			rte_io_wmb();
 			*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
-			rte_cio_wmb();
+			rte_io_wmb();
 			/*
 			 * The RQ consumer index must be zeroed while moving
 			 * from RESET state to RDY state.
 			 */
 			*rxq->rq_db = rte_cpu_to_be_32(0);
-			rte_cio_wmb();
+			rte_io_wmb();
 			sm.is_wq = 1;
 			sm.queue_id = rxq->idx;
 			sm.state = IBV_WQS_RDY;
@@ -1515,9 +1515,9 @@  mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 		return 0;
 	/* Update the consumer index. */
 	rxq->rq_ci = rq_ci >> sges_n;
-	rte_cio_wmb();
+	rte_io_wmb();
 	*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
-	rte_cio_wmb();
+	rte_io_wmb();
 	*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
 #ifdef MLX5_PMD_SOFT_COUNTERS
 	/* Increment packets counter. */
@@ -1893,11 +1893,11 @@  mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 out:
 	/* Update the consumer indexes. */
 	rxq->consumed_strd = consumed_strd;
-	rte_cio_wmb();
+	rte_io_wmb();
 	*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
 	if (rq_ci != rxq->rq_ci) {
 		rxq->rq_ci = rq_ci;
-		rte_cio_wmb();
+		rte_io_wmb();
 		*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
 	}
 #ifdef MLX5_PMD_SOFT_COUNTERS
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index a8e6837..6876c1b 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -627,7 +627,7 @@  mlx5_tx_dbrec_cond_wmb(struct mlx5_txq_data *txq, volatile struct mlx5_wqe *wqe,
 	uint64_t *dst = MLX5_TX_BFREG(txq);
 	volatile uint64_t *src = ((volatile uint64_t *)wqe);
 
-	rte_cio_wmb();
+	rte_io_wmb();
 	*txq->qp_db = rte_cpu_to_be_32(txq->wqe_ci);
 	/* Ensure ordering between DB record and BF copy. */
 	rte_wmb();
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec.h b/drivers/net/mlx5/mlx5_rxtx_vec.h
index 6ddcbfb..a8d6c4f 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec.h
+++ b/drivers/net/mlx5/mlx5_rxtx_vec.h
@@ -118,7 +118,7 @@  mlx5_rx_replenish_bulk_mbuf(struct mlx5_rxq_data *rxq, uint16_t n)
 	elts_idx = rxq->rq_ci & q_mask;
 	for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i)
 		(*rxq->elts)[elts_idx + i] = &rxq->fake_mbuf;
-	rte_cio_wmb();
+	rte_io_wmb();
 	*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
 }
 
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h b/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h
index cb4ce1a..6bf0c9b 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h
@@ -788,7 +788,7 @@  rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
 		/* B.2 copy mbuf pointers. */
 		*(vector unsigned char *)&pkts[pos] = mbp1;
 		*(vector unsigned char *)&pkts[pos + 2] = mbp2;
-		rte_cio_rmb();
+		rte_io_rmb();
 
 		/* C.1 load remaining CQE data and extract necessary fields. */
 		cqe_tmp2 = *(vector unsigned char *)
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
index af924b7..d122dad 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
@@ -554,7 +554,7 @@  rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
 		/* B.0 (CQE 0) load a block having op_own. */
 		c0 = vld1q_u64((uint64_t *)(p0 + 48));
 		/* Synchronize for loading the rest of blocks. */
-		rte_cio_rmb();
+		rte_io_rmb();
 		/* Prefetch next 4 CQEs. */
 		if (pkts_n - pos >= 2 * MLX5_VPMD_DESCS_PER_LOOP) {
 			unsigned int next = pos + MLX5_VPMD_DESCS_PER_LOOP;
@@ -803,7 +803,7 @@  rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
 			rxq->decompressed -= n;
 		}
 	}
-	rte_cio_wmb();
+	rte_io_wmb();
 	*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
 	*no_cq = !rcvd_pkt;
 	return rcvd_pkt;
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
index 554924d..0bbcbee 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
@@ -552,7 +552,7 @@  rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
 		/* B.2 copy mbuf pointers. */
 		_mm_storeu_si128((__m128i *)&pkts[pos], mbp1);
 		_mm_storeu_si128((__m128i *)&pkts[pos + 2], mbp2);
-		rte_cio_rmb();
+		rte_io_rmb();
 		/* C.1 load remained CQE data and extract necessary fields. */
 		cqe_tmp2 = _mm_load_si128((__m128i *)&cq[pos + p3]);
 		cqe_tmp1 = _mm_load_si128((__m128i *)&cq[pos + p2]);
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 450d964..1bb667d 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -155,9 +155,9 @@  txq_sync_cq(struct mlx5_txq_data *txq)
 		cqe->op_own = MLX5_CQE_INVALIDATE;
 	}
 	/* Resync CQE and WQE (WQ in reset state). */
-	rte_cio_wmb();
+	rte_io_wmb();
 	*txq->cq_db = rte_cpu_to_be_32(txq->cq_ci);
-	rte_cio_wmb();
+	rte_io_wmb();
 }
 
 /**
diff --git a/drivers/net/octeontx/octeontx_rxtx.h b/drivers/net/octeontx/octeontx_rxtx.h
index 8b46105..af596cd 100644
--- a/drivers/net/octeontx/octeontx_rxtx.h
+++ b/drivers/net/octeontx/octeontx_rxtx.h
@@ -418,7 +418,7 @@  __octeontx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 	struct octeontx_txq *txq = tx_queue;
 	octeontx_dq_t *dq = &txq->dq;
 	uint16_t count = 0, nb_desc;
-	rte_cio_wmb();
+	rte_io_wmb();
 
 	while (count < nb_pkts) {
 		if (unlikely(*((volatile int64_t *)dq->fc_status_va) < 0))
diff --git a/drivers/net/octeontx2/otx2_ethdev_sec.c b/drivers/net/octeontx2/otx2_ethdev_sec.c
index a155594..0cbeed0 100644
--- a/drivers/net/octeontx2/otx2_ethdev_sec.c
+++ b/drivers/net/octeontx2/otx2_ethdev_sec.c
@@ -312,7 +312,7 @@  hmac_init(struct otx2_ipsec_fp_sa_ctl *ctl, struct otx2_cpt_qp *qp,
 
 	timeout = rte_get_timer_cycles() + 5 * rte_get_timer_hz();
 
-	rte_cio_wmb();
+	rte_io_wmb();
 
 	do {
 		otx2_lmt_mov(qp->lmtline, &inst, 2);
diff --git a/drivers/net/octeontx2/otx2_ethdev_sec_tx.h b/drivers/net/octeontx2/otx2_ethdev_sec_tx.h
index 15122b4..5bf8c19 100644
--- a/drivers/net/octeontx2/otx2_ethdev_sec_tx.h
+++ b/drivers/net/octeontx2/otx2_ethdev_sec_tx.h
@@ -160,7 +160,7 @@  otx2_sec_event_tx(struct otx2_ssogws *ws, struct rte_event *ev,
 	sess->ip_id++;
 	sess->esn++;
 
-	rte_cio_wmb();
+	rte_io_wmb();
 
 	do {
 		otx2_lmt_mov(sess->cpt_lmtline, &inst, 2);
diff --git a/drivers/net/octeontx2/otx2_rx.c b/drivers/net/octeontx2/otx2_rx.c
index ac40704..2da8efe 100644
--- a/drivers/net/octeontx2/otx2_rx.c
+++ b/drivers/net/octeontx2/otx2_rx.c
@@ -303,7 +303,7 @@  nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
 	rxq->head = head;
 	rxq->available -= packets;
 
-	rte_cio_wmb();
+	rte_io_wmb();
 	/* Free all the CQs that we've processed */
 	otx2_write64((rxq->wdata | packets), rxq->cq_door);
 
diff --git a/drivers/net/octeontx2/otx2_tx.c b/drivers/net/octeontx2/otx2_tx.c
index 1af6fa6..1b75cd5 100644
--- a/drivers/net/octeontx2/otx2_tx.c
+++ b/drivers/net/octeontx2/otx2_tx.c
@@ -39,7 +39,7 @@  nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 	}
 
 	/* Lets commit any changes in the packet */
-	rte_cio_wmb();
+	rte_io_wmb();
 
 	for (i = 0; i < pkts; i++) {
 		otx2_nix_xmit_prepare(tx_pkts[i], cmd, flags);
@@ -75,7 +75,7 @@  nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
 	}
 
 	/* Lets commit any changes in the packet */
-	rte_cio_wmb();
+	rte_io_wmb();
 
 	for (i = 0; i < pkts; i++) {
 		otx2_nix_xmit_prepare(tx_pkts[i], cmd, flags);
@@ -128,7 +128,7 @@  nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
 	txq->fc_cache_pkts -= pkts;
 
 	/* Lets commit any changes in the packet */
-	rte_cio_wmb();
+	rte_io_wmb();
 
 	senddesc01_w0 = vld1q_dup_u64(&txq->cmd[0]);
 	senddesc23_w0 = senddesc01_w0;
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index f915b8a..0ade352 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -147,7 +147,7 @@  virtqueue_dequeue_burst_rx_packed(struct virtqueue *vq,
 
 	for (i = 0; i < num; i++) {
 		used_idx = vq->vq_used_cons_idx;
-		/* desc_is_used has a load-acquire or rte_cio_rmb inside
+		/* desc_is_used has a load-acquire or rte_io_rmb inside
 		 * and wait for used desc in virtqueue.
 		 */
 		if (!desc_is_used(&desc[used_idx], vq))
diff --git a/drivers/net/virtio/virtio_rxtx_simple_neon.c b/drivers/net/virtio/virtio_rxtx_simple_neon.c
index 02520fd..12e034d 100644
--- a/drivers/net/virtio/virtio_rxtx_simple_neon.c
+++ b/drivers/net/virtio/virtio_rxtx_simple_neon.c
@@ -84,7 +84,7 @@  virtio_recv_pkts_vec(void *rx_queue,
 	if (unlikely(nb_pkts < RTE_VIRTIO_DESC_PER_LOOP))
 		return 0;
 
-	/* virtqueue_nused has a load-acquire or rte_cio_rmb inside */
+	/* virtqueue_nused has a load-acquire or rte_io_rmb inside */
 	nb_used = virtqueue_nused(vq);
 
 	if (unlikely(nb_used == 0))
diff --git a/drivers/net/virtio/virtqueue.h b/drivers/net/virtio/virtqueue.h
index 6ed5064..738b1a5 100644
--- a/drivers/net/virtio/virtqueue.h
+++ b/drivers/net/virtio/virtqueue.h
@@ -47,7 +47,7 @@  virtio_rmb(uint8_t weak_barriers)
 	if (weak_barriers)
 		rte_smp_rmb();
 	else
-		rte_cio_rmb();
+		rte_io_rmb();
 }
 
 static inline void
@@ -56,7 +56,7 @@  virtio_wmb(uint8_t weak_barriers)
 	if (weak_barriers)
 		rte_smp_wmb();
 	else
-		rte_cio_wmb();
+		rte_io_wmb();
 }
 
 static inline uint16_t
@@ -68,7 +68,7 @@  virtqueue_fetch_flags_packed(struct vring_packed_desc *dp,
 	if (weak_barriers) {
 /* x86 prefers to using rte_smp_rmb over __atomic_load_n as it reports
  * a better perf(~1.5%), which comes from the saved branch by the compiler.
- * The if and else branch are identical with the smp and cio barriers both
+ * The if and else branch are identical with the smp and io barriers both
  * defined as compiler barriers on x86.
  */
 #ifdef RTE_ARCH_X86_64
@@ -79,7 +79,7 @@  virtqueue_fetch_flags_packed(struct vring_packed_desc *dp,
 #endif
 	} else {
 		flags = dp->flags;
-		rte_cio_rmb();
+		rte_io_rmb();
 	}
 
 	return flags;
@@ -92,7 +92,7 @@  virtqueue_store_flags_packed(struct vring_packed_desc *dp,
 	if (weak_barriers) {
 /* x86 prefers to using rte_smp_wmb over __atomic_store_n as it reports
  * a better perf(~1.5%), which comes from the saved branch by the compiler.
- * The if and else branch are identical with the smp and cio barriers both
+ * The if and else branch are identical with the smp and io barriers both
  * defined as compiler barriers on x86.
  */
 #ifdef RTE_ARCH_X86_64
@@ -102,7 +102,7 @@  virtqueue_store_flags_packed(struct vring_packed_desc *dp,
 		__atomic_store_n(&dp->flags, flags, __ATOMIC_RELEASE);
 #endif
 	} else {
-		rte_cio_wmb();
+		rte_io_wmb();
 		dp->flags = flags;
 	}
 }
@@ -469,7 +469,7 @@  virtio_get_queue_type(struct virtio_hw *hw, uint16_t vtpci_queue_idx)
 		return VTNET_TQ;
 }
 
-/* virtqueue_nused has load-acquire or rte_cio_rmb insed */
+/* virtqueue_nused has load-acquire or rte_io_rmb insed */
 static inline uint16_t
 virtqueue_nused(const struct virtqueue *vq)
 {
@@ -480,7 +480,7 @@  virtqueue_nused(const struct virtqueue *vq)
 	 * x86 prefers to using rte_smp_rmb over __atomic_load_n as it
 	 * reports a slightly better perf, which comes from the saved
 	 * branch by the compiler.
-	 * The if and else branches are identical with the smp and cio
+	 * The if and else branches are identical with the smp and io
 	 * barriers both defined as compiler barriers on x86.
 	 */
 #ifdef RTE_ARCH_X86_64
@@ -492,7 +492,7 @@  virtqueue_nused(const struct virtqueue *vq)
 #endif
 	} else {
 		idx = vq->vq_split.ring.used->idx;
-		rte_cio_rmb();
+		rte_io_rmb();
 	}
 	return idx - vq->vq_used_cons_idx;
 }
@@ -510,7 +510,7 @@  vq_update_avail_idx(struct virtqueue *vq)
 	 * it reports a slightly better perf, which comes from the
 	 * saved branch by the compiler.
 	 * The if and else branches are identical with the smp and
-	 * cio barriers both defined as compiler barriers on x86.
+	 * io barriers both defined as compiler barriers on x86.
 	 */
 #ifdef RTE_ARCH_X86_64
 		rte_smp_wmb();
@@ -520,7 +520,7 @@  vq_update_avail_idx(struct virtqueue *vq)
 				 vq->vq_avail_idx, __ATOMIC_RELEASE);
 #endif
 	} else {
-		rte_cio_wmb();
+		rte_io_wmb();
 		vq->vq_split.ring.avail->idx = vq->vq_avail_idx;
 	}
 }
@@ -793,7 +793,7 @@  virtio_xmit_cleanup_inorder_packed(struct virtqueue *vq, int num)
 	struct vq_desc_extra *dxp;
 
 	used_idx = vq->vq_used_cons_idx;
-	/* desc_is_used has a load-acquire or rte_cio_rmb inside
+	/* desc_is_used has a load-acquire or rte_io_rmb inside
 	 * and wait for used desc in virtqueue.
 	 */
 	while (num > 0 && desc_is_used(&desc[used_idx], vq)) {
@@ -827,7 +827,7 @@  virtio_xmit_cleanup_normal_packed(struct virtqueue *vq, int num)
 	struct vq_desc_extra *dxp;
 
 	used_idx = vq->vq_used_cons_idx;
-	/* desc_is_used has a load-acquire or rte_cio_rmb inside
+	/* desc_is_used has a load-acquire or rte_io_rmb inside
 	 * and wait for used desc in virtqueue.
 	 */
 	while (num-- && desc_is_used(&desc[used_idx], vq)) {
diff --git a/drivers/raw/octeontx2_ep/otx2_ep_enqdeq.c b/drivers/raw/octeontx2_ep/otx2_ep_enqdeq.c
index 9f1e5ed..d04e957 100644
--- a/drivers/raw/octeontx2_ep/otx2_ep_enqdeq.c
+++ b/drivers/raw/octeontx2_ep/otx2_ep_enqdeq.c
@@ -475,7 +475,7 @@  sdp_ring_doorbell(struct sdp_device *sdpvf __rte_unused,
 	otx2_write64(iq->fill_cnt, iq->doorbell_reg);
 
 	/* Make sure doorbell writes observed by HW */
-	rte_cio_wmb();
+	rte_io_wmb();
 	iq->fill_cnt = 0;
 
 }
@@ -812,7 +812,7 @@  sdp_rawdev_dequeue(struct rte_rawdev *rawdev,
 
 	/* Ack the h/w with no# of pkts read by Host */
 	rte_write32(pkts, droq->pkts_sent_reg);
-	rte_cio_wmb();
+	rte_io_wmb();
 
 	droq->last_pkt_count -= pkts;
 
diff --git a/drivers/regex/mlx5/mlx5_regex_fastpath.c b/drivers/regex/mlx5/mlx5_regex_fastpath.c
index 6fafcff..d9b2a1a 100644
--- a/drivers/regex/mlx5/mlx5_regex_fastpath.c
+++ b/drivers/regex/mlx5/mlx5_regex_fastpath.c
@@ -135,7 +135,7 @@  send_doorbell(struct mlx5dv_devx_uar *uar, struct mlx5_regex_sq *sq)
 	((struct mlx5_wqe_ctrl_seg *)wqe)->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
 	uint64_t *doorbell_addr =
 		(uint64_t *)((uint8_t *)uar->base_addr + 0x800);
-	rte_cio_wmb();
+	rte_io_wmb();
 	sq->dbr[MLX5_SND_DBR] = rte_cpu_to_be_32((sq->db_pi + 1) &
 						 MLX5_REGEX_MAX_WQE_INDEX);
 	rte_wmb();
@@ -219,7 +219,7 @@  poll_one(struct mlx5_regex_cq *cq)
 
 	next_cqe_offset =  (cq->ci & (cq_size_get(cq) - 1));
 	cqe = (volatile struct mlx5_cqe *)(cq->cqe + next_cqe_offset);
-	rte_cio_wmb();
+	rte_io_wmb();
 
 	int ret = check_cqe(cqe, cq_size_get(cq), cq->ci);
 
diff --git a/lib/librte_eal/arm/include/rte_atomic_32.h b/lib/librte_eal/arm/include/rte_atomic_32.h
index 368f10c..9d0568d 100644
--- a/lib/librte_eal/arm/include/rte_atomic_32.h
+++ b/lib/librte_eal/arm/include/rte_atomic_32.h
@@ -33,10 +33,6 @@  extern "C" {
 
 #define rte_io_rmb() rte_rmb()
 
-#define rte_cio_wmb() rte_wmb()
-
-#define rte_cio_rmb() rte_rmb()
-
 static __rte_always_inline void
 rte_atomic_thread_fence(int memory_order)
 {
diff --git a/lib/librte_eal/arm/include/rte_atomic_64.h b/lib/librte_eal/arm/include/rte_atomic_64.h
index 5cae52d..c518559 100644
--- a/lib/librte_eal/arm/include/rte_atomic_64.h
+++ b/lib/librte_eal/arm/include/rte_atomic_64.h
@@ -37,10 +37,6 @@  extern "C" {
 
 #define rte_io_rmb() rte_rmb()
 
-#define rte_cio_wmb() rte_wmb()
-
-#define rte_cio_rmb() rte_rmb()
-
 static __rte_always_inline void
 rte_atomic_thread_fence(int memory_order)
 {
diff --git a/lib/librte_eal/include/generic/rte_atomic.h b/lib/librte_eal/include/generic/rte_atomic.h
index 95270f1..d1255b2 100644
--- a/lib/librte_eal/include/generic/rte_atomic.h
+++ b/lib/librte_eal/include/generic/rte_atomic.h
@@ -107,45 +107,6 @@  static inline void rte_io_wmb(void);
 static inline void rte_io_rmb(void);
 ///@}
 
-/** @name Coherent I/O Memory Barrier
- *
- * Coherent I/O memory barrier is a lightweight version of I/O memory
- * barriers which are system-wide data synchronization barriers. This
- * is for only coherent memory domain between lcore and I/O device but
- * it is same as the I/O memory barriers in most of architectures.
- * However, some architecture provides even lighter barriers which are
- * somewhere in between I/O memory barriers and SMP memory barriers.
- * For example, in case of ARMv8, DMB(data memory barrier) instruction
- * can have different shareability domains - inner-shareable and
- * outer-shareable. And inner-shareable DMB fits for SMP memory
- * barriers and outer-shareable DMB for coherent I/O memory barriers,
- * which acts on coherent memory.
- *
- * In most cases, I/O memory barriers are safer but if operations are
- * on coherent memory instead of incoherent MMIO region of a device,
- * then coherent I/O memory barriers can be used and this could bring
- * performance gain depending on architectures.
- */
-///@{
-/**
- * Write memory barrier for coherent memory between lcore and I/O device
- *
- * Guarantees that the STORE operations on coherent memory that
- * precede the rte_cio_wmb() call are visible to I/O device before the
- * STORE operations that follow it.
- */
-static inline void rte_cio_wmb(void);
-
-/**
- * Read memory barrier for coherent memory between lcore and I/O device
- *
- * Guarantees that the LOAD operations on coherent memory updated by
- * I/O device that precede the rte_cio_rmb() call are visible to CPU
- * before the LOAD operations that follow it.
- */
-static inline void rte_cio_rmb(void);
-///@}
-
 #endif /* __DOXYGEN__ */
 
 /**
diff --git a/lib/librte_eal/ppc/include/rte_atomic.h b/lib/librte_eal/ppc/include/rte_atomic.h
index 527fcaf..a919899 100644
--- a/lib/librte_eal/ppc/include/rte_atomic.h
+++ b/lib/librte_eal/ppc/include/rte_atomic.h
@@ -36,10 +36,6 @@  extern "C" {
 
 #define rte_io_rmb() rte_rmb()
 
-#define rte_cio_wmb() rte_wmb()
-
-#define rte_cio_rmb() rte_rmb()
-
 static __rte_always_inline void
 rte_atomic_thread_fence(int memory_order)
 {
diff --git a/lib/librte_eal/x86/include/rte_atomic.h b/lib/librte_eal/x86/include/rte_atomic.h
index 62ea393..b7d6b06 100644
--- a/lib/librte_eal/x86/include/rte_atomic.h
+++ b/lib/librte_eal/x86/include/rte_atomic.h
@@ -79,10 +79,6 @@  rte_smp_mb(void)
 
 #define rte_io_rmb() rte_compiler_barrier()
 
-#define rte_cio_wmb() rte_compiler_barrier()
-
-#define rte_cio_rmb() rte_compiler_barrier()
-
 /**
  * Synchronization fence between threads based on the specified memory order.
  *