[3/3] event/octeontx2: improve datapath memory locality

Message ID 20200629013329.5297-3-pbhagavatula@marvell.com (mailing list archive)
State Accepted, archived
Delegated to: Jerin Jacob
Headers
Series [1/3] event/octeontx2: fix device reconfigure |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/travis-robot success Travis build: passed
ci/Intel-compilation success Compilation OK

Commit Message

Pavan Nikhilesh Bhagavatula June 29, 2020, 1:33 a.m. UTC
  From: Pavan Nikhilesh <pbhagavatula@marvell.com>

When the event device transmits a packet on OCTEONTX2, it needs to access
the TXq data of the destination ethernet device.
Currently, the TXq data is fetched through the global rte_eth_devices array.
Instead, save the TXq address inside the event port memory.

Cc: stable@dpdk.org

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/octeontx2/otx2_evdev.h       |  5 ++
 drivers/event/octeontx2/otx2_evdev_adptr.c | 67 +++++++++++++++++++++-
 drivers/event/octeontx2/otx2_worker.c      | 15 +++--
 drivers/event/octeontx2/otx2_worker.h      | 21 ++++---
 drivers/event/octeontx2/otx2_worker_dual.c | 15 +++--
 5 files changed, 103 insertions(+), 20 deletions(-)
  

Patch

diff --git a/drivers/event/octeontx2/otx2_evdev.h b/drivers/event/octeontx2/otx2_evdev.h
index 3b477820f..873724dd4 100644
--- a/drivers/event/octeontx2/otx2_evdev.h
+++ b/drivers/event/octeontx2/otx2_evdev.h
@@ -141,6 +141,7 @@  struct otx2_sso_evdev {
 	uint64_t adptr_xae_cnt;
 	uint16_t rx_adptr_pool_cnt;
 	uint64_t *rx_adptr_pools;
+	uint16_t max_port_id;
 	uint16_t tim_adptr_ring_cnt;
 	uint16_t *timer_adptr_rings;
 	uint64_t *timer_adptr_sz;
@@ -185,6 +186,8 @@  struct otx2_ssogws {
 	uintptr_t grps_base[OTX2_SSO_MAX_VHGRP];
 	/* PTP timestamp */
 	struct otx2_timesync_info *tstamp;
+	/* Tx Fastpath data */
+	uint8_t tx_adptr_data[] __rte_cache_aligned;
 } __rte_cache_aligned;
 
 struct otx2_ssogws_state {
@@ -204,6 +207,8 @@  struct otx2_ssogws_dual {
 	uintptr_t grps_base[OTX2_SSO_MAX_VHGRP];
 	/* PTP timestamp */
 	struct otx2_timesync_info *tstamp;
+	/* Tx Fastpath data */
+	uint8_t tx_adptr_data[] __rte_cache_aligned;
 } __rte_cache_aligned;
 
 static inline struct otx2_sso_evdev *
diff --git a/drivers/event/octeontx2/otx2_evdev_adptr.c b/drivers/event/octeontx2/otx2_evdev_adptr.c
index 8bdcfa3ea..0a5d7924a 100644
--- a/drivers/event/octeontx2/otx2_evdev_adptr.c
+++ b/drivers/event/octeontx2/otx2_evdev_adptr.c
@@ -438,6 +438,60 @@  sso_sqb_aura_limit_edit(struct rte_mempool *mp, uint16_t nb_sqb_bufs)
 	return otx2_mbox_process(npa_lf->mbox);
 }
 
+/* Record txq in the [eth_port_id][tx_queue_id] slot of the Tx lookup table
+ * trailing every event port, resizing each port (header of its own workslot
+ * type plus max_port_id + 1 rows). Returns 0, or -ENOMEM if a resize fails.
+ */
+static int
+sso_add_tx_queue_data(const struct rte_eventdev *event_dev,
+		      uint16_t eth_port_id, uint16_t tx_queue_id,
+		      struct otx2_eth_txq *txq)
+{
+	struct otx2_sso_evdev *dev = sso_pmd_priv(event_dev);
+	int i;
+
+	dev->max_port_id = RTE_MAX(dev->max_port_id, eth_port_id);
+	for (i = 0; i < event_dev->data->nb_ports; i++) {
+		if (dev->dual_ws) {
+			struct otx2_ssogws_dual *dws;
+
+			dws = rte_realloc_socket(event_dev->data->ports[i],
+						 sizeof(struct otx2_ssogws_dual)
+						 + (sizeof(uint64_t) *
+						    (dev->max_port_id + 1) *
+						    RTE_MAX_QUEUES_PER_PORT),
+						 RTE_CACHE_LINE_SIZE,
+						 event_dev->data->socket_id);
+			if (dws == NULL)
+				return -ENOMEM;
+
+			((uint64_t (*)[RTE_MAX_QUEUES_PER_PORT]
+			 )&dws->tx_adptr_data)[eth_port_id][tx_queue_id] =
+				(uint64_t)txq;
+			event_dev->data->ports[i] = dws;
+		} else {
+			struct otx2_ssogws *ws;
+
+			ws = rte_realloc_socket(event_dev->data->ports[i],
+						sizeof(struct otx2_ssogws)
+						+ (sizeof(uint64_t) *
+						   (dev->max_port_id + 1) *
+						   RTE_MAX_QUEUES_PER_PORT),
+						RTE_CACHE_LINE_SIZE,
+						event_dev->data->socket_id);
+			if (ws == NULL)
+				return -ENOMEM;
+
+			((uint64_t (*)[RTE_MAX_QUEUES_PER_PORT]
+			 )&ws->tx_adptr_data)[eth_port_id][tx_queue_id] =
+				(uint64_t)txq;
+			event_dev->data->ports[i] = ws;
+		}
+	}
+
+	return 0;
+}
+
 int
 otx2_sso_tx_adapter_queue_add(uint8_t id, const struct rte_eventdev *event_dev,
 			      const struct rte_eth_dev *eth_dev,
@@ -446,18 +500,27 @@  otx2_sso_tx_adapter_queue_add(uint8_t id, const struct rte_eventdev *event_dev,
 	struct otx2_eth_dev *otx2_eth_dev = eth_dev->data->dev_private;
 	struct otx2_sso_evdev *dev = sso_pmd_priv(event_dev);
 	struct otx2_eth_txq *txq;
-	int i;
+	int i, ret;
 
 	RTE_SET_USED(id);
 	if (tx_queue_id < 0) {
 		for (i = 0 ; i < eth_dev->data->nb_tx_queues; i++) {
 			txq = eth_dev->data->tx_queues[i];
 			sso_sqb_aura_limit_edit(txq->sqb_pool,
-						OTX2_SSO_SQB_LIMIT);
+					OTX2_SSO_SQB_LIMIT);
+			ret = sso_add_tx_queue_data(event_dev,
+						    eth_dev->data->port_id, i,
+						    txq);
+			if (ret < 0)
+				return ret;
 		}
 	} else {
 		txq = eth_dev->data->tx_queues[tx_queue_id];
 		sso_sqb_aura_limit_edit(txq->sqb_pool, OTX2_SSO_SQB_LIMIT);
+		ret = sso_add_tx_queue_data(event_dev, eth_dev->data->port_id,
+					    tx_queue_id, txq);
+		if (ret < 0)
+			return ret;
 	}
 
 	dev->tx_offloads |= otx2_eth_dev->tx_offload_flags;
diff --git a/drivers/event/octeontx2/otx2_worker.c b/drivers/event/octeontx2/otx2_worker.c
index 88bac391c..1d427e4a3 100644
--- a/drivers/event/octeontx2/otx2_worker.c
+++ b/drivers/event/octeontx2/otx2_worker.c
@@ -268,7 +268,7 @@  otx2_ssogws_enq_fwd_burst(void *port, const struct rte_event ev[],
 }
 
 #define T(name, f6, f5, f4, f3, f2, f1, f0, sz, flags)			\
-uint16_t __rte_hot								\
+uint16_t __rte_hot							\
 otx2_ssogws_tx_adptr_enq_ ## name(void *port, struct rte_event ev[],	\
 				  uint16_t nb_events)			\
 {									\
@@ -276,13 +276,16 @@  otx2_ssogws_tx_adptr_enq_ ## name(void *port, struct rte_event ev[],	\
 	uint64_t cmd[sz];						\
 									\
 	RTE_SET_USED(nb_events);					\
-	return otx2_ssogws_event_tx(ws, ev, cmd, flags);		\
+	return otx2_ssogws_event_tx(ws, ev, cmd, (const uint64_t	\
+				    (*)[RTE_MAX_QUEUES_PER_PORT])	\
+				    &ws->tx_adptr_data,			\
+				    flags);				\
 }
 SSO_TX_ADPTR_ENQ_FASTPATH_FUNC
 #undef T
 
 #define T(name, f6, f5, f4, f3, f2, f1, f0, sz, flags)			\
-uint16_t __rte_hot								\
+uint16_t __rte_hot							\
 otx2_ssogws_tx_adptr_enq_seg_ ## name(void *port, struct rte_event ev[],\
 				      uint16_t nb_events)		\
 {									\
@@ -290,8 +293,10 @@  otx2_ssogws_tx_adptr_enq_seg_ ## name(void *port, struct rte_event ev[],\
 	uint64_t cmd[(sz) + NIX_TX_MSEG_SG_DWORDS - 2];			\
 									\
 	RTE_SET_USED(nb_events);					\
-	return otx2_ssogws_event_tx(ws, ev, cmd, (flags) |		\
-				    NIX_TX_MULTI_SEG_F);		\
+	return otx2_ssogws_event_tx(ws, ev, cmd, (const uint64_t	\
+				    (*)[RTE_MAX_QUEUES_PER_PORT])	\
+				    &ws->tx_adptr_data,			\
+				    (flags) | NIX_TX_MULTI_SEG_F);	\
 }
 SSO_TX_ADPTR_ENQ_FASTPATH_FUNC
 #undef T
diff --git a/drivers/event/octeontx2/otx2_worker.h b/drivers/event/octeontx2/otx2_worker.h
index 5f5aa8746..924ff7ff4 100644
--- a/drivers/event/octeontx2/otx2_worker.h
+++ b/drivers/event/octeontx2/otx2_worker.h
@@ -260,10 +260,11 @@  otx2_ssogws_order(struct otx2_ssogws *ws, const uint8_t wait_flag)
 }
 
 static __rte_always_inline const struct otx2_eth_txq *
-otx2_ssogws_xtract_meta(struct rte_mbuf *m)
+otx2_ssogws_xtract_meta(struct rte_mbuf *m,
+			const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT])
 {
-	return rte_eth_devices[m->port].data->tx_queues[
-			rte_event_eth_tx_adapter_txq_get(m)];
+	return (const struct otx2_eth_txq *)txq_data[m->port][
+					rte_event_eth_tx_adapter_txq_get(m)];
 }
 
 static __rte_always_inline void
@@ -276,20 +277,24 @@  otx2_ssogws_prepare_pkt(const struct otx2_eth_txq *txq, struct rte_mbuf *m,
 
 static __rte_always_inline uint16_t
 otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event ev[],
-		     uint64_t *cmd, const uint32_t flags)
+		     uint64_t *cmd, const uint64_t
+		     txq_data[][RTE_MAX_QUEUES_PER_PORT],
+		     const uint32_t flags)
 {
 	struct rte_mbuf *m = ev[0].mbuf;
-	const struct otx2_eth_txq *txq = otx2_ssogws_xtract_meta(m);
-
-	rte_prefetch_non_temporal(txq);
+	const struct otx2_eth_txq *txq;
 
 	if ((flags & NIX_TX_OFFLOAD_SECURITY_F) &&
-	    (m->ol_flags & PKT_TX_SEC_OFFLOAD))
+	    (m->ol_flags & PKT_TX_SEC_OFFLOAD)) {
+		txq = otx2_ssogws_xtract_meta(m, txq_data);
 		return otx2_sec_event_tx(ws, ev, m, txq, flags);
+	}
 
+	rte_prefetch_non_temporal(&txq_data[m->port][0]);
 	/* Perform header writes before barrier for TSO */
 	otx2_nix_xmit_prepare_tso(m, flags);
 	otx2_ssogws_order(ws, !ev->sched_type);
+	txq = otx2_ssogws_xtract_meta(m, txq_data);
 	otx2_ssogws_prepare_pkt(txq, m, cmd, flags);
 
 	if (flags & NIX_TX_MULTI_SEG_F) {
diff --git a/drivers/event/octeontx2/otx2_worker_dual.c b/drivers/event/octeontx2/otx2_worker_dual.c
index 3d55d921b..946488eab 100644
--- a/drivers/event/octeontx2/otx2_worker_dual.c
+++ b/drivers/event/octeontx2/otx2_worker_dual.c
@@ -308,7 +308,7 @@  SSO_RX_ADPTR_ENQ_FASTPATH_FUNC
 #undef R
 
 #define T(name, f6, f5, f4, f3, f2, f1, f0, sz, flags)			\
-uint16_t __rte_hot								\
+uint16_t __rte_hot							\
 otx2_ssogws_dual_tx_adptr_enq_ ## name(void *port,			\
 				       struct rte_event ev[],		\
 				       uint16_t nb_events)		\
@@ -319,13 +319,16 @@  otx2_ssogws_dual_tx_adptr_enq_ ## name(void *port,			\
 	uint64_t cmd[sz];						\
 									\
 	RTE_SET_USED(nb_events);					\
-	return otx2_ssogws_event_tx(vws, ev, cmd, flags);		\
+	return otx2_ssogws_event_tx(vws, ev, cmd, (const uint64_t	\
+				    (*)[RTE_MAX_QUEUES_PER_PORT])	\
+				    ws->tx_adptr_data,			\
+				    flags);				\
 }
 SSO_TX_ADPTR_ENQ_FASTPATH_FUNC
 #undef T
 
 #define T(name, f6, f5, f4, f3, f2, f1, f0, sz, flags)			\
-uint16_t __rte_hot								\
+uint16_t __rte_hot							\
 otx2_ssogws_dual_tx_adptr_enq_seg_ ## name(void *port,			\
 					   struct rte_event ev[],	\
 					   uint16_t nb_events)		\
@@ -336,8 +339,10 @@  otx2_ssogws_dual_tx_adptr_enq_seg_ ## name(void *port,			\
 	uint64_t cmd[(sz) + NIX_TX_MSEG_SG_DWORDS - 2];			\
 									\
 	RTE_SET_USED(nb_events);					\
-	return otx2_ssogws_event_tx(vws, ev, cmd, (flags) |		\
-				    NIX_TX_MULTI_SEG_F);		\
+	return otx2_ssogws_event_tx(vws, ev, cmd, (const uint64_t	\
+				    (*)[RTE_MAX_QUEUES_PER_PORT])	\
+				    ws->tx_adptr_data,			\
+				    (flags) | NIX_TX_MULTI_SEG_F);	\
 }
 SSO_TX_ADPTR_ENQ_FASTPATH_FUNC
 #undef T