[dpdk-dev] [PATCH v2 02/16] mlx5: get rid of the WR structure in RX queue elements

Adrien Mazarguil adrien.mazarguil at 6wind.com
Fri Oct 30 19:55:05 CET 2015


Removing this structure reduces the size of SG and non-SG RX queue elements
significantly to improve performance.

An nice side effect is that the mbuf pointer is now fully stored in
struct rxq_elt instead of relying on the WR ID data offset hack.

Signed-off-by: Adrien Mazarguil <adrien.mazarguil at 6wind.com>
Signed-off-by: Olga Shern <olgas at mellanox.com>
Signed-off-by: Or Ami <ora at mellanox.com>
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro at 6wind.com>
---
 drivers/net/mlx5/mlx5.h       |  18 -----
 drivers/net/mlx5/mlx5_rxq.c   | 173 ++++++++++++++++++++++--------------------
 drivers/net/mlx5/mlx5_rxtx.c  |  33 ++------
 drivers/net/mlx5/mlx5_rxtx.h  |   4 +-
 drivers/net/mlx5/mlx5_utils.h |   2 -
 5 files changed, 98 insertions(+), 132 deletions(-)

diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 3a1e7a6..c8a517c 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -115,24 +115,6 @@ struct priv {
 	rte_spinlock_t lock; /* Lock for control functions. */
 };
 
-/* Work Request ID data type (64 bit). */
-typedef union {
-	struct {
-		uint32_t id;
-		uint16_t offset;
-	} data;
-	uint64_t raw;
-} wr_id_t;
-
-/* Compile-time check. */
-static inline void wr_id_t_check(void)
-{
-	wr_id_t check[1 + (2 * -!(sizeof(wr_id_t) == sizeof(uint64_t)))];
-
-	(void)check;
-	(void)wr_id_t_check;
-}
-
 /**
  * Lock private structure to protect it from concurrent access in the
  * control path.
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 5a55886..f2f773e 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -60,6 +60,7 @@
 #endif
 
 #include "mlx5.h"
+#include "mlx5_autoconf.h"
 #include "mlx5_rxtx.h"
 #include "mlx5_utils.h"
 #include "mlx5_defs.h"
@@ -97,16 +98,10 @@ rxq_alloc_elts_sp(struct rxq *rxq, unsigned int elts_n,
 	for (i = 0; (i != elts_n); ++i) {
 		unsigned int j;
 		struct rxq_elt_sp *elt = &(*elts)[i];
-		struct ibv_recv_wr *wr = &elt->wr;
 		struct ibv_sge (*sges)[RTE_DIM(elt->sges)] = &elt->sges;
 
 		/* These two arrays must have the same size. */
 		assert(RTE_DIM(elt->sges) == RTE_DIM(elt->bufs));
-		/* Configure WR. */
-		wr->wr_id = i;
-		wr->next = &(*elts)[(i + 1)].wr;
-		wr->sg_list = &(*sges)[0];
-		wr->num_sge = RTE_DIM(*sges);
 		/* For each SGE (segment). */
 		for (j = 0; (j != RTE_DIM(elt->bufs)); ++j) {
 			struct ibv_sge *sge = &(*sges)[j];
@@ -149,8 +144,6 @@ rxq_alloc_elts_sp(struct rxq *rxq, unsigned int elts_n,
 			assert(sge->length == rte_pktmbuf_tailroom(buf));
 		}
 	}
-	/* The last WR pointer must be NULL. */
-	(*elts)[(i - 1)].wr.next = NULL;
 	DEBUG("%p: allocated and configured %u WRs (%zu segments)",
 	      (void *)rxq, elts_n, (elts_n * RTE_DIM((*elts)[0].sges)));
 	rxq->elts_n = elts_n;
@@ -242,7 +235,6 @@ rxq_alloc_elts(struct rxq *rxq, unsigned int elts_n, struct rte_mbuf **pool)
 	/* For each WR (packet). */
 	for (i = 0; (i != elts_n); ++i) {
 		struct rxq_elt *elt = &(*elts)[i];
-		struct ibv_recv_wr *wr = &elt->wr;
 		struct ibv_sge *sge = &(*elts)[i].sge;
 		struct rte_mbuf *buf;
 
@@ -258,16 +250,7 @@ rxq_alloc_elts(struct rxq *rxq, unsigned int elts_n, struct rte_mbuf **pool)
 			ret = ENOMEM;
 			goto error;
 		}
-		/* Configure WR. Work request ID contains its own index in
-		 * the elts array and the offset between SGE buffer header and
-		 * its data. */
-		WR_ID(wr->wr_id).id = i;
-		WR_ID(wr->wr_id).offset =
-			(((uintptr_t)buf->buf_addr + RTE_PKTMBUF_HEADROOM) -
-			 (uintptr_t)buf);
-		wr->next = &(*elts)[(i + 1)].wr;
-		wr->sg_list = sge;
-		wr->num_sge = 1;
+		elt->buf = buf;
 		/* Headroom is reserved by rte_pktmbuf_alloc(). */
 		assert(DATA_OFF(buf) == RTE_PKTMBUF_HEADROOM);
 		/* Buffer is supposed to be empty. */
@@ -282,21 +265,7 @@ rxq_alloc_elts(struct rxq *rxq, unsigned int elts_n, struct rte_mbuf **pool)
 		sge->lkey = rxq->mr->lkey;
 		/* Redundant check for tailroom. */
 		assert(sge->length == rte_pktmbuf_tailroom(buf));
-		/* Make sure elts index and SGE mbuf pointer can be deduced
-		 * from WR ID. */
-		if ((WR_ID(wr->wr_id).id != i) ||
-		    ((void *)((uintptr_t)sge->addr -
-			WR_ID(wr->wr_id).offset) != buf)) {
-			ERROR("%p: cannot store index and offset in WR ID",
-			      (void *)rxq);
-			sge->addr = 0;
-			rte_pktmbuf_free(buf);
-			ret = EOVERFLOW;
-			goto error;
-		}
 	}
-	/* The last WR pointer must be NULL. */
-	(*elts)[(i - 1)].wr.next = NULL;
 	DEBUG("%p: allocated and configured %u single-segment WRs",
 	      (void *)rxq, elts_n);
 	rxq->elts_n = elts_n;
@@ -309,14 +278,10 @@ error:
 		assert(pool == NULL);
 		for (i = 0; (i != RTE_DIM(*elts)); ++i) {
 			struct rxq_elt *elt = &(*elts)[i];
-			struct rte_mbuf *buf;
+			struct rte_mbuf *buf = elt->buf;
 
-			if (elt->sge.addr == 0)
-				continue;
-			assert(WR_ID(elt->wr.wr_id).id == i);
-			buf = (void *)((uintptr_t)elt->sge.addr -
-				WR_ID(elt->wr.wr_id).offset);
-			rte_pktmbuf_free_seg(buf);
+			if (buf != NULL)
+				rte_pktmbuf_free_seg(buf);
 		}
 		rte_free(elts);
 	}
@@ -345,14 +310,10 @@ rxq_free_elts(struct rxq *rxq)
 		return;
 	for (i = 0; (i != RTE_DIM(*elts)); ++i) {
 		struct rxq_elt *elt = &(*elts)[i];
-		struct rte_mbuf *buf;
+		struct rte_mbuf *buf = elt->buf;
 
-		if (elt->sge.addr == 0)
-			continue;
-		assert(WR_ID(elt->wr.wr_id).id == i);
-		buf = (void *)((uintptr_t)elt->sge.addr -
-			WR_ID(elt->wr.wr_id).offset);
-		rte_pktmbuf_free_seg(buf);
+		if (buf != NULL)
+			rte_pktmbuf_free_seg(buf);
 	}
 	rte_free(elts);
 }
@@ -552,7 +513,6 @@ rxq_rehash(struct rte_eth_dev *dev, struct rxq *rxq)
 	struct rte_mbuf **pool;
 	unsigned int i, k;
 	struct ibv_exp_qp_attr mod;
-	struct ibv_recv_wr *bad_wr;
 	int err;
 	int parent = (rxq == &priv->rxq_parent);
 
@@ -670,11 +630,8 @@ rxq_rehash(struct rte_eth_dev *dev, struct rxq *rxq)
 
 		for (i = 0; (i != RTE_DIM(*elts)); ++i) {
 			struct rxq_elt *elt = &(*elts)[i];
-			struct rte_mbuf *buf = (void *)
-				((uintptr_t)elt->sge.addr -
-				 WR_ID(elt->wr.wr_id).offset);
+			struct rte_mbuf *buf = elt->buf;
 
-			assert(WR_ID(elt->wr.wr_id).id == i);
 			pool[k++] = buf;
 		}
 	}
@@ -698,17 +655,41 @@ rxq_rehash(struct rte_eth_dev *dev, struct rxq *rxq)
 	rxq->elts_n = 0;
 	rte_free(rxq->elts.sp);
 	rxq->elts.sp = NULL;
-	/* Post WRs. */
-	err = ibv_post_recv(tmpl.qp,
-			    (tmpl.sp ?
-			     &(*tmpl.elts.sp)[0].wr :
-			     &(*tmpl.elts.no_sp)[0].wr),
-			    &bad_wr);
+	/* Post SGEs. */
+	assert(tmpl.if_qp != NULL);
+	if (tmpl.sp) {
+		struct rxq_elt_sp (*elts)[tmpl.elts_n] = tmpl.elts.sp;
+
+		for (i = 0; (i != RTE_DIM(*elts)); ++i) {
+#ifdef HAVE_EXP_QP_BURST_RECV_SG_LIST
+			err = tmpl.if_qp->recv_sg_list
+				(tmpl.qp,
+				 (*elts)[i].sges,
+				 RTE_DIM((*elts)[i].sges));
+#else /* HAVE_EXP_QP_BURST_RECV_SG_LIST */
+			errno = ENOSYS;
+			err = -1;
+#endif /* HAVE_EXP_QP_BURST_RECV_SG_LIST */
+			if (err)
+				break;
+		}
+	} else {
+		struct rxq_elt (*elts)[tmpl.elts_n] = tmpl.elts.no_sp;
+
+		for (i = 0; (i != RTE_DIM(*elts)); ++i) {
+			err = tmpl.if_qp->recv_burst(
+				tmpl.qp,
+				&(*elts)[i].sge,
+				1);
+			if (err)
+				break;
+		}
+	}
 	if (err) {
-		ERROR("%p: ibv_post_recv() failed for WR %p: %s",
-		      (void *)dev,
-		      (void *)bad_wr,
-		      strerror(err));
+		ERROR("%p: failed to post SGEs with error %d",
+		      (void *)dev, err);
+		/* Set err because it does not contain a valid errno value. */
+		err = EIO;
 		goto skip_rtr;
 	}
 	mod = (struct ibv_exp_qp_attr){
@@ -761,10 +742,10 @@ rxq_setup(struct rte_eth_dev *dev, struct rxq *rxq, uint16_t desc,
 		struct ibv_exp_res_domain_init_attr rd;
 	} attr;
 	enum ibv_exp_query_intf_status status;
-	struct ibv_recv_wr *bad_wr;
 	struct rte_mbuf *buf;
 	int ret = 0;
 	int parent = (rxq == &priv->rxq_parent);
+	unsigned int i;
 
 	(void)conf; /* Thresholds configuration (ignored). */
 	/*
@@ -900,28 +881,7 @@ skip_mr:
 		      (void *)dev, strerror(ret));
 		goto error;
 	}
-	ret = ibv_post_recv(tmpl.qp,
-			    (tmpl.sp ?
-			     &(*tmpl.elts.sp)[0].wr :
-			     &(*tmpl.elts.no_sp)[0].wr),
-			    &bad_wr);
-	if (ret) {
-		ERROR("%p: ibv_post_recv() failed for WR %p: %s",
-		      (void *)dev,
-		      (void *)bad_wr,
-		      strerror(ret));
-		goto error;
-	}
 skip_alloc:
-	mod = (struct ibv_exp_qp_attr){
-		.qp_state = IBV_QPS_RTR
-	};
-	ret = ibv_exp_modify_qp(tmpl.qp, &mod, IBV_EXP_QP_STATE);
-	if (ret) {
-		ERROR("%p: QP state to IBV_QPS_RTR failed: %s",
-		      (void *)dev, strerror(ret));
-		goto error;
-	}
 	/* Save port ID. */
 	tmpl.port_id = dev->data->port_id;
 	DEBUG("%p: RTE port ID: %u", (void *)rxq, tmpl.port_id);
@@ -947,6 +907,51 @@ skip_alloc:
 		      (void *)dev, status);
 		goto error;
 	}
+	/* Post SGEs. */
+	if (!parent && tmpl.sp) {
+		struct rxq_elt_sp (*elts)[tmpl.elts_n] = tmpl.elts.sp;
+
+		for (i = 0; (i != RTE_DIM(*elts)); ++i) {
+#ifdef HAVE_EXP_QP_BURST_RECV_SG_LIST
+			ret = tmpl.if_qp->recv_sg_list
+				(tmpl.qp,
+				 (*elts)[i].sges,
+				 RTE_DIM((*elts)[i].sges));
+#else /* HAVE_EXP_QP_BURST_RECV_SG_LIST */
+			errno = ENOSYS;
+			ret = -1;
+#endif /* HAVE_EXP_QP_BURST_RECV_SG_LIST */
+			if (ret)
+				break;
+		}
+	} else if (!parent) {
+		struct rxq_elt (*elts)[tmpl.elts_n] = tmpl.elts.no_sp;
+
+		for (i = 0; (i != RTE_DIM(*elts)); ++i) {
+			ret = tmpl.if_qp->recv_burst(
+				tmpl.qp,
+				&(*elts)[i].sge,
+				1);
+			if (ret)
+				break;
+		}
+	}
+	if (ret) {
+		ERROR("%p: failed to post SGEs with error %d",
+		      (void *)dev, ret);
+		/* Set ret because it does not contain a valid errno value. */
+		ret = EIO;
+		goto error;
+	}
+	mod = (struct ibv_exp_qp_attr){
+		.qp_state = IBV_QPS_RTR
+	};
+	ret = ibv_exp_modify_qp(tmpl.qp, &mod, IBV_EXP_QP_STATE);
+	if (ret) {
+		ERROR("%p: QP state to IBV_QPS_RTR failed: %s",
+		      (void *)dev, strerror(ret));
+		goto error;
+	}
 	/* Clean up rxq in case we're reinitializing it. */
 	DEBUG("%p: cleaning-up old rxq just in case", (void *)rxq);
 	rxq_cleanup(rxq);
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 8872f19..f48fec1 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -612,8 +612,6 @@ mlx5_rx_burst_sp(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 		return 0;
 	for (i = 0; (i != pkts_n); ++i) {
 		struct rxq_elt_sp *elt = &(*elts)[elts_head];
-		struct ibv_recv_wr *wr = &elt->wr;
-		uint64_t wr_id = wr->wr_id;
 		unsigned int len;
 		unsigned int pkt_buf_len;
 		struct rte_mbuf *pkt_buf = NULL; /* Buffer returned in pkts. */
@@ -623,12 +621,6 @@ mlx5_rx_burst_sp(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 		uint32_t flags;
 
 		/* Sanity checks. */
-#ifdef NDEBUG
-		(void)wr_id;
-#endif
-		assert(wr_id < rxq->elts_n);
-		assert(wr->sg_list == elt->sges);
-		assert(wr->num_sge == RTE_DIM(elt->sges));
 		assert(elts_head < rxq->elts_n);
 		assert(rxq->elts_head < rxq->elts_n);
 		ret = rxq->if_cq->poll_length_flags(rxq->cq, NULL, NULL,
@@ -677,6 +669,7 @@ mlx5_rx_burst_sp(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 			struct rte_mbuf *rep;
 			unsigned int seg_tailroom;
 
+			assert(seg != NULL);
 			/*
 			 * Fetch initial bytes of packet descriptor into a
 			 * cacheline while allocating rep.
@@ -688,9 +681,8 @@ mlx5_rx_burst_sp(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 				 * Unable to allocate a replacement mbuf,
 				 * repost WR.
 				 */
-				DEBUG("rxq=%p, wr_id=%" PRIu64 ":"
-				      " can't allocate a new mbuf",
-				      (void *)rxq, wr_id);
+				DEBUG("rxq=%p: can't allocate a new mbuf",
+				      (void *)rxq);
 				if (pkt_buf != NULL) {
 					*pkt_buf_next = NULL;
 					rte_pktmbuf_free(pkt_buf);
@@ -825,18 +817,13 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 		return mlx5_rx_burst_sp(dpdk_rxq, pkts, pkts_n);
 	for (i = 0; (i != pkts_n); ++i) {
 		struct rxq_elt *elt = &(*elts)[elts_head];
-		struct ibv_recv_wr *wr = &elt->wr;
-		uint64_t wr_id = wr->wr_id;
 		unsigned int len;
-		struct rte_mbuf *seg = (void *)((uintptr_t)elt->sge.addr -
-			WR_ID(wr_id).offset);
+		struct rte_mbuf *seg = elt->buf;
 		struct rte_mbuf *rep;
 		uint32_t flags;
 
 		/* Sanity checks. */
-		assert(WR_ID(wr_id).id < rxq->elts_n);
-		assert(wr->sg_list == &elt->sge);
-		assert(wr->num_sge == 1);
+		assert(seg != NULL);
 		assert(elts_head < rxq->elts_n);
 		assert(rxq->elts_head < rxq->elts_n);
 		/*
@@ -888,9 +875,8 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 			 * Unable to allocate a replacement mbuf,
 			 * repost WR.
 			 */
-			DEBUG("rxq=%p, wr_id=%" PRIu32 ":"
-			      " can't allocate a new mbuf",
-			      (void *)rxq, WR_ID(wr_id).id);
+			DEBUG("rxq=%p: can't allocate a new mbuf",
+			      (void *)rxq);
 			/* Increment out of memory counters. */
 			++rxq->stats.rx_nombuf;
 			++rxq->priv->dev->data->rx_mbuf_alloc_failed;
@@ -900,10 +886,7 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 		/* Reconfigure sge to use rep instead of seg. */
 		elt->sge.addr = (uintptr_t)rep->buf_addr + RTE_PKTMBUF_HEADROOM;
 		assert(elt->sge.lkey == rxq->mr->lkey);
-		WR_ID(wr->wr_id).offset =
-			(((uintptr_t)rep->buf_addr + RTE_PKTMBUF_HEADROOM) -
-			 (uintptr_t)rep);
-		assert(WR_ID(wr->wr_id).id == WR_ID(wr_id).id);
+		elt->buf = rep;
 
 		/* Add SGE to array for repost. */
 		sges[i] = elt->sge;
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index d86d623..90c99dc 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -81,16 +81,14 @@ struct mlx5_txq_stats {
 
 /* RX element (scattered packets). */
 struct rxq_elt_sp {
-	struct ibv_recv_wr wr; /* Work Request. */
 	struct ibv_sge sges[MLX5_PMD_SGE_WR_N]; /* Scatter/Gather Elements. */
 	struct rte_mbuf *bufs[MLX5_PMD_SGE_WR_N]; /* SGEs buffers. */
 };
 
 /* RX element. */
 struct rxq_elt {
-	struct ibv_recv_wr wr; /* Work Request. */
 	struct ibv_sge sge; /* Scatter/Gather Element. */
-	/* mbuf pointer is derived from WR_ID(wr.wr_id).offset. */
+	struct rte_mbuf *buf; /* SGE buffer. */
 };
 
 struct priv;
diff --git a/drivers/net/mlx5/mlx5_utils.h b/drivers/net/mlx5/mlx5_utils.h
index 8ff075b..f1fad18 100644
--- a/drivers/net/mlx5/mlx5_utils.h
+++ b/drivers/net/mlx5/mlx5_utils.h
@@ -161,6 +161,4 @@ pmd_drv_log_basename(const char *s)
 	\
 	snprintf(name, sizeof(name), __VA_ARGS__)
 
-#define WR_ID(o) (((wr_id_t *)&(o))->data)
-
 #endif /* RTE_PMD_MLX5_UTILS_H_ */
-- 
2.1.0



More information about the dev mailing list