[dpdk-dev] [PATCH v2 24/51] net/mlx4: revert fast Verbs interface for Tx

Adrien Mazarguil adrien.mazarguil at 6wind.com
Fri Sep 1 10:06:39 CEST 2017


This reverts commit 9980f81dc2623291b89cf1c281a6a9f116fd2394.

"Fast Verbs" is a nonstandard experimental interface that must be reverted
for compatibility reasons. Its replacement is slower but temporary,
performance will be restored by a subsequent commit through an enhanced
data path implementation. This one focuses on maintaining basic
functionality in the meantime.

Signed-off-by: Moti Haimovsky <motih at mellanox.com>
Signed-off-by: Adrien Mazarguil <adrien.mazarguil at 6wind.com>
---
 drivers/net/mlx4/mlx4.c | 120 ++++++++++++++++++-------------------------
 drivers/net/mlx4/mlx4.h |   4 +-
 2 files changed, 52 insertions(+), 72 deletions(-)

diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index 5abfb37..4432952 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -666,33 +666,10 @@ txq_free_elts(struct txq *txq)
 static void
 txq_cleanup(struct txq *txq)
 {
-	struct ibv_exp_release_intf_params params;
 	size_t i;
 
 	DEBUG("cleaning up %p", (void *)txq);
 	txq_free_elts(txq);
-	if (txq->if_qp != NULL) {
-		assert(txq->priv != NULL);
-		assert(txq->priv->ctx != NULL);
-		assert(txq->qp != NULL);
-		params = (struct ibv_exp_release_intf_params){
-			.comp_mask = 0,
-		};
-		claim_zero(ibv_exp_release_intf(txq->priv->ctx,
-						txq->if_qp,
-						&params));
-	}
-	if (txq->if_cq != NULL) {
-		assert(txq->priv != NULL);
-		assert(txq->priv->ctx != NULL);
-		assert(txq->cq != NULL);
-		params = (struct ibv_exp_release_intf_params){
-			.comp_mask = 0,
-		};
-		claim_zero(ibv_exp_release_intf(txq->priv->ctx,
-						txq->if_cq,
-						&params));
-	}
 	if (txq->qp != NULL)
 		claim_zero(ibv_destroy_qp(txq->qp));
 	if (txq->cq != NULL)
@@ -726,11 +703,12 @@ txq_complete(struct txq *txq)
 	unsigned int elts_comp = txq->elts_comp;
 	unsigned int elts_tail = txq->elts_tail;
 	const unsigned int elts_n = txq->elts_n;
+	struct ibv_wc wcs[elts_comp];
 	int wcs_n;
 
 	if (unlikely(elts_comp == 0))
 		return 0;
-	wcs_n = txq->if_cq->poll_cnt(txq->cq, elts_comp);
+	wcs_n = ibv_poll_cq(txq->cq, elts_comp, wcs);
 	if (unlikely(wcs_n == 0))
 		return 0;
 	if (unlikely(wcs_n < 0)) {
@@ -1014,6 +992,9 @@ static uint16_t
 mlx4_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 {
 	struct txq *txq = (struct txq *)dpdk_txq;
+	struct ibv_send_wr *wr_head = NULL;
+	struct ibv_send_wr **wr_next = &wr_head;
+	struct ibv_send_wr *wr_bad = NULL;
 	unsigned int elts_head = txq->elts_head;
 	const unsigned int elts_n = txq->elts_n;
 	unsigned int elts_comp_cd = txq->elts_comp_cd;
@@ -1041,6 +1022,7 @@ mlx4_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 			(((elts_head + 1) == elts_n) ? 0 : elts_head + 1);
 		struct txq_elt *elt_next = &(*txq->elts)[elts_head_next];
 		struct txq_elt *elt = &(*txq->elts)[elts_head];
+		struct ibv_send_wr *wr = &elt->wr;
 		unsigned int segs = NB_SEGS(buf);
 		unsigned int sent_size = 0;
 		uint32_t send_flags = 0;
@@ -1065,9 +1047,10 @@ mlx4_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 		if (unlikely(--elts_comp_cd == 0)) {
 			elts_comp_cd = txq->elts_comp_cd_init;
 			++elts_comp;
-			send_flags |= IBV_EXP_QP_BURST_SIGNALED;
+			send_flags |= IBV_SEND_SIGNALED;
 		}
 		if (likely(segs == 1)) {
+			struct ibv_sge *sge = &elt->sge;
 			uintptr_t addr;
 			uint32_t length;
 			uint32_t lkey;
@@ -1091,30 +1074,26 @@ mlx4_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 				rte_prefetch0((volatile void *)
 					      (uintptr_t)addr);
 			RTE_MBUF_PREFETCH_TO_FREE(elt_next->buf);
-			/* Put packet into send queue. */
-			if (length <= txq->max_inline)
-				err = txq->if_qp->send_pending_inline
-					(txq->qp,
-					 (void *)addr,
-					 length,
-					 send_flags);
-			else
-				err = txq->if_qp->send_pending
-					(txq->qp,
-					 addr,
-					 length,
-					 lkey,
-					 send_flags);
-			if (unlikely(err))
-				goto stop;
+			sge->addr = addr;
+			sge->length = length;
+			sge->lkey = lkey;
 			sent_size += length;
 		} else {
 			err = -1;
 			goto stop;
 		}
+		if (sent_size <= txq->max_inline)
+			send_flags |= IBV_SEND_INLINE;
 		elts_head = elts_head_next;
 		/* Increment sent bytes counter. */
 		txq->stats.obytes += sent_size;
+		/* Set up WR. */
+		wr->sg_list = &elt->sge;
+		wr->num_sge = segs;
+		wr->opcode = IBV_WR_SEND;
+		wr->send_flags = send_flags;
+		*wr_next = wr;
+		wr_next = &wr->next;
 	}
 stop:
 	/* Take a shortcut if nothing must be sent. */
@@ -1123,12 +1102,37 @@ mlx4_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 	/* Increment sent packets counter. */
 	txq->stats.opackets += i;
 	/* Ring QP doorbell. */
-	err = txq->if_qp->send_flush(txq->qp);
+	*wr_next = NULL;
+	assert(wr_head);
+	err = ibv_post_send(txq->qp, wr_head, &wr_bad);
 	if (unlikely(err)) {
-		/* A nonzero value is not supposed to be returned.
-		 * Nothing can be done about it. */
-		DEBUG("%p: send_flush() failed with error %d",
-		      (void *)txq, err);
+		uint64_t obytes = 0;
+		uint64_t opackets = 0;
+
+		/* Rewind bad WRs. */
+		while (wr_bad != NULL) {
+			int j;
+
+			/* Force completion request if one was lost. */
+			if (wr_bad->send_flags & IBV_SEND_SIGNALED) {
+				elts_comp_cd = 1;
+				--elts_comp;
+			}
+			++opackets;
+			for (j = 0; j < wr_bad->num_sge; ++j)
+				obytes += wr_bad->sg_list[j].length;
+			elts_head = (elts_head ? elts_head : elts_n) - 1;
+			wr_bad = wr_bad->next;
+		}
+		txq->stats.opackets -= opackets;
+		txq->stats.obytes -= obytes;
+		i -= opackets;
+		DEBUG("%p: ibv_post_send() failed, %" PRIu64 " packets"
+		      " (%" PRIu64 " bytes) rejected: %s",
+		      (void *)txq,
+		      opackets,
+		      obytes,
+		      (err <= -1) ? "Internal error" : strerror(err));
 	}
 	txq->elts_head = elts_head;
 	txq->elts_comp += elts_comp;
@@ -1163,11 +1167,9 @@ txq_setup(struct rte_eth_dev *dev, struct txq *txq, uint16_t desc,
 		.socket = socket
 	};
 	union {
-		struct ibv_exp_query_intf_params params;
 		struct ibv_qp_init_attr init;
 		struct ibv_qp_attr mod;
 	} attr;
-	enum ibv_exp_query_intf_status status;
 	int ret = 0;
 
 	(void)conf; /* Thresholds configuration (ignored). */
@@ -1251,28 +1253,6 @@ txq_setup(struct rte_eth_dev *dev, struct txq *txq, uint16_t desc,
 		      (void *)dev, strerror(ret));
 		goto error;
 	}
-	attr.params = (struct ibv_exp_query_intf_params){
-		.intf_scope = IBV_EXP_INTF_GLOBAL,
-		.intf = IBV_EXP_INTF_CQ,
-		.obj = tmpl.cq,
-	};
-	tmpl.if_cq = ibv_exp_query_intf(priv->ctx, &attr.params, &status);
-	if (tmpl.if_cq == NULL) {
-		ERROR("%p: CQ interface family query failed with status %d",
-		      (void *)dev, status);
-		goto error;
-	}
-	attr.params = (struct ibv_exp_query_intf_params){
-		.intf_scope = IBV_EXP_INTF_GLOBAL,
-		.intf = IBV_EXP_INTF_QP_BURST,
-		.obj = tmpl.qp,
-	};
-	tmpl.if_qp = ibv_exp_query_intf(priv->ctx, &attr.params, &status);
-	if (tmpl.if_qp == NULL) {
-		ERROR("%p: QP interface family query failed with status %d",
-		      (void *)dev, status);
-		goto error;
-	}
 	/* Clean up txq in case we're reinitializing it. */
 	DEBUG("%p: cleaning-up old txq just in case", (void *)txq);
 	txq_cleanup(txq);
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index edec40c..8a9a678 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -171,6 +171,8 @@ struct rxq {
 
 /* TX element. */
 struct txq_elt {
+	struct ibv_send_wr wr; /* Work request. */
+	struct ibv_sge sge; /* Scatter/gather element. */
 	struct rte_mbuf *buf;
 };
 
@@ -191,8 +193,6 @@ struct txq {
 	} mp2mr[MLX4_PMD_TX_MP_CACHE]; /* MP to MR translation table. */
 	struct ibv_cq *cq; /* Completion Queue. */
 	struct ibv_qp *qp; /* Queue Pair. */
-	struct ibv_exp_qp_burst_family *if_qp; /* QP burst interface. */
-	struct ibv_exp_cq_family *if_cq; /* CQ interface. */
 	uint32_t max_inline; /* Max inline send size <= MLX4_PMD_MAX_INLINE. */
 	unsigned int elts_n; /* (*elts)[] length. */
 	struct txq_elt (*elts)[]; /* TX elements. */
-- 
2.1.4



More information about the dev mailing list