[dpdk-dev] [PATCH v2 15/23] mlx4: use MOFED 3.0 fast verbs interface for TX operations
Adrien Mazarguil
adrien.mazarguil at 6wind.com
Tue Jun 30 11:28:01 CEST 2015
The "raw" post send interface was experimental and has been deprecated. This
commit replaces it with a new low level interface that dissociates post and
flush (doorbell) operations for improved QP performance.
The CQ polling function is updated as well.
Signed-off-by: Alex Rosenbaum <Alexr at mellanox.com>
Signed-off-by: Adrien Mazarguil <adrien.mazarguil at 6wind.com>
---
drivers/net/mlx4/Makefile | 4 --
drivers/net/mlx4/mlx4.c | 167 +++++++++++++++++++++++-----------------------
2 files changed, 85 insertions(+), 86 deletions(-)
diff --git a/drivers/net/mlx4/Makefile b/drivers/net/mlx4/Makefile
index ce1f2b0..fd74dc8 100644
--- a/drivers/net/mlx4/Makefile
+++ b/drivers/net/mlx4/Makefile
@@ -109,10 +109,6 @@ mlx4_autoconf.h: $(RTE_SDK)/scripts/auto-config-h.sh
infiniband/verbs.h \
enum IBV_EXP_DEVICE_ATTR_INLINE_RECV_SZ $(AUTOCONF_OUTPUT)
$Q sh -- '$<' '$@' \
- SEND_RAW_WR_SUPPORT \
- infiniband/verbs.h \
- type 'struct ibv_send_wr_raw' $(AUTOCONF_OUTPUT)
- $Q sh -- '$<' '$@' \
HAVE_EXP_QUERY_DEVICE \
infiniband/verbs.h \
type 'struct ibv_exp_device_attr' $(AUTOCONF_OUTPUT)
diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index f76f415..3dff64d 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -139,15 +139,6 @@ static inline void wr_id_t_check(void)
(void)wr_id_t_check;
}
-/* If raw send operations are available, use them since they are faster. */
-#ifdef SEND_RAW_WR_SUPPORT
-typedef struct ibv_send_wr_raw mlx4_send_wr_t;
-#define mlx4_post_send ibv_post_send_raw
-#else
-typedef struct ibv_send_wr mlx4_send_wr_t;
-#define mlx4_post_send ibv_post_send
-#endif
-
struct mlx4_rxq_stats {
unsigned int idx; /**< Mapping index. */
#ifdef MLX4_PMD_SOFT_COUNTERS
@@ -212,7 +203,7 @@ struct rxq {
/* TX element. */
struct txq_elt {
- mlx4_send_wr_t wr; /* Work Request. */
+ struct ibv_send_wr wr; /* Work Request. */
struct ibv_sge sges[MLX4_PMD_SGE_WR_N]; /* Scatter/Gather Elements. */
/* mbuf pointer is derived from WR_ID(wr.wr_id).offset. */
};
@@ -235,6 +226,8 @@ struct txq {
} mp2mr[MLX4_PMD_TX_MP_CACHE]; /* MP to MR translation table. */
struct ibv_cq *cq; /* Completion Queue. */
struct ibv_qp *qp; /* Queue Pair. */
+ struct ibv_exp_qp_burst_family *if_qp; /* QP burst interface. */
+ struct ibv_exp_cq_family *if_cq; /* CQ interface. */
#if MLX4_PMD_MAX_INLINE > 0
uint32_t max_inline; /* Max inline send size <= MLX4_PMD_MAX_INLINE. */
#endif
@@ -797,7 +790,7 @@ txq_alloc_elts(struct txq *txq, unsigned int elts_n)
}
for (i = 0; (i != elts_n); ++i) {
struct txq_elt *elt = &(*elts)[i];
- mlx4_send_wr_t *wr = &elt->wr;
+ struct ibv_send_wr *wr = &elt->wr;
/* Configure WR. */
WR_ID(wr->wr_id).id = i;
@@ -883,10 +876,33 @@ txq_free_elts(struct txq *txq)
static void
txq_cleanup(struct txq *txq)
{
+ struct ibv_exp_release_intf_params params;
size_t i;
DEBUG("cleaning up %p", (void *)txq);
txq_free_elts(txq);
+ if (txq->if_qp != NULL) {
+ assert(txq->priv != NULL);
+ assert(txq->priv->ctx != NULL);
+ assert(txq->qp != NULL);
+ params = (struct ibv_exp_release_intf_params){
+ .comp_mask = 0,
+ };
+ claim_zero(ibv_exp_release_intf(txq->priv->ctx,
+ txq->if_qp,
+ &params));
+ }
+ if (txq->if_cq != NULL) {
+ assert(txq->priv != NULL);
+ assert(txq->priv->ctx != NULL);
+ assert(txq->cq != NULL);
+ params = (struct ibv_exp_release_intf_params){
+ .comp_mask = 0,
+ };
+ claim_zero(ibv_exp_release_intf(txq->priv->ctx,
+ txq->if_cq,
+ &params));
+ }
if (txq->qp != NULL)
claim_zero(ibv_destroy_qp(txq->qp));
if (txq->cq != NULL)
@@ -920,7 +936,6 @@ txq_complete(struct txq *txq)
unsigned int elts_comp = txq->elts_comp;
unsigned int elts_tail = txq->elts_tail;
const unsigned int elts_n = txq->elts_n;
- struct ibv_wc wcs[elts_comp];
int wcs_n;
if (unlikely(elts_comp == 0))
@@ -929,7 +944,7 @@ txq_complete(struct txq *txq)
DEBUG("%p: processing %u work requests completions",
(void *)txq, elts_comp);
#endif
- wcs_n = ibv_poll_cq(txq->cq, elts_comp, wcs);
+ wcs_n = txq->if_cq->poll_cnt(txq->cq, elts_comp);
if (unlikely(wcs_n == 0))
return 0;
if (unlikely(wcs_n < 0)) {
@@ -1059,9 +1074,8 @@ static uint16_t
mlx4_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
struct txq *txq = (struct txq *)dpdk_txq;
- mlx4_send_wr_t head;
- mlx4_send_wr_t **wr_next = &head.next;
- mlx4_send_wr_t *bad_wr;
+ struct ibv_send_wr head;
+ struct ibv_send_wr **wr_next = &head.next;
unsigned int elts_head = txq->elts_head;
const unsigned int elts_tail = txq->elts_tail;
const unsigned int elts_n = txq->elts_n;
@@ -1087,13 +1101,14 @@ mlx4_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
for (i = 0; (i != max); ++i) {
struct rte_mbuf *buf = pkts[i];
struct txq_elt *elt = &(*txq->elts)[elts_head];
- mlx4_send_wr_t *wr = &elt->wr;
+ struct ibv_send_wr *wr = &elt->wr;
unsigned int segs = NB_SEGS(buf);
-#if (MLX4_PMD_MAX_INLINE > 0) || defined(MLX4_PMD_SOFT_COUNTERS)
+#ifdef MLX4_PMD_SOFT_COUNTERS
unsigned int sent_size = 0;
#endif
unsigned int j;
int linearize = 0;
+ uint32_t send_flags = 0;
/* Clean up old buffer. */
if (likely(WR_ID(wr->wr_id).offset != 0)) {
@@ -1179,7 +1194,7 @@ mlx4_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
(uintptr_t)sge->addr);
sge->length = DATA_LEN(buf);
sge->lkey = lkey;
-#if (MLX4_PMD_MAX_INLINE > 0) || defined(MLX4_PMD_SOFT_COUNTERS)
+#ifdef MLX4_PMD_SOFT_COUNTERS
sent_size += sge->length;
#endif
buf = NEXT(buf);
@@ -1236,24 +1251,19 @@ mlx4_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
sge->addr = (uintptr_t)&(*linear)[0];
sge->length = size;
sge->lkey = txq->mr_linear->lkey;
-#if (MLX4_PMD_MAX_INLINE > 0) || defined(MLX4_PMD_SOFT_COUNTERS)
+#ifdef MLX4_PMD_SOFT_COUNTERS
sent_size += size;
#endif
}
/* Link WRs together for ibv_post_send(). */
*wr_next = wr;
wr_next = &wr->next;
-#if MLX4_PMD_MAX_INLINE > 0
- if (sent_size <= txq->max_inline)
- wr->send_flags = IBV_SEND_INLINE;
- else
-#endif
- wr->send_flags = 0;
+ assert(wr->send_flags == 0);
/* Request TX completion. */
if (unlikely(--elts_comp_cd == 0)) {
elts_comp_cd = txq->elts_comp_cd_init;
++elts_comp;
- wr->send_flags |= IBV_SEND_SIGNALED;
+ send_flags |= IBV_EXP_QP_BURST_SIGNALED;
}
if (++elts_head >= elts_n)
elts_head = 0;
@@ -1261,6 +1271,24 @@ mlx4_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
/* Increment sent bytes counter. */
txq->stats.obytes += sent_size;
#endif
+ /* Put SG list into send queue and ask for completion event. */
+#if MLX4_PMD_MAX_INLINE > 0
+ if ((segs == 1) &&
+ (elt->sges[0].length <= txq->max_inline))
+ err = txq->if_qp->send_pending_inline
+ (txq->qp,
+ (void *)(uintptr_t)elt->sges[0].addr,
+ elt->sges[0].length,
+ send_flags);
+ else
+#endif
+ err = txq->if_qp->send_pending_sg_list
+ (txq->qp,
+ elt->sges,
+ segs,
+ send_flags);
+ if (unlikely(err))
+ goto stop;
}
stop:
/* Take a shortcut if nothing must be sent. */
@@ -1271,62 +1299,13 @@ stop:
txq->stats.opackets += i;
#endif
*wr_next = NULL;
- err = mlx4_post_send(txq->qp, head.next, &bad_wr);
+ /* Ring QP doorbell. */
+ err = txq->if_qp->send_flush(txq->qp);
if (unlikely(err)) {
- unsigned int unsent = 0;
-
- /* An error occurred, fix counters. */
- while (bad_wr != NULL) {
- struct txq_elt *elt =
- containerof(bad_wr, struct txq_elt, wr);
- mlx4_send_wr_t *wr = &elt->wr;
- mlx4_send_wr_t *next = wr->next;
-#if defined(MLX4_PMD_SOFT_COUNTERS) || !defined(NDEBUG)
- unsigned int j;
-#endif
-
- assert(wr == bad_wr);
- /* Clean up TX element without freeing it, caller
- * should take care of this. */
- WR_ID(elt->wr.wr_id).offset = 0;
-#ifdef MLX4_PMD_SOFT_COUNTERS
- for (j = 0; ((int)j < wr->num_sge); ++j)
- txq->stats.obytes -= wr->sg_list[j].length;
-#endif
- ++unsent;
- if (wr->send_flags & IBV_SEND_SIGNALED) {
- assert(elts_comp != 0);
- --elts_comp;
- }
- if (elts_comp_cd == txq->elts_comp_cd_init)
- elts_comp_cd = 1;
- else
- ++elts_comp_cd;
-#ifndef NDEBUG
- /* For assert(). */
- for (j = 0; ((int)j < wr->num_sge); ++j) {
- elt->sges[j].addr = 0;
- elt->sges[j].length = 0;
- elt->sges[j].lkey = 0;
- }
- wr->next = NULL;
- wr->num_sge = 0;
-#endif
- bad_wr = next;
- }
-#ifdef MLX4_PMD_SOFT_COUNTERS
- txq->stats.opackets -= unsent;
-#endif
- assert(i >= unsent);
- i -= unsent;
- /* "Unsend" remaining packets. */
- elts_head -= unsent;
- if (elts_head >= elts_n)
- elts_head += elts_n;
- assert(elts_head < elts_n);
- DEBUG("%p: mlx4_post_send() failed, %u unprocessed WRs: %s",
- (void *)txq, unsent,
- ((err <= -1) ? "Internal error" : strerror(err)));
+ /* A nonzero value is not supposed to be returned.
+ * Nothing can be done about it. */
+ DEBUG("%p: send_flush() failed with error %d",
+ (void *)txq, err);
}
txq->elts_head = elts_head;
txq->elts_comp += elts_comp;
@@ -1361,9 +1340,11 @@ txq_setup(struct rte_eth_dev *dev, struct txq *txq, uint16_t desc,
.socket = socket
};
union {
+ struct ibv_exp_query_intf_params params;
struct ibv_qp_init_attr init;
struct ibv_exp_qp_attr mod;
} attr;
+ enum ibv_exp_query_intf_status status;
int ret = 0;
(void)conf; /* Thresholds configuration (ignored). */
@@ -1455,6 +1436,28 @@ txq_setup(struct rte_eth_dev *dev, struct txq *txq, uint16_t desc,
(void *)dev, strerror(ret));
goto error;
}
+ attr.params = (struct ibv_exp_query_intf_params){
+ .intf_scope = IBV_EXP_INTF_GLOBAL,
+ .intf = IBV_EXP_INTF_CQ,
+ .obj = tmpl.cq,
+ };
+ tmpl.if_cq = ibv_exp_query_intf(priv->ctx, &attr.params, &status);
+ if (tmpl.if_cq == NULL) {
+ ERROR("%p: CQ interface family query failed with status %d",
+ (void *)dev, status);
+ goto error;
+ }
+ attr.params = (struct ibv_exp_query_intf_params){
+ .intf_scope = IBV_EXP_INTF_GLOBAL,
+ .intf = IBV_EXP_INTF_QP_BURST,
+ .obj = tmpl.qp,
+ };
+ tmpl.if_qp = ibv_exp_query_intf(priv->ctx, &attr.params, &status);
+ if (tmpl.if_qp == NULL) {
+ ERROR("%p: QP interface family query failed with status %d",
+ (void *)dev, status);
+ goto error;
+ }
/* Clean up txq in case we're reinitializing it. */
DEBUG("%p: cleaning-up old txq just in case", (void *)txq);
txq_cleanup(txq);
--
2.1.0
More information about the dev
mailing list