[dpdk-dev] [PATCH v2 13/25] mlx5: refactor Rx data path

Nelio Laranjeiro nelio.laranjeiro at 6wind.com
Mon Jun 20 18:10:25 CEST 2016


Bypass Verbs to improve RX performance.

Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro at 6wind.com>
Signed-off-by: Yaacov Hazan <yaacovh at mellanox.com>
Signed-off-by: Adrien Mazarguil <adrien.mazarguil at 6wind.com>
Signed-off-by: Vasily Philipov <vasilyf at mellanox.com>
---
 drivers/net/mlx5/mlx5_ethdev.c |   4 +-
 drivers/net/mlx5/mlx5_fdir.c   |   2 +-
 drivers/net/mlx5/mlx5_rxq.c    | 303 ++++++++++++++++++++---------------------
 drivers/net/mlx5/mlx5_rxtx.c   | 290 ++++++++++++++++++++-------------------
 drivers/net/mlx5/mlx5_rxtx.h   |  38 +++---
 drivers/net/mlx5/mlx5_vlan.c   |   3 +-
 6 files changed, 325 insertions(+), 315 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index 771d8b5..8628321 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -1262,7 +1262,9 @@ mlx5_secondary_data_setup(struct priv *priv)
 	}
 	/* RX queues. */
 	for (i = 0; i != nb_rx_queues; ++i) {
-		struct rxq *primary_rxq = (*sd->primary_priv->rxqs)[i];
+		struct rxq_ctrl *primary_rxq =
+			container_of((*sd->primary_priv->rxqs)[i],
+				     struct rxq_ctrl, rxq);
 
 		if (primary_rxq == NULL)
 			continue;
diff --git a/drivers/net/mlx5/mlx5_fdir.c b/drivers/net/mlx5/mlx5_fdir.c
index 1850218..73eb00e 100644
--- a/drivers/net/mlx5/mlx5_fdir.c
+++ b/drivers/net/mlx5/mlx5_fdir.c
@@ -431,7 +431,7 @@ priv_get_fdir_queue(struct priv *priv, uint16_t idx)
 	ind_init_attr = (struct ibv_exp_rwq_ind_table_init_attr){
 		.pd = priv->pd,
 		.log_ind_tbl_size = 0,
-		.ind_tbl = &((*priv->rxqs)[idx]->wq),
+		.ind_tbl = &rxq_ctrl->wq,
 		.comp_mask = 0,
 	};
 
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 7db4ce7..a8f68a3 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -43,6 +43,8 @@
 #pragma GCC diagnostic ignored "-pedantic"
 #endif
 #include <infiniband/verbs.h>
+#include <infiniband/arch.h>
+#include <infiniband/mlx5_hw.h>
 #ifdef PEDANTIC
 #pragma GCC diagnostic error "-pedantic"
 #endif
@@ -373,8 +375,13 @@ priv_create_hash_rxqs(struct priv *priv)
 		DEBUG("indirection table extended to assume %u WQs",
 		      priv->reta_idx_n);
 	}
-	for (i = 0; (i != priv->reta_idx_n); ++i)
-		wqs[i] = (*priv->rxqs)[(*priv->reta_idx)[i]]->wq;
+	for (i = 0; (i != priv->reta_idx_n); ++i) {
+		struct rxq_ctrl *rxq_ctrl;
+
+		rxq_ctrl = container_of((*priv->rxqs)[(*priv->reta_idx)[i]],
+					struct rxq_ctrl, rxq);
+		wqs[i] = rxq_ctrl->wq;
+	}
 	/* Get number of hash RX queues to configure. */
 	for (i = 0, hash_rxqs_n = 0; (i != ind_tables_n); ++i)
 		hash_rxqs_n += ind_table_init[i].hash_types_n;
@@ -638,21 +645,13 @@ rxq_alloc_elts(struct rxq_ctrl *rxq_ctrl, unsigned int elts_n,
 	       struct rte_mbuf **pool)
 {
 	unsigned int i;
-	struct rxq_elt (*elts)[elts_n] =
-		rte_calloc_socket("RXQ elements", 1, sizeof(*elts), 0,
-				  rxq_ctrl->socket);
 	int ret = 0;
 
-	if (elts == NULL) {
-		ERROR("%p: can't allocate packets array", (void *)rxq_ctrl);
-		ret = ENOMEM;
-		goto error;
-	}
 	/* For each WR (packet). */
 	for (i = 0; (i != elts_n); ++i) {
-		struct rxq_elt *elt = &(*elts)[i];
-		struct ibv_sge *sge = &(*elts)[i].sge;
 		struct rte_mbuf *buf;
+		volatile struct mlx5_wqe_data_seg *scat =
+			&(*rxq_ctrl->rxq.wqes)[i];
 
 		if (pool != NULL) {
 			buf = *(pool++);
@@ -666,40 +665,36 @@ rxq_alloc_elts(struct rxq_ctrl *rxq_ctrl, unsigned int elts_n,
 			ret = ENOMEM;
 			goto error;
 		}
-		elt->buf = buf;
 		/* Headroom is reserved by rte_pktmbuf_alloc(). */
 		assert(DATA_OFF(buf) == RTE_PKTMBUF_HEADROOM);
 		/* Buffer is supposed to be empty. */
 		assert(rte_pktmbuf_data_len(buf) == 0);
 		assert(rte_pktmbuf_pkt_len(buf) == 0);
-		/* sge->addr must be able to store a pointer. */
-		assert(sizeof(sge->addr) >= sizeof(uintptr_t));
-		/* SGE keeps its headroom. */
-		sge->addr = (uintptr_t)
-			((uint8_t *)buf->buf_addr + RTE_PKTMBUF_HEADROOM);
-		sge->length = (buf->buf_len - RTE_PKTMBUF_HEADROOM);
-		sge->lkey = rxq_ctrl->mr->lkey;
-		/* Redundant check for tailroom. */
-		assert(sge->length == rte_pktmbuf_tailroom(buf));
+		assert(!buf->next);
+		PORT(buf) = rxq_ctrl->rxq.port_id;
+		DATA_LEN(buf) = rte_pktmbuf_tailroom(buf);
+		PKT_LEN(buf) = DATA_LEN(buf);
+		NB_SEGS(buf) = 1;
+		/* scat->addr must be able to store a pointer. */
+		assert(sizeof(scat->addr) >= sizeof(uintptr_t));
+		*scat = (struct mlx5_wqe_data_seg){
+			.addr = htonll(rte_pktmbuf_mtod(buf, uintptr_t)),
+			.byte_count = htonl(DATA_LEN(buf)),
+			.lkey = htonl(rxq_ctrl->mr->lkey),
+		};
+		(*rxq_ctrl->rxq.elts)[i] = buf;
 	}
 	DEBUG("%p: allocated and configured %u single-segment WRs",
 	      (void *)rxq_ctrl, elts_n);
-	rxq_ctrl->rxq.elts_n = elts_n;
-	rxq_ctrl->rxq.elts_head = 0;
-	rxq_ctrl->rxq.elts = elts;
 	assert(ret == 0);
 	return 0;
 error:
-	if (elts != NULL) {
-		assert(pool == NULL);
-		for (i = 0; (i != RTE_DIM(*elts)); ++i) {
-			struct rxq_elt *elt = &(*elts)[i];
-			struct rte_mbuf *buf = elt->buf;
-
-			if (buf != NULL)
-				rte_pktmbuf_free_seg(buf);
-		}
-		rte_free(elts);
+	assert(pool == NULL);
+	elts_n = i;
+	for (i = 0; (i != elts_n); ++i) {
+		if ((*rxq_ctrl->rxq.elts)[i] != NULL)
+			rte_pktmbuf_free_seg((*rxq_ctrl->rxq.elts)[i]);
+		(*rxq_ctrl->rxq.elts)[i] = NULL;
 	}
 	DEBUG("%p: failed, freed everything", (void *)rxq_ctrl);
 	assert(ret > 0);
@@ -716,22 +711,16 @@ static void
 rxq_free_elts(struct rxq_ctrl *rxq_ctrl)
 {
 	unsigned int i;
-	unsigned int elts_n = rxq_ctrl->rxq.elts_n;
-	struct rxq_elt (*elts)[elts_n] = rxq_ctrl->rxq.elts;
 
 	DEBUG("%p: freeing WRs", (void *)rxq_ctrl);
-	rxq_ctrl->rxq.elts_n = 0;
-	rxq_ctrl->rxq.elts = NULL;
-	if (elts == NULL)
+	if (rxq_ctrl->rxq.elts == NULL)
 		return;
-	for (i = 0; (i != RTE_DIM(*elts)); ++i) {
-		struct rxq_elt *elt = &(*elts)[i];
-		struct rte_mbuf *buf = elt->buf;
 
-		if (buf != NULL)
-			rte_pktmbuf_free_seg(buf);
+	for (i = 0; (i != rxq_ctrl->rxq.elts_n); ++i) {
+		if ((*rxq_ctrl->rxq.elts)[i] != NULL)
+			rte_pktmbuf_free_seg((*rxq_ctrl->rxq.elts)[i]);
+		(*rxq_ctrl->rxq.elts)[i] = NULL;
 	}
-	rte_free(elts);
 }
 
 /**
@@ -749,42 +738,40 @@ rxq_cleanup(struct rxq_ctrl *rxq_ctrl)
 
 	DEBUG("cleaning up %p", (void *)rxq_ctrl);
 	rxq_free_elts(rxq_ctrl);
-	rxq_ctrl->rxq.poll = NULL;
-	rxq_ctrl->rxq.recv = NULL;
 	if (rxq_ctrl->if_wq != NULL) {
-		assert(rxq_ctrl->rxq.priv != NULL);
-		assert(rxq_ctrl->rxq.priv->ctx != NULL);
-		assert(rxq_ctrl->rxq.wq != NULL);
+		assert(rxq_ctrl->priv != NULL);
+		assert(rxq_ctrl->priv->ctx != NULL);
+		assert(rxq_ctrl->wq != NULL);
 		params = (struct ibv_exp_release_intf_params){
 			.comp_mask = 0,
 		};
-		claim_zero(ibv_exp_release_intf(rxq_ctrl->rxq.priv->ctx,
+		claim_zero(ibv_exp_release_intf(rxq_ctrl->priv->ctx,
 						rxq_ctrl->if_wq,
 						&params));
 	}
 	if (rxq_ctrl->if_cq != NULL) {
-		assert(rxq_ctrl->rxq.priv != NULL);
-		assert(rxq_ctrl->rxq.priv->ctx != NULL);
-		assert(rxq_ctrl->rxq.cq != NULL);
+		assert(rxq_ctrl->priv != NULL);
+		assert(rxq_ctrl->priv->ctx != NULL);
+		assert(rxq_ctrl->cq != NULL);
 		params = (struct ibv_exp_release_intf_params){
 			.comp_mask = 0,
 		};
-		claim_zero(ibv_exp_release_intf(rxq_ctrl->rxq.priv->ctx,
+		claim_zero(ibv_exp_release_intf(rxq_ctrl->priv->ctx,
 						rxq_ctrl->if_cq,
 						&params));
 	}
-	if (rxq_ctrl->rxq.wq != NULL)
-		claim_zero(ibv_exp_destroy_wq(rxq_ctrl->rxq.wq));
-	if (rxq_ctrl->rxq.cq != NULL)
-		claim_zero(ibv_destroy_cq(rxq_ctrl->rxq.cq));
+	if (rxq_ctrl->wq != NULL)
+		claim_zero(ibv_exp_destroy_wq(rxq_ctrl->wq));
+	if (rxq_ctrl->cq != NULL)
+		claim_zero(ibv_destroy_cq(rxq_ctrl->cq));
 	if (rxq_ctrl->rd != NULL) {
 		struct ibv_exp_destroy_res_domain_attr attr = {
 			.comp_mask = 0,
 		};
 
-		assert(rxq_ctrl->rxq.priv != NULL);
-		assert(rxq_ctrl->rxq.priv->ctx != NULL);
-		claim_zero(ibv_exp_destroy_res_domain(rxq_ctrl->rxq.priv->ctx,
+		assert(rxq_ctrl->priv != NULL);
+		assert(rxq_ctrl->priv->ctx != NULL);
+		claim_zero(ibv_exp_destroy_res_domain(rxq_ctrl->priv->ctx,
 						      rxq_ctrl->rd,
 						      &attr));
 	}
@@ -811,14 +798,13 @@ rxq_cleanup(struct rxq_ctrl *rxq_ctrl)
 int
 rxq_rehash(struct rte_eth_dev *dev, struct rxq_ctrl *rxq_ctrl)
 {
-	struct priv *priv = rxq_ctrl->rxq.priv;
+	struct priv *priv = rxq_ctrl->priv;
 	struct rxq_ctrl tmpl = *rxq_ctrl;
 	unsigned int mbuf_n;
 	unsigned int desc_n;
 	struct rte_mbuf **pool;
 	unsigned int i, k;
 	struct ibv_exp_wq_attr mod;
-	struct rxq_elt (*elts)[tmpl.rxq.elts_n];
 	int err;
 
 	DEBUG("%p: rehashing queue %p", (void *)dev, (void *)rxq_ctrl);
@@ -840,7 +826,7 @@ rxq_rehash(struct rte_eth_dev *dev, struct rxq_ctrl *rxq_ctrl)
 		.attr_mask = IBV_EXP_WQ_ATTR_STATE,
 		.wq_state = IBV_EXP_WQS_RESET,
 	};
-	err = ibv_exp_modify_wq(tmpl.rxq.wq, &mod);
+	err = ibv_exp_modify_wq(tmpl.wq, &mod);
 	if (err) {
 		ERROR("%p: cannot reset WQ: %s", (void *)dev, strerror(err));
 		assert(err > 0);
@@ -854,60 +840,33 @@ rxq_rehash(struct rte_eth_dev *dev, struct rxq_ctrl *rxq_ctrl)
 	}
 	/* Snatch mbufs from original queue. */
 	k = 0;
-	elts = rxq_ctrl->rxq.elts;
-	for (i = 0; (i != RTE_DIM(*elts)); ++i) {
-		struct rxq_elt *elt = &(*elts)[i];
-		struct rte_mbuf *buf = elt->buf;
-
-		pool[k++] = buf;
-	}
+	for (i = 0; (i != desc_n); ++i)
+		pool[k++] = (*rxq_ctrl->rxq.elts)[i];
 	assert(k == mbuf_n);
-	tmpl.rxq.elts_n = 0;
-	tmpl.rxq.elts = NULL;
-	assert((void *)&tmpl.rxq.elts == NULL);
-	err = rxq_alloc_elts(&tmpl, desc_n, pool);
-	if (err) {
-		ERROR("%p: cannot reallocate WRs, aborting", (void *)dev);
-		rte_free(pool);
-		assert(err > 0);
-		return err;
-	}
-	assert(tmpl.rxq.elts_n == desc_n);
 	rte_free(pool);
-	/* Clean up original data. */
-	rxq_ctrl->rxq.elts_n = 0;
-	rte_free(rxq_ctrl->rxq.elts);
-	rxq_ctrl->rxq.elts = NULL;
 	/* Change queue state to ready. */
 	mod = (struct ibv_exp_wq_attr){
 		.attr_mask = IBV_EXP_WQ_ATTR_STATE,
 		.wq_state = IBV_EXP_WQS_RDY,
 	};
-	err = ibv_exp_modify_wq(tmpl.rxq.wq, &mod);
+	err = ibv_exp_modify_wq(tmpl.wq, &mod);
 	if (err) {
 		ERROR("%p: WQ state to IBV_EXP_WQS_RDY failed: %s",
 		      (void *)dev, strerror(err));
 		goto error;
 	}
 	/* Post SGEs. */
-	assert(tmpl.if_wq != NULL);
-	elts = tmpl.rxq.elts;
-	for (i = 0; (i != RTE_DIM(*elts)); ++i) {
-		err = tmpl.if_wq->recv_burst(
-			tmpl.rxq.wq,
-			&(*elts)[i].sge,
-			1);
-		if (err)
-			break;
-	}
+	err = rxq_alloc_elts(&tmpl, desc_n, pool);
 	if (err) {
-		ERROR("%p: failed to post SGEs with error %d",
-		      (void *)dev, err);
-		/* Set err because it does not contain a valid errno value. */
-		err = EIO;
-		goto error;
+		ERROR("%p: cannot reallocate WRs, aborting", (void *)dev);
+		rte_free(pool);
+		assert(err > 0);
+		return err;
 	}
-	tmpl.rxq.recv = tmpl.if_wq->recv_burst;
+	/* Update doorbell counter. */
+	rxq_ctrl->rxq.rq_ci = desc_n;
+	rte_wmb();
+	*rxq_ctrl->rxq.rq_db = htonl(rxq_ctrl->rxq.rq_ci);
 error:
 	*rxq_ctrl = tmpl;
 	assert(err >= 0);
@@ -915,6 +874,45 @@ error:
 }
 
 /**
+ * Initialize RX queue.
+ *
+ * @param tmpl
+ *   Pointer to RX queue control template.
+ * @param rxq_ctrl
+ *   Pointer to RX queue control; (*elts)[] is located right after it.
+ *
+ * @return
+ *   0 on success, errno value on failure.
+ */
+static inline int
+rxq_setup(struct rxq_ctrl *tmpl, struct rxq_ctrl *rxq_ctrl)
+{
+	struct ibv_cq *ibcq = tmpl->cq;
+	struct mlx5_cq *cq = to_mxxx(cq, cq);
+	struct mlx5_rwq *rwq = container_of(tmpl->wq, struct mlx5_rwq, wq);
+
+	if (cq->cqe_sz != RTE_CACHE_LINE_SIZE) {
+		ERROR("Wrong MLX5_CQE_SIZE environment variable value: "
+		      "it should be set to %u", RTE_CACHE_LINE_SIZE);
+		return EINVAL;
+	}
+	tmpl->rxq.rq_db = rwq->rq.db;
+	tmpl->rxq.cq_ci = 0;
+	tmpl->rxq.rq_ci = 0;
+	tmpl->rxq.cq_db = cq->dbrec;
+	tmpl->rxq.wqes =
+		(volatile struct mlx5_wqe_data_seg (*)[])
+		(uintptr_t)rwq->rq.buff;
+	tmpl->rxq.cqes =
+		(volatile struct mlx5_cqe (*)[])
+		(uintptr_t)cq->active_buf->buf;
+	tmpl->rxq.elts =
+		(struct rte_mbuf *(*)[tmpl->rxq.elts_n])
+		((uintptr_t)rxq_ctrl + sizeof(*rxq_ctrl));
+	return 0;
+}
+
+/**
  * Configure a RX queue.
  *
  * @param dev
@@ -934,15 +932,16 @@ error:
  *   0 on success, errno value on failure.
  */
 int
-rxq_setup(struct rte_eth_dev *dev, struct rxq_ctrl *rxq_ctrl, uint16_t desc,
-	  unsigned int socket, const struct rte_eth_rxconf *conf,
-	  struct rte_mempool *mp)
+rxq_ctrl_setup(struct rte_eth_dev *dev, struct rxq_ctrl *rxq_ctrl,
+	       uint16_t desc, unsigned int socket,
+	       const struct rte_eth_rxconf *conf, struct rte_mempool *mp)
 {
 	struct priv *priv = dev->data->dev_private;
 	struct rxq_ctrl tmpl = {
+		.priv = priv,
 		.socket = socket,
 		.rxq = {
-			.priv = priv,
+			.elts_n = desc,
 			.mp = mp,
 		},
 	};
@@ -952,17 +951,16 @@ rxq_setup(struct rte_eth_dev *dev, struct rxq_ctrl *rxq_ctrl, uint16_t desc,
 		struct ibv_exp_cq_init_attr cq;
 		struct ibv_exp_res_domain_init_attr rd;
 		struct ibv_exp_wq_init_attr wq;
+		struct ibv_exp_cq_attr cq_attr;
 	} attr;
 	enum ibv_exp_query_intf_status status;
 	unsigned int mb_len = rte_pktmbuf_data_room_size(mp);
-	struct rxq_elt (*elts)[desc];
 	int ret = 0;
-	unsigned int i;
-	unsigned int cq_size = desc;
 
 	(void)conf; /* Thresholds configuration (ignored). */
 	if (desc == 0) {
-		ERROR("%p: invalid number of RX descriptors", (void *)dev);
+		ERROR("%p: invalid number of RX descriptors (must be a"
+		      " multiple of 2)", (void *)dev);
 		return EINVAL;
 	}
 	/* Toggle RX checksum offload if hardware supports it. */
@@ -996,9 +994,9 @@ rxq_setup(struct rte_eth_dev *dev, struct rxq_ctrl *rxq_ctrl, uint16_t desc,
 		.comp_mask = IBV_EXP_CQ_INIT_ATTR_RES_DOMAIN,
 		.res_domain = tmpl.rd,
 	};
-	tmpl.rxq.cq = ibv_exp_create_cq(priv->ctx, cq_size, NULL, NULL, 0,
-					&attr.cq);
-	if (tmpl.rxq.cq == NULL) {
+	tmpl.cq = ibv_exp_create_cq(priv->ctx, desc - 1, NULL, NULL, 0,
+				    &attr.cq);
+	if (tmpl.cq == NULL) {
 		ret = ENOMEM;
 		ERROR("%p: CQ creation failure: %s",
 		      (void *)dev, strerror(ret));
@@ -1015,13 +1013,13 @@ rxq_setup(struct rte_eth_dev *dev, struct rxq_ctrl *rxq_ctrl, uint16_t desc,
 		.wq_context = NULL, /* Could be useful in the future. */
 		.wq_type = IBV_EXP_WQT_RQ,
 		/* Max number of outstanding WRs. */
-		.max_recv_wr = ((priv->device_attr.max_qp_wr < (int)cq_size) ?
+		.max_recv_wr = ((priv->device_attr.max_qp_wr < (int)desc) ?
 				priv->device_attr.max_qp_wr :
-				(int)cq_size),
+				(int)desc),
 		/* Max number of scatter/gather elements in a WR. */
 		.max_recv_sge = 1,
 		.pd = priv->pd,
-		.cq = tmpl.rxq.cq,
+		.cq = tmpl.cq,
 		.comp_mask =
 			IBV_EXP_CREATE_WQ_RES_DOMAIN |
 			IBV_EXP_CREATE_WQ_VLAN_OFFLOADS |
@@ -1064,19 +1062,13 @@ rxq_setup(struct rte_eth_dev *dev, struct rxq_ctrl *rxq_ctrl, uint16_t desc,
 		     " up to date",
 		     (void *)dev);
 
-	tmpl.rxq.wq = ibv_exp_create_wq(priv->ctx, &attr.wq);
-	if (tmpl.rxq.wq == NULL) {
+	tmpl.wq = ibv_exp_create_wq(priv->ctx, &attr.wq);
+	if (tmpl.wq == NULL) {
 		ret = (errno ? errno : EINVAL);
 		ERROR("%p: WQ creation failure: %s",
 		      (void *)dev, strerror(ret));
 		goto error;
 	}
-	ret = rxq_alloc_elts(&tmpl, desc, NULL);
-	if (ret) {
-		ERROR("%p: RXQ allocation failed: %s",
-		      (void *)dev, strerror(ret));
-		goto error;
-	}
 	/* Save port ID. */
 	tmpl.rxq.port_id = dev->data->port_id;
 	DEBUG("%p: RTE port ID: %u", (void *)rxq_ctrl, tmpl.rxq.port_id);
@@ -1084,7 +1076,7 @@ rxq_setup(struct rte_eth_dev *dev, struct rxq_ctrl *rxq_ctrl, uint16_t desc,
 		.intf_scope = IBV_EXP_INTF_GLOBAL,
 		.intf_version = 1,
 		.intf = IBV_EXP_INTF_CQ,
-		.obj = tmpl.rxq.cq,
+		.obj = tmpl.cq,
 	};
 	tmpl.if_cq = ibv_exp_query_intf(priv->ctx, &attr.params, &status);
 	if (tmpl.if_cq == NULL) {
@@ -1095,7 +1087,7 @@ rxq_setup(struct rte_eth_dev *dev, struct rxq_ctrl *rxq_ctrl, uint16_t desc,
 	attr.params = (struct ibv_exp_query_intf_params){
 		.intf_scope = IBV_EXP_INTF_GLOBAL,
 		.intf = IBV_EXP_INTF_WQ,
-		.obj = tmpl.rxq.wq,
+		.obj = tmpl.wq,
 	};
 	tmpl.if_wq = ibv_exp_query_intf(priv->ctx, &attr.params, &status);
 	if (tmpl.if_wq == NULL) {
@@ -1108,38 +1100,34 @@ rxq_setup(struct rte_eth_dev *dev, struct rxq_ctrl *rxq_ctrl, uint16_t desc,
 		.attr_mask = IBV_EXP_WQ_ATTR_STATE,
 		.wq_state = IBV_EXP_WQS_RDY,
 	};
-	ret = ibv_exp_modify_wq(tmpl.rxq.wq, &mod);
+	ret = ibv_exp_modify_wq(tmpl.wq, &mod);
 	if (ret) {
 		ERROR("%p: WQ state to IBV_EXP_WQS_RDY failed: %s",
 		      (void *)dev, strerror(ret));
 		goto error;
 	}
-	/* Post SGEs. */
-	elts = tmpl.rxq.elts;
-	for (i = 0; (i != RTE_DIM(*elts)); ++i) {
-		ret = tmpl.if_wq->recv_burst(
-			tmpl.rxq.wq,
-			&(*elts)[i].sge,
-			1);
-		if (ret)
-			break;
+	ret = rxq_setup(&tmpl, rxq_ctrl);
+	if (ret) {
+		ERROR("%p: cannot initialize RX queue structure: %s",
+		      (void *)dev, strerror(ret));
+		goto error;
 	}
+	ret = rxq_alloc_elts(&tmpl, desc, NULL);
 	if (ret) {
-		ERROR("%p: failed to post SGEs with error %d",
-		      (void *)dev, ret);
-		/* Set ret because it does not contain a valid errno value. */
-		ret = EIO;
+		ERROR("%p: RXQ allocation failed: %s",
+		      (void *)dev, strerror(ret));
 		goto error;
 	}
 	/* Clean up rxq in case we're reinitializing it. */
 	DEBUG("%p: cleaning-up old rxq just in case", (void *)rxq_ctrl);
 	rxq_cleanup(rxq_ctrl);
 	*rxq_ctrl = tmpl;
+	/* Update doorbell counter. */
+	rxq_ctrl->rxq.rq_ci = desc;
+	rte_wmb();
+	*rxq_ctrl->rxq.rq_db = htonl(rxq_ctrl->rxq.rq_ci);
 	DEBUG("%p: rxq updated with %p", (void *)rxq_ctrl, (void *)&tmpl);
 	assert(ret == 0);
-	/* Assign function in queue. */
-	rxq_ctrl->rxq.poll = rxq_ctrl->if_cq->poll_length_flags_cvlan;
-	rxq_ctrl->rxq.recv = rxq_ctrl->if_wq->recv_burst;
 	return 0;
 error:
 	rxq_cleanup(&tmpl);
@@ -1173,14 +1161,19 @@ mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 {
 	struct priv *priv = dev->data->dev_private;
 	struct rxq *rxq = (*priv->rxqs)[idx];
-	struct rxq_ctrl *rxq_ctrl;
+	struct rxq_ctrl *rxq_ctrl = container_of(rxq, struct rxq_ctrl, rxq);
 	int ret;
 
 	if (mlx5_is_secondary())
 		return -E_RTE_SECONDARY;
 
 	priv_lock(priv);
-	rxq_ctrl = container_of(rxq, struct rxq_ctrl, rxq);
+	if (!rte_is_power_of_2(desc)) {
+		desc = 1 << log2above(desc);
+		WARN("%p: increased number of descriptors in RX queue %u"
+		     " to the next power of two (%d)",
+		     (void *)dev, idx, desc);
+	}
 	DEBUG("%p: configuring queue %u for %u descriptors",
 	      (void *)dev, idx, desc);
 	if (idx >= priv->rxqs_n) {
@@ -1199,8 +1192,9 @@ mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 		(*priv->rxqs)[idx] = NULL;
 		rxq_cleanup(rxq_ctrl);
 	} else {
-		rxq_ctrl = rte_calloc_socket("RXQ", 1, sizeof(*rxq_ctrl), 0,
-					     socket);
+		rxq_ctrl = rte_calloc_socket("RXQ", 1, sizeof(*rxq_ctrl) +
+					     desc * sizeof(struct rte_mbuf *),
+					     0, socket);
 		if (rxq_ctrl == NULL) {
 			ERROR("%p: unable to allocate queue index %u",
 			      (void *)dev, idx);
@@ -1208,7 +1202,7 @@ mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 			return -ENOMEM;
 		}
 	}
-	ret = rxq_setup(dev, rxq_ctrl, desc, socket, conf, mp);
+	ret = rxq_ctrl_setup(dev, rxq_ctrl, desc, socket, conf, mp);
 	if (ret)
 		rte_free(rxq_ctrl);
 	else {
@@ -1243,12 +1237,12 @@ mlx5_rx_queue_release(void *dpdk_rxq)
 	if (rxq == NULL)
 		return;
 	rxq_ctrl = container_of(rxq, struct rxq_ctrl, rxq);
-	priv = rxq->priv;
+	priv = rxq_ctrl->priv;
 	priv_lock(priv);
 	for (i = 0; (i != priv->rxqs_n); ++i)
 		if ((*priv->rxqs)[i] == rxq) {
 			DEBUG("%p: removing RX queue %p from list",
-			      (void *)priv->dev, (void *)rxq);
+			      (void *)priv->dev, (void *)rxq_ctrl);
 			(*priv->rxqs)[i] = NULL;
 			break;
 		}
@@ -1278,7 +1272,8 @@ mlx5_rx_burst_secondary_setup(void *dpdk_rxq, struct rte_mbuf **pkts,
 			      uint16_t pkts_n)
 {
 	struct rxq *rxq = dpdk_rxq;
-	struct priv *priv = mlx5_secondary_data_setup(rxq->priv);
+	struct rxq_ctrl *rxq_ctrl = container_of(rxq, struct rxq_ctrl, rxq);
+	struct priv *priv = mlx5_secondary_data_setup(rxq_ctrl->priv);
 	struct priv *primary_priv;
 	unsigned int index;
 
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index a6b0cf5..27d8852 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -42,6 +42,8 @@
 #pragma GCC diagnostic ignored "-pedantic"
 #endif
 #include <infiniband/verbs.h>
+#include <infiniband/mlx5_hw.h>
+#include <infiniband/arch.h>
 #ifdef PEDANTIC
 #pragma GCC diagnostic error "-pedantic"
 #endif
@@ -55,7 +57,7 @@
 #include <rte_prefetch.h>
 #include <rte_common.h>
 #include <rte_branch_prediction.h>
-#include <rte_memory.h>
+#include <rte_ether.h>
 #ifdef PEDANTIC
 #pragma GCC diagnostic error "-pedantic"
 #endif
@@ -65,6 +67,47 @@
 #include "mlx5_rxtx.h"
 #include "mlx5_autoconf.h"
 #include "mlx5_defs.h"
+#include "mlx5_prm.h"
+
+static inline volatile struct mlx5_cqe64 *
+get_cqe64(volatile struct mlx5_cqe cqes[],
+	  unsigned int cqes_n, uint16_t *ci)
+	  __attribute__((always_inline));
+
+static inline int
+rx_poll_len(struct rxq *rxq) __attribute__((always_inline));
+
+static volatile struct mlx5_cqe64 *
+get_cqe64(volatile struct mlx5_cqe cqes[],
+	  unsigned int cqes_n, uint16_t *ci)
+{
+	volatile struct mlx5_cqe64 *cqe;
+	uint16_t idx = *ci;
+	uint8_t op_own;
+
+	cqe = &cqes[idx & (cqes_n - 1)].cqe64;
+	op_own = cqe->op_own;
+	if (unlikely((op_own & MLX5_CQE_OWNER_MASK) == !(idx & cqes_n))) {
+		return NULL;
+	} else if (unlikely(op_own & 0x80)) {
+		switch (op_own >> 4) {
+		case MLX5_CQE_INVALID:
+			return NULL; /* No CQE */
+		case MLX5_CQE_REQ_ERR:
+			return cqe;
+		case MLX5_CQE_RESP_ERR:
+			++(*ci);
+			return NULL;
+		default:
+			return NULL;
+		}
+	}
+	if (cqe) {
+		*ci = idx + 1;
+		return cqe;
+	}
+	return NULL;
+}
 
 /**
  * Manage TX completions.
@@ -390,8 +433,8 @@ stop:
 /**
  * Translate RX completion flags to packet type.
  *
- * @param flags
- *   RX completion flags returned by poll_length_flags().
+ * @param[in] cqe
+ *   Pointer to CQE.
  *
  * @note: fix mlx5_dev_supported_ptypes_get() if any change here.
  *
@@ -399,11 +442,13 @@ stop:
  *   Packet type for struct rte_mbuf.
  */
 static inline uint32_t
-rxq_cq_to_pkt_type(uint32_t flags)
+rxq_cq_to_pkt_type(volatile struct mlx5_cqe64 *cqe)
 {
 	uint32_t pkt_type;
+	uint8_t flags = cqe->l4_hdr_type_etc;
+	uint8_t info = cqe->rsvd0[0];
 
-	if (flags & IBV_EXP_CQ_RX_TUNNEL_PACKET)
+	if (info & IBV_EXP_CQ_RX_TUNNEL_PACKET)
 		pkt_type =
 			TRANSPOSE(flags,
 				  IBV_EXP_CQ_RX_OUTER_IPV4_PACKET,
@@ -420,11 +465,11 @@ rxq_cq_to_pkt_type(uint32_t flags)
 	else
 		pkt_type =
 			TRANSPOSE(flags,
-				  IBV_EXP_CQ_RX_IPV4_PACKET,
-				  RTE_PTYPE_L3_IPV4) |
+				  MLX5_CQE_L3_HDR_TYPE_IPV6,
+				  RTE_PTYPE_L3_IPV6) |
 			TRANSPOSE(flags,
-				  IBV_EXP_CQ_RX_IPV6_PACKET,
-				  RTE_PTYPE_L3_IPV6);
+				  MLX5_CQE_L3_HDR_TYPE_IPV4,
+				  RTE_PTYPE_L3_IPV4);
 	return pkt_type;
 }
 
@@ -433,50 +478,69 @@ rxq_cq_to_pkt_type(uint32_t flags)
  *
  * @param[in] rxq
  *   Pointer to RX queue structure.
- * @param flags
- *   RX completion flags returned by poll_length_flags().
+ * @param[in] cqe
+ *   Pointer to CQE.
  *
  * @return
  *   Offload flags (ol_flags) for struct rte_mbuf.
  */
 static inline uint32_t
-rxq_cq_to_ol_flags(const struct rxq *rxq, uint32_t flags)
+rxq_cq_to_ol_flags(struct rxq *rxq, volatile struct mlx5_cqe64 *cqe)
 {
 	uint32_t ol_flags = 0;
+	uint8_t l3_hdr = (cqe->l4_hdr_type_etc) & MLX5_CQE_L3_HDR_TYPE_MASK;
+	uint8_t l4_hdr = (cqe->l4_hdr_type_etc) & MLX5_CQE_L4_HDR_TYPE_MASK;
+	uint8_t info = cqe->rsvd0[0];
 
-	if (rxq->csum) {
-		/* Set IP checksum flag only for IPv4/IPv6 packets. */
-		if (flags &
-		    (IBV_EXP_CQ_RX_IPV4_PACKET | IBV_EXP_CQ_RX_IPV6_PACKET))
-			ol_flags |=
-				TRANSPOSE(~flags,
-					IBV_EXP_CQ_RX_IP_CSUM_OK,
-					PKT_RX_IP_CKSUM_BAD);
-		/* Set L4 checksum flag only for TCP/UDP packets. */
-		if (flags &
-		    (IBV_EXP_CQ_RX_TCP_PACKET | IBV_EXP_CQ_RX_UDP_PACKET))
-			ol_flags |=
-				TRANSPOSE(~flags,
-					IBV_EXP_CQ_RX_TCP_UDP_CSUM_OK,
-					PKT_RX_L4_CKSUM_BAD);
-	}
+	if ((l3_hdr == MLX5_CQE_L3_HDR_TYPE_IPV4) ||
+	    (l3_hdr == MLX5_CQE_L3_HDR_TYPE_IPV6))
+		ol_flags |=
+			(!(cqe->hds_ip_ext & MLX5_CQE_L3_OK) *
+			 PKT_RX_IP_CKSUM_BAD);
+	if ((l4_hdr == MLX5_CQE_L4_HDR_TYPE_TCP) ||
+	    (l4_hdr == MLX5_CQE_L4_HDR_TYPE_TCP_EMP_ACK) ||
+	    (l4_hdr == MLX5_CQE_L4_HDR_TYPE_TCP_ACK) ||
+	    (l4_hdr == MLX5_CQE_L4_HDR_TYPE_UDP))
+		ol_flags |=
+			(!(cqe->hds_ip_ext & MLX5_CQE_L4_OK) *
+			 PKT_RX_L4_CKSUM_BAD);
 	/*
 	 * PKT_RX_IP_CKSUM_BAD and PKT_RX_L4_CKSUM_BAD are used in place
 	 * of PKT_RX_EIP_CKSUM_BAD because the latter is not functional
 	 * (its value is 0).
 	 */
-	if ((flags & IBV_EXP_CQ_RX_TUNNEL_PACKET) && (rxq->csum_l2tun))
+	if ((info & IBV_EXP_CQ_RX_TUNNEL_PACKET) && (rxq->csum_l2tun))
 		ol_flags |=
-			TRANSPOSE(~flags,
+			TRANSPOSE(~cqe->l4_hdr_type_etc,
 				  IBV_EXP_CQ_RX_OUTER_IP_CSUM_OK,
 				  PKT_RX_IP_CKSUM_BAD) |
-			TRANSPOSE(~flags,
+			TRANSPOSE(~cqe->l4_hdr_type_etc,
 				  IBV_EXP_CQ_RX_OUTER_TCP_UDP_CSUM_OK,
 				  PKT_RX_L4_CKSUM_BAD);
 	return ol_flags;
 }
 
 /**
+ * Get size of the next packet.
+ *
+ * @param rxq
+ *   RX queue to fetch packet from.
+ *
+ * @return
+ *   Packet size in bytes, 0 if get_cqe64() returns no CQE.
+ */
+static inline int __attribute__((always_inline))
+rx_poll_len(struct rxq *rxq)
+{
+	volatile struct mlx5_cqe64 *cqe;
+
+	cqe = get_cqe64(*rxq->cqes, rxq->elts_n, &rxq->cq_ci);
+	if (cqe)
+		return ntohl(cqe->byte_cnt);
+	return 0;
+}
+
+/**
  * DPDK callback for RX.
  *
  * @param dpdk_rxq
@@ -492,134 +556,82 @@ rxq_cq_to_ol_flags(const struct rxq *rxq, uint32_t flags)
 uint16_t
 mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 {
-	struct rxq *rxq = (struct rxq *)dpdk_rxq;
-	struct rxq_elt (*elts)[rxq->elts_n] = rxq->elts;
-	const unsigned int elts_n = rxq->elts_n;
-	unsigned int elts_head = rxq->elts_head;
-	struct ibv_sge sges[pkts_n];
-	unsigned int i;
+	struct rxq *rxq = dpdk_rxq;
 	unsigned int pkts_ret = 0;
-	int ret;
+	unsigned int i;
+	unsigned int rq_ci = rxq->rq_ci;
+	const unsigned int elts_n = rxq->elts_n;
+	const unsigned int wqe_cnt = elts_n - 1;
 
 	for (i = 0; (i != pkts_n); ++i) {
-		struct rxq_elt *elt = &(*elts)[elts_head];
-		unsigned int len;
-		struct rte_mbuf *seg = elt->buf;
+		unsigned int idx = rq_ci & wqe_cnt;
 		struct rte_mbuf *rep;
-		uint32_t flags;
-		uint16_t vlan_tci;
-
-		/* Sanity checks. */
-		assert(seg != NULL);
-		assert(elts_head < rxq->elts_n);
-		assert(rxq->elts_head < rxq->elts_n);
-		/*
-		 * Fetch initial bytes of packet descriptor into a
-		 * cacheline while allocating rep.
-		 */
-		rte_mbuf_prefetch_part1(seg);
-		rte_mbuf_prefetch_part2(seg);
-		ret = rxq->poll(rxq->cq, NULL, NULL, &flags, &vlan_tci);
-		if (unlikely(ret < 0)) {
-			struct ibv_wc wc;
-			int wcs_n;
-
-			DEBUG("rxq=%p, poll_length() failed (ret=%d)",
-			      (void *)rxq, ret);
-			/* ibv_poll_cq() must be used in case of failure. */
-			wcs_n = ibv_poll_cq(rxq->cq, 1, &wc);
-			if (unlikely(wcs_n == 0))
-				break;
-			if (unlikely(wcs_n < 0)) {
-				DEBUG("rxq=%p, ibv_poll_cq() failed (wcs_n=%d)",
-				      (void *)rxq, wcs_n);
-				break;
-			}
-			assert(wcs_n == 1);
-			if (unlikely(wc.status != IBV_WC_SUCCESS)) {
-				/* Whatever, just repost the offending WR. */
-				DEBUG("rxq=%p, wr_id=%" PRIu64 ": bad work"
-				      " completion status (%d): %s",
-				      (void *)rxq, wc.wr_id, wc.status,
-				      ibv_wc_status_str(wc.status));
-#ifdef MLX5_PMD_SOFT_COUNTERS
-				/* Increment dropped packets counter. */
-				++rxq->stats.idropped;
-#endif
-				/* Add SGE to array for repost. */
-				sges[i] = elt->sge;
-				goto repost;
-			}
-			ret = wc.byte_len;
-		}
-		if (ret == 0)
-			break;
-		assert(ret >= (rxq->crc_present << 2));
-		len = ret - (rxq->crc_present << 2);
+		struct rte_mbuf *pkt;
+		unsigned int len;
+		volatile struct mlx5_wqe_data_seg *wqe = &(*rxq->wqes)[idx];
+		volatile struct mlx5_cqe64 *cqe =
+			&(*rxq->cqes)[rxq->cq_ci & wqe_cnt].cqe64;
+
+		pkt = (*rxq->elts)[idx];
+		rte_prefetch0(cqe);
 		rep = rte_mbuf_raw_alloc(rxq->mp);
 		if (unlikely(rep == NULL)) {
-			/*
-			 * Unable to allocate a replacement mbuf,
-			 * repost WR.
-			 */
-			DEBUG("rxq=%p: can't allocate a new mbuf",
-			      (void *)rxq);
-			/* Increment out of memory counters. */
 			++rxq->stats.rx_nombuf;
-			++rxq->priv->dev->data->rx_mbuf_alloc_failed;
-			goto repost;
+			break;
 		}
-
-		/* Reconfigure sge to use rep instead of seg. */
-		elt->sge.addr = (uintptr_t)rep->buf_addr + RTE_PKTMBUF_HEADROOM;
-		elt->buf = rep;
-
-		/* Add SGE to array for repost. */
-		sges[i] = elt->sge;
-
-		/* Update seg information. */
-		SET_DATA_OFF(seg, RTE_PKTMBUF_HEADROOM);
-		NB_SEGS(seg) = 1;
-		PORT(seg) = rxq->port_id;
-		NEXT(seg) = NULL;
-		PKT_LEN(seg) = len;
-		DATA_LEN(seg) = len;
-		if (rxq->csum | rxq->csum_l2tun | rxq->vlan_strip) {
-			seg->packet_type = rxq_cq_to_pkt_type(flags);
-			seg->ol_flags = rxq_cq_to_ol_flags(rxq, flags);
-			if (flags & IBV_EXP_CQ_RX_CVLAN_STRIPPED_V1) {
-				seg->ol_flags |= PKT_RX_VLAN_PKT |
-					PKT_RX_VLAN_STRIPPED;
-				seg->vlan_tci = vlan_tci;
+		SET_DATA_OFF(rep, RTE_PKTMBUF_HEADROOM);
+		NB_SEGS(rep) = 1;
+		PORT(rep) = rxq->port_id;
+		NEXT(rep) = NULL;
+		len = rx_poll_len(rxq);
+		if (unlikely(len == 0)) {
+			rte_mbuf_refcnt_set(rep, 0);
+			__rte_mbuf_raw_free(rep);
+			break;
+		}
+		/* Fill NIC descriptor with the new buffer.  The lkey and size
+		 * of the buffers are already known, only the buffer address
+		 * changes. */
+		wqe->addr = htonll((uintptr_t)rep->buf_addr +
+				   RTE_PKTMBUF_HEADROOM);
+		(*rxq->elts)[idx] = rep;
+		/* Update pkt information. */
+		if (rxq->csum | rxq->csum_l2tun | rxq->vlan_strip |
+		    rxq->crc_present) {
+			if (rxq->csum) {
+				pkt->packet_type = rxq_cq_to_pkt_type(cqe);
+				pkt->ol_flags = rxq_cq_to_ol_flags(rxq, cqe);
+			}
+			if (cqe->l4_hdr_type_etc & MLX5_CQE_VLAN_STRIPPED) {
+				pkt->ol_flags |= PKT_RX_VLAN_PKT;
+				pkt->vlan_tci = ntohs(cqe->vlan_info);
 			}
+			if (rxq->crc_present)
+				len -= ETHER_CRC_LEN;
 		}
-		/* Return packet. */
-		*(pkts++) = seg;
-		++pkts_ret;
+		PKT_LEN(pkt) = len;
+		DATA_LEN(pkt) = len;
 #ifdef MLX5_PMD_SOFT_COUNTERS
 		/* Increment bytes counter. */
 		rxq->stats.ibytes += len;
 #endif
-repost:
-		if (++elts_head >= elts_n)
-			elts_head = 0;
-		continue;
+		/* Return packet. */
+		*(pkts++) = pkt;
+		++pkts_ret;
+		++rq_ci;
 	}
-	if (unlikely(i == 0))
+	if (unlikely((i == 0) && (rq_ci == rxq->rq_ci)))
 		return 0;
 	/* Repost WRs. */
 #ifdef DEBUG_RECV
 	DEBUG("%p: reposting %u WRs", (void *)rxq, i);
 #endif
-	ret = rxq->recv(rxq->wq, sges, i);
-	if (unlikely(ret)) {
-		/* Inability to repost WRs is fatal. */
-		DEBUG("%p: recv_burst(): failed (ret=%d)",
-		      (void *)rxq->priv,
-		      ret);
-		abort();
-	}
-	rxq->elts_head = elts_head;
+	/* Update the consumer index. */
+	rxq->rq_ci = rq_ci;
+	rte_wmb();
+	*rxq->cq_db = htonl(rxq->cq_ci);
+	rte_wmb();
+	*rxq->rq_db = htonl(rxq->rq_ci);
 #ifdef MLX5_PMD_SOFT_COUNTERS
 	/* Increment packets counter. */
 	rxq->stats.ipackets += pkts_ret;
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 570345b..1827123 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -43,6 +43,7 @@
 #pragma GCC diagnostic ignored "-pedantic"
 #endif
 #include <infiniband/verbs.h>
+#include <infiniband/mlx5_hw.h>
 #ifdef PEDANTIC
 #pragma GCC diagnostic error "-pedantic"
 #endif
@@ -61,6 +62,7 @@
 #include "mlx5.h"
 #include "mlx5_autoconf.h"
 #include "mlx5_defs.h"
+#include "mlx5_prm.h"
 
 struct mlx5_rxq_stats {
 	unsigned int idx; /**< Mapping index. */
@@ -81,12 +83,6 @@ struct mlx5_txq_stats {
 	uint64_t odropped; /**< Total of packets not sent when TX ring full. */
 };
 
-/* RX element. */
-struct rxq_elt {
-	struct ibv_sge sge; /* Scatter/Gather Element. */
-	struct rte_mbuf *buf; /* SGE buffer. */
-};
-
 /* Flow director queue structure. */
 struct fdir_queue {
 	struct ibv_qp *qp; /* Associated RX QP. */
@@ -97,25 +93,28 @@ struct priv;
 
 /* RX queue descriptor. */
 struct rxq {
-	struct priv *priv; /* Back pointer to private data. */
-	struct rte_mempool *mp; /* Memory Pool for allocations. */
-	struct ibv_cq *cq; /* Completion Queue. */
-	struct ibv_exp_wq *wq; /* Work Queue. */
-	int32_t (*poll)(); /* Verbs poll function. */
-	int32_t (*recv)(); /* Verbs receive function. */
-	unsigned int port_id; /* Port ID for incoming packets. */
-	unsigned int elts_n; /* (*elts)[] length. */
-	unsigned int elts_head; /* Current index in (*elts)[]. */
 	unsigned int csum:1; /* Enable checksum offloading. */
 	unsigned int csum_l2tun:1; /* Same for L2 tunnels. */
 	unsigned int vlan_strip:1; /* Enable VLAN stripping. */
 	unsigned int crc_present:1; /* CRC must be subtracted. */
-	struct rxq_elt (*elts)[]; /* RX elements. */
-	struct mlx5_rxq_stats stats; /* RX queue counters. */
+	uint16_t rq_ci;
+	uint16_t cq_ci;
+	uint16_t elts_n;
+	uint16_t port_id;
+	volatile struct mlx5_wqe_data_seg(*wqes)[];
+	volatile struct mlx5_cqe(*cqes)[];
+	volatile uint32_t *rq_db;
+	volatile uint32_t *cq_db;
+	struct rte_mbuf *(*elts)[];
+	struct rte_mempool *mp;
+	struct mlx5_rxq_stats stats;
 } __rte_cache_aligned;
 
 /* RX queue control descriptor. */
 struct rxq_ctrl {
+	struct priv *priv; /* Back pointer to private data. */
+	struct ibv_cq *cq; /* Completion Queue. */
+	struct ibv_exp_wq *wq; /* Work Queue. */
 	struct ibv_exp_res_domain *rd; /* Resource Domain. */
 	struct fdir_queue fdir_queue; /* Flow director queue. */
 	struct ibv_mr *mr; /* Memory Region (for mp). */
@@ -284,8 +283,9 @@ int priv_allow_flow_type(struct priv *, enum hash_rxq_flow_type);
 int priv_rehash_flows(struct priv *);
 void rxq_cleanup(struct rxq_ctrl *);
 int rxq_rehash(struct rte_eth_dev *, struct rxq_ctrl *);
-int rxq_setup(struct rte_eth_dev *, struct rxq_ctrl *, uint16_t, unsigned int,
-	      const struct rte_eth_rxconf *, struct rte_mempool *);
+int rxq_ctrl_setup(struct rte_eth_dev *, struct rxq_ctrl *, uint16_t,
+		   unsigned int, const struct rte_eth_rxconf *,
+		   struct rte_mempool *);
 int mlx5_rx_queue_setup(struct rte_eth_dev *, uint16_t, uint16_t, unsigned int,
 			const struct rte_eth_rxconf *, struct rte_mempool *);
 void mlx5_rx_queue_release(void *);
diff --git a/drivers/net/mlx5/mlx5_vlan.c b/drivers/net/mlx5/mlx5_vlan.c
index 3b9b771..4719e69 100644
--- a/drivers/net/mlx5/mlx5_vlan.c
+++ b/drivers/net/mlx5/mlx5_vlan.c
@@ -144,6 +144,7 @@ static void
 priv_vlan_strip_queue_set(struct priv *priv, uint16_t idx, int on)
 {
 	struct rxq *rxq = (*priv->rxqs)[idx];
+	struct rxq_ctrl *rxq_ctrl = container_of(rxq, struct rxq_ctrl, rxq);
 	struct ibv_exp_wq_attr mod;
 	uint16_t vlan_offloads =
 		(on ? IBV_EXP_RECEIVE_WQ_CVLAN_STRIP : 0) |
@@ -157,7 +158,7 @@ priv_vlan_strip_queue_set(struct priv *priv, uint16_t idx, int on)
 		.vlan_offloads = vlan_offloads,
 	};
 
-	err = ibv_exp_modify_wq(rxq->wq, &mod);
+	err = ibv_exp_modify_wq(rxq_ctrl->wq, &mod);
 	if (err) {
 		ERROR("%p: failed to modified stripping mode: %s",
 		      (void *)priv, strerror(err));
-- 
2.1.4



More information about the dev mailing list