[dpdk-dev] [PATCH v1 4/6] net/mlx5: add Multi-Packet Rx support

Yongseok Koh yskoh at mellanox.com
Sat Mar 10 02:25:30 CET 2018


Multi-Packet Rx Queue (MPRQ, a.k.a. Striding RQ) can further save PCIe
bandwidth by posting a single large buffer for multiple packets. Instead of
posting one buffer per packet, one large buffer is posted in order to
receive multiple packets on the buffer. An MPRQ buffer consists of multiple
fixed-size strides and each stride receives one packet.

A received packet is either memcpy'd to a user-provided mbuf if its length
is comparatively small, or referenced by mbuf indirection otherwise. In case
of indirection, the mempool for the direct mbufs is allocated and managed by
the PMD.

Signed-off-by: Yongseok Koh <yskoh at mellanox.com>
---
 doc/guides/nics/mlx5.rst         |  23 +++
 drivers/net/mlx5/Makefile        |   5 +
 drivers/net/mlx5/mlx5.c          |  63 +++++++
 drivers/net/mlx5/mlx5.h          |   3 +
 drivers/net/mlx5/mlx5_defs.h     |  20 ++
 drivers/net/mlx5/mlx5_ethdev.c   |   3 +
 drivers/net/mlx5/mlx5_prm.h      |  15 ++
 drivers/net/mlx5/mlx5_rxq.c      | 389 +++++++++++++++++++++++++++++++++++----
 drivers/net/mlx5/mlx5_rxtx.c     | 152 ++++++++++++++-
 drivers/net/mlx5/mlx5_rxtx.h     |  16 +-
 drivers/net/mlx5/mlx5_rxtx_vec.c |   4 +
 drivers/net/mlx5/mlx5_rxtx_vec.h |   3 +-
 12 files changed, 660 insertions(+), 36 deletions(-)

diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index 0e6e525c9..1600bfa7b 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -253,6 +253,29 @@ Run-time configuration
   - x86_64 with ConnectX-4, ConnectX-4 LX and ConnectX-5.
   - POWER8 and ARMv8 with ConnectX-4 LX and ConnectX-5.
 
+- ``mprq_en`` parameter [int]
+
+  A nonzero value enables configuring Multi-Packet Rx queues. Rx queue is
+  configured as Multi-Packet RQ if the total number of Rx queues is
+  ``rxqs_min_mprq`` or more and Rx scatter isn't configured. Disabled by default.
+
+  Multi-Packet Rx Queue (MPRQ, a.k.a. Striding RQ) can further save PCIe
+  bandwidth by posting a single large buffer for multiple packets. Instead of
+  posting one buffer per packet, one large buffer is posted in order to receive
+  multiple packets on the buffer. An MPRQ buffer consists of multiple fixed-size
+  strides and each stride receives one packet.
+
+- ``mprq_max_memcpy_len`` parameter [int]
+  The maximum size of packet for memcpy in case of Multi-Packet Rx queue. Rx
+  packet is mem-copied to a user-provided mbuf if the size of Rx packet is less
+  than or equal to this parameter. Otherwise, the packet will be referenced by mbuf
+  indirection. In case of indirection, the Mempool for the direct mbufs will be
+  allocated and managed by PMD. The default value is 128.
+
+- ``rxqs_min_mprq`` parameter [int]
+  Configure Rx queues as Multi-Packet RQ if the total number of Rx queues is
+  greater than or equal to this value. The default value is 12.
+
 - ``txq_inline`` parameter [int]
 
   Amount of data to be inlined during TX operations. Improves latency.
diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
index afda4118f..e5e276a71 100644
--- a/drivers/net/mlx5/Makefile
+++ b/drivers/net/mlx5/Makefile
@@ -125,6 +125,11 @@ mlx5_autoconf.h.new: FORCE
 mlx5_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh
 	$Q $(RM) -f -- '$@'
 	$Q sh -- '$<' '$@' \
+		HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT \
+		infiniband/mlx5dv.h \
+		enum MLX5DV_CONTEXT_MASK_STRIDING_RQ \
+		$(AUTOCONF_OUTPUT)
+	$Q sh -- '$<' '$@' \
 		HAVE_IBV_DEVICE_TUNNEL_SUPPORT \
 		infiniband/mlx5dv.h \
 		enum MLX5DV_CONTEXT_MASK_TUNNEL_OFFLOADS \
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 61cb93101..25c0b5b1f 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -44,6 +44,18 @@
 /* Device parameter to enable RX completion queue compression. */
 #define MLX5_RXQ_CQE_COMP_EN "rxq_cqe_comp_en"
 
+/* Device parameter to enable Multi-Packet Rx queue. */
+#define MLX5_RX_MPRQ_EN "mprq_en"
+
+/* Device parameter to limit the size of memcpy'd packet. */
+#define MLX5_RX_MPRQ_MAX_MEMCPY_LEN "mprq_max_memcpy_len"
+
+/*
+ * Device parameter to set the minimum number of Rx queues to configure
+ * Multi-Packet Rx queue.
+ */
+#define MLX5_RXQS_MIN_MPRQ "rxqs_min_mprq"
+
 /* Device parameter to configure inline send. */
 #define MLX5_TXQ_INLINE "txq_inline"
 
@@ -383,6 +395,12 @@ mlx5_args_check(const char *key, const char *val, void *opaque)
 	}
 	if (strcmp(MLX5_RXQ_CQE_COMP_EN, key) == 0) {
 		config->cqe_comp = !!tmp;
+	} else if (strcmp(MLX5_RX_MPRQ_EN, key) == 0) {
+		config->mprq = !!tmp;
+	} else if (strcmp(MLX5_RX_MPRQ_MAX_MEMCPY_LEN, key) == 0) {
+		config->mprq_max_memcpy_len = tmp;
+	} else if (strcmp(MLX5_RXQS_MIN_MPRQ, key) == 0) {
+		config->rxqs_mprq = tmp;
 	} else if (strcmp(MLX5_TXQ_INLINE, key) == 0) {
 		config->txq_inline = tmp;
 	} else if (strcmp(MLX5_TXQS_MIN_INLINE, key) == 0) {
@@ -420,6 +438,9 @@ mlx5_args(struct mlx5_dev_config *config, struct rte_devargs *devargs)
 {
 	const char **params = (const char *[]){
 		MLX5_RXQ_CQE_COMP_EN,
+		MLX5_RX_MPRQ_EN,
+		MLX5_RX_MPRQ_MAX_MEMCPY_LEN,
+		MLX5_RXQS_MIN_MPRQ,
 		MLX5_TXQ_INLINE,
 		MLX5_TXQS_MIN_INLINE,
 		MLX5_TXQ_MPW_EN,
@@ -582,6 +603,7 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
 	unsigned int mps;
 	unsigned int cqe_comp;
 	unsigned int tunnel_en = 0;
+	unsigned int mprq = 0;
 	int idx;
 	int i;
 	struct mlx5dv_context attrs_out = {0};
@@ -664,6 +686,9 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
 	attrs_out.comp_mask |= MLX5DV_CONTEXT_MASK_TUNNEL_OFFLOADS;
 #endif
+#ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT
+	attrs_out.comp_mask |= MLX5DV_CONTEXT_MASK_STRIDING_RQ;
+#endif
 	mlx5_glue->dv_query_device(attr_ctx, &attrs_out);
 	if (attrs_out.flags & MLX5DV_CONTEXT_FLAGS_MPW_ALLOWED) {
 		if (attrs_out.flags & MLX5DV_CONTEXT_FLAGS_ENHANCED_MPW) {
@@ -677,6 +702,37 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
 		DEBUG("MPW isn't supported");
 		mps = MLX5_MPW_DISABLED;
 	}
+#ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT
+	if (attrs_out.comp_mask & MLX5DV_CONTEXT_MASK_STRIDING_RQ) {
+		struct mlx5dv_striding_rq_caps mprq_caps =
+			attrs_out.striding_rq_caps;
+
+		DEBUG("\tmin_single_stride_log_num_of_bytes: %d",
+		      mprq_caps.min_single_stride_log_num_of_bytes);
+		DEBUG("\tmax_single_stride_log_num_of_bytes: %d",
+		      mprq_caps.max_single_stride_log_num_of_bytes);
+		DEBUG("\tmin_single_wqe_log_num_of_strides: %d",
+		      mprq_caps.min_single_wqe_log_num_of_strides);
+		DEBUG("\tmax_single_wqe_log_num_of_strides: %d",
+		      mprq_caps.max_single_wqe_log_num_of_strides);
+		DEBUG("\tsupported_qpts: %d",
+		      mprq_caps.supported_qpts);
+		if (mprq_caps.min_single_stride_log_num_of_bytes <=
+		    MLX5_MPRQ_MIN_STRIDE_SZ_N &&
+		    mprq_caps.max_single_stride_log_num_of_bytes >=
+		    MLX5_MPRQ_STRIDE_SZ_N &&
+		    mprq_caps.min_single_wqe_log_num_of_strides <=
+		    MLX5_MPRQ_MIN_STRIDE_NUM_N &&
+		    mprq_caps.max_single_wqe_log_num_of_strides >=
+		    MLX5_MPRQ_STRIDE_NUM_N) {
+			DEBUG("Multi-Packet RQ is supported");
+			mprq = 1;
+		} else {
+			DEBUG("Multi-Packet RQ isn't supported");
+			mprq = 0;
+		}
+	}
+#endif
 	if (RTE_CACHE_LINE_SIZE == 128 &&
 	    !(attrs_out.flags & MLX5DV_CONTEXT_FLAGS_CQE_128B_COMP))
 		cqe_comp = 0;
@@ -721,6 +777,9 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
 			.txq_inline = MLX5_ARG_UNSET,
 			.txqs_inline = MLX5_ARG_UNSET,
 			.inline_max_packet_sz = MLX5_ARG_UNSET,
+			.mprq = 0, /* Disable by default. */
+			.mprq_max_memcpy_len = MLX5_MPRQ_MEMCPY_DEFAULT_LEN,
+			.rxqs_mprq = MLX5_MPRQ_MIN_RXQS,
 		};
 
 		len = snprintf(name, sizeof(name), PCI_PRI_FMT,
@@ -891,6 +950,10 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
 			WARN("Rx CQE compression isn't supported");
 			config.cqe_comp = 0;
 		}
+		if (config.mprq && !mprq) {
+			WARN("Multi-Packet RQ isn't supported");
+			config.mprq = 0;
+		}
 		err = priv_uar_init_primary(priv);
 		if (err)
 			goto port_error;
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 9ad0533fc..42632a7e5 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -88,6 +88,9 @@ struct mlx5_dev_config {
 	unsigned int tx_vec_en:1; /* Tx vector is enabled. */
 	unsigned int rx_vec_en:1; /* Rx vector is enabled. */
 	unsigned int mpw_hdr_dseg:1; /* Enable DSEGs in the title WQEBB. */
+	unsigned int mprq:1; /* Whether Multi-Packet RQ is enabled. */
+	unsigned int mprq_max_memcpy_len; /* Maximum packet size to memcpy. */
+	unsigned int rxqs_mprq; /* Queue count threshold for Multi-Packet RQ. */
 	unsigned int tso_max_payload_sz; /* Maximum TCP payload for TSO. */
 	unsigned int ind_table_max_size; /* Maximum indirection table size. */
 	int txq_inline; /* Maximum packet size for inlining. */
diff --git a/drivers/net/mlx5/mlx5_defs.h b/drivers/net/mlx5/mlx5_defs.h
index c3334ca30..39cc1344a 100644
--- a/drivers/net/mlx5/mlx5_defs.h
+++ b/drivers/net/mlx5/mlx5_defs.h
@@ -95,4 +95,24 @@
  */
 #define MLX5_UAR_OFFSET (1ULL << 32)
 
+/* Log 2 of the size of a stride for Multi-Packet RQ. */
+#define MLX5_MPRQ_STRIDE_SZ_N 11
+#define MLX5_MPRQ_MIN_STRIDE_SZ_N 6
+
+/* Log 2 of the number of strides per WQE for Multi-Packet RQ. */
+#define MLX5_MPRQ_STRIDE_NUM_N 4
+#define MLX5_MPRQ_MIN_STRIDE_NUM_N 3
+
+/* Two-byte shift is disabled for Multi-Packet RQ. */
+#define MLX5_MPRQ_TWO_BYTE_SHIFT 0
+
+/* Default maximum size of packet to be memcpy'd instead of indirection. */
+#define MLX5_MPRQ_MEMCPY_DEFAULT_LEN 128
+
+/* Minimum number of Rx queues to enable Multi-Packet RQ. */
+#define MLX5_MPRQ_MIN_RXQS 12
+
+/* Cache size of mempool for Multi-Packet RQ. */
+#define MLX5_MPRQ_MP_CACHE_SZ 16
+
 #endif /* RTE_PMD_MLX5_DEFS_H_ */
diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index b73cb53df..2729c3b62 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -494,6 +494,7 @@ mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev)
 	};
 
 	if (dev->rx_pkt_burst == mlx5_rx_burst ||
+	    dev->rx_pkt_burst == mlx5_rx_burst_mprq ||
 	    dev->rx_pkt_burst == mlx5_rx_burst_vec)
 		return ptypes;
 	return NULL;
@@ -1316,6 +1317,8 @@ priv_select_rx_function(struct priv *priv, __rte_unused struct rte_eth_dev *dev)
 	if (priv_check_vec_rx_support(priv) > 0) {
 		rx_pkt_burst = mlx5_rx_burst_vec;
 		DEBUG("selected RX vectorized function");
+	} else if (priv_mprq_enabled(priv)) {
+		rx_pkt_burst = mlx5_rx_burst_mprq;
 	}
 	return rx_pkt_burst;
 }
diff --git a/drivers/net/mlx5/mlx5_prm.h b/drivers/net/mlx5/mlx5_prm.h
index 9eb9c15e1..b7ad3454e 100644
--- a/drivers/net/mlx5/mlx5_prm.h
+++ b/drivers/net/mlx5/mlx5_prm.h
@@ -195,6 +195,21 @@ struct mlx5_mpw {
 	} data;
 };
 
+/* WQE for Multi-Packet RQ. */
+struct mlx5_wqe_mprq {
+	struct mlx5_wqe_srq_next_seg next_seg;
+	struct mlx5_wqe_data_seg dseg;
+};
+
+#define MLX5_MPRQ_LEN_MASK 0x000ffff
+#define MLX5_MPRQ_LEN_SHIFT 0
+#define MLX5_MPRQ_STRIDE_NUM_MASK 0x7fff0000
+#define MLX5_MPRQ_STRIDE_NUM_SHIFT 16
+#define MLX5_MPRQ_FILLER_MASK 0x80000000
+#define MLX5_MPRQ_FILLER_SHIFT 31
+
+#define MLX5_MPRQ_STRIDE_SHIFT_BYTE 2
+
 /* CQ element structure - should be equal to the cache line size */
 struct mlx5_cqe {
 #if (RTE_CACHE_LINE_SIZE == 128)
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 238fa7e56..8fa56a53a 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -55,7 +55,73 @@ uint8_t rss_hash_default_key[] = {
 const size_t rss_hash_default_key_len = sizeof(rss_hash_default_key);
 
 /**
- * Allocate RX queue elements.
+ * Check whether Multi-Packet RQ can be enabled for the device.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ *
+ * @return
+ *   1 if supported, negative errno value if not.
+ */
+inline int
+priv_check_mprq_support(struct priv *priv)
+{
+	if (priv->config.mprq && priv->rxqs_n >= priv->config.rxqs_mprq)
+		return 1;
+	return -ENOTSUP;
+}
+
+/**
+ * Check whether Multi-Packet RQ is enabled for the Rx queue.
+ *
+ * @param rxq
+ *   Pointer to receive queue structure.
+ *
+ * @return
+ *   0 if disabled, otherwise enabled.
+ */
+static inline int
+rxq_mprq_enabled(struct mlx5_rxq_data *rxq)
+{
+	return rxq->mprq_mp != NULL;
+}
+
+/**
+ * Check whether Multi-Packet RQ is enabled for the device.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ *
+ * @return
+ *   0 if disabled, otherwise enabled.
+ */
+inline int
+priv_mprq_enabled(struct priv *priv)
+{
+	uint16_t i;
+	uint16_t n = 0;
+
+	if (priv_check_mprq_support(priv) < 0)
+		return 0;
+	/* All the configured queues should be enabled. */
+	for (i = 0; i < priv->rxqs_n; ++i) {
+		struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];
+
+		if (!rxq)
+			continue;
+		if (rxq_mprq_enabled(rxq))
+			++n;
+	}
+	if (n == priv->rxqs_n)
+		return 1;
+	if (n != 0)
+		ERROR("Multi-Packet RQ can't be partially configured, %u/%u",
+		      n, priv->rxqs_n);
+	return 0;
+}
+
+/**
+ * Allocate RX queue elements for Multi-Packet RQ.
  *
  * @param rxq_ctrl
  *   Pointer to RX queue structure.
@@ -63,8 +129,57 @@ const size_t rss_hash_default_key_len = sizeof(rss_hash_default_key);
  * @return
  *   0 on success, errno value on failure.
  */
-int
-rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl)
+static int
+rxq_alloc_elts_mprq(struct mlx5_rxq_ctrl *rxq_ctrl)
+{
+	struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq;
+	unsigned int wqe_n = 1 << rxq->elts_n;
+	unsigned int i;
+	int ret = 0;
+
+	/* Iterate on segments. */
+	for (i = 0; i <= wqe_n; ++i) {
+		struct rte_mbuf *buf;
+
+		if (rte_mempool_get(rxq->mprq_mp, (void **)&buf) < 0) {
+			ERROR("%p: empty mbuf pool", (void *)rxq_ctrl);
+			ret = ENOMEM;
+			goto error;
+		}
+		if (i < wqe_n)
+			(*rxq->elts)[i] = buf;
+		else
+			rxq->mprq_repl = buf;
+		PORT(buf) = rxq->port_id;
+	}
+	DEBUG("%p: allocated and configured %u segments",
+	      (void *)rxq_ctrl, wqe_n);
+	assert(ret == 0);
+	return 0;
+error:
+	wqe_n = i;
+	for (i = 0; (i != wqe_n); ++i) {
+		if ((*rxq->elts)[i] != NULL)
+			rte_mempool_put(rxq->mprq_mp,
+					(*rxq->elts)[i]);
+		(*rxq->elts)[i] = NULL;
+	}
+	DEBUG("%p: failed, freed everything", (void *)rxq_ctrl);
+	assert(ret > 0);
+	return ret;
+}
+
+/**
+ * Allocate RX queue elements for Single-Packet RQ.
+ *
+ * @param rxq_ctrl
+ *   Pointer to RX queue structure.
+ *
+ * @return
+ *   0 on success, errno value on failure.
+ */
+static int
+rxq_alloc_elts_sprq(struct mlx5_rxq_ctrl *rxq_ctrl)
 {
 	const unsigned int sges_n = 1 << rxq_ctrl->rxq.sges_n;
 	unsigned int elts_n = 1 << rxq_ctrl->rxq.elts_n;
@@ -135,6 +250,22 @@ rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl)
 }
 
 /**
+ * Allocate RX queue elements.
+ *
+ * @param rxq_ctrl
+ *   Pointer to RX queue structure.
+ *
+ * @return
+ *   0 on success, errno value on failure.
+ */
+int
+rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl)
+{
+	return rxq_mprq_enabled(&rxq_ctrl->rxq) ?
+	       rxq_alloc_elts_mprq(rxq_ctrl) : rxq_alloc_elts_sprq(rxq_ctrl);
+}
+
+/**
  * Free RX queue elements.
  *
  * @param rxq_ctrl
@@ -166,6 +297,10 @@ rxq_free_elts(struct mlx5_rxq_ctrl *rxq_ctrl)
 			rte_pktmbuf_free_seg((*rxq->elts)[i]);
 		(*rxq->elts)[i] = NULL;
 	}
+	if (rxq->mprq_repl != NULL) {
+		rte_pktmbuf_free_seg(rxq->mprq_repl);
+		rxq->mprq_repl = NULL;
+	}
 }
 
 /**
@@ -613,10 +748,16 @@ mlx5_priv_rxq_ibv_new(struct priv *priv, uint16_t idx)
 			struct ibv_cq_init_attr_ex ibv;
 			struct mlx5dv_cq_init_attr mlx5;
 		} cq;
-		struct ibv_wq_init_attr wq;
+		struct {
+			struct ibv_wq_init_attr ibv;
+#ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT
+			struct mlx5dv_wq_init_attr mlx5;
+#endif
+		} wq;
 		struct ibv_cq_ex cq_attr;
 	} attr;
-	unsigned int cqe_n = (1 << rxq_data->elts_n) - 1;
+	unsigned int cqe_n;
+	unsigned int wqe_n = 1 << rxq_data->elts_n;
 	struct mlx5_rxq_ibv *tmpl;
 	struct mlx5dv_cq cq_info;
 	struct mlx5dv_rwq rwq;
@@ -624,6 +765,7 @@ mlx5_priv_rxq_ibv_new(struct priv *priv, uint16_t idx)
 	int ret = 0;
 	struct mlx5dv_obj obj;
 	struct mlx5_dev_config *config = &priv->config;
+	const int mprq_en = rxq_mprq_enabled(rxq_data);
 
 	assert(rxq_data);
 	assert(!rxq_ctrl->ibv);
@@ -646,6 +788,17 @@ mlx5_priv_rxq_ibv_new(struct priv *priv, uint16_t idx)
 			goto error;
 		}
 	}
+	if (mprq_en) {
+		tmpl->mprq_mr = priv_mr_get(priv, rxq_data->mprq_mp);
+		if (!tmpl->mprq_mr) {
+			tmpl->mprq_mr = priv_mr_new(priv, rxq_data->mprq_mp);
+			if (!tmpl->mprq_mr) {
+				ERROR("%p: cannot create MR for"
+				      " Multi-Packet RQ", (void *)rxq_ctrl);
+				goto error;
+			}
+		}
+	}
 	if (rxq_ctrl->irq) {
 		tmpl->channel = mlx5_glue->create_comp_channel(priv->ctx);
 		if (!tmpl->channel) {
@@ -654,6 +807,10 @@ mlx5_priv_rxq_ibv_new(struct priv *priv, uint16_t idx)
 			goto error;
 		}
 	}
+	if (mprq_en)
+		cqe_n = wqe_n * (1 << MLX5_MPRQ_STRIDE_NUM_N) - 1;
+	else
+		cqe_n = wqe_n  - 1;
 	attr.cq.ibv = (struct ibv_cq_init_attr_ex){
 		.cqe = cqe_n,
 		.channel = tmpl->channel,
@@ -686,11 +843,11 @@ mlx5_priv_rxq_ibv_new(struct priv *priv, uint16_t idx)
 	      priv->device_attr.orig_attr.max_qp_wr);
 	DEBUG("priv->device_attr.max_sge is %d",
 	      priv->device_attr.orig_attr.max_sge);
-	attr.wq = (struct ibv_wq_init_attr){
+	attr.wq.ibv = (struct ibv_wq_init_attr){
 		.wq_context = NULL, /* Could be useful in the future. */
 		.wq_type = IBV_WQT_RQ,
 		/* Max number of outstanding WRs. */
-		.max_wr = (1 << rxq_data->elts_n) >> rxq_data->sges_n,
+		.max_wr = wqe_n >> rxq_data->sges_n,
 		/* Max number of scatter/gather elements in a WR. */
 		.max_sge = 1 << rxq_data->sges_n,
 		.pd = priv->pd,
@@ -704,8 +861,8 @@ mlx5_priv_rxq_ibv_new(struct priv *priv, uint16_t idx)
 	};
 	/* By default, FCS (CRC) is stripped by hardware. */
 	if (rxq_data->crc_present) {
-		attr.wq.create_flags |= IBV_WQ_FLAGS_SCATTER_FCS;
-		attr.wq.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
+		attr.wq.ibv.create_flags |= IBV_WQ_FLAGS_SCATTER_FCS;
+		attr.wq.ibv.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
 	}
 #ifdef HAVE_IBV_WQ_FLAG_RX_END_PADDING
 	if (config->hw_padding) {
@@ -713,7 +870,26 @@ mlx5_priv_rxq_ibv_new(struct priv *priv, uint16_t idx)
 		attr.wq.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
 	}
 #endif
-	tmpl->wq = mlx5_glue->create_wq(priv->ctx, &attr.wq);
+#ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT
+	attr.wq.mlx5 = (struct mlx5dv_wq_init_attr){
+		.comp_mask = 0,
+	};
+	if (mprq_en) {
+		struct mlx5dv_striding_rq_init_attr *mprq_attr =
+			&attr.wq.mlx5.striding_rq_attrs;
+
+		attr.wq.mlx5.comp_mask |= MLX5DV_WQ_INIT_ATTR_MASK_STRIDING_RQ;
+		*mprq_attr = (struct mlx5dv_striding_rq_init_attr){
+			.single_stride_log_num_of_bytes = MLX5_MPRQ_STRIDE_SZ_N,
+			.single_wqe_log_num_of_strides = MLX5_MPRQ_STRIDE_NUM_N,
+			.two_byte_shift_en = MLX5_MPRQ_TWO_BYTE_SHIFT,
+		};
+	}
+	tmpl->wq = mlx5_glue->dv_create_wq(priv->ctx, &attr.wq.ibv,
+					   &attr.wq.mlx5);
+#else
+	tmpl->wq = mlx5_glue->create_wq(priv->ctx, &attr.wq.ibv);
+#endif
 	if (tmpl->wq == NULL) {
 		ERROR("%p: WQ creation failure", (void *)rxq_ctrl);
 		goto error;
@@ -722,14 +898,13 @@ mlx5_priv_rxq_ibv_new(struct priv *priv, uint16_t idx)
 	 * Make sure number of WRs*SGEs match expectations since a queue
 	 * cannot allocate more than "desc" buffers.
 	 */
-	if (((int)attr.wq.max_wr !=
-	     ((1 << rxq_data->elts_n) >> rxq_data->sges_n)) ||
-	    ((int)attr.wq.max_sge != (1 << rxq_data->sges_n))) {
+	if (attr.wq.ibv.max_wr != (wqe_n >> rxq_data->sges_n) ||
+	    (int)attr.wq.ibv.max_sge != (1 << rxq_data->sges_n)) {
 		ERROR("%p: requested %u*%u but got %u*%u WRs*SGEs",
 		      (void *)rxq_ctrl,
-		      ((1 << rxq_data->elts_n) >> rxq_data->sges_n),
+		      wqe_n >> rxq_data->sges_n,
 		      (1 << rxq_data->sges_n),
-		      attr.wq.max_wr, attr.wq.max_sge);
+		      attr.wq.ibv.max_wr, attr.wq.ibv.max_sge);
 		goto error;
 	}
 	/* Change queue state to ready. */
@@ -756,25 +931,38 @@ mlx5_priv_rxq_ibv_new(struct priv *priv, uint16_t idx)
 		goto error;
 	}
 	/* Fill the rings. */
-	rxq_data->wqes = (volatile struct mlx5_wqe_data_seg (*)[])
-		(uintptr_t)rwq.buf;
-	for (i = 0; (i != (unsigned int)(1 << rxq_data->elts_n)); ++i) {
+	rxq_data->wqes = rwq.buf;
+	for (i = 0; (i != wqe_n); ++i) {
+		volatile struct mlx5_wqe_data_seg *scat;
 		struct rte_mbuf *buf = (*rxq_data->elts)[i];
-		volatile struct mlx5_wqe_data_seg *scat = &(*rxq_data->wqes)[i];
-
+		uintptr_t addr = rte_pktmbuf_mtod(buf, uintptr_t);
+		uint32_t byte_count;
+		uint32_t lkey;
+
+		if (mprq_en) {
+			scat = &((volatile struct mlx5_wqe_mprq *)
+				 rxq_data->wqes)[i].dseg;
+			byte_count = (1 << MLX5_MPRQ_STRIDE_SZ_N) *
+				     (1 << MLX5_MPRQ_STRIDE_NUM_N);
+			lkey = tmpl->mprq_mr->lkey;
+		} else {
+			scat = &((volatile struct mlx5_wqe_data_seg *)
+				 rxq_data->wqes)[i];
+			byte_count = DATA_LEN(buf);
+			lkey = tmpl->mr->lkey;
+		}
 		/* scat->addr must be able to store a pointer. */
 		assert(sizeof(scat->addr) >= sizeof(uintptr_t));
 		*scat = (struct mlx5_wqe_data_seg){
-			.addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf,
-								  uintptr_t)),
-			.byte_count = rte_cpu_to_be_32(DATA_LEN(buf)),
-			.lkey = tmpl->mr->lkey,
+			.addr = rte_cpu_to_be_64(addr),
+			.byte_count = rte_cpu_to_be_32(byte_count),
+			.lkey = lkey
 		};
 	}
 	rxq_data->rq_db = rwq.dbrec;
 	rxq_data->cqe_n = log2above(cq_info.cqe_cnt);
 	rxq_data->cq_ci = 0;
-	rxq_data->rq_ci = 0;
+	rxq_data->strd_ci = 0;
 	rxq_data->rq_pi = 0;
 	rxq_data->zip = (struct rxq_zip){
 		.ai = 0,
@@ -785,7 +973,7 @@ mlx5_priv_rxq_ibv_new(struct priv *priv, uint16_t idx)
 	rxq_data->cqn = cq_info.cqn;
 	rxq_data->cq_arm_sn = 0;
 	/* Update doorbell counter. */
-	rxq_data->rq_ci = (1 << rxq_data->elts_n) >> rxq_data->sges_n;
+	rxq_data->rq_ci = wqe_n >> rxq_data->sges_n;
 	rte_wmb();
 	*rxq_data->rq_db = rte_cpu_to_be_32(rxq_data->rq_ci);
 	DEBUG("%p: rxq updated with %p", (void *)rxq_ctrl, (void *)&tmpl);
@@ -802,6 +990,8 @@ mlx5_priv_rxq_ibv_new(struct priv *priv, uint16_t idx)
 		claim_zero(mlx5_glue->destroy_cq(tmpl->cq));
 	if (tmpl->channel)
 		claim_zero(mlx5_glue->destroy_comp_channel(tmpl->channel));
+	if (tmpl->mprq_mr)
+		priv_mr_release(priv, tmpl->mprq_mr);
 	if (tmpl->mr)
 		priv_mr_release(priv, tmpl->mr);
 	priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE;
@@ -832,6 +1022,8 @@ mlx5_priv_rxq_ibv_get(struct priv *priv, uint16_t idx)
 	rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
 	if (rxq_ctrl->ibv) {
 		priv_mr_get(priv, rxq_data->mp);
+		if (rxq_mprq_enabled(rxq_data))
+			priv_mr_get(priv, rxq_data->mprq_mp);
 		rte_atomic32_inc(&rxq_ctrl->ibv->refcnt);
 		DEBUG("%p: Verbs Rx queue %p: refcnt %d", (void *)priv,
 		      (void *)rxq_ctrl->ibv,
@@ -863,6 +1055,11 @@ mlx5_priv_rxq_ibv_release(struct priv *priv, struct mlx5_rxq_ibv *rxq_ibv)
 	ret = priv_mr_release(priv, rxq_ibv->mr);
 	if (!ret)
 		rxq_ibv->mr = NULL;
+	if (rxq_mprq_enabled(&rxq_ibv->rxq_ctrl->rxq)) {
+		ret = priv_mr_release(priv, rxq_ibv->mprq_mr);
+		if (!ret)
+			rxq_ibv->mprq_mr = NULL;
+	}
 	DEBUG("%p: Verbs Rx queue %p: refcnt %d", (void *)priv,
 	      (void *)rxq_ibv, rte_atomic32_read(&rxq_ibv->refcnt));
 	if (rte_atomic32_dec_and_test(&rxq_ibv->refcnt)) {
@@ -918,12 +1115,99 @@ mlx5_priv_rxq_ibv_releasable(struct priv *priv, struct mlx5_rxq_ibv *rxq_ibv)
 }
 
 /**
+ * Callback function to initialize mbufs for Multi-Packet RQ.
+ */
+static inline void
+mlx5_mprq_mbuf_init(struct rte_mempool *mp, void *opaque_arg,
+		    void *_m, unsigned int i __rte_unused)
+{
+	struct rte_mbuf *m = _m;
+
+	rte_pktmbuf_init(mp, opaque_arg, _m, i);
+	m->buf_len =
+		(1 << MLX5_MPRQ_STRIDE_SZ_N) * (1 << MLX5_MPRQ_STRIDE_NUM_N);
+	rte_pktmbuf_reset_headroom(m);
+}
+
+/**
+ * Configure Rx queue as Multi-Packet RQ.
+ *
+ * @param rxq_ctrl
+ *   Pointer to RX queue structure. The private structure is obtained from
+ *   rxq_ctrl->priv, which therefore must be set before calling this
+ *   function.
+ * @param idx
+ *   RX queue index.
+ * @param desc
+ *   Number of descriptors to configure in queue.
+ *
+ * @return
+ *   0 on success, negative errno value on failure.
+ */
+static int
+rxq_configure_mprq(struct mlx5_rxq_ctrl *rxq_ctrl, uint16_t idx, uint16_t desc)
+{
+	struct priv *priv = rxq_ctrl->priv;
+	struct mlx5_dev_config *config = &priv->config;
+	struct rte_mempool *mp;
+	char name[RTE_MEMPOOL_NAMESIZE];
+	unsigned int buf_len;
+	unsigned int obj_size;
+
+	assert(rxq_ctrl->rxq.sges_n == 0);
+	rxq_ctrl->rxq.strd_sz_n =
+		MLX5_MPRQ_STRIDE_SZ_N - MLX5_MPRQ_MIN_STRIDE_SZ_N;
+	rxq_ctrl->rxq.strd_num_n =
+		MLX5_MPRQ_STRIDE_NUM_N - MLX5_MPRQ_MIN_STRIDE_NUM_N;
+	rxq_ctrl->rxq.strd_shift_en = MLX5_MPRQ_TWO_BYTE_SHIFT;
+	rxq_ctrl->rxq.mprq_max_memcpy_len = config->mprq_max_memcpy_len;
+	buf_len = (1 << MLX5_MPRQ_STRIDE_SZ_N) * (1 << MLX5_MPRQ_STRIDE_NUM_N) +
+		  RTE_PKTMBUF_HEADROOM;
+	obj_size = buf_len + sizeof(struct rte_mbuf);
+	snprintf(name, sizeof(name), "%s-mprq-%u", priv->dev->data->name, idx);
+	/*
+	 * Allocate per-queue Mempool for Multi-Packet RQ.
+	 *
+	 * Received packets can be either memcpy'd or indirectly referenced. In
+	 * case of mbuf indirection, as it isn't possible to predict how the
+	 * buffers will be queued by application, there's no option to exactly
+	 * pre-allocate needed buffers in advance but to speculatively prepare
+	 * enough buffers.
+	 *
+	 * In the data path, if this Mempool is depleted, PMD will try to memcpy
+	 * received packets to buffers provided by application (rxq->mp) until
+	 * this Mempool gets available again.
+	 */
+	desc *= 4;
+	mp = rte_mempool_create(name, desc + MLX5_MPRQ_MP_CACHE_SZ,
+				obj_size, MLX5_MPRQ_MP_CACHE_SZ,
+				sizeof(struct rte_pktmbuf_pool_private),
+				NULL, NULL, NULL, NULL,
+				priv->dev->device->numa_node,
+				MEMPOOL_F_SC_GET);
+	if (mp == NULL) {
+		ERROR("%p: failed to allocate a mempool for"
+		      " multi-packet Rx queue (%u): %s",
+		      (void *)priv->dev, idx,
+		      rte_strerror(rte_errno));
+		return -ENOMEM;
+	}
+
+	rte_pktmbuf_pool_init(mp, NULL);
+	rte_mempool_obj_iter(mp, mlx5_mprq_mbuf_init, NULL);
+	rxq_ctrl->rxq.mprq_mp = mp;
+	DEBUG("%p: Multi-Packet RQ is enabled for Rx queue %u",
+	      (void *)priv->dev, idx);
+	return 0;
+}
+
+/**
  * Create a DPDK Rx queue.
  *
  * @param priv
  *   Pointer to private structure.
  * @param idx
- *   TX queue index.
+ *   RX queue index.
  * @param desc
  *   Number of descriptors to configure in queue.
  * @param socket
@@ -945,8 +1229,9 @@ mlx5_priv_rxq_new(struct priv *priv, uint16_t idx, uint16_t desc,
 	 * Always allocate extra slots, even if eventually
 	 * the vector Rx will not be used.
 	 */
-	const uint16_t desc_n =
+	uint16_t desc_n =
 		desc + config->rx_vec_en * MLX5_VPMD_DESCS_PER_LOOP;
+	const int mprq_en = priv_check_mprq_support(priv) > 0;
 
 	tmpl = rte_calloc_socket("RXQ", 1,
 				 sizeof(*tmpl) +
@@ -954,13 +1239,35 @@ mlx5_priv_rxq_new(struct priv *priv, uint16_t idx, uint16_t desc,
 				 0, socket);
 	if (!tmpl)
 		return NULL;
+	tmpl->priv = priv;
 	tmpl->socket = socket;
 	if (priv->dev->data->dev_conf.intr_conf.rxq)
 		tmpl->irq = 1;
-	/* Enable scattered packets support for this queue if necessary. */
+	/*
+	 * This Rx queue can be configured as a Multi-Packet RQ if all of the
+	 * following conditions are met:
+	 *  - MPRQ is enabled.
+	 *  - The number of descs is no less than the number of strides.
+	 *  - max_rx_pkt_len fits in a stride after sparing the headroom.
+	 *
+	 *  Otherwise, enable Rx scatter if necessary.
+	 */
 	assert(mb_len >= RTE_PKTMBUF_HEADROOM);
-	if (dev->data->dev_conf.rxmode.max_rx_pkt_len <=
-	    (mb_len - RTE_PKTMBUF_HEADROOM)) {
+	if (mprq_en &&
+	    desc >= (1U << MLX5_MPRQ_STRIDE_NUM_N) &&
+	    dev->data->dev_conf.rxmode.max_rx_pkt_len <=
+	    (1U << MLX5_MPRQ_STRIDE_SZ_N) - RTE_PKTMBUF_HEADROOM) {
+		int ret;
+
+		/* TODO: Rx scatter isn't supported yet. */
+		tmpl->rxq.sges_n = 0;
+		/* Trim the number of descs needed. */
+		desc >>= MLX5_MPRQ_STRIDE_NUM_N;
+		ret = rxq_configure_mprq(tmpl, idx, desc);
+		if (ret)
+			goto error;
+	} else if (dev->data->dev_conf.rxmode.max_rx_pkt_len <=
+		   (mb_len - RTE_PKTMBUF_HEADROOM)) {
 		tmpl->rxq.sges_n = 0;
 	} else if (conf->offloads & DEV_RX_OFFLOAD_SCATTER) {
 		unsigned int size =
@@ -1030,7 +1337,6 @@ mlx5_priv_rxq_new(struct priv *priv, uint16_t idx, uint16_t desc,
 	/* Save port ID. */
 	tmpl->rxq.rss_hash = priv->rxqs_n > 1;
 	tmpl->rxq.port_id = dev->data->port_id;
-	tmpl->priv = priv;
 	tmpl->rxq.mp = mp;
 	tmpl->rxq.stats.idx = idx;
 	tmpl->rxq.elts_n = log2above(desc);
@@ -1105,6 +1411,25 @@ mlx5_priv_rxq_release(struct priv *priv, uint16_t idx)
 	DEBUG("%p: Rx queue %p: refcnt %d", (void *)priv,
 	      (void *)rxq_ctrl, rte_atomic32_read(&rxq_ctrl->refcnt));
 	if (rte_atomic32_dec_and_test(&rxq_ctrl->refcnt)) {
+		if (rxq_ctrl->rxq.mprq_mp != NULL) {
+			/* If a mbuf in the pool has an indirect mbuf attached
+			 * and it is still in use by application, destroying
+			 * the Rx queue can spoil the packet. This is unlikely
+			 * but can happen if application dynamically creates
+			 * and destroys queues while holding Rx packets.
+			 *
+			 * TODO: It is unavoidable for now because the Mempool
+			 * for Multi-Packet RQ isn't provided by application but
+			 * managed by PMD.
+			 */
+			if (!rte_mempool_full(rxq_ctrl->rxq.mprq_mp)) {
+				ERROR("Mempool for Multi-Packet RQ %p"
+				      " is still in use", (void *)rxq_ctrl);
+				return EBUSY;
+			}
+			rte_mempool_free(rxq_ctrl->rxq.mprq_mp);
+			rxq_ctrl->rxq.mprq_mp = NULL;
+		}
 		LIST_REMOVE(rxq_ctrl, next);
 		rte_free(rxq_ctrl);
 		(*priv->rxqs)[idx] = NULL;
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 36eeefb49..49254ab59 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -1800,7 +1800,8 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 
 	while (pkts_n) {
 		unsigned int idx = rq_ci & wqe_cnt;
-		volatile struct mlx5_wqe_data_seg *wqe = &(*rxq->wqes)[idx];
+		volatile struct mlx5_wqe_data_seg *wqe =
+			&((volatile struct mlx5_wqe_data_seg *)rxq->wqes)[idx];
 		struct rte_mbuf *rep = (*rxq->elts)[idx];
 		uint32_t rss_hash_res = 0;
 
@@ -1901,6 +1902,155 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 }
 
 /**
+ * DPDK callback for RX with Multi-Packet RQ support.
+ *
+ * @param dpdk_rxq
+ *   Generic pointer to RX queue structure.
+ * @param[out] pkts
+ *   Array to store received packets.
+ * @param pkts_n
+ *   Maximum number of packets in array.
+ *
+ * @return
+ *   Number of packets successfully received (<= pkts_n).
+ */
+uint16_t
+mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
+{
+	struct mlx5_rxq_data *rxq = dpdk_rxq;
+	const unsigned int strd_n =
+		1 << (rxq->strd_num_n + MLX5_MPRQ_MIN_STRIDE_NUM_N);
+	const unsigned int strd_sz =
+		1 << (rxq->strd_sz_n + MLX5_MPRQ_MIN_STRIDE_SZ_N);
+	const unsigned int strd_shift =
+		MLX5_MPRQ_STRIDE_SHIFT_BYTE * rxq->strd_shift_en;
+	const unsigned int cq_mask = (1 << rxq->cqe_n) - 1;
+	const unsigned int wq_mask = (1 << rxq->elts_n) - 1;
+	volatile struct mlx5_cqe *cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask];
+	unsigned int i = 0;
+	uint16_t rq_ci = rxq->rq_ci;
+	uint16_t strd_idx = rxq->strd_ci;
+	struct rte_mbuf *buf = (*rxq->elts)[rq_ci & wq_mask];
+
+	while (i < pkts_n) {
+		struct rte_mbuf *pkt;
+		int ret;
+		unsigned int len;
+		uint16_t consumed_strd;
+		uint32_t offset;
+		uint32_t byte_cnt;
+		uint32_t rss_hash_res = 0;
+
+		if (strd_idx == strd_n) {
+			/* Replace WQE only if the buffer is still in use. */
+			if (unlikely(rte_mbuf_refcnt_read(buf) > 1)) {
+				struct rte_mbuf *rep = rxq->mprq_repl;
+				volatile struct mlx5_wqe_data_seg *wqe =
+					&((volatile struct mlx5_wqe_mprq *)
+					  rxq->wqes)[rq_ci & wq_mask].dseg;
+				uintptr_t addr;
+
+				/* Replace mbuf. */
+				(*rxq->elts)[rq_ci & wq_mask] = rep;
+				PORT(rep) = PORT(buf);
+				/* Drop our reference to the old buffer. */
+				if (__rte_mbuf_refcnt_update(buf, -1) == 0) {
+					rte_mbuf_refcnt_set(buf, 1);
+					rte_mbuf_raw_free(buf);
+				}
+				/* Point the WQE at the new buffer. */
+				addr = rte_pktmbuf_mtod(rep, uintptr_t);
+				wqe->addr = rte_cpu_to_be_64(addr);
+				/* Stash an mbuf for the next replacement. */
+				if (likely(!rte_mempool_get(rxq->mprq_mp,
+							    (void **)&rep)))
+					rxq->mprq_repl = rep;
+				else
+					rxq->mprq_repl = NULL;
+			}
+			/* Advance to the next WQE. */
+			strd_idx = 0;
+			++rq_ci;
+			buf = (*rxq->elts)[rq_ci & wq_mask];
+		}
+		cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask];
+		ret = mlx5_rx_poll_len(rxq, cqe, cq_mask, &rss_hash_res);
+		if (!ret)
+			break;
+		if (unlikely(ret == -1)) {
+			/* RX error, packet is likely too large. */
+			++rxq->stats.idropped;
+			continue;
+		}
+		byte_cnt = ret;
+		offset = strd_idx * strd_sz + strd_shift;
+		consumed_strd = (byte_cnt & MLX5_MPRQ_STRIDE_NUM_MASK) >>
+				MLX5_MPRQ_STRIDE_NUM_SHIFT;
+		strd_idx += consumed_strd;
+		if (byte_cnt & MLX5_MPRQ_FILLER_MASK)
+			continue;
+		pkt = rte_pktmbuf_alloc(rxq->mp);
+		if (unlikely(pkt == NULL)) {
+			++rxq->stats.rx_nombuf;
+			break;
+		}
+		len = (byte_cnt & MLX5_MPRQ_LEN_MASK) >> MLX5_MPRQ_LEN_SHIFT;
+		assert((int)len >= (rxq->crc_present << 2));
+		if (rxq->crc_present)
+			len -= ETHER_CRC_LEN;
+		/*
+		 * Memcpy the packet to the target mbuf if either:
+		 * - its length does not exceed rxq->mprq_max_memcpy_len, or
+		 * - no replacement buffer is stashed (rxq->mprq_repl is NULL).
+		 */
+		if (len <= rxq->mprq_max_memcpy_len || rxq->mprq_repl == NULL) {
+			uintptr_t base = rte_pktmbuf_mtod(buf, uintptr_t);
+
+			rte_memcpy(rte_pktmbuf_mtod(pkt, void *),
+				   (void *)(base + offset), len);
+			/* Initialize the offload flags. */
+			pkt->ol_flags = 0;
+		} else {
+			/*
+			 * IND_ATTACHED_MBUF will be set in pkt->ol_flags when
+			 * attaching the mbuf and more offload flags will be
+			 * added below by calling rxq_cq_to_mbuf(). Other fields
+			 * will be overwritten.
+			 */
+			rte_pktmbuf_attach_at(pkt, buf, offset,
+					      consumed_strd * strd_sz);
+			assert(pkt->ol_flags == IND_ATTACHED_MBUF);
+			rte_pktmbuf_reset_headroom(pkt);
+		}
+		rxq_cq_to_mbuf(rxq, pkt, cqe, rss_hash_res);
+		PKT_LEN(pkt) = len;
+		DATA_LEN(pkt) = len;
+#ifdef MLX5_PMD_SOFT_COUNTERS
+		/* Increment bytes counter. */
+		rxq->stats.ibytes += PKT_LEN(pkt);
+#endif
+		/* Return packet. */
+		*(pkts++) = pkt;
+		++i;
+	}
+	/* Update the consumer indexes. */
+	rxq->rq_pi += i;
+	rxq->strd_ci = strd_idx;
+	rte_io_wmb();
+	*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
+	if (rq_ci != rxq->rq_ci) {
+		rxq->rq_ci = rq_ci;
+		rte_io_wmb();
+		*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
+	}
+#ifdef MLX5_PMD_SOFT_COUNTERS
+	/* Increment packets counter. */
+	rxq->stats.ipackets += i;
+#endif
+	return i;
+}
+
+/**
  * Dummy DPDK callback for TX.
  *
  * This function is used to temporarily replace the real callback during
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index d7e890558..ba8ac32c2 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -86,18 +86,25 @@ struct mlx5_rxq_data {
 	unsigned int elts_n:4; /* Log 2 of Mbufs. */
 	unsigned int rss_hash:1; /* RSS hash result is enabled. */
 	unsigned int mark:1; /* Marked flow available on the queue. */
-	unsigned int :15; /* Remaining bits. */
+	unsigned int strd_sz_n:3; /* Log 2 of stride size. */
+	unsigned int strd_num_n:4; /* Log 2 of the number of stride. */
+	unsigned int strd_shift_en:1; /* Enable 2bytes shift on a stride. */
+	unsigned int :8; /* Remaining bits. */
 	volatile uint32_t *rq_db;
 	volatile uint32_t *cq_db;
 	uint16_t port_id;
 	uint16_t rq_ci;
+	uint16_t strd_ci; /* Stride index in a WQE for Multi-Packet RQ. */
 	uint16_t rq_pi;
 	uint16_t cq_ci;
-	volatile struct mlx5_wqe_data_seg(*wqes)[];
+	uint16_t mprq_max_memcpy_len; /* Maximum size of packet to memcpy. */
+	volatile void *wqes;
 	volatile struct mlx5_cqe(*cqes)[];
 	struct rxq_zip zip; /* Compressed context. */
 	struct rte_mbuf *(*elts)[];
 	struct rte_mempool *mp;
+	struct rte_mempool *mprq_mp; /* Mempool for Multi-Packet RQ. */
+	struct rte_mbuf *mprq_repl; /* Stashed mbuf for replenish. */
 	struct mlx5_rxq_stats stats;
 	uint64_t mbuf_initializer; /* Default rearm_data for vectorized Rx. */
 	struct rte_mbuf fake_mbuf; /* elts padding for vectorized Rx. */
@@ -115,6 +122,7 @@ struct mlx5_rxq_ibv {
 	struct ibv_wq *wq; /* Work Queue. */
 	struct ibv_comp_channel *channel;
 	struct mlx5_mr *mr; /* Memory Region (for mp). */
+	struct mlx5_mr *mprq_mr; /* Memory Region (for mprq_mp). */
 };
 
 /* RX queue control descriptor. */
@@ -210,6 +218,8 @@ struct mlx5_txq_ctrl {
 extern uint8_t rss_hash_default_key[];
 extern const size_t rss_hash_default_key_len;
 
+int priv_check_mprq_support(struct priv *);
+int priv_mprq_enabled(struct priv *);
 void mlx5_rxq_cleanup(struct mlx5_rxq_ctrl *);
 int mlx5_rx_queue_setup(struct rte_eth_dev *, uint16_t, uint16_t, unsigned int,
 			const struct rte_eth_rxconf *, struct rte_mempool *);
@@ -232,6 +242,7 @@ int mlx5_priv_rxq_release(struct priv *, uint16_t);
 int mlx5_priv_rxq_releasable(struct priv *, uint16_t);
 int mlx5_priv_rxq_verify(struct priv *);
 int rxq_alloc_elts(struct mlx5_rxq_ctrl *);
+int rxq_alloc_mprq_buf(struct mlx5_rxq_ctrl *);
 struct mlx5_ind_table_ibv *mlx5_priv_ind_table_ibv_new(struct priv *,
 						       uint16_t [],
 						       uint16_t);
@@ -280,6 +291,7 @@ uint16_t mlx5_tx_burst_mpw(void *, struct rte_mbuf **, uint16_t);
 uint16_t mlx5_tx_burst_mpw_inline(void *, struct rte_mbuf **, uint16_t);
 uint16_t mlx5_tx_burst_empw(void *, struct rte_mbuf **, uint16_t);
 uint16_t mlx5_rx_burst(void *, struct rte_mbuf **, uint16_t);
+uint16_t mlx5_rx_burst_mprq(void *, struct rte_mbuf **, uint16_t);
 uint16_t removed_tx_burst(void *, struct rte_mbuf **, uint16_t);
 uint16_t removed_rx_burst(void *, struct rte_mbuf **, uint16_t);
 int mlx5_rx_descriptor_status(void *, uint16_t);
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec.c b/drivers/net/mlx5/mlx5_rxtx_vec.c
index b66c2916f..ab4610c84 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec.c
+++ b/drivers/net/mlx5/mlx5_rxtx_vec.c
@@ -282,6 +282,8 @@ rxq_check_vec_support(struct mlx5_rxq_data *rxq)
 	struct mlx5_rxq_ctrl *ctrl =
 		container_of(rxq, struct mlx5_rxq_ctrl, rxq);
 
+	if (priv_mprq_enabled(ctrl->priv))
+		return -ENOTSUP;
 	if (!ctrl->priv->config.rx_vec_en || rxq->sges_n != 0)
 		return -ENOTSUP;
 	return 1;
@@ -303,6 +305,8 @@ priv_check_vec_rx_support(struct priv *priv)
 
 	if (!priv->config.rx_vec_en)
 		return -ENOTSUP;
+	if (priv_mprq_enabled(priv))
+		return -ENOTSUP;
 	/* All the configured queues should support. */
 	for (i = 0; i < priv->rxqs_n; ++i) {
 		struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec.h b/drivers/net/mlx5/mlx5_rxtx_vec.h
index 44856bbff..b181d04cf 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec.h
+++ b/drivers/net/mlx5/mlx5_rxtx_vec.h
@@ -87,7 +87,8 @@ mlx5_rx_replenish_bulk_mbuf(struct mlx5_rxq_data *rxq, uint16_t n)
 	const uint16_t q_mask = q_n - 1;
 	uint16_t elts_idx = rxq->rq_ci & q_mask;
 	struct rte_mbuf **elts = &(*rxq->elts)[elts_idx];
-	volatile struct mlx5_wqe_data_seg *wq = &(*rxq->wqes)[elts_idx];
+	volatile struct mlx5_wqe_data_seg *wq =
+		&((volatile struct mlx5_wqe_data_seg *)rxq->wqes)[elts_idx];
 	unsigned int i;
 
 	assert(n >= MLX5_VPMD_RXQ_RPLNSH_THRESH);
-- 
2.11.0



More information about the dev mailing list