[dpdk-stable] patch 'net/mlx5: fix vectorized Rx burst termination' has been queued to stable release 19.11.4

luca.boccassi at gmail.com luca.boccassi at gmail.com
Fri Jul 24 13:57:35 CEST 2020


Hi,

FYI, your patch has been queued to stable release 19.11.4

Note it hasn't been pushed to http://dpdk.org/browse/dpdk-stable yet.
It will be pushed if I get no objections before 07/26/20. So please
shout if anyone has objections.

Also note that after the patch there's a diff of the upstream commit vs the
patch applied to the branch. This will indicate if there was any rebasing
needed to apply to the stable branch. If there were code changes for rebasing
(ie: not only metadata diffs), please double check that the rebase was
correctly done.

Thanks.

Luca Boccassi

---
>From e4e924134232556d751dd9aa192c1123c9544a8a Mon Sep 17 00:00:00 2001
From: Alexander Kozyrev <akozyrev at mellanox.com>
Date: Tue, 2 Jun 2020 03:50:41 +0000
Subject: [PATCH] net/mlx5: fix vectorized Rx burst termination

[ upstream commit c9cc554ba423641d3515671269c5648dab3bb9ad ]

Maximum burst size of Vectorized Rx burst routine is set to
MLX5_VPMD_RX_MAX_BURST(64). This limits the performance of any
application that would like to gather more than 64 packets from
the single Rx burst for batch processing (i.e. VPP).

The situation gets worse with a mix of zipped and unzipped CQEs.
They are processed separately and the Rx burst function returns
small number of packets every call.

Repeat the cycle of gathering packets from the vectorized Rx routine
until a requested number of packets are collected or there are no
more CQEs left to process.

Fixes: 6cb559d67b83 ("net/mlx5: add vectorized Rx/Tx burst for x86")

Signed-off-by: Alexander Kozyrev <akozyrev at mellanox.com>
Acked-by: Viacheslav Ovsiienko <viacheslavo at mellanox.com>
Acked-by: Matan Azrad <matan at mellanox.com>
---
 drivers/net/mlx5/mlx5_rxtx_vec.c         | 17 ++++++++++++-----
 drivers/net/mlx5/mlx5_rxtx_vec_altivec.h | 13 ++++++++++---
 drivers/net/mlx5/mlx5_rxtx_vec_neon.h    | 13 ++++++++++---
 drivers/net/mlx5/mlx5_rxtx_vec_sse.h     | 13 ++++++++++---
 4 files changed, 42 insertions(+), 14 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rxtx_vec.c b/drivers/net/mlx5/mlx5_rxtx_vec.c
index d85f90874..dbb4cfaeb 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec.c
+++ b/drivers/net/mlx5/mlx5_rxtx_vec.c
@@ -103,13 +103,20 @@ uint16_t
 mlx5_rx_burst_vec(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 {
 	struct mlx5_rxq_data *rxq = dpdk_rxq;
-	uint16_t nb_rx;
+	uint16_t nb_rx = 0;
+	uint16_t tn = 0;
 	uint64_t err = 0;
+	bool no_cq = false;
 
-	nb_rx = rxq_burst_v(rxq, pkts, pkts_n, &err);
-	if (unlikely(err | rxq->err_state))
-		nb_rx = rxq_handle_pending_error(rxq, pkts, nb_rx);
-	return nb_rx;
+	do {
+		nb_rx = rxq_burst_v(rxq, pkts + tn, pkts_n - tn, &err, &no_cq);
+		if (unlikely(err | rxq->err_state))
+			nb_rx = rxq_handle_pending_error(rxq, pkts + tn, nb_rx);
+		tn += nb_rx;
+		if (unlikely(no_cq))
+			break;
+	} while (tn != pkts_n);
+	return tn;
 }
 
 /**
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h b/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h
index feb17fe1c..c167672f5 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h
@@ -564,13 +564,15 @@ rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq,
  * @param[out] err
  *   Pointer to a flag. Set non-zero value if pkts array has at least one error
  *   packet to handle.
+ * @param[out] no_cq
+ *  Pointer to a boolean. Set true if no new CQE seen.
  *
  * @return
  *   Number of packets received including errors (<= pkts_n).
  */
 static inline uint16_t
 rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
-	    uint64_t *err)
+	    uint64_t *err, bool *no_cq)
 {
 	const uint16_t q_n = 1 << rxq->cqe_n;
 	const uint16_t q_mask = q_n - 1;
@@ -663,8 +665,10 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
 	/* Not to cross queue end. */
 	pkts_n = RTE_MIN(pkts_n, q_n - elts_idx);
 	pkts_n = RTE_MIN(pkts_n, q_n - cq_idx);
-	if (!pkts_n)
+	if (!pkts_n) {
+		*no_cq = !rcvd_pkt;
 		return rcvd_pkt;
+	}
 	/* At this point, there shouldn't be any remaining packets. */
 	assert(rxq->decompressed == 0);
 
@@ -1079,8 +1083,10 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
 			break;
 	}
 	/* If no new CQE seen, return without updating cq_db. */
-	if (unlikely(!nocmp_n && comp_idx == MLX5_VPMD_DESCS_PER_LOOP))
+	if (unlikely(!nocmp_n && comp_idx == MLX5_VPMD_DESCS_PER_LOOP)) {
+		*no_cq = true;
 		return rcvd_pkt;
+	}
 	/* Update the consumer indexes for non-compressed CQEs. */
 	assert(nocmp_n <= pkts_n);
 	rxq->cq_ci += nocmp_n;
@@ -1108,6 +1114,7 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
 	}
 	rte_compiler_barrier();
 	*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
+	*no_cq = !rcvd_pkt;
 	return rcvd_pkt;
 }
 
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
index f92ece429..b2cf857fd 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
@@ -377,13 +377,15 @@ rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq,
  * @param[out] err
  *   Pointer to a flag. Set non-zero value if pkts array has at least one error
  *   packet to handle.
+ * @param[out] no_cq
+ *   Pointer to a boolean. Set true if no new CQE seen.
  *
  * @return
  *   Number of packets received including errors (<= pkts_n).
  */
 static inline uint16_t
 rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
-	    uint64_t *err)
+	    uint64_t *err, bool *no_cq)
 {
 	const uint16_t q_n = 1 << rxq->cqe_n;
 	const uint16_t q_mask = q_n - 1;
@@ -484,8 +486,10 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
 	/* Not to cross queue end. */
 	pkts_n = RTE_MIN(pkts_n, q_n - elts_idx);
 	pkts_n = RTE_MIN(pkts_n, q_n - cq_idx);
-	if (!pkts_n)
+	if (!pkts_n) {
+		*no_cq = !rcvd_pkt;
 		return rcvd_pkt;
+	}
 	/* At this point, there shouldn't be any remained packets. */
 	assert(rxq->decompressed == 0);
 	/*
@@ -744,8 +748,10 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
 			break;
 	}
 	/* If no new CQE seen, return without updating cq_db. */
-	if (unlikely(!nocmp_n && comp_idx == MLX5_VPMD_DESCS_PER_LOOP))
+	if (unlikely(!nocmp_n && comp_idx == MLX5_VPMD_DESCS_PER_LOOP)) {
+		*no_cq = true;
 		return rcvd_pkt;
+	}
 	/* Update the consumer indexes for non-compressed CQEs. */
 	assert(nocmp_n <= pkts_n);
 	rxq->cq_ci += nocmp_n;
@@ -773,6 +779,7 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
 	}
 	rte_cio_wmb();
 	*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
+	*no_cq = !rcvd_pkt;
 	return rcvd_pkt;
 }
 
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
index bb59163a2..9935299d5 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
@@ -383,13 +383,15 @@ rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq, __m128i cqes[4],
  * @param[out] err
  *   Pointer to a flag. Set non-zero value if pkts array has at least one error
  *   packet to handle.
+ * @param[out] no_cq
+ *   Pointer to a boolean. Set true if no new CQE seen.
  *
  * @return
  *   Number of packets received including errors (<= pkts_n).
  */
 static inline uint16_t
 rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
-	    uint64_t *err)
+	    uint64_t *err, bool *no_cq)
 {
 	const uint16_t q_n = 1 << rxq->cqe_n;
 	const uint16_t q_mask = q_n - 1;
@@ -471,8 +473,10 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
 	/* Not to cross queue end. */
 	pkts_n = RTE_MIN(pkts_n, q_n - elts_idx);
 	pkts_n = RTE_MIN(pkts_n, q_n - cq_idx);
-	if (!pkts_n)
+	if (!pkts_n) {
+		*no_cq = !rcvd_pkt;
 		return rcvd_pkt;
+	}
 	/* At this point, there shouldn't be any remained packets. */
 	assert(rxq->decompressed == 0);
 	/*
@@ -694,8 +698,10 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
 			break;
 	}
 	/* If no new CQE seen, return without updating cq_db. */
-	if (unlikely(!nocmp_n && comp_idx == MLX5_VPMD_DESCS_PER_LOOP))
+	if (unlikely(!nocmp_n && comp_idx == MLX5_VPMD_DESCS_PER_LOOP)) {
+		*no_cq = true;
 		return rcvd_pkt;
+	}
 	/* Update the consumer indexes for non-compressed CQEs. */
 	assert(nocmp_n <= pkts_n);
 	rxq->cq_ci += nocmp_n;
@@ -723,6 +729,7 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
 	}
 	rte_compiler_barrier();
 	*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
+	*no_cq = !rcvd_pkt;
 	return rcvd_pkt;
 }
 
-- 
2.20.1

---
  Diff of the applied patch vs upstream commit (please double-check if non-empty:
---
--- -	2020-07-24 12:53:49.263432540 +0100
+++ 0017-net-mlx5-fix-vectorized-Rx-burst-termination.patch	2020-07-24 12:53:48.187004597 +0100
@@ -1,8 +1,10 @@
-From c9cc554ba423641d3515671269c5648dab3bb9ad Mon Sep 17 00:00:00 2001
+From e4e924134232556d751dd9aa192c1123c9544a8a Mon Sep 17 00:00:00 2001
 From: Alexander Kozyrev <akozyrev at mellanox.com>
 Date: Tue, 2 Jun 2020 03:50:41 +0000
 Subject: [PATCH] net/mlx5: fix vectorized Rx burst termination
 
+[ upstream commit c9cc554ba423641d3515671269c5648dab3bb9ad ]
+
 Maximum burst size of Vectorized Rx burst routine is set to
 MLX5_VPMD_RX_MAX_BURST(64). This limits the performance of any
 application that would like to gather more than 64 packets from
@@ -17,7 +19,6 @@
 more CQEs left to process.
 
 Fixes: 6cb559d67b83 ("net/mlx5: add vectorized Rx/Tx burst for x86")
-Cc: stable at dpdk.org
 
 Signed-off-by: Alexander Kozyrev <akozyrev at mellanox.com>
 Acked-by: Viacheslav Ovsiienko <viacheslavo at mellanox.com>
@@ -30,7 +31,7 @@
  4 files changed, 42 insertions(+), 14 deletions(-)
 
 diff --git a/drivers/net/mlx5/mlx5_rxtx_vec.c b/drivers/net/mlx5/mlx5_rxtx_vec.c
-index 1518bdd5b..b38bd203c 100644
+index d85f90874..dbb4cfaeb 100644
 --- a/drivers/net/mlx5/mlx5_rxtx_vec.c
 +++ b/drivers/net/mlx5/mlx5_rxtx_vec.c
 @@ -103,13 +103,20 @@ uint16_t
@@ -60,7 +61,7 @@
  
  /**
 diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h b/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h
-index 26715ef45..b55138ac7 100644
+index feb17fe1c..c167672f5 100644
 --- a/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h
 +++ b/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h
 @@ -564,13 +564,15 @@ rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq,
@@ -90,7 +91,7 @@
  		return rcvd_pkt;
 +	}
  	/* At this point, there shouldn't be any remaining packets. */
- 	MLX5_ASSERT(rxq->decompressed == 0);
+ 	assert(rxq->decompressed == 0);
  
 @@ -1079,8 +1083,10 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
  			break;
@@ -102,7 +103,7 @@
  		return rcvd_pkt;
 +	}
  	/* Update the consumer indexes for non-compressed CQEs. */
- 	MLX5_ASSERT(nocmp_n <= pkts_n);
+ 	assert(nocmp_n <= pkts_n);
  	rxq->cq_ci += nocmp_n;
 @@ -1108,6 +1114,7 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
  	}
@@ -113,10 +114,10 @@
  }
  
 diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
-index ecafbf800..3007c0359 100644
+index f92ece429..b2cf857fd 100644
 --- a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
 +++ b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
-@@ -378,13 +378,15 @@ rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq,
+@@ -377,13 +377,15 @@ rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq,
   * @param[out] err
   *   Pointer to a flag. Set non-zero value if pkts array has at least one error
   *   packet to handle.
@@ -133,7 +134,7 @@
  {
  	const uint16_t q_n = 1 << rxq->cqe_n;
  	const uint16_t q_mask = q_n - 1;
-@@ -485,8 +487,10 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
+@@ -484,8 +486,10 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
  	/* Not to cross queue end. */
  	pkts_n = RTE_MIN(pkts_n, q_n - elts_idx);
  	pkts_n = RTE_MIN(pkts_n, q_n - cq_idx);
@@ -143,9 +144,9 @@
  		return rcvd_pkt;
 +	}
  	/* At this point, there shouldn't be any remained packets. */
- 	MLX5_ASSERT(rxq->decompressed == 0);
+ 	assert(rxq->decompressed == 0);
  	/*
-@@ -745,8 +749,10 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
+@@ -744,8 +748,10 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
  			break;
  	}
  	/* If no new CQE seen, return without updating cq_db. */
@@ -155,9 +156,9 @@
  		return rcvd_pkt;
 +	}
  	/* Update the consumer indexes for non-compressed CQEs. */
- 	MLX5_ASSERT(nocmp_n <= pkts_n);
+ 	assert(nocmp_n <= pkts_n);
  	rxq->cq_ci += nocmp_n;
-@@ -774,6 +780,7 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
+@@ -773,6 +779,7 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
  	}
  	rte_cio_wmb();
  	*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
@@ -166,10 +167,10 @@
  }
  
 diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
-index 6847ae782..da5960ad8 100644
+index bb59163a2..9935299d5 100644
 --- a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
 +++ b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
-@@ -385,13 +385,15 @@ rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq, __m128i cqes[4],
+@@ -383,13 +383,15 @@ rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq, __m128i cqes[4],
   * @param[out] err
   *   Pointer to a flag. Set non-zero value if pkts array has at least one error
   *   packet to handle.
@@ -186,7 +187,7 @@
  {
  	const uint16_t q_n = 1 << rxq->cqe_n;
  	const uint16_t q_mask = q_n - 1;
-@@ -473,8 +475,10 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
+@@ -471,8 +473,10 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
  	/* Not to cross queue end. */
  	pkts_n = RTE_MIN(pkts_n, q_n - elts_idx);
  	pkts_n = RTE_MIN(pkts_n, q_n - cq_idx);
@@ -196,9 +197,9 @@
  		return rcvd_pkt;
 +	}
  	/* At this point, there shouldn't be any remained packets. */
- 	MLX5_ASSERT(rxq->decompressed == 0);
+ 	assert(rxq->decompressed == 0);
  	/*
-@@ -696,8 +700,10 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
+@@ -694,8 +698,10 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
  			break;
  	}
  	/* If no new CQE seen, return without updating cq_db. */
@@ -208,9 +209,9 @@
  		return rcvd_pkt;
 +	}
  	/* Update the consumer indexes for non-compressed CQEs. */
- 	MLX5_ASSERT(nocmp_n <= pkts_n);
+ 	assert(nocmp_n <= pkts_n);
  	rxq->cq_ci += nocmp_n;
-@@ -725,6 +731,7 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
+@@ -723,6 +729,7 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
  	}
  	rte_compiler_barrier();
  	*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);


More information about the stable mailing list