patch 'net/mlx5: fix error CQE dumping for vectorized Rx' has been queued to stable release 21.11.4

Kevin Traynor ktraynor at redhat.com
Thu Feb 23 16:06:04 CET 2023
Previous message (by thread): patch 'net/mlx5: fix flow sample with ConnectX-5' has been queued to stable release 21.11.4
Next message (by thread): patch 'net/mlx5: ignore non-critical syndromes for Rx queue' has been queued to stable release 21.11.4
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]
Hi,

FYI, your patch has been queued to stable release 21.11.4

Note it hasn't been pushed to http://dpdk.org/browse/dpdk-stable yet.
It will be pushed if I get no objections before 02/28/23. So please
shout if anyone has objections.

Also note that after the patch there's a diff of the upstream commit vs the
patch applied to the branch. This will indicate if there was any rebasing
needed to apply to the stable branch. If there were code changes for rebasing
(i.e., not only metadata diffs), please double check that the rebase was
correctly done.

Queued patches are on a temporary branch at:
https://github.com/kevintraynor/dpdk-stable

This queued commit can be viewed at:
https://github.com/kevintraynor/dpdk-stable/commit/0167cd8cf13850ef466a55062d0c248612dd6b7b

Thanks.

Kevin

---
From 0167cd8cf13850ef466a55062d0c248612dd6b7b Mon Sep 17 00:00:00 2001
From: Alexander Kozyrev <akozyrev at nvidia.com>
Date: Fri, 27 Jan 2023 05:22:11 +0200
Subject: [PATCH] net/mlx5: fix error CQE dumping for vectorized Rx

[ upstream commit 633684e0d0defdd7649132797cc14329f71f678c ]

There is a dump file with debug information created
for an error CQE to help with troubleshooting later.
It starts with the last CQE, which, presumably, is the error CQE.
But this is only true for the scalar Rx burst routine since
we handle CQEs there one by one and detect the error immediately.
For vectorized Rx bursts, we may already move to another CQE
when we detect the error since we handle CQEs in batches there.
Go back to the error CQE in this case to dump proper CQE.

Fixes: 88c0733535 ("net/mlx5: extend Rx completion with error handling")

Signed-off-by: Alexander Kozyrev <akozyrev at nvidia.com>
Acked-by: Matan Azrad <matan at nvidia.com>
---
 drivers/net/mlx5/mlx5_rx.c       | 16 +++++++++++-----
 drivers/net/mlx5/mlx5_rx.h       |  3 ++-
 drivers/net/mlx5/mlx5_rxtx_vec.c | 12 +++++++-----
 3 files changed, 20 insertions(+), 11 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rx.c b/drivers/net/mlx5/mlx5_rx.c
index 9fcd039c22..5e982f3011 100644
--- a/drivers/net/mlx5/mlx5_rx.c
+++ b/drivers/net/mlx5/mlx5_rx.c
@@ -408,4 +408,6 @@ mlx5_rxq_initialize(struct mlx5_rxq_data *rxq)
  *   1 when called from vectorized Rx burst, need to prepare mbufs for the RQ.
  *   0 when called from non-vectorized Rx burst.
+ * @param[in] err_n
+ *   Number of CQEs to check for an error.
  *
  * @return
@@ -413,5 +415,5 @@ mlx5_rxq_initialize(struct mlx5_rxq_data *rxq)
  */
 int
-mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec)
+mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec, uint16_t err_n)
 {
 	const uint16_t cqe_n = 1 << rxq->cqe_n;
@@ -425,11 +427,16 @@ mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec)
 		volatile struct mlx5_err_cqe *err_cqe;
 	} u = {
-		.cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_mask],
+		.cqe = &(*rxq->cqes)[(rxq->cq_ci - vec) & cqe_mask],
 	};
 	struct mlx5_mp_arg_queue_state_modify sm;
-	int ret;
+	int ret, i;
 
 	switch (rxq->err_state) {
 	case MLX5_RXQ_ERR_STATE_NO_ERROR:
+		for (i = 0; i < (int)err_n; i++) {
+			u.cqe = &(*rxq->cqes)[(rxq->cq_ci - vec - i) & cqe_mask];
+			if (MLX5_CQE_OPCODE(u.cqe->op_own) == MLX5_CQE_RESP_ERR)
+				break;
+		}
 		rxq->err_state = MLX5_RXQ_ERR_STATE_NEED_RESET;
 		/* Fall-through */
@@ -490,5 +497,4 @@ mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec)
 				uint32_t elt_idx;
 				struct rte_mbuf **elt;
-				int i;
 				unsigned int n = elts_n - (elts_ci -
 							  rxq->rq_pi);
@@ -611,5 +617,5 @@ mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
 				if (unlikely(ret == MLX5_CQE_STATUS_ERR ||
 					     rxq->err_state)) {
-					ret = mlx5_rx_err_handle(rxq, 0);
+					ret = mlx5_rx_err_handle(rxq, 0, 1);
 					if (ret == MLX5_CQE_STATUS_HW_OWN ||
 					    ret == MLX5_RECOVERY_ERROR_RET)
diff --git a/drivers/net/mlx5/mlx5_rx.h b/drivers/net/mlx5/mlx5_rx.h
index 423d80e4a7..028f0bbb14 100644
--- a/drivers/net/mlx5/mlx5_rx.h
+++ b/drivers/net/mlx5/mlx5_rx.h
@@ -273,5 +273,6 @@ int mlx5_hrxq_modify(struct rte_eth_dev *dev, uint32_t hxrq_idx,
 uint16_t mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n);
 void mlx5_rxq_initialize(struct mlx5_rxq_data *rxq);
-__rte_noinline int mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec);
+__rte_noinline int mlx5_rx_err_handle(struct mlx5_rxq_data *rxq,
+				      uint8_t vec, uint16_t err_n);
 void mlx5_mprq_buf_free(struct mlx5_mprq_buf *buf);
 uint16_t mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts,
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec.c b/drivers/net/mlx5/mlx5_rxtx_vec.c
index 0e2eab068a..c6be2be763 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec.c
+++ b/drivers/net/mlx5/mlx5_rxtx_vec.c
@@ -75,5 +75,5 @@ rxq_handle_pending_error(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,
 	rxq->stats.ibytes -= err_bytes;
 #endif
-	mlx5_rx_err_handle(rxq, 1);
+	mlx5_rx_err_handle(rxq, 1, pkts_n);
 	return n;
 }
@@ -254,6 +254,4 @@ rxq_copy_mprq_mbuf_v(struct mlx5_rxq_data *rxq,
 	rxq->rq_pi += i;
 	rxq->cq_ci += i;
-	rte_io_wmb();
-	*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
 	if (rq_ci != rxq->rq_ci) {
 		rxq->rq_ci = rq_ci;
@@ -362,6 +360,4 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,
 		}
 	}
-	rte_io_wmb();
-	*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
 	*no_cq = !rcvd_pkt;
 	return rcvd_pkt;
@@ -391,4 +387,5 @@ mlx5_rx_burst_vec(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 
 	do {
+		err = 0;
 		nb_rx = rxq_burst_v(rxq, pkts + tn, pkts_n - tn,
 				    &err, &no_cq);
@@ -398,4 +395,6 @@ mlx5_rx_burst_vec(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 		if (unlikely(no_cq))
 			break;
+		rte_io_wmb();
+		*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
 	} while (tn != pkts_n);
 	return tn;
@@ -525,4 +524,5 @@ mlx5_rx_burst_mprq_vec(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 
 	do {
+		err = 0;
 		nb_rx = rxq_burst_mprq_v(rxq, pkts + tn, pkts_n - tn,
 					 &err, &no_cq);
@@ -532,4 +532,6 @@ mlx5_rx_burst_mprq_vec(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 		if (unlikely(no_cq))
 			break;
+		rte_io_wmb();
+		*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
 	} while (tn != pkts_n);
 	return tn;
-- 
2.39.1

---
  Diff of the applied patch vs upstream commit (please double-check if non-empty):
---
--- -	2023-02-23 14:46:25.573208241 +0000
+++ 0073-net-mlx5-fix-error-CQE-dumping-for-vectorized-Rx.patch	2023-02-23 14:46:23.845236227 +0000
@@ -1 +1 @@
-From 633684e0d0defdd7649132797cc14329f71f678c Mon Sep 17 00:00:00 2001
+From 0167cd8cf13850ef466a55062d0c248612dd6b7b Mon Sep 17 00:00:00 2001
@@ -5,0 +6,2 @@
+[ upstream commit 633684e0d0defdd7649132797cc14329f71f678c ]
+
@@ -16 +17,0 @@
-Cc: stable at dpdk.org
@@ -27 +28 @@
-index 917c517b83..7612d15f01 100644
+index 9fcd039c22..5e982f3011 100644
@@ -30 +31 @@
-@@ -426,4 +426,6 @@ mlx5_rxq_initialize(struct mlx5_rxq_data *rxq)
+@@ -408,4 +408,6 @@ mlx5_rxq_initialize(struct mlx5_rxq_data *rxq)
@@ -37 +38 @@
-@@ -431,5 +433,5 @@ mlx5_rxq_initialize(struct mlx5_rxq_data *rxq)
+@@ -413,5 +415,5 @@ mlx5_rxq_initialize(struct mlx5_rxq_data *rxq)
@@ -44 +45 @@
-@@ -443,11 +445,16 @@ mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec)
+@@ -425,11 +427,16 @@ mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec)
@@ -63 +64 @@
-@@ -508,5 +515,4 @@ mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec)
+@@ -490,5 +497,4 @@ mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec)
@@ -69 +70 @@
-@@ -629,5 +635,5 @@ mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
+@@ -611,5 +617,5 @@ mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
@@ -77 +78 @@
-index e078aaf3dc..4ba53ebc48 100644
+index 423d80e4a7..028f0bbb14 100644
@@ -80 +81 @@
-@@ -287,5 +287,6 @@ int mlx5_hrxq_modify(struct rte_eth_dev *dev, uint32_t hxrq_idx,
+@@ -273,5 +273,6 @@ int mlx5_hrxq_modify(struct rte_eth_dev *dev, uint32_t hxrq_idx,
Previous message (by thread): patch 'net/mlx5: fix flow sample with ConnectX-5' has been queued to stable release 21.11.4
Next message (by thread): patch 'net/mlx5: ignore non-critical syndromes for Rx queue' has been queued to stable release 21.11.4
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]
More information about the stable mailing list