net/mlx5: fix Rx queue count calculation

Message ID 20200929183623.22685-1-akozyrev@nvidia.com (mailing list archive)
State Accepted, archived
Delegated to: Raslan Darawsheh
Headers
Series net/mlx5: fix Rx queue count calculation

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/iol-broadcom-Functional success Functional Testing PASS
ci/iol-intel-Functional success Functional Testing PASS
ci/iol-testing success Testing PASS
ci/Intel-compilation success Compilation OK
ci/travis-robot success Travis build: passed

Commit Message

Alexander Kozyrev Sept. 29, 2020, 6:36 p.m. UTC
  There are a few discrepancies in the Rx queue count calculation.

The wrong index is used to calculate the number of used descriptors
in an Rx queue when compressed CQEs are being processed. The global
CQ index is used, whereas the internal index within the current
compressed session is needed to get the right number of elements
processed.

The total number of CQEs should be used instead of the number of mbufs
to determine the maximum number of Rx descriptors. These numbers
are not equal for a Multi-Packet Rx queue.

Allow the Rx queue count calculation for all possible Rx bursts since
CQ handling is the same for regular, vectorized, and multi-packet Rx
queues.

Fixes: 26f0488344 ("net/mlx5: support Rx queue count API")
Cc: stable@dpdk.org

Signed-off-by: Alexander Kozyrev <akozyrev@nvidia.com>
Acked-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
---
 drivers/net/mlx5/mlx5_rxtx.c | 27 ++++++++++++---------------
 1 file changed, 12 insertions(+), 15 deletions(-)
  

Comments

Raslan Darawsheh Oct. 1, 2020, 1:28 p.m. UTC | #1
Hi,

> -----Original Message-----
> From: Alexander Kozyrev <akozyrev@nvidia.com>
> Sent: Tuesday, September 29, 2020 9:36 PM
> To: dev@dpdk.org
> Cc: stable@dpdk.org; Raslan Darawsheh <rasland@nvidia.com>; Slava
> Ovsiienko <viacheslavo@nvidia.com>
> Subject: [PATCH] net/mlx5: fix Rx queue count calculation
> 
> There are a few discrepancies in the Rx queue count calculation.
> 
> The wrong index is used to calculate the number of used descriptors
> in an Rx queue in case of the compressed CQE processing. The global
> CQ index is used while we really need an internal index in a single
> compressed session to get the right number of elements processed.
> 
> The total number of CQs should be used instead of the number of mbufs
> to find out about the maximum number of Rx descriptors. These numbers
> are not equal for the Multi-Packet Rx queue.
> 
> Allow the Rx queue count calculation for all possible Rx bursts since
> CQ handling is the same for regular, vectorized, and multi-packet Rx
> queues.
> 
> Fixes: 26f0488344 ("net/mlx5: support Rx queue count API")
> Cc: stable@dpdk.org
> 
> Signed-off-by: Alexander Kozyrev <akozyrev@nvidia.com>
> Acked-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
> ---
>  drivers/net/mlx5/mlx5_rxtx.c | 27 ++++++++++++---------------
>  1 file changed, 12 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
> index 101555ed03..4755980d5b 100644
> --- a/drivers/net/mlx5/mlx5_rxtx.c
> +++ b/drivers/net/mlx5/mlx5_rxtx.c
> @@ -465,19 +465,11 @@ rx_queue_count(struct mlx5_rxq_data *rxq)
>  {
>  	struct rxq_zip *zip = &rxq->zip;
>  	volatile struct mlx5_cqe *cqe;
> +	unsigned int cq_ci = rxq->cq_ci;
>  	const unsigned int cqe_n = (1 << rxq->cqe_n);
>  	const unsigned int cqe_cnt = cqe_n - 1;
> -	unsigned int cq_ci;
> -	unsigned int used;
> +	unsigned int used = 0;
> 
> -	/* if we are processing a compressed cqe */
> -	if (zip->ai) {
> -		used = zip->cqe_cnt - zip->ca;
> -		cq_ci = zip->cq_ci;
> -	} else {
> -		used = 0;
> -		cq_ci = rxq->cq_ci;
> -	}
>  	cqe = &(*rxq->cqes)[cq_ci & cqe_cnt];
>  	while (check_cqe(cqe, cqe_n, cq_ci) !=
> MLX5_CQE_STATUS_HW_OWN) {
>  		int8_t op_own;
> @@ -485,14 +477,17 @@ rx_queue_count(struct mlx5_rxq_data *rxq)
> 
>  		op_own = cqe->op_own;
>  		if (MLX5_CQE_FORMAT(op_own) == MLX5_COMPRESSED)
> -			n = rte_be_to_cpu_32(cqe->byte_cnt);
> +			if (unlikely(zip->ai))
> +				n = zip->cqe_cnt - zip->ai;
> +			else
> +				n = rte_be_to_cpu_32(cqe->byte_cnt);
>  		else
>  			n = 1;
>  		cq_ci += n;
>  		used += n;
>  		cqe = &(*rxq->cqes)[cq_ci & cqe_cnt];
>  	}
> -	used = RTE_MIN(used, (1U << rxq->elts_n) - 1);
> +	used = RTE_MIN(used, cqe_n);
>  	return used;
>  }
> 
> @@ -515,11 +510,12 @@ mlx5_rx_descriptor_status(void *rx_queue,
> uint16_t offset)
>  			container_of(rxq, struct mlx5_rxq_ctrl, rxq);
>  	struct rte_eth_dev *dev = ETH_DEV(rxq_ctrl->priv);
> 
> -	if (dev->rx_pkt_burst != mlx5_rx_burst) {
> +	if (dev->rx_pkt_burst == NULL ||
> +	    dev->rx_pkt_burst == removed_rx_burst) {
>  		rte_errno = ENOTSUP;
>  		return -rte_errno;
>  	}
> -	if (offset >= (1 << rxq->elts_n)) {
> +	if (offset >= (1 << rxq->cqe_n)) {
>  		rte_errno = EINVAL;
>  		return -rte_errno;
>  	}
> @@ -630,7 +626,8 @@ mlx5_rx_queue_count(struct rte_eth_dev *dev,
> uint16_t rx_queue_id)
>  	struct mlx5_priv *priv = dev->data->dev_private;
>  	struct mlx5_rxq_data *rxq;
> 
> -	if (dev->rx_pkt_burst != mlx5_rx_burst) {
> +	if (dev->rx_pkt_burst == NULL ||
> +	    dev->rx_pkt_burst == removed_rx_burst) {
>  		rte_errno = ENOTSUP;
>  		return -rte_errno;
>  	}
> --
> 2.24.1

Patch applied to next-net-mlx,

Kindest regards,
Raslan Darawsheh
  

Patch

diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 101555ed03..4755980d5b 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -465,19 +465,11 @@  rx_queue_count(struct mlx5_rxq_data *rxq)
 {
 	struct rxq_zip *zip = &rxq->zip;
 	volatile struct mlx5_cqe *cqe;
+	unsigned int cq_ci = rxq->cq_ci;
 	const unsigned int cqe_n = (1 << rxq->cqe_n);
 	const unsigned int cqe_cnt = cqe_n - 1;
-	unsigned int cq_ci;
-	unsigned int used;
+	unsigned int used = 0;
 
-	/* if we are processing a compressed cqe */
-	if (zip->ai) {
-		used = zip->cqe_cnt - zip->ca;
-		cq_ci = zip->cq_ci;
-	} else {
-		used = 0;
-		cq_ci = rxq->cq_ci;
-	}
 	cqe = &(*rxq->cqes)[cq_ci & cqe_cnt];
 	while (check_cqe(cqe, cqe_n, cq_ci) != MLX5_CQE_STATUS_HW_OWN) {
 		int8_t op_own;
@@ -485,14 +477,17 @@  rx_queue_count(struct mlx5_rxq_data *rxq)
 
 		op_own = cqe->op_own;
 		if (MLX5_CQE_FORMAT(op_own) == MLX5_COMPRESSED)
-			n = rte_be_to_cpu_32(cqe->byte_cnt);
+			if (unlikely(zip->ai))
+				n = zip->cqe_cnt - zip->ai;
+			else
+				n = rte_be_to_cpu_32(cqe->byte_cnt);
 		else
 			n = 1;
 		cq_ci += n;
 		used += n;
 		cqe = &(*rxq->cqes)[cq_ci & cqe_cnt];
 	}
-	used = RTE_MIN(used, (1U << rxq->elts_n) - 1);
+	used = RTE_MIN(used, cqe_n);
 	return used;
 }
 
@@ -515,11 +510,12 @@  mlx5_rx_descriptor_status(void *rx_queue, uint16_t offset)
 			container_of(rxq, struct mlx5_rxq_ctrl, rxq);
 	struct rte_eth_dev *dev = ETH_DEV(rxq_ctrl->priv);
 
-	if (dev->rx_pkt_burst != mlx5_rx_burst) {
+	if (dev->rx_pkt_burst == NULL ||
+	    dev->rx_pkt_burst == removed_rx_burst) {
 		rte_errno = ENOTSUP;
 		return -rte_errno;
 	}
-	if (offset >= (1 << rxq->elts_n)) {
+	if (offset >= (1 << rxq->cqe_n)) {
 		rte_errno = EINVAL;
 		return -rte_errno;
 	}
@@ -630,7 +626,8 @@  mlx5_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_rxq_data *rxq;
 
-	if (dev->rx_pkt_burst != mlx5_rx_burst) {
+	if (dev->rx_pkt_burst == NULL ||
+	    dev->rx_pkt_burst == removed_rx_burst) {
 		rte_errno = ENOTSUP;
 		return -rte_errno;
 	}