[dpdk-dev] net/mlx4: fix last Tx wqe stamping lack

Message ID 1510302438-29138-1-git-send-email-matan@mellanox.com (mailing list archive)
State Accepted, archived
Headers

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK

Commit Message

Matan Azrad Nov. 10, 2017, 8:27 a.m. UTC
  When Tx pakcet HW processing is done, SW should stamp all the completion
burst WQEs.

Stamp missed last completion burst WQE.

Fixes: c3c977bbecbd ("net/mlx4: add Tx bypassing Verbs")

Signed-off-by: Matan Azrad <matan@mellanox.com>
---
 drivers/net/mlx4/mlx4_rxtx.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

I think this is a critical bug fix that should be added to 17.11 version.
No performance impact was seen.
  

Comments

Adrien Mazarguil Nov. 10, 2017, 3:02 p.m. UTC | #1
On Fri, Nov 10, 2017 at 08:27:18AM +0000, Matan Azrad wrote:
> When Tx pakcet HW processing is done, SW should stamp all the completion
> burst WQEs.
> 
> Stamp missed last completion burst WQE.
> 
> Fixes: c3c977bbecbd ("net/mlx4: add Tx bypassing Verbs")
> 
> Signed-off-by: Matan Azrad <matan@mellanox.com>

This reads like you were in a hurry :)

Took me a while to understand the problem and how you addressed it. So in
short, wqe_index is consumed but its TXBBs aren't stamped because the loop
stops at its index without processing it.

Patch looks good but could have been simpler by directly initializing
nr_txbbs to sq->tail, not use sq->tail as an offset afterward and get rid of
sq_tail. It's OK as this wouldn't have resulted in a smaller patch anyway.

Commit log rewording suggestion:

 net/mlx4: fix missing stamp during Tx completion

 After processing completed packets, the owner bit of each TXBB comprised
 in its WQEs must be invalidated. The loop stops short of processing the
 last WQE.

Other than that,

Acked-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>

> ---
>  drivers/net/mlx4/mlx4_rxtx.c | 13 +++++++------
>  1 file changed, 7 insertions(+), 6 deletions(-)
> 
> I think this is a critical bug fix that should be added to 17.11 version.
> No performance impact was seen.
> 
> diff --git a/drivers/net/mlx4/mlx4_rxtx.c b/drivers/net/mlx4/mlx4_rxtx.c
> index 3985e06..44edeac 100644
> --- a/drivers/net/mlx4/mlx4_rxtx.c
> +++ b/drivers/net/mlx4/mlx4_rxtx.c
> @@ -336,6 +336,7 @@ struct pv {
>  {
>  	unsigned int elts_comp = txq->elts_comp;
>  	unsigned int elts_tail = txq->elts_tail;
> +	unsigned int sq_tail = sq->tail;
>  	struct mlx4_cq *cq = &txq->mcq;
>  	volatile struct mlx4_cqe *cqe;
>  	uint32_t cons_index = cq->cons_index;
> @@ -372,13 +373,13 @@ struct pv {
>  			rte_be_to_cpu_16(cqe->wqe_index) & sq->txbb_cnt_mask;
>  		do {
>  			/* Free next descriptor. */
> -			nr_txbbs +=
> +			sq_tail += nr_txbbs;
> +			nr_txbbs =
>  				mlx4_txq_stamp_freed_wqe(sq,
> -				     (sq->tail + nr_txbbs) & sq->txbb_cnt_mask,
> -				     !!((sq->tail + nr_txbbs) & sq->txbb_cnt));
> +				     sq_tail & sq->txbb_cnt_mask,
> +				     !!(sq_tail & sq->txbb_cnt));
>  			pkts++;
> -		} while (((sq->tail + nr_txbbs) & sq->txbb_cnt_mask) !=
> -			 new_index);
> +		} while ((sq_tail & sq->txbb_cnt_mask) != new_index);
>  		cons_index++;
>  	} while (1);
>  	if (unlikely(pkts == 0))
> @@ -386,7 +387,7 @@ struct pv {
>  	/* Update CQ. */
>  	cq->cons_index = cons_index;
>  	*cq->set_ci_db = rte_cpu_to_be_32(cq->cons_index & MLX4_CQ_DB_CI_MASK);
> -	sq->tail = sq->tail + nr_txbbs;
> +	sq->tail = sq_tail + nr_txbbs;
>  	/* Update the list of packets posted for transmission. */
>  	elts_comp -= pkts;
>  	assert(elts_comp <= txq->elts_comp);
  
Thomas Monjalon Nov. 11, 2017, 2:53 p.m. UTC | #2
10/11/2017 16:02, Adrien Mazarguil:
> On Fri, Nov 10, 2017 at 08:27:18AM +0000, Matan Azrad wrote:
> > When Tx pakcet HW processing is done, SW should stamp all the completion
> > burst WQEs.
> > 
> > Stamp missed last completion burst WQE.
> > 
> > Fixes: c3c977bbecbd ("net/mlx4: add Tx bypassing Verbs")
> > 
> > Signed-off-by: Matan Azrad <matan@mellanox.com>
> 
> This reads like you were in a hurry :)
> 
> Took me a while to understand the problem and how you addressed it. So in
> short, wqe_index is consumed but its TXBBs aren't stamped because the loop
> stops at its index without processing it.
> 
> Patch looks good but could have been simpler by directly initializing
> nr_txbbs to sq->tail, not use sq->tail as an offset afterward and get rid of
> sq_tail. It's OK as this wouldn't have resulted in a smaller patch anyway.
> 
> Commit log rewording suggestion:
> 
>  net/mlx4: fix missing stamp during Tx completion
> 
>  After processing completed packets, the owner bit of each TXBB comprised
>  in its WQEs must be invalidated. The loop stops short of processing the
>  last WQE.
> 
> Other than that,
> 
> Acked-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>

Applied with suggested commit log, thanks
  

Patch

diff --git a/drivers/net/mlx4/mlx4_rxtx.c b/drivers/net/mlx4/mlx4_rxtx.c
index 3985e06..44edeac 100644
--- a/drivers/net/mlx4/mlx4_rxtx.c
+++ b/drivers/net/mlx4/mlx4_rxtx.c
@@ -336,6 +336,7 @@  struct pv {
 {
 	unsigned int elts_comp = txq->elts_comp;
 	unsigned int elts_tail = txq->elts_tail;
+	unsigned int sq_tail = sq->tail;
 	struct mlx4_cq *cq = &txq->mcq;
 	volatile struct mlx4_cqe *cqe;
 	uint32_t cons_index = cq->cons_index;
@@ -372,13 +373,13 @@  struct pv {
 			rte_be_to_cpu_16(cqe->wqe_index) & sq->txbb_cnt_mask;
 		do {
 			/* Free next descriptor. */
-			nr_txbbs +=
+			sq_tail += nr_txbbs;
+			nr_txbbs =
 				mlx4_txq_stamp_freed_wqe(sq,
-				     (sq->tail + nr_txbbs) & sq->txbb_cnt_mask,
-				     !!((sq->tail + nr_txbbs) & sq->txbb_cnt));
+				     sq_tail & sq->txbb_cnt_mask,
+				     !!(sq_tail & sq->txbb_cnt));
 			pkts++;
-		} while (((sq->tail + nr_txbbs) & sq->txbb_cnt_mask) !=
-			 new_index);
+		} while ((sq_tail & sq->txbb_cnt_mask) != new_index);
 		cons_index++;
 	} while (1);
 	if (unlikely(pkts == 0))
@@ -386,7 +387,7 @@  struct pv {
 	/* Update CQ. */
 	cq->cons_index = cons_index;
 	*cq->set_ci_db = rte_cpu_to_be_32(cq->cons_index & MLX4_CQ_DB_CI_MASK);
-	sq->tail = sq->tail + nr_txbbs;
+	sq->tail = sq_tail + nr_txbbs;
 	/* Update the list of packets posted for transmission. */
 	elts_comp -= pkts;
 	assert(elts_comp <= txq->elts_comp);