[dpdk-dev,v2,7/8] net/mlx4: align Tx descriptors number

Message ID 1512571693-15338-8-git-send-email-matan@mellanox.com (mailing list archive)
State Superseded, archived
Headers

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK

Commit Message

Matan Azrad Dec. 6, 2017, 2:48 p.m. UTC
  Using a power-of-2 number of descriptors makes ring management easier
and allows using mask operations instead of wraparound conditions.

Adjust the Tx descriptor number to be a power of 2 and change the
calculations to use a mask accordingly.

Signed-off-by: Matan Azrad <matan@mellanox.com>
---
 drivers/net/mlx4/mlx4_rxtx.c | 28 +++++++++++++---------------
 drivers/net/mlx4/mlx4_txq.c  | 13 +++++++++----
 2 files changed, 22 insertions(+), 19 deletions(-)
  

Comments

Adrien Mazarguil Dec. 6, 2017, 4:22 p.m. UTC | #1
On Wed, Dec 06, 2017 at 02:48:12PM +0000, Matan Azrad wrote:
> Using power of 2 descriptors number makes the ring management easier
> and allows to use mask operation instead of wraparound conditions.
> 
> Adjust Tx descriptor number to be power of 2 and change calculation to
> use mask accordingly.
> 
> Signed-off-by: Matan Azrad <matan@mellanox.com>
> ---
>  drivers/net/mlx4/mlx4_rxtx.c | 28 +++++++++++++---------------
>  drivers/net/mlx4/mlx4_txq.c  | 13 +++++++++----
>  2 files changed, 22 insertions(+), 19 deletions(-)
> 
> diff --git a/drivers/net/mlx4/mlx4_rxtx.c b/drivers/net/mlx4/mlx4_rxtx.c
> index 8b8d95e..14192fe 100644
> --- a/drivers/net/mlx4/mlx4_rxtx.c
> +++ b/drivers/net/mlx4/mlx4_rxtx.c
> @@ -312,10 +312,14 @@ struct pv {
>   *
>   * @param txq
>   *   Pointer to Tx queue structure.
> + * @param sq
> + *   Pointer to the SQ structure.
> + * @param elts_m
> + *   Tx elements number mask.

It's minor; however, these parameters should be described in the same order
as they appear in the function prototype. Please swap them if you send an
updated series.

>   */
>  static void
> -mlx4_txq_complete(struct txq *txq, const unsigned int elts_n,
> -				  struct mlx4_sq *sq)
> +mlx4_txq_complete(struct txq *txq, const unsigned int elts_m,
> +		  struct mlx4_sq *sq)
>  {
<snip>
> diff --git a/drivers/net/mlx4/mlx4_txq.c b/drivers/net/mlx4/mlx4_txq.c
> index 4c7b62a..7eb4b04 100644
> --- a/drivers/net/mlx4/mlx4_txq.c
> +++ b/drivers/net/mlx4/mlx4_txq.c
> @@ -76,17 +76,16 @@
>  	unsigned int elts_head = txq->elts_head;
>  	unsigned int elts_tail = txq->elts_tail;
>  	struct txq_elt (*elts)[txq->elts_n] = txq->elts;
> +	unsigned int elts_m = txq->elts_n - 1;
>  
>  	DEBUG("%p: freeing WRs", (void *)txq);
>  	while (elts_tail != elts_head) {
> -		struct txq_elt *elt = &(*elts)[elts_tail];
> +		struct txq_elt *elt = &(*elts)[elts_tail++ & elts_m];
>  
>  		assert(elt->buf != NULL);
>  		rte_pktmbuf_free(elt->buf);
>  		elt->buf = NULL;
>  		elt->wqe = NULL;
> -		if (++elts_tail == RTE_DIM(*elts))
> -			elts_tail = 0;
>  	}
>  	txq->elts_tail = txq->elts_head;
>  }
> @@ -208,7 +207,7 @@ struct txq_mp2mr_mbuf_check_data {
>  	struct mlx4dv_obj mlxdv;
>  	struct mlx4dv_qp dv_qp;
>  	struct mlx4dv_cq dv_cq;
> -	struct txq_elt (*elts)[desc];
> +	struct txq_elt (*elts)[rte_align32pow2(desc)];

OK, I'm curious about what happened to the magic 0x1000 though? Was it a
limitation or some leftover debugging code?
  
Matan Azrad Dec. 6, 2017, 5:24 p.m. UTC | #2
Hi Adrien

> -----Original Message-----
> From: Adrien Mazarguil [mailto:adrien.mazarguil@6wind.com]
> Sent: Wednesday, December 6, 2017 6:23 PM
> To: Matan Azrad <matan@mellanox.com>
> Cc: dev@dpdk.org
> Subject: Re: [PATCH v2 7/8] net/mlx4: align Tx descriptors number
> 
> On Wed, Dec 06, 2017 at 02:48:12PM +0000, Matan Azrad wrote:
> > Using power of 2 descriptors number makes the ring management easier
> > and allows to use mask operation instead of wraparound conditions.
> >
> > Adjust Tx descriptor number to be power of 2 and change calculation to
> > use mask accordingly.
> >
> > Signed-off-by: Matan Azrad <matan@mellanox.com>
> > ---
> >  drivers/net/mlx4/mlx4_rxtx.c | 28 +++++++++++++---------------
> > drivers/net/mlx4/mlx4_txq.c  | 13 +++++++++----
> >  2 files changed, 22 insertions(+), 19 deletions(-)
> >
> > diff --git a/drivers/net/mlx4/mlx4_rxtx.c
> > b/drivers/net/mlx4/mlx4_rxtx.c index 8b8d95e..14192fe 100644
> > --- a/drivers/net/mlx4/mlx4_rxtx.c
> > +++ b/drivers/net/mlx4/mlx4_rxtx.c
> > @@ -312,10 +312,14 @@ struct pv {
> >   *
> >   * @param txq
> >   *   Pointer to Tx queue structure.
> > + * @param sq
> > + *   Pointer to the SQ structure.
> > + * @param elts_m
> > + *   Tx elements number mask.
> 
> It's minor however these parameters should be described in the same order
> as they appear in the function prototype, please swap them if you send an
> updated series.
> 
> >   */
> >  static void
> > -mlx4_txq_complete(struct txq *txq, const unsigned int elts_n,
> > -				  struct mlx4_sq *sq)
> > +mlx4_txq_complete(struct txq *txq, const unsigned int elts_m,
> > +		  struct mlx4_sq *sq)
> >  {
> <snip>
> > diff --git a/drivers/net/mlx4/mlx4_txq.c b/drivers/net/mlx4/mlx4_txq.c
> > index 4c7b62a..7eb4b04 100644
> > --- a/drivers/net/mlx4/mlx4_txq.c
> > +++ b/drivers/net/mlx4/mlx4_txq.c
> > @@ -76,17 +76,16 @@
> >  	unsigned int elts_head = txq->elts_head;
> >  	unsigned int elts_tail = txq->elts_tail;
> >  	struct txq_elt (*elts)[txq->elts_n] = txq->elts;
> > +	unsigned int elts_m = txq->elts_n - 1;
> >
> >  	DEBUG("%p: freeing WRs", (void *)txq);
> >  	while (elts_tail != elts_head) {
> > -		struct txq_elt *elt = &(*elts)[elts_tail];
> > +		struct txq_elt *elt = &(*elts)[elts_tail++ & elts_m];
> >
> >  		assert(elt->buf != NULL);
> >  		rte_pktmbuf_free(elt->buf);
> >  		elt->buf = NULL;
> >  		elt->wqe = NULL;
> > -		if (++elts_tail == RTE_DIM(*elts))
> > -			elts_tail = 0;
> >  	}
> >  	txq->elts_tail = txq->elts_head;
> >  }
> > @@ -208,7 +207,7 @@ struct txq_mp2mr_mbuf_check_data {
> >  	struct mlx4dv_obj mlxdv;
> >  	struct mlx4dv_qp dv_qp;
> >  	struct mlx4dv_cq dv_cq;
> > -	struct txq_elt (*elts)[desc];
> > +	struct txq_elt (*elts)[rte_align32pow2(desc)];
> 
> OK, I'm curious about what happened to the magic 0x1000 though? Was it a
> limitation or some leftover debugging code?
> 
It was an incorrect limit on the maximum number of descriptors.
Thanks again for the second good review. I will address all your comments in v3.
> --
> Adrien Mazarguil
> 6WIND
  

Patch

diff --git a/drivers/net/mlx4/mlx4_rxtx.c b/drivers/net/mlx4/mlx4_rxtx.c
index 8b8d95e..14192fe 100644
--- a/drivers/net/mlx4/mlx4_rxtx.c
+++ b/drivers/net/mlx4/mlx4_rxtx.c
@@ -312,10 +312,14 @@  struct pv {
  *
  * @param txq
  *   Pointer to Tx queue structure.
+ * @param sq
+ *   Pointer to the SQ structure.
+ * @param elts_m
+ *   Tx elements number mask.
  */
 static void
-mlx4_txq_complete(struct txq *txq, const unsigned int elts_n,
-				  struct mlx4_sq *sq)
+mlx4_txq_complete(struct txq *txq, const unsigned int elts_m,
+		  struct mlx4_sq *sq)
 {
 	unsigned int elts_tail = txq->elts_tail;
 	struct mlx4_cq *cq = &txq->mcq;
@@ -355,13 +359,11 @@  struct pv {
 	if (unlikely(!completed))
 		return;
 	/* First stamping address is the end of the last one. */
-	first_txbb = (&(*txq->elts)[elts_tail])->eocb;
+	first_txbb = (&(*txq->elts)[elts_tail & elts_m])->eocb;
 	elts_tail += completed;
-	if (elts_tail >= elts_n)
-		elts_tail -= elts_n;
 	/* The new tail element holds the end address. */
 	sq->remain_size += mlx4_txq_stamp_freed_wqe(sq, first_txbb,
-		(&(*txq->elts)[elts_tail])->eocb);
+		(&(*txq->elts)[elts_tail & elts_m])->eocb);
 	/* Update CQ consumer index. */
 	cq->cons_index = cons_index;
 	*cq->set_ci_db = rte_cpu_to_be_32(cons_index & MLX4_CQ_DB_CI_MASK);
@@ -580,6 +582,7 @@  struct pv {
 	struct txq *txq = (struct txq *)dpdk_txq;
 	unsigned int elts_head = txq->elts_head;
 	const unsigned int elts_n = txq->elts_n;
+	const unsigned int elts_m = elts_n - 1;
 	unsigned int bytes_sent = 0;
 	unsigned int i;
 	unsigned int max;
@@ -589,24 +592,20 @@  struct pv {
 
 	assert(txq->elts_comp_cd != 0);
 	if (likely(txq->elts_comp != 0))
-		mlx4_txq_complete(txq, elts_n, sq);
+		mlx4_txq_complete(txq, elts_m, sq);
 	max = (elts_n - (elts_head - txq->elts_tail));
-	if (max > elts_n)
-		max -= elts_n;
 	assert(max >= 1);
 	assert(max <= elts_n);
 	/* Always leave one free entry in the ring. */
 	--max;
 	if (max > pkts_n)
 		max = pkts_n;
-	elt = &(*txq->elts)[elts_head];
+	elt = &(*txq->elts)[elts_head & elts_m];
 	/* First Tx burst element saves the next WQE control segment. */
 	ctrl = elt->wqe;
 	for (i = 0; (i != max); ++i) {
 		struct rte_mbuf *buf = pkts[i];
-		unsigned int elts_head_next =
-			(((elts_head + 1) == elts_n) ? 0 : elts_head + 1);
-		struct txq_elt *elt_next = &(*txq->elts)[elts_head_next];
+		struct txq_elt *elt_next = &(*txq->elts)[++elts_head & elts_m];
 		uint32_t owner_opcode = sq->owner_opcode;
 		volatile struct mlx4_wqe_data_seg *dseg =
 				(volatile struct mlx4_wqe_data_seg *)(ctrl + 1);
@@ -725,7 +724,6 @@  struct pv {
 		ctrl->owner_opcode = rte_cpu_to_be_32(owner_opcode);
 		elt->buf = buf;
 		bytes_sent += buf->pkt_len;
-		elts_head = elts_head_next;
 		ctrl = ctrl_next;
 		elt = elt_next;
 	}
@@ -741,7 +739,7 @@  struct pv {
 	rte_wmb();
 	/* Ring QP doorbell. */
 	rte_write32(txq->msq.doorbell_qpn, txq->msq.db);
-	txq->elts_head = elts_head;
+	txq->elts_head += i;
 	txq->elts_comp += i;
 	return i;
 }
diff --git a/drivers/net/mlx4/mlx4_txq.c b/drivers/net/mlx4/mlx4_txq.c
index 4c7b62a..7eb4b04 100644
--- a/drivers/net/mlx4/mlx4_txq.c
+++ b/drivers/net/mlx4/mlx4_txq.c
@@ -76,17 +76,16 @@ 
 	unsigned int elts_head = txq->elts_head;
 	unsigned int elts_tail = txq->elts_tail;
 	struct txq_elt (*elts)[txq->elts_n] = txq->elts;
+	unsigned int elts_m = txq->elts_n - 1;
 
 	DEBUG("%p: freeing WRs", (void *)txq);
 	while (elts_tail != elts_head) {
-		struct txq_elt *elt = &(*elts)[elts_tail];
+		struct txq_elt *elt = &(*elts)[elts_tail++ & elts_m];
 
 		assert(elt->buf != NULL);
 		rte_pktmbuf_free(elt->buf);
 		elt->buf = NULL;
 		elt->wqe = NULL;
-		if (++elts_tail == RTE_DIM(*elts))
-			elts_tail = 0;
 	}
 	txq->elts_tail = txq->elts_head;
 }
@@ -208,7 +207,7 @@  struct txq_mp2mr_mbuf_check_data {
 	struct mlx4dv_obj mlxdv;
 	struct mlx4dv_qp dv_qp;
 	struct mlx4dv_cq dv_cq;
-	struct txq_elt (*elts)[desc];
+	struct txq_elt (*elts)[rte_align32pow2(desc)];
 	struct ibv_qp_init_attr qp_init_attr;
 	struct txq *txq;
 	uint8_t *bounce_buf;
@@ -252,6 +251,12 @@  struct txq_mp2mr_mbuf_check_data {
 		ERROR("%p: invalid number of Tx descriptors", (void *)dev);
 		return -rte_errno;
 	}
+	if (desc != RTE_DIM(*elts)) {
+		desc = RTE_DIM(*elts);
+		WARN("%p: increased number of descriptors in Tx queue %u"
+		     " to the next power of two (%u)",
+		     (void *)dev, idx, desc);
+	}
 	/* Allocate and initialize Tx queue. */
 	mlx4_zmallocv_socket("TXQ", vec, RTE_DIM(vec), socket);
 	if (!txq) {