[dpdk-dev,RFC,3/8] mbuf: set mbuf fields while in pool

Message ID 1485271173-13408-4-git-send-email-olivier.matz@6wind.com (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Headers

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel compilation success Compilation OK

Commit Message

Olivier Matz Jan. 24, 2017, 3:19 p.m. UTC
  Set the value of m->refcnt to 1, m->nb_segs to 1 and m->next
to NULL when the mbuf is stored inside the mempool (unused).
This is done in rte_pktmbuf_prefree_seg(), before freeing or
recycling a mbuf.

Before this patch, the value of m->refcnt was expected to be 0
while in pool.

The objectives are:

- to avoid drivers to set m->next to NULL in the early Rx path, since
  this field is in the second 64B of the mbuf and its access could
  trigger a cache miss

- rationalize the behavior of raw_alloc/raw_free: one is now the
  symmetric of the other, and refcnt is never changed in these functions.

Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
---
 drivers/net/mlx5/mlx5_rxtx.c     |  5 ++---
 drivers/net/mpipe/mpipe_tilegx.c |  1 +
 lib/librte_mbuf/rte_mbuf.c       |  2 ++
 lib/librte_mbuf/rte_mbuf.h       | 45 +++++++++++++++++++++++++++++-----------
 4 files changed, 38 insertions(+), 15 deletions(-)
  

Comments

Bruce Richardson Jan. 24, 2017, 3:50 p.m. UTC | #1
On Tue, Jan 24, 2017 at 04:19:28PM +0100, Olivier Matz wrote:
> Set the value of m->refcnt to 1, m->nb_segs to 1 and m->next
> to NULL when the mbuf is stored inside the mempool (unused).
> This is done in rte_pktmbuf_prefree_seg(), before freeing or
> recycling a mbuf.
> 
> Before this patch, the value of m->refcnt was expected to be 0
> while in pool.
> 
> The objectives are:
> 
> - to avoid drivers to set m->next to NULL in the early Rx path, since
>   this field is in the second 64B of the mbuf and its access could
>   trigger a cache miss
> 
> - rationalize the behavior of raw_alloc/raw_free: one is now the
>   symmetric of the other, and refcnt is never changed in these functions.
> 
> Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
> ---
>  drivers/net/mlx5/mlx5_rxtx.c     |  5 ++---
>  drivers/net/mpipe/mpipe_tilegx.c |  1 +
>  lib/librte_mbuf/rte_mbuf.c       |  2 ++
>  lib/librte_mbuf/rte_mbuf.h       | 45 +++++++++++++++++++++++++++++-----------
>  4 files changed, 38 insertions(+), 15 deletions(-)
> 
<snip>
> /* compat with older versions */
>  __rte_deprecated
> -static inline void __attribute__((always_inline))
> +static inline void
>  __rte_mbuf_raw_free(struct rte_mbuf *m)
>  {
>  	rte_mbuf_raw_free(m);
> @@ -1218,8 +1232,12 @@ static inline void rte_pktmbuf_detach(struct rte_mbuf *m)
>  	m->data_len = 0;
>  	m->ol_flags = 0;
>  
> -	if (rte_mbuf_refcnt_update(md, -1) == 0)
> +	if (rte_mbuf_refcnt_update(md, -1) == 0) {

Minor nit, but in the case that we only have a single reference to the
mbufs, we are always setting that to zero just to re-increment it to 1
again.

> +		md->next = NULL;
> +		md->nb_segs = 1;
> +		rte_mbuf_refcnt_set(md, 1);
>  		rte_mbuf_raw_free(md);
> +	}
>  }
>  
>  /**
  
Olivier Matz Feb. 28, 2017, 2:51 p.m. UTC | #2
Hi Bruce,

On Tue, 24 Jan 2017 15:50:49 +0000, Bruce Richardson
<bruce.richardson@intel.com> wrote:
> On Tue, Jan 24, 2017 at 04:19:28PM +0100, Olivier Matz wrote:
> > Set the value of m->refcnt to 1, m->nb_segs to 1 and m->next
> > to NULL when the mbuf is stored inside the mempool (unused).
> > This is done in rte_pktmbuf_prefree_seg(), before freeing or
> > recycling a mbuf.
> > 
> > Before this patch, the value of m->refcnt was expected to be 0
> > while in pool.
> > 
> > The objectives are:
> > 
> > - to avoid drivers to set m->next to NULL in the early Rx path,
> > since this field is in the second 64B of the mbuf and its access
> > could trigger a cache miss
> > 
> > - rationalize the behavior of raw_alloc/raw_free: one is now the
> >   symmetric of the other, and refcnt is never changed in these
> > functions.
> > 
> > Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
> > ---
> >  drivers/net/mlx5/mlx5_rxtx.c     |  5 ++---
> >  drivers/net/mpipe/mpipe_tilegx.c |  1 +
> >  lib/librte_mbuf/rte_mbuf.c       |  2 ++
> >  lib/librte_mbuf/rte_mbuf.h       | 45
> > +++++++++++++++++++++++++++++----------- 4 files changed, 38
> > insertions(+), 15 deletions(-) 
> <snip>
> > /* compat with older versions */
> >  __rte_deprecated
> > -static inline void __attribute__((always_inline))
> > +static inline void
> >  __rte_mbuf_raw_free(struct rte_mbuf *m)
> >  {
> >  	rte_mbuf_raw_free(m);
> > @@ -1218,8 +1232,12 @@ static inline void rte_pktmbuf_detach(struct
> > rte_mbuf *m) m->data_len = 0;
> >  	m->ol_flags = 0;
> >  
> > -	if (rte_mbuf_refcnt_update(md, -1) == 0)
> > +	if (rte_mbuf_refcnt_update(md, -1) == 0) {  
> 
> Minor nit, but in the case that we only have a single reference to the
> mbufs, we are always setting that to zero just to re-increment it to 1
> again.
> 
> > +		md->next = NULL;
> > +		md->nb_segs = 1;
> > +		rte_mbuf_refcnt_set(md, 1);
> >  		rte_mbuf_raw_free(md);
> > +	}
> >  }
> >  
> >  /**  


I'm trying to gather the comments that have been made on this patchset.
About this one, I think it would be more complex to change the code
to avoid to set the refcnt twice:

 - we would need to duplicate code from rte_mbuf_refcnt_update(), which
   I think is not a very good idea, due to the big comment
 - it would make the detach code less readable
 - it's even not sure that it will be more performant: since
   rte_mbuf_refcnt_update() is inline, the compiler is probably able to
   do the simplification by itself.



Olivier
  

Patch

diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index a518a42..294dfde 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -1327,7 +1327,8 @@  mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 			while (pkt != seg) {
 				assert(pkt != (*rxq->elts)[idx]);
 				rep = NEXT(pkt);
-				rte_mbuf_refcnt_set(pkt, 0);
+				NEXT(pkt) = NULL;
+				NB_SEGS(pkt) = 1;
 				rte_mbuf_raw_free(pkt);
 				pkt = rep;
 			}
@@ -1338,13 +1339,11 @@  mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 			len = mlx5_rx_poll_len(rxq, cqe, cqe_cnt,
 					       &rss_hash_res);
 			if (!len) {
-				rte_mbuf_refcnt_set(rep, 0);
 				rte_mbuf_raw_free(rep);
 				break;
 			}
 			if (unlikely(len == -1)) {
 				/* RX error, packet is likely too large. */
-				rte_mbuf_refcnt_set(rep, 0);
 				rte_mbuf_raw_free(rep);
 				++rxq->stats.idropped;
 				goto skip;
diff --git a/drivers/net/mpipe/mpipe_tilegx.c b/drivers/net/mpipe/mpipe_tilegx.c
index eedc0b3..560ffe9 100644
--- a/drivers/net/mpipe/mpipe_tilegx.c
+++ b/drivers/net/mpipe/mpipe_tilegx.c
@@ -548,6 +548,7 @@  mpipe_recv_flush_stack(struct mpipe_dev_priv *priv)
 		mbuf->packet_type = 0;
 		mbuf->data_len    = 0;
 		mbuf->pkt_len     = 0;
+		mbuf->next        = NULL;
 
 		rte_mbuf_raw_free(mbuf);
 	}
diff --git a/lib/librte_mbuf/rte_mbuf.c b/lib/librte_mbuf/rte_mbuf.c
index 72ad91e..0acc810 100644
--- a/lib/librte_mbuf/rte_mbuf.c
+++ b/lib/librte_mbuf/rte_mbuf.c
@@ -145,6 +145,8 @@  rte_pktmbuf_init(struct rte_mempool *mp,
 	m->pool = mp;
 	m->nb_segs = 1;
 	m->port = 0xff;
+	rte_mbuf_refcnt_set(m, 1);
+	m->next = NULL;
 }
 
 /* helper to create a mbuf pool */
diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index 8ff2290..bbd0700 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -766,6 +766,11 @@  rte_mbuf_sanity_check(const struct rte_mbuf *m, int is_header);
  * initializing all the required fields. See rte_pktmbuf_reset().
  * For standard needs, prefer rte_pktmbuf_alloc().
  *
+ * The caller can expect that the following fields of the mbuf structure
+ * are initialized: buf_addr, buf_physaddr, buf_len, refcnt=1, nb_segs=1,
+ * next=NULL, pool, priv_size. The other fields must be initialized
+ * by the caller.
+ *
  * @param mp
  *   The mempool from which mbuf is allocated.
  * @return
@@ -780,8 +785,9 @@  static inline struct rte_mbuf *rte_mbuf_raw_alloc(struct rte_mempool *mp)
 	if (rte_mempool_get(mp, &mb) < 0)
 		return NULL;
 	m = (struct rte_mbuf *)mb;
-	RTE_ASSERT(rte_mbuf_refcnt_read(m) == 0);
-	rte_mbuf_refcnt_set(m, 1);
+	RTE_ASSERT(rte_mbuf_refcnt_read(m) == 1);
+	RTE_ASSERT(m->next == NULL);
+	RTE_ASSERT(m->nb_segs == 1);
 	__rte_mbuf_sanity_check(m, 0);
 
 	return m;
@@ -790,8 +796,13 @@  static inline struct rte_mbuf *rte_mbuf_raw_alloc(struct rte_mempool *mp)
 /**
  * Put mbuf back into its original mempool.
  *
- * The caller must ensure that the mbuf is direct and that the
- * reference counter is 0.
+ * The caller must ensure that the mbuf is direct and properly
+ * reinitialized (refcnt=1, next=NULL, nb_segs=1), as done by
+ * rte_pktmbuf_prefree_seg().
+ *
+ * This function should be used with care, when optimization is
+ * required. For standard needs, prefer rte_pktmbuf_free() or
+ * rte_pktmbuf_free_seg().
  *
  * @param m
  *   The mbuf to be freed.
@@ -800,13 +811,16 @@  static inline void __attribute__((always_inline))
 rte_mbuf_raw_free(struct rte_mbuf *m)
 {
 	RTE_ASSERT(RTE_MBUF_DIRECT(m));
-	RTE_ASSERT(rte_mbuf_refcnt_read(m) == 0);
+	RTE_ASSERT(rte_mbuf_refcnt_read(m) == 1);
+	RTE_ASSERT(m->next == NULL);
+	RTE_ASSERT(m->nb_segs == 1);
+	__rte_mbuf_sanity_check(m, 0);
 	rte_mempool_put(m->pool, m);
 }
 
 /* compat with older versions */
 __rte_deprecated
-static inline void __attribute__((always_inline))
+static inline void
 __rte_mbuf_raw_free(struct rte_mbuf *m)
 {
 	rte_mbuf_raw_free(m);
@@ -1218,8 +1232,12 @@  static inline void rte_pktmbuf_detach(struct rte_mbuf *m)
 	m->data_len = 0;
 	m->ol_flags = 0;
 
-	if (rte_mbuf_refcnt_update(md, -1) == 0)
+	if (rte_mbuf_refcnt_update(md, -1) == 0) {
+		md->next = NULL;
+		md->nb_segs = 1;
+		rte_mbuf_refcnt_set(md, 1);
 		rte_mbuf_raw_free(md);
+	}
 }
 
 /**
@@ -1243,9 +1261,14 @@  rte_pktmbuf_prefree_seg(struct rte_mbuf *m)
 	__rte_mbuf_sanity_check(m, 0);
 
 	if (likely(rte_mbuf_refcnt_update(m, -1) == 0)) {
-		/* if this is an indirect mbuf, it is detached. */
-		if (RTE_MBUF_INDIRECT(m))
+		if (RTE_MBUF_INDIRECT(m)) {
 			rte_pktmbuf_detach(m);
+			/* next, nb_segs, refcnt are reset */
+		} else {
+			m->next = NULL;
+			m->nb_segs = 1;
+			rte_mbuf_refcnt_set(m, 1);
+		}
 		return m;
 	}
 	return NULL;
@@ -1272,10 +1295,8 @@  static inline void __attribute__((always_inline))
 rte_pktmbuf_free_seg(struct rte_mbuf *m)
 {
 	m = rte_pktmbuf_prefree_seg(m);
-	if (likely(m != NULL)) {
-		m->next = NULL;
+	if (likely(m != NULL))
 		rte_mbuf_raw_free(m);
-	}
 }
 
 /**