mlx5: Report imissed stat

Message ID 1542104199-55746-1-git-send-email-barbette@kth.se (mailing list archive)
State Superseded, archived
Delegated to: Shahaf Shuler
Headers
Series mlx5: Report imissed stat |

Checks

Context Check Description
ci/checkpatch warning coding style issues
ci/Intel-compilation success Compilation OK

Commit Message

Tom Barbette Nov. 13, 2018, 10:16 a.m. UTC
  The imissed counters (number of packets dropped because the queues were
full) were actually reported through xstats as "rx_out_of_buffer"
but was not reported through stats.

Following a recent discussion on the ML, as there is no way to tell the
user if a counter is implemented or not, this should be considered a
bug. Eg, user looking at imissed will think the packets are lost before
reaching the device.

As for xstats, I added a base counter to be able to "reset" imissed.

Signed-off-by: Tom Barbette <barbette@kth.se>
---
 drivers/net/mlx5/mlx5.h       |  2 ++
 drivers/net/mlx5/mlx5_stats.c | 36 +++++++++++++++++++++++-------------
 2 files changed, 25 insertions(+), 13 deletions(-)
  

Comments

Shahaf Shuler Nov. 14, 2018, 3:17 p.m. UTC | #1
Hi Again Tom, 

Tuesday, November 13, 2018 12:17 PM, Tom Barbette:
> Subject: [PATCH] mlx5: Report imissed stat
> 
> The imissed counters (number of packets dropped because the queues were
> full) were actually reported through xstats as "rx_out_of_buffer"
> but was not reported through stats.
> 
> Following a recent discussion on the ML, as there is no way to tell the user if a
> counter is implemented or not, this should be considered a bug. Eg, user
> looking at imissed will think the packets are lost before reaching the device.
> 
> As for xstats, I added a base counter to be able to "reset" imissed.
> 

Yes, a bit of extra work is needed here. 
the full definition of the missed is
"
uint64_t imissed;                                                  
/**< Total of RX packets dropped by the HW,                        
 * because there are no available buffer (i.e. RX queues are full).
 */                                                                
"

It is not well defined (this is other topic), because it assumes the only reason to drop packets is because there is no avail buffer on the Rx queue.
It is not entirely correct, for example if you have large latency on your system it is possible that packets would be dropped on the physical port, not because the application is slow and not replenish the buffers fast enough, rather because the NIC is not processing them on the needed rate.

I guess what application seek on imissed is the sum of two. It can be done by summing up two counters: the out_of_buffer and the rx_discard_phy (which exists on ethtool -S <netdev> and need to be added to mlx5_counters_init array). 

Still, the output will be incorrect on the following cases:
1. from VF, since VF cannot read the phy port counters
2. in the presence of representors, as they all share the same phy port counter. 

I guess if we want to get such patch in, we need first to make the calculation correct, and document the limitations. 
 

> Signed-off-by: Tom Barbette <barbette@kth.se>
> ---
>  drivers/net/mlx5/mlx5.h       |  2 ++
>  drivers/net/mlx5/mlx5_stats.c | 36 +++++++++++++++++++++++-------------
>  2 files changed, 25 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index
> a3a34cf..61054a8 100644
> --- a/drivers/net/mlx5/mlx5.h
> +++ b/drivers/net/mlx5/mlx5.h
> @@ -77,6 +77,8 @@ struct mlx5_xstats_ctrl {
>  	/* Index in the device counters table. */
>  	uint16_t dev_table_idx[MLX5_MAX_XSTATS];
>  	uint64_t base[MLX5_MAX_XSTATS];
> +	/* Base for imissed counter. */
> +	uint64_t imissed_base;
>  };
> 
>  /* Flow list . */
> diff --git a/drivers/net/mlx5/mlx5_stats.c b/drivers/net/mlx5/mlx5_stats.c
> index 91f3d47..1e75e85 100644
> --- a/drivers/net/mlx5/mlx5_stats.c
> +++ b/drivers/net/mlx5/mlx5_stats.c
> @@ -119,6 +119,24 @@ static const struct mlx5_counter_ctrl
> mlx5_counters_init[] = {
> 
>  static const unsigned int xstats_n = RTE_DIM(mlx5_counters_init);
> 
> +static inline void
> +mlx5_read_ib_stat(struct priv *priv, unsigned int idx, uint64_t *stat)
> +{
> +	FILE *file;
> +	MKSTR(path, "%s/ports/1/hw_counters/%s",
> +		  priv->ibdev_path,
> +		  mlx5_counters_init[idx].ctr_name);
> +
> +	file = fopen(path, "rb");
> +	if (file) {
> +		int n = fscanf(file, "%" SCNu64, stat);
> +
> +		fclose(file);
> +		if (n != 1)
> +			stat = 0;
> +	}
> +}
> +
>  /**
>   * Read device counters table.
>   *
> @@ -155,19 +173,7 @@ mlx5_read_dev_counters(struct rte_eth_dev *dev,
> uint64_t *stats)
>  	}
>  	for (i = 0; i != xstats_n; ++i) {
>  		if (mlx5_counters_init[i].ib) {
> -			FILE *file;
> -			MKSTR(path, "%s/ports/1/hw_counters/%s",
> -			      priv->ibdev_path,
> -			      mlx5_counters_init[i].ctr_name);
> -
> -			file = fopen(path, "rb");
> -			if (file) {
> -				int n = fscanf(file, "%" SCNu64, &stats[i]);
> -
> -				fclose(file);
> -				if (n != 1)
> -					stats[i] = 0;
> -			}
> +			mlx5_read_ib_stat(priv, i, &stats[i]);
>  		} else {
>  			stats[i] = (uint64_t)
>  				et_stats->data[xstats_ctrl-
> >dev_table_idx[i]];
> @@ -281,6 +287,7 @@ mlx5_xstats_init(struct rte_eth_dev *dev)
>  	if (ret)
>  		DRV_LOG(ERR, "port %u cannot read device counters: %s",
>  			dev->data->port_id, strerror(rte_errno));
> +	mlx5_read_ib_stat(priv, 17, &xstats_ctrl->imissed_base);
>  free:
>  	rte_free(strings);
>  }
> @@ -389,6 +396,8 @@ mlx5_stats_get(struct rte_eth_dev *dev, struct
> rte_eth_stats *stats)  #endif
>  		tmp.oerrors += txq->stats.oerrors;
>  	}
> +	mlx5_read_ib_stat(priv, 17, &tmp.imissed);
> +	tmp.imissed -= priv->xstats_ctrl.imissed_base;
>  #ifndef MLX5_PMD_SOFT_COUNTERS
>  	/* FIXME: retrieve and add hardware counters. */  #endif @@ -461,6
> +470,7 @@ mlx5_xstats_reset(struct rte_eth_dev *dev)
>  	}
>  	for (i = 0; i != n; ++i)
>  		xstats_ctrl->base[i] = counters[i];
> +	mlx5_read_ib_stat(priv, 17, &xstats_ctrl->imissed_base);
>  }
> 
>  /**
> --
> 2.7.4
  
Tom Barbette Nov. 14, 2018, 4:18 p.m. UTC | #2
Hi Shahaf,

Yes we learned this distinction with rx_discards_phy the hard way. I would expect imissed to be only rx_out_of_buffer actually.  I'd say people look at imissed to see if they consume packets fast enough. If the problem is pure CPU power. And that is more the definition of imissed, it is "ie RX queues are full", not "eg".
The *_phy counters are somehow unusual, 82599, xl710 and others would simply never receive other packets. But I'll follow your lead.

If documentation of the limitations is okay for you, I can propose a v2, also adding rx_discards_phy no matter what in xstats as it is needed for the full picture. Then the documentation can mention that.

Tom
  
Shahaf Shuler Nov. 15, 2018, 7:39 a.m. UTC | #3
Wednesday, November 14, 2018 6:18 PM, Tom Barbette:
> Subject: RE: [PATCH] mlx5: Report imissed stat
> 
> Hi Shahaf,
> 
> Yes we learned this distinction with rx_discards_phy the hard way. I would
> expect imissed to be only rx_out_of_buffer actually.  I'd say people look at
> imissed to see if they consume packets fast enough. If the problem is pure
> CPU power. And that is more the definition of imissed, it is "ie RX queues are
> full", not "eg".
> The *_phy counters are somehow unusual, 82599, xl710 and others would
> simply never receive other packets. But I'll follow your lead.
> 
> If documentation of the limitations is okay for you, I can propose a v2, also
> adding rx_discards_phy no matter what in xstats as it is needed for the full
> picture. Then the documentation can mention that.

OK, let's go this way, since we want to have a consist behavior between the PMD:
1. the imissed will be only the out_of_buffer
2. the phy drop counters will be reported on xstat only
3. not need to doc the phy limitation since we allready have one.

For the v2 please see more comments below 

> 
> Tom
> 
> 
> 
> ________________________________________
> De : Shahaf Shuler <shahafs@mellanox.com> Envoyé : mercredi 14
> novembre 2018 16:17 À : Tom Barbette; dev@dpdk.org Cc : Yongseok Koh
> Objet : RE: [PATCH] mlx5: Report imissed stat
> 
> Hi Again Tom,
> 
> Tuesday, November 13, 2018 12:17 PM, Tom Barbette:
> > Subject: [PATCH] mlx5: Report imissed stat
> >
> > The imissed counters (number of packets dropped because the queues
> > were
> > full) were actually reported through xstats as "rx_out_of_buffer"
> > but was not reported through stats.
> >
> > Following a recent discussion on the ML, as there is no way to tell
> > the user if a counter is implemented or not, this should be considered
> > a bug. Eg, user looking at imissed will think the packets are lost before
> reaching the device.
> >
> > As for xstats, I added a base counter to be able to "reset" imissed.
> >
> 
> Yes, a bit of extra work is needed here.
> the full definition of the missed is
> "
> uint64_t imissed;
> /**< Total of RX packets dropped by the HW,
>  * because there are no available buffer (i.e. RX queues are full).
>  */
> "
> 
> It is not well defined (this is other topic), because it assumes the only reason
> to drop packets is because there is no avail buffer on the Rx queue.
> It is not entirely correct, for example if you have large latency on your system
> it is possible that packets would be dropped on the physical port, not
> because the application is slow and not replenish the buffers fast enough,
> rather because the NIC is not processing them on the needed rate.
> 
> I guess what application seek on imissed is the sum of two. It can be done by
> summing up two counters: the out_of_buffer and the rx_discard_phy (which
> exists on ethtool -S <netdev> and need to be added to mlx5_counters_init
> array).
> 
> Still, the output will be incorrect on the following cases:
> 1. from VF, since VF cannot read the phy port counters 2. in the presence of
> representors, as they all share the same phy port counter.
> 
> I guess if we want to get such patch in, we need first to make the calculation
> correct, and document the limitations.
> 
> 
> > Signed-off-by: Tom Barbette <barbette@kth.se>
> > ---
> >  drivers/net/mlx5/mlx5.h       |  2 ++
> >  drivers/net/mlx5/mlx5_stats.c | 36
> > +++++++++++++++++++++++-------------
> >  2 files changed, 25 insertions(+), 13 deletions(-)
> >
> > diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index
> > a3a34cf..61054a8 100644
> > --- a/drivers/net/mlx5/mlx5.h
> > +++ b/drivers/net/mlx5/mlx5.h
> > @@ -77,6 +77,8 @@ struct mlx5_xstats_ctrl {
> >       /* Index in the device counters table. */
> >       uint16_t dev_table_idx[MLX5_MAX_XSTATS];
> >       uint64_t base[MLX5_MAX_XSTATS];
> > +     /* Base for imissed counter. */
> > +     uint64_t imissed_base;

Since imissed is part of stats and not xstats it will probably be cleaner to have it on a different (new) struct called mlx5_stats_ctrl. 

> >  };
> >
> >  /* Flow list . */
> > diff --git a/drivers/net/mlx5/mlx5_stats.c
> > b/drivers/net/mlx5/mlx5_stats.c index 91f3d47..1e75e85 100644
> > --- a/drivers/net/mlx5/mlx5_stats.c
> > +++ b/drivers/net/mlx5/mlx5_stats.c
> > @@ -119,6 +119,24 @@ static const struct mlx5_counter_ctrl
> > mlx5_counters_init[] = {
> >
> >  static const unsigned int xstats_n = RTE_DIM(mlx5_counters_init);
> >
> > +static inline void
> > +mlx5_read_ib_stat(struct priv *priv, unsigned int idx, uint64_t

Why not provide the counter name instead of the idx? 

> > +*stat) {
> > +     FILE *file;
> > +     MKSTR(path, "%s/ports/1/hw_counters/%s",
> > +               priv->ibdev_path,
> > +               mlx5_counters_init[idx].ctr_name);
> > +
> > +     file = fopen(path, "rb");
> > +     if (file) {
> > +             int n = fscanf(file, "%" SCNu64, stat);
> > +
> > +             fclose(file);
> > +             if (n != 1)
> > +                     stat = 0;
> > +     }
> > +}
> > +
> >  /**
> >   * Read device counters table.
> >   *
> > @@ -155,19 +173,7 @@ mlx5_read_dev_counters(struct rte_eth_dev
> *dev,
> > uint64_t *stats)
> >       }
> >       for (i = 0; i != xstats_n; ++i) {
> >               if (mlx5_counters_init[i].ib) {
> > -                     FILE *file;
> > -                     MKSTR(path, "%s/ports/1/hw_counters/%s",
> > -                           priv->ibdev_path,
> > -                           mlx5_counters_init[i].ctr_name);
> > -
> > -                     file = fopen(path, "rb");
> > -                     if (file) {
> > -                             int n = fscanf(file, "%" SCNu64, &stats[i]);
> > -
> > -                             fclose(file);
> > -                             if (n != 1)
> > -                                     stats[i] = 0;
> > -                     }
> > +                     mlx5_read_ib_stat(priv, i, &stats[i]);
> >               } else {
> >                       stats[i] = (uint64_t)
> >                               et_stats->data[xstats_ctrl-
> > >dev_table_idx[i]];
> > @@ -281,6 +287,7 @@ mlx5_xstats_init(struct rte_eth_dev *dev)
> >       if (ret)
> >               DRV_LOG(ERR, "port %u cannot read device counters: %s",
> >                       dev->data->port_id, strerror(rte_errno));
> > +     mlx5_read_ib_stat(priv, 17, &xstats_ctrl->imissed_base);

Since this function now is doing init for both stats and xstats better to call it mlx5_stats_init. 

> >  free:
> >       rte_free(strings);
> >  }
> > @@ -389,6 +396,8 @@ mlx5_stats_get(struct rte_eth_dev *dev, struct
> > rte_eth_stats *stats)  #endif
> >               tmp.oerrors += txq->stats.oerrors;
> >       }
> > +     mlx5_read_ib_stat(priv, 17, &tmp.imissed);
> > +     tmp.imissed -= priv->xstats_ctrl.imissed_base;
> >  #ifndef MLX5_PMD_SOFT_COUNTERS
> >       /* FIXME: retrieve and add hardware counters. */  #endif @@
> > -461,6
> > +470,7 @@ mlx5_xstats_reset(struct rte_eth_dev *dev)
> >       }
> >       for (i = 0; i != n; ++i)
> >               xstats_ctrl->base[i] = counters[i];
> > +     mlx5_read_ib_stat(priv, 17, &xstats_ctrl->imissed_base);

The imissed is part of stats, not xstats, therefore it should be reset on mlx5_stats_reset.

> >  }
> >
> >  /**
> > --
> > 2.7.4
  

Patch

diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index a3a34cf..61054a8 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -77,6 +77,8 @@  struct mlx5_xstats_ctrl {
 	/* Index in the device counters table. */
 	uint16_t dev_table_idx[MLX5_MAX_XSTATS];
 	uint64_t base[MLX5_MAX_XSTATS];
+	/* Base for imissed counter. */
+	uint64_t imissed_base;
 };
 
 /* Flow list . */
diff --git a/drivers/net/mlx5/mlx5_stats.c b/drivers/net/mlx5/mlx5_stats.c
index 91f3d47..1e75e85 100644
--- a/drivers/net/mlx5/mlx5_stats.c
+++ b/drivers/net/mlx5/mlx5_stats.c
@@ -119,6 +119,24 @@  static const struct mlx5_counter_ctrl mlx5_counters_init[] = {
 
 static const unsigned int xstats_n = RTE_DIM(mlx5_counters_init);
 
+static inline void
+mlx5_read_ib_stat(struct priv *priv, unsigned int idx, uint64_t *stat)
+{
+	FILE *file;
+	MKSTR(path, "%s/ports/1/hw_counters/%s",
+		  priv->ibdev_path,
+		  mlx5_counters_init[idx].ctr_name);
+
+	file = fopen(path, "rb");
+	if (file) {
+		int n = fscanf(file, "%" SCNu64, stat);
+
+		fclose(file);
+		if (n != 1)
+			stat = 0;
+	}
+}
+
 /**
  * Read device counters table.
  *
@@ -155,19 +173,7 @@  mlx5_read_dev_counters(struct rte_eth_dev *dev, uint64_t *stats)
 	}
 	for (i = 0; i != xstats_n; ++i) {
 		if (mlx5_counters_init[i].ib) {
-			FILE *file;
-			MKSTR(path, "%s/ports/1/hw_counters/%s",
-			      priv->ibdev_path,
-			      mlx5_counters_init[i].ctr_name);
-
-			file = fopen(path, "rb");
-			if (file) {
-				int n = fscanf(file, "%" SCNu64, &stats[i]);
-
-				fclose(file);
-				if (n != 1)
-					stats[i] = 0;
-			}
+			mlx5_read_ib_stat(priv, i, &stats[i]);
 		} else {
 			stats[i] = (uint64_t)
 				et_stats->data[xstats_ctrl->dev_table_idx[i]];
@@ -281,6 +287,7 @@  mlx5_xstats_init(struct rte_eth_dev *dev)
 	if (ret)
 		DRV_LOG(ERR, "port %u cannot read device counters: %s",
 			dev->data->port_id, strerror(rte_errno));
+	mlx5_read_ib_stat(priv, 17, &xstats_ctrl->imissed_base);
 free:
 	rte_free(strings);
 }
@@ -389,6 +396,8 @@  mlx5_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 #endif
 		tmp.oerrors += txq->stats.oerrors;
 	}
+	mlx5_read_ib_stat(priv, 17, &tmp.imissed);
+	tmp.imissed -= priv->xstats_ctrl.imissed_base;
 #ifndef MLX5_PMD_SOFT_COUNTERS
 	/* FIXME: retrieve and add hardware counters. */
 #endif
@@ -461,6 +470,7 @@  mlx5_xstats_reset(struct rte_eth_dev *dev)
 	}
 	for (i = 0; i != n; ++i)
 		xstats_ctrl->base[i] = counters[i];
+	mlx5_read_ib_stat(priv, 17, &xstats_ctrl->imissed_base);
 }
 
 /**