[dpdk-dev] [PATCH v2] net/failsafe: improve stats accuracy

Gaëtan Rivet gaetan.rivet at 6wind.com
Thu Oct 19 17:08:49 CEST 2017


Hello Matan,

Adding the time delta should have been done in a separate commit.
Can you please split this patch in two?

The first one would only attempt the ultimate stats read; the second one
would add the delay warning.

Small nit below for your v3.

On Thu, Oct 19, 2017 at 02:31:54PM +0000, Matan Azrad wrote:
> The stats_get API was changed to signal a potential failure to read
> stats. Furthermore, some PMDs are able to provide statistics even after
> a removal event occurred.
> 
> Considering this, the fail-safe can try to access the latest statistics
> of a PMD to improve statistics accuracy.
> 
> Attempt an ultimate statistics read on removal time; if that fails, use
> the latest recorded snapshot.
> 
> Signed-off-by: Matan Azrad <matan at mellanox.com>
> ---
>  drivers/net/failsafe/failsafe_ether.c   | 19 +++++++++++++++++--
>  drivers/net/failsafe/failsafe_ops.c     | 10 ++++++++--
>  drivers/net/failsafe/failsafe_private.h |  7 ++++++-
>  3 files changed, 31 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/net/failsafe/failsafe_ether.c b/drivers/net/failsafe/failsafe_ether.c
> index f4db423..df38360 100644
> --- a/drivers/net/failsafe/failsafe_ether.c
> +++ b/drivers/net/failsafe/failsafe_ether.c
> @@ -35,6 +35,7 @@
>  
>  #include <rte_flow.h>
>  #include <rte_flow_driver.h>
> +#include <rte_cycles.h>
>  
>  #include "failsafe_private.h"
>  
> @@ -312,9 +313,23 @@
>  static void
>  fs_dev_stats_save(struct sub_device *sdev)
>  {
> +	struct rte_eth_stats stats;
> +	int err;
> +
> +	/* Attempt to read current stats. */
> +	err = rte_eth_stats_get(PORT_ID(sdev), &stats);
> +	if (err) {
> +		uint64_t cycles = sdev->stats_snapshot.cycles;
> +
> +		WARN("Could not access latest statistics from sub-device %d.\n",
> +			 SUB_ID(sdev));
> +		if (cycles != 0)
> +			WARN("Using latest snapshot taken before %lu seconds.\n",
> +				 (rte_rdtsc() - cycles) / rte_get_tsc_hz());
> +	}
>  	failsafe_stats_increment(&PRIV(sdev->fs_dev)->stats_accumulator,
> -			&sdev->stats_snapshot);
> -	memset(&sdev->stats_snapshot, 0, sizeof(struct rte_eth_stats));
> +			err ? &sdev->stats_snapshot.stats : &stats);
> +	memset(&sdev->stats_snapshot, 0, sizeof(sdev->stats_snapshot));
>  }
>  
>  static inline int
> diff --git a/drivers/net/failsafe/failsafe_ops.c b/drivers/net/failsafe/failsafe_ops.c
> index d360965..818f12d 100644
> --- a/drivers/net/failsafe/failsafe_ops.c
> +++ b/drivers/net/failsafe/failsafe_ops.c
> @@ -38,6 +38,7 @@
>  #include <rte_ethdev.h>
>  #include <rte_malloc.h>
>  #include <rte_flow.h>
> +#include <rte_cycles.h>
>  
>  #include "failsafe_private.h"
>  
> @@ -592,13 +593,18 @@
>  
>  	rte_memcpy(stats, &PRIV(dev)->stats_accumulator, sizeof(*stats));
>  	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
> -		ret = rte_eth_stats_get(PORT_ID(sdev), &sdev->stats_snapshot);
> +		struct rte_eth_stats *snapshot = &sdev->stats_snapshot.stats;
> +		uint64_t *cycles = &sdev->stats_snapshot.cycles;
> +
> +		ret = rte_eth_stats_get(PORT_ID(sdev), snapshot);
>  		if (ret) {
>  			ERROR("Operation rte_eth_stats_get failed for sub_device %d with error %d",
>  				  i, ret);
> +			*cycles = 0;
>  			return ret;
>  		}
> -		failsafe_stats_increment(stats, &sdev->stats_snapshot);
> +		*cycles = rte_rdtsc();
> +		failsafe_stats_increment(stats, snapshot);
>  	}
>  	return 0;
>  }
> diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h
> index d343ebf..1df52f4 100644
> --- a/drivers/net/failsafe/failsafe_private.h
> +++ b/drivers/net/failsafe/failsafe_private.h
> @@ -93,6 +93,11 @@ enum dev_state {
>  	DEV_STARTED,
>  };
>  
> +struct fs_stats {
> +	struct rte_eth_stats stats;
> +	uint64_t cycles;

What do you think of the name "timestamp" for this field?
It would be more descriptive of its use.

> +};
> +
>  struct sub_device {
>  	/* Exhaustive DPDK device description */
>  	struct rte_devargs devargs;
> @@ -103,7 +108,7 @@ struct sub_device {
>  	/* Device state machine */
>  	enum dev_state state;
>  	/* Last stats snapshot passed to user */
> -	struct rte_eth_stats stats_snapshot;
> +	struct fs_stats stats_snapshot;
>  	/* Some device are defined as a command line */
>  	char *cmdline;
>  	/* fail-safe device backreference */
> -- 
> 1.8.3.1
> 

Thanks,

-- 
Gaëtan Rivet
6WIND


More information about the dev mailing list