[dpdk-dev] [PATCH] net/failsafe: stat support enhancement

Gaëtan Rivet gaetan.rivet at 6wind.com
Tue Sep 5 15:22:44 CEST 2017


Hi Matan,

On Tue, Sep 05, 2017 at 12:56:34PM +0300, Matan Azrad wrote:
> The previous stats code returned only the current TX sub
> device stats.
> 
> This enhancement extends it to return the sum of all sub
> devices stats with history of removed sub-devices.
> 
> Dedicated stats accumulator saves the stat history of all
> sub device remove events.
> 
> Each failsafe sub device contains the last stats asked by
> the user and updates the accumulator in removal time.
> 
> I would like to implement ultimate snapshot on removal time.
> The stats_get API needs to be changed to return error in the
> case it is too late to retrieve statistics.

You need to have this API evolution first. It is not assured to be
accepted by the community.

As it is, this version is incorrect, because the only available stats
for a removed slave is the last snapshot.

Thus it complicates the rules while still being incorrect. Even if you
were able to push for this API evolution, PMDs with hard counters would
be the ones to return an error code on stat read while removed.
You may be lucky at the moment because MLX drivers do not support hard
counters, but this won't always be true (and it will be false soon
enough). You will thus be back at square one, with a new useless API and
still incorrect stats in the fail-safe. On the other hand, the fail-safe
should strive to be as easy to use as possible with most drivers, and
not cater specifically to soft-counters ones.

So, my take on this: I understand that aggregated stats would be
interesting. Keep it simple & stupid: simple aggregation on stats_get.
You will have a rollback at some point on device removal, but this is
still easier to detect / deal with than partially / sometimes incorrect
stats.

> By this way, failsafe can get stats snapshot in removal interrupt
> callback for each PMD which can give stats after removal event.
> 
> Signed-off-by: Matan Azrad <matan at mellanox.com>
> ---
>  drivers/net/failsafe/failsafe_ether.c   | 33 +++++++++++++++++++++++++++++++++
>  drivers/net/failsafe/failsafe_ops.c     | 16 ++++++++++++----
>  drivers/net/failsafe/failsafe_private.h |  5 +++++
>  3 files changed, 50 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/net/failsafe/failsafe_ether.c b/drivers/net/failsafe/failsafe_ether.c
> index a3a8cce..133080d 100644
> --- a/drivers/net/failsafe/failsafe_ether.c
> +++ b/drivers/net/failsafe/failsafe_ether.c
> @@ -399,6 +399,37 @@ failsafe_eth_dev_state_sync(struct rte_eth_dev *dev)
>  	return ret;
>  }
>  
> +void
> +fs_increment_stats(struct rte_eth_stats *from, struct rte_eth_stats *to)
> +{
> +	uint8_t i;
> +
> +	RTE_ASSERT(from != NULL && to != NULL);
> +	to->ipackets += from->ipackets;
> +	to->opackets += from->opackets;
> +	to->ibytes += from->ibytes;
> +	to->obytes += from->obytes;
> +	to->imissed += from->imissed;
> +	to->ierrors += from->ierrors;
> +	to->oerrors += from->oerrors;
> +	to->rx_nombuf += from->rx_nombuf;
> +	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS; i++) {
> +		to->q_ipackets[i] += from->q_ipackets[i];
> +		to->q_opackets[i] += from->q_opackets[i];
> +		to->q_ibytes[i] += from->q_ibytes[i];
> +		to->q_obytes[i] += from->q_obytes[i];
> +		to->q_errors[i] += from->q_errors[i];
> +	}
> +}
> +
> +void
> +fs_increment_stats_accumulator(struct sub_device *sdev)
> +{
> +	fs_increment_stats(&sdev->stats_snapshot,
> +			&PRIV(sdev->fs_dev)->stats_accumulator);
> +	memset(&sdev->stats_snapshot, 0, sizeof(struct rte_eth_stats));
> +}
> +
>  int
>  failsafe_eth_rmv_event_callback(uint8_t port_id __rte_unused,
>  				enum rte_eth_event_type event __rte_unused,
> @@ -410,6 +441,8 @@ failsafe_eth_rmv_event_callback(uint8_t port_id __rte_unused,
>  	fs_switch_dev(sdev->fs_dev, sdev);
>  	/* Use safe bursts in any case. */
>  	set_burst_fn(sdev->fs_dev, 1);
> +	/* Increment the stats accumulator by the last stats snapshot. */
> +	fs_increment_stats_accumulator(sdev);
>  	/*
>  	 * Async removal, the sub-PMD will try to unregister
>  	 * the callback at the source of the current thread context.
> diff --git a/drivers/net/failsafe/failsafe_ops.c b/drivers/net/failsafe/failsafe_ops.c
> index ff9ad15..e47cc85 100644
> --- a/drivers/net/failsafe/failsafe_ops.c
> +++ b/drivers/net/failsafe/failsafe_ops.c
> @@ -586,9 +586,14 @@ static void
>  fs_stats_get(struct rte_eth_dev *dev,
>  	     struct rte_eth_stats *stats)
>  {
> -	if (TX_SUBDEV(dev) == NULL)
> -		return;
> -	rte_eth_stats_get(PORT_ID(TX_SUBDEV(dev)), stats);
> +	struct sub_device *sdev;
> +	uint8_t i;
> +
> +	memcpy(stats, &PRIV(dev)->stats_accumulator, sizeof(*stats));
> +	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
> +		rte_eth_stats_get(PORT_ID(sdev), &sdev->stats_snapshot);
> +		fs_increment_stats(&sdev->stats_snapshot, stats);
> +	}
>  }
>  
>  static void
> @@ -597,8 +602,11 @@ fs_stats_reset(struct rte_eth_dev *dev)
>  	struct sub_device *sdev;
>  	uint8_t i;
>  
> -	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
> +	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
>  		rte_eth_stats_reset(PORT_ID(sdev));
> +		memset(&sdev->stats_snapshot, 0, sizeof(struct rte_eth_stats));
> +	}
> +	memset(&PRIV(dev)->stats_accumulator, 0, sizeof(struct rte_eth_stats));
>  }
>  
>  /**
> diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h
> index 0361cf4..267c749 100644
> --- a/drivers/net/failsafe/failsafe_private.h
> +++ b/drivers/net/failsafe/failsafe_private.h
> @@ -102,6 +102,8 @@ struct sub_device {
>  	uint8_t sid;
>  	/* Device state machine */
>  	enum dev_state state;
> +	/* Last stats snapshot passed to user */
> +	struct rte_eth_stats stats_snapshot;
>  	/* Some device are defined as a command line */
>  	char *cmdline;
>  	/* fail-safe device backreference */
> @@ -140,6 +142,7 @@ struct fs_priv {
>  	 * synchronized state.
>  	 */
>  	enum dev_state state;
> +	struct rte_eth_stats stats_accumulator;
>  	unsigned int pending_alarm:1; /* An alarm is pending */
>  	/* flow isolation state */
>  	int flow_isolated:1;
> @@ -180,6 +183,8 @@ int failsafe_eal_uninit(struct rte_eth_dev *dev);
>  
>  int failsafe_eth_dev_state_sync(struct rte_eth_dev *dev);
>  void failsafe_dev_remove(struct rte_eth_dev *dev);
> +void fs_increment_stats(struct rte_eth_stats *from, struct rte_eth_stats *to);
> +void fs_increment_stats_accumulator(struct sub_device *sdev);
>  int failsafe_eth_rmv_event_callback(uint8_t port_id,
>  				    enum rte_eth_event_type type,
>  				    void *arg, void *out);
> -- 
> 2.7.4
> 

-- 
Gaëtan Rivet
6WIND


More information about the dev mailing list