[dpdk-dev,v3] net/mlx5: support device removal event

Message ID 1504533353-38337-1-git-send-email-matan@mellanox.com (mailing list archive)
State Superseded, archived
Headers

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK

Commit Message

Matan Azrad Sept. 4, 2017, 1:55 p.m. UTC
  Extend the LSC event handling to support the device removal as well.
The Verbs library may send several related events, which are
different from LSC event.

The mlx5 event handling has been made capable of receiving and
signaling several event types at once.

This support includes the following:
1. Removal event detection according to the user configuration.
2. Calling to all registered mlx5 removal callbacks.
3. Capabilities extension to include removal interrupt handling.

Signed-off-by: Matan Azrad <matan@mellanox.com>
---
 drivers/net/mlx5/mlx5.c        |   2 +-
 drivers/net/mlx5/mlx5_ethdev.c | 103 +++++++++++++++++++++++++++++------------
 2 files changed, 74 insertions(+), 31 deletions(-)

Changes:
V2:
Replace link status update function name.
Add inconsistent link workaround comment.

V3:
Fix indentations.
Make the inconsistent link comment more accurate.
  

Comments

Adrien Mazarguil Sept. 4, 2017, 3:33 p.m. UTC | #1
Hi Matan,

One comment I have is, while this patch adds support for RMV, it also
silently addresses a bug (see large comment you added to
priv_link_status_update()).

This should be split in two commits, with the fix part coming first and CC
stable@dpdk.org, and a second commit adding RMV support proper.

More below.

On Mon, Sep 04, 2017 at 04:55:53PM +0300, Matan Azrad wrote:
> Extend the LSC event handling to support the device removal as well.
> The Verbs library may send several related events, which are
> different from LSC event.
> 
> The mlx5 event handling has been made capable of receiving and
> signaling several event types at once.
> 
> This support includes next:
> 1. Removal event detection according to the user configuration.
> 2. Calling to all registered mlx5 removal callbacks.
> 3. Capabilities extension to include removal interrupt handling.
> 
> Signed-off-by: Matan Azrad <matan@mellanox.com>
> ---
>  drivers/net/mlx5/mlx5.c        |   2 +-
>  drivers/net/mlx5/mlx5_ethdev.c | 103 +++++++++++++++++++++++++++++------------
>  2 files changed, 74 insertions(+), 31 deletions(-)
> 
> Changes:
> V2:
> Replace link status update function name.
> add inconsistent link workaround comment.
> 
> V3:
> Fix indentations.
> Accurate inconsistent link comment.
> 
> 
> diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
> index bd66a7c..1a3d7f1 100644
> --- a/drivers/net/mlx5/mlx5.c
> +++ b/drivers/net/mlx5/mlx5.c
> @@ -865,7 +865,7 @@ static struct rte_pci_driver mlx5_driver = {
>  	},
>  	.id_table = mlx5_pci_id_map,
>  	.probe = mlx5_pci_probe,
> -	.drv_flags = RTE_PCI_DRV_INTR_LSC,
> +	.drv_flags = RTE_PCI_DRV_INTR_LSC | RTE_PCI_DRV_INTR_RMV,
>  };
>  
>  /**
> diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
> index 57f6237..cdbd723 100644
> --- a/drivers/net/mlx5/mlx5_ethdev.c
> +++ b/drivers/net/mlx5/mlx5_ethdev.c
> @@ -1112,47 +1112,84 @@ mlx5_ibv_device_to_pci_addr(const struct ibv_device *device,
>  }
>  
>  /**
> - * Link status handler.
> + * Update the link status.
>   *
>   * @param priv
>   *   Pointer to private structure.
> - * @param dev
> - *   Pointer to the rte_eth_dev structure.
>   *
>   * @return
> - *   Nonzero if the callback process can be called immediately.
> + *   Zero if the callback process can be called immediately.
>   */
>  static int
> -priv_dev_link_status_handler(struct priv *priv, struct rte_eth_dev *dev)
> +priv_link_status_update(struct priv *priv)
> +{
> +	struct rte_eth_link *link = &priv->dev->data->dev_link;
> +
> +	mlx5_link_update(priv->dev, 0);
> +	if (((link->link_speed == 0) && link->link_status) ||
> +		((link->link_speed != 0) && !link->link_status)) {
> +		/*
> +		 * Inconsistent status.
> +		 * The link status is read from Ethtool through an IOCTL,
> +		 * but as the application may work in polling mode it
> +		 * may get the port event before the Kernel driver had
> +		 * time to process it. PMD then request the link from
> +		 * the kernel but the event is still not processed (due
> +		 * to more urgent interrupts) and finally the PMD may
> +		 * get an inconsistent link.
> +		 * Setting alarm for later checking.
> +		 */

While adding a comment is nice, there's too much info in there. From the PMD
standpoint, what happens is the interrupt occurs much before the kernel
netdevice exposes the new status, so it needs to be checked later. Can you
sum it up in fewer words?

> +		if (!priv->pending_alarm) {
> +			priv->pending_alarm = 1;
> +			rte_eal_alarm_set(MLX5_ALARM_TIMEOUT_US,
> +					  mlx5_dev_link_status_handler,
> +					  priv->dev);
> +		}
> +		return 1;
> +	} else if (unlikely(priv->pending_alarm)) {
> +		/* In case of link interrupt while link alarm was setting. */
> +		priv->pending_alarm = 0;
> +		rte_eal_alarm_cancel(mlx5_dev_link_status_handler, priv->dev);
> +	}
> +	return 0;
> +}
> +
> +/**
> + * Device status handler.
> + *
> + * @param priv
> + *   Pointer to private structure.
> + * @param events
> + *   Pointer to event flags holder.
> + *
> + * @return
> + *   Events bitmap of callback process which can be called immediately.
> + */
> +static uint32_t
> +priv_dev_status_handler(struct priv *priv)
>  {
>  	struct ibv_async_event event;
> -	struct rte_eth_link *link = &dev->data->dev_link;
> -	int ret = 0;
> +	uint32_t ret = 0;
>  
>  	/* Read all message and acknowledge them. */
>  	for (;;) {
>  		if (ibv_get_async_event(priv->ctx, &event))
>  			break;
> -
> -		if (event.event_type != IBV_EVENT_PORT_ACTIVE &&
> -		    event.event_type != IBV_EVENT_PORT_ERR)
> +		if ((event.event_type == IBV_EVENT_PORT_ACTIVE ||
> +			event.event_type == IBV_EVENT_PORT_ERR) &&
> +			(priv->dev->data->dev_conf.intr_conf.lsc == 1))
> +			ret |= (1 << RTE_ETH_EVENT_INTR_LSC);
> +		else if (event.event_type == IBV_EVENT_DEVICE_FATAL &&
> +			priv->dev->data->dev_conf.intr_conf.rmv == 1)
> +			ret |= (1 << RTE_ETH_EVENT_INTR_RMV);
> +		else
>  			DEBUG("event type %d on port %d not handled",
>  			      event.event_type, event.element.port_num);

What you also need to mention in the commit log of the fix is that splitting
priv_dev_status_handler() and priv_link_status_update() addresses another
bug here: this loop consumed *all* events, even during alarms. An alarm
occurring for a LSC event could eat a RMV event that the application would
never receive. This also affects mlx4, for which I intend to submit a fix
soon.

>  		ibv_ack_async_event(&event);
>  	}
> -	mlx5_link_update(dev, 0);
> -	if (((link->link_speed == 0) && link->link_status) ||
> -	    ((link->link_speed != 0) && !link->link_status)) {
> -		if (!priv->pending_alarm) {
> -			/* Inconsistent status, check again later. */
> -			priv->pending_alarm = 1;
> -			rte_eal_alarm_set(MLX5_ALARM_TIMEOUT_US,
> -					  mlx5_dev_link_status_handler,
> -					  dev);
> -		}
> -	} else {
> -		ret = 1;
> -	}
> +	if (ret & (1 << RTE_ETH_EVENT_INTR_LSC))
> +		if (priv_link_status_update(priv))
> +			ret &= ~(1 << RTE_ETH_EVENT_INTR_LSC);
>  	return ret;
>  }
>  
> @@ -1172,9 +1209,9 @@ mlx5_dev_link_status_handler(void *arg)
>  	priv_lock(priv);
>  	assert(priv->pending_alarm == 1);
>  	priv->pending_alarm = 0;
> -	ret = priv_dev_link_status_handler(priv, dev);
> +	ret = priv_link_status_update(priv);
>  	priv_unlock(priv);
> -	if (ret)
> +	if (!ret)
>  		_rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL,
>  					      NULL);
>  }
> @@ -1192,14 +1229,17 @@ mlx5_dev_interrupt_handler(void *cb_arg)
>  {
>  	struct rte_eth_dev *dev = cb_arg;
>  	struct priv *priv = dev->data->dev_private;
> -	int ret;
> +	uint32_t events;
>  
>  	priv_lock(priv);
> -	ret = priv_dev_link_status_handler(priv, dev);
> +	events = priv_dev_status_handler(priv);
>  	priv_unlock(priv);
> -	if (ret)
> +	if (events & (1 << RTE_ETH_EVENT_INTR_LSC))
>  		_rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL,
>  					      NULL);
> +	if (events & (1 << RTE_ETH_EVENT_INTR_RMV))
> +		_rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_RMV, NULL,
> +					      NULL);
>  }
>  
>  /**
> @@ -1213,7 +1253,8 @@ mlx5_dev_interrupt_handler(void *cb_arg)
>  void
>  priv_dev_interrupt_handler_uninstall(struct priv *priv, struct rte_eth_dev *dev)
>  {
> -	if (!dev->data->dev_conf.intr_conf.lsc)
> +	if (!dev->data->dev_conf.intr_conf.lsc &&
> +		!dev->data->dev_conf.intr_conf.rmv)
>  		return;
>  	rte_intr_callback_unregister(&priv->intr_handle,
>  				     mlx5_dev_interrupt_handler,
> @@ -1238,7 +1279,8 @@ priv_dev_interrupt_handler_install(struct priv *priv, struct rte_eth_dev *dev)
>  {
>  	int rc, flags;
>  
> -	if (!dev->data->dev_conf.intr_conf.lsc)
> +	if (!dev->data->dev_conf.intr_conf.lsc &&
> +		!dev->data->dev_conf.intr_conf.rmv)
>  		return;
>  	assert(priv->ctx->async_fd > 0);
>  	flags = fcntl(priv->ctx->async_fd, F_GETFL);
> @@ -1246,6 +1288,7 @@ priv_dev_interrupt_handler_install(struct priv *priv, struct rte_eth_dev *dev)
>  	if (rc < 0) {
>  		INFO("failed to change file descriptor async event queue");
>  		dev->data->dev_conf.intr_conf.lsc = 0;
> +		dev->data->dev_conf.intr_conf.rmv = 0;
>  	} else {
>  		priv->intr_handle.fd = priv->ctx->async_fd;
>  		priv->intr_handle.type = RTE_INTR_HANDLE_EXT;
> -- 
> 2.7.4
>
  
Matan Azrad Sept. 4, 2017, 5:52 p.m. UTC | #2
Hi Adrien,

> -----Original Message-----
> From: Adrien Mazarguil [mailto:adrien.mazarguil@6wind.com]
> Sent: Monday, September 4, 2017 6:33 PM
> To: Matan Azrad <matan@mellanox.com>
> Cc: Nélio Laranjeiro <nelio.laranjeiro@6wind.com>; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v3] net/mlx5: support device removal event
> 
> Hi Matan,
> 
> One comment I have is, while this patch adds support for RMV, it also silently
> addresses a bug (see large comment you added to
> priv_link_status_update()).
> 
> This should be split in two commits, with the fix part coming first and CC
> stable@dpdk.org, and a second commit adding RMV support proper.
> 

Actually, the mlx4 bug did not appear in the previous mlx5 code,
probably because the RMV interrupt was not implemented in mlx5 before this patch.
The big comment just explains the inconsistent link issue and was added
here since Nelio and I think the new function, priv_link_status_update(),
justifies this comment for future review.

> More below.
> 
> On Mon, Sep 04, 2017 at 04:55:53PM +0300, Matan Azrad wrote:
> > Extend the LSC event handling to support the device removal as well.
> > The Verbs library may send several related events, which are different
> > from LSC event.
> >
> > The mlx5 event handling has been made capable of receiving and
> > signaling several event types at once.
> >
> > This support includes next:
> > 1. Removal event detection according to the user configuration.
> > 2. Calling to all registered mlx5 removal callbacks.
> > 3. Capabilities extension to include removal interrupt handling.
> >
> > Signed-off-by: Matan Azrad <matan@mellanox.com>
> > ---
> >  drivers/net/mlx5/mlx5.c        |   2 +-
> >  drivers/net/mlx5/mlx5_ethdev.c | 103
> > +++++++++++++++++++++++++++++------------
> >  2 files changed, 74 insertions(+), 31 deletions(-)
> >
> > Changes:
> > V2:
> > Replace link status update function name.
> > add inconsistent link workaround comment.
> >
> > V3:
> > Fix indentations.
> > Accurate inconsistent link comment.
> >
> >
> > diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index
> > bd66a7c..1a3d7f1 100644
> > --- a/drivers/net/mlx5/mlx5.c
> > +++ b/drivers/net/mlx5/mlx5.c
> > @@ -865,7 +865,7 @@ static struct rte_pci_driver mlx5_driver = {
> >  	},
> >  	.id_table = mlx5_pci_id_map,
> >  	.probe = mlx5_pci_probe,
> > -	.drv_flags = RTE_PCI_DRV_INTR_LSC,
> > +	.drv_flags = RTE_PCI_DRV_INTR_LSC | RTE_PCI_DRV_INTR_RMV,
> >  };
> >
> >  /**
> > diff --git a/drivers/net/mlx5/mlx5_ethdev.c
> > b/drivers/net/mlx5/mlx5_ethdev.c index 57f6237..cdbd723 100644
> > --- a/drivers/net/mlx5/mlx5_ethdev.c
> > +++ b/drivers/net/mlx5/mlx5_ethdev.c
> > @@ -1112,47 +1112,84 @@ mlx5_ibv_device_to_pci_addr(const struct
> > ibv_device *device,  }
> >
> >  /**
> > - * Link status handler.
> > + * Update the link status.
> >   *
> >   * @param priv
> >   *   Pointer to private structure.
> > - * @param dev
> > - *   Pointer to the rte_eth_dev structure.
> >   *
> >   * @return
> > - *   Nonzero if the callback process can be called immediately.
> > + *   Zero if the callback process can be called immediately.
> >   */
> >  static int
> > -priv_dev_link_status_handler(struct priv *priv, struct rte_eth_dev
> > *dev)
> > +priv_link_status_update(struct priv *priv) {
> > +	struct rte_eth_link *link = &priv->dev->data->dev_link;
> > +
> > +	mlx5_link_update(priv->dev, 0);
> > +	if (((link->link_speed == 0) && link->link_status) ||
> > +		((link->link_speed != 0) && !link->link_status)) {
> > +		/*
> > +		 * Inconsistent status.
> > +		 * The link status is read from Ethtool through an IOCTL,
> > +		 * but as the application may work in polling mode it
> > +		 * may get the port event before the Kernel driver had
> > +		 * time to process it. PMD then request the link from
> > +		 * the kernel but the event is still not processed (due
> > +		 * to more urgent interrupts) and finally the PMD may
> > +		 * get an inconsistent link.
> > +		 * Setting alarm for later checking.
> > +		 */
> 
> While adding a comment is nice, there's too much info in there. From the
> PMD standpoint, what happens is the interrupt occurs much before the
> kernel netdevice exposes the new status, so it needs to be checked later.
> Can you sum it up in fewer words?
> 

Yes, sure :)

> > +		if (!priv->pending_alarm) {
> > +			priv->pending_alarm = 1;
> > +			rte_eal_alarm_set(MLX5_ALARM_TIMEOUT_US,
> > +					  mlx5_dev_link_status_handler,
> > +					  priv->dev);
> > +		}
> > +		return 1;
> > +	} else if (unlikely(priv->pending_alarm)) {
> > +		/* In case of link interrupt while link alarm was setting. */
> > +		priv->pending_alarm = 0;
> > +		rte_eal_alarm_cancel(mlx5_dev_link_status_handler, priv-
> >dev);
> > +	}
> > +	return 0;
> > +}
> > +
> > +/**
> > + * Device status handler.
> > + *
> > + * @param priv
> > + *   Pointer to private structure.
> > + * @param events
> > + *   Pointer to event flags holder.
> > + *
> > + * @return
> > + *   Events bitmap of callback process which can be called immediately.
> > + */
> > +static uint32_t
> > +priv_dev_status_handler(struct priv *priv)
> >  {
> >  	struct ibv_async_event event;
> > -	struct rte_eth_link *link = &dev->data->dev_link;
> > -	int ret = 0;
> > +	uint32_t ret = 0;
> >
> >  	/* Read all message and acknowledge them. */
> >  	for (;;) {
> >  		if (ibv_get_async_event(priv->ctx, &event))
> >  			break;
> > -
> > -		if (event.event_type != IBV_EVENT_PORT_ACTIVE &&
> > -		    event.event_type != IBV_EVENT_PORT_ERR)
> > +		if ((event.event_type == IBV_EVENT_PORT_ACTIVE ||
> > +			event.event_type == IBV_EVENT_PORT_ERR) &&
> > +			(priv->dev->data->dev_conf.intr_conf.lsc == 1))
> > +			ret |= (1 << RTE_ETH_EVENT_INTR_LSC);
> > +		else if (event.event_type == IBV_EVENT_DEVICE_FATAL &&
> > +			priv->dev->data->dev_conf.intr_conf.rmv == 1)
> > +			ret |= (1 << RTE_ETH_EVENT_INTR_RMV);
> > +		else
> >  			DEBUG("event type %d on port %d not handled",
> >  			      event.event_type, event.element.port_num);
> 
> What you also need to mention in the commit log of the fix is that splitting
> priv_dev_status_handler() and priv_link_status_update() addresses another
> bug here: this loop consumed *all* events, even during alarms. An alarm
> occurring for a LSC event could eat a RMV event that the application would
> never receive. This also affects mlx4, for which I intend to submit a fix soon.
> 

I think this issue is also an mlx4-only bug.
Since the previous mlx5 code supported only the LSC event,
none of these problems were present there.

> >  		ibv_ack_async_event(&event);
> >  	}
> > -	mlx5_link_update(dev, 0);
> > -	if (((link->link_speed == 0) && link->link_status) ||
> > -	    ((link->link_speed != 0) && !link->link_status)) {
> > -		if (!priv->pending_alarm) {
> > -			/* Inconsistent status, check again later. */
> > -			priv->pending_alarm = 1;
> > -			rte_eal_alarm_set(MLX5_ALARM_TIMEOUT_US,
> > -					  mlx5_dev_link_status_handler,
> > -					  dev);
> > -		}
> > -	} else {
> > -		ret = 1;
> > -	}
> > +	if (ret & (1 << RTE_ETH_EVENT_INTR_LSC))
> > +		if (priv_link_status_update(priv))
> > +			ret &= ~(1 << RTE_ETH_EVENT_INTR_LSC);
> >  	return ret;
> >  }
> >
> > @@ -1172,9 +1209,9 @@ mlx5_dev_link_status_handler(void *arg)
> >  	priv_lock(priv);
> >  	assert(priv->pending_alarm == 1);
> >  	priv->pending_alarm = 0;
> > -	ret = priv_dev_link_status_handler(priv, dev);
> > +	ret = priv_link_status_update(priv);
> >  	priv_unlock(priv);
> > -	if (ret)
> > +	if (!ret)
> >  		_rte_eth_dev_callback_process(dev,
> RTE_ETH_EVENT_INTR_LSC, NULL,
> >  					      NULL);
> >  }
> > @@ -1192,14 +1229,17 @@ mlx5_dev_interrupt_handler(void *cb_arg)  {
> >  	struct rte_eth_dev *dev = cb_arg;
> >  	struct priv *priv = dev->data->dev_private;
> > -	int ret;
> > +	uint32_t events;
> >
> >  	priv_lock(priv);
> > -	ret = priv_dev_link_status_handler(priv, dev);
> > +	events = priv_dev_status_handler(priv);
> >  	priv_unlock(priv);
> > -	if (ret)
> > +	if (events & (1 << RTE_ETH_EVENT_INTR_LSC))
> >  		_rte_eth_dev_callback_process(dev,
> RTE_ETH_EVENT_INTR_LSC, NULL,
> >  					      NULL);
> > +	if (events & (1 << RTE_ETH_EVENT_INTR_RMV))
> > +		_rte_eth_dev_callback_process(dev,
> RTE_ETH_EVENT_INTR_RMV, NULL,
> > +					      NULL);
> >  }
> >
> >  /**
> > @@ -1213,7 +1253,8 @@ mlx5_dev_interrupt_handler(void *cb_arg)  void
> > priv_dev_interrupt_handler_uninstall(struct priv *priv, struct
> > rte_eth_dev *dev)  {
> > -	if (!dev->data->dev_conf.intr_conf.lsc)
> > +	if (!dev->data->dev_conf.intr_conf.lsc &&
> > +		!dev->data->dev_conf.intr_conf.rmv)
> >  		return;
> >  	rte_intr_callback_unregister(&priv->intr_handle,
> >  				     mlx5_dev_interrupt_handler,
> > @@ -1238,7 +1279,8 @@ priv_dev_interrupt_handler_install(struct priv
> > *priv, struct rte_eth_dev *dev)  {
> >  	int rc, flags;
> >
> > -	if (!dev->data->dev_conf.intr_conf.lsc)
> > +	if (!dev->data->dev_conf.intr_conf.lsc &&
> > +		!dev->data->dev_conf.intr_conf.rmv)
> >  		return;
> >  	assert(priv->ctx->async_fd > 0);
> >  	flags = fcntl(priv->ctx->async_fd, F_GETFL); @@ -1246,6 +1288,7 @@
> > priv_dev_interrupt_handler_install(struct priv *priv, struct rte_eth_dev
> *dev)
> >  	if (rc < 0) {
> >  		INFO("failed to change file descriptor async event queue");
> >  		dev->data->dev_conf.intr_conf.lsc = 0;
> > +		dev->data->dev_conf.intr_conf.rmv = 0;
> >  	} else {
> >  		priv->intr_handle.fd = priv->ctx->async_fd;
> >  		priv->intr_handle.type = RTE_INTR_HANDLE_EXT;
> > --
> > 2.7.4
> >
> 
> --
> Adrien Mazarguil
> 6WIND

Thanks,
Matan Azrad
  
Adrien Mazarguil Sept. 5, 2017, 9:28 a.m. UTC | #3
Hi Matan,

On Mon, Sep 04, 2017 at 05:52:55PM +0000, Matan Azrad wrote:
> Hi Adrien,
> 
> > -----Original Message-----
> > From: Adrien Mazarguil [mailto:adrien.mazarguil@6wind.com]
> > Sent: Monday, September 4, 2017 6:33 PM
> > To: Matan Azrad <matan@mellanox.com>
> > Cc: Nélio Laranjeiro <nelio.laranjeiro@6wind.com>; dev@dpdk.org
> > Subject: Re: [dpdk-dev] [PATCH v3] net/mlx5: support device removal event
> > 
> > Hi Matan,
> > 
> > One comment I have is, while this patch adds support for RMV, it also silently
> > addresses a bug (see large comment you added to
> > priv_link_status_update()).
> > 
> > This should be split in two commits, with the fix part coming first and CC
> > stable@dpdk.org, and a second commit adding RMV support proper.
> > 
> 
> Actually, the mlx4 bug was not appeared in the mlx5 previous code,
> Probably because the RMV interrupt was not implemented in mlx5 before this patch.

Good point, no RMV could occur before it is implemented, however a dedicated
commit for the fix itself (i.e. alarm callback not supposed to end up
calling ibv_get_async_event()) might better explain the logic behind these
changes. What I mean is, if there was no problem, you wouldn't need to make
priv_link_status_update() a separate function, right?

> The big comment just explains the link inconsistent issue and was added
> here since Nelio and I think the new function, priv_link_status_update(),
> justifies this comment for future review.  

I understand, this could also have been part of the commit log of the
dedicated commit.

Thanks.
  
Matan Azrad Sept. 5, 2017, 10:38 a.m. UTC | #4
Hi Adrien

> -----Original Message-----

> From: Adrien Mazarguil [mailto:adrien.mazarguil@6wind.com]

> Sent: Tuesday, September 5, 2017 12:28 PM

> To: Matan Azrad <matan@mellanox.com>

> Cc: Nélio Laranjeiro <nelio.laranjeiro@6wind.com>; dev@dpdk.org

> Subject: Re: [dpdk-dev] [PATCH v3] net/mlx5: support device removal event

> 

> Hi Matan,

> 

> On Mon, Sep 04, 2017 at 05:52:55PM +0000, Matan Azrad wrote:

> > Hi Adrien,

> >

> > > -----Original Message-----

> > > From: Adrien Mazarguil [mailto:adrien.mazarguil@6wind.com]

> > > Sent: Monday, September 4, 2017 6:33 PM

> > > To: Matan Azrad <matan@mellanox.com>

> > > Cc: Nélio Laranjeiro <nelio.laranjeiro@6wind.com>; dev@dpdk.org

> > > Subject: Re: [dpdk-dev] [PATCH v3] net/mlx5: support device removal

> > > event

> > >

> > > Hi Matan,

> > >

> > > One comment I have is, while this patch adds support for RMV, it

> > > also silently addresses a bug (see large comment you added to

> > > priv_link_status_update()).

> > >

> > > This should be split in two commits, with the fix part coming first

> > > and CC stable@dpdk.org, and a second commit adding RMV support

> proper.

> > >

> >

> > Actually, the mlx4 bug was not appeared in the mlx5 previous code,

> > Probably because the RMV interrupt was not implemented in mlx5 before

> this patch.

> 

> Good point, no RMV could occur before it is implemented, however a

> dedicated commit for the fix itself (i.e. alarm callback not supposed to end up

> calling ibv_get_async_event()) might better explain the logic behind these

> changes. What I mean is, if there was no problem, you wouldn't need to

> make

> priv_link_status_update() a separate function, right?

> 


The separation was done mainly because of the new interrupt implementation;
otherwise, there would have been a bug here.
The unnecessary ibv_get_async_event() call from the alarm was harmless in
the previous code.
I get your point about explaining the logic behind these changes, and I can add it to this
patch's commit log to make it clearer, something like:
The link update operation was separated from the interrupt callback
to avoid RMV interrupts being disregarded and unnecessary event acknowledgment
caused by the inconsistent link status alarm callback.

> > The big comment just explains the link inconsistent issue and was

> > added here since Nelio and I think the new function,

> > priv_link_status_update(), justifies this comment for future review.

> 

> I understand, this could also have been part of the commit log of the

> dedicated commit.

> 

Are you sure we need to describe the code comment reason in the commit log?

> Thanks.

> 

> --

> Adrien Mazarguil

> 6WIND
  
Adrien Mazarguil Sept. 5, 2017, 12:01 p.m. UTC | #5
Hi Matan,

On Tue, Sep 05, 2017 at 10:38:21AM +0000, Matan Azrad wrote:
> Hi Adrien
> 
> > -----Original Message-----
> > From: Adrien Mazarguil [mailto:adrien.mazarguil@6wind.com]
> > Sent: Tuesday, September 5, 2017 12:28 PM
> > To: Matan Azrad <matan@mellanox.com>
> > Cc: Nélio Laranjeiro <nelio.laranjeiro@6wind.com>; dev@dpdk.org
> > Subject: Re: [dpdk-dev] [PATCH v3] net/mlx5: support device removal event
> > 
> > Hi Matan,
> > 
> > On Mon, Sep 04, 2017 at 05:52:55PM +0000, Matan Azrad wrote:
> > > Hi Adrien,
> > >
> > > > -----Original Message-----
> > > > From: Adrien Mazarguil [mailto:adrien.mazarguil@6wind.com]
> > > > Sent: Monday, September 4, 2017 6:33 PM
> > > > To: Matan Azrad <matan@mellanox.com>
> > > > Cc: Nélio Laranjeiro <nelio.laranjeiro@6wind.com>; dev@dpdk.org
> > > > Subject: Re: [dpdk-dev] [PATCH v3] net/mlx5: support device removal
> > > > event
> > > >
> > > > Hi Matan,
> > > >
> > > > One comment I have is, while this patch adds support for RMV, it
> > > > also silently addresses a bug (see large comment you added to
> > > > priv_link_status_update()).
> > > >
> > > > This should be split in two commits, with the fix part coming first
> > > > and CC stable@dpdk.org, and a second commit adding RMV support
> > proper.
> > > >
> > >
> > > Actually, the mlx4 bug was not appeared in the mlx5 previous code,
> > > Probably because the RMV interrupt was not implemented in mlx5 before
> > this patch.
> > 
> > Good point, no RMV could occur before it is implemented, however a
> > dedicated commit for the fix itself (i.e. alarm callback not supposed to end up
> > calling ibv_get_async_event()) might better explain the logic behind these
> > changes. What I mean is, if there was no problem, you wouldn't need to
> > make
> > priv_link_status_update() a separate function, right?
> > 
> 
> The separation was done mainly because of the new interrupt implementation,
> else, there was bug here.
> The unnecessary  alarm ibv_get_async_event calling was harmless in
> the previous code.
> I gets your point for the logic explanation behind these changes and I can add it in this
> patch commit log to be clearer, something like:
> The link update operation was separated from the interrupt callback
> to avoid RMV interrupt disregard and unnecessary event acknowledgment
> caused by the inconsistent link status alarm callback.

Yes, it's better to explain why you did this in the commit log, but see
below.

> > > The big comment just explains the link inconsistent issue and was
> > > added here since Nelio and I think the new function,
> > > priv_link_status_update(), justifies this comment for future review.
> > 
> > I understand, this could also have been part of the commit log of the
> > dedicated commit.
> > 
> Are you sure we need to describe the code comment reason in the commit log?

It's a change you did to address a possible bug otherwise so we have to,
however remember that a commit should, as much as possible, do exactly one
thing. If you need to explain that you did this in order to do that, "this"
and "that" can often be identified as two separate commits. Doing so makes
it much easier for reviewers to understand the reasoning behind changes and
leads to quicker reviews (makes instant-acks even possible).

I'd still like a separate commit if you don't mind.
  
Matan Azrad Sept. 5, 2017, 1:36 p.m. UTC | #6
Hi Adrien

> -----Original Message-----

> From: Adrien Mazarguil [mailto:adrien.mazarguil@6wind.com]

> Sent: Tuesday, September 5, 2017 3:02 PM

> To: Matan Azrad <matan@mellanox.com>

> Cc: Nélio Laranjeiro <nelio.laranjeiro@6wind.com>; dev@dpdk.org

> Subject: Re: [dpdk-dev] [PATCH v3] net/mlx5: support device removal event

> 

> Hi Matan,

> 

> On Tue, Sep 05, 2017 at 10:38:21AM +0000, Matan Azrad wrote:

> > Hi Adrien

> >

> > > -----Original Message-----

> > > From: Adrien Mazarguil [mailto:adrien.mazarguil@6wind.com]

> > > Sent: Tuesday, September 5, 2017 12:28 PM

> > > To: Matan Azrad <matan@mellanox.com>

> > > Cc: Nélio Laranjeiro <nelio.laranjeiro@6wind.com>; dev@dpdk.org

> > > Subject: Re: [dpdk-dev] [PATCH v3] net/mlx5: support device removal

> > > event

> > >

> > > Hi Matan,

> > >

> > > On Mon, Sep 04, 2017 at 05:52:55PM +0000, Matan Azrad wrote:

> > > > Hi Adrien,

> > > >

> > > > > -----Original Message-----

> > > > > From: Adrien Mazarguil [mailto:adrien.mazarguil@6wind.com]

> > > > > Sent: Monday, September 4, 2017 6:33 PM

> > > > > To: Matan Azrad <matan@mellanox.com>

> > > > > Cc: Nélio Laranjeiro <nelio.laranjeiro@6wind.com>; dev@dpdk.org

> > > > > Subject: Re: [dpdk-dev] [PATCH v3] net/mlx5: support device

> > > > > removal event

> > > > >

> > > > > Hi Matan,

> > > > >

> > > > > One comment I have is, while this patch adds support for RMV, it

> > > > > also silently addresses a bug (see large comment you added to

> > > > > priv_link_status_update()).

> > > > >

> > > > > This should be split in two commits, with the fix part coming

> > > > > first and CC stable@dpdk.org, and a second commit adding RMV

> > > > > support

> > > proper.

> > > > >

> > > >

> > > > Actually, the mlx4 bug was not appeared in the mlx5 previous code,

> > > > Probably because the RMV interrupt was not implemented in mlx5

> > > > before

> > > this patch.

> > >

> > > Good point, no RMV could occur before it is implemented, however a

> > > dedicated commit for the fix itself (i.e. alarm callback not

> > > supposed to end up calling ibv_get_async_event()) might better

> > > explain the logic behind these changes. What I mean is, if there was

> > > no problem, you wouldn't need to make

> > > priv_link_status_update() a separate function, right?

> > >

> >

> > The separation was done mainly because of the new interrupt

> > implementation, else, there was bug here.

> > The unnecessary  alarm ibv_get_async_event calling was harmless in the

> > previous code.

> > I gets your point for the logic explanation behind these changes and I

> > can add it in this patch commit log to be clearer, something like:

> > The link update operation was separated from the interrupt callback to

> > avoid RMV interrupt disregard and unnecessary event acknowledgment

> > caused by the inconsistent link status alarm callback.

> 

> Yes, it's better to explain why you did this in the commit log, but see below.

> 

> > > > The big comment just explains the link inconsistent issue and was

> > > > added here since Nelio and I think the new function,

> > > > priv_link_status_update(), justifies this comment for future review.

> > >

> > > I understand, this could also have been part of the commit log of

> > > the dedicated commit.

> > >

> > Are you sure we need to describe the code comment reason in the commit

> log?

> 

> It's a change you did to address a possible bug otherwise so we have to,

> however remember that a commit should, as much as possible, do exactly

> one thing. If you need to explain that you did this in order to do that, "this"

> and "that" can often be identified as two separate commits. Doing so makes

> it much easier for reviewers to understand the reasoning behind changes

> and leads to quicker reviews (makes instant-acks even possible).

> 

> It'd still like a separate commit if you don't mind.


Sorry, but I think this is an endless request.
I have just added the RMV interrupt, and I did a lot of things in this patch for it.
I don't think I need to separate each thing done for this support.
I would prefer to keep it in one patch, if you don't mind.
 
> 

> --

> Adrien Mazarguil

> 6WIND
  
Adrien Mazarguil Sept. 6, 2017, 7:12 a.m. UTC | #7
Hi Matan,

On Tue, Sep 05, 2017 at 01:36:13PM +0000, Matan Azrad wrote:
> Hi Adrien
> 
> > -----Original Message-----
> > From: Adrien Mazarguil [mailto:adrien.mazarguil@6wind.com]
> > Sent: Tuesday, September 5, 2017 3:02 PM
> > To: Matan Azrad <matan@mellanox.com>
> > Cc: Nélio Laranjeiro <nelio.laranjeiro@6wind.com>; dev@dpdk.org
> > Subject: Re: [dpdk-dev] [PATCH v3] net/mlx5: support device removal event
> > 
> > Hi Matan,
> > 
> > On Tue, Sep 05, 2017 at 10:38:21AM +0000, Matan Azrad wrote:
> > > Hi Adrien
> > >
> > > > -----Original Message-----
> > > > From: Adrien Mazarguil [mailto:adrien.mazarguil@6wind.com]
> > > > Sent: Tuesday, September 5, 2017 12:28 PM
> > > > To: Matan Azrad <matan@mellanox.com>
> > > > Cc: Nélio Laranjeiro <nelio.laranjeiro@6wind.com>; dev@dpdk.org
> > > > Subject: Re: [dpdk-dev] [PATCH v3] net/mlx5: support device removal
> > > > event
> > > >
> > > > Hi Matan,
> > > >
> > > > On Mon, Sep 04, 2017 at 05:52:55PM +0000, Matan Azrad wrote:
> > > > > Hi Adrien,
> > > > >
> > > > > > -----Original Message-----
> > > > > > From: Adrien Mazarguil [mailto:adrien.mazarguil@6wind.com]
> > > > > > Sent: Monday, September 4, 2017 6:33 PM
> > > > > > To: Matan Azrad <matan@mellanox.com>
> > > > > > Cc: Nélio Laranjeiro <nelio.laranjeiro@6wind.com>; dev@dpdk.org
> > > > > > Subject: Re: [dpdk-dev] [PATCH v3] net/mlx5: support device
> > > > > > removal event
> > > > > >
> > > > > > Hi Matan,
> > > > > >
> > > > > > One comment I have is, while this patch adds support for RMV, it
> > > > > > also silently addresses a bug (see large comment you added to
> > > > > > priv_link_status_update()).
> > > > > >
> > > > > > This should be split in two commits, with the fix part coming
> > > > > > first and CC stable@dpdk.org, and a second commit adding RMV
> > > > > > support
> > > > proper.
> > > > > >
> > > > >
> > > > > Actually, the mlx4 bug was not appeared in the mlx5 previous code,
> > > > > Probably because the RMV interrupt was not implemented in mlx5
> > > > > before
> > > > this patch.
> > > >
> > > > Good point, no RMV could occur before it is implemented, however a
> > > > dedicated commit for the fix itself (i.e. alarm callback not
> > > > supposed to end up calling ibv_get_async_event()) might better
> > > > explain the logic behind these changes. What I mean is, if there was
> > > > no problem, you wouldn't need to make
> > > > priv_link_status_update() a separate function, right?
> > > >
> > >
> > > The separation was done mainly because of the new interrupt
> > > implementation, else, there was bug here.
> > > The unnecessary  alarm ibv_get_async_event calling was harmless in the
> > > previous code.
> > > I gets your point for the logic explanation behind these changes and I
> > > can add it in this patch commit log to be clearer, something like:
> > > The link update operation was separated from the interrupt callback to
> > > avoid RMV interrupt disregard and unnecessary event acknowledgment
> > > caused by the inconsistent link status alarm callback.
> > 
> > Yes, it's better to explain why you did this in the commit log, but see below.
> > 
> > > > > The big comment just explains the link inconsistent issue and was
> > > > > added here since Nelio and I think the new function,
> > > > > priv_link_status_update(), justifies this comment for future review.
> > > >
> > > > I understand, this could also have been part of the commit log of
> > > > the dedicated commit.
> > > >
> > > Are you sure we need to describe the code comment reason in the commit
> > log?
> > 
> > It's a change you did to address a possible bug otherwise so we have to,
> > however remember that a commit should, as much as possible, do exactly
> > one thing. If you need to explain that you did this in order to do that, "this"
> > and "that" can often be identified as two separate commits. Doing so makes
> > it much easier for reviewers to understand the reasoning behind changes
> > and leads to quicker reviews (makes instant-acks even possible).
> > 
> > It'd still like a separate commit if you don't mind.
> 
> Sorry, but I think it is an infinite order.
> I have just added RMV interrupt, I did a lot of things in this patch for it.
> I think  I don't need to separate each thing done for this support.
> I prefer to stay it in one patch if you don't mind. 

I understand that's a lot of work, so let's cut the talk. Since I'm the one
requesting for patches to be split, I'll offer to re-spin yours and submit
the result as v4, is that OK?
  

Patch

diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index bd66a7c..1a3d7f1 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -865,7 +865,7 @@  static struct rte_pci_driver mlx5_driver = {
 	},
 	.id_table = mlx5_pci_id_map,
 	.probe = mlx5_pci_probe,
-	.drv_flags = RTE_PCI_DRV_INTR_LSC,
+	.drv_flags = RTE_PCI_DRV_INTR_LSC | RTE_PCI_DRV_INTR_RMV,
 };
 
 /**
diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index 57f6237..cdbd723 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -1112,47 +1112,84 @@  mlx5_ibv_device_to_pci_addr(const struct ibv_device *device,
 }
 
 /**
- * Link status handler.
+ * Update the link status.
  *
  * @param priv
  *   Pointer to private structure.
- * @param dev
- *   Pointer to the rte_eth_dev structure.
  *
  * @return
- *   Nonzero if the callback process can be called immediately.
+ *   Zero if the callback process can be called immediately.
  */
 static int
-priv_dev_link_status_handler(struct priv *priv, struct rte_eth_dev *dev)
+priv_link_status_update(struct priv *priv)
+{
+	struct rte_eth_link *link = &priv->dev->data->dev_link;
+
+	mlx5_link_update(priv->dev, 0);
+	if (((link->link_speed == 0) && link->link_status) ||
+		((link->link_speed != 0) && !link->link_status)) {
+		/*
+		 * Inconsistent status.
+		 * The link status is read from Ethtool through an IOCTL,
+		 * but as the application may work in polling mode it
+		 * may get the port event before the Kernel driver had
+		 * time to process it. The PMD then requests the link
+		 * status from the kernel, but the event is still not
+		 * processed (due to more urgent interrupts), so the PMD
+		 * may get an inconsistent link status.
+		 * Set an alarm to check again later.
+		 */
+		if (!priv->pending_alarm) {
+			priv->pending_alarm = 1;
+			rte_eal_alarm_set(MLX5_ALARM_TIMEOUT_US,
+					  mlx5_dev_link_status_handler,
+					  priv->dev);
+		}
+		return 1;
+	} else if (unlikely(priv->pending_alarm)) {
+		/* Link interrupt occurred while a link alarm was pending. */
+		priv->pending_alarm = 0;
+		rte_eal_alarm_cancel(mlx5_dev_link_status_handler, priv->dev);
+	}
+	return 0;
+}
+
+/**
+ * Device status handler.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param events
+ *   Pointer to event flags holder.
+ *
+ * @return
+ *   Events bitmap of callback process which can be called immediately.
+ */
+static uint32_t
+priv_dev_status_handler(struct priv *priv)
 {
 	struct ibv_async_event event;
-	struct rte_eth_link *link = &dev->data->dev_link;
-	int ret = 0;
+	uint32_t ret = 0;
 
 	/* Read all message and acknowledge them. */
 	for (;;) {
 		if (ibv_get_async_event(priv->ctx, &event))
 			break;
-
-		if (event.event_type != IBV_EVENT_PORT_ACTIVE &&
-		    event.event_type != IBV_EVENT_PORT_ERR)
+		if ((event.event_type == IBV_EVENT_PORT_ACTIVE ||
+			event.event_type == IBV_EVENT_PORT_ERR) &&
+			(priv->dev->data->dev_conf.intr_conf.lsc == 1))
+			ret |= (1 << RTE_ETH_EVENT_INTR_LSC);
+		else if (event.event_type == IBV_EVENT_DEVICE_FATAL &&
+			priv->dev->data->dev_conf.intr_conf.rmv == 1)
+			ret |= (1 << RTE_ETH_EVENT_INTR_RMV);
+		else
 			DEBUG("event type %d on port %d not handled",
 			      event.event_type, event.element.port_num);
 		ibv_ack_async_event(&event);
 	}
-	mlx5_link_update(dev, 0);
-	if (((link->link_speed == 0) && link->link_status) ||
-	    ((link->link_speed != 0) && !link->link_status)) {
-		if (!priv->pending_alarm) {
-			/* Inconsistent status, check again later. */
-			priv->pending_alarm = 1;
-			rte_eal_alarm_set(MLX5_ALARM_TIMEOUT_US,
-					  mlx5_dev_link_status_handler,
-					  dev);
-		}
-	} else {
-		ret = 1;
-	}
+	if (ret & (1 << RTE_ETH_EVENT_INTR_LSC))
+		if (priv_link_status_update(priv))
+			ret &= ~(1 << RTE_ETH_EVENT_INTR_LSC);
 	return ret;
 }
 
@@ -1172,9 +1209,9 @@  mlx5_dev_link_status_handler(void *arg)
 	priv_lock(priv);
 	assert(priv->pending_alarm == 1);
 	priv->pending_alarm = 0;
-	ret = priv_dev_link_status_handler(priv, dev);
+	ret = priv_link_status_update(priv);
 	priv_unlock(priv);
-	if (ret)
+	if (!ret)
 		_rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL,
 					      NULL);
 }
@@ -1192,14 +1229,17 @@  mlx5_dev_interrupt_handler(void *cb_arg)
 {
 	struct rte_eth_dev *dev = cb_arg;
 	struct priv *priv = dev->data->dev_private;
-	int ret;
+	uint32_t events;
 
 	priv_lock(priv);
-	ret = priv_dev_link_status_handler(priv, dev);
+	events = priv_dev_status_handler(priv);
 	priv_unlock(priv);
-	if (ret)
+	if (events & (1 << RTE_ETH_EVENT_INTR_LSC))
 		_rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL,
 					      NULL);
+	if (events & (1 << RTE_ETH_EVENT_INTR_RMV))
+		_rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_RMV, NULL,
+					      NULL);
 }
 
 /**
@@ -1213,7 +1253,8 @@  mlx5_dev_interrupt_handler(void *cb_arg)
 void
 priv_dev_interrupt_handler_uninstall(struct priv *priv, struct rte_eth_dev *dev)
 {
-	if (!dev->data->dev_conf.intr_conf.lsc)
+	if (!dev->data->dev_conf.intr_conf.lsc &&
+		!dev->data->dev_conf.intr_conf.rmv)
 		return;
 	rte_intr_callback_unregister(&priv->intr_handle,
 				     mlx5_dev_interrupt_handler,
@@ -1238,7 +1279,8 @@  priv_dev_interrupt_handler_install(struct priv *priv, struct rte_eth_dev *dev)
 {
 	int rc, flags;
 
-	if (!dev->data->dev_conf.intr_conf.lsc)
+	if (!dev->data->dev_conf.intr_conf.lsc &&
+		!dev->data->dev_conf.intr_conf.rmv)
 		return;
 	assert(priv->ctx->async_fd > 0);
 	flags = fcntl(priv->ctx->async_fd, F_GETFL);
@@ -1246,6 +1288,7 @@  priv_dev_interrupt_handler_install(struct priv *priv, struct rte_eth_dev *dev)
 	if (rc < 0) {
 		INFO("failed to change file descriptor async event queue");
 		dev->data->dev_conf.intr_conf.lsc = 0;
+		dev->data->dev_conf.intr_conf.rmv = 0;
 	} else {
 		priv->intr_handle.fd = priv->ctx->async_fd;
 		priv->intr_handle.type = RTE_INTR_HANDLE_EXT;