[dpdk-dev,2/3] net/mlx4: adjust removal error

Message ID 1509637324-13525-3-git-send-email-matan@mellanox.com (mailing list archive)
State Superseded, archived
Delegated to: Shahaf Shuler
Headers

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK

Commit Message

Matan Azrad Nov. 2, 2017, 3:42 p.m. UTC
  Fail-safe PMD expects to get -ENODEV error value if sub PMD control
command fails because of device removal.

Make control callbacks return with -ENODEV when the device has
disappeared.

Signed-off-by: Matan Azrad <matan@mellanox.com>
---
 drivers/net/mlx4/mlx4.h        |  1 +
 drivers/net/mlx4/mlx4_ethdev.c | 38 ++++++++++++++++++++++++++++++++++----
 drivers/net/mlx4/mlx4_flow.c   |  2 ++
 drivers/net/mlx4/mlx4_intr.c   |  5 ++++-
 drivers/net/mlx4/mlx4_rxq.c    |  1 +
 drivers/net/mlx4/mlx4_txq.c    |  1 +
 6 files changed, 43 insertions(+), 5 deletions(-)
  

Comments

Adrien Mazarguil Nov. 3, 2017, 1:05 p.m. UTC | #1
On Thu, Nov 02, 2017 at 03:42:03PM +0000, Matan Azrad wrote:
> Fail-safe PMD expects to get -ENODEV error value if sub PMD control
> command fails because of device removal.
> 
> Make control callbacks return with -ENODEV when the device has
> disappeared.
> 
> Signed-off-by: Matan Azrad <matan@mellanox.com>

I think there are a several inconsistencies regarding the places where
mlx4_removed() is used, this could lead to mistakes or redundant calls to
this function later on.

You have to choose between low-level internal functions
(e.g. mlx4_set_sysfs_ulong()) or user-facing ones from the eth_dev_ops
interface (e.g. mlx4_dev_set_link_up()), but neither intermediate functions
nor a mix of all approaches.

Standardizing on low-level functions is not practical as it means you'd have
to check for a device removal after each ibv_*() call. Therefore my
suggestion is to check it at the highest level, in all functions exposed
though mlx4_dev_ops in case of error, even innocuous one like
mlx4_stats_get() and those returning void (rte_errno can still be set), all
in the name of consistency.

The mlx4_removed() documentation should be updated to reflect the places
it's supposed to be called as well. All this means a larger patch is
necessary.

See below for coding style issues.

> ---
>  drivers/net/mlx4/mlx4.h        |  1 +
>  drivers/net/mlx4/mlx4_ethdev.c | 38 ++++++++++++++++++++++++++++++++++----
>  drivers/net/mlx4/mlx4_flow.c   |  2 ++
>  drivers/net/mlx4/mlx4_intr.c   |  5 ++++-
>  drivers/net/mlx4/mlx4_rxq.c    |  1 +
>  drivers/net/mlx4/mlx4_txq.c    |  1 +
>  6 files changed, 43 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
> index e0a9853..cac9654 100644
> --- a/drivers/net/mlx4/mlx4.h
> +++ b/drivers/net/mlx4/mlx4.h
> @@ -149,6 +149,7 @@ int mlx4_flow_ctrl_get(struct rte_eth_dev *dev,
>  		       struct rte_eth_fc_conf *fc_conf);
>  int mlx4_flow_ctrl_set(struct rte_eth_dev *dev,
>  		       struct rte_eth_fc_conf *fc_conf);
> +int mlx4_removed(const struct priv *priv);
>  
>  /* mlx4_intr.c */
>  
> diff --git a/drivers/net/mlx4/mlx4_ethdev.c b/drivers/net/mlx4/mlx4_ethdev.c
> index b0acd12..76914b0 100644
> --- a/drivers/net/mlx4/mlx4_ethdev.c
> +++ b/drivers/net/mlx4/mlx4_ethdev.c
> @@ -312,6 +312,8 @@
>  
>  	ret = mlx4_sysfs_write(priv, name, value_str, (sizeof(value_str) - 1));
>  	if (ret < 0) {
> +		if (mlx4_removed(priv))
> +			ret = -ENODEV;
>  		DEBUG("cannot write %s `%s' (%lu) to sysfs: %s",
>  		      name, value_str, value, strerror(rte_errno));
>  		return ret;
> @@ -340,15 +342,19 @@
>  
>  	if (sock == -1) {
>  		rte_errno = errno;
> -		return -rte_errno;
> +		goto error;
>  	}
>  	ret = mlx4_get_ifname(priv, &ifr->ifr_name);
>  	if (!ret && ioctl(sock, req, ifr) == -1) {
>  		rte_errno = errno;
> -		ret = -rte_errno;
> +		close(sock);
> +		goto error;
>  	}
>  	close(sock);
>  	return ret;
> +error:
> +	mlx4_removed(priv);
> +	return -rte_errno;
>  }
>  
>  /**
> @@ -473,13 +479,17 @@
>  	if (up) {
>  		err = mlx4_set_flags(priv, ~IFF_UP, IFF_UP);
>  		if (err)
> -			return err;
> +			goto error;
>  	} else {
>  		err = mlx4_set_flags(priv, ~IFF_UP, ~IFF_UP);
>  		if (err)
> -			return err;
> +			goto error;
>  	}
>  	return 0;
> +error:
> +	if (mlx4_removed(priv))
> +		return -ENODEV;
> +	return err;
>  }
>  
>  /**
> @@ -947,6 +957,7 @@ enum rxmode_toggle {
>  
>  	ifr.ifr_data = (void *)&ethpause;
>  	if (mlx4_ifreq(priv, SIOCETHTOOL, &ifr)) {
> +		mlx4_removed(priv);
>  		ret = rte_errno;
>  		WARN("ioctl(SIOCETHTOOL, ETHTOOL_GPAUSEPARAM)"
>  		     " failed: %s",
> @@ -1002,6 +1013,7 @@ enum rxmode_toggle {
>  	else
>  		ethpause.tx_pause = 0;
>  	if (mlx4_ifreq(priv, SIOCETHTOOL, &ifr)) {
> +		mlx4_removed(priv);
>  		ret = rte_errno;
>  		WARN("ioctl(SIOCETHTOOL, ETHTOOL_SPAUSEPARAM)"
>  		     " failed: %s",
> @@ -1013,3 +1025,21 @@ enum rxmode_toggle {
>  	assert(ret >= 0);
>  	return -ret;
>  }

Missing empty line.

> +/**
> + * Check if mlx4 device was removed.

"mlx4" is a somewhat redundant given PMD name.

A separate paragraph should describe where this function is supposed to be
called.

> + *
> + * @param priv
> + *   Pointer to private structure.
> + *
> + * @return
> + *   -ENODEV when device is removed and rte_errno is set, otherwise 0.
> + */
> +int
> +mlx4_removed(const struct priv *priv)
> +{
> +	struct ibv_device_attr device_attr;
> +
> +	if (ibv_query_device(priv->ctx, &device_attr) == EIO)
> +		return -(rte_errno = ENODEV);

Although a nice shortcut, coding rules don't allow this. You have to assign
rte_errno on its own separate line. My suggestion if you want to avoid a
block would be to return 0 directly when != EIO.

> +	return 0;
> +}
> diff --git a/drivers/net/mlx4/mlx4_flow.c b/drivers/net/mlx4/mlx4_flow.c
> index 8b87b29..606c888 100644
> --- a/drivers/net/mlx4/mlx4_flow.c
> +++ b/drivers/net/mlx4/mlx4_flow.c
> @@ -1069,6 +1069,8 @@ struct mlx4_drop {
>  	err = errno;
>  	msg = "flow rule rejected by device";
>  error:
> +	if (mlx4_removed(priv))
> +		err = ENODEV;
>  	return rte_flow_error_set
>  		(error, err, RTE_FLOW_ERROR_TYPE_HANDLE, flow, msg);
>  }
> diff --git a/drivers/net/mlx4/mlx4_intr.c b/drivers/net/mlx4/mlx4_intr.c
> index b17d109..0ebdb28 100644
> --- a/drivers/net/mlx4/mlx4_intr.c
> +++ b/drivers/net/mlx4/mlx4_intr.c
> @@ -359,7 +359,10 @@
>  			ret = EINVAL;
>  	}
>  	if (ret) {
> -		rte_errno = ret;
> +		if (mlx4_removed(dev->data->dev_private))
> +			ret = ENODEV;
> +		else
> +			rte_errno = ret;
>  		WARN("unable to disable interrupt on rx queue %d",
>  		     idx);
>  	} else {
> diff --git a/drivers/net/mlx4/mlx4_rxq.c b/drivers/net/mlx4/mlx4_rxq.c
> index 7fe21b6..43dad26 100644
> --- a/drivers/net/mlx4/mlx4_rxq.c
> +++ b/drivers/net/mlx4/mlx4_rxq.c
> @@ -832,6 +832,7 @@ void mlx4_rss_detach(struct mlx4_rss *rss)
>  	ret = rte_errno;
>  	mlx4_rx_queue_release(rxq);
>  	rte_errno = ret;
> +	mlx4_removed(priv);
>  	assert(rte_errno > 0);
>  	return -rte_errno;
>  }
> diff --git a/drivers/net/mlx4/mlx4_txq.c b/drivers/net/mlx4/mlx4_txq.c
> index a9c5bd2..09bdfd8 100644
> --- a/drivers/net/mlx4/mlx4_txq.c
> +++ b/drivers/net/mlx4/mlx4_txq.c
> @@ -372,6 +372,7 @@ struct txq_mp2mr_mbuf_check_data {
>  	ret = rte_errno;
>  	mlx4_tx_queue_release(txq);
>  	rte_errno = ret;
> +	mlx4_removed(priv);
>  	assert(rte_errno > 0);
>  	return -rte_errno;
>  }
> -- 
> 1.8.3.1
>
  
Matan Azrad Nov. 5, 2017, 6:52 a.m. UTC | #2
Hi Adrien,
Thanks for the review :)

Please see below comments.

> -----Original Message-----
> From: Adrien Mazarguil [mailto:adrien.mazarguil@6wind.com]
> Sent: Friday, November 3, 2017 3:06 PM
> To: Matan Azrad <matan@mellanox.com>
> Cc: Gaetan Rivet <gaetan.rivet@6wind.com>; dev@dpdk.org
> Subject: Re: [PATCH 2/3] net/mlx4: adjust removal error
> 
> On Thu, Nov 02, 2017 at 03:42:03PM +0000, Matan Azrad wrote:
> > Fail-safe PMD expects to get -ENODEV error value if sub PMD control
> > command fails because of device removal.
> >
> > Make control callbacks return with -ENODEV when the device has
> > disappeared.
> >
> > Signed-off-by: Matan Azrad <matan@mellanox.com>
> 
> I think there are a several inconsistencies regarding the places where
> mlx4_removed() is used, this could lead to mistakes or redundant calls to this
> function later on.
> 
> You have to choose between low-level internal functions (e.g.
> mlx4_set_sysfs_ulong()) or user-facing ones from the eth_dev_ops
> interface (e.g. mlx4_dev_set_link_up()), but neither intermediate functions
> nor a mix of all approaches.

You are touching here, exactly in one of my design thoughts:
Either using always "low" level error adjustments or using always high level  adjustments.
The high level approach does less reuse of code but simpler to maintain (as you said).
I decided to combine the two approaches while never going to the lowest level code(ibv, pipes).
Adding the check in mlx4_dev_set_link() can replace two checks: in mlx4_dev_set_link_up() and mlx4_dev_set_link_down(). 
Adding the check in mlx4_flow_toggle()can replace many checks: all flows callbacks and also mlx4_mac_addr_add(),mlx4_mac_addr_set().
You right regarding  mlx4_set_sysfs_ulong() it can be replaced by check in mlx4_mtu_set() - will fix it in V2.
You right regarding  mlx4_ifreq(), it can be replaced by check in in mlx4_link_update() - - will fix it in V2.

I can understand the consistency approach but I think the above two cases to be in lower level functions are harmless and reuse code.
What do you think?

> 
> Standardizing on low-level functions is not practical as it means you'd have to
> check for a device removal after each ibv_*() call. Therefore my suggestion is
> to check it at the highest level, in all functions exposed though
> mlx4_dev_ops in case of error, even innocuous one like
> mlx4_stats_get() and those returning void (rte_errno can still be set), all in
> the name of consistency.
> 

If everything OK with the callback (even in a removal case) why to set rte_errno?
Specifically in mlx4_stats_get() has no error flow and we don't want error return in case of removal since we can provide stats even after removal (SW counters) and this is a good "feature" for failsafe plug out saving stats process.  

> The mlx4_removed() documentation should be updated to reflect the places
> it's supposed to be called as well. All this means a larger patch is necessary.
> 

Do you mean documentation in code(comment) or mlx4 docs, maybe both?

> See below for coding style issues.
> 
> > ---
> >  drivers/net/mlx4/mlx4.h        |  1 +
> >  drivers/net/mlx4/mlx4_ethdev.c | 38
> ++++++++++++++++++++++++++++++++++----
> >  drivers/net/mlx4/mlx4_flow.c   |  2 ++
> >  drivers/net/mlx4/mlx4_intr.c   |  5 ++++-
> >  drivers/net/mlx4/mlx4_rxq.c    |  1 +
> >  drivers/net/mlx4/mlx4_txq.c    |  1 +
> >  6 files changed, 43 insertions(+), 5 deletions(-)
> >
> > diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h index
> > e0a9853..cac9654 100644
> > --- a/drivers/net/mlx4/mlx4.h
> > +++ b/drivers/net/mlx4/mlx4.h
> > @@ -149,6 +149,7 @@ int mlx4_flow_ctrl_get(struct rte_eth_dev *dev,
> >  		       struct rte_eth_fc_conf *fc_conf);  int
> > mlx4_flow_ctrl_set(struct rte_eth_dev *dev,
> >  		       struct rte_eth_fc_conf *fc_conf);
> > +int mlx4_removed(const struct priv *priv);
> >
> >  /* mlx4_intr.c */
> >
> > diff --git a/drivers/net/mlx4/mlx4_ethdev.c
> > b/drivers/net/mlx4/mlx4_ethdev.c index b0acd12..76914b0 100644
> > --- a/drivers/net/mlx4/mlx4_ethdev.c
> > +++ b/drivers/net/mlx4/mlx4_ethdev.c
> > @@ -312,6 +312,8 @@
> >
> >  	ret = mlx4_sysfs_write(priv, name, value_str, (sizeof(value_str) - 1));
> >  	if (ret < 0) {
> > +		if (mlx4_removed(priv))
> > +			ret = -ENODEV;
> >  		DEBUG("cannot write %s `%s' (%lu) to sysfs: %s",
> >  		      name, value_str, value, strerror(rte_errno));
> >  		return ret;
> > @@ -340,15 +342,19 @@
> >
> >  	if (sock == -1) {
> >  		rte_errno = errno;
> > -		return -rte_errno;
> > +		goto error;
> >  	}
> >  	ret = mlx4_get_ifname(priv, &ifr->ifr_name);
> >  	if (!ret && ioctl(sock, req, ifr) == -1) {
> >  		rte_errno = errno;
> > -		ret = -rte_errno;
> > +		close(sock);
> > +		goto error;
> >  	}
> >  	close(sock);
> >  	return ret;
> > +error:
> > +	mlx4_removed(priv);
> > +	return -rte_errno;
> >  }
> >
> >  /**
> > @@ -473,13 +479,17 @@
> >  	if (up) {
> >  		err = mlx4_set_flags(priv, ~IFF_UP, IFF_UP);
> >  		if (err)
> > -			return err;
> > +			goto error;
> >  	} else {
> >  		err = mlx4_set_flags(priv, ~IFF_UP, ~IFF_UP);
> >  		if (err)
> > -			return err;
> > +			goto error;
> >  	}
> >  	return 0;
> > +error:
> > +	if (mlx4_removed(priv))
> > +		return -ENODEV;
> > +	return err;
> >  }
> >
> >  /**
> > @@ -947,6 +957,7 @@ enum rxmode_toggle {
> >
> >  	ifr.ifr_data = (void *)&ethpause;
> >  	if (mlx4_ifreq(priv, SIOCETHTOOL, &ifr)) {
> > +		mlx4_removed(priv);
> >  		ret = rte_errno;
> >  		WARN("ioctl(SIOCETHTOOL, ETHTOOL_GPAUSEPARAM)"
> >  		     " failed: %s",
> > @@ -1002,6 +1013,7 @@ enum rxmode_toggle {
> >  	else
> >  		ethpause.tx_pause = 0;
> >  	if (mlx4_ifreq(priv, SIOCETHTOOL, &ifr)) {
> > +		mlx4_removed(priv);
> >  		ret = rte_errno;
> >  		WARN("ioctl(SIOCETHTOOL, ETHTOOL_SPAUSEPARAM)"
> >  		     " failed: %s",
> > @@ -1013,3 +1025,21 @@ enum rxmode_toggle {
> >  	assert(ret >= 0);
> >  	return -ret;
> >  }
> 
> Missing empty line.
> 
OK.

> > +/**
> > + * Check if mlx4 device was removed.
> 
> "mlx4" is a somewhat redundant given PMD name.
> 
> A separate paragraph should describe where this function is supposed to be
> called.
> 
OK.

> > + *
> > + * @param priv
> > + *   Pointer to private structure.
> > + *
> > + * @return
> > + *   -ENODEV when device is removed and rte_errno is set, otherwise 0.
> > + */
> > +int
> > +mlx4_removed(const struct priv *priv) {
> > +	struct ibv_device_attr device_attr;
> > +
> > +	if (ibv_query_device(priv->ctx, &device_attr) == EIO)
> > +		return -(rte_errno = ENODEV);
> 
> Although a nice shortcut, coding rules don't allow this. You have to assign
> rte_errno on its own separate line. My suggestion if you want to avoid a
> block would be to return 0 directly when != EIO.
> 

Can you address me to this code rule documentation?

> > +	return 0;
> > +}
> > diff --git a/drivers/net/mlx4/mlx4_flow.c
> > b/drivers/net/mlx4/mlx4_flow.c index 8b87b29..606c888 100644
> > --- a/drivers/net/mlx4/mlx4_flow.c
> > +++ b/drivers/net/mlx4/mlx4_flow.c
> > @@ -1069,6 +1069,8 @@ struct mlx4_drop {
> >  	err = errno;
> >  	msg = "flow rule rejected by device";
> >  error:
> > +	if (mlx4_removed(priv))
> > +		err = ENODEV;
> >  	return rte_flow_error_set
> >  		(error, err, RTE_FLOW_ERROR_TYPE_HANDLE, flow, msg);  }
> diff --git
> > a/drivers/net/mlx4/mlx4_intr.c b/drivers/net/mlx4/mlx4_intr.c index
> > b17d109..0ebdb28 100644
> > --- a/drivers/net/mlx4/mlx4_intr.c
> > +++ b/drivers/net/mlx4/mlx4_intr.c
> > @@ -359,7 +359,10 @@
> >  			ret = EINVAL;
> >  	}
> >  	if (ret) {
> > -		rte_errno = ret;
> > +		if (mlx4_removed(dev->data->dev_private))
> > +			ret = ENODEV;
> > +		else
> > +			rte_errno = ret;
> >  		WARN("unable to disable interrupt on rx queue %d",
> >  		     idx);
> >  	} else {
> > diff --git a/drivers/net/mlx4/mlx4_rxq.c b/drivers/net/mlx4/mlx4_rxq.c
> > index 7fe21b6..43dad26 100644
> > --- a/drivers/net/mlx4/mlx4_rxq.c
> > +++ b/drivers/net/mlx4/mlx4_rxq.c
> > @@ -832,6 +832,7 @@ void mlx4_rss_detach(struct mlx4_rss *rss)
> >  	ret = rte_errno;
> >  	mlx4_rx_queue_release(rxq);
> >  	rte_errno = ret;
> > +	mlx4_removed(priv);
> >  	assert(rte_errno > 0);
> >  	return -rte_errno;
> >  }
> > diff --git a/drivers/net/mlx4/mlx4_txq.c b/drivers/net/mlx4/mlx4_txq.c
> > index a9c5bd2..09bdfd8 100644
> > --- a/drivers/net/mlx4/mlx4_txq.c
> > +++ b/drivers/net/mlx4/mlx4_txq.c
> > @@ -372,6 +372,7 @@ struct txq_mp2mr_mbuf_check_data {
> >  	ret = rte_errno;
> >  	mlx4_tx_queue_release(txq);
> >  	rte_errno = ret;
> > +	mlx4_removed(priv);
> >  	assert(rte_errno > 0);
> >  	return -rte_errno;
> >  }
> > --
> > 1.8.3.1
> >
> 
> --
> Adrien Mazarguil
> 6WIND
  
Adrien Mazarguil Nov. 6, 2017, 4:51 p.m. UTC | #3
Hi Matan,

On Sun, Nov 05, 2017 at 06:52:59AM +0000, Matan Azrad wrote:
> Hi Adrien,
> Thanks for the review :)
> 
> Please see below comments.
> 
> > -----Original Message-----
> > From: Adrien Mazarguil [mailto:adrien.mazarguil@6wind.com]
> > Sent: Friday, November 3, 2017 3:06 PM
> > To: Matan Azrad <matan@mellanox.com>
> > Cc: Gaetan Rivet <gaetan.rivet@6wind.com>; dev@dpdk.org
> > Subject: Re: [PATCH 2/3] net/mlx4: adjust removal error
> > 
> > On Thu, Nov 02, 2017 at 03:42:03PM +0000, Matan Azrad wrote:
> > > Fail-safe PMD expects to get -ENODEV error value if sub PMD control
> > > command fails because of device removal.
> > >
> > > Make control callbacks return with -ENODEV when the device has
> > > disappeared.
> > >
> > > Signed-off-by: Matan Azrad <matan@mellanox.com>
> > 
> > I think there are a several inconsistencies regarding the places where
> > mlx4_removed() is used, this could lead to mistakes or redundant calls to this
> > function later on.
> > 
> > You have to choose between low-level internal functions (e.g.
> > mlx4_set_sysfs_ulong()) or user-facing ones from the eth_dev_ops
> > interface (e.g. mlx4_dev_set_link_up()), but neither intermediate functions
> > nor a mix of all approaches.
> 
> You are touching here, exactly in one of my design thoughts:
> Either using always "low" level error adjustments or using always high level  adjustments.
> The high level approach does less reuse of code but simpler to maintain (as you said).
> I decided to combine the two approaches while never going to the lowest level code(ibv, pipes).
> Adding the check in mlx4_dev_set_link() can replace two checks: in mlx4_dev_set_link_up() and mlx4_dev_set_link_down(). 
> Adding the check in mlx4_flow_toggle()can replace many checks: all flows callbacks and also mlx4_mac_addr_add(),mlx4_mac_addr_set().
> You right regarding  mlx4_set_sysfs_ulong() it can be replaced by check in mlx4_mtu_set() - will fix it in V2.
> You right regarding  mlx4_ifreq(), it can be replaced by check in in mlx4_link_update() - - will fix it in V2.
> 
> I can understand the consistency approach but I think the above two cases to be in lower level functions are harmless and reuse code.
> What do you think?

Well, given this is pure control path code where performance doesn't really
matter, in my opinion we should focus on making its maintenance easier by
having it directly in all eth_dev_ops. It's much easier to document as well.

Actually I think the ethdev API should evolve to provide a separate
"is_removed()" dev_ops implemented by PMDs and automatically used by upper
ethdev layers in order to not have to patch all callbacks in all PMDs like
you did.

> > Standardizing on low-level functions is not practical as it means you'd have to
> > check for a device removal after each ibv_*() call. Therefore my suggestion is
> > to check it at the highest level, in all functions exposed though
> > mlx4_dev_ops in case of error, even innocuous one like
> > mlx4_stats_get() and those returning void (rte_errno can still be set), all in
> > the name of consistency.
> > 
> 
> If everything OK with the callback (even in a removal case) why to set rte_errno?
> Specifically in mlx4_stats_get() has no error flow and we don't want error return in case of removal since we can provide stats even after removal (SW counters) and this is a good "feature" for failsafe plug out saving stats process.  

I said "in the name of consistency" to keep things logical without special
cases since updating rte_errno shouldn't really hurt anyone.

However void-returning functions like mlx4_stats_get() shouldn't have any
side effects (rte_errno should remain unchanged after calling it even if
undocumented), it's OK if you leave them out, although keep in mind mlx4
with its software counters is a bit special. Polling counters on a
nonexistent (unplugged) physical device may yield errors or random garbage
with other PMDs.

> > The mlx4_removed() documentation should be updated to reflect the places
> > it's supposed to be called as well. All this means a larger patch is necessary.
> > 
> 
> Do you mean documentation in code(comment) or mlx4 docs, maybe both?

I mean only the Doxygen comment describing what this function does.
Internally documenting where it's supposed to be called is useful.

> > See below for coding style issues.
> > 
> > > ---
> > >  drivers/net/mlx4/mlx4.h        |  1 +
> > >  drivers/net/mlx4/mlx4_ethdev.c | 38
> > ++++++++++++++++++++++++++++++++++----
> > >  drivers/net/mlx4/mlx4_flow.c   |  2 ++
> > >  drivers/net/mlx4/mlx4_intr.c   |  5 ++++-
> > >  drivers/net/mlx4/mlx4_rxq.c    |  1 +
> > >  drivers/net/mlx4/mlx4_txq.c    |  1 +
> > >  6 files changed, 43 insertions(+), 5 deletions(-)
> > >
> > > diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h index
> > > e0a9853..cac9654 100644
> > > --- a/drivers/net/mlx4/mlx4.h
> > > +++ b/drivers/net/mlx4/mlx4.h
> > > @@ -149,6 +149,7 @@ int mlx4_flow_ctrl_get(struct rte_eth_dev *dev,
> > >  		       struct rte_eth_fc_conf *fc_conf);  int
> > > mlx4_flow_ctrl_set(struct rte_eth_dev *dev,
> > >  		       struct rte_eth_fc_conf *fc_conf);
> > > +int mlx4_removed(const struct priv *priv);
> > >
> > >  /* mlx4_intr.c */
> > >
> > > diff --git a/drivers/net/mlx4/mlx4_ethdev.c
> > > b/drivers/net/mlx4/mlx4_ethdev.c index b0acd12..76914b0 100644
> > > --- a/drivers/net/mlx4/mlx4_ethdev.c
> > > +++ b/drivers/net/mlx4/mlx4_ethdev.c
> > > @@ -312,6 +312,8 @@
> > >
> > >  	ret = mlx4_sysfs_write(priv, name, value_str, (sizeof(value_str) - 1));
> > >  	if (ret < 0) {
> > > +		if (mlx4_removed(priv))
> > > +			ret = -ENODEV;
> > >  		DEBUG("cannot write %s `%s' (%lu) to sysfs: %s",
> > >  		      name, value_str, value, strerror(rte_errno));
> > >  		return ret;
> > > @@ -340,15 +342,19 @@
> > >
> > >  	if (sock == -1) {
> > >  		rte_errno = errno;
> > > -		return -rte_errno;
> > > +		goto error;
> > >  	}
> > >  	ret = mlx4_get_ifname(priv, &ifr->ifr_name);
> > >  	if (!ret && ioctl(sock, req, ifr) == -1) {
> > >  		rte_errno = errno;
> > > -		ret = -rte_errno;
> > > +		close(sock);
> > > +		goto error;
> > >  	}
> > >  	close(sock);
> > >  	return ret;
> > > +error:
> > > +	mlx4_removed(priv);
> > > +	return -rte_errno;
> > >  }
> > >
> > >  /**
> > > @@ -473,13 +479,17 @@
> > >  	if (up) {
> > >  		err = mlx4_set_flags(priv, ~IFF_UP, IFF_UP);
> > >  		if (err)
> > > -			return err;
> > > +			goto error;
> > >  	} else {
> > >  		err = mlx4_set_flags(priv, ~IFF_UP, ~IFF_UP);
> > >  		if (err)
> > > -			return err;
> > > +			goto error;
> > >  	}
> > >  	return 0;
> > > +error:
> > > +	if (mlx4_removed(priv))
> > > +		return -ENODEV;
> > > +	return err;
> > >  }
> > >
> > >  /**
> > > @@ -947,6 +957,7 @@ enum rxmode_toggle {
> > >
> > >  	ifr.ifr_data = (void *)&ethpause;
> > >  	if (mlx4_ifreq(priv, SIOCETHTOOL, &ifr)) {
> > > +		mlx4_removed(priv);
> > >  		ret = rte_errno;
> > >  		WARN("ioctl(SIOCETHTOOL, ETHTOOL_GPAUSEPARAM)"
> > >  		     " failed: %s",
> > > @@ -1002,6 +1013,7 @@ enum rxmode_toggle {
> > >  	else
> > >  		ethpause.tx_pause = 0;
> > >  	if (mlx4_ifreq(priv, SIOCETHTOOL, &ifr)) {
> > > +		mlx4_removed(priv);
> > >  		ret = rte_errno;
> > >  		WARN("ioctl(SIOCETHTOOL, ETHTOOL_SPAUSEPARAM)"
> > >  		     " failed: %s",
> > > @@ -1013,3 +1025,21 @@ enum rxmode_toggle {
> > >  	assert(ret >= 0);
> > >  	return -ret;
> > >  }
> > 
> > Missing empty line.
> > 
> OK.
> 
> > > +/**
> > > + * Check if mlx4 device was removed.
> > 
> > "mlx4" is a somewhat redundant given PMD name.
> > 
> > A separate paragraph should describe where this function is supposed to be
> > called.
> > 
> OK.
> 
> > > + *
> > > + * @param priv
> > > + *   Pointer to private structure.
> > > + *
> > > + * @return
> > > + *   -ENODEV when device is removed and rte_errno is set, otherwise 0.
> > > + */
> > > +int
> > > +mlx4_removed(const struct priv *priv) {
> > > +	struct ibv_device_attr device_attr;
> > > +
> > > +	if (ibv_query_device(priv->ctx, &device_attr) == EIO)
> > > +		return -(rte_errno = ENODEV);
> > 
> > Although a nice shortcut, coding rules don't allow this. You have to assign
> > rte_errno on its own separate line. My suggestion if you want to avoid a
> > block would be to return 0 directly when != EIO.
> > 
> 
> Can you address me to this code rule documentation?

Yes and no, I take it back; there is no such coding rule in DPDK.

What bit me in the past was actually a checkpatch error which forbids
assignments in conditionals, e.g.:

 if ((x = foo()) == 42) ...

That's not the case here since it's not a conditional. However for
consistency with the rest of the code in that PMD, my comment still
stands :)

> > > +	return 0;
> > > +}
> > > diff --git a/drivers/net/mlx4/mlx4_flow.c
> > > b/drivers/net/mlx4/mlx4_flow.c index 8b87b29..606c888 100644
> > > --- a/drivers/net/mlx4/mlx4_flow.c
> > > +++ b/drivers/net/mlx4/mlx4_flow.c
> > > @@ -1069,6 +1069,8 @@ struct mlx4_drop {
> > >  	err = errno;
> > >  	msg = "flow rule rejected by device";
> > >  error:
> > > +	if (mlx4_removed(priv))
> > > +		err = ENODEV;
> > >  	return rte_flow_error_set
> > >  		(error, err, RTE_FLOW_ERROR_TYPE_HANDLE, flow, msg);  }
> > diff --git
> > > a/drivers/net/mlx4/mlx4_intr.c b/drivers/net/mlx4/mlx4_intr.c index
> > > b17d109..0ebdb28 100644
> > > --- a/drivers/net/mlx4/mlx4_intr.c
> > > +++ b/drivers/net/mlx4/mlx4_intr.c
> > > @@ -359,7 +359,10 @@
> > >  			ret = EINVAL;
> > >  	}
> > >  	if (ret) {
> > > -		rte_errno = ret;
> > > +		if (mlx4_removed(dev->data->dev_private))
> > > +			ret = ENODEV;
> > > +		else
> > > +			rte_errno = ret;
> > >  		WARN("unable to disable interrupt on rx queue %d",
> > >  		     idx);
> > >  	} else {
> > > diff --git a/drivers/net/mlx4/mlx4_rxq.c b/drivers/net/mlx4/mlx4_rxq.c
> > > index 7fe21b6..43dad26 100644
> > > --- a/drivers/net/mlx4/mlx4_rxq.c
> > > +++ b/drivers/net/mlx4/mlx4_rxq.c
> > > @@ -832,6 +832,7 @@ void mlx4_rss_detach(struct mlx4_rss *rss)
> > >  	ret = rte_errno;
> > >  	mlx4_rx_queue_release(rxq);
> > >  	rte_errno = ret;
> > > +	mlx4_removed(priv);
> > >  	assert(rte_errno > 0);
> > >  	return -rte_errno;
> > >  }
> > > diff --git a/drivers/net/mlx4/mlx4_txq.c b/drivers/net/mlx4/mlx4_txq.c
> > > index a9c5bd2..09bdfd8 100644
> > > --- a/drivers/net/mlx4/mlx4_txq.c
> > > +++ b/drivers/net/mlx4/mlx4_txq.c
> > > @@ -372,6 +372,7 @@ struct txq_mp2mr_mbuf_check_data {
> > >  	ret = rte_errno;
> > >  	mlx4_tx_queue_release(txq);
> > >  	rte_errno = ret;
> > > +	mlx4_removed(priv);
> > >  	assert(rte_errno > 0);
> > >  	return -rte_errno;
> > >  }
> > > --
> > > 1.8.3.1
> > >
> > 
> > --
> > Adrien Mazarguil
> > 6WIND
  

Patch

diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index e0a9853..cac9654 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -149,6 +149,7 @@  int mlx4_flow_ctrl_get(struct rte_eth_dev *dev,
 		       struct rte_eth_fc_conf *fc_conf);
 int mlx4_flow_ctrl_set(struct rte_eth_dev *dev,
 		       struct rte_eth_fc_conf *fc_conf);
+int mlx4_removed(const struct priv *priv);
 
 /* mlx4_intr.c */
 
diff --git a/drivers/net/mlx4/mlx4_ethdev.c b/drivers/net/mlx4/mlx4_ethdev.c
index b0acd12..76914b0 100644
--- a/drivers/net/mlx4/mlx4_ethdev.c
+++ b/drivers/net/mlx4/mlx4_ethdev.c
@@ -312,6 +312,8 @@ 
 
 	ret = mlx4_sysfs_write(priv, name, value_str, (sizeof(value_str) - 1));
 	if (ret < 0) {
+		if (mlx4_removed(priv))
+			ret = -ENODEV;
 		DEBUG("cannot write %s `%s' (%lu) to sysfs: %s",
 		      name, value_str, value, strerror(rte_errno));
 		return ret;
@@ -340,15 +342,19 @@ 
 
 	if (sock == -1) {
 		rte_errno = errno;
-		return -rte_errno;
+		goto error;
 	}
 	ret = mlx4_get_ifname(priv, &ifr->ifr_name);
 	if (!ret && ioctl(sock, req, ifr) == -1) {
 		rte_errno = errno;
-		ret = -rte_errno;
+		close(sock);
+		goto error;
 	}
 	close(sock);
 	return ret;
+error:
+	mlx4_removed(priv);
+	return -rte_errno;
 }
 
 /**
@@ -473,13 +479,17 @@ 
 	if (up) {
 		err = mlx4_set_flags(priv, ~IFF_UP, IFF_UP);
 		if (err)
-			return err;
+			goto error;
 	} else {
 		err = mlx4_set_flags(priv, ~IFF_UP, ~IFF_UP);
 		if (err)
-			return err;
+			goto error;
 	}
 	return 0;
+error:
+	if (mlx4_removed(priv))
+		return -ENODEV;
+	return err;
 }
 
 /**
@@ -947,6 +957,7 @@  enum rxmode_toggle {
 
 	ifr.ifr_data = (void *)&ethpause;
 	if (mlx4_ifreq(priv, SIOCETHTOOL, &ifr)) {
+		mlx4_removed(priv);
 		ret = rte_errno;
 		WARN("ioctl(SIOCETHTOOL, ETHTOOL_GPAUSEPARAM)"
 		     " failed: %s",
@@ -1002,6 +1013,7 @@  enum rxmode_toggle {
 	else
 		ethpause.tx_pause = 0;
 	if (mlx4_ifreq(priv, SIOCETHTOOL, &ifr)) {
+		mlx4_removed(priv);
 		ret = rte_errno;
 		WARN("ioctl(SIOCETHTOOL, ETHTOOL_SPAUSEPARAM)"
 		     " failed: %s",
@@ -1013,3 +1025,21 @@  enum rxmode_toggle {
 	assert(ret >= 0);
 	return -ret;
 }
+/**
+ * Check if mlx4 device was removed.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ *
+ * @return
+ *   -ENODEV when device is removed and rte_errno is set, otherwise 0.
+ */
+int
+mlx4_removed(const struct priv *priv)
+{
+	struct ibv_device_attr device_attr;
+
+	if (ibv_query_device(priv->ctx, &device_attr) == EIO)
+		return -(rte_errno = ENODEV);
+	return 0;
+}
diff --git a/drivers/net/mlx4/mlx4_flow.c b/drivers/net/mlx4/mlx4_flow.c
index 8b87b29..606c888 100644
--- a/drivers/net/mlx4/mlx4_flow.c
+++ b/drivers/net/mlx4/mlx4_flow.c
@@ -1069,6 +1069,8 @@  struct mlx4_drop {
 	err = errno;
 	msg = "flow rule rejected by device";
 error:
+	if (mlx4_removed(priv))
+		err = ENODEV;
 	return rte_flow_error_set
 		(error, err, RTE_FLOW_ERROR_TYPE_HANDLE, flow, msg);
 }
diff --git a/drivers/net/mlx4/mlx4_intr.c b/drivers/net/mlx4/mlx4_intr.c
index b17d109..0ebdb28 100644
--- a/drivers/net/mlx4/mlx4_intr.c
+++ b/drivers/net/mlx4/mlx4_intr.c
@@ -359,7 +359,10 @@ 
 			ret = EINVAL;
 	}
 	if (ret) {
-		rte_errno = ret;
+		if (mlx4_removed(dev->data->dev_private))
+			ret = ENODEV;
+		else
+			rte_errno = ret;
 		WARN("unable to disable interrupt on rx queue %d",
 		     idx);
 	} else {
diff --git a/drivers/net/mlx4/mlx4_rxq.c b/drivers/net/mlx4/mlx4_rxq.c
index 7fe21b6..43dad26 100644
--- a/drivers/net/mlx4/mlx4_rxq.c
+++ b/drivers/net/mlx4/mlx4_rxq.c
@@ -832,6 +832,7 @@  void mlx4_rss_detach(struct mlx4_rss *rss)
 	ret = rte_errno;
 	mlx4_rx_queue_release(rxq);
 	rte_errno = ret;
+	mlx4_removed(priv);
 	assert(rte_errno > 0);
 	return -rte_errno;
 }
diff --git a/drivers/net/mlx4/mlx4_txq.c b/drivers/net/mlx4/mlx4_txq.c
index a9c5bd2..09bdfd8 100644
--- a/drivers/net/mlx4/mlx4_txq.c
+++ b/drivers/net/mlx4/mlx4_txq.c
@@ -372,6 +372,7 @@  struct txq_mp2mr_mbuf_check_data {
 	ret = rte_errno;
 	mlx4_tx_queue_release(txq);
 	rte_errno = ret;
+	mlx4_removed(priv);
 	assert(rte_errno > 0);
 	return -rte_errno;
 }