[dpdk-dev,v4,6/6] net/failsafe: fix removed device handling

Message ID 1515587465-9304-7-git-send-email-matan@mellanox.com (mailing list archive)
State Superseded, archived
Delegated to: Ferruh Yigit
Headers

Checks

Context Check Description
ci/checkpatch warning coding style issues
ci/Intel-compilation success Compilation OK

Commit Message

Matan Azrad Jan. 10, 2018, 12:31 p.m. UTC
  There is time between the physical removal of the device until
sub-device PMDs get a RMV interrupt. At this time DPDK PMDs and
applications still don't know about the removal and may call sub-device
control operation which should return an error.

In previous code this error is reported to the application contrary to
fail-safe principle that the app should not be aware of device removal.

Add an removal check in each relevant control command error flow and
prevent an error report to application when the sub-device is removed.

Signed-off-by: Matan Azrad <matan@mellanox.com>
---
 drivers/net/failsafe/failsafe_flow.c    | 18 ++++++++++-------
 drivers/net/failsafe/failsafe_ops.c     | 35 ++++++++++++++++++++++-----------
 drivers/net/failsafe/failsafe_private.h | 11 +++++++++++
 3 files changed, 46 insertions(+), 18 deletions(-)
  

Comments

Matan Azrad Jan. 10, 2018, 12:43 p.m. UTC | #1
Hi Gaetan

> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Matan Azrad
> Sent: Wednesday, January 10, 2018 2:31 PM
> To: Thomas Monjalon <thomas@monjalon.net>; Gaetan Rivet
> <gaetan.rivet@6wind.com>
> Cc: dev@dpdk.org
> Subject: [dpdk-dev] [PATCH v4 6/6] net/failsafe: fix removed device handling
> 
> There is time between the physical removal of the device until sub-device
> PMDs get a RMV interrupt. At this time DPDK PMDs and applications still
> don't know about the removal and may call sub-device control operation
> which should return an error.
> 
> In previous code this error is reported to the application contrary to fail-safe
> principle that the app should not be aware of device removal.
> 
> Add an removal check in each relevant control command error flow and
> prevent an error report to application when the sub-device is removed.
> 
> Signed-off-by: Matan Azrad <matan@mellanox.com>
> ---
>  drivers/net/failsafe/failsafe_flow.c    | 18 ++++++++++-------
>  drivers/net/failsafe/failsafe_ops.c     | 35 ++++++++++++++++++++++--------
> ---
>  drivers/net/failsafe/failsafe_private.h | 11 +++++++++++
>  3 files changed, 46 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/net/failsafe/failsafe_flow.c
> b/drivers/net/failsafe/failsafe_flow.c
> index 153ceee..c072d1e 100644
> --- a/drivers/net/failsafe/failsafe_flow.c
> +++ b/drivers/net/failsafe/failsafe_flow.c
> @@ -87,7 +87,7 @@
>  		DEBUG("Calling rte_flow_validate on sub_device %d", i);
>  		ret = rte_flow_validate(PORT_ID(sdev),
>  				attr, patterns, actions, error);
> -		if (ret) {
> +		if ((ret = fs_err(sdev, ret))) {
This assignment in "if" statement causes to checkpatch error, I sent it as is because you asked it like this.
If you think I need to change it, I see 2 options:

1.
ret = fs_err(sdev, ret);
if (ret ) {...}

2.
if (fs_err(sdev, &ret)) {..}

what do you think?

>  			ERROR("Operation rte_flow_validate failed for
> sub_device %d"
>  			      " with error %d", i, ret);
>  			return ret;
> @@ -111,7 +111,7 @@
>  	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
>  		flow->flows[i] = rte_flow_create(PORT_ID(sdev),
>  				attr, patterns, actions, error);
> -		if (flow->flows[i] == NULL) {
> +		if (flow->flows[i] == NULL && fs_err(sdev, -rte_errno)) {
>  			ERROR("Failed to create flow on sub_device %d",
>  				i);
>  			goto err;
> @@ -150,7 +150,7 @@
>  			continue;
>  		local_ret = rte_flow_destroy(PORT_ID(sdev),
>  				flow->flows[i], error);
> -		if (local_ret) {
> +		if ((local_ret = fs_err(sdev, local_ret))) {
>  			ERROR("Failed to destroy flow on sub_device %d:
> %d",
>  					i, local_ret);
>  			if (ret == 0)
> @@ -175,7 +175,7 @@
>  	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
>  		DEBUG("Calling rte_flow_flush on sub_device %d", i);
>  		ret = rte_flow_flush(PORT_ID(sdev), error);
> -		if (ret) {
> +		if ((ret = fs_err(sdev, ret))) {
>  			ERROR("Operation rte_flow_flush failed for
> sub_device %d"
>  			      " with error %d", i, ret);
>  			return ret;
> @@ -199,8 +199,12 @@
> 
>  	sdev = TX_SUBDEV(dev);
>  	if (sdev != NULL) {
> -		return rte_flow_query(PORT_ID(sdev),
> -				flow->flows[SUB_ID(sdev)], type, arg, error);
> +		int ret = rte_flow_query(PORT_ID(sdev),
> +					 flow->flows[SUB_ID(sdev)],
> +					 type, arg, error);
> +
> +		if ((ret = fs_err(sdev, ret)))
> +			return ret;
>  	}
>  	WARN("No active sub_device to query about its flow");
>  	return -1;
> @@ -223,7 +227,7 @@
>  			WARN("flow isolation mode of sub_device %d in
> incoherent state.",
>  				i);
>  		ret = rte_flow_isolate(PORT_ID(sdev), set, error);
> -		if (ret) {
> +		if ((ret = fs_err(sdev, ret))) {
>  			ERROR("Operation rte_flow_isolate failed for
> sub_device %d"
>  			      " with error %d", i, ret);
>  			return ret;
> diff --git a/drivers/net/failsafe/failsafe_ops.c
> b/drivers/net/failsafe/failsafe_ops.c
> index e16a590..f5390db 100644
> --- a/drivers/net/failsafe/failsafe_ops.c
> +++ b/drivers/net/failsafe/failsafe_ops.c
> @@ -121,6 +121,8 @@
>  					dev->data->nb_tx_queues,
>  					&dev->data->dev_conf);
>  		if (ret) {
> +			if (!fs_err(sdev, ret))
> +				continue;
>  			ERROR("Could not configure sub_device %d", i);
>  			return ret;
>  		}
> @@ -163,8 +165,11 @@
>  			continue;
>  		DEBUG("Starting sub_device %d", i);
>  		ret = rte_eth_dev_start(PORT_ID(sdev));
> -		if (ret)
> +		if (ret) {
> +			if (!fs_err(sdev, ret))
> +				continue;
>  			return ret;
> +		}
>  		sdev->state = DEV_STARTED;
>  	}
>  	if (PRIV(dev)->state < DEV_STARTED)
> @@ -196,7 +201,7 @@
>  	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
>  		DEBUG("Calling rte_eth_dev_set_link_up on sub_device
> %d", i);
>  		ret = rte_eth_dev_set_link_up(PORT_ID(sdev));
> -		if (ret) {
> +		if ((ret = fs_err(sdev, ret))) {
>  			ERROR("Operation rte_eth_dev_set_link_up failed
> for sub_device %d"
>  			      " with error %d", i, ret);
>  			return ret;
> @@ -215,7 +220,7 @@
>  	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
>  		DEBUG("Calling rte_eth_dev_set_link_down on sub_device
> %d", i);
>  		ret = rte_eth_dev_set_link_down(PORT_ID(sdev));
> -		if (ret) {
> +		if ((ret = fs_err(sdev, ret))) {
>  			ERROR("Operation rte_eth_dev_set_link_down
> failed for sub_device %d"
>  			      " with error %d", i, ret);
>  			return ret;
> @@ -300,7 +305,7 @@
>  				rx_queue_id,
>  				nb_rx_desc, socket_id,
>  				rx_conf, mb_pool);
> -		if (ret) {
> +		if ((ret = fs_err(sdev, ret))) {
>  			ERROR("RX queue setup failed for sub_device %d",
> i);
>  			goto free_rxq;
>  		}
> @@ -366,7 +371,7 @@
>  				tx_queue_id,
>  				nb_tx_desc, socket_id,
>  				tx_conf);
> -		if (ret) {
> +		if ((ret = fs_err(sdev, ret))) {
>  			ERROR("TX queue setup failed for sub_device %d", i);
>  			goto free_txq;
>  		}
> @@ -445,7 +450,8 @@
>  	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
>  		DEBUG("Calling link_update on sub_device %d", i);
>  		ret = (SUBOPS(sdev, link_update))(ETH(sdev),
> wait_to_complete);
> -		if (ret && ret != -1) {
> +		if (ret && ret != -1 && sdev->remove == 0 &&
> +		    rte_eth_dev_is_removed(PORT_ID(sdev)) == 0) {
>  			ERROR("Link update failed for sub_device %d with
> error %d",
>  			      i, ret);
>  			return ret;
> @@ -469,6 +475,7 @@
>  fs_stats_get(struct rte_eth_dev *dev,
>  	     struct rte_eth_stats *stats)
>  {
> +	struct rte_eth_stats backup;
>  	struct sub_device *sdev;
>  	uint8_t i;
>  	int ret;
> @@ -478,14 +485,20 @@
>  		struct rte_eth_stats *snapshot = &sdev-
> >stats_snapshot.stats;
>  		uint64_t *timestamp = &sdev->stats_snapshot.timestamp;
> 
> +		rte_memcpy(&backup, snapshot, sizeof(backup));
>  		ret = rte_eth_stats_get(PORT_ID(sdev), snapshot);
>  		if (ret) {
> +			if (!fs_err(sdev, ret)) {
> +				rte_memcpy(snapshot, &backup,
> sizeof(backup));
> +				goto inc;
> +			}
>  			ERROR("Operation rte_eth_stats_get failed for
> sub_device %d with error %d",
>  				  i, ret);
>  			*timestamp = 0;
>  			return ret;
>  		}
>  		*timestamp = rte_rdtsc();
> +inc:
>  		failsafe_stats_increment(stats, snapshot);
>  	}
>  	return 0;
> @@ -598,7 +611,7 @@
>  	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
>  		DEBUG("Calling rte_eth_dev_set_mtu on sub_device %d", i);
>  		ret = rte_eth_dev_set_mtu(PORT_ID(sdev), mtu);
> -		if (ret) {
> +		if ((ret = fs_err(sdev, ret))) {
>  			ERROR("Operation rte_eth_dev_set_mtu failed for
> sub_device %d with error %d",
>  			      i, ret);
>  			return ret;
> @@ -617,7 +630,7 @@
>  	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
>  		DEBUG("Calling rte_eth_dev_vlan_filter on sub_device %d",
> i);
>  		ret = rte_eth_dev_vlan_filter(PORT_ID(sdev), vlan_id, on);
> -		if (ret) {
> +		if ((ret = fs_err(sdev, ret))) {
>  			ERROR("Operation rte_eth_dev_vlan_filter failed for
> sub_device %d"
>  			      " with error %d", i, ret);
>  			return ret;
> @@ -651,7 +664,7 @@
>  	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
>  		DEBUG("Calling rte_eth_dev_flow_ctrl_set on sub_device
> %d", i);
>  		ret = rte_eth_dev_flow_ctrl_set(PORT_ID(sdev), fc_conf);
> -		if (ret) {
> +		if ((ret = fs_err(sdev, ret))) {
>  			ERROR("Operation rte_eth_dev_flow_ctrl_set failed
> for sub_device %d"
>  			      " with error %d", i, ret);
>  			return ret;
> @@ -688,7 +701,7 @@
>  	RTE_ASSERT(index < FAILSAFE_MAX_ETHADDR);
>  	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
>  		ret = rte_eth_dev_mac_addr_add(PORT_ID(sdev),
> mac_addr, vmdq);
> -		if (ret) {
> +		if ((ret = fs_err(sdev, ret))) {
>  			ERROR("Operation rte_eth_dev_mac_addr_add
> failed for sub_device %"
>  			      PRIu8 " with error %d", i, ret);
>  			return ret;
> @@ -730,7 +743,7 @@
>  	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
>  		DEBUG("Calling rte_eth_dev_filter_ctrl on sub_device %d",
> i);
>  		ret = rte_eth_dev_filter_ctrl(PORT_ID(sdev), type, op, arg);
> -		if (ret) {
> +		if ((ret = fs_err(sdev, ret))) {
>  			ERROR("Operation rte_eth_dev_filter_ctrl failed for
> sub_device %d"
>  			      " with error %d", i, ret);
>  			return ret;
> diff --git a/drivers/net/failsafe/failsafe_private.h
> b/drivers/net/failsafe/failsafe_private.h
> index d81cc3c..a306970 100644
> --- a/drivers/net/failsafe/failsafe_private.h
> +++ b/drivers/net/failsafe/failsafe_private.h
> @@ -375,4 +375,15 @@ int failsafe_eth_lsc_event_callback(uint16_t port_id,
>  	rte_wmb();
>  }
> 
> +/*
> + * Adjust error value and rte_errno to the fail-safe actual error value.
> + */
> +static inline int
> +fs_err(struct sub_device *sdev, int err) {
> +	/* A device removal shouldn't be reported as an error. */
> +	if (sdev->remove == 1 || err == -EIO)
> +		return rte_errno = 0;
> +	return err;
> +}
>  #endif /* _RTE_ETH_FAILSAFE_PRIVATE_H_ */
> --
> 1.8.3.1
  
Gaëtan Rivet Jan. 10, 2018, 1:47 p.m. UTC | #2
Hi Matan,

I am a bit concerned with the forceful rte_errno reset within fs_err, I
think it would have been safer to only do this when rte_errno is
modified by the eth_dev ops (either in rte_flow or another library).

This means that the eth_dev API will be slightly different whether the
ethdev is a fail-safe or a bare device, which might throw off some
users. One could read the eth_dev source and not see any rte_errno
change, and wonder why it is modified on some specific ports.

But this is pretty minor, and if problem arises it will be easy to
pinpoint and fix, so I won't bother you further on this patch.

Thanks for the good work.

On Wed, Jan 10, 2018 at 12:31:05PM +0000, Matan Azrad wrote:
> There is time between the physical removal of the device until
> sub-device PMDs get a RMV interrupt. At this time DPDK PMDs and
> applications still don't know about the removal and may call sub-device
> control operation which should return an error.
> 
> In previous code this error is reported to the application contrary to
> fail-safe principle that the app should not be aware of device removal.
> 
> Add an removal check in each relevant control command error flow and
> prevent an error report to application when the sub-device is removed.
> 
> Signed-off-by: Matan Azrad <matan@mellanox.com>
Acked-by: Gaetan Rivet <gaetan.rivet@6wind.com>
> ---
>  drivers/net/failsafe/failsafe_flow.c    | 18 ++++++++++-------
>  drivers/net/failsafe/failsafe_ops.c     | 35 ++++++++++++++++++++++-----------
>  drivers/net/failsafe/failsafe_private.h | 11 +++++++++++
>  3 files changed, 46 insertions(+), 18 deletions(-)
>
  
Gaëtan Rivet Jan. 10, 2018, 1:51 p.m. UTC | #3
Hi Matan,

On Wed, Jan 10, 2018 at 12:43:33PM +0000, Matan Azrad wrote:
> Hi Gaetan
> 
> > -----Original Message-----
> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Matan Azrad
> > Sent: Wednesday, January 10, 2018 2:31 PM
> > To: Thomas Monjalon <thomas@monjalon.net>; Gaetan Rivet
> > <gaetan.rivet@6wind.com>
> > Cc: dev@dpdk.org
> > Subject: [dpdk-dev] [PATCH v4 6/6] net/failsafe: fix removed device handling
> > 
> > There is time between the physical removal of the device until sub-device
> > PMDs get a RMV interrupt. At this time DPDK PMDs and applications still
> > don't know about the removal and may call sub-device control operation
> > which should return an error.
> > 
> > In previous code this error is reported to the application contrary to fail-safe
> > principle that the app should not be aware of device removal.
> > 
> > Add an removal check in each relevant control command error flow and
> > prevent an error report to application when the sub-device is removed.
> > 
> > Signed-off-by: Matan Azrad <matan@mellanox.com>
> > ---
> >  drivers/net/failsafe/failsafe_flow.c    | 18 ++++++++++-------
> >  drivers/net/failsafe/failsafe_ops.c     | 35 ++++++++++++++++++++++--------
> > ---
> >  drivers/net/failsafe/failsafe_private.h | 11 +++++++++++
> >  3 files changed, 46 insertions(+), 18 deletions(-)
> > 
> > diff --git a/drivers/net/failsafe/failsafe_flow.c
> > b/drivers/net/failsafe/failsafe_flow.c
> > index 153ceee..c072d1e 100644
> > --- a/drivers/net/failsafe/failsafe_flow.c
> > +++ b/drivers/net/failsafe/failsafe_flow.c
> > @@ -87,7 +87,7 @@
> >  		DEBUG("Calling rte_flow_validate on sub_device %d", i);
> >  		ret = rte_flow_validate(PORT_ID(sdev),
> >  				attr, patterns, actions, error);
> > -		if (ret) {
> > +		if ((ret = fs_err(sdev, ret))) {
> This assignment in "if" statement causes to checkpatch error, I sent it as is because you asked it like this.
> If you think I need to change it, I see 2 options:
> 
> 1.
> ret = fs_err(sdev, ret);
> if (ret ) {...}
> 
> 2.
> if (fs_err(sdev, &ret)) {..}
> 
> what do you think?
> 

Yes I forgot that checkpatch was like this.
Our mail crossed, but I acked this patch. I think this is acceptable at
the driver level, or should be at the discretion of the driver
maintainer.

So personally, I'd say leave it this way. If someone or something shouts
about this we will consider alternatives. Otherwise this is readable and
easily understandable as-is.

Regards,
  

Patch

diff --git a/drivers/net/failsafe/failsafe_flow.c b/drivers/net/failsafe/failsafe_flow.c
index 153ceee..c072d1e 100644
--- a/drivers/net/failsafe/failsafe_flow.c
+++ b/drivers/net/failsafe/failsafe_flow.c
@@ -87,7 +87,7 @@ 
 		DEBUG("Calling rte_flow_validate on sub_device %d", i);
 		ret = rte_flow_validate(PORT_ID(sdev),
 				attr, patterns, actions, error);
-		if (ret) {
+		if ((ret = fs_err(sdev, ret))) {
 			ERROR("Operation rte_flow_validate failed for sub_device %d"
 			      " with error %d", i, ret);
 			return ret;
@@ -111,7 +111,7 @@ 
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
 		flow->flows[i] = rte_flow_create(PORT_ID(sdev),
 				attr, patterns, actions, error);
-		if (flow->flows[i] == NULL) {
+		if (flow->flows[i] == NULL && fs_err(sdev, -rte_errno)) {
 			ERROR("Failed to create flow on sub_device %d",
 				i);
 			goto err;
@@ -150,7 +150,7 @@ 
 			continue;
 		local_ret = rte_flow_destroy(PORT_ID(sdev),
 				flow->flows[i], error);
-		if (local_ret) {
+		if ((local_ret = fs_err(sdev, local_ret))) {
 			ERROR("Failed to destroy flow on sub_device %d: %d",
 					i, local_ret);
 			if (ret == 0)
@@ -175,7 +175,7 @@ 
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
 		DEBUG("Calling rte_flow_flush on sub_device %d", i);
 		ret = rte_flow_flush(PORT_ID(sdev), error);
-		if (ret) {
+		if ((ret = fs_err(sdev, ret))) {
 			ERROR("Operation rte_flow_flush failed for sub_device %d"
 			      " with error %d", i, ret);
 			return ret;
@@ -199,8 +199,12 @@ 
 
 	sdev = TX_SUBDEV(dev);
 	if (sdev != NULL) {
-		return rte_flow_query(PORT_ID(sdev),
-				flow->flows[SUB_ID(sdev)], type, arg, error);
+		int ret = rte_flow_query(PORT_ID(sdev),
+					 flow->flows[SUB_ID(sdev)],
+					 type, arg, error);
+
+		if ((ret = fs_err(sdev, ret)))
+			return ret;
 	}
 	WARN("No active sub_device to query about its flow");
 	return -1;
@@ -223,7 +227,7 @@ 
 			WARN("flow isolation mode of sub_device %d in incoherent state.",
 				i);
 		ret = rte_flow_isolate(PORT_ID(sdev), set, error);
-		if (ret) {
+		if ((ret = fs_err(sdev, ret))) {
 			ERROR("Operation rte_flow_isolate failed for sub_device %d"
 			      " with error %d", i, ret);
 			return ret;
diff --git a/drivers/net/failsafe/failsafe_ops.c b/drivers/net/failsafe/failsafe_ops.c
index e16a590..f5390db 100644
--- a/drivers/net/failsafe/failsafe_ops.c
+++ b/drivers/net/failsafe/failsafe_ops.c
@@ -121,6 +121,8 @@ 
 					dev->data->nb_tx_queues,
 					&dev->data->dev_conf);
 		if (ret) {
+			if (!fs_err(sdev, ret))
+				continue;
 			ERROR("Could not configure sub_device %d", i);
 			return ret;
 		}
@@ -163,8 +165,11 @@ 
 			continue;
 		DEBUG("Starting sub_device %d", i);
 		ret = rte_eth_dev_start(PORT_ID(sdev));
-		if (ret)
+		if (ret) {
+			if (!fs_err(sdev, ret))
+				continue;
 			return ret;
+		}
 		sdev->state = DEV_STARTED;
 	}
 	if (PRIV(dev)->state < DEV_STARTED)
@@ -196,7 +201,7 @@ 
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
 		DEBUG("Calling rte_eth_dev_set_link_up on sub_device %d", i);
 		ret = rte_eth_dev_set_link_up(PORT_ID(sdev));
-		if (ret) {
+		if ((ret = fs_err(sdev, ret))) {
 			ERROR("Operation rte_eth_dev_set_link_up failed for sub_device %d"
 			      " with error %d", i, ret);
 			return ret;
@@ -215,7 +220,7 @@ 
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
 		DEBUG("Calling rte_eth_dev_set_link_down on sub_device %d", i);
 		ret = rte_eth_dev_set_link_down(PORT_ID(sdev));
-		if (ret) {
+		if ((ret = fs_err(sdev, ret))) {
 			ERROR("Operation rte_eth_dev_set_link_down failed for sub_device %d"
 			      " with error %d", i, ret);
 			return ret;
@@ -300,7 +305,7 @@ 
 				rx_queue_id,
 				nb_rx_desc, socket_id,
 				rx_conf, mb_pool);
-		if (ret) {
+		if ((ret = fs_err(sdev, ret))) {
 			ERROR("RX queue setup failed for sub_device %d", i);
 			goto free_rxq;
 		}
@@ -366,7 +371,7 @@ 
 				tx_queue_id,
 				nb_tx_desc, socket_id,
 				tx_conf);
-		if (ret) {
+		if ((ret = fs_err(sdev, ret))) {
 			ERROR("TX queue setup failed for sub_device %d", i);
 			goto free_txq;
 		}
@@ -445,7 +450,8 @@ 
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
 		DEBUG("Calling link_update on sub_device %d", i);
 		ret = (SUBOPS(sdev, link_update))(ETH(sdev), wait_to_complete);
-		if (ret && ret != -1) {
+		if (ret && ret != -1 && sdev->remove == 0 &&
+		    rte_eth_dev_is_removed(PORT_ID(sdev)) == 0) {
 			ERROR("Link update failed for sub_device %d with error %d",
 			      i, ret);
 			return ret;
@@ -469,6 +475,7 @@ 
 fs_stats_get(struct rte_eth_dev *dev,
 	     struct rte_eth_stats *stats)
 {
+	struct rte_eth_stats backup;
 	struct sub_device *sdev;
 	uint8_t i;
 	int ret;
@@ -478,14 +485,20 @@ 
 		struct rte_eth_stats *snapshot = &sdev->stats_snapshot.stats;
 		uint64_t *timestamp = &sdev->stats_snapshot.timestamp;
 
+		rte_memcpy(&backup, snapshot, sizeof(backup));
 		ret = rte_eth_stats_get(PORT_ID(sdev), snapshot);
 		if (ret) {
+			if (!fs_err(sdev, ret)) {
+				rte_memcpy(snapshot, &backup, sizeof(backup));
+				goto inc;
+			}
 			ERROR("Operation rte_eth_stats_get failed for sub_device %d with error %d",
 				  i, ret);
 			*timestamp = 0;
 			return ret;
 		}
 		*timestamp = rte_rdtsc();
+inc:
 		failsafe_stats_increment(stats, snapshot);
 	}
 	return 0;
@@ -598,7 +611,7 @@ 
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
 		DEBUG("Calling rte_eth_dev_set_mtu on sub_device %d", i);
 		ret = rte_eth_dev_set_mtu(PORT_ID(sdev), mtu);
-		if (ret) {
+		if ((ret = fs_err(sdev, ret))) {
 			ERROR("Operation rte_eth_dev_set_mtu failed for sub_device %d with error %d",
 			      i, ret);
 			return ret;
@@ -617,7 +630,7 @@ 
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
 		DEBUG("Calling rte_eth_dev_vlan_filter on sub_device %d", i);
 		ret = rte_eth_dev_vlan_filter(PORT_ID(sdev), vlan_id, on);
-		if (ret) {
+		if ((ret = fs_err(sdev, ret))) {
 			ERROR("Operation rte_eth_dev_vlan_filter failed for sub_device %d"
 			      " with error %d", i, ret);
 			return ret;
@@ -651,7 +664,7 @@ 
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
 		DEBUG("Calling rte_eth_dev_flow_ctrl_set on sub_device %d", i);
 		ret = rte_eth_dev_flow_ctrl_set(PORT_ID(sdev), fc_conf);
-		if (ret) {
+		if ((ret = fs_err(sdev, ret))) {
 			ERROR("Operation rte_eth_dev_flow_ctrl_set failed for sub_device %d"
 			      " with error %d", i, ret);
 			return ret;
@@ -688,7 +701,7 @@ 
 	RTE_ASSERT(index < FAILSAFE_MAX_ETHADDR);
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
 		ret = rte_eth_dev_mac_addr_add(PORT_ID(sdev), mac_addr, vmdq);
-		if (ret) {
+		if ((ret = fs_err(sdev, ret))) {
 			ERROR("Operation rte_eth_dev_mac_addr_add failed for sub_device %"
 			      PRIu8 " with error %d", i, ret);
 			return ret;
@@ -730,7 +743,7 @@ 
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
 		DEBUG("Calling rte_eth_dev_filter_ctrl on sub_device %d", i);
 		ret = rte_eth_dev_filter_ctrl(PORT_ID(sdev), type, op, arg);
-		if (ret) {
+		if ((ret = fs_err(sdev, ret))) {
 			ERROR("Operation rte_eth_dev_filter_ctrl failed for sub_device %d"
 			      " with error %d", i, ret);
 			return ret;
diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h
index d81cc3c..a306970 100644
--- a/drivers/net/failsafe/failsafe_private.h
+++ b/drivers/net/failsafe/failsafe_private.h
@@ -375,4 +375,15 @@  int failsafe_eth_lsc_event_callback(uint16_t port_id,
 	rte_wmb();
 }
 
+/*
+ * Adjust error value and rte_errno to the fail-safe actual error value.
+ */
+static inline int
+fs_err(struct sub_device *sdev, int err)
+{
+	/* A device removal shouldn't be reported as an error. */
+	if (sdev->remove == 1 || err == -EIO)
+		return rte_errno = 0;
+	return err;
+}
 #endif /* _RTE_ETH_FAILSAFE_PRIVATE_H_ */