[dpdk-dev,v2,10/11] net/failsafe: fix sub-device ownership race

Message ID 20180509224313.27289-11-thomas@monjalon.net (mailing list archive)
State Superseded, archived
Headers

Checks

Context Check Description
ci/checkpatch warning coding style issues
ci/Intel-compilation success Compilation OK

Commit Message

Thomas Monjalon May 9, 2018, 10:43 p.m. UTC
  From: Matan Azrad <matan@mellanox.com>

There is time between the sub-device port probing by the sub-device PMD
to the sub-device port ownership taking by a fail-safe port.

In this time, the port is available for the application usage. For
example, the port will be exposed to the applications which use
RTE_ETH_FOREACH_DEV iterator.

Thus, ownership unaware applications may manage the port in this time
what may cause a lot of problematic behaviors in the fail-safe
sub-device initialization.

Register to the ethdev NEW event to take the sub-device port ownership
before it becomes exposed to the application.

Fixes: a46f8d584eb8 ("net/failsafe: add fail-safe PMD")
Cc: stable@dpdk.org

Signed-off-by: Matan Azrad <matan@mellanox.com>
Acked-by: Gaetan Rivet <gaetan.rivet@6wind.com>
---
 drivers/net/failsafe/failsafe.c         | 22 ++++++++++---
 drivers/net/failsafe/failsafe_eal.c     | 56 ++++++++++++++++++++++-----------
 drivers/net/failsafe/failsafe_ether.c   | 23 ++++++++++++++
 drivers/net/failsafe/failsafe_private.h |  4 +++
 4 files changed, 82 insertions(+), 23 deletions(-)
  

Comments

Andrew Rybchenko May 10, 2018, 11:15 a.m. UTC | #1
On 05/10/2018 01:43 AM, Thomas Monjalon wrote:
> From: Matan Azrad <matan@mellanox.com>
>
> There is time between the sub-device port probing by the sub-device PMD
> to the sub-device port ownership taking by a fail-safe port.
>
> In this time, the port is available for the application usage. For
> example, the port will be exposed to the applications which use
> RTE_ETH_FOREACH_DEV iterator.
>
> Thus, ownership unaware applications may manage the port in this time
> what may cause a lot of problematic behaviors in the fail-safe
> sub-device initialization.
>
> Register to the ethdev NEW event to take the sub-device port ownership
> before it becomes exposed to the application.
>
> Fixes: a46f8d584eb8 ("net/failsafe: add fail-safe PMD")
> Cc: stable@dpdk.org
>
> Signed-off-by: Matan Azrad <matan@mellanox.com>
> Acked-by: Gaetan Rivet <gaetan.rivet@6wind.com>
> ---
>   drivers/net/failsafe/failsafe.c         | 22 ++++++++++---
>   drivers/net/failsafe/failsafe_eal.c     | 56 ++++++++++++++++++++++-----------
>   drivers/net/failsafe/failsafe_ether.c   | 23 ++++++++++++++
>   drivers/net/failsafe/failsafe_private.h |  4 +++
>   4 files changed, 82 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/net/failsafe/failsafe.c b/drivers/net/failsafe/failsafe.c
> index fc989c4f5..c9d128de3 100644
> --- a/drivers/net/failsafe/failsafe.c
> +++ b/drivers/net/failsafe/failsafe.c
> @@ -204,16 +204,25 @@ fs_eth_dev_create(struct rte_vdev_device *vdev)
>   	}
>   	snprintf(priv->my_owner.name, sizeof(priv->my_owner.name),
>   		 FAILSAFE_OWNER_NAME);
> +	DEBUG("Failsafe port %u owner info: %s_%016"PRIX64, dev->data->port_id,
> +	      priv->my_owner.name, priv->my_owner.id);
> +	ret = rte_eth_dev_callback_register(RTE_ETH_ALL, RTE_ETH_EVENT_NEW,
> +					    failsafe_eth_new_event_callback,
> +					    dev);
> +	if (ret) {
> +		ERROR("Failed to register NEW callback");
> +		goto free_args;
> +	}
>   	ret = failsafe_eal_init(dev);
>   	if (ret)
> -		goto free_args;
> +		goto unregister_new_callback;
>   	ret = fs_mutex_init(priv);
>   	if (ret)
> -		goto free_args;
> +		goto unregister_new_callback;
>   	ret = failsafe_hotplug_alarm_install(dev);
>   	if (ret) {
>   		ERROR("Could not set up plug-in event detection");
> -		goto free_args;
> +		goto unregister_new_callback;
>   	}
>   	mac = &dev->data->mac_addrs[0];
>   	if (mac_from_arg) {
> @@ -226,7 +235,7 @@ fs_eth_dev_create(struct rte_vdev_device *vdev)
>   							       mac);
>   			if (ret) {
>   				ERROR("Failed to set default MAC address");
> -				goto free_args;
> +				goto unregister_new_callback;

It will fail to apply on next-net since there is cancel_alarm there now.
So, this hunk should be simply skipped.

>   			}
>   		}
>   	} else {
> @@ -261,6 +270,9 @@ fs_eth_dev_create(struct rte_vdev_device *vdev)
>   	};
>   	rte_eth_dev_probing_finish(dev);
>   	return 0;
> +unregister_new_callback:
> +	rte_eth_dev_callback_unregister(RTE_ETH_ALL, RTE_ETH_EVENT_NEW,
> +					failsafe_eth_new_event_callback, dev);

There is cancel_alarm here in next-net.

>   free_args:
>   	failsafe_args_free(dev);
>   free_subs:
> @@ -280,6 +292,8 @@ fs_rte_eth_free(const char *name)
>   	dev = rte_eth_dev_allocated(name);
>   	if (dev == NULL)
>   		return -ENODEV;
> +	rte_eth_dev_callback_unregister(RTE_ETH_ALL, RTE_ETH_EVENT_NEW,
> +					failsafe_eth_new_event_callback, dev);
>   	ret = failsafe_eal_uninit(dev);
>   	if (ret)
>   		ERROR("Error while uninitializing sub-EAL");
> diff --git a/drivers/net/failsafe/failsafe_eal.c b/drivers/net/failsafe/failsafe_eal.c
> index ce767703f..5672f3961 100644
> --- a/drivers/net/failsafe/failsafe_eal.c
> +++ b/drivers/net/failsafe/failsafe_eal.c
> @@ -18,8 +18,9 @@ fs_ethdev_portid_get(const char *name, uint16_t *port_id)
>   		return -EINVAL;
>   	}
>   	len = strlen(name);
> -	RTE_ETH_FOREACH_DEV(pid) {
> -		if (!strncmp(name, rte_eth_devices[pid].device->name, len)) {
> +	for (pid = 0; pid < RTE_MAX_ETHPORTS; pid++) {
> +		if (rte_eth_dev_is_valid_port(pid) &&
> +		    !strncmp(name, rte_eth_devices[pid].device->name, len)) {
>   			*port_id = pid;
>   			return 0;
>   		}
> @@ -41,6 +42,8 @@ fs_bus_init(struct rte_eth_dev *dev)
>   			continue;
>   		da = &sdev->devargs;
>   		if (fs_ethdev_portid_get(da->name, &pid) != 0) {
> +			struct rte_eth_dev_owner pid_owner;
> +
>   			ret = rte_eal_hotplug_add(da->bus->name,
>   						  da->name,
>   						  da->args);
> @@ -55,12 +58,26 @@ fs_bus_init(struct rte_eth_dev *dev)
>   				ERROR("sub_device %d init went wrong", i);
>   				return -ENODEV;
>   			}
> +			/*
> +			 * The NEW callback tried to take ownership, check
> +			 * whether it succeed or didn't.
> +			 */
> +			rte_eth_dev_owner_get(pid, &pid_owner);
> +			if (pid_owner.id != PRIV(dev)->my_owner.id) {
> +				INFO("sub_device %d owner(%s_%016"PRIX64") is not my,"
> +				     " owner(%s_%016"PRIX64"), will try again later",

Frankly speaking I don't understand what and why will change later.

> +				     i, pid_owner.name, pid_owner.id,
> +				     PRIV(dev)->my_owner.name,
> +				     PRIV(dev)->my_owner.id);
> +				continue;
> +			}
>   		} else {
> +			/* The sub-device port was found. */
>   			char devstr[DEVARGS_MAXLEN] = "";
>   			struct rte_devargs *probed_da =
>   					rte_eth_devices[pid].device->devargs;
>   
> -			/* Take control of device probed by EAL options. */
> +			/* Take control of probed device. */
>   			free(da->args);
>   			memset(da, 0, sizeof(*da));
>   			if (probed_da != NULL)
> @@ -77,22 +94,23 @@ fs_bus_init(struct rte_eth_dev *dev)
>   			}
>   			INFO("Taking control of a probed sub device"
>   			      " %d named %s", i, da->name);
> -		}
> -		ret = rte_eth_dev_owner_set(pid, &PRIV(dev)->my_owner);
> -		if (ret < 0) {
> -			INFO("sub_device %d owner set failed (%s),"
> -			     " will try again later", i, strerror(-ret));
> -			continue;
> -		} else if (strncmp(rte_eth_devices[pid].device->name, da->name,
> -			   strlen(da->name)) != 0) {
> -			/*
> -			 * The device probably was removed and its port id was
> -			 * reallocated before ownership set.
> -			 */
> -			rte_eth_dev_owner_unset(pid, PRIV(dev)->my_owner.id);
> -			INFO("sub_device %d was probably removed before taking"
> -			     " ownership, will try again later", i);
> -			continue;
> +			ret = rte_eth_dev_owner_set(pid, &PRIV(dev)->my_owner);
> +			if (ret < 0) {
> +				INFO("sub_device %d owner set failed (%s), "
> +				     "will try again later", i, strerror(-ret));
> +				continue;
> +			} else if (strncmp(rte_eth_devices[pid].device->name,
> +				   da->name, strlen(da->name)) != 0) {
> +				/*
> +				 * The device probably was removed and its port
> +				 * id was reallocated before ownership set.
> +				 */
> +				rte_eth_dev_owner_unset(pid,
> +							PRIV(dev)->my_owner.id);
> +				INFO("sub_device %d was removed before taking"
> +				     " ownership, will try again later", i);
> +				continue;
> +			}
>   		}
>   		ETH(sdev) = &rte_eth_devices[pid];
>   		SUB_ID(sdev) = i;
> diff --git a/drivers/net/failsafe/failsafe_ether.c b/drivers/net/failsafe/failsafe_ether.c
> index b414a7884..ebce87841 100644
> --- a/drivers/net/failsafe/failsafe_ether.c
> +++ b/drivers/net/failsafe/failsafe_ether.c
> @@ -463,3 +463,26 @@ failsafe_eth_lsc_event_callback(uint16_t port_id __rte_unused,
>   	else
>   		return 0;
>   }
> +
> +/* Take sub-device ownership before it becomes exposed to the application. */
> +int
> +failsafe_eth_new_event_callback(uint16_t port_id,
> +				enum rte_eth_event_type event __rte_unused,
> +				void *cb_arg, void *out __rte_unused)
> +{
> +	struct rte_eth_dev *fs_dev = cb_arg;
> +	struct sub_device *sdev;
> +	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
> +	size_t len = strlen(dev->device->name);
> +	uint8_t i;
> +
> +	FOREACH_SUBDEV_STATE(sdev, i, fs_dev, DEV_PARSED) {
> +		if (sdev->state >= DEV_PROBED)
> +			continue;
> +		if (strncmp(sdev->devargs.name, dev->device->name, len) != 0)

Shouldn't we ensure that name is exact match instead of checking
sdev->devargs.name prefix only.

> +			continue;
> +		rte_eth_dev_owner_set(port_id, &PRIV(fs_dev)->my_owner);

I think return value should be processed here.
If we really ignore it, it should be explained why.

> +		break;
> +	}
> +	return 0;
> +}
> diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h
> index b54f8e336..cd8e0b8c9 100644
> --- a/drivers/net/failsafe/failsafe_private.h
> +++ b/drivers/net/failsafe/failsafe_private.h
> @@ -220,6 +220,10 @@ int failsafe_eth_rmv_event_callback(uint16_t port_id,
>   int failsafe_eth_lsc_event_callback(uint16_t port_id,
>   				    enum rte_eth_event_type event,
>   				    void *cb_arg, void *out);
> +int
> +failsafe_eth_new_event_callback(uint16_t port_id,
> +				enum rte_eth_event_type event,
> +				void *cb_arg, void *out);

Return value type and function name should be on the same line in
accordance with style used in this file. Just to be consistent.

[...]
  
Matan Azrad May 10, 2018, 12:03 p.m. UTC | #2
Hi Andrew

> From: Andrew Rybchenko

> On 05/10/2018 01:43 AM, Thomas Monjalon wrote:

> From: Matan Azrad <matan@mellanox.com>

> 

> There is time between the sub-device port probing by the sub-device PMD to

> the sub-device port ownership taking by a fail-safe port.

> 

> In this time, the port is available for the application usage. For example, the

> port will be exposed to the applications which use RTE_ETH_FOREACH_DEV

> iterator.

> 

> Thus, ownership unaware applications may manage the port in this time what

> may cause a lot of problematic behaviors in the fail-safe sub-device

> initialization.

> 

> Register to the ethdev NEW event to take the sub-device port ownership before

> it becomes exposed to the application.

> 

> Fixes: a46f8d584eb8 ("net/failsafe: add fail-safe PMD")

> Cc: stable@dpdk.org

> 

> Signed-off-by: Matan Azrad <matan@mellanox.com>

> Acked-by: Gaetan Rivet <gaetan.rivet@6wind.com>

> ---

>  drivers/net/failsafe/failsafe.c         | 22 ++++++++++---

>  drivers/net/failsafe/failsafe_eal.c     | 56 ++++++++++++++++++++++-----------

>  drivers/net/failsafe/failsafe_ether.c   | 23 ++++++++++++++

>  drivers/net/failsafe/failsafe_private.h |  4 +++

>  4 files changed, 82 insertions(+), 23 deletions(-)

> 

> diff --git a/drivers/net/failsafe/failsafe.c b/drivers/net/failsafe/failsafe.c index

> fc989c4f5..c9d128de3 100644

> --- a/drivers/net/failsafe/failsafe.c

> +++ b/drivers/net/failsafe/failsafe.c

> @@ -204,16 +204,25 @@ fs_eth_dev_create(struct rte_vdev_device *vdev)

>  	}

>  	snprintf(priv->my_owner.name, sizeof(priv->my_owner.name),

>  		 FAILSAFE_OWNER_NAME);

> +	DEBUG("Failsafe port %u owner info: %s_%016"PRIX64, dev->data-

> >port_id,

> +	      priv->my_owner.name, priv->my_owner.id);

> +	ret = rte_eth_dev_callback_register(RTE_ETH_ALL,

> RTE_ETH_EVENT_NEW,

> +					    failsafe_eth_new_event_callback,

> +					    dev);

> +	if (ret) {

> +		ERROR("Failed to register NEW callback");

> +		goto free_args;

> +	}

>  	ret = failsafe_eal_init(dev);

>  	if (ret)

> -		goto free_args;

> +		goto unregister_new_callback;

>  	ret = fs_mutex_init(priv);

>  	if (ret)

> -		goto free_args;

> +		goto unregister_new_callback;

>  	ret = failsafe_hotplug_alarm_install(dev);

>  	if (ret) {

>  		ERROR("Could not set up plug-in event detection");

> -		goto free_args;

> +		goto unregister_new_callback;

>  	}

>  	mac = &dev->data->mac_addrs[0];

>  	if (mac_from_arg) {

> @@ -226,7 +235,7 @@ fs_eth_dev_create(struct rte_vdev_device *vdev)

>  							       mac);

>  			if (ret) {

>  				ERROR("Failed to set default MAC address");

> -				goto free_args;

> +				goto unregister_new_callback;

> 

> It will fail to apply on next-net since there is cancel_alarm there now.

> So, this hunk should be simply skipped.

> 


	Yes, a rebase issue.
> 

>  			}

>  		}

>  	} else {

> @@ -261,6 +270,9 @@ fs_eth_dev_create(struct rte_vdev_device *vdev)

>  	};

>  	rte_eth_dev_probing_finish(dev);

>  	return 0;

> +unregister_new_callback:

> +	rte_eth_dev_callback_unregister(RTE_ETH_ALL,

> RTE_ETH_EVENT_NEW,

> +					failsafe_eth_new_event_callback,

> dev);

> 

> There is cancel_alarm here in next-net.

> 


Yes, a rebase issue.
> 

>  free_args:

>  	failsafe_args_free(dev);

>  free_subs:

> @@ -280,6 +292,8 @@ fs_rte_eth_free(const char *name)

>  	dev = rte_eth_dev_allocated(name);

>  	if (dev == NULL)

>  		return -ENODEV;

> +	rte_eth_dev_callback_unregister(RTE_ETH_ALL,

> RTE_ETH_EVENT_NEW,

> +					failsafe_eth_new_event_callback,

> dev);

>  	ret = failsafe_eal_uninit(dev);

>  	if (ret)

>  		ERROR("Error while uninitializing sub-EAL"); diff --git

> a/drivers/net/failsafe/failsafe_eal.c b/drivers/net/failsafe/failsafe_eal.c

> index ce767703f..5672f3961 100644

> --- a/drivers/net/failsafe/failsafe_eal.c

> +++ b/drivers/net/failsafe/failsafe_eal.c

> @@ -18,8 +18,9 @@ fs_ethdev_portid_get(const char *name, uint16_t

> *port_id)

>  		return -EINVAL;

>  	}

>  	len = strlen(name);

> -	RTE_ETH_FOREACH_DEV(pid) {

> -		if (!strncmp(name, rte_eth_devices[pid].device->name, len)) {

> +	for (pid = 0; pid < RTE_MAX_ETHPORTS; pid++) {

> +		if (rte_eth_dev_is_valid_port(pid) &&

> +		    !strncmp(name, rte_eth_devices[pid].device->name, len)) {

>  			*port_id = pid;

>  			return 0;

>  		}

> @@ -41,6 +42,8 @@ fs_bus_init(struct rte_eth_dev *dev)

>  			continue;

>  		da = &sdev->devargs;

>  		if (fs_ethdev_portid_get(da->name, &pid) != 0) {

> +			struct rte_eth_dev_owner pid_owner;

> +

>  			ret = rte_eal_hotplug_add(da->bus->name,

>  						  da->name,

>  						  da->args);

> @@ -55,12 +58,26 @@ fs_bus_init(struct rte_eth_dev *dev)

>  				ERROR("sub_device %d init went wrong", i);

>  				return -ENODEV;

>  			}

> +			/*

> +			 * The NEW callback tried to take ownership, check

> +			 * whether it succeed or didn't.

> +			 */

> +			rte_eth_dev_owner_get(pid, &pid_owner);

> +			if (pid_owner.id != PRIV(dev)->my_owner.id) {

> +				INFO("sub_device %d

> owner(%s_%016"PRIX64") is not my,"

> +				     " owner(%s_%016"PRIX64"), will try again

> later",

> 

> Frankly speaking I don't understand what and why will change later.


Probably some other dpdk entity took ownership of the device and will release it later,
Fail-safe has alarm mechanism to try the owner taking again and again until success.

> 

> 

> +				     i, pid_owner.name, pid_owner.id,

> +				     PRIV(dev)->my_owner.name,

> +				     PRIV(dev)->my_owner.id);

> +				continue;

> +			}

>  		} else {

> +			/* The sub-device port was found. */

>  			char devstr[DEVARGS_MAXLEN] = "";

>  			struct rte_devargs *probed_da =

>  					rte_eth_devices[pid].device->devargs;

> 

> -			/* Take control of device probed by EAL options. */

> +			/* Take control of probed device. */

>  			free(da->args);

>  			memset(da, 0, sizeof(*da));

>  			if (probed_da != NULL)

> @@ -77,22 +94,23 @@ fs_bus_init(struct rte_eth_dev *dev)

>  			}

>  			INFO("Taking control of a probed sub device"

>  			      " %d named %s", i, da->name);

> -		}

> -		ret = rte_eth_dev_owner_set(pid, &PRIV(dev)->my_owner);

> -		if (ret < 0) {

> -			INFO("sub_device %d owner set failed (%s),"

> -			     " will try again later", i, strerror(-ret));

> -			continue;

> -		} else if (strncmp(rte_eth_devices[pid].device->name, da-

> >name,

> -			   strlen(da->name)) != 0) {

> -			/*

> -			 * The device probably was removed and its port id

> was

> -			 * reallocated before ownership set.

> -			 */

> -			rte_eth_dev_owner_unset(pid, PRIV(dev)-

> >my_owner.id);

> -			INFO("sub_device %d was probably removed before

> taking"

> -			     " ownership, will try again later", i);

> -			continue;

> +			ret = rte_eth_dev_owner_set(pid, &PRIV(dev)-

> >my_owner);

> +			if (ret < 0) {

> +				INFO("sub_device %d owner set failed (%s), "

> +				     "will try again later", i, strerror(-ret));

> +				continue;

> +			} else if (strncmp(rte_eth_devices[pid].device->name,

> +				   da->name, strlen(da->name)) != 0) {

> +				/*

> +				 * The device probably was removed and its

> port

> +				 * id was reallocated before ownership set.

> +				 */

> +				rte_eth_dev_owner_unset(pid,

> +							PRIV(dev)-

> >my_owner.id);

> +				INFO("sub_device %d was removed before

> taking"

> +				     " ownership, will try again later", i);

> +				continue;

> +			}

>  		}

>  		ETH(sdev) = &rte_eth_devices[pid];

>  		SUB_ID(sdev) = i;

> diff --git a/drivers/net/failsafe/failsafe_ether.c

> b/drivers/net/failsafe/failsafe_ether.c

> index b414a7884..ebce87841 100644

> --- a/drivers/net/failsafe/failsafe_ether.c

> +++ b/drivers/net/failsafe/failsafe_ether.c

> @@ -463,3 +463,26 @@ failsafe_eth_lsc_event_callback(uint16_t port_id

> __rte_unused,

>  	else

>  		return 0;

>  }

> +

> +/* Take sub-device ownership before it becomes exposed to the

> +application. */ int failsafe_eth_new_event_callback(uint16_t port_id,

> +				enum rte_eth_event_type event __rte_unused,

> +				void *cb_arg, void *out __rte_unused) {

> +	struct rte_eth_dev *fs_dev = cb_arg;

> +	struct sub_device *sdev;

> +	struct rte_eth_dev *dev = &rte_eth_devices[port_id];

> +	size_t len = strlen(dev->device->name);

> +	uint8_t i;

> +

> +	FOREACH_SUBDEV_STATE(sdev, i, fs_dev, DEV_PARSED) {

> +		if (sdev->state >= DEV_PROBED)

> +			continue;

> +		if (strncmp(sdev->devargs.name, dev->device->name, len) != 0)

> 

> Shouldn't we ensure that name is exact match instead of checking

> sdev->devargs.name prefix only.

> 


Yes you right, no need the len.

> 

> +			continue;

> +		rte_eth_dev_owner_set(port_id, &PRIV(fs_dev)->my_owner);

> 

> I think return value should be processed here.

> If we really ignore it, it should be explained why.


It is actually checked by fs_bus_init, no need it here, I will add a comment.

> 

> +		break;

> +	}

> +	return 0;

> +}

> diff --git a/drivers/net/failsafe/failsafe_private.h

> b/drivers/net/failsafe/failsafe_private.h

> index b54f8e336..cd8e0b8c9 100644

> --- a/drivers/net/failsafe/failsafe_private.h

> +++ b/drivers/net/failsafe/failsafe_private.h

> @@ -220,6 +220,10 @@ int failsafe_eth_rmv_event_callback(uint16_t port_id,

> int failsafe_eth_lsc_event_callback(uint16_t port_id,

>  				    enum rte_eth_event_type event,

>  				    void *cb_arg, void *out);

> +int

> +failsafe_eth_new_event_callback(uint16_t port_id,

> +				enum rte_eth_event_type event,

> +				void *cb_arg, void *out);

> 

> Return value type and function name should be on the same line in accordance

> with style used in this file. Just to be consistent.


Yes, You right.

Thanks for the good review, will update it.

> 

> [...]
  

Patch

diff --git a/drivers/net/failsafe/failsafe.c b/drivers/net/failsafe/failsafe.c
index fc989c4f5..c9d128de3 100644
--- a/drivers/net/failsafe/failsafe.c
+++ b/drivers/net/failsafe/failsafe.c
@@ -204,16 +204,25 @@  fs_eth_dev_create(struct rte_vdev_device *vdev)
 	}
 	snprintf(priv->my_owner.name, sizeof(priv->my_owner.name),
 		 FAILSAFE_OWNER_NAME);
+	DEBUG("Failsafe port %u owner info: %s_%016"PRIX64, dev->data->port_id,
+	      priv->my_owner.name, priv->my_owner.id);
+	ret = rte_eth_dev_callback_register(RTE_ETH_ALL, RTE_ETH_EVENT_NEW,
+					    failsafe_eth_new_event_callback,
+					    dev);
+	if (ret) {
+		ERROR("Failed to register NEW callback");
+		goto free_args;
+	}
 	ret = failsafe_eal_init(dev);
 	if (ret)
-		goto free_args;
+		goto unregister_new_callback;
 	ret = fs_mutex_init(priv);
 	if (ret)
-		goto free_args;
+		goto unregister_new_callback;
 	ret = failsafe_hotplug_alarm_install(dev);
 	if (ret) {
 		ERROR("Could not set up plug-in event detection");
-		goto free_args;
+		goto unregister_new_callback;
 	}
 	mac = &dev->data->mac_addrs[0];
 	if (mac_from_arg) {
@@ -226,7 +235,7 @@  fs_eth_dev_create(struct rte_vdev_device *vdev)
 							       mac);
 			if (ret) {
 				ERROR("Failed to set default MAC address");
-				goto free_args;
+				goto unregister_new_callback;
 			}
 		}
 	} else {
@@ -261,6 +270,9 @@  fs_eth_dev_create(struct rte_vdev_device *vdev)
 	};
 	rte_eth_dev_probing_finish(dev);
 	return 0;
+unregister_new_callback:
+	rte_eth_dev_callback_unregister(RTE_ETH_ALL, RTE_ETH_EVENT_NEW,
+					failsafe_eth_new_event_callback, dev);
 free_args:
 	failsafe_args_free(dev);
 free_subs:
@@ -280,6 +292,8 @@  fs_rte_eth_free(const char *name)
 	dev = rte_eth_dev_allocated(name);
 	if (dev == NULL)
 		return -ENODEV;
+	rte_eth_dev_callback_unregister(RTE_ETH_ALL, RTE_ETH_EVENT_NEW,
+					failsafe_eth_new_event_callback, dev);
 	ret = failsafe_eal_uninit(dev);
 	if (ret)
 		ERROR("Error while uninitializing sub-EAL");
diff --git a/drivers/net/failsafe/failsafe_eal.c b/drivers/net/failsafe/failsafe_eal.c
index ce767703f..5672f3961 100644
--- a/drivers/net/failsafe/failsafe_eal.c
+++ b/drivers/net/failsafe/failsafe_eal.c
@@ -18,8 +18,9 @@  fs_ethdev_portid_get(const char *name, uint16_t *port_id)
 		return -EINVAL;
 	}
 	len = strlen(name);
-	RTE_ETH_FOREACH_DEV(pid) {
-		if (!strncmp(name, rte_eth_devices[pid].device->name, len)) {
+	for (pid = 0; pid < RTE_MAX_ETHPORTS; pid++) {
+		if (rte_eth_dev_is_valid_port(pid) &&
+		    !strncmp(name, rte_eth_devices[pid].device->name, len)) {
 			*port_id = pid;
 			return 0;
 		}
@@ -41,6 +42,8 @@  fs_bus_init(struct rte_eth_dev *dev)
 			continue;
 		da = &sdev->devargs;
 		if (fs_ethdev_portid_get(da->name, &pid) != 0) {
+			struct rte_eth_dev_owner pid_owner;
+
 			ret = rte_eal_hotplug_add(da->bus->name,
 						  da->name,
 						  da->args);
@@ -55,12 +58,26 @@  fs_bus_init(struct rte_eth_dev *dev)
 				ERROR("sub_device %d init went wrong", i);
 				return -ENODEV;
 			}
+			/*
+			 * The NEW callback tried to take ownership, check
+			 * whether it succeed or didn't.
+			 */
+			rte_eth_dev_owner_get(pid, &pid_owner);
+			if (pid_owner.id != PRIV(dev)->my_owner.id) {
+				INFO("sub_device %d owner(%s_%016"PRIX64") is not my,"
+				     " owner(%s_%016"PRIX64"), will try again later",
+				     i, pid_owner.name, pid_owner.id,
+				     PRIV(dev)->my_owner.name,
+				     PRIV(dev)->my_owner.id);
+				continue;
+			}
 		} else {
+			/* The sub-device port was found. */
 			char devstr[DEVARGS_MAXLEN] = "";
 			struct rte_devargs *probed_da =
 					rte_eth_devices[pid].device->devargs;
 
-			/* Take control of device probed by EAL options. */
+			/* Take control of probed device. */
 			free(da->args);
 			memset(da, 0, sizeof(*da));
 			if (probed_da != NULL)
@@ -77,22 +94,23 @@  fs_bus_init(struct rte_eth_dev *dev)
 			}
 			INFO("Taking control of a probed sub device"
 			      " %d named %s", i, da->name);
-		}
-		ret = rte_eth_dev_owner_set(pid, &PRIV(dev)->my_owner);
-		if (ret < 0) {
-			INFO("sub_device %d owner set failed (%s),"
-			     " will try again later", i, strerror(-ret));
-			continue;
-		} else if (strncmp(rte_eth_devices[pid].device->name, da->name,
-			   strlen(da->name)) != 0) {
-			/*
-			 * The device probably was removed and its port id was
-			 * reallocated before ownership set.
-			 */
-			rte_eth_dev_owner_unset(pid, PRIV(dev)->my_owner.id);
-			INFO("sub_device %d was probably removed before taking"
-			     " ownership, will try again later", i);
-			continue;
+			ret = rte_eth_dev_owner_set(pid, &PRIV(dev)->my_owner);
+			if (ret < 0) {
+				INFO("sub_device %d owner set failed (%s), "
+				     "will try again later", i, strerror(-ret));
+				continue;
+			} else if (strncmp(rte_eth_devices[pid].device->name,
+				   da->name, strlen(da->name)) != 0) {
+				/*
+				 * The device probably was removed and its port
+				 * id was reallocated before ownership set.
+				 */
+				rte_eth_dev_owner_unset(pid,
+							PRIV(dev)->my_owner.id);
+				INFO("sub_device %d was removed before taking"
+				     " ownership, will try again later", i);
+				continue;
+			}
 		}
 		ETH(sdev) = &rte_eth_devices[pid];
 		SUB_ID(sdev) = i;
diff --git a/drivers/net/failsafe/failsafe_ether.c b/drivers/net/failsafe/failsafe_ether.c
index b414a7884..ebce87841 100644
--- a/drivers/net/failsafe/failsafe_ether.c
+++ b/drivers/net/failsafe/failsafe_ether.c
@@ -463,3 +463,26 @@  failsafe_eth_lsc_event_callback(uint16_t port_id __rte_unused,
 	else
 		return 0;
 }
+
+/* Take sub-device ownership before it becomes exposed to the application. */
+int
+failsafe_eth_new_event_callback(uint16_t port_id,
+				enum rte_eth_event_type event __rte_unused,
+				void *cb_arg, void *out __rte_unused)
+{
+	struct rte_eth_dev *fs_dev = cb_arg;
+	struct sub_device *sdev;
+	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+	size_t len = strlen(dev->device->name);
+	uint8_t i;
+
+	FOREACH_SUBDEV_STATE(sdev, i, fs_dev, DEV_PARSED) {
+		if (sdev->state >= DEV_PROBED)
+			continue;
+		if (strncmp(sdev->devargs.name, dev->device->name, len) != 0)
+			continue;
+		rte_eth_dev_owner_set(port_id, &PRIV(fs_dev)->my_owner);
+		break;
+	}
+	return 0;
+}
diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h
index b54f8e336..cd8e0b8c9 100644
--- a/drivers/net/failsafe/failsafe_private.h
+++ b/drivers/net/failsafe/failsafe_private.h
@@ -220,6 +220,10 @@  int failsafe_eth_rmv_event_callback(uint16_t port_id,
 int failsafe_eth_lsc_event_callback(uint16_t port_id,
 				    enum rte_eth_event_type event,
 				    void *cb_arg, void *out);
+int
+failsafe_eth_new_event_callback(uint16_t port_id,
+				enum rte_eth_event_type event,
+				void *cb_arg, void *out);
 
 /* GLOBALS */