[2/2] app/testpmd: fix hot-unplug detaching

Message ID 20200213155226.1024939-3-thomas@monjalon.net (mailing list archive)
State Accepted, archived
Delegated to: Ferruh Yigit
Headers
Series app/testpmd: fix detach |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/travis-robot success Travis build: passed
ci/Intel-compilation fail apply issues

Commit Message

Thomas Monjalon Feb. 13, 2020, 3:52 p.m. UTC
  There is a possible race condition in the hotplug path
in rmv_port_callback(). If a port is created between
close_port(port_id) and detach_port_device(port_id),
then the port_id will have been reallocated to a different
device which will be wrongly detached.

Since a check was added in detach_port_device() for
manual detach case, the hotplug path was even more broken.
It became impossible to run because the new check prevented
to run detach_port_device() after the port is closed.

The solution for both issues is to not rely on the port_id
for detaching the rte_device.
The function detach_port_device() is split to allow calling
detach_device() directly with the rte_device pointer, saved
before closing the port.

Fixes: 43d0e304980a ("app/testpmd: fix invalid port detaching")
Cc: stable@dpdk.org

Signed-off-by: Thomas Monjalon <thomas@monjalon.net>
---
 app/test-pmd/testpmd.c | 48 ++++++++++++++++++++++++------------------
 1 file changed, 28 insertions(+), 20 deletions(-)
  

Comments

Matan Azrad Feb. 16, 2020, 8:09 a.m. UTC | #1
Hi Thomas

Thanks for the patches, I Saw it just now.
 please see small comment below:

 From: Thomas Monjalon
> There is a possible race condition in the hotplug path in rmv_port_callback().
> If a port is created between
> close_port(port_id) and detach_port_device(port_id), then the port_id will
> have been reallocated to a different device which will be wrongly detached.
> 
> Since a check was added in detach_port_device() for manual detach case,
> the hotplug path was even more broken.
> It became impossible to run because the new check prevented to run
> detach_port_device() after the port is closed.
> 
> The solution for both issues is to not rely on the port_id for detaching the
> rte_device.
> The function detach_port_device() is split to allow calling
> detach_device() directly with the rte_device pointer, saved before closing
> the port.
> 
> Fixes: 43d0e304980a ("app/testpmd: fix invalid port detaching")

If you fix the race, Don't you think you need to add fixes line for the patch which created the race?

> Cc: stable@dpdk.org
> 
> Signed-off-by: Thomas Monjalon <thomas@monjalon.net>
> ---
>  app/test-pmd/testpmd.c | 48 ++++++++++++++++++++++++------------------
>  1 file changed, 28 insertions(+), 20 deletions(-)
> 
> diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c index
> 11203cb03d..035836adfb 100644
> --- a/app/test-pmd/testpmd.c
> +++ b/app/test-pmd/testpmd.c
> @@ -2633,32 +2633,17 @@ setup_attached_port(portid_t pi)
>  	printf("Done\n");
>  }
> 
> -void
> -detach_port_device(portid_t port_id)
> +static void
> +detach_device(struct rte_device *dev)
>  {
> -	struct rte_device *dev;
>  	portid_t sibling;
> 
> -	printf("Removing a device...\n");
> -
> -	if (port_id_is_invalid(port_id, ENABLED_WARN))
> -		return;
> -
> -	dev = rte_eth_devices[port_id].device;
>  	if (dev == NULL) {
>  		printf("Device already removed\n");
>  		return;
>  	}
> 
> -	if (ports[port_id].port_status != RTE_PORT_CLOSED) {
> -		if (ports[port_id].port_status != RTE_PORT_STOPPED) {
> -			printf("Port not stopped\n");
> -			return;
> -		}
> -		printf("Port was not closed\n");
> -		if (ports[port_id].flow_list)
> -			port_flow_flush(port_id);
> -	}
> +	printf("Removing a device...\n");
> 
>  	if (rte_dev_remove(dev) < 0) {
>  		TESTPMD_LOG(ERR, "Failed to detach device %s\n", dev-
> >name); @@ -2676,13 +2661,31 @@ detach_port_device(portid_t port_id)
> 
>  	remove_invalid_ports();
> 
> -	printf("Device of port %u is detached\n", port_id);
> +	printf("Device is detached\n");
>  	printf("Now total ports is %d\n", nb_ports);
>  	printf("Done\n");
>  	return;
>  }
> 
>  void
> +detach_port_device(portid_t port_id)
> +{
> +	if (port_id_is_invalid(port_id, ENABLED_WARN))
> +		return;
> +
> +	if (ports[port_id].port_status != RTE_PORT_CLOSED) {
> +		if (ports[port_id].port_status != RTE_PORT_STOPPED) {
> +			printf("Port not stopped\n");
> +			return;
> +		}
> +		printf("Port was not closed\n");
> +		if (ports[port_id].flow_list)
> +			port_flow_flush(port_id);
> +	}
> +
> +	detach_device(rte_eth_devices[port_id].device);
> +}
> +
>  void
>  detach_devargs(char *identifier)
>  {
> @@ -2873,6 +2876,7 @@ rmv_port_callback(void *arg)
>  	int need_to_start = 0;
>  	int org_no_link_check = no_link_check;
>  	portid_t port_id = (intptr_t)arg;
> +	struct rte_device *dev;
> 
>  	RTE_ETH_VALID_PORTID_OR_RET(port_id);
> 
> @@ -2883,8 +2887,12 @@ rmv_port_callback(void *arg)
>  	no_link_check = 1;
>  	stop_port(port_id);
>  	no_link_check = org_no_link_check;
> +
> +	/* Save rte_device pointer before closing ethdev port */
> +	dev = rte_eth_devices[port_id].device;
>  	close_port(port_id);
> -	detach_port_device(port_id);
> +	detach_device(dev); /* might be already removed or have more
> ports */
> +
>  	if (need_to_start)
>  		start_packet_forwarding(0);
>  }
> --
> 2.25.0
  
Thomas Monjalon Feb. 16, 2020, 9:47 a.m. UTC | #2
16/02/2020 09:09, Matan Azrad:
> Hi Thomas
> 
> Thanks for the patches, I Saw it just now.
>  please see small comment below:
> 
>  From: Thomas Monjalon
> > There is a possible race condition in the hotplug path in rmv_port_callback().
> > If a port is created between
> > close_port(port_id) and detach_port_device(port_id), then the port_id will
> > have been reallocated to a different device which will be wrongly detached.
> > 
> > Since a check was added in detach_port_device() for manual detach case,
> > the hotplug path was even more broken.
> > It became impossible to run because the new check prevented to run
> > detach_port_device() after the port is closed.
> > 
> > The solution for both issues is to not rely on the port_id for detaching the
> > rte_device.
> > The function detach_port_device() is split to allow calling
> > detach_device() directly with the rte_device pointer, saved before closing
> > the port.
> > 
> > Fixes: 43d0e304980a ("app/testpmd: fix invalid port detaching")
> 
> If you fix the race, Don't you think you need to add fixes line for the patch which created the race?

Yes, you're right, I forgot it.
It is too late now unfortunately, but we may request to backport it
properly in 18.11 as well.

Please Kevin, add this tag so it will be part of 18.11:

Fixes: cbb4c648c5df ("ethdev: use device handle to detach")

> > Cc: stable@dpdk.org
  
Kevin Traynor Feb. 17, 2020, 9:50 a.m. UTC | #3
On 16/02/2020 09:47, Thomas Monjalon wrote:
> 16/02/2020 09:09, Matan Azrad:
>> Hi Thomas
>>
>> Thanks for the patches, I Saw it just now.
>>  please see small comment below:
>>
>>  From: Thomas Monjalon
>>> There is a possible race condition in the hotplug path in rmv_port_callback().
>>> If a port is created between
>>> close_port(port_id) and detach_port_device(port_id), then the port_id will
>>> have been reallocated to a different device which will be wrongly detached.
>>>
>>> Since a check was added in detach_port_device() for manual detach case,
>>> the hotplug path was even more broken.
>>> It became impossible to run because the new check prevented to run
>>> detach_port_device() after the port is closed.
>>>
>>> The solution for both issues is to not rely on the port_id for detaching the
>>> rte_device.
>>> The function detach_port_device() is split to allow calling
>>> detach_device() directly with the rte_device pointer, saved before closing
>>> the port.
>>>
>>> Fixes: 43d0e304980a ("app/testpmd: fix invalid port detaching")
>>
>> If you fix the race, Don't you think you need to add fixes line for the patch which created the race?
> 
> Yes, you're right, I forgot it.
> It is too late now unfortunately, but we may request to backport it
> properly in 18.11 as well.
> 
> Please Kevin, add this tag so it will be part of 18.11:
> 
> Fixes: cbb4c648c5df ("ethdev: use device handle to detach")
> 

Ack. Will adjust the tag when applying, thanks.

>>> Cc: stable@dpdk.org
> 
> 
>
  

Patch

diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
index 11203cb03d..035836adfb 100644
--- a/app/test-pmd/testpmd.c
+++ b/app/test-pmd/testpmd.c
@@ -2633,32 +2633,17 @@  setup_attached_port(portid_t pi)
 	printf("Done\n");
 }
 
-void
-detach_port_device(portid_t port_id)
+static void
+detach_device(struct rte_device *dev)
 {
-	struct rte_device *dev;
 	portid_t sibling;
 
-	printf("Removing a device...\n");
-
-	if (port_id_is_invalid(port_id, ENABLED_WARN))
-		return;
-
-	dev = rte_eth_devices[port_id].device;
 	if (dev == NULL) {
 		printf("Device already removed\n");
 		return;
 	}
 
-	if (ports[port_id].port_status != RTE_PORT_CLOSED) {
-		if (ports[port_id].port_status != RTE_PORT_STOPPED) {
-			printf("Port not stopped\n");
-			return;
-		}
-		printf("Port was not closed\n");
-		if (ports[port_id].flow_list)
-			port_flow_flush(port_id);
-	}
+	printf("Removing a device...\n");
 
 	if (rte_dev_remove(dev) < 0) {
 		TESTPMD_LOG(ERR, "Failed to detach device %s\n", dev->name);
@@ -2676,13 +2661,31 @@  detach_port_device(portid_t port_id)
 
 	remove_invalid_ports();
 
-	printf("Device of port %u is detached\n", port_id);
+	printf("Device is detached\n");
 	printf("Now total ports is %d\n", nb_ports);
 	printf("Done\n");
 	return;
 }
 
 void
+detach_port_device(portid_t port_id)
+{
+	if (port_id_is_invalid(port_id, ENABLED_WARN))
+		return;
+
+	if (ports[port_id].port_status != RTE_PORT_CLOSED) {
+		if (ports[port_id].port_status != RTE_PORT_STOPPED) {
+			printf("Port not stopped\n");
+			return;
+		}
+		printf("Port was not closed\n");
+		if (ports[port_id].flow_list)
+			port_flow_flush(port_id);
+	}
+
+	detach_device(rte_eth_devices[port_id].device);
+}
+
 void
 detach_devargs(char *identifier)
 {
@@ -2873,6 +2876,7 @@  rmv_port_callback(void *arg)
 	int need_to_start = 0;
 	int org_no_link_check = no_link_check;
 	portid_t port_id = (intptr_t)arg;
+	struct rte_device *dev;
 
 	RTE_ETH_VALID_PORTID_OR_RET(port_id);
 
@@ -2883,8 +2887,12 @@  rmv_port_callback(void *arg)
 	no_link_check = 1;
 	stop_port(port_id);
 	no_link_check = org_no_link_check;
+
+	/* Save rte_device pointer before closing ethdev port */
+	dev = rte_eth_devices[port_id].device;
 	close_port(port_id);
-	detach_port_device(port_id);
+	detach_device(dev); /* might be already removed or have more ports */
+
 	if (need_to_start)
 		start_packet_forwarding(0);
 }