[2/6] net/mlx5: fix PCI probing error flow

Message ID 20210831203732.3411134-3-michaelba@nvidia.com (mailing list archive)
State Superseded, archived
Delegated to: Raslan Darawsheh
Headers
Series mlx5: some independent fixes |

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Michael Baum Aug. 31, 2021, 8:37 p.m. UTC
  In PCI probing, there is internal probe function that is called several
times for several PFs.

When one of them fails, the previous PFs are not destroyed what may
cause a memory leak.

Destroy them.

Fixes: 08c2772fc747 ("net/mlx5: support list of representor PF")
Cc: stable@dpdk.org

Signed-off-by: Michael Baum <michaelba@nvidia.com>
---
 drivers/net/mlx5/linux/mlx5_os.c | 13 ++++++++++++-
 drivers/net/mlx5/mlx5.c          |  2 +-
 drivers/net/mlx5/mlx5.h          |  1 +
 3 files changed, 14 insertions(+), 2 deletions(-)
  

Comments

Raslan Darawsheh Sept. 5, 2021, 8:53 a.m. UTC | #1
Hi,

> -----Original Message-----
> From: Michael Baum <michaelba@nvidia.com>
> Sent: Tuesday, August 31, 2021 11:37 PM
> To: dev@dpdk.org
> Cc: Matan Azrad <matan@nvidia.com>; Raslan Darawsheh
> <rasland@nvidia.com>; Slava Ovsiienko <viacheslavo@nvidia.com>;
> stable@dpdk.org
> Subject: [PATCH 2/6] net/mlx5: fix PCI probing error flow
> 
How about changing to something like this:
Fix memory leak in PCI prob
> In PCI probing, there is internal probe function that is called several times for
> several PFs.
During PCI probe, the internal probe function is called per PF.
> 
> When one of them fails, the previous PFs are not destroyed what may cause
> a memory leak.
If one of them fails, it was missing a prober destroy for the previously probed PFs
> 
> Destroy them.
This fixes the behavior by destroying all previously probed PF's
> 
> Fixes: 08c2772fc747 ("net/mlx5: support list of representor PF")
> Cc: stable@dpdk.org
> 
> Signed-off-by: Michael Baum <michaelba@nvidia.com>
> ---
>  drivers/net/mlx5/linux/mlx5_os.c | 13 ++++++++++++-
>  drivers/net/mlx5/mlx5.c          |  2 +-
>  drivers/net/mlx5/mlx5.h          |  1 +
>  3 files changed, 14 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/net/mlx5/linux/mlx5_os.c
> b/drivers/net/mlx5/linux/mlx5_os.c
> index 5f8766aa48..3d204f99f7 100644
> --- a/drivers/net/mlx5/linux/mlx5_os.c
> +++ b/drivers/net/mlx5/linux/mlx5_os.c
> @@ -2700,9 +2700,20 @@ mlx5_os_pci_probe(struct rte_pci_device
> *pci_dev)
> 
>  	if (eth_da.nb_ports > 0) {
>  		/* Iterate all port if devargs pf is range: "pf[0-1]vf[...]". */
> -		for (p = 0; p < eth_da.nb_ports; p++)
> +		for (p = 0; p < eth_da.nb_ports; p++) {
>  			ret = mlx5_os_pci_probe_pf(pci_dev, &eth_da,
>  						   eth_da.ports[p]);
> +			if (ret)
> +				break;
> +		}
> +		if (ret) {
> +			DRV_LOG(ERR, "Probe of PCI device " PCI_PRI_FMT "
> "
> +				"aborted due to proding failure of PF %u",
> +				pci_dev->addr.domain, pci_dev->addr.bus,
> +				pci_dev->addr.devid, pci_dev-
> >addr.function,
> +				eth_da.ports[p]);
> +			mlx5_net_remove(&pci_dev->device);
> +		}
>  	} else {
>  		ret = mlx5_os_pci_probe_pf(pci_dev, &eth_da, 0);
>  	}
> diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index
> f0ec2d1279..02ea2e781e 100644
> --- a/drivers/net/mlx5/mlx5.c
> +++ b/drivers/net/mlx5/mlx5.c
> @@ -2386,7 +2386,7 @@ mlx5_eth_find_next(uint16_t port_id, struct
> rte_device *odev)
>   * @return
>   *   0 on success, the function cannot fail.
>   */
> -static int
> +int
>  mlx5_net_remove(struct rte_device *dev)  {
>  	uint16_t port_id;
> diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index
> e02714e231..3581414b78 100644
> --- a/drivers/net/mlx5/mlx5.h
> +++ b/drivers/net/mlx5/mlx5.h
> @@ -1483,6 +1483,7 @@ int mlx5_udp_tunnel_port_add(struct rte_eth_dev
> *dev,
>  			      struct rte_eth_udp_tunnel *udp_tunnel);
> uint16_t mlx5_eth_find_next(uint16_t port_id, struct rte_device *odev);  int
> mlx5_dev_close(struct rte_eth_dev *dev);
> +int mlx5_net_remove(struct rte_device *dev);
>  bool mlx5_is_hpf(struct rte_eth_dev *dev);  bool mlx5_is_sf_repr(struct
> rte_eth_dev *dev);  void mlx5_age_event_prepare(struct
> mlx5_dev_ctx_shared *sh);
> --
> 2.25.1
  

Patch

diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index 5f8766aa48..3d204f99f7 100644
--- a/drivers/net/mlx5/linux/mlx5_os.c
+++ b/drivers/net/mlx5/linux/mlx5_os.c
@@ -2700,9 +2700,20 @@  mlx5_os_pci_probe(struct rte_pci_device *pci_dev)
 
 	if (eth_da.nb_ports > 0) {
 		/* Iterate all port if devargs pf is range: "pf[0-1]vf[...]". */
-		for (p = 0; p < eth_da.nb_ports; p++)
+		for (p = 0; p < eth_da.nb_ports; p++) {
 			ret = mlx5_os_pci_probe_pf(pci_dev, &eth_da,
 						   eth_da.ports[p]);
+			if (ret)
+				break;
+		}
+		if (ret) {
+			DRV_LOG(ERR, "Probe of PCI device " PCI_PRI_FMT " "
+				"aborted due to proding failure of PF %u",
+				pci_dev->addr.domain, pci_dev->addr.bus,
+				pci_dev->addr.devid, pci_dev->addr.function,
+				eth_da.ports[p]);
+			mlx5_net_remove(&pci_dev->device);
+		}
 	} else {
 		ret = mlx5_os_pci_probe_pf(pci_dev, &eth_da, 0);
 	}
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index f0ec2d1279..02ea2e781e 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -2386,7 +2386,7 @@  mlx5_eth_find_next(uint16_t port_id, struct rte_device *odev)
  * @return
  *   0 on success, the function cannot fail.
  */
-static int
+int
 mlx5_net_remove(struct rte_device *dev)
 {
 	uint16_t port_id;
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index e02714e231..3581414b78 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -1483,6 +1483,7 @@  int mlx5_udp_tunnel_port_add(struct rte_eth_dev *dev,
 			      struct rte_eth_udp_tunnel *udp_tunnel);
 uint16_t mlx5_eth_find_next(uint16_t port_id, struct rte_device *odev);
 int mlx5_dev_close(struct rte_eth_dev *dev);
+int mlx5_net_remove(struct rte_device *dev);
 bool mlx5_is_hpf(struct rte_eth_dev *dev);
 bool mlx5_is_sf_repr(struct rte_eth_dev *dev);
 void mlx5_age_event_prepare(struct mlx5_dev_ctx_shared *sh);