[dpdk-stable] patch 'net/mlx5: fix secondary process resources release' has been queued to stable release 19.11.4

luca.boccassi at gmail.com luca.boccassi at gmail.com
Fri Jul 24 13:57:32 CEST 2020


Hi,

FYI, your patch has been queued to stable release 19.11.4

Note it hasn't been pushed to http://dpdk.org/browse/dpdk-stable yet.
It will be pushed if I get no objections before 07/26/20. So please
shout if anyone has objections.

Also note that after the patch there's a diff of the upstream commit vs the
patch applied to the branch. This will indicate if there was any rebasing
needed to apply to the stable branch. If there were code changes for rebasing
(i.e. not only metadata diffs), please double check that the rebase was
correctly done.

Thanks.

Luca Boccassi

---
From f32fd4561c6e74e184e3a30d55e7de6581a24a6f Mon Sep 17 00:00:00 2001
From: Suanming Mou <suanmingm at mellanox.com>
Date: Thu, 28 May 2020 14:59:49 +0800
Subject: [PATCH] net/mlx5: fix secondary process resources release

[ upstream commit 2786b7bf9084b32dde9a346d92ab1c27f0ffc476 ]

When a secondary process starts, it allocates its own process private
data and also remaps the UAR registers of the Tx queues. Once the
secondary process exits, these resources should be released accordingly,
and the shared resources owned by the primary should not be touched.

Currently, once one port in the secondary process fails to spawn, all the
other spawned ports are also released when the process exits. However,
the mlx5_dev_close() function does not handle the secondary process
case, which means that calling mlx5_dev_close() directly in a
secondary process releases resources it should not touch.

Add a secondary process case to mlx5_dev_close() that releases only the
process's own resources, so that it can quit gracefully.

Fixes: 942d13e6e7d1 ("net/mlx5: fix sharing context destroy order")
Fixes: 3a8207423a0f ("net/mlx5: close all ports on remove")

Signed-off-by: Suanming Mou <suanmingm at mellanox.com>
Acked-by: Matan Azrad <matan at mellanox.com>
---
 drivers/net/mlx5/mlx5.c      | 47 +++++++++++++++++++++++++-----------
 drivers/net/mlx5/mlx5_rxtx.h |  1 +
 drivers/net/mlx5/mlx5_txq.c  | 24 ++++++++++++++++++
 3 files changed, 58 insertions(+), 14 deletions(-)

diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 8879df317..1e34f6263 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -1234,6 +1234,17 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 	unsigned int i;
 	int ret;
 
+	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
+		/* Check if process_private released. */
+		if (!dev->process_private)
+			return;
+		mlx5_tx_uar_uninit_secondary(dev);
+		mlx5_proc_priv_uninit(dev);
+		rte_eth_dev_release_port(dev);
+		return;
+	}
+	if (!priv->sh)
+		return;
 	DRV_LOG(DEBUG, "port %u closing device \"%s\"",
 		dev->data->port_id,
 		((priv->sh->ctx != NULL) ? priv->sh->ctx->device->name : ""));
@@ -1282,16 +1293,13 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 		close(priv->nl_socket_rdma);
 	if (priv->vmwa_context)
 		mlx5_vlan_vmwa_exit(priv->vmwa_context);
-	if (priv->sh) {
-		/*
-		 * Free the shared context in last turn, because the cleanup
-		 * routines above may use some shared fields, like
-		 * mlx5_nl_mac_addr_flush() uses ibdev_path for retrieveing
-		 * ifindex if Netlink fails.
-		 */
-		mlx5_free_shared_ibctx(priv->sh);
-		priv->sh = NULL;
-	}
+	/*
+	 * Free the shared context in last turn, because the cleanup
+	 * routines above may use some shared fields, like
+	 * mlx5_nl_mac_addr_flush() uses ibdev_path for retrieveing
+	 * ifindex if Netlink fails.
+	 */
+	mlx5_free_shared_ibctx(priv->sh);
 	ret = mlx5_hrxq_verify(dev);
 	if (ret)
 		DRV_LOG(WARNING, "port %u some hash Rx queue still remain",
@@ -2164,11 +2172,11 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
 		/* Receive command fd from primary process */
 		err = mlx5_mp_req_verbs_cmd_fd(eth_dev);
 		if (err < 0)
-			return NULL;
+			goto err_secondary;
 		/* Remap UAR for Tx queues. */
 		err = mlx5_tx_uar_init_secondary(eth_dev, err);
 		if (err)
-			return NULL;
+			goto err_secondary;
 		/*
 		 * Ethdev pointer is still required as input since
 		 * the primary device is not accessible from the
@@ -2177,6 +2185,9 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
 		eth_dev->rx_pkt_burst = mlx5_select_rx_function(eth_dev);
 		eth_dev->tx_pkt_burst = mlx5_select_tx_function(eth_dev);
 		return eth_dev;
+err_secondary:
+		mlx5_dev_close(eth_dev);
+		return NULL;
 	}
 	/*
 	 * Some parameters ("tx_db_nc" in particularly) are needed in
@@ -3445,8 +3456,16 @@ mlx5_pci_remove(struct rte_pci_device *pci_dev)
 {
 	uint16_t port_id;
 
-	RTE_ETH_FOREACH_DEV_OF(port_id, &pci_dev->device)
-		rte_eth_dev_close(port_id);
+	RTE_ETH_FOREACH_DEV_OF(port_id, &pci_dev->device) {
+		/*
+		 * mlx5_dev_close() is not registered to secondary process,
+		 * call the close function explicitly for secondary process.
+		 */
+		if (rte_eal_process_type() == RTE_PROC_SECONDARY)
+			mlx5_dev_close(&rte_eth_devices[port_id]);
+		else
+			rte_eth_dev_close(port_id);
+	}
 	return 0;
 }
 
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index a50f057c1..daa67e2f5 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -429,6 +429,7 @@ int mlx5_tx_hairpin_queue_setup
 	 const struct rte_eth_hairpin_conf *hairpin_conf);
 void mlx5_tx_queue_release(void *dpdk_txq);
 int mlx5_tx_uar_init_secondary(struct rte_eth_dev *dev, int fd);
+void mlx5_tx_uar_uninit_secondary(struct rte_eth_dev *dev);
 struct mlx5_txq_obj *mlx5_txq_obj_new(struct rte_eth_dev *dev, uint16_t idx,
 				      enum mlx5_txq_obj_type type);
 struct mlx5_txq_obj *mlx5_txq_obj_get(struct rte_eth_dev *dev, uint16_t idx);
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 42dcd7a4b..1947e15f6 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -424,6 +424,30 @@ txq_uar_uninit_secondary(struct mlx5_txq_ctrl *txq_ctrl)
 	munmap(RTE_PTR_ALIGN_FLOOR(addr, page_size), page_size);
 }
 
+/**
+ * Deinitialize Tx UAR registers for secondary process.
+ *
+ * @param dev
+ *   Pointer to Ethernet device.
+ */
+void
+mlx5_tx_uar_uninit_secondary(struct rte_eth_dev *dev)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_txq_data *txq;
+	struct mlx5_txq_ctrl *txq_ctrl;
+	unsigned int i;
+
+	assert(rte_eal_process_type() == RTE_PROC_SECONDARY);
+	for (i = 0; i != priv->txqs_n; ++i) {
+		if (!(*priv->txqs)[i])
+			continue;
+		txq = (*priv->txqs)[i];
+		txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
+		txq_uar_uninit_secondary(txq_ctrl);
+	}
+}
+
 /**
  * Initialize Tx UAR registers for secondary process.
  *
-- 
2.20.1

---
  Diff of the applied patch vs upstream commit (please double-check if non-empty):
---
--- -	2020-07-24 12:53:49.138701107 +0100
+++ 0014-net-mlx5-fix-secondary-process-resources-release.patch	2020-07-24 12:53:48.175004373 +0100
@@ -1,8 +1,10 @@
-From 2786b7bf9084b32dde9a346d92ab1c27f0ffc476 Mon Sep 17 00:00:00 2001
+From f32fd4561c6e74e184e3a30d55e7de6581a24a6f Mon Sep 17 00:00:00 2001
 From: Suanming Mou <suanmingm at mellanox.com>
 Date: Thu, 28 May 2020 14:59:49 +0800
 Subject: [PATCH] net/mlx5: fix secondary process resources release
 
+[ upstream commit 2786b7bf9084b32dde9a346d92ab1c27f0ffc476 ]
+
 When secondary process starts, it will allocate its own process private
 data, and also does remap to UAR register of the Tx queue. Once the
 secondary process exits, these resources should be released accordingly.
@@ -19,7 +21,6 @@
 
 Fixes: 942d13e6e7d1 ("net/mlx5: fix sharing context destroy order")
 Fixes: 3a8207423a0f ("net/mlx5: close all ports on remove")
-Cc: stable at dpdk.org
 
 Signed-off-by: Suanming Mou <suanmingm at mellanox.com>
 Acked-by: Matan Azrad <matan at mellanox.com>
@@ -30,10 +31,10 @@
  3 files changed, 58 insertions(+), 14 deletions(-)
 
 diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
-index 5589772eb..81102631a 100644
+index 8879df317..1e34f6263 100644
 --- a/drivers/net/mlx5/mlx5.c
 +++ b/drivers/net/mlx5/mlx5.c
-@@ -1423,6 +1423,17 @@ mlx5_dev_close(struct rte_eth_dev *dev)
+@@ -1234,6 +1234,17 @@ mlx5_dev_close(struct rte_eth_dev *dev)
  	unsigned int i;
  	int ret;
  
@@ -51,10 +52,10 @@
  	DRV_LOG(DEBUG, "port %u closing device \"%s\"",
  		dev->data->port_id,
  		((priv->sh->ctx != NULL) ? priv->sh->ctx->device->name : ""));
-@@ -1512,16 +1523,13 @@ mlx5_dev_close(struct rte_eth_dev *dev)
- 	if (ret)
- 		DRV_LOG(WARNING, "port %u some flows still remain",
- 			dev->data->port_id);
+@@ -1282,16 +1293,13 @@ mlx5_dev_close(struct rte_eth_dev *dev)
+ 		close(priv->nl_socket_rdma);
+ 	if (priv->vmwa_context)
+ 		mlx5_vlan_vmwa_exit(priv->vmwa_context);
 -	if (priv->sh) {
 -		/*
 -		 * Free the shared context in last turn, because the cleanup
@@ -72,12 +73,12 @@
 +	 * ifindex if Netlink fails.
 +	 */
 +	mlx5_free_shared_ibctx(priv->sh);
- 	if (priv->domain_id != RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID) {
- 		unsigned int c = 0;
- 		uint16_t port_id;
-@@ -2409,11 +2417,11 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
+ 	ret = mlx5_hrxq_verify(dev);
+ 	if (ret)
+ 		DRV_LOG(WARNING, "port %u some hash Rx queue still remain",
+@@ -2164,11 +2172,11 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
  		/* Receive command fd from primary process */
- 		err = mlx5_mp_req_verbs_cmd_fd(&mp_id);
+ 		err = mlx5_mp_req_verbs_cmd_fd(eth_dev);
  		if (err < 0)
 -			return NULL;
 +			goto err_secondary;
@@ -89,7 +90,7 @@
  		/*
  		 * Ethdev pointer is still required as input since
  		 * the primary device is not accessible from the
-@@ -2422,6 +2430,9 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
+@@ -2177,6 +2185,9 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
  		eth_dev->rx_pkt_burst = mlx5_select_rx_function(eth_dev);
  		eth_dev->tx_pkt_burst = mlx5_select_tx_function(eth_dev);
  		return eth_dev;
@@ -99,7 +100,7 @@
  	}
  	/*
  	 * Some parameters ("tx_db_nc" in particularly) are needed in
-@@ -3707,8 +3718,16 @@ mlx5_pci_remove(struct rte_pci_device *pci_dev)
+@@ -3445,8 +3456,16 @@ mlx5_pci_remove(struct rte_pci_device *pci_dev)
  {
  	uint16_t port_id;
  
@@ -119,10 +120,10 @@
  }
  
 diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
-index 48f2b7941..26621ff19 100644
+index a50f057c1..daa67e2f5 100644
 --- a/drivers/net/mlx5/mlx5_rxtx.h
 +++ b/drivers/net/mlx5/mlx5_rxtx.h
-@@ -434,6 +434,7 @@ int mlx5_tx_hairpin_queue_setup
+@@ -429,6 +429,7 @@ int mlx5_tx_hairpin_queue_setup
  	 const struct rte_eth_hairpin_conf *hairpin_conf);
  void mlx5_tx_queue_release(void *dpdk_txq);
  int mlx5_tx_uar_init_secondary(struct rte_eth_dev *dev, int fd);
@@ -131,10 +132,10 @@
  				      enum mlx5_txq_obj_type type);
  struct mlx5_txq_obj *mlx5_txq_obj_get(struct rte_eth_dev *dev, uint16_t idx);
 diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
-index 80d99ff94..2047a9a4e 100644
+index 42dcd7a4b..1947e15f6 100644
 --- a/drivers/net/mlx5/mlx5_txq.c
 +++ b/drivers/net/mlx5/mlx5_txq.c
-@@ -427,6 +427,30 @@ txq_uar_uninit_secondary(struct mlx5_txq_ctrl *txq_ctrl)
+@@ -424,6 +424,30 @@ txq_uar_uninit_secondary(struct mlx5_txq_ctrl *txq_ctrl)
  	munmap(RTE_PTR_ALIGN_FLOOR(addr, page_size), page_size);
  }
  
@@ -152,7 +153,7 @@
 +	struct mlx5_txq_ctrl *txq_ctrl;
 +	unsigned int i;
 +
-+	MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_SECONDARY);
++	assert(rte_eal_process_type() == RTE_PROC_SECONDARY);
 +	for (i = 0; i != priv->txqs_n; ++i) {
 +		if (!(*priv->txqs)[i])
 +			continue;


More information about the stable mailing list