[v3] net/ice: fix ice dcf control thread crash

Message ID 20230315082018.4260-1-mingjinx.ye@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Qi Zhang
Headers
Series [v3] net/ice: fix ice dcf control thread crash |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/loongarch-compilation success Compilation OK
ci/loongarch-unit-testing success Unit Testing PASS
ci/Intel-compilation success Compilation OK
ci/intel-Testing success Testing PASS
ci/github-robot: build success github build: passed
ci/iol-mellanox-Performance success Performance Testing PASS
ci/iol-broadcom-Performance success Performance Testing PASS
ci/intel-Functional fail Functional issues
ci/iol-broadcom-Functional success Functional Testing PASS
ci/iol-intel-Functional success Functional Testing PASS
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-aarch64-compile-testing success Testing PASS
ci/iol-x86_64-compile-testing success Testing PASS
ci/iol-aarch64-unit-testing success Testing PASS
ci/iol-abi-testing success Testing PASS
ci/iol-testing success Testing PASS
ci/iol-x86_64-unit-testing success Testing PASS

Commit Message

Mingjin Ye March 15, 2023, 8:20 a.m. UTC
The control thread accesses the hardware resources after the
resources were released, resulting in a segmentation fault.

This commit fixes the issue by waiting for all `ice-reset` threads to
finish before reclaiming resources.

Fixes: b71573ec2fc2 ("net/ice: retry getting VF VSI map after failure")
Fixes: 7564d5509611 ("net/ice: add DCF hardware initialization")
Cc: stable@dpdk.org

Signed-off-by: Ke Zhang <ke1x.zhang@intel.com>
Signed-off-by: Mingjin Ye <mingjinx.ye@intel.com>
---
v2: add pthread_exit() for windows
---
V3: Optimization. It is unsafe for a thread to exit forcibly, because the
spin lock it holds would then not be released correctly
---
 drivers/net/ice/ice_dcf.c        | 15 +++++++++++++--
 drivers/net/ice/ice_dcf.h        |  2 ++
 drivers/net/ice/ice_dcf_parent.c |  1 -
 3 files changed, 15 insertions(+), 3 deletions(-)
  

Comments

Qi Zhang March 15, 2023, 1:06 p.m. UTC | #1
> -----Original Message-----
> From: Ye, MingjinX <mingjinx.ye@intel.com>
> Sent: Wednesday, March 15, 2023 4:20 PM
> To: dev@dpdk.org
> Cc: Yang, Qiming <qiming.yang@intel.com>; stable@dpdk.org; Zhou, YidingX
> <yidingx.zhou@intel.com>; Ye, MingjinX <mingjinx.ye@intel.com>; Zhang,
> Ke1X <ke1x.zhang@intel.com>; Zhang, Qi Z <qi.z.zhang@intel.com>
> Subject: [PATCH v3] net/ice: fix ice dcf control thread crash
> 
> The control thread accesses the hardware resources after the resources were
> released, resulting in a segment error.
> 
> This commit fixes the issue by waiting for all `ice-reset` threads to finish
> before reclaiming resources.

Please explain how the patch implements this. I didn't see any code that waits for the other threads to finish, such as a "pthread_join" call.
Please try to add more comments to your code to make it easier to review.

> 
> Fixes: b71573ec2fc2 ("net/ice: retry getting VF VSI map after failure")
> Fixes: 7564d5509611 ("net/ice: add DCF hardware initialization")
> Cc: stable@dpdk.org
> 
> Signed-off-by: Ke Zhang <ke1x.zhang@intel.com>
> Signed-off-by: Mingjin Ye <mingjinx.ye@intel.com>
> ---
> v2: add pthread_exit() for windows
> ---
> V3: Optimization. It is unsafe for a thread to forcibly exit, which will cause
> the spin lock to not be released correctly
> ---
>  drivers/net/ice/ice_dcf.c        | 15 +++++++++++++--
>  drivers/net/ice/ice_dcf.h        |  2 ++
>  drivers/net/ice/ice_dcf_parent.c |  1 -
>  3 files changed, 15 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/net/ice/ice_dcf.c b/drivers/net/ice/ice_dcf.c index
> 1c3d22ae0f..b3dea779aa 100644
> --- a/drivers/net/ice/ice_dcf.c
> +++ b/drivers/net/ice/ice_dcf.c
> @@ -543,6 +543,8 @@ ice_dcf_handle_vsi_update_event(struct ice_dcf_hw
> *hw)
>  	ice_dcf_disable_irq0(hw);
> 
>  	for (;;) {
> +		if (hw->vc_event_msg_cb == NULL)
> +			break;
>  		if (ice_dcf_get_vf_resource(hw) == 0 &&
>  		    ice_dcf_get_vf_vsi_map(hw) >= 0) {
>  			err = 0;
> @@ -555,8 +557,10 @@ ice_dcf_handle_vsi_update_event(struct ice_dcf_hw
> *hw)
>  		rte_delay_ms(ICE_DCF_ARQ_CHECK_TIME);
>  	}
> 
> -	rte_intr_enable(pci_dev->intr_handle);
> -	ice_dcf_enable_irq0(hw);
> +	if (hw->vc_event_msg_cb != NULL) {
> +		rte_intr_enable(pci_dev->intr_handle);
> +		ice_dcf_enable_irq0(hw);
> +	}
> 
>  	rte_spinlock_unlock(&hw->vc_cmd_send_lock);
> 
> @@ -749,6 +753,8 @@ ice_dcf_uninit_hw(struct rte_eth_dev *eth_dev,
> struct ice_dcf_hw *hw)
>  	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
>  	struct rte_intr_handle *intr_handle = pci_dev->intr_handle;
> 
> +	hw->vc_event_msg_cb = NULL;
> +
>  	if (hw->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_QOS)
>  		if (hw->tm_conf.committed) {
>  			ice_dcf_clear_bw(hw);
> @@ -760,6 +766,9 @@ ice_dcf_uninit_hw(struct rte_eth_dev *eth_dev,
> struct ice_dcf_hw *hw)
>  	rte_intr_callback_unregister(intr_handle,
>  				     ice_dcf_dev_interrupt_handler, hw);
> 
> +	rte_delay_us(ICE_DCF_VSI_UPDATE_SERVICE_INTERVAL);
> +	rte_spinlock_lock(&hw->vc_cmd_send_lock);
> +
>  	ice_dcf_mode_disable(hw);
>  	iavf_shutdown_adminq(&hw->avf);
> 
> @@ -783,6 +792,8 @@ ice_dcf_uninit_hw(struct rte_eth_dev *eth_dev,
> struct ice_dcf_hw *hw)
> 
>  	rte_free(hw->ets_config);
>  	hw->ets_config = NULL;
> +
> +	rte_spinlock_unlock(&hw->vc_cmd_send_lock);
>  }
> 
>  int
> diff --git a/drivers/net/ice/ice_dcf.h b/drivers/net/ice/ice_dcf.h index
> 7f42ebabe9..f9465f60a6 100644
> --- a/drivers/net/ice/ice_dcf.h
> +++ b/drivers/net/ice/ice_dcf.h
> @@ -15,6 +15,8 @@
>  #include "base/ice_type.h"
>  #include "ice_logs.h"
> 
> +#define ICE_DCF_VSI_UPDATE_SERVICE_INTERVAL	100000 /* us */
> +
>  /* ICE_DCF_DEV_PRIVATE_TO */
>  #define ICE_DCF_DEV_PRIVATE_TO_ADAPTER(adapter) \
>  	((struct ice_dcf_adapter *)adapter)
> diff --git a/drivers/net/ice/ice_dcf_parent.c
> b/drivers/net/ice/ice_dcf_parent.c
> index 01e390ddda..d1b227c431 100644
> --- a/drivers/net/ice/ice_dcf_parent.c
> +++ b/drivers/net/ice/ice_dcf_parent.c
> @@ -12,7 +12,6 @@
>  #include "ice_dcf_ethdev.h"
>  #include "ice_generic_flow.h"
> 
> -#define ICE_DCF_VSI_UPDATE_SERVICE_INTERVAL	100000 /* us */
>  static rte_spinlock_t vsi_update_lock = RTE_SPINLOCK_INITIALIZER;
> 
>  struct ice_dcf_reset_event_param {
> --
> 2.25.1
  

Patch

diff --git a/drivers/net/ice/ice_dcf.c b/drivers/net/ice/ice_dcf.c
index 1c3d22ae0f..b3dea779aa 100644
--- a/drivers/net/ice/ice_dcf.c
+++ b/drivers/net/ice/ice_dcf.c
@@ -543,6 +543,8 @@  ice_dcf_handle_vsi_update_event(struct ice_dcf_hw *hw)
 	ice_dcf_disable_irq0(hw);
 
 	for (;;) {
+		if (hw->vc_event_msg_cb == NULL)
+			break;
 		if (ice_dcf_get_vf_resource(hw) == 0 &&
 		    ice_dcf_get_vf_vsi_map(hw) >= 0) {
 			err = 0;
@@ -555,8 +557,10 @@  ice_dcf_handle_vsi_update_event(struct ice_dcf_hw *hw)
 		rte_delay_ms(ICE_DCF_ARQ_CHECK_TIME);
 	}
 
-	rte_intr_enable(pci_dev->intr_handle);
-	ice_dcf_enable_irq0(hw);
+	if (hw->vc_event_msg_cb != NULL) {
+		rte_intr_enable(pci_dev->intr_handle);
+		ice_dcf_enable_irq0(hw);
+	}
 
 	rte_spinlock_unlock(&hw->vc_cmd_send_lock);
 
@@ -749,6 +753,8 @@  ice_dcf_uninit_hw(struct rte_eth_dev *eth_dev, struct ice_dcf_hw *hw)
 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
 	struct rte_intr_handle *intr_handle = pci_dev->intr_handle;
 
+	hw->vc_event_msg_cb = NULL;
+
 	if (hw->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_QOS)
 		if (hw->tm_conf.committed) {
 			ice_dcf_clear_bw(hw);
@@ -760,6 +766,9 @@  ice_dcf_uninit_hw(struct rte_eth_dev *eth_dev, struct ice_dcf_hw *hw)
 	rte_intr_callback_unregister(intr_handle,
 				     ice_dcf_dev_interrupt_handler, hw);
 
+	rte_delay_us(ICE_DCF_VSI_UPDATE_SERVICE_INTERVAL);
+	rte_spinlock_lock(&hw->vc_cmd_send_lock);
+
 	ice_dcf_mode_disable(hw);
 	iavf_shutdown_adminq(&hw->avf);
 
@@ -783,6 +792,8 @@  ice_dcf_uninit_hw(struct rte_eth_dev *eth_dev, struct ice_dcf_hw *hw)
 
 	rte_free(hw->ets_config);
 	hw->ets_config = NULL;
+
+	rte_spinlock_unlock(&hw->vc_cmd_send_lock);
 }
 
 int
diff --git a/drivers/net/ice/ice_dcf.h b/drivers/net/ice/ice_dcf.h
index 7f42ebabe9..f9465f60a6 100644
--- a/drivers/net/ice/ice_dcf.h
+++ b/drivers/net/ice/ice_dcf.h
@@ -15,6 +15,8 @@ 
 #include "base/ice_type.h"
 #include "ice_logs.h"
 
+#define ICE_DCF_VSI_UPDATE_SERVICE_INTERVAL	100000 /* us */
+
 /* ICE_DCF_DEV_PRIVATE_TO */
 #define ICE_DCF_DEV_PRIVATE_TO_ADAPTER(adapter) \
 	((struct ice_dcf_adapter *)adapter)
diff --git a/drivers/net/ice/ice_dcf_parent.c b/drivers/net/ice/ice_dcf_parent.c
index 01e390ddda..d1b227c431 100644
--- a/drivers/net/ice/ice_dcf_parent.c
+++ b/drivers/net/ice/ice_dcf_parent.c
@@ -12,7 +12,6 @@ 
 #include "ice_dcf_ethdev.h"
 #include "ice_generic_flow.h"
 
-#define ICE_DCF_VSI_UPDATE_SERVICE_INTERVAL	100000 /* us */
 static rte_spinlock_t vsi_update_lock = RTE_SPINLOCK_INITIALIZER;
 
 struct ice_dcf_reset_event_param {