[v3] net/ice: fix ice dcf control thread crash
Checks
Commit Message
The control thread accesses the hardware resources after they have been
released, resulting in a segmentation fault.
This commit fixes the issue by waiting for all `ice-reset` threads to
finish before reclaiming resources.
Fixes: b71573ec2fc2 ("net/ice: retry getting VF VSI map after failure")
Fixes: 7564d5509611 ("net/ice: add DCF hardware initialization")
Cc: stable@dpdk.org
Signed-off-by: Ke Zhang <ke1x.zhang@intel.com>
Signed-off-by: Mingjin Ye <mingjinx.ye@intel.com>
---
v2: add pthread_exit() for windows
---
V3: Optimization. Forcibly exiting a thread is unsafe because it can
prevent the spin lock from being released correctly.
---
drivers/net/ice/ice_dcf.c | 15 +++++++++++++--
drivers/net/ice/ice_dcf.h | 2 ++
drivers/net/ice/ice_dcf_parent.c | 1 -
3 files changed, 15 insertions(+), 3 deletions(-)
Comments
> -----Original Message-----
> From: Ye, MingjinX <mingjinx.ye@intel.com>
> Sent: Wednesday, March 15, 2023 4:20 PM
> To: dev@dpdk.org
> Cc: Yang, Qiming <qiming.yang@intel.com>; stable@dpdk.org; Zhou, YidingX
> <yidingx.zhou@intel.com>; Ye, MingjinX <mingjinx.ye@intel.com>; Zhang,
> Ke1X <ke1x.zhang@intel.com>; Zhang, Qi Z <qi.z.zhang@intel.com>
> Subject: [PATCH v3] net/ice: fix ice dcf control thread crash
>
> The control thread accesses the hardware resources after the resources were
> released, resulting in a segment error.
>
> This commit fixes the issue by waiting for all `ice-reset` threads to finish
> before reclaiming resources.
Please explain how the patch implements this. I didn't see any code that waits for the other threads to finish, such as a "pthread_join" call.
Please also add more comments to your code to make it easier to review.
>
> Fixes: b71573ec2fc2 ("net/ice: retry getting VF VSI map after failure")
> Fixes: 7564d5509611 ("net/ice: add DCF hardware initialization")
> Cc: stable@dpdk.org
>
> Signed-off-by: Ke Zhang <ke1x.zhang@intel.com>
> Signed-off-by: Mingjin Ye <mingjinx.ye@intel.com>
> ---
> v2: add pthread_exit() for windows
> ---
> V3: Optimization. It is unsafe for a thread to forcibly exit, which will cause
> the spin lock to not be released correctly
> ---
> drivers/net/ice/ice_dcf.c | 15 +++++++++++++--
> drivers/net/ice/ice_dcf.h | 2 ++
> drivers/net/ice/ice_dcf_parent.c | 1 -
> 3 files changed, 15 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/net/ice/ice_dcf.c b/drivers/net/ice/ice_dcf.c index
> 1c3d22ae0f..b3dea779aa 100644
> --- a/drivers/net/ice/ice_dcf.c
> +++ b/drivers/net/ice/ice_dcf.c
> @@ -543,6 +543,8 @@ ice_dcf_handle_vsi_update_event(struct ice_dcf_hw
> *hw)
> ice_dcf_disable_irq0(hw);
>
> for (;;) {
> + if (hw->vc_event_msg_cb == NULL)
> + break;
> if (ice_dcf_get_vf_resource(hw) == 0 &&
> ice_dcf_get_vf_vsi_map(hw) >= 0) {
> err = 0;
> @@ -555,8 +557,10 @@ ice_dcf_handle_vsi_update_event(struct ice_dcf_hw
> *hw)
> rte_delay_ms(ICE_DCF_ARQ_CHECK_TIME);
> }
>
> - rte_intr_enable(pci_dev->intr_handle);
> - ice_dcf_enable_irq0(hw);
> + if (hw->vc_event_msg_cb != NULL) {
> + rte_intr_enable(pci_dev->intr_handle);
> + ice_dcf_enable_irq0(hw);
> + }
>
> rte_spinlock_unlock(&hw->vc_cmd_send_lock);
>
> @@ -749,6 +753,8 @@ ice_dcf_uninit_hw(struct rte_eth_dev *eth_dev,
> struct ice_dcf_hw *hw)
> struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
> struct rte_intr_handle *intr_handle = pci_dev->intr_handle;
>
> + hw->vc_event_msg_cb = NULL;
> +
> if (hw->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_QOS)
> if (hw->tm_conf.committed) {
> ice_dcf_clear_bw(hw);
> @@ -760,6 +766,9 @@ ice_dcf_uninit_hw(struct rte_eth_dev *eth_dev,
> struct ice_dcf_hw *hw)
> rte_intr_callback_unregister(intr_handle,
> ice_dcf_dev_interrupt_handler, hw);
>
> + rte_delay_us(ICE_DCF_VSI_UPDATE_SERVICE_INTERVAL);
> + rte_spinlock_lock(&hw->vc_cmd_send_lock);
> +
> ice_dcf_mode_disable(hw);
> iavf_shutdown_adminq(&hw->avf);
>
> @@ -783,6 +792,8 @@ ice_dcf_uninit_hw(struct rte_eth_dev *eth_dev,
> struct ice_dcf_hw *hw)
>
> rte_free(hw->ets_config);
> hw->ets_config = NULL;
> +
> + rte_spinlock_unlock(&hw->vc_cmd_send_lock);
> }
>
> int
> diff --git a/drivers/net/ice/ice_dcf.h b/drivers/net/ice/ice_dcf.h index
> 7f42ebabe9..f9465f60a6 100644
> --- a/drivers/net/ice/ice_dcf.h
> +++ b/drivers/net/ice/ice_dcf.h
> @@ -15,6 +15,8 @@
> #include "base/ice_type.h"
> #include "ice_logs.h"
>
> +#define ICE_DCF_VSI_UPDATE_SERVICE_INTERVAL 100000 /* us */
> +
> /* ICE_DCF_DEV_PRIVATE_TO */
> #define ICE_DCF_DEV_PRIVATE_TO_ADAPTER(adapter) \
> ((struct ice_dcf_adapter *)adapter)
> diff --git a/drivers/net/ice/ice_dcf_parent.c
> b/drivers/net/ice/ice_dcf_parent.c
> index 01e390ddda..d1b227c431 100644
> --- a/drivers/net/ice/ice_dcf_parent.c
> +++ b/drivers/net/ice/ice_dcf_parent.c
> @@ -12,7 +12,6 @@
> #include "ice_dcf_ethdev.h"
> #include "ice_generic_flow.h"
>
> -#define ICE_DCF_VSI_UPDATE_SERVICE_INTERVAL 100000 /* us */
> static rte_spinlock_t vsi_update_lock = RTE_SPINLOCK_INITIALIZER;
>
> struct ice_dcf_reset_event_param {
> --
> 2.25.1
@@ -543,6 +543,8 @@ ice_dcf_handle_vsi_update_event(struct ice_dcf_hw *hw)
ice_dcf_disable_irq0(hw);
for (;;) {
+ if (hw->vc_event_msg_cb == NULL)
+ break;
if (ice_dcf_get_vf_resource(hw) == 0 &&
ice_dcf_get_vf_vsi_map(hw) >= 0) {
err = 0;
@@ -555,8 +557,10 @@ ice_dcf_handle_vsi_update_event(struct ice_dcf_hw *hw)
rte_delay_ms(ICE_DCF_ARQ_CHECK_TIME);
}
- rte_intr_enable(pci_dev->intr_handle);
- ice_dcf_enable_irq0(hw);
+ if (hw->vc_event_msg_cb != NULL) {
+ rte_intr_enable(pci_dev->intr_handle);
+ ice_dcf_enable_irq0(hw);
+ }
rte_spinlock_unlock(&hw->vc_cmd_send_lock);
@@ -749,6 +753,8 @@ ice_dcf_uninit_hw(struct rte_eth_dev *eth_dev, struct ice_dcf_hw *hw)
struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
struct rte_intr_handle *intr_handle = pci_dev->intr_handle;
+ hw->vc_event_msg_cb = NULL;
+
if (hw->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_QOS)
if (hw->tm_conf.committed) {
ice_dcf_clear_bw(hw);
@@ -760,6 +766,9 @@ ice_dcf_uninit_hw(struct rte_eth_dev *eth_dev, struct ice_dcf_hw *hw)
rte_intr_callback_unregister(intr_handle,
ice_dcf_dev_interrupt_handler, hw);
+ rte_delay_us(ICE_DCF_VSI_UPDATE_SERVICE_INTERVAL);
+ rte_spinlock_lock(&hw->vc_cmd_send_lock);
+
ice_dcf_mode_disable(hw);
iavf_shutdown_adminq(&hw->avf);
@@ -783,6 +792,8 @@ ice_dcf_uninit_hw(struct rte_eth_dev *eth_dev, struct ice_dcf_hw *hw)
rte_free(hw->ets_config);
hw->ets_config = NULL;
+
+ rte_spinlock_unlock(&hw->vc_cmd_send_lock);
}
int
@@ -15,6 +15,8 @@
#include "base/ice_type.h"
#include "ice_logs.h"
+#define ICE_DCF_VSI_UPDATE_SERVICE_INTERVAL 100000 /* us */
+
/* ICE_DCF_DEV_PRIVATE_TO */
#define ICE_DCF_DEV_PRIVATE_TO_ADAPTER(adapter) \
((struct ice_dcf_adapter *)adapter)
@@ -12,7 +12,6 @@
#include "ice_dcf_ethdev.h"
#include "ice_generic_flow.h"
-#define ICE_DCF_VSI_UPDATE_SERVICE_INTERVAL 100000 /* us */
static rte_spinlock_t vsi_update_lock = RTE_SPINLOCK_INITIALIZER;
struct ice_dcf_reset_event_param {