[dpdk-dev,INTERNAL,REVIEW,1/7] net/qede/base: fix recovery from previous ungraceful exit

Message ID 1500891015-21625-1-git-send-email-rasesh.mody@cavium.com (mailing list archive)
State Superseded, archived
Headers

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK

Commit Message

Mody, Rasesh July 24, 2017, 10:10 a.m. UTC
  This patch modifies the recovery flow to allow ongoing PCIe
transactions to be completed. To achieve this, the load sequence is
changed such that the "final_cleanup" notification is sent while the
FID_enable is cleared.
This change ensures that the chip is cleaned up from the remains of a
previous driver instance, if needed.

Fixes: ec94dbc57362 ("qede: add base driver")

Signed-off-by: Rasesh Mody <rasesh.mody@cavium.com>
---
 drivers/net/qede/base/ecore_dev.c     |  121 +++++++++++++++------------
 drivers/net/qede/base/ecore_dev_api.h |   12 +++
 drivers/net/qede/base/ecore_int.c     |  144 ++++++++++++++++-----------------
 drivers/net/qede/base/ecore_int.h     |    3 +
 drivers/net/qede/base/ecore_mcp.c     |   45 +++++++++++
 drivers/net/qede/base/ecore_mcp.h     |   11 +++
 drivers/net/qede/base/ecore_mcp_api.h |   11 +++
 7 files changed, 224 insertions(+), 123 deletions(-)
  

Comments

Mody, Rasesh July 24, 2017, 10:18 a.m. UTC | #1
Please ignore this patch-set with the 'INTERNAL REVIEW' subject prefix. I am sending a follow-up with the proper subject prefix.

Thanks!
-Rasesh

> -----Original Message-----
> From: Rasesh Mody [mailto:rasesh.mody@cavium.com]
> Sent: Monday, July 24, 2017 3:10 AM
> To: dev@dpdk.org
> Cc: Mody, Rasesh <Rasesh.Mody@cavium.com>; Dept-Eng DPDK Dev <Dept-
> EngDPDKDev@cavium.com>
> Subject: [INTERNAL REVIEW 1/7] net/qede/base: fix recovery from previous
> ungraceful exit
> 
> This patch modifies the recovery flow to allow ongoing PCIe transactions to
> be completed. To achieve this, the load sequence is changed such that the
> "final_cleanup" notification is sent while the FID_enable is cleared.
> This change ensures that the chip cleanup actions takes place from previous
> driver instance if needed.
> 
> Fixes: ec94dbc57362 ("qede: add base driver")
> 
> Signed-off-by: Rasesh Mody <rasesh.mody@cavium.com>
> ---
>  drivers/net/qede/base/ecore_dev.c     |  121 +++++++++++++++------------
>  drivers/net/qede/base/ecore_dev_api.h |   12 +++
>  drivers/net/qede/base/ecore_int.c     |  144 ++++++++++++++++--------------
> ---
>  drivers/net/qede/base/ecore_int.h     |    3 +
>  drivers/net/qede/base/ecore_mcp.c     |   45 +++++++++++
>  drivers/net/qede/base/ecore_mcp.h     |   11 +++
>  drivers/net/qede/base/ecore_mcp_api.h |   11 +++
>  7 files changed, 224 insertions(+), 123 deletions(-)
> 
> diff --git a/drivers/net/qede/base/ecore_dev.c
> b/drivers/net/qede/base/ecore_dev.c
> index 4cfa668..65b89b8 100644
> --- a/drivers/net/qede/base/ecore_dev.c
> +++ b/drivers/net/qede/base/ecore_dev.c
> @@ -1080,7 +1080,7 @@ enum _ecore_status_t ecore_final_cleanup(struct
> ecore_hwfn *p_hwfn,
>  	}
> 
>  	DP_VERBOSE(p_hwfn, ECORE_MSG_IOV,
> -		   "Sending final cleanup for PFVF[%d] [Command %08x\n]",
> +		   "Sending final cleanup for PFVF[%d] [Command %08x]\n",
>  		   id, command);
> 
>  	ecore_wr(p_hwfn, p_ptt, XSDM_REG_OPERATION_GEN,
> command); @@ -1776,13 +1776,6 @@ static enum _ecore_status_t
> ecore_hw_init_port(struct ecore_hwfn *p_hwfn,
>  	/* perform debug configuration when chip is out of reset */
>  	OSAL_BEFORE_PF_START((void *)p_hwfn->p_dev, p_hwfn-
> >my_id);
> 
> -	/* Cleanup chip from previous driver if such remains exist */
> -	rc = ecore_final_cleanup(p_hwfn, p_ptt, rel_pf_id, false);
> -	if (rc != ECORE_SUCCESS) {
> -		ecore_hw_err_notify(p_hwfn,
> ECORE_HW_ERR_RAMROD_FAIL);
> -		return rc;
> -	}
> -
>  	/* PF Init sequence */
>  	rc = ecore_init_run(p_hwfn, p_ptt, PHASE_PF, rel_pf_id, hw_mode);
>  	if (rc)
> @@ -1866,17 +1859,17 @@ static enum _ecore_status_t
> ecore_hw_init_port(struct ecore_hwfn *p_hwfn,
>  	return rc;
>  }
> 
> -static enum _ecore_status_t
> -ecore_change_pci_hwfn(struct ecore_hwfn *p_hwfn,
> -		      struct ecore_ptt *p_ptt, u8 enable)
> +enum _ecore_status_t ecore_pglueb_set_pfid_enable(struct ecore_hwfn
> *p_hwfn,
> +						  struct ecore_ptt *p_ptt,
> +						  bool b_enable)
>  {
> -	u32 delay_idx = 0, val, set_val = enable ? 1 : 0;
> +	u32 delay_idx = 0, val, set_val = b_enable ? 1 : 0;
> 
> -	/* Change PF in PXP */
> +	/* Configure the PF's internal FID_enable for master transactions */
>  	ecore_wr(p_hwfn, p_ptt,
>  		 PGLUE_B_REG_INTERNAL_PFID_ENABLE_MASTER, set_val);
> 
> -	/* wait until value is set - try for 1 second every 50us */
> +	/* Wait until value is set - try for 1 second every 50us */
>  	for (delay_idx = 0; delay_idx < 20000; delay_idx++) {
>  		val = ecore_rd(p_hwfn, p_ptt,
> 
> PGLUE_B_REG_INTERNAL_PFID_ENABLE_MASTER);
> @@ -1918,14 +1911,21 @@ enum _ecore_status_t ecore_vf_start(struct
> ecore_hwfn *p_hwfn,
>  	return ECORE_SUCCESS;
>  }
> 
> +static void ecore_pglueb_clear_err(struct ecore_hwfn *p_hwfn,
> +				     struct ecore_ptt *p_ptt)
> +{
> +	ecore_wr(p_hwfn, p_ptt,
> PGLUE_B_REG_WAS_ERROR_PF_31_0_CLR,
> +		 1 << p_hwfn->abs_pf_id);
> +}
> +
>  enum _ecore_status_t ecore_hw_init(struct ecore_dev *p_dev,
>  				   struct ecore_hw_init_params *p_params)  {
>  	struct ecore_load_req_params load_req_params;
> -	u32 load_code, param, drv_mb_param;
> +	u32 load_code, resp, param, drv_mb_param;
>  	bool b_default_mtu = true;
>  	struct ecore_hwfn *p_hwfn;
> -	enum _ecore_status_t rc = ECORE_SUCCESS, mfw_rc;
> +	enum _ecore_status_t rc = ECORE_SUCCESS;
>  	int i;
> 
>  	if ((p_params->int_mode == ECORE_INT_MODE_MSI) && @@ -
> 1942,7 +1942,7 @@ enum _ecore_status_t ecore_hw_init(struct ecore_dev
> *p_dev,
>  	}
> 
>  	for_each_hwfn(p_dev, i) {
> -		struct ecore_hwfn *p_hwfn = &p_dev->hwfns[i];
> +		p_hwfn = &p_dev->hwfns[i];
> 
>  		/* If management didn't provide a default, set one of our
> own */
>  		if (!p_hwfn->hw_info.mtu) {
> @@ -1955,11 +1955,6 @@ enum _ecore_status_t ecore_hw_init(struct
> ecore_dev *p_dev,
>  			continue;
>  		}
> 
> -		/* Enable DMAE in PXP */
> -		rc = ecore_change_pci_hwfn(p_hwfn, p_hwfn->p_main_ptt,
> true);
> -		if (rc != ECORE_SUCCESS)
> -			return rc;
> -
>  		rc = ecore_calc_hw_mode(p_hwfn);
>  		if (rc != ECORE_SUCCESS)
>  			return rc;
> @@ -2009,6 +2004,30 @@ enum _ecore_status_t ecore_hw_init(struct
> ecore_dev *p_dev,
>  			qm_lock_init = true;
>  		}
> 
> +		/* Clean up chip from previous driver if such remains exist.
> +		 * This is not needed when the PF is the first one on the
> +		 * engine, since afterwards we are going to init the FW.
> +		 */
> +		if (load_code != FW_MSG_CODE_DRV_LOAD_ENGINE) {
> +			rc = ecore_final_cleanup(p_hwfn, p_hwfn-
> >p_main_ptt,
> +						 p_hwfn->rel_pf_id, false);
> +			if (rc != ECORE_SUCCESS) {
> +				ecore_hw_err_notify(p_hwfn,
> +
> ECORE_HW_ERR_RAMROD_FAIL);
> +				goto load_err;
> +			}
> +		}
> +
> +		/* Log and clean previous pglue_b errors if such exist */
> +		ecore_pglueb_rbc_attn_handler(p_hwfn, p_hwfn-
> >p_main_ptt);
> +		ecore_pglueb_clear_err(p_hwfn, p_hwfn->p_main_ptt);
> +
> +		/* Enable the PF's internal FID_enable in the PXP */
> +		rc = ecore_pglueb_set_pfid_enable(p_hwfn, p_hwfn-
> >p_main_ptt,
> +						  true);
> +		if (rc != ECORE_SUCCESS)
> +			goto load_err;
> +
>  		switch (load_code) {
>  		case FW_MSG_CODE_DRV_LOAD_ENGINE:
>  			rc = ecore_hw_init_common(p_hwfn, p_hwfn-
> >p_main_ptt, @@ -2037,35 +2056,28 @@ enum _ecore_status_t
> ecore_hw_init(struct ecore_dev *p_dev,
>  			break;
>  		}
> 
> -		if (rc != ECORE_SUCCESS)
> +		if (rc != ECORE_SUCCESS) {
>  			DP_NOTICE(p_hwfn, true,
>  				  "init phase failed for loadcode 0x%x (rc
> %d)\n",
>  				  load_code, rc);
> +			goto load_err;
> +		}
> 
> -		/* ACK mfw regardless of success or failure of initialization */
> -		mfw_rc = ecore_mcp_cmd(p_hwfn, p_hwfn->p_main_ptt,
> -				       DRV_MSG_CODE_LOAD_DONE,
> -				       0, &load_code, &param);
> +		rc = ecore_mcp_load_done(p_hwfn, p_hwfn->p_main_ptt);
>  		if (rc != ECORE_SUCCESS)
>  			return rc;
> 
> -		if (mfw_rc != ECORE_SUCCESS) {
> -			DP_NOTICE(p_hwfn, true,
> -				  "Failed sending a LOAD_DONE
> command\n");
> -			return mfw_rc;
> -		}
> -
>  		/* send DCBX attention request command */
>  		DP_VERBOSE(p_hwfn, ECORE_MSG_DCB,
>  			   "sending phony dcbx set command to trigger DCBx
> attention handling\n");
> -		mfw_rc = ecore_mcp_cmd(p_hwfn, p_hwfn->p_main_ptt,
> -				       DRV_MSG_CODE_SET_DCBX,
> -				       1 <<
> DRV_MB_PARAM_DCBX_NOTIFY_SHIFT,
> -				       &load_code, &param);
> -		if (mfw_rc != ECORE_SUCCESS) {
> +		rc = ecore_mcp_cmd(p_hwfn, p_hwfn->p_main_ptt,
> +				   DRV_MSG_CODE_SET_DCBX,
> +				   1 <<
> DRV_MB_PARAM_DCBX_NOTIFY_SHIFT, &resp,
> +				   &param);
> +		if (rc != ECORE_SUCCESS) {
>  			DP_NOTICE(p_hwfn, true,
>  				  "Failed to send DCBX attention request\n");
> -			return mfw_rc;
> +			return rc;
>  		}
> 
>  		p_hwfn->hw_init_done = true;
> @@ -2076,7 +2088,7 @@ enum _ecore_status_t ecore_hw_init(struct
> ecore_dev *p_dev,
>  		drv_mb_param = STORM_FW_VERSION;
>  		rc = ecore_mcp_cmd(p_hwfn, p_hwfn->p_main_ptt,
> 
> DRV_MSG_CODE_OV_UPDATE_STORM_FW_VER,
> -				   drv_mb_param, &load_code, &param);
> +				   drv_mb_param, &resp, &param);
>  		if (rc != ECORE_SUCCESS)
>  			DP_INFO(p_hwfn, "Failed to update firmware
> version\n");
> 
> @@ -2094,6 +2106,14 @@ enum _ecore_status_t ecore_hw_init(struct
> ecore_dev *p_dev,
>  	}
> 
>  	return rc;
> +
> +load_err:
> +	/* The MFW load lock should be released regardless of success or
> failure
> +	 * of initialization.
> +	 * TODO: replace this with an attempt to send cancel_load.
> +	 */
> +	ecore_mcp_load_done(p_hwfn, p_hwfn->p_main_ptt);
> +	return rc;
>  }
> 
>  #define ECORE_HW_STOP_RETRY_LIMIT	(10)
> @@ -2261,18 +2281,20 @@ enum _ecore_status_t ecore_hw_stop(struct
> ecore_dev *p_dev)
>  		}
>  	} /* hwfn loop */
> 
> -	if (IS_PF(p_dev)) {
> +	if (IS_PF(p_dev) && !p_dev->recov_in_prog) {
>  		p_hwfn = ECORE_LEADING_HWFN(p_dev);
>  		p_ptt = ECORE_LEADING_HWFN(p_dev)->p_main_ptt;
> 
> -		/* Disable DMAE in PXP - in CMT, this should only be done for
> -		 * first hw-function, and only after all transactions have
> -		 * stopped for all active hw-functions.
> -		 */
> -		rc = ecore_change_pci_hwfn(p_hwfn, p_ptt, false);
> +		 /* Clear the PF's internal FID_enable in the PXP.
> +		  * In CMT this should only be done for first hw-function, and
> +		  * only after all transactions have stopped for all active
> +		  * hw-functions.
> +		  */
> +		rc = ecore_pglueb_set_pfid_enable(p_hwfn, p_hwfn-
> >p_main_ptt,
> +						  false);
>  		if (rc != ECORE_SUCCESS) {
>  			DP_NOTICE(p_hwfn, true,
> -				  "ecore_change_pci_hwfn failed. rc =
> %d.\n",
> +				  "ecore_pglueb_set_pfid_enable() failed. rc
> = %d.\n",
>  				  rc);
>  			rc2 = ECORE_UNKNOWN_ERROR;
>  		}
> @@ -2370,9 +2392,8 @@ static void ecore_hw_hwfn_prepare(struct
> ecore_hwfn *p_hwfn)
>  			 PGLUE_B_REG_PGL_ADDR_94_F0_BB, 0);
>  	}
> 
> -	/* Clean Previous errors if such exist */
> -	ecore_wr(p_hwfn, p_hwfn->p_main_ptt,
> -		 PGLUE_B_REG_WAS_ERROR_PF_31_0_CLR, 1 << p_hwfn-
> >abs_pf_id);
> +	/* Clean previous pglue_b errors if such exist */
> +	ecore_pglueb_clear_err(p_hwfn, p_hwfn->p_main_ptt);
> 
>  	/* enable internal target-read */
>  	ecore_wr(p_hwfn, p_hwfn->p_main_ptt,
> diff --git a/drivers/net/qede/base/ecore_dev_api.h
> b/drivers/net/qede/base/ecore_dev_api.h
> index 886407b..eea22e0 100644
> --- a/drivers/net/qede/base/ecore_dev_api.h
> +++ b/drivers/net/qede/base/ecore_dev_api.h
> @@ -584,4 +584,16 @@ enum _ecore_status_t
> ecore_set_queue_coalesce(struct ecore_hwfn *p_hwfn, u16 rx_coal,
>  			 u16 tx_coal, void *p_handle);
> 
> +/**
> + * @brief ecore_pglueb_set_pfid_enable - Enable or disable PCI BUS
> +MASTER
> + *
> + * @param p_hwfn
> + * @param p_ptt
> + * @param b_enable - true/false
> + *
> + * @return enum _ecore_status_t
> + */
> +enum _ecore_status_t ecore_pglueb_set_pfid_enable(struct ecore_hwfn
> *p_hwfn,
> +						  struct ecore_ptt *p_ptt,
> +						  bool b_enable);
>  #endif
> diff --git a/drivers/net/qede/base/ecore_int.c
> b/drivers/net/qede/base/ecore_int.c
> index 2afca29..b57c510 100644
> --- a/drivers/net/qede/base/ecore_int.c
> +++ b/drivers/net/qede/base/ecore_int.c
> @@ -284,122 +284,119 @@ static enum _ecore_status_t
> ecore_grc_attn_cb(struct ecore_hwfn *p_hwfn)  #define
> ECORE_PGLUE_ATTENTION_ICPL_VALID (1 << 23)  #define
> ECORE_PGLUE_ATTENTION_ZLR_VALID (1 << 25)  #define
> ECORE_PGLUE_ATTENTION_ILT_VALID (1 << 23) -static enum
> _ecore_status_t ecore_pglub_rbc_attn_cb(struct ecore_hwfn *p_hwfn)
> +
> +enum _ecore_status_t ecore_pglueb_rbc_attn_handler(struct ecore_hwfn
> *p_hwfn,
> +						   struct ecore_ptt *p_ptt)
>  {
>  	u32 tmp;
> 
> -	tmp = ecore_rd(p_hwfn, p_hwfn->p_dpc_ptt,
> -		       PGLUE_B_REG_TX_ERR_WR_DETAILS2);
> +	tmp = ecore_rd(p_hwfn, p_ptt,
> PGLUE_B_REG_TX_ERR_WR_DETAILS2);
>  	if (tmp & ECORE_PGLUE_ATTENTION_VALID) {
>  		u32 addr_lo, addr_hi, details;
> 
> -		addr_lo = ecore_rd(p_hwfn, p_hwfn->p_dpc_ptt,
> +		addr_lo = ecore_rd(p_hwfn, p_ptt,
>  				   PGLUE_B_REG_TX_ERR_WR_ADD_31_0);
> -		addr_hi = ecore_rd(p_hwfn, p_hwfn->p_dpc_ptt,
> +		addr_hi = ecore_rd(p_hwfn, p_ptt,
>  				   PGLUE_B_REG_TX_ERR_WR_ADD_63_32);
> -		details = ecore_rd(p_hwfn, p_hwfn->p_dpc_ptt,
> +		details = ecore_rd(p_hwfn, p_ptt,
>  				   PGLUE_B_REG_TX_ERR_WR_DETAILS);
> 
> -		DP_INFO(p_hwfn,
> -			"Illegal write by chip to [%08x:%08x] blocked."
> -			"Details: %08x [PFID %02x, VFID %02x, VF_VALID
> %02x]"
> -			" Details2 %08x [Was_error %02x BME deassert
> %02x"
> -			" FID_enable deassert %02x]\n",
> -			addr_hi, addr_lo, details,
> -			(u8)((details &
> -
> ECORE_PGLUE_ATTENTION_DETAILS_PFID_MASK) >>
> -
> ECORE_PGLUE_ATTENTION_DETAILS_PFID_SHIFT),
> -			(u8)((details &
> -
> ECORE_PGLUE_ATTENTION_DETAILS_VFID_MASK) >>
> -
> ECORE_PGLUE_ATTENTION_DETAILS_VFID_SHIFT),
> -			(u8)((details &
> ECORE_PGLUE_ATTENTION_DETAILS_VF_VALID)
> -			     ? 1 : 0), tmp,
> -			(u8)((tmp &
> ECORE_PGLUE_ATTENTION_DETAILS2_WAS_ERR) ? 1
> -			     : 0),
> -			(u8)((tmp &
> ECORE_PGLUE_ATTENTION_DETAILS2_BME) ? 1 :
> -			     0),
> -			(u8)((tmp &
> ECORE_PGLUE_ATTENTION_DETAILS2_FID_EN) ? 1
> -			     : 0));
> +		DP_NOTICE(p_hwfn, false,
> +			  "Illegal write by chip to [%08x:%08x] blocked.
> Details: %08x [PFID %02x, VFID %02x, VF_VALID %02x] Details2 %08x
> [Was_error %02x BME deassert %02x FID_enable deassert %02x]\n",
> +			  addr_hi, addr_lo, details,
> +			  (u8)((details &
> +
> 	ECORE_PGLUE_ATTENTION_DETAILS_PFID_MASK) >>
> +
> ECORE_PGLUE_ATTENTION_DETAILS_PFID_SHIFT),
> +			  (u8)((details &
> +
> 	ECORE_PGLUE_ATTENTION_DETAILS_VFID_MASK) >>
> +
> ECORE_PGLUE_ATTENTION_DETAILS_VFID_SHIFT),
> +			  (u8)((details &
> +			       ECORE_PGLUE_ATTENTION_DETAILS_VF_VALID)
> ? 1 : 0),
> +			  tmp,
> +			  (u8)((tmp &
> ECORE_PGLUE_ATTENTION_DETAILS2_WAS_ERR) ?
> +				1 : 0),
> +			  (u8)((tmp &
> ECORE_PGLUE_ATTENTION_DETAILS2_BME) ?
> +				1 : 0),
> +			  (u8)((tmp &
> ECORE_PGLUE_ATTENTION_DETAILS2_FID_EN) ?
> +				1 : 0));
>  	}
> 
> -	tmp = ecore_rd(p_hwfn, p_hwfn->p_dpc_ptt,
> -		       PGLUE_B_REG_TX_ERR_RD_DETAILS2);
> +	tmp = ecore_rd(p_hwfn, p_ptt,
> PGLUE_B_REG_TX_ERR_RD_DETAILS2);
>  	if (tmp & ECORE_PGLUE_ATTENTION_RD_VALID) {
>  		u32 addr_lo, addr_hi, details;
> 
> -		addr_lo = ecore_rd(p_hwfn, p_hwfn->p_dpc_ptt,
> +		addr_lo = ecore_rd(p_hwfn, p_ptt,
>  				   PGLUE_B_REG_TX_ERR_RD_ADD_31_0);
> -		addr_hi = ecore_rd(p_hwfn, p_hwfn->p_dpc_ptt,
> +		addr_hi = ecore_rd(p_hwfn, p_ptt,
>  				   PGLUE_B_REG_TX_ERR_RD_ADD_63_32);
> -		details = ecore_rd(p_hwfn, p_hwfn->p_dpc_ptt,
> +		details = ecore_rd(p_hwfn, p_ptt,
>  				   PGLUE_B_REG_TX_ERR_RD_DETAILS);
> 
> -		DP_INFO(p_hwfn,
> -			"Illegal read by chip from [%08x:%08x] blocked."
> -			" Details: %08x [PFID %02x, VFID %02x, VF_VALID
> %02x]"
> -			" Details2 %08x [Was_error %02x BME deassert
> %02x"
> -			" FID_enable deassert %02x]\n",
> -			addr_hi, addr_lo, details,
> -			(u8)((details &
> -
> ECORE_PGLUE_ATTENTION_DETAILS_PFID_MASK) >>
> -
> ECORE_PGLUE_ATTENTION_DETAILS_PFID_SHIFT),
> -			(u8)((details &
> -
> ECORE_PGLUE_ATTENTION_DETAILS_VFID_MASK) >>
> -
> ECORE_PGLUE_ATTENTION_DETAILS_VFID_SHIFT),
> -			(u8)((details &
> ECORE_PGLUE_ATTENTION_DETAILS_VF_VALID)
> -			     ? 1 : 0), tmp,
> -			(u8)((tmp &
> ECORE_PGLUE_ATTENTION_DETAILS2_WAS_ERR) ? 1
> -			     : 0),
> -			(u8)((tmp &
> ECORE_PGLUE_ATTENTION_DETAILS2_BME) ? 1 :
> -			     0),
> -			(u8)((tmp &
> ECORE_PGLUE_ATTENTION_DETAILS2_FID_EN) ? 1
> -			     : 0));
> +		DP_NOTICE(p_hwfn, false,
> +			  "Illegal read by chip from [%08x:%08x] blocked.
> Details: %08x [PFID %02x, VFID %02x, VF_VALID %02x] Details2 %08x
> [Was_error %02x BME deassert %02x FID_enable deassert %02x]\n",
> +			  addr_hi, addr_lo, details,
> +			  (u8)((details &
> +
> 	ECORE_PGLUE_ATTENTION_DETAILS_PFID_MASK) >>
> +
> ECORE_PGLUE_ATTENTION_DETAILS_PFID_SHIFT),
> +			  (u8)((details &
> +
> 	ECORE_PGLUE_ATTENTION_DETAILS_VFID_MASK) >>
> +
> ECORE_PGLUE_ATTENTION_DETAILS_VFID_SHIFT),
> +			  (u8)((details &
> +			       ECORE_PGLUE_ATTENTION_DETAILS_VF_VALID)
> ? 1 : 0),
> +			  tmp,
> +			  (u8)((tmp &
> ECORE_PGLUE_ATTENTION_DETAILS2_WAS_ERR) ?
> +				1 : 0),
> +			  (u8)((tmp &
> ECORE_PGLUE_ATTENTION_DETAILS2_BME) ?
> +				1 : 0),
> +			  (u8)((tmp &
> ECORE_PGLUE_ATTENTION_DETAILS2_FID_EN) ?
> +				1 : 0));
>  	}
> 
> -	tmp = ecore_rd(p_hwfn, p_hwfn->p_dpc_ptt,
> -		       PGLUE_B_REG_TX_ERR_WR_DETAILS_ICPL);
> +	tmp = ecore_rd(p_hwfn, p_ptt,
> PGLUE_B_REG_TX_ERR_WR_DETAILS_ICPL);
>  	if (tmp & ECORE_PGLUE_ATTENTION_ICPL_VALID)
> -		DP_INFO(p_hwfn, "ICPL error - %08x\n", tmp);
> +		DP_NOTICE(p_hwfn, false, "ICPL erorr - %08x\n", tmp);
> 
> -	tmp = ecore_rd(p_hwfn, p_hwfn->p_dpc_ptt,
> -		       PGLUE_B_REG_MASTER_ZLR_ERR_DETAILS);
> +	tmp = ecore_rd(p_hwfn, p_ptt,
> PGLUE_B_REG_MASTER_ZLR_ERR_DETAILS);
>  	if (tmp & ECORE_PGLUE_ATTENTION_ZLR_VALID) {
>  		u32 addr_hi, addr_lo;
> 
> -		addr_lo = ecore_rd(p_hwfn, p_hwfn->p_dpc_ptt,
> +		addr_lo = ecore_rd(p_hwfn, p_ptt,
> 
> PGLUE_B_REG_MASTER_ZLR_ERR_ADD_31_0);
> -		addr_hi = ecore_rd(p_hwfn, p_hwfn->p_dpc_ptt,
> +		addr_hi = ecore_rd(p_hwfn, p_ptt,
> 
> PGLUE_B_REG_MASTER_ZLR_ERR_ADD_63_32);
> 
> -		DP_INFO(p_hwfn, "ICPL error - %08x [Address
> %08x:%08x]\n",
> -			tmp, addr_hi, addr_lo);
> +		DP_NOTICE(p_hwfn, false,
> +			  "ICPL erorr - %08x [Address %08x:%08x]\n",
> +			  tmp, addr_hi, addr_lo);
>  	}
> 
> -	tmp = ecore_rd(p_hwfn, p_hwfn->p_dpc_ptt,
> -		       PGLUE_B_REG_VF_ILT_ERR_DETAILS2);
> +	tmp = ecore_rd(p_hwfn, p_ptt,
> PGLUE_B_REG_VF_ILT_ERR_DETAILS2);
>  	if (tmp & ECORE_PGLUE_ATTENTION_ILT_VALID) {
>  		u32 addr_hi, addr_lo, details;
> 
> -		addr_lo = ecore_rd(p_hwfn, p_hwfn->p_dpc_ptt,
> +		addr_lo = ecore_rd(p_hwfn, p_ptt,
>  				   PGLUE_B_REG_VF_ILT_ERR_ADD_31_0);
> -		addr_hi = ecore_rd(p_hwfn, p_hwfn->p_dpc_ptt,
> +		addr_hi = ecore_rd(p_hwfn, p_ptt,
>  				   PGLUE_B_REG_VF_ILT_ERR_ADD_63_32);
> -		details = ecore_rd(p_hwfn, p_hwfn->p_dpc_ptt,
> +		details = ecore_rd(p_hwfn, p_ptt,
>  				   PGLUE_B_REG_VF_ILT_ERR_DETAILS);
> 
> -		DP_INFO(p_hwfn,
> -			"ILT error - Details %08x Details2 %08x"
> -			" [Address %08x:%08x]\n",
> -			details, tmp, addr_hi, addr_lo);
> +		DP_NOTICE(p_hwfn, false,
> +			  "ILT error - Details %08x Details2 %08x [Address
> %08x:%08x]\n",
> +			  details, tmp, addr_hi, addr_lo);
>  	}
> 
>  	/* Clear the indications */
> -	ecore_wr(p_hwfn, p_hwfn->p_dpc_ptt,
> -		 PGLUE_B_REG_LATCHED_ERRORS_CLR, (1 << 2));
> +	ecore_wr(p_hwfn, p_ptt, PGLUE_B_REG_LATCHED_ERRORS_CLR, (1
> << 2));
> 
>  	return ECORE_SUCCESS;
>  }
> 
> +static enum _ecore_status_t ecore_pglueb_rbc_attn_cb(struct
> ecore_hwfn
> +*p_hwfn) {
> +	return ecore_pglueb_rbc_attn_handler(p_hwfn, p_hwfn-
> >p_dpc_ptt); }
> +
>  static enum _ecore_status_t ecore_fw_assertion(struct ecore_hwfn
> *p_hwfn)  {
>  	DP_NOTICE(p_hwfn, false, "FW assertion!\n"); @@ -505,7 +502,7
> @@ enum aeu_invert_reg_special_type {
>  	 {			/* After Invert 2 */
>  	  {"PGLUE config_space", ATTENTION_SINGLE, OSAL_NULL,
> MAX_BLOCK_ID},
>  	  {"PGLUE misc_flr", ATTENTION_SINGLE, OSAL_NULL,
> MAX_BLOCK_ID},
> -	  {"PGLUE B RBC", ATTENTION_PAR_INT, ecore_pglub_rbc_attn_cb,
> +	  {"PGLUE B RBC", ATTENTION_PAR_INT, ecore_pglueb_rbc_attn_cb,
>  	   BLOCK_PGLUE_B},
>  	  {"PGLUE misc_mctp", ATTENTION_SINGLE, OSAL_NULL,
> MAX_BLOCK_ID},
>  	  {"Flash event", ATTENTION_SINGLE, OSAL_NULL, MAX_BLOCK_ID},
> @@ -827,8 +824,9 @@ static void ecore_int_attn_print(struct ecore_hwfn
> *p_hwfn,
>  				     ATTN_TYPE_INTERRUPT, !b_fatal);  }
> 
> +	/* @DPDK */
>  	/* Reach assertion if attention is fatal */
> -	if (b_fatal) {
> +	if (b_fatal || (strcmp(p_bit_name, "PGLUE B RBC") == 0)) {
>  		DP_NOTICE(p_hwfn, true, "`%s': Fatal attention\n",
>  			  p_bit_name);
> 
> diff --git a/drivers/net/qede/base/ecore_int.h
> b/drivers/net/qede/base/ecore_int.h
> index 0c8929e..067ed60 100644
> --- a/drivers/net/qede/base/ecore_int.h
> +++ b/drivers/net/qede/base/ecore_int.h
> @@ -208,4 +208,7 @@ enum _ecore_status_t
> ecore_int_set_timer_res(struct ecore_hwfn *p_hwfn,  #define
> ECORE_MAPPING_MEMORY_SIZE(dev) NUM_OF_SBS(dev)  #endif
> 
> +enum _ecore_status_t ecore_pglueb_rbc_attn_handler(struct ecore_hwfn
> *p_hwfn,
> +						   struct ecore_ptt *p_ptt);
> +
>  #endif /* __ECORE_INT_H__ */
> diff --git a/drivers/net/qede/base/ecore_mcp.c
> b/drivers/net/qede/base/ecore_mcp.c
> index 03cc901..88c5ceb 100644
> --- a/drivers/net/qede/base/ecore_mcp.c
> +++ b/drivers/net/qede/base/ecore_mcp.c
> @@ -893,6 +893,30 @@ enum _ecore_status_t ecore_mcp_load_req(struct
> ecore_hwfn *p_hwfn,
>  	return ECORE_SUCCESS;
>  }
> 
> +enum _ecore_status_t ecore_mcp_load_done(struct ecore_hwfn
> *p_hwfn,
> +					 struct ecore_ptt *p_ptt)
> +{
> +	u32 resp = 0, param = 0;
> +	enum _ecore_status_t rc;
> +
> +	rc = ecore_mcp_cmd(p_hwfn, p_ptt,
> DRV_MSG_CODE_LOAD_DONE, 0, &resp,
> +			   &param);
> +	if (rc != ECORE_SUCCESS) {
> +		DP_NOTICE(p_hwfn, false,
> +			  "Failed to send a LOAD_DONE command, rc =
> %d\n", rc);
> +		return rc;
> +	}
> +
> +#define FW_MB_PARAM_LOAD_DONE_DID_EFUSE_ERROR     (1 << 0)
> +
> +	/* Check if there is a DID mismatch between nvm-cfg/efuse */
> +	if (param & FW_MB_PARAM_LOAD_DONE_DID_EFUSE_ERROR)
> +		DP_NOTICE(p_hwfn, false,
> +			  "warning: device configuration is not supported on
> this board
> +type. The device may not function as expected.\n");
> +
> +	return ECORE_SUCCESS;
> +}
> +
>  enum _ecore_status_t ecore_mcp_unload_req(struct ecore_hwfn
> *p_hwfn,
>  					  struct ecore_ptt *p_ptt)
>  {
> @@ -2893,6 +2917,27 @@ struct ecore_resc_alloc_out_params {
>  	u32 flags;
>  };
> 
> +#define ECORE_RECOVERY_PROLOG_SLEEP_MS	100
> +
> +enum _ecore_status_t ecore_recovery_prolog(struct ecore_dev *p_dev) {
> +	struct ecore_hwfn *p_hwfn = ECORE_LEADING_HWFN(p_dev);
> +	struct ecore_ptt *p_ptt = p_hwfn->p_main_ptt;
> +	enum _ecore_status_t rc;
> +
> +	/* Allow ongoing PCIe transactions to complete */
> +	OSAL_MSLEEP(ECORE_RECOVERY_PROLOG_SLEEP_MS);
> +
> +	/* Clear the PF's internal FID_enable in the PXP */
> +	rc = ecore_pglueb_set_pfid_enable(p_hwfn, p_ptt, false);
> +	if (rc != ECORE_SUCCESS)
> +		DP_NOTICE(p_hwfn, false,
> +			  "ecore_pglueb_set_pfid_enable() failed. rc =
> %d.\n",
> +			  rc);
> +
> +	return rc;
> +}
> +
>  static enum _ecore_status_t
>  ecore_mcp_resc_allocation_msg(struct ecore_hwfn *p_hwfn,
>  			      struct ecore_ptt *p_ptt,
> diff --git a/drivers/net/qede/base/ecore_mcp.h
> b/drivers/net/qede/base/ecore_mcp.h
> index 37d1835..77fb5a3 100644
> --- a/drivers/net/qede/base/ecore_mcp.h
> +++ b/drivers/net/qede/base/ecore_mcp.h
> @@ -171,6 +171,17 @@ enum _ecore_status_t ecore_mcp_load_req(struct
> ecore_hwfn *p_hwfn,
>  					struct ecore_load_req_params
> *p_params);
> 
>  /**
> + * @brief Sends a LOAD_DONE message to the MFW
> + *
> + * @param p_hwfn
> + * @param p_ptt
> + *
> + * @return enum _ecore_status_t - ECORE_SUCCESS - Operation was
> successful.
> + */
> +enum _ecore_status_t ecore_mcp_load_done(struct ecore_hwfn
> *p_hwfn,
> +					 struct ecore_ptt *p_ptt);
> +
> +/**
>   * @brief Sends a UNLOAD_REQ message to the MFW
>   *
>   * @param p_hwfn
> diff --git a/drivers/net/qede/base/ecore_mcp_api.h
> b/drivers/net/qede/base/ecore_mcp_api.h
> index 190c135..abc190c 100644
> --- a/drivers/net/qede/base/ecore_mcp_api.h
> +++ b/drivers/net/qede/base/ecore_mcp_api.h
> @@ -736,6 +736,17 @@ enum _ecore_status_t
> ecore_start_recovery_process(struct ecore_hwfn *p_hwfn,
>  						  struct ecore_ptt *p_ptt);
> 
>  /**
> + * @brief A recovery handler must call this function as its first step.
> + *        It is assumed that the handler is not run from an interrupt context.
> + *
> + *  @param p_dev
> + *  @param p_ptt
> + *
> + * @return enum _ecore_status_t
> + */
> +enum _ecore_status_t ecore_recovery_prolog(struct ecore_dev *p_dev);
> +
> +/**
>   * @brief Notify MFW about the change in base device properties
>   *
>   *  @param p_hwfn
> --
> 1.7.10.3
  
Thomas Monjalon July 24, 2017, 3 p.m. UTC | #2
24/07/2017 13:18, Mody, Rasesh:
> Please ignore this patch-set with 'INTERNAL REVIEW' subject prefix. Sending a follow up with proper subject prefix.

Please update patchwork as well.
  

Patch

diff --git a/drivers/net/qede/base/ecore_dev.c b/drivers/net/qede/base/ecore_dev.c
index 4cfa668..65b89b8 100644
--- a/drivers/net/qede/base/ecore_dev.c
+++ b/drivers/net/qede/base/ecore_dev.c
@@ -1080,7 +1080,7 @@  enum _ecore_status_t ecore_final_cleanup(struct ecore_hwfn *p_hwfn,
 	}
 
 	DP_VERBOSE(p_hwfn, ECORE_MSG_IOV,
-		   "Sending final cleanup for PFVF[%d] [Command %08x\n]",
+		   "Sending final cleanup for PFVF[%d] [Command %08x]\n",
 		   id, command);
 
 	ecore_wr(p_hwfn, p_ptt, XSDM_REG_OPERATION_GEN, command);
@@ -1776,13 +1776,6 @@  static enum _ecore_status_t ecore_hw_init_port(struct ecore_hwfn *p_hwfn,
 	/* perform debug configuration when chip is out of reset */
 	OSAL_BEFORE_PF_START((void *)p_hwfn->p_dev, p_hwfn->my_id);
 
-	/* Cleanup chip from previous driver if such remains exist */
-	rc = ecore_final_cleanup(p_hwfn, p_ptt, rel_pf_id, false);
-	if (rc != ECORE_SUCCESS) {
-		ecore_hw_err_notify(p_hwfn, ECORE_HW_ERR_RAMROD_FAIL);
-		return rc;
-	}
-
 	/* PF Init sequence */
 	rc = ecore_init_run(p_hwfn, p_ptt, PHASE_PF, rel_pf_id, hw_mode);
 	if (rc)
@@ -1866,17 +1859,17 @@  static enum _ecore_status_t ecore_hw_init_port(struct ecore_hwfn *p_hwfn,
 	return rc;
 }
 
-static enum _ecore_status_t
-ecore_change_pci_hwfn(struct ecore_hwfn *p_hwfn,
-		      struct ecore_ptt *p_ptt, u8 enable)
+enum _ecore_status_t ecore_pglueb_set_pfid_enable(struct ecore_hwfn *p_hwfn,
+						  struct ecore_ptt *p_ptt,
+						  bool b_enable)
 {
-	u32 delay_idx = 0, val, set_val = enable ? 1 : 0;
+	u32 delay_idx = 0, val, set_val = b_enable ? 1 : 0;
 
-	/* Change PF in PXP */
+	/* Configure the PF's internal FID_enable for master transactions */
 	ecore_wr(p_hwfn, p_ptt,
 		 PGLUE_B_REG_INTERNAL_PFID_ENABLE_MASTER, set_val);
 
-	/* wait until value is set - try for 1 second every 50us */
+	/* Wait until value is set - try for 1 second every 50us */
 	for (delay_idx = 0; delay_idx < 20000; delay_idx++) {
 		val = ecore_rd(p_hwfn, p_ptt,
 			       PGLUE_B_REG_INTERNAL_PFID_ENABLE_MASTER);
@@ -1918,14 +1911,21 @@  enum _ecore_status_t ecore_vf_start(struct ecore_hwfn *p_hwfn,
 	return ECORE_SUCCESS;
 }
 
+static void ecore_pglueb_clear_err(struct ecore_hwfn *p_hwfn,
+				     struct ecore_ptt *p_ptt)
+{
+	ecore_wr(p_hwfn, p_ptt, PGLUE_B_REG_WAS_ERROR_PF_31_0_CLR,
+		 1 << p_hwfn->abs_pf_id);
+}
+
 enum _ecore_status_t ecore_hw_init(struct ecore_dev *p_dev,
 				   struct ecore_hw_init_params *p_params)
 {
 	struct ecore_load_req_params load_req_params;
-	u32 load_code, param, drv_mb_param;
+	u32 load_code, resp, param, drv_mb_param;
 	bool b_default_mtu = true;
 	struct ecore_hwfn *p_hwfn;
-	enum _ecore_status_t rc = ECORE_SUCCESS, mfw_rc;
+	enum _ecore_status_t rc = ECORE_SUCCESS;
 	int i;
 
 	if ((p_params->int_mode == ECORE_INT_MODE_MSI) &&
@@ -1942,7 +1942,7 @@  enum _ecore_status_t ecore_hw_init(struct ecore_dev *p_dev,
 	}
 
 	for_each_hwfn(p_dev, i) {
-		struct ecore_hwfn *p_hwfn = &p_dev->hwfns[i];
+		p_hwfn = &p_dev->hwfns[i];
 
 		/* If management didn't provide a default, set one of our own */
 		if (!p_hwfn->hw_info.mtu) {
@@ -1955,11 +1955,6 @@  enum _ecore_status_t ecore_hw_init(struct ecore_dev *p_dev,
 			continue;
 		}
 
-		/* Enable DMAE in PXP */
-		rc = ecore_change_pci_hwfn(p_hwfn, p_hwfn->p_main_ptt, true);
-		if (rc != ECORE_SUCCESS)
-			return rc;
-
 		rc = ecore_calc_hw_mode(p_hwfn);
 		if (rc != ECORE_SUCCESS)
 			return rc;
@@ -2009,6 +2004,30 @@  enum _ecore_status_t ecore_hw_init(struct ecore_dev *p_dev,
 			qm_lock_init = true;
 		}
 
+		/* Clean up chip from previous driver if such remains exist.
+		 * This is not needed when the PF is the first one on the
+		 * engine, since afterwards we are going to init the FW.
+		 */
+		if (load_code != FW_MSG_CODE_DRV_LOAD_ENGINE) {
+			rc = ecore_final_cleanup(p_hwfn, p_hwfn->p_main_ptt,
+						 p_hwfn->rel_pf_id, false);
+			if (rc != ECORE_SUCCESS) {
+				ecore_hw_err_notify(p_hwfn,
+						    ECORE_HW_ERR_RAMROD_FAIL);
+				goto load_err;
+			}
+		}
+
+		/* Log and clean previous pglue_b errors if such exist */
+		ecore_pglueb_rbc_attn_handler(p_hwfn, p_hwfn->p_main_ptt);
+		ecore_pglueb_clear_err(p_hwfn, p_hwfn->p_main_ptt);
+
+		/* Enable the PF's internal FID_enable in the PXP */
+		rc = ecore_pglueb_set_pfid_enable(p_hwfn, p_hwfn->p_main_ptt,
+						  true);
+		if (rc != ECORE_SUCCESS)
+			goto load_err;
+
 		switch (load_code) {
 		case FW_MSG_CODE_DRV_LOAD_ENGINE:
 			rc = ecore_hw_init_common(p_hwfn, p_hwfn->p_main_ptt,
@@ -2037,35 +2056,28 @@  enum _ecore_status_t ecore_hw_init(struct ecore_dev *p_dev,
 			break;
 		}
 
-		if (rc != ECORE_SUCCESS)
+		if (rc != ECORE_SUCCESS) {
 			DP_NOTICE(p_hwfn, true,
 				  "init phase failed for loadcode 0x%x (rc %d)\n",
 				  load_code, rc);
+			goto load_err;
+		}
 
-		/* ACK mfw regardless of success or failure of initialization */
-		mfw_rc = ecore_mcp_cmd(p_hwfn, p_hwfn->p_main_ptt,
-				       DRV_MSG_CODE_LOAD_DONE,
-				       0, &load_code, &param);
+		rc = ecore_mcp_load_done(p_hwfn, p_hwfn->p_main_ptt);
 		if (rc != ECORE_SUCCESS)
 			return rc;
 
-		if (mfw_rc != ECORE_SUCCESS) {
-			DP_NOTICE(p_hwfn, true,
-				  "Failed sending a LOAD_DONE command\n");
-			return mfw_rc;
-		}
-
 		/* send DCBX attention request command */
 		DP_VERBOSE(p_hwfn, ECORE_MSG_DCB,
 			   "sending phony dcbx set command to trigger DCBx attention handling\n");
-		mfw_rc = ecore_mcp_cmd(p_hwfn, p_hwfn->p_main_ptt,
-				       DRV_MSG_CODE_SET_DCBX,
-				       1 << DRV_MB_PARAM_DCBX_NOTIFY_SHIFT,
-				       &load_code, &param);
-		if (mfw_rc != ECORE_SUCCESS) {
+		rc = ecore_mcp_cmd(p_hwfn, p_hwfn->p_main_ptt,
+				   DRV_MSG_CODE_SET_DCBX,
+				   1 << DRV_MB_PARAM_DCBX_NOTIFY_SHIFT, &resp,
+				   &param);
+		if (rc != ECORE_SUCCESS) {
 			DP_NOTICE(p_hwfn, true,
 				  "Failed to send DCBX attention request\n");
-			return mfw_rc;
+			return rc;
 		}
 
 		p_hwfn->hw_init_done = true;
@@ -2076,7 +2088,7 @@  enum _ecore_status_t ecore_hw_init(struct ecore_dev *p_dev,
 		drv_mb_param = STORM_FW_VERSION;
 		rc = ecore_mcp_cmd(p_hwfn, p_hwfn->p_main_ptt,
 				   DRV_MSG_CODE_OV_UPDATE_STORM_FW_VER,
-				   drv_mb_param, &load_code, &param);
+				   drv_mb_param, &resp, &param);
 		if (rc != ECORE_SUCCESS)
 			DP_INFO(p_hwfn, "Failed to update firmware version\n");
 
@@ -2094,6 +2106,14 @@  enum _ecore_status_t ecore_hw_init(struct ecore_dev *p_dev,
 	}
 
 	return rc;
+
+load_err:
+	/* The MFW load lock should be released regardless of success or failure
+	 * of initialization.
+	 * TODO: replace this with an attempt to send cancel_load.
+	 */
+	ecore_mcp_load_done(p_hwfn, p_hwfn->p_main_ptt);
+	return rc;
 }
 
 #define ECORE_HW_STOP_RETRY_LIMIT	(10)
@@ -2261,18 +2281,20 @@  enum _ecore_status_t ecore_hw_stop(struct ecore_dev *p_dev)
 		}
 	} /* hwfn loop */
 
-	if (IS_PF(p_dev)) {
+	if (IS_PF(p_dev) && !p_dev->recov_in_prog) {
 		p_hwfn = ECORE_LEADING_HWFN(p_dev);
 		p_ptt = ECORE_LEADING_HWFN(p_dev)->p_main_ptt;
 
-		/* Disable DMAE in PXP - in CMT, this should only be done for
-		 * first hw-function, and only after all transactions have
-		 * stopped for all active hw-functions.
-		 */
-		rc = ecore_change_pci_hwfn(p_hwfn, p_ptt, false);
+		/* Clear the PF's internal FID_enable in the PXP.
+		 * In CMT this should only be done for first hw-function, and
+		 * only after all transactions have stopped for all active
+		 * hw-functions.
+		 */
+		rc = ecore_pglueb_set_pfid_enable(p_hwfn, p_hwfn->p_main_ptt,
+						  false);
 		if (rc != ECORE_SUCCESS) {
 			DP_NOTICE(p_hwfn, true,
-				  "ecore_change_pci_hwfn failed. rc = %d.\n",
+				  "ecore_pglueb_set_pfid_enable() failed. rc = %d.\n",
 				  rc);
 			rc2 = ECORE_UNKNOWN_ERROR;
 		}
@@ -2370,9 +2392,8 @@  static void ecore_hw_hwfn_prepare(struct ecore_hwfn *p_hwfn)
 			 PGLUE_B_REG_PGL_ADDR_94_F0_BB, 0);
 	}
 
-	/* Clean Previous errors if such exist */
-	ecore_wr(p_hwfn, p_hwfn->p_main_ptt,
-		 PGLUE_B_REG_WAS_ERROR_PF_31_0_CLR, 1 << p_hwfn->abs_pf_id);
+	/* Clean previous pglue_b errors if such exist */
+	ecore_pglueb_clear_err(p_hwfn, p_hwfn->p_main_ptt);
 
 	/* enable internal target-read */
 	ecore_wr(p_hwfn, p_hwfn->p_main_ptt,
diff --git a/drivers/net/qede/base/ecore_dev_api.h b/drivers/net/qede/base/ecore_dev_api.h
index 886407b..eea22e0 100644
--- a/drivers/net/qede/base/ecore_dev_api.h
+++ b/drivers/net/qede/base/ecore_dev_api.h
@@ -584,4 +584,16 @@  enum _ecore_status_t
 ecore_set_queue_coalesce(struct ecore_hwfn *p_hwfn, u16 rx_coal,
 			 u16 tx_coal, void *p_handle);
 
+/**
+ * @brief ecore_pglueb_set_pfid_enable - Enable or disable PCI BUS MASTER
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ * @param b_enable - true to enable, false to disable
+ *
+ * @return enum _ecore_status_t
+ */
+enum _ecore_status_t ecore_pglueb_set_pfid_enable(struct ecore_hwfn *p_hwfn,
+						  struct ecore_ptt *p_ptt,
+						  bool b_enable);
 #endif
diff --git a/drivers/net/qede/base/ecore_int.c b/drivers/net/qede/base/ecore_int.c
index 2afca29..b57c510 100644
--- a/drivers/net/qede/base/ecore_int.c
+++ b/drivers/net/qede/base/ecore_int.c
@@ -284,122 +284,119 @@  static enum _ecore_status_t ecore_grc_attn_cb(struct ecore_hwfn *p_hwfn)
 #define ECORE_PGLUE_ATTENTION_ICPL_VALID (1 << 23)
 #define ECORE_PGLUE_ATTENTION_ZLR_VALID (1 << 25)
 #define ECORE_PGLUE_ATTENTION_ILT_VALID (1 << 23)
-static enum _ecore_status_t ecore_pglub_rbc_attn_cb(struct ecore_hwfn *p_hwfn)
+
+enum _ecore_status_t ecore_pglueb_rbc_attn_handler(struct ecore_hwfn *p_hwfn,
+						   struct ecore_ptt *p_ptt)
 {
 	u32 tmp;
 
-	tmp = ecore_rd(p_hwfn, p_hwfn->p_dpc_ptt,
-		       PGLUE_B_REG_TX_ERR_WR_DETAILS2);
+	tmp = ecore_rd(p_hwfn, p_ptt, PGLUE_B_REG_TX_ERR_WR_DETAILS2);
 	if (tmp & ECORE_PGLUE_ATTENTION_VALID) {
 		u32 addr_lo, addr_hi, details;
 
-		addr_lo = ecore_rd(p_hwfn, p_hwfn->p_dpc_ptt,
+		addr_lo = ecore_rd(p_hwfn, p_ptt,
 				   PGLUE_B_REG_TX_ERR_WR_ADD_31_0);
-		addr_hi = ecore_rd(p_hwfn, p_hwfn->p_dpc_ptt,
+		addr_hi = ecore_rd(p_hwfn, p_ptt,
 				   PGLUE_B_REG_TX_ERR_WR_ADD_63_32);
-		details = ecore_rd(p_hwfn, p_hwfn->p_dpc_ptt,
+		details = ecore_rd(p_hwfn, p_ptt,
 				   PGLUE_B_REG_TX_ERR_WR_DETAILS);
 
-		DP_INFO(p_hwfn,
-			"Illegal write by chip to [%08x:%08x] blocked."
-			"Details: %08x [PFID %02x, VFID %02x, VF_VALID %02x]"
-			" Details2 %08x [Was_error %02x BME deassert %02x"
-			" FID_enable deassert %02x]\n",
-			addr_hi, addr_lo, details,
-			(u8)((details &
-			      ECORE_PGLUE_ATTENTION_DETAILS_PFID_MASK) >>
-			     ECORE_PGLUE_ATTENTION_DETAILS_PFID_SHIFT),
-			(u8)((details &
-			      ECORE_PGLUE_ATTENTION_DETAILS_VFID_MASK) >>
-			     ECORE_PGLUE_ATTENTION_DETAILS_VFID_SHIFT),
-			(u8)((details & ECORE_PGLUE_ATTENTION_DETAILS_VF_VALID)
-			     ? 1 : 0), tmp,
-			(u8)((tmp & ECORE_PGLUE_ATTENTION_DETAILS2_WAS_ERR) ? 1
-			     : 0),
-			(u8)((tmp & ECORE_PGLUE_ATTENTION_DETAILS2_BME) ? 1 :
-			     0),
-			(u8)((tmp & ECORE_PGLUE_ATTENTION_DETAILS2_FID_EN) ? 1
-			     : 0));
+		DP_NOTICE(p_hwfn, false,
+			  "Illegal write by chip to [%08x:%08x] blocked. Details: %08x [PFID %02x, VFID %02x, VF_VALID %02x] Details2 %08x [Was_error %02x BME deassert %02x FID_enable deassert %02x]\n",
+			  addr_hi, addr_lo, details,
+			  (u8)((details &
+				ECORE_PGLUE_ATTENTION_DETAILS_PFID_MASK) >>
+			       ECORE_PGLUE_ATTENTION_DETAILS_PFID_SHIFT),
+			  (u8)((details &
+				ECORE_PGLUE_ATTENTION_DETAILS_VFID_MASK) >>
+			       ECORE_PGLUE_ATTENTION_DETAILS_VFID_SHIFT),
+			  (u8)((details &
+			       ECORE_PGLUE_ATTENTION_DETAILS_VF_VALID) ? 1 : 0),
+			  tmp,
+			  (u8)((tmp & ECORE_PGLUE_ATTENTION_DETAILS2_WAS_ERR) ?
+				1 : 0),
+			  (u8)((tmp & ECORE_PGLUE_ATTENTION_DETAILS2_BME) ?
+				1 : 0),
+			  (u8)((tmp & ECORE_PGLUE_ATTENTION_DETAILS2_FID_EN) ?
+				1 : 0));
 	}
 
-	tmp = ecore_rd(p_hwfn, p_hwfn->p_dpc_ptt,
-		       PGLUE_B_REG_TX_ERR_RD_DETAILS2);
+	tmp = ecore_rd(p_hwfn, p_ptt, PGLUE_B_REG_TX_ERR_RD_DETAILS2);
 	if (tmp & ECORE_PGLUE_ATTENTION_RD_VALID) {
 		u32 addr_lo, addr_hi, details;
 
-		addr_lo = ecore_rd(p_hwfn, p_hwfn->p_dpc_ptt,
+		addr_lo = ecore_rd(p_hwfn, p_ptt,
 				   PGLUE_B_REG_TX_ERR_RD_ADD_31_0);
-		addr_hi = ecore_rd(p_hwfn, p_hwfn->p_dpc_ptt,
+		addr_hi = ecore_rd(p_hwfn, p_ptt,
 				   PGLUE_B_REG_TX_ERR_RD_ADD_63_32);
-		details = ecore_rd(p_hwfn, p_hwfn->p_dpc_ptt,
+		details = ecore_rd(p_hwfn, p_ptt,
 				   PGLUE_B_REG_TX_ERR_RD_DETAILS);
 
-		DP_INFO(p_hwfn,
-			"Illegal read by chip from [%08x:%08x] blocked."
-			" Details: %08x [PFID %02x, VFID %02x, VF_VALID %02x]"
-			" Details2 %08x [Was_error %02x BME deassert %02x"
-			" FID_enable deassert %02x]\n",
-			addr_hi, addr_lo, details,
-			(u8)((details &
-			      ECORE_PGLUE_ATTENTION_DETAILS_PFID_MASK) >>
-			     ECORE_PGLUE_ATTENTION_DETAILS_PFID_SHIFT),
-			(u8)((details &
-			      ECORE_PGLUE_ATTENTION_DETAILS_VFID_MASK) >>
-			     ECORE_PGLUE_ATTENTION_DETAILS_VFID_SHIFT),
-			(u8)((details & ECORE_PGLUE_ATTENTION_DETAILS_VF_VALID)
-			     ? 1 : 0), tmp,
-			(u8)((tmp & ECORE_PGLUE_ATTENTION_DETAILS2_WAS_ERR) ? 1
-			     : 0),
-			(u8)((tmp & ECORE_PGLUE_ATTENTION_DETAILS2_BME) ? 1 :
-			     0),
-			(u8)((tmp & ECORE_PGLUE_ATTENTION_DETAILS2_FID_EN) ? 1
-			     : 0));
+		DP_NOTICE(p_hwfn, false,
+			  "Illegal read by chip from [%08x:%08x] blocked. Details: %08x [PFID %02x, VFID %02x, VF_VALID %02x] Details2 %08x [Was_error %02x BME deassert %02x FID_enable deassert %02x]\n",
+			  addr_hi, addr_lo, details,
+			  (u8)((details &
+				ECORE_PGLUE_ATTENTION_DETAILS_PFID_MASK) >>
+			       ECORE_PGLUE_ATTENTION_DETAILS_PFID_SHIFT),
+			  (u8)((details &
+				ECORE_PGLUE_ATTENTION_DETAILS_VFID_MASK) >>
+			       ECORE_PGLUE_ATTENTION_DETAILS_VFID_SHIFT),
+			  (u8)((details &
+			       ECORE_PGLUE_ATTENTION_DETAILS_VF_VALID) ? 1 : 0),
+			  tmp,
+			  (u8)((tmp & ECORE_PGLUE_ATTENTION_DETAILS2_WAS_ERR) ?
+				1 : 0),
+			  (u8)((tmp & ECORE_PGLUE_ATTENTION_DETAILS2_BME) ?
+				1 : 0),
+			  (u8)((tmp & ECORE_PGLUE_ATTENTION_DETAILS2_FID_EN) ?
+				1 : 0));
 	}
 
-	tmp = ecore_rd(p_hwfn, p_hwfn->p_dpc_ptt,
-		       PGLUE_B_REG_TX_ERR_WR_DETAILS_ICPL);
+	tmp = ecore_rd(p_hwfn, p_ptt, PGLUE_B_REG_TX_ERR_WR_DETAILS_ICPL);
 	if (tmp & ECORE_PGLUE_ATTENTION_ICPL_VALID)
-		DP_INFO(p_hwfn, "ICPL error - %08x\n", tmp);
+		DP_NOTICE(p_hwfn, false, "ICPL error - %08x\n", tmp);
 
-	tmp = ecore_rd(p_hwfn, p_hwfn->p_dpc_ptt,
-		       PGLUE_B_REG_MASTER_ZLR_ERR_DETAILS);
+	tmp = ecore_rd(p_hwfn, p_ptt, PGLUE_B_REG_MASTER_ZLR_ERR_DETAILS);
 	if (tmp & ECORE_PGLUE_ATTENTION_ZLR_VALID) {
 		u32 addr_hi, addr_lo;
 
-		addr_lo = ecore_rd(p_hwfn, p_hwfn->p_dpc_ptt,
+		addr_lo = ecore_rd(p_hwfn, p_ptt,
 				   PGLUE_B_REG_MASTER_ZLR_ERR_ADD_31_0);
-		addr_hi = ecore_rd(p_hwfn, p_hwfn->p_dpc_ptt,
+		addr_hi = ecore_rd(p_hwfn, p_ptt,
 				   PGLUE_B_REG_MASTER_ZLR_ERR_ADD_63_32);
 
-		DP_INFO(p_hwfn, "ICPL error - %08x [Address %08x:%08x]\n",
-			tmp, addr_hi, addr_lo);
+		DP_NOTICE(p_hwfn, false,
+			  "ZLR error - %08x [Address %08x:%08x]\n",
+			  tmp, addr_hi, addr_lo);
 	}
 
-	tmp = ecore_rd(p_hwfn, p_hwfn->p_dpc_ptt,
-		       PGLUE_B_REG_VF_ILT_ERR_DETAILS2);
+	tmp = ecore_rd(p_hwfn, p_ptt, PGLUE_B_REG_VF_ILT_ERR_DETAILS2);
 	if (tmp & ECORE_PGLUE_ATTENTION_ILT_VALID) {
 		u32 addr_hi, addr_lo, details;
 
-		addr_lo = ecore_rd(p_hwfn, p_hwfn->p_dpc_ptt,
+		addr_lo = ecore_rd(p_hwfn, p_ptt,
 				   PGLUE_B_REG_VF_ILT_ERR_ADD_31_0);
-		addr_hi = ecore_rd(p_hwfn, p_hwfn->p_dpc_ptt,
+		addr_hi = ecore_rd(p_hwfn, p_ptt,
 				   PGLUE_B_REG_VF_ILT_ERR_ADD_63_32);
-		details = ecore_rd(p_hwfn, p_hwfn->p_dpc_ptt,
+		details = ecore_rd(p_hwfn, p_ptt,
 				   PGLUE_B_REG_VF_ILT_ERR_DETAILS);
 
-		DP_INFO(p_hwfn,
-			"ILT error - Details %08x Details2 %08x"
-			" [Address %08x:%08x]\n",
-			details, tmp, addr_hi, addr_lo);
+		DP_NOTICE(p_hwfn, false,
+			  "ILT error - Details %08x Details2 %08x [Address %08x:%08x]\n",
+			  details, tmp, addr_hi, addr_lo);
 	}
 
 	/* Clear the indications */
-	ecore_wr(p_hwfn, p_hwfn->p_dpc_ptt,
-		 PGLUE_B_REG_LATCHED_ERRORS_CLR, (1 << 2));
+	ecore_wr(p_hwfn, p_ptt, PGLUE_B_REG_LATCHED_ERRORS_CLR, (1 << 2));
 
 	return ECORE_SUCCESS;
 }
 
+static enum _ecore_status_t ecore_pglueb_rbc_attn_cb(struct ecore_hwfn *p_hwfn)
+{
+	return ecore_pglueb_rbc_attn_handler(p_hwfn, p_hwfn->p_dpc_ptt);
+}
+
 static enum _ecore_status_t ecore_fw_assertion(struct ecore_hwfn *p_hwfn)
 {
 	DP_NOTICE(p_hwfn, false, "FW assertion!\n");
@@ -505,7 +502,7 @@  enum aeu_invert_reg_special_type {
 	 {			/* After Invert 2 */
 	  {"PGLUE config_space", ATTENTION_SINGLE, OSAL_NULL, MAX_BLOCK_ID},
 	  {"PGLUE misc_flr", ATTENTION_SINGLE, OSAL_NULL, MAX_BLOCK_ID},
-	  {"PGLUE B RBC", ATTENTION_PAR_INT, ecore_pglub_rbc_attn_cb,
+	  {"PGLUE B RBC", ATTENTION_PAR_INT, ecore_pglueb_rbc_attn_cb,
 	   BLOCK_PGLUE_B},
 	  {"PGLUE misc_mctp", ATTENTION_SINGLE, OSAL_NULL, MAX_BLOCK_ID},
 	  {"Flash event", ATTENTION_SINGLE, OSAL_NULL, MAX_BLOCK_ID},
@@ -827,8 +824,9 @@  static void ecore_int_attn_print(struct ecore_hwfn *p_hwfn,
 				     ATTN_TYPE_INTERRUPT, !b_fatal);
 }
 
+	/* @DPDK */
 	/* Reach assertion if attention is fatal */
-	if (b_fatal) {
+	if (b_fatal || (strcmp(p_bit_name, "PGLUE B RBC") == 0)) {
 		DP_NOTICE(p_hwfn, true, "`%s': Fatal attention\n",
 			  p_bit_name);
 
diff --git a/drivers/net/qede/base/ecore_int.h b/drivers/net/qede/base/ecore_int.h
index 0c8929e..067ed60 100644
--- a/drivers/net/qede/base/ecore_int.h
+++ b/drivers/net/qede/base/ecore_int.h
@@ -208,4 +208,7 @@  enum _ecore_status_t ecore_int_set_timer_res(struct ecore_hwfn *p_hwfn,
 #define ECORE_MAPPING_MEMORY_SIZE(dev) NUM_OF_SBS(dev)
 #endif
 
+enum _ecore_status_t ecore_pglueb_rbc_attn_handler(struct ecore_hwfn *p_hwfn,
+						   struct ecore_ptt *p_ptt);
+
 #endif /* __ECORE_INT_H__ */
diff --git a/drivers/net/qede/base/ecore_mcp.c b/drivers/net/qede/base/ecore_mcp.c
index 03cc901..88c5ceb 100644
--- a/drivers/net/qede/base/ecore_mcp.c
+++ b/drivers/net/qede/base/ecore_mcp.c
@@ -893,6 +893,30 @@  enum _ecore_status_t ecore_mcp_load_req(struct ecore_hwfn *p_hwfn,
 	return ECORE_SUCCESS;
 }
 
+enum _ecore_status_t ecore_mcp_load_done(struct ecore_hwfn *p_hwfn,
+					 struct ecore_ptt *p_ptt)
+{
+	u32 resp = 0, param = 0;
+	enum _ecore_status_t rc;
+
+	rc = ecore_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_LOAD_DONE, 0, &resp,
+			   &param);
+	if (rc != ECORE_SUCCESS) {
+		DP_NOTICE(p_hwfn, false,
+			  "Failed to send a LOAD_DONE command, rc = %d\n", rc);
+		return rc;
+	}
+
+#define FW_MB_PARAM_LOAD_DONE_DID_EFUSE_ERROR     (1 << 0)
+
+	/* Check if there is a DID mismatch between nvm-cfg/efuse */
+	if (param & FW_MB_PARAM_LOAD_DONE_DID_EFUSE_ERROR)
+		DP_NOTICE(p_hwfn, false,
+			  "warning: device configuration is not supported on this board type. The device may not function as expected.\n");
+
+	return ECORE_SUCCESS;
+}
+
 enum _ecore_status_t ecore_mcp_unload_req(struct ecore_hwfn *p_hwfn,
 					  struct ecore_ptt *p_ptt)
 {
@@ -2893,6 +2917,27 @@  struct ecore_resc_alloc_out_params {
 	u32 flags;
 };
 
+#define ECORE_RECOVERY_PROLOG_SLEEP_MS	100
+
+enum _ecore_status_t ecore_recovery_prolog(struct ecore_dev *p_dev)
+{
+	struct ecore_hwfn *p_hwfn = ECORE_LEADING_HWFN(p_dev);
+	struct ecore_ptt *p_ptt = p_hwfn->p_main_ptt;
+	enum _ecore_status_t rc;
+
+	/* Allow ongoing PCIe transactions to complete */
+	OSAL_MSLEEP(ECORE_RECOVERY_PROLOG_SLEEP_MS);
+
+	/* Clear the PF's internal FID_enable in the PXP */
+	rc = ecore_pglueb_set_pfid_enable(p_hwfn, p_ptt, false);
+	if (rc != ECORE_SUCCESS)
+		DP_NOTICE(p_hwfn, false,
+			  "ecore_pglueb_set_pfid_enable() failed. rc = %d.\n",
+			  rc);
+
+	return rc;
+}
+
 static enum _ecore_status_t
 ecore_mcp_resc_allocation_msg(struct ecore_hwfn *p_hwfn,
 			      struct ecore_ptt *p_ptt,
diff --git a/drivers/net/qede/base/ecore_mcp.h b/drivers/net/qede/base/ecore_mcp.h
index 37d1835..77fb5a3 100644
--- a/drivers/net/qede/base/ecore_mcp.h
+++ b/drivers/net/qede/base/ecore_mcp.h
@@ -171,6 +171,17 @@  enum _ecore_status_t ecore_mcp_load_req(struct ecore_hwfn *p_hwfn,
 					struct ecore_load_req_params *p_params);
 
 /**
+ * @brief Sends a LOAD_DONE message to the MFW
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ *
+ * @return enum _ecore_status_t - ECORE_SUCCESS - Operation was successful.
+ */
+enum _ecore_status_t ecore_mcp_load_done(struct ecore_hwfn *p_hwfn,
+					 struct ecore_ptt *p_ptt);
+
+/**
  * @brief Sends a UNLOAD_REQ message to the MFW
  *
  * @param p_hwfn
diff --git a/drivers/net/qede/base/ecore_mcp_api.h b/drivers/net/qede/base/ecore_mcp_api.h
index 190c135..abc190c 100644
--- a/drivers/net/qede/base/ecore_mcp_api.h
+++ b/drivers/net/qede/base/ecore_mcp_api.h
@@ -736,6 +736,17 @@  enum _ecore_status_t ecore_start_recovery_process(struct ecore_hwfn *p_hwfn,
 						  struct ecore_ptt *p_ptt);
 
 /**
+ * @brief A recovery handler must call this function as its first step.
+ *        It is assumed that the handler is not run from an interrupt context.
+ *
+ *  @param p_dev - device to recover; the leading hwfn's main PTT is used
+ *                 internally, so no PTT is passed by the caller.
+ *
+ * @return enum _ecore_status_t
+ */
+enum _ecore_status_t ecore_recovery_prolog(struct ecore_dev *p_dev);
+
+/**
  * @brief Notify MFW about the change in base device properties
  *
  *  @param p_hwfn