[dpdk-dev] [PATCH 14/53] net/qede/base: add mdump sub-commands

Rasesh Mody rasesh.mody at cavium.com
Tue Sep 19 03:29:54 CEST 2017


 - Add support for getting/clearing the retained crash dump data by
   introducing the mdump GET_RETAIN/CLR_RETAIN sub-commands and the new APIs
   ecore_mcp_mdump_get_retain() and ecore_mcp_mdump_clr_retain()
 - Avoid checking for mdump logs and data in case of an emulator
 - Fix the "deadbeaf" value returned when the pcie status command read
   fails (prevent false detection)

Signed-off-by: Rasesh Mody <rasesh.mody at cavium.com>
---
 drivers/net/qede/base/ecore_dev.c     |   24 +++++++--
 drivers/net/qede/base/ecore_mcp.c     |   87 +++++++++++++++++++++++++++------
 drivers/net/qede/base/ecore_mcp.h     |   21 ++++++++
 drivers/net/qede/base/ecore_mcp_api.h |   11 +++++
 drivers/net/qede/base/mcp_public.h    |   10 ++++
 5 files changed, 132 insertions(+), 21 deletions(-)

diff --git a/drivers/net/qede/base/ecore_dev.c b/drivers/net/qede/base/ecore_dev.c
index 938834b..93c2306 100644
--- a/drivers/net/qede/base/ecore_dev.c
+++ b/drivers/net/qede/base/ecore_dev.c
@@ -3564,6 +3564,7 @@ void ecore_prepare_hibernate(struct ecore_dev *p_dev)
 			void OSAL_IOMEM * p_doorbells,
 			struct ecore_hw_prepare_params *p_params)
 {
+	struct ecore_mdump_retain_data mdump_retain;
 	struct ecore_dev *p_dev = p_hwfn->p_dev;
 	struct ecore_mdump_info mdump_info;
 	enum _ecore_status_t rc = ECORE_SUCCESS;
@@ -3631,24 +3632,37 @@ void ecore_prepare_hibernate(struct ecore_dev *p_dev)
 	/* Sending a mailbox to the MFW should be after ecore_get_hw_info() is
 	 * called, since among others it sets the ports number in an engine.
 	 */
-	if (p_params->initiate_pf_flr && p_hwfn == ECORE_LEADING_HWFN(p_dev) &&
+	if (p_params->initiate_pf_flr && IS_LEAD_HWFN(p_hwfn) &&
 	    !p_dev->recov_in_prog) {
 		rc = ecore_mcp_initiate_pf_flr(p_hwfn, p_hwfn->p_main_ptt);
 		if (rc != ECORE_SUCCESS)
 			DP_NOTICE(p_hwfn, false, "Failed to initiate PF FLR\n");
 	}
 
-	/* Check if mdump logs are present and update the epoch value */
-	if (p_hwfn == ECORE_LEADING_HWFN(p_hwfn->p_dev)) {
+	/* Check if mdump logs/data are present and update the epoch value */
+	if (IS_LEAD_HWFN(p_hwfn)) {
+#ifndef ASIC_ONLY
+		if (!CHIP_REV_IS_EMUL(p_dev)) {
+#endif
 		rc = ecore_mcp_mdump_get_info(p_hwfn, p_hwfn->p_main_ptt,
 					      &mdump_info);
-		if (rc == ECORE_SUCCESS && mdump_info.num_of_logs > 0) {
+		if (rc == ECORE_SUCCESS && mdump_info.num_of_logs)
 			DP_NOTICE(p_hwfn, false,
 				  "* * * IMPORTANT - HW ERROR register dump captured by device * * *\n");
-		}
+
+		rc = ecore_mcp_mdump_get_retain(p_hwfn, p_hwfn->p_main_ptt,
+						&mdump_retain);
+		if (rc == ECORE_SUCCESS && mdump_retain.valid)
+			DP_NOTICE(p_hwfn, false,
+				  "mdump retained data: epoch 0x%08x, pf 0x%x, status 0x%08x\n",
+				  mdump_retain.epoch, mdump_retain.pf,
+				  mdump_retain.status);
 
 		ecore_mcp_mdump_set_values(p_hwfn, p_hwfn->p_main_ptt,
 					   p_params->epoch);
+#ifndef ASIC_ONLY
+		}
+#endif
 	}
 
 	/* Allocate the init RT array and initialize the init-ops engine */
diff --git a/drivers/net/qede/base/ecore_mcp.c b/drivers/net/qede/base/ecore_mcp.c
index 868b075..462fcc9 100644
--- a/drivers/net/qede/base/ecore_mcp.c
+++ b/drivers/net/qede/base/ecore_mcp.c
@@ -1434,11 +1434,16 @@ struct ecore_mdump_cmd_params {
 		return rc;
 
 	p_mdump_cmd_params->mcp_resp = mb_params.mcp_resp;
+
 	if (p_mdump_cmd_params->mcp_resp == FW_MSG_CODE_MDUMP_INVALID_CMD) {
-		DP_NOTICE(p_hwfn, false,
-			  "MFW claims that the mdump command is illegal [mdump_cmd 0x%x]\n",
-			  p_mdump_cmd_params->cmd);
-		rc = ECORE_INVAL;
+		DP_INFO(p_hwfn,
+			"The mdump sub command is unsupported by the MFW [mdump_cmd 0x%x]\n",
+			p_mdump_cmd_params->cmd);
+		rc = ECORE_NOTIMPL;
+	} else if (p_mdump_cmd_params->mcp_resp == FW_MSG_CODE_UNSUPPORTED) {
+		DP_INFO(p_hwfn,
+			"The mdump command is not supported by the MFW\n");
+		rc = ECORE_NOTIMPL;
 	}
 
 	return rc;
@@ -1496,16 +1501,10 @@ enum _ecore_status_t ecore_mcp_mdump_trigger(struct ecore_hwfn *p_hwfn,
 	if (rc != ECORE_SUCCESS)
 		return rc;
 
-	if (mdump_cmd_params.mcp_resp == FW_MSG_CODE_UNSUPPORTED) {
-		DP_INFO(p_hwfn,
-			"The mdump command is not supported by the MFW\n");
-		return ECORE_NOTIMPL;
-	}
-
 	if (mdump_cmd_params.mcp_resp != FW_MSG_CODE_OK) {
-		DP_NOTICE(p_hwfn, false,
-			  "Failed to get the mdump configuration and logs info [mcp_resp 0x%x]\n",
-			  mdump_cmd_params.mcp_resp);
+		DP_INFO(p_hwfn,
+			"Failed to get the mdump configuration and logs info [mcp_resp 0x%x]\n",
+			mdump_cmd_params.mcp_resp);
 		rc = ECORE_UNKNOWN_ERROR;
 	}
 
@@ -1566,17 +1565,71 @@ enum _ecore_status_t ecore_mcp_mdump_clear_logs(struct ecore_hwfn *p_hwfn,
 	return ecore_mcp_mdump_cmd(p_hwfn, p_ptt, &mdump_cmd_params);
 }
 
+enum _ecore_status_t
+ecore_mcp_mdump_get_retain(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
+			   struct ecore_mdump_retain_data *p_mdump_retain)
+{
+	struct ecore_mdump_cmd_params mdump_cmd_params;
+	struct mdump_retain_data_stc mfw_mdump_retain;
+	enum _ecore_status_t rc;
+
+	OSAL_MEM_ZERO(&mdump_cmd_params, sizeof(mdump_cmd_params));
+	mdump_cmd_params.cmd = DRV_MSG_CODE_MDUMP_GET_RETAIN;
+	mdump_cmd_params.p_data_dst = &mfw_mdump_retain;
+	mdump_cmd_params.data_dst_size = sizeof(mfw_mdump_retain);
+
+	rc = ecore_mcp_mdump_cmd(p_hwfn, p_ptt, &mdump_cmd_params);
+	if (rc != ECORE_SUCCESS)
+		return rc;
+
+	if (mdump_cmd_params.mcp_resp != FW_MSG_CODE_OK) {
+		DP_INFO(p_hwfn,
+			"Failed to get the mdump retained data [mcp_resp 0x%x]\n",
+			mdump_cmd_params.mcp_resp);
+		return ECORE_UNKNOWN_ERROR;
+	}
+
+	p_mdump_retain->valid = mfw_mdump_retain.valid;
+	p_mdump_retain->epoch = mfw_mdump_retain.epoch;
+	p_mdump_retain->pf = mfw_mdump_retain.pf;
+	p_mdump_retain->status = mfw_mdump_retain.status;
+
+	return ECORE_SUCCESS;
+}
+
+enum _ecore_status_t ecore_mcp_mdump_clr_retain(struct ecore_hwfn *p_hwfn,
+						struct ecore_ptt *p_ptt)
+{
+	struct ecore_mdump_cmd_params mdump_cmd_params;
+
+	OSAL_MEM_ZERO(&mdump_cmd_params, sizeof(mdump_cmd_params));
+	mdump_cmd_params.cmd = DRV_MSG_CODE_MDUMP_CLR_RETAIN;
+
+	return ecore_mcp_mdump_cmd(p_hwfn, p_ptt, &mdump_cmd_params);
+}
+
 static void ecore_mcp_handle_critical_error(struct ecore_hwfn *p_hwfn,
 					    struct ecore_ptt *p_ptt)
 {
+	struct ecore_mdump_retain_data mdump_retain;
+	enum _ecore_status_t rc;
+
 	/* In CMT mode - no need for more than a single acknowledgment to the
 	 * MFW, and no more than a single notification to the upper driver.
 	 */
 	if (p_hwfn != ECORE_LEADING_HWFN(p_hwfn->p_dev))
 		return;
 
-	DP_NOTICE(p_hwfn, false,
-		  "Received a critical error notification from the MFW!\n");
+	rc = ecore_mcp_mdump_get_retain(p_hwfn, p_ptt, &mdump_retain);
+	if (rc == ECORE_SUCCESS && mdump_retain.valid) {
+		DP_NOTICE(p_hwfn, false,
+			  "The MFW notified that a critical error occurred in the device [epoch 0x%08x, pf 0x%x, status 0x%08x]\n",
+			  mdump_retain.epoch, mdump_retain.pf,
+			  mdump_retain.status);
+	} else {
+		DP_NOTICE(p_hwfn, false,
+			  "The MFW notified that a critical error occurred in the device\n");
+	}
 
 	if (p_hwfn->p_dev->allow_mdump) {
 		DP_NOTICE(p_hwfn, false,
@@ -1584,6 +1637,8 @@ static void ecore_mcp_handle_critical_error(struct ecore_hwfn *p_hwfn,
 		return;
 	}
 
+	DP_NOTICE(p_hwfn, false,
+		  "Acknowledging the notification to not allow the MFW crash dump [driver debug data collection is preferable]\n");
 	ecore_mcp_mdump_ack(p_hwfn, p_ptt);
 	ecore_hw_err_notify(p_hwfn, ECORE_HW_ERR_HW_ATTN);
 }
@@ -2245,8 +2300,8 @@ enum _ecore_status_t ecore_mcp_mask_parities(struct ecore_hwfn *p_hwfn,
 					     struct ecore_ptt *p_ptt,
 					     u32 mask_parities)
 {
-	enum _ecore_status_t rc;
 	u32 resp = 0, param = 0;
+	enum _ecore_status_t rc;
 
 	rc = ecore_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_MASK_PARITIES,
 			   mask_parities, &resp, &param);
diff --git a/drivers/net/qede/base/ecore_mcp.h b/drivers/net/qede/base/ecore_mcp.h
index 9b6a9b4..b84f0d1 100644
--- a/drivers/net/qede/base/ecore_mcp.h
+++ b/drivers/net/qede/base/ecore_mcp.h
@@ -376,12 +376,33 @@ enum _ecore_status_t ecore_mcp_mdump_set_values(struct ecore_hwfn *p_hwfn,
  *
  * @param p_hwfn
  * @param p_ptt
+ * @param epoch
  *
  * @param return ECORE_SUCCESS upon success.
  */
 enum _ecore_status_t ecore_mcp_mdump_trigger(struct ecore_hwfn *p_hwfn,
 					     struct ecore_ptt *p_ptt);
 
+struct ecore_mdump_retain_data {
+	u32 valid;
+	u32 epoch;
+	u32 pf;
+	u32 status;
+};
+
+/**
+ * @brief - Gets the mdump retained data from the MFW.
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ * @param p_mdump_retain
+ *
+ * @param return ECORE_SUCCESS upon success.
+ */
+enum _ecore_status_t
+ecore_mcp_mdump_get_retain(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
+			   struct ecore_mdump_retain_data *p_mdump_retain);
+
 /**
  * @brief - Sets the MFW's max value for the given resource
  *
diff --git a/drivers/net/qede/base/ecore_mcp_api.h b/drivers/net/qede/base/ecore_mcp_api.h
index 86fa0cb..059b55e 100644
--- a/drivers/net/qede/base/ecore_mcp_api.h
+++ b/drivers/net/qede/base/ecore_mcp_api.h
@@ -1123,6 +1123,17 @@ enum _ecore_status_t ecore_mcp_mdump_clear_logs(struct ecore_hwfn *p_hwfn,
 						struct ecore_ptt *p_ptt);
 
 /**
+ * @brief - Clear the mdump retained data.
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ *
+ * @param return ECORE_SUCCESS upon success.
+ */
+enum _ecore_status_t ecore_mcp_mdump_clr_retain(struct ecore_hwfn *p_hwfn,
+						struct ecore_ptt *p_ptt);
+
+/**
  * @brief - Processes the TLV request from MFW i.e., get the required TLV info
  *          from the ecore client and send it to the MFW.
  *
diff --git a/drivers/net/qede/base/mcp_public.h b/drivers/net/qede/base/mcp_public.h
index 41711cc..f934c17 100644
--- a/drivers/net/qede/base/mcp_public.h
+++ b/drivers/net/qede/base/mcp_public.h
@@ -1108,6 +1108,13 @@ struct load_rsp_stc {
 #define LOAD_RSP_FLAGS0_DRV_EXISTS	(0x1 << 0)
 };
 
+struct mdump_retain_data_stc {
+	u32 valid;
+	u32 epoch;
+	u32 pf;
+	u32 status;
+};
+
 union drv_union_data {
 	struct mcp_mac wol_mac; /* UNLOAD_DONE */
 
@@ -1138,6 +1145,7 @@ struct load_rsp_stc {
 
 	struct load_req_stc load_req;
 	struct load_rsp_stc load_rsp;
+	struct mdump_retain_data_stc mdump_retain;
 	/* ... */
 };
 
@@ -1350,6 +1358,8 @@ struct public_drv_mb {
 #define DRV_MSG_CODE_MDUMP_SET_ENABLE		0x05
 /* Clear all logs */
 #define DRV_MSG_CODE_MDUMP_CLEAR_LOGS		0x06
+#define DRV_MSG_CODE_MDUMP_GET_RETAIN		0x07 /* Get retained data */
+#define DRV_MSG_CODE_MDUMP_CLR_RETAIN		0x08 /* Clear retain data */
 #define DRV_MSG_CODE_MEM_ECC_EVENTS		0x00260000 /* Param: None */
 /* Param: [0:15] - gpio number */
 #define DRV_MSG_CODE_GPIO_INFO			0x00270000
-- 
1.7.10.3



More information about the dev mailing list