[dpdk-dev] [PATCH 14/53] net/qede/base: add mdump sub-commands
Rasesh Mody
rasesh.mody at cavium.com
Tue Sep 19 03:29:54 CEST 2017
- Add support to retain/clear data for crash dump by introducing the mdump
GET_RETAIN/CLR_RETAIN sub-commands and the new APIs
ecore_mcp_mdump_get_retain() and ecore_mcp_mdump_clr_retain()
- Avoid checking for mdump logs and data in case of an emulator
- Fix "deadbeaf" value being returned when the pcie status command read
fails (prevent false detection)
Signed-off-by: Rasesh Mody <rasesh.mody at cavium.com>
---
drivers/net/qede/base/ecore_dev.c | 24 +++++++--
drivers/net/qede/base/ecore_mcp.c | 87 +++++++++++++++++++++++++++------
drivers/net/qede/base/ecore_mcp.h | 21 ++++++++
drivers/net/qede/base/ecore_mcp_api.h | 11 +++++
drivers/net/qede/base/mcp_public.h | 10 ++++
5 files changed, 132 insertions(+), 21 deletions(-)
diff --git a/drivers/net/qede/base/ecore_dev.c b/drivers/net/qede/base/ecore_dev.c
index 938834b..93c2306 100644
--- a/drivers/net/qede/base/ecore_dev.c
+++ b/drivers/net/qede/base/ecore_dev.c
@@ -3564,6 +3564,7 @@ void ecore_prepare_hibernate(struct ecore_dev *p_dev)
void OSAL_IOMEM * p_doorbells,
struct ecore_hw_prepare_params *p_params)
{
+ struct ecore_mdump_retain_data mdump_retain;
struct ecore_dev *p_dev = p_hwfn->p_dev;
struct ecore_mdump_info mdump_info;
enum _ecore_status_t rc = ECORE_SUCCESS;
@@ -3631,24 +3632,37 @@ void ecore_prepare_hibernate(struct ecore_dev *p_dev)
/* Sending a mailbox to the MFW should be after ecore_get_hw_info() is
* called, since among others it sets the ports number in an engine.
*/
- if (p_params->initiate_pf_flr && p_hwfn == ECORE_LEADING_HWFN(p_dev) &&
+ if (p_params->initiate_pf_flr && IS_LEAD_HWFN(p_hwfn) &&
!p_dev->recov_in_prog) {
rc = ecore_mcp_initiate_pf_flr(p_hwfn, p_hwfn->p_main_ptt);
if (rc != ECORE_SUCCESS)
DP_NOTICE(p_hwfn, false, "Failed to initiate PF FLR\n");
}
- /* Check if mdump logs are present and update the epoch value */
- if (p_hwfn == ECORE_LEADING_HWFN(p_hwfn->p_dev)) {
+ /* Check if mdump logs/data are present and update the epoch value */
+ if (IS_LEAD_HWFN(p_hwfn)) {
+#ifndef ASIC_ONLY
+ if (!CHIP_REV_IS_EMUL(p_dev)) {
+#endif
rc = ecore_mcp_mdump_get_info(p_hwfn, p_hwfn->p_main_ptt,
&mdump_info);
- if (rc == ECORE_SUCCESS && mdump_info.num_of_logs > 0) {
+ if (rc == ECORE_SUCCESS && mdump_info.num_of_logs)
DP_NOTICE(p_hwfn, false,
"* * * IMPORTANT - HW ERROR register dump captured by device * * *\n");
- }
+
+ rc = ecore_mcp_mdump_get_retain(p_hwfn, p_hwfn->p_main_ptt,
+ &mdump_retain);
+ if (rc == ECORE_SUCCESS && mdump_retain.valid)
+ DP_NOTICE(p_hwfn, false,
+ "mdump retained data: epoch 0x%08x, pf 0x%x, status 0x%08x\n",
+ mdump_retain.epoch, mdump_retain.pf,
+ mdump_retain.status);
ecore_mcp_mdump_set_values(p_hwfn, p_hwfn->p_main_ptt,
p_params->epoch);
+#ifndef ASIC_ONLY
+ }
+#endif
}
/* Allocate the init RT array and initialize the init-ops engine */
diff --git a/drivers/net/qede/base/ecore_mcp.c b/drivers/net/qede/base/ecore_mcp.c
index 868b075..462fcc9 100644
--- a/drivers/net/qede/base/ecore_mcp.c
+++ b/drivers/net/qede/base/ecore_mcp.c
@@ -1434,11 +1434,16 @@ struct ecore_mdump_cmd_params {
return rc;
p_mdump_cmd_params->mcp_resp = mb_params.mcp_resp;
+
if (p_mdump_cmd_params->mcp_resp == FW_MSG_CODE_MDUMP_INVALID_CMD) {
- DP_NOTICE(p_hwfn, false,
- "MFW claims that the mdump command is illegal [mdump_cmd 0x%x]\n",
- p_mdump_cmd_params->cmd);
- rc = ECORE_INVAL;
+ DP_INFO(p_hwfn,
+ "The mdump sub command is unsupported by the MFW [mdump_cmd 0x%x]\n",
+ p_mdump_cmd_params->cmd);
+ rc = ECORE_NOTIMPL;
+ } else if (p_mdump_cmd_params->mcp_resp == FW_MSG_CODE_UNSUPPORTED) {
+ DP_INFO(p_hwfn,
+ "The mdump command is not supported by the MFW\n");
+ rc = ECORE_NOTIMPL;
}
return rc;
@@ -1496,16 +1501,10 @@ enum _ecore_status_t ecore_mcp_mdump_trigger(struct ecore_hwfn *p_hwfn,
if (rc != ECORE_SUCCESS)
return rc;
- if (mdump_cmd_params.mcp_resp == FW_MSG_CODE_UNSUPPORTED) {
- DP_INFO(p_hwfn,
- "The mdump command is not supported by the MFW\n");
- return ECORE_NOTIMPL;
- }
-
if (mdump_cmd_params.mcp_resp != FW_MSG_CODE_OK) {
- DP_NOTICE(p_hwfn, false,
- "Failed to get the mdump configuration and logs info [mcp_resp 0x%x]\n",
- mdump_cmd_params.mcp_resp);
+ DP_INFO(p_hwfn,
+ "Failed to get the mdump configuration and logs info [mcp_resp 0x%x]\n",
+ mdump_cmd_params.mcp_resp);
rc = ECORE_UNKNOWN_ERROR;
}
@@ -1566,17 +1565,71 @@ enum _ecore_status_t ecore_mcp_mdump_clear_logs(struct ecore_hwfn *p_hwfn,
return ecore_mcp_mdump_cmd(p_hwfn, p_ptt, &mdump_cmd_params);
}
+enum _ecore_status_t
+ecore_mcp_mdump_get_retain(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
+ struct ecore_mdump_retain_data *p_mdump_retain)
+{
+ struct ecore_mdump_cmd_params mdump_cmd_params;
+ struct mdump_retain_data_stc mfw_mdump_retain;
+ enum _ecore_status_t rc;
+
+ OSAL_MEM_ZERO(&mdump_cmd_params, sizeof(mdump_cmd_params));
+ mdump_cmd_params.cmd = DRV_MSG_CODE_MDUMP_GET_RETAIN;
+ mdump_cmd_params.p_data_dst = &mfw_mdump_retain;
+ mdump_cmd_params.data_dst_size = sizeof(mfw_mdump_retain);
+
+ rc = ecore_mcp_mdump_cmd(p_hwfn, p_ptt, &mdump_cmd_params);
+ if (rc != ECORE_SUCCESS)
+ return rc;
+
+ if (mdump_cmd_params.mcp_resp != FW_MSG_CODE_OK) {
+ DP_INFO(p_hwfn,
+ "Failed to get the mdump retained data [mcp_resp 0x%x]\n",
+ mdump_cmd_params.mcp_resp);
+ return ECORE_UNKNOWN_ERROR;
+ }
+
+ p_mdump_retain->valid = mfw_mdump_retain.valid;
+ p_mdump_retain->epoch = mfw_mdump_retain.epoch;
+ p_mdump_retain->pf = mfw_mdump_retain.pf;
+ p_mdump_retain->status = mfw_mdump_retain.status;
+
+ return ECORE_SUCCESS;
+}
+
+enum _ecore_status_t ecore_mcp_mdump_clr_retain(struct ecore_hwfn *p_hwfn,
+ struct ecore_ptt *p_ptt)
+{
+ struct ecore_mdump_cmd_params mdump_cmd_params;
+
+ OSAL_MEM_ZERO(&mdump_cmd_params, sizeof(mdump_cmd_params));
+ mdump_cmd_params.cmd = DRV_MSG_CODE_MDUMP_CLR_RETAIN;
+
+ return ecore_mcp_mdump_cmd(p_hwfn, p_ptt, &mdump_cmd_params);
+}
+
static void ecore_mcp_handle_critical_error(struct ecore_hwfn *p_hwfn,
struct ecore_ptt *p_ptt)
{
+ struct ecore_mdump_retain_data mdump_retain;
+ enum _ecore_status_t rc;
+
/* In CMT mode - no need for more than a single acknowledgment to the
* MFW, and no more than a single notification to the upper driver.
*/
if (p_hwfn != ECORE_LEADING_HWFN(p_hwfn->p_dev))
return;
- DP_NOTICE(p_hwfn, false,
- "Received a critical error notification from the MFW!\n");
+ rc = ecore_mcp_mdump_get_retain(p_hwfn, p_ptt, &mdump_retain);
+ if (rc == ECORE_SUCCESS && mdump_retain.valid) {
+ DP_NOTICE(p_hwfn, false,
+ "The MFW notified that a critical error occurred in the device [epoch 0x%08x, pf 0x%x, status 0x%08x]\n",
+ mdump_retain.epoch, mdump_retain.pf,
+ mdump_retain.status);
+ } else {
+ DP_NOTICE(p_hwfn, false,
+ "The MFW notified that a critical error occurred in the device\n");
+ }
if (p_hwfn->p_dev->allow_mdump) {
DP_NOTICE(p_hwfn, false,
@@ -1584,6 +1637,8 @@ static void ecore_mcp_handle_critical_error(struct ecore_hwfn *p_hwfn,
return;
}
+ DP_NOTICE(p_hwfn, false,
+ "Acknowledging the notification to not allow the MFW crash dump [driver debug data collection is preferable]\n");
ecore_mcp_mdump_ack(p_hwfn, p_ptt);
ecore_hw_err_notify(p_hwfn, ECORE_HW_ERR_HW_ATTN);
}
@@ -2245,8 +2300,8 @@ enum _ecore_status_t ecore_mcp_mask_parities(struct ecore_hwfn *p_hwfn,
struct ecore_ptt *p_ptt,
u32 mask_parities)
{
- enum _ecore_status_t rc;
u32 resp = 0, param = 0;
+ enum _ecore_status_t rc;
rc = ecore_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_MASK_PARITIES,
mask_parities, &resp, &param);
diff --git a/drivers/net/qede/base/ecore_mcp.h b/drivers/net/qede/base/ecore_mcp.h
index 9b6a9b4..b84f0d1 100644
--- a/drivers/net/qede/base/ecore_mcp.h
+++ b/drivers/net/qede/base/ecore_mcp.h
@@ -376,12 +376,33 @@ enum _ecore_status_t ecore_mcp_mdump_set_values(struct ecore_hwfn *p_hwfn,
*
* @param p_hwfn
* @param p_ptt
+ * @param epoch
*
* @param return ECORE_SUCCESS upon success.
*/
enum _ecore_status_t ecore_mcp_mdump_trigger(struct ecore_hwfn *p_hwfn,
struct ecore_ptt *p_ptt);
+struct ecore_mdump_retain_data {
+ u32 valid;
+ u32 epoch;
+ u32 pf;
+ u32 status;
+};
+
+/**
+ * @brief - Gets the mdump retained data from the MFW.
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ * @param p_mdump_retain
+ *
+ * @param return ECORE_SUCCESS upon success.
+ */
+enum _ecore_status_t
+ecore_mcp_mdump_get_retain(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
+ struct ecore_mdump_retain_data *p_mdump_retain);
+
/**
* @brief - Sets the MFW's max value for the given resource
*
diff --git a/drivers/net/qede/base/ecore_mcp_api.h b/drivers/net/qede/base/ecore_mcp_api.h
index 86fa0cb..059b55e 100644
--- a/drivers/net/qede/base/ecore_mcp_api.h
+++ b/drivers/net/qede/base/ecore_mcp_api.h
@@ -1123,6 +1123,17 @@ enum _ecore_status_t ecore_mcp_mdump_clear_logs(struct ecore_hwfn *p_hwfn,
struct ecore_ptt *p_ptt);
/**
+ * @brief - Clear the mdump retained data.
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ *
+ * @param return ECORE_SUCCESS upon success.
+ */
+enum _ecore_status_t ecore_mcp_mdump_clr_retain(struct ecore_hwfn *p_hwfn,
+ struct ecore_ptt *p_ptt);
+
+/**
* @brief - Processes the TLV request from MFW i.e., get the required TLV info
* from the ecore client and send it to the MFW.
*
diff --git a/drivers/net/qede/base/mcp_public.h b/drivers/net/qede/base/mcp_public.h
index 41711cc..f934c17 100644
--- a/drivers/net/qede/base/mcp_public.h
+++ b/drivers/net/qede/base/mcp_public.h
@@ -1108,6 +1108,13 @@ struct load_rsp_stc {
#define LOAD_RSP_FLAGS0_DRV_EXISTS (0x1 << 0)
};
+struct mdump_retain_data_stc {
+ u32 valid;
+ u32 epoch;
+ u32 pf;
+ u32 status;
+};
+
union drv_union_data {
struct mcp_mac wol_mac; /* UNLOAD_DONE */
@@ -1138,6 +1145,7 @@ struct load_rsp_stc {
struct load_req_stc load_req;
struct load_rsp_stc load_rsp;
+ struct mdump_retain_data_stc mdump_retain;
/* ... */
};
@@ -1350,6 +1358,8 @@ struct public_drv_mb {
#define DRV_MSG_CODE_MDUMP_SET_ENABLE 0x05
/* Clear all logs */
#define DRV_MSG_CODE_MDUMP_CLEAR_LOGS 0x06
+#define DRV_MSG_CODE_MDUMP_GET_RETAIN 0x07 /* Get retained data */
+#define DRV_MSG_CODE_MDUMP_CLR_RETAIN 0x08 /* Clear retain data */
#define DRV_MSG_CODE_MEM_ECC_EVENTS 0x00260000 /* Param: None */
/* Param: [0:15] - gpio number */
#define DRV_MSG_CODE_GPIO_INFO 0x00270000
--
1.7.10.3
More information about the dev
mailing list