[dpdk-dev] [PATCH 25/53] net/qede/base: add support for doorbell overflow recovery

Rasesh Mody rasesh.mody at cavium.com
Tue Sep 19 03:30:05 CEST 2017


Add support for doorbell overflow recovery mechanism:
The doorbell recovery mechanism consists of a list of entries which
represent doorbelling entities (l2 queues, roce sq/rq/cqs, the slowpath
spq, etc). Each entity needs to register with the mechanism and provide
the parameters describing its doorbell, including a location where last
used doorbell data can be found. The doorbell execute function will
traverse the list and doorbell all of the registered entries.

Signed-off-by: Rasesh Mody <rasesh.mody at cavium.com>
---
 drivers/net/qede/base/bcm_osal.h      |    3 +
 drivers/net/qede/base/ecore.h         |   24 ++-
 drivers/net/qede/base/ecore_dev.c     |  322 ++++++++++++++++++++++++++++++++-
 drivers/net/qede/base/ecore_dev_api.h |   39 ++++
 drivers/net/qede/base/ecore_int.c     |  141 +++++++++++++--
 drivers/net/qede/base/ecore_spq.c     |   51 ++++--
 drivers/net/qede/base/ecore_spq.h     |    3 +
 drivers/net/qede/base/reg_addr.h      |   10 +
 drivers/net/qede/qede_main.c          |    1 +
 9 files changed, 557 insertions(+), 37 deletions(-)

diff --git a/drivers/net/qede/base/bcm_osal.h b/drivers/net/qede/base/bcm_osal.h
index f4c7028..70b1a7f 100644
--- a/drivers/net/qede/base/bcm_osal.h
+++ b/drivers/net/qede/base/bcm_osal.h
@@ -148,6 +148,9 @@ void *osal_dma_alloc_coherent_aligned(struct ecore_dev *, dma_addr_t *,
 			      ((u8 *)(uintptr_t)(_p_hwfn->doorbells) +	\
 			      (_db_addr)), (u32)_val)
 
+#define DIRECT_REG_WR64(hwfn, addr, value) nothing
+#define DIRECT_REG_RD64(hwfn, addr) 0
+
 /* Mutexes */
 
 typedef pthread_mutex_t osal_mutex_t;
diff --git a/drivers/net/qede/base/ecore.h b/drivers/net/qede/base/ecore.h
index 2d2f6f3..d921d9e 100644
--- a/drivers/net/qede/base/ecore.h
+++ b/drivers/net/qede/base/ecore.h
@@ -351,6 +351,12 @@ enum ecore_hw_err_type {
 };
 #endif
 
+enum ecore_db_rec_exec {
+	DB_REC_DRY_RUN,
+	DB_REC_REAL_DEAL,
+	DB_REC_ONCE,
+};
+
 struct ecore_hw_info {
 	/* PCI personality */
 	enum ecore_pci_personality personality;
@@ -479,6 +485,12 @@ struct ecore_qm_info {
 	u8			num_pf_rls;
 };
 
+struct ecore_db_recovery_info {
+	osal_list_t list;
+	osal_spinlock_t lock;
+	u32 db_recovery_counter;
+};
+
 struct storm_stats {
 	u32 address;
 	u32 len;
@@ -605,6 +617,9 @@ struct ecore_hwfn {
 	/* L2-related */
 	struct ecore_l2_info		*p_l2_info;
 
+	/* Mechanism for recovering from doorbell drop */
+	struct ecore_db_recovery_info	db_recovery_info;
+
 	/* @DPDK */
 	struct ecore_ptt		*p_arfs_ptt;
 };
@@ -860,6 +875,13 @@ void ecore_set_fw_mac_addr(__le16 *fw_msb, __le16 *fw_mid, __le16 *fw_lsb,
 u16 ecore_get_cm_pq_idx_vf(struct ecore_hwfn *p_hwfn, u16 vf);
 u16 ecore_get_cm_pq_idx_rl(struct ecore_hwfn *p_hwfn, u8 qpid);
 
+const char *ecore_hw_get_resc_name(enum ecore_resources res_id);
+
+/* doorbell recovery mechanism */
+void ecore_db_recovery_dp(struct ecore_hwfn *p_hwfn);
+void ecore_db_recovery_execute(struct ecore_hwfn *p_hwfn,
+			       enum ecore_db_rec_exec);
+
 /* amount of resources used in qm init */
 u8 ecore_init_qm_get_num_tcs(struct ecore_hwfn *p_hwfn);
 u16 ecore_init_qm_get_num_vfs(struct ecore_hwfn *p_hwfn);
@@ -869,6 +891,4 @@ void ecore_set_fw_mac_addr(__le16 *fw_msb, __le16 *fw_mid, __le16 *fw_lsb,
 
 #define ECORE_LEADING_HWFN(dev)	(&dev->hwfns[0])
 
-const char *ecore_hw_get_resc_name(enum ecore_resources res_id);
-
 #endif /* __ECORE_H */
diff --git a/drivers/net/qede/base/ecore_dev.c b/drivers/net/qede/base/ecore_dev.c
index 2fe30d7..711a824 100644
--- a/drivers/net/qede/base/ecore_dev.c
+++ b/drivers/net/qede/base/ecore_dev.c
@@ -42,6 +42,318 @@
 static osal_spinlock_t qm_lock;
 static bool qm_lock_init;
 
+/******************** Doorbell Recovery *******************/
+/* The doorbell recovery mechanism consists of a list of entries which represent
+ * doorbelling entities (l2 queues, roce sq/rq/cqs, the slowpath spq, etc). Each
+ * entity needs to register with the mechanism and provide the parameters
+ * describing its doorbell, including a location where last used doorbell data
+ * can be found. The doorbell execute function will traverse the list and
+ * doorbell all of the registered entries.
+ */
+struct ecore_db_recovery_entry {
+	osal_list_entry_t	list_entry;
+	void OSAL_IOMEM		*db_addr;
+	void			*db_data;
+	enum ecore_db_rec_width	db_width;
+	enum ecore_db_rec_space	db_space;
+	u8			hwfn_idx;
+};
+
+/* display a single doorbell recovery entry */
+void ecore_db_recovery_dp_entry(struct ecore_hwfn *p_hwfn,
+				struct ecore_db_recovery_entry *db_entry,
+				const char *action)
+{
+	DP_VERBOSE(p_hwfn, ECORE_MSG_SPQ, "(%s: db_entry %p, addr %p, data %p, width %s, %s space, hwfn %d)\n",
+		   action, db_entry, db_entry->db_addr, db_entry->db_data,
+		   db_entry->db_width == DB_REC_WIDTH_32B ? "32b" : "64b",
+		   db_entry->db_space == DB_REC_USER ? "user" : "kernel",
+		   db_entry->hwfn_idx);
+}
+
+/* doorbell sanity: db_addr inside the doorbell bar and db_data non-null */
+bool ecore_db_rec_sanity(struct ecore_dev *p_dev, void OSAL_IOMEM *db_addr,
+			 void *db_data)
+{
+	/* bar spans [doorbells, doorbells + db_size); its end is excluded */
+	if (db_addr < p_dev->doorbells || (u8 *)db_addr >=
+			(u8 *)p_dev->doorbells + p_dev->db_size) {
+		OSAL_WARN(true,
+			  "Illegal doorbell address: %p. Legal range for doorbell addresses is [%p..%p)\n",
+			  db_addr, p_dev->doorbells,
+			  (u8 *)p_dev->doorbells + p_dev->db_size);
+		return false;
+	}
+
+	/* make sure doorbell data pointer is not null */
+	if (!db_data) {
+		OSAL_WARN(true, "Illegal doorbell data pointer: %p", db_data);
+		return false;
+	}
+
+	return true;
+}
+
+/* find the hwfn that owns the given doorbell address */
+struct ecore_hwfn *ecore_db_rec_find_hwfn(struct ecore_dev *p_dev,
+					  void OSAL_IOMEM *db_addr)
+{
+	struct ecore_hwfn *p_hwfn;
+
+	/* In CMT the doorbell bar is split between engine 0 and engine 1:
+	 * addresses below hwfns[1].doorbells belong to hwfn 0, the rest to 1
+	 */
+	if (p_dev->num_hwfns > 1)
+		p_hwfn = db_addr < p_dev->hwfns[1].doorbells ?
+			&p_dev->hwfns[0] : &p_dev->hwfns[1];
+	else
+		p_hwfn = ECORE_LEADING_HWFN(p_dev);
+
+	return p_hwfn;
+}
+
+/* add a new entry to the doorbell recovery mechanism */
+enum _ecore_status_t ecore_db_recovery_add(struct ecore_dev *p_dev,
+					   void OSAL_IOMEM *db_addr,
+					   void *db_data,
+					   enum ecore_db_rec_width db_width,
+					   enum ecore_db_rec_space db_space)
+{
+	struct ecore_db_recovery_entry *db_entry;
+	struct ecore_hwfn *p_hwfn;
+
+	/* shortcircuit VFs, for now */
+	if (IS_VF(p_dev)) {
+		DP_VERBOSE(p_dev, ECORE_MSG_IOV, "db recovery - skipping VF doorbell\n");
+		return ECORE_SUCCESS;
+	}
+
+	/* sanitize doorbell address */
+	if (!ecore_db_rec_sanity(p_dev, db_addr, db_data))
+		return ECORE_INVAL;
+
+	/* obtain hwfn from doorbell address */
+	p_hwfn = ecore_db_rec_find_hwfn(p_dev, db_addr);
+
+	/* create entry */
+	db_entry = OSAL_ZALLOC(p_hwfn->p_dev, GFP_KERNEL, sizeof(*db_entry));
+	if (!db_entry) {
+		DP_NOTICE(p_dev, false, "Failed to allocate a db recovery entry\n");
+		return ECORE_NOMEM;
+	}
+
+	/* populate entry */
+	db_entry->db_addr = db_addr;
+	db_entry->db_data = db_data;
+	db_entry->db_width = db_width;
+	db_entry->db_space = db_space;
+	db_entry->hwfn_idx = p_hwfn->my_id;
+
+	/* display */
+	ecore_db_recovery_dp_entry(p_hwfn, db_entry, "Adding");
+
+	/* protect the list */
+	OSAL_SPIN_LOCK(&p_hwfn->db_recovery_info.lock);
+	OSAL_LIST_PUSH_TAIL(&db_entry->list_entry,
+			    &p_hwfn->db_recovery_info.list);
+	OSAL_SPIN_UNLOCK(&p_hwfn->db_recovery_info.lock);
+
+	return ECORE_SUCCESS;
+}
+
+/* remove an entry from the doorbell recovery mechanism */
+enum _ecore_status_t ecore_db_recovery_del(struct ecore_dev *p_dev,
+					   void OSAL_IOMEM *db_addr,
+					   void *db_data)
+{
+	struct ecore_db_recovery_entry *db_entry = OSAL_NULL;
+	enum _ecore_status_t rc = ECORE_INVAL;
+	struct ecore_hwfn *p_hwfn;
+
+	/* shortcircuit VFs, for now */
+	if (IS_VF(p_dev)) {
+		DP_VERBOSE(p_dev, ECORE_MSG_IOV, "db recovery - skipping VF doorbell\n");
+		return ECORE_SUCCESS;
+	}
+
+	/* sanitize doorbell address */
+	if (!ecore_db_rec_sanity(p_dev, db_addr, db_data))
+		return ECORE_INVAL;
+
+	/* obtain hwfn from doorbell address */
+	p_hwfn = ecore_db_rec_find_hwfn(p_dev, db_addr);
+
+	/* protect the list */
+	OSAL_SPIN_LOCK(&p_hwfn->db_recovery_info.lock);
+	OSAL_LIST_FOR_EACH_ENTRY(db_entry,
+				 &p_hwfn->db_recovery_info.list,
+				 list_entry,
+				 struct ecore_db_recovery_entry) {
+		/* search according to db_data addr since db_addr is not unique
+		 * (roce)
+		 */
+		if (db_entry->db_data == db_data) {
+			ecore_db_recovery_dp_entry(p_hwfn, db_entry,
+						   "Deleting");
+			OSAL_LIST_REMOVE_ENTRY(&db_entry->list_entry,
+					       &p_hwfn->db_recovery_info.list);
+			rc = ECORE_SUCCESS;
+			break;
+		}
+	}
+
+	OSAL_SPIN_UNLOCK(&p_hwfn->db_recovery_info.lock);
+
+	if (rc == ECORE_INVAL)
+		/*OSAL_WARN(true,*/
+		DP_NOTICE(p_hwfn, false,
+			  "Failed to find element in list. Key (db_data addr) was %p. db_addr was %p\n",
+			  db_data, db_addr);
+	else
+		OSAL_FREE(p_dev, db_entry);
+
+	return rc;
+}
+
+/* initialize the doorbell recovery mechanism */
+enum _ecore_status_t ecore_db_recovery_setup(struct ecore_hwfn *p_hwfn)
+{
+	DP_VERBOSE(p_hwfn, ECORE_MSG_SPQ, "Setting up db recovery\n");
+
+	/* make sure db_size was set in p_dev */
+	if (!p_hwfn->p_dev->db_size) {
+		DP_ERR(p_hwfn->p_dev, "db_size not set\n");
+		return ECORE_INVAL;
+	}
+
+	OSAL_LIST_INIT(&p_hwfn->db_recovery_info.list);
+#ifdef CONFIG_ECORE_LOCK_ALLOC
+	OSAL_SPIN_LOCK_ALLOC(p_hwfn, &p_hwfn->db_recovery_info.lock);
+#endif
+	OSAL_SPIN_LOCK_INIT(&p_hwfn->db_recovery_info.lock);
+	p_hwfn->db_recovery_info.db_recovery_counter = 0;
+
+	return ECORE_SUCCESS;
+}
+
+/* destroy the doorbell recovery mechanism, purging any leftover entries */
+void ecore_db_recovery_teardown(struct ecore_hwfn *p_hwfn)
+{
+	struct ecore_db_recovery_entry *db_entry = OSAL_NULL;
+
+	DP_VERBOSE(p_hwfn, ECORE_MSG_SPQ, "Tearing down db recovery\n");
+	if (!OSAL_LIST_IS_EMPTY(&p_hwfn->db_recovery_info.list)) {
+		DP_NOTICE(p_hwfn, false, "Doorbell Recovery teardown found the doorbell recovery list was not empty (Expected in disorderly driver unload (e.g. recovery) otherwise this probably means some flow forgot to db_recovery_del). Prepare to purge doorbell recovery list...\n");
+		while (!OSAL_LIST_IS_EMPTY(&p_hwfn->db_recovery_info.list)) {
+			db_entry = OSAL_LIST_FIRST_ENTRY(
+						&p_hwfn->db_recovery_info.list,
+						struct ecore_db_recovery_entry,
+						list_entry);
+			ecore_db_recovery_dp_entry(p_hwfn, db_entry, "Purging");
+			OSAL_LIST_REMOVE_ENTRY(&db_entry->list_entry,
+					       &p_hwfn->db_recovery_info.list);
+			OSAL_FREE(p_hwfn->p_dev, db_entry);
+		}
+	}
+#ifdef CONFIG_ECORE_LOCK_ALLOC
+	OSAL_SPIN_LOCK_DEALLOC(&p_hwfn->db_recovery_info.lock);
+#endif
+	p_hwfn->db_recovery_info.db_recovery_counter = 0;
+}
+
+/* print the content of the doorbell recovery mechanism */
+void ecore_db_recovery_dp(struct ecore_hwfn *p_hwfn)
+{
+	struct ecore_db_recovery_entry *db_entry = OSAL_NULL;
+
+	DP_NOTICE(p_hwfn, false,
+		  "Dispalying doorbell recovery database. Counter was %d\n",
+		  p_hwfn->db_recovery_info.db_recovery_counter);
+
+	/* protect the list */
+	OSAL_SPIN_LOCK(&p_hwfn->db_recovery_info.lock);
+	OSAL_LIST_FOR_EACH_ENTRY(db_entry,
+				 &p_hwfn->db_recovery_info.list,
+				 list_entry,
+				 struct ecore_db_recovery_entry) {
+		ecore_db_recovery_dp_entry(p_hwfn, db_entry, "Printing");
+	}
+
+	OSAL_SPIN_UNLOCK(&p_hwfn->db_recovery_info.lock);
+}
+
+/* ring the doorbell of a single doorbell recovery entry */
+void ecore_db_recovery_ring(struct ecore_hwfn *p_hwfn,
+			    struct ecore_db_recovery_entry *db_entry,
+			    enum ecore_db_rec_exec db_exec)
+{
+	/* Sanity - checked first since the print below dereferences db_data */
+	if (!ecore_db_rec_sanity(p_hwfn->p_dev, db_entry->db_addr,
+				 db_entry->db_data))
+		return;
+
+	/* Print according to width */
+	if (db_entry->db_width == DB_REC_WIDTH_32B)
+		DP_VERBOSE(p_hwfn, ECORE_MSG_SPQ, "%s doorbell address %p data %x\n",
+			   db_exec == DB_REC_DRY_RUN ? "would have rung" : "ringing",
+			   db_entry->db_addr, *(u32 *)db_entry->db_data);
+	else
+		DP_VERBOSE(p_hwfn, ECORE_MSG_SPQ, "%s doorbell address %p data %llx\n",
+			   db_exec == DB_REC_DRY_RUN ? "would have rung" : "ringing",
+			   db_entry->db_addr,
+			   (unsigned long long)*(u64 *)(db_entry->db_data));
+
+	/* Flush the write combined buffer. Since there are multiple doorbelling
+	 * entities using the same address, if we don't flush, a transaction
+	 * could be lost.
+	 */
+	OSAL_WMB(p_hwfn->p_dev);
+
+	/* Ring the doorbell */
+	if (db_exec == DB_REC_REAL_DEAL || db_exec == DB_REC_ONCE) {
+		if (db_entry->db_width == DB_REC_WIDTH_32B)
+			DIRECT_REG_WR(p_hwfn, db_entry->db_addr,
+				      *(u32 *)(db_entry->db_data));
+		else
+			DIRECT_REG_WR64(p_hwfn, db_entry->db_addr,
+					*(u64 *)(db_entry->db_data));
+	}
+
+	/* Flush the write combined buffer. Next doorbell may come from a
+	 * different entity to the same address...
+	 */
+	OSAL_WMB(p_hwfn->p_dev);
+}
+
+/* traverse the doorbell recovery entry list and ring all the doorbells */
+void ecore_db_recovery_execute(struct ecore_hwfn *p_hwfn,
+			       enum ecore_db_rec_exec db_exec)
+{
+	struct ecore_db_recovery_entry *db_entry = OSAL_NULL;
+
+	if (db_exec != DB_REC_ONCE) {
+		DP_NOTICE(p_hwfn, false, "Executing doorbell recovery. Counter was %d\n",
+			  p_hwfn->db_recovery_info.db_recovery_counter);
+
+		/* track amount of times recovery was executed */
+		p_hwfn->db_recovery_info.db_recovery_counter++;
+	}
+
+	/* protect the list */
+	OSAL_SPIN_LOCK(&p_hwfn->db_recovery_info.lock);
+	OSAL_LIST_FOR_EACH_ENTRY(db_entry,
+				 &p_hwfn->db_recovery_info.list,
+				 list_entry,
+				 struct ecore_db_recovery_entry) {
+		ecore_db_recovery_ring(p_hwfn, db_entry, db_exec);
+		if (db_exec == DB_REC_ONCE)
+			break;
+	}
+
+	OSAL_SPIN_UNLOCK(&p_hwfn->db_recovery_info.lock);
+}
+/******************** Doorbell Recovery end ****************/
+
 /* Configurable */
 #define ECORE_MIN_DPIS		(4)	/* The minimal num of DPIs required to
 					 * load the driver. The number was
@@ -172,6 +484,9 @@ void ecore_resc_free(struct ecore_dev *p_dev)
 		ecore_dmae_info_free(p_hwfn);
 		ecore_dcbx_info_free(p_hwfn, p_hwfn->p_dcbx_info);
 		/* @@@TBD Flush work-queue ? */
+
+		/* destroy doorbell recovery mechanism */
+		ecore_db_recovery_teardown(p_hwfn);
 	}
 }
 
@@ -863,12 +1178,17 @@ enum _ecore_status_t ecore_resc_alloc(struct ecore_dev *p_dev)
 		struct ecore_hwfn *p_hwfn = &p_dev->hwfns[i];
 		u32 n_eqes, num_cons;
 
+		/* initialize the doorbell recovery mechanism */
+		rc = ecore_db_recovery_setup(p_hwfn);
+		if (rc)
+			goto alloc_err;
+
 		/* First allocate the context manager structure */
 		rc = ecore_cxt_mngr_alloc(p_hwfn);
 		if (rc)
 			goto alloc_err;
 
-		/* Set the HW cid/tid numbers (in the contest manager)
+		/* Set the HW cid/tid numbers (in the context manager)
 		 * Must be done prior to any further computations.
 		 */
 		rc = ecore_cxt_set_pf_params(p_hwfn);
diff --git a/drivers/net/qede/base/ecore_dev_api.h b/drivers/net/qede/base/ecore_dev_api.h
index b3c9f89..8b28af9 100644
--- a/drivers/net/qede/base/ecore_dev_api.h
+++ b/drivers/net/qede/base/ecore_dev_api.h
@@ -155,6 +155,45 @@ enum _ecore_status_t ecore_hw_init(struct ecore_dev *p_dev,
  *
  */
 void ecore_prepare_hibernate(struct ecore_dev *p_dev);
+
+enum ecore_db_rec_width {
+	DB_REC_WIDTH_32B,
+	DB_REC_WIDTH_64B,
+};
+
+enum ecore_db_rec_space {
+	DB_REC_KERNEL,
+	DB_REC_USER,
+};
+
+/**
+ * @brief db_recovery_add - add doorbell information to the doorbell
+ * recovery mechanism.
+ *
+ * @param p_dev
+ * @param db_addr - doorbell address
+ * @param db_data - address of where db_data is stored
+ * @param db_width - doorbell is 32b or 64b
+ * @param db_space - doorbell recovery addresses are user or kernel space
+ */
+enum _ecore_status_t ecore_db_recovery_add(struct ecore_dev *p_dev,
+					   void OSAL_IOMEM *db_addr,
+					   void *db_data,
+					   enum ecore_db_rec_width db_width,
+					   enum ecore_db_rec_space db_space);
+
+/**
+ * @brief db_recovery_del - remove doorbell information from the doorbell
+ * recovery mechanism. db_data serves as key (db_addr is not unique).
+ *
+ * @param p_dev
+ * @param db_addr - doorbell address
+ * @param db_data - address where db_data is stored. Serves as key for the
+ *                  entry to delete.
+ */
+enum _ecore_status_t ecore_db_recovery_del(struct ecore_dev *p_dev,
+					   void OSAL_IOMEM *db_addr,
+					   void *db_data);
 #endif
 
 /**
diff --git a/drivers/net/qede/base/ecore_int.c b/drivers/net/qede/base/ecore_int.c
index acf8759..d86f56e 100644
--- a/drivers/net/qede/base/ecore_int.c
+++ b/drivers/net/qede/base/ecore_int.c
@@ -414,31 +414,136 @@ static enum _ecore_status_t ecore_fw_assertion(struct ecore_hwfn *p_hwfn)
 	return ECORE_SUCCESS;
 }
 
-#define ECORE_DORQ_ATTENTION_REASON_MASK (0xfffff)
-#define ECORE_DORQ_ATTENTION_OPAQUE_MASK (0xffff)
-#define ECORE_DORQ_ATTENTION_SIZE_MASK	 (0x7f0000)
-#define ECORE_DORQ_ATTENTION_SIZE_SHIFT	 (16)
+#define ECORE_DORQ_ATTENTION_REASON_MASK	(0xfffff)
+#define ECORE_DORQ_ATTENTION_OPAQUE_MASK	(0xffff)
+#define ECORE_DORQ_ATTENTION_OPAQUE_SHIFT	(0x0)
+#define ECORE_DORQ_ATTENTION_SIZE_MASK		(0x7f)
+#define ECORE_DORQ_ATTENTION_SIZE_SHIFT		(16)
+
+#define ECORE_DB_REC_COUNT			10
+#define ECORE_DB_REC_INTERVAL			100
+
+/* assumes sticky overflow indication was set for this PF */
+static enum _ecore_status_t ecore_db_rec_attn(struct ecore_hwfn *p_hwfn,
+					      struct ecore_ptt *p_ptt)
+{
+	u8 count = ECORE_DB_REC_COUNT;
+	u32 usage = 1;
+
+	/* wait for usage to zero or count to run out. This is necessary since
+	 * EDPM doorbell transactions can take multiple 64b cycles, and as such
+	 * can "split" over the pci. Possibly, the doorbell drop can happen with
+	 * half an EDPM in the queue and other half dropped. Another EDPM
+	 * doorbell to the same address (from doorbell recovery mechanism or
+	 * from the doorbelling entity) could have first half dropped and second
+	 * half interpreted as continuation of the first. To prevent such
+	 * malformed doorbells from reaching the device, flush the queue before
+	 * releasing the overflow sticky indication.
+	 */
+	while (count-- && usage) {
+		usage = ecore_rd(p_hwfn, p_ptt, DORQ_REG_PF_USAGE_CNT);
+		OSAL_UDELAY(ECORE_DB_REC_INTERVAL);
+	}
+
+	/* should have been depleted by now */
+	if (usage) {
+		DP_NOTICE(p_hwfn->p_dev, false,
+			  "DB recovery: doorbell usage failed to zero after %d usec. usage was %x\n",
+			  ECORE_DB_REC_INTERVAL * ECORE_DB_REC_COUNT, usage);
+		return ECORE_TIMEOUT;
+	}
+
+	/* flush any pending (e)dpm as they may never arrive */
+	ecore_wr(p_hwfn, p_ptt, DORQ_REG_DPM_FORCE_ABORT, 0x1);
+
+	/* release overflow sticky indication (stop silently dropping
+	 * everything)
+	 */
+	ecore_wr(p_hwfn, p_ptt, DORQ_REG_PF_OVFL_STICKY, 0x0);
+
+	/* repeat all last doorbells (doorbell drop recovery) */
+	ecore_db_recovery_execute(p_hwfn, DB_REC_REAL_DEAL);
+
+	return ECORE_SUCCESS;
+}
 
 static enum _ecore_status_t ecore_dorq_attn_cb(struct ecore_hwfn *p_hwfn)
 {
-	u32 reason;
+	u32 int_sts, first_drop_reason, details, address, overflow,
+		all_drops_reason;
+	struct ecore_ptt *p_ptt = p_hwfn->p_dpc_ptt;
+	enum _ecore_status_t rc;
 
-	reason = ecore_rd(p_hwfn, p_hwfn->p_dpc_ptt, DORQ_REG_DB_DROP_REASON) &
-	    ECORE_DORQ_ATTENTION_REASON_MASK;
-	if (reason) {
-		u32 details = ecore_rd(p_hwfn, p_hwfn->p_dpc_ptt,
-				       DORQ_REG_DB_DROP_DETAILS);
+	int_sts = ecore_rd(p_hwfn, p_ptt, DORQ_REG_INT_STS);
+	DP_NOTICE(p_hwfn->p_dev, false, "DORQ attention. int_sts was %x\n",
+		  int_sts);
 
-		DP_INFO(p_hwfn->p_dev,
-			"DORQ db_drop: address 0x%08x Opaque FID 0x%04x"
-			" Size [bytes] 0x%08x Reason: 0x%08x\n",
-			ecore_rd(p_hwfn, p_hwfn->p_dpc_ptt,
-				 DORQ_REG_DB_DROP_DETAILS_ADDRESS),
-			(u16)(details & ECORE_DORQ_ATTENTION_OPAQUE_MASK),
-			((details & ECORE_DORQ_ATTENTION_SIZE_MASK) >>
-			 ECORE_DORQ_ATTENTION_SIZE_SHIFT) * 4, reason);
+	/* int_sts may be zero since all PFs were interrupted for doorbell
+	 * overflow but another one already handled it. Can abort here. If
+	 * this PF also requires overflow recovery we will be interrupted again
+	 */
+	if (!int_sts)
+		return ECORE_SUCCESS;
+
+	/* check if db_drop or overflow happened */
+	if (int_sts & (DORQ_REG_INT_STS_DB_DROP |
+		       DORQ_REG_INT_STS_DORQ_FIFO_OVFL_ERR)) {
+		/* obtain data about db drop/overflow */
+		first_drop_reason = ecore_rd(p_hwfn, p_ptt,
+				  DORQ_REG_DB_DROP_REASON) &
+				  ECORE_DORQ_ATTENTION_REASON_MASK;
+		details = ecore_rd(p_hwfn, p_ptt,
+				   DORQ_REG_DB_DROP_DETAILS);
+		address = ecore_rd(p_hwfn, p_ptt,
+				   DORQ_REG_DB_DROP_DETAILS_ADDRESS);
+		overflow = ecore_rd(p_hwfn, p_ptt,
+				    DORQ_REG_PF_OVFL_STICKY);
+		all_drops_reason = ecore_rd(p_hwfn, p_ptt,
+					    DORQ_REG_DB_DROP_DETAILS_REASON);
+
+		/* log info */
+		DP_NOTICE(p_hwfn->p_dev, false,
+			  "Doorbell drop occurred\n"
+			  "Address\t\t0x%08x\t(second BAR address)\n"
+			  "FID\t\t0x%04x\t\t(Opaque FID)\n"
+			  "Size\t\t0x%04x\t\t(in bytes)\n"
+			  "1st drop reason\t0x%08x\t(details on first drop since last handling)\n"
+			  "Sticky reasons\t0x%08x\t(all drop reasons since last handling)\n"
+			  "Overflow\t0x%x\t\t(a per PF indication)\n",
+			  address,
+			  GET_FIELD(details, ECORE_DORQ_ATTENTION_OPAQUE),
+			  GET_FIELD(details, ECORE_DORQ_ATTENTION_SIZE) * 4,
+			  first_drop_reason, all_drops_reason, overflow);
+
+		/* if this PF caused overflow, initiate recovery */
+		if (overflow) {
+			rc = ecore_db_rec_attn(p_hwfn, p_ptt);
+			if (rc != ECORE_SUCCESS)
+				return rc;
+		}
+
+		/* clear the doorbell drop details and prepare for next drop */
+		ecore_wr(p_hwfn, p_ptt, DORQ_REG_DB_DROP_DETAILS_REL, 0);
+
+		/* mark interrupt as handled (note: even if drop was due to a
+		 * different reason than overflow we mark as handled)
+		 */
+		ecore_wr(p_hwfn, p_ptt, DORQ_REG_INT_STS_WR,
+			 DORQ_REG_INT_STS_DB_DROP |
+			 DORQ_REG_INT_STS_DORQ_FIFO_OVFL_ERR);
+
+		/* if there are no indications other than drop indications,
+		 * success
+		 */
+		if ((int_sts & ~(DORQ_REG_INT_STS_DB_DROP |
+				 DORQ_REG_INT_STS_DORQ_FIFO_OVFL_ERR |
+				 DORQ_REG_INT_STS_DORQ_FIFO_AFULL)) == 0)
+			return ECORE_SUCCESS;
 	}
 
+	/* some other indication was present - non recoverable */
+	DP_INFO(p_hwfn, "DORQ fatal attention\n");
+
 	return ECORE_INVAL;
 }
 
diff --git a/drivers/net/qede/base/ecore_spq.c b/drivers/net/qede/base/ecore_spq.c
index 29ba660..716799a 100644
--- a/drivers/net/qede/base/ecore_spq.c
+++ b/drivers/net/qede/base/ecore_spq.c
@@ -231,9 +231,9 @@ static enum _ecore_status_t ecore_spq_hw_post(struct ecore_hwfn *p_hwfn,
 					      struct ecore_spq_entry *p_ent)
 {
 	struct ecore_chain *p_chain = &p_hwfn->p_spq->chain;
+	struct core_db_data *p_db_data = &p_spq->db_data;
 	u16 echo = ecore_chain_get_prod_idx(p_chain);
 	struct slow_path_element *elem;
-	struct core_db_data db;
 
 	p_ent->elem.hdr.echo = OSAL_CPU_TO_LE16(echo);
 	elem = ecore_chain_produce(p_chain);
@@ -242,31 +242,24 @@ static enum _ecore_status_t ecore_spq_hw_post(struct ecore_hwfn *p_hwfn,
 		return ECORE_INVAL;
 	}
 
-	*elem = p_ent->elem;	/* struct assignment */
+	*elem = p_ent->elem;	/* Struct assignment */
 
-	/* send a doorbell on the slow hwfn session */
-	OSAL_MEMSET(&db, 0, sizeof(db));
-	SET_FIELD(db.params, CORE_DB_DATA_DEST, DB_DEST_XCM);
-	SET_FIELD(db.params, CORE_DB_DATA_AGG_CMD, DB_AGG_CMD_SET);
-	SET_FIELD(db.params, CORE_DB_DATA_AGG_VAL_SEL,
-		  DQ_XCM_CORE_SPQ_PROD_CMD);
-	db.agg_flags = DQ_XCM_CORE_DQ_CF_CMD;
-	db.spq_prod = OSAL_CPU_TO_LE16(ecore_chain_get_prod_idx(p_chain));
+	p_db_data->spq_prod =
+		OSAL_CPU_TO_LE16(ecore_chain_get_prod_idx(p_chain));
 
-	/* make sure the SPQE is updated before the doorbell */
+	/* Make sure the SPQE is updated before the doorbell */
 	OSAL_WMB(p_hwfn->p_dev);
 
-	DOORBELL(p_hwfn, DB_ADDR(p_spq->cid, DQ_DEMS_LEGACY),
-		 *(u32 *)&db);
+	DOORBELL(p_hwfn, p_spq->db_addr_offset, *(u32 *)p_db_data);
 
-	/* make sure doorbell is rang */
+	/* Make sure doorbell is rung */
 	OSAL_WMB(p_hwfn->p_dev);
 
 	DP_VERBOSE(p_hwfn, ECORE_MSG_SPQ,
 		   "Doorbelled [0x%08x, CID 0x%08x] with Flags: %02x"
 		   " agg_params: %02x, prod: %04x\n",
-		   DB_ADDR(p_spq->cid, DQ_DEMS_LEGACY), p_spq->cid, db.params,
-		   db.agg_flags, ecore_chain_get_prod_idx(p_chain));
+		   p_spq->db_addr_offset, p_spq->cid, p_db_data->params,
+		   p_db_data->agg_flags, ecore_chain_get_prod_idx(p_chain));
 
 	return ECORE_SUCCESS;
 }
@@ -456,8 +449,11 @@ void ecore_spq_setup(struct ecore_hwfn *p_hwfn)
 {
 	struct ecore_spq *p_spq = p_hwfn->p_spq;
 	struct ecore_spq_entry *p_virt = OSAL_NULL;
+	struct core_db_data *p_db_data;
+	void OSAL_IOMEM *db_addr;
 	dma_addr_t p_phys = 0;
 	u32 i, capacity;
+	enum _ecore_status_t rc;
 
 	OSAL_LIST_INIT(&p_spq->pending);
 	OSAL_LIST_INIT(&p_spq->completion_pending);
@@ -495,6 +491,24 @@ void ecore_spq_setup(struct ecore_hwfn *p_hwfn)
 
 	/* reset the chain itself */
 	ecore_chain_reset(&p_spq->chain);
+
+	/* Initialize the address/data of the SPQ doorbell */
+	p_spq->db_addr_offset = DB_ADDR(p_spq->cid, DQ_DEMS_LEGACY);
+	p_db_data = &p_spq->db_data;
+	OSAL_MEM_ZERO(p_db_data, sizeof(*p_db_data));
+	SET_FIELD(p_db_data->params, CORE_DB_DATA_DEST, DB_DEST_XCM);
+	SET_FIELD(p_db_data->params, CORE_DB_DATA_AGG_CMD, DB_AGG_CMD_MAX);
+	SET_FIELD(p_db_data->params, CORE_DB_DATA_AGG_VAL_SEL,
+		  DQ_XCM_CORE_SPQ_PROD_CMD);
+	p_db_data->agg_flags = DQ_XCM_CORE_DQ_CF_CMD;
+
+	/* Register the SPQ doorbell with the doorbell recovery mechanism */
+	db_addr = (void *)((u8 *)p_hwfn->doorbells + p_spq->db_addr_offset);
+	rc = ecore_db_recovery_add(p_hwfn->p_dev, db_addr, &p_spq->db_data,
+				   DB_REC_WIDTH_32B, DB_REC_KERNEL);
+	if (rc != ECORE_SUCCESS)
+		DP_INFO(p_hwfn,
+			"Failed to register the SPQ doorbell with the doorbell recovery mechanism\n");
 }
 
 enum _ecore_status_t ecore_spq_alloc(struct ecore_hwfn *p_hwfn)
@@ -552,11 +566,16 @@ enum _ecore_status_t ecore_spq_alloc(struct ecore_hwfn *p_hwfn)
 void ecore_spq_free(struct ecore_hwfn *p_hwfn)
 {
 	struct ecore_spq *p_spq = p_hwfn->p_spq;
+	void OSAL_IOMEM *db_addr;
 	u32 capacity;
 
 	if (!p_spq)
 		return;
 
+	/* Delete the SPQ doorbell from the doorbell recovery mechanism */
+	db_addr = (void *)((u8 *)p_hwfn->doorbells + p_spq->db_addr_offset);
+	ecore_db_recovery_del(p_hwfn->p_dev, db_addr, &p_spq->db_data);
+
 	if (p_spq->p_virt) {
 		capacity = ecore_chain_get_capacity(&p_spq->chain);
 		OSAL_DMA_FREE_COHERENT(p_hwfn->p_dev,
diff --git a/drivers/net/qede/base/ecore_spq.h b/drivers/net/qede/base/ecore_spq.h
index e530f83..31d8a3e 100644
--- a/drivers/net/qede/base/ecore_spq.h
+++ b/drivers/net/qede/base/ecore_spq.h
@@ -124,6 +124,9 @@ struct ecore_spq {
 	u32				comp_count;
 
 	u32				cid;
+
+	u32				db_addr_offset;
+	struct core_db_data		db_data;
 };
 
 struct ecore_port;
diff --git a/drivers/net/qede/base/reg_addr.h b/drivers/net/qede/base/reg_addr.h
index 116fe78..9048581 100644
--- a/drivers/net/qede/base/reg_addr.h
+++ b/drivers/net/qede/base/reg_addr.h
@@ -1208,3 +1208,13 @@
 
 #define PSWRQ2_REG_WR_MBS0 0x240400UL
 #define PGLUE_B_REG_MASTER_WRITE_PAD_ENABLE 0x2aae30UL
+#define DORQ_REG_PF_USAGE_CNT 0x1009c0UL
+#define DORQ_REG_DPM_FORCE_ABORT 0x1009d8UL
+#define DORQ_REG_PF_OVFL_STICKY 0x1009d0UL
+#define DORQ_REG_INT_STS 0x100180UL
+  #define DORQ_REG_INT_STS_DB_DROP (0x1 << 1)
+  #define DORQ_REG_INT_STS_DORQ_FIFO_OVFL_ERR (0x1 << 2)
+  #define DORQ_REG_INT_STS_DORQ_FIFO_AFULL (0x1 << 3)
+#define DORQ_REG_DB_DROP_DETAILS_REL 0x100a28UL
+#define DORQ_REG_INT_STS_WR 0x100188UL
+#define DORQ_REG_DB_DROP_DETAILS_REASON 0x100a20UL
diff --git a/drivers/net/qede/qede_main.c b/drivers/net/qede/qede_main.c
index 71b3a39..e6d2351 100644
--- a/drivers/net/qede/qede_main.c
+++ b/drivers/net/qede/qede_main.c
@@ -36,6 +36,7 @@ static void qed_init_pci(struct ecore_dev *edev, struct rte_pci_device *pci_dev)
 {
 	edev->regview = pci_dev->mem_resource[0].addr;
 	edev->doorbells = pci_dev->mem_resource[2].addr;
+	edev->db_size = pci_dev->mem_resource[2].len;
 }
 
 static int
-- 
1.7.10.3



More information about the dev mailing list