[dpdk-dev] [PATCH 3/4] ixgbe: automatic link recovery on VF

Wenzhuo Lu wenzhuo.lu at intel.com
Wed May 4 23:10:45 CEST 2016


When the physical link goes down and recovers later,
the VF link cannot recover until the user stops and
starts the port manually.
This patch implements automatic recovery of the VF
port.
The automatic recovery is based on the link up/down
message received from the PF. When the VF receives the
link up/down message, it replaces the RX/TX and
operation functions with fake ones to stop RX/TX
and any further operation, and then resets the VF port.
After the port has been reset successfully, the real
RX/TX and operation functions are restored.

Signed-off-by: Wenzhuo Lu <wenzhuo.lu at intel.com>
---
 doc/guides/rel_notes/release_16_07.rst |  5 ++
 drivers/net/ixgbe/ixgbe_ethdev.c       | 86 +++++++++++++++++++++++++++++++++-
 drivers/net/ixgbe/ixgbe_ethdev.h       | 14 ++++++
 drivers/net/ixgbe/ixgbe_rxtx.c         | 34 ++++++++++++++
 drivers/net/ixgbe/ixgbe_rxtx.h         |  2 +
 5 files changed, 140 insertions(+), 1 deletion(-)

diff --git a/doc/guides/rel_notes/release_16_07.rst b/doc/guides/rel_notes/release_16_07.rst
index 8d45915..d80f449 100644
--- a/doc/guides/rel_notes/release_16_07.rst
+++ b/doc/guides/rel_notes/release_16_07.rst
@@ -40,6 +40,11 @@ This section should contain new features added in this release. Sample format:
   VF. To handle this link up/down event, add the mailbox interruption
   support to receive the message.
 
+* **Added support for automatic link recovery on ixgbe VF.**
+
+  When the physical link goes down and recovers later, the VF receives a
+  mailbox message about it. The VF handles this message by resetting the
+  VF port, so the VF link can recover automatically.
 
 Resolved Issues
 ---------------
diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c
index 8e5f64f..f1f67f2 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.c
+++ b/drivers/net/ixgbe/ixgbe_ethdev.c
@@ -589,6 +589,8 @@ static const struct eth_dev_ops ixgbevf_eth_dev_ops = {
 	.rss_hash_conf_get    = ixgbe_dev_rss_hash_conf_get,
 };
 
+static const struct eth_dev_ops ixgbevf_eth_dev_ops_fake = {NULL}; /* no ops */
+
 /* store statistics names and its offset in stats structure */
 struct rte_ixgbe_xstats_name_off {
 	char name[RTE_ETH_XSTATS_NAME_SIZE];
@@ -1322,12 +1324,15 @@ eth_ixgbevf_dev_init(struct rte_eth_dev *eth_dev)
 	struct ixgbe_hwstrip *hwstrip =
 		IXGBE_DEV_PRIVATE_TO_HWSTRIP_BITMAP(eth_dev->data->dev_private);
 	struct ether_addr *perm_addr = (struct ether_addr *) hw->mac.perm_addr;
+	struct ixgbe_adapter *adapter =
+		(struct ixgbe_adapter *)eth_dev->data->dev_private;
 
 	PMD_INIT_FUNC_TRACE();
 
 	eth_dev->dev_ops = &ixgbevf_eth_dev_ops;
 	eth_dev->rx_pkt_burst = &ixgbe_recv_pkts;
 	eth_dev->tx_pkt_burst = &ixgbe_xmit_pkts;
+	rte_spinlock_init(&adapter->vf_reset_lock);
 
 	/* for secondary processes, we don't initialise any further as primary
 	 * has already done this work. Only check we don't need a different
@@ -7152,14 +7157,93 @@ ixgbevf_dev_allmulticast_disable(struct rte_eth_dev *dev)
 static void ixgbevf_mbx_process(struct rte_eth_dev *dev)
 {
 	struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+	struct ixgbe_adapter *adapter = dev->data->dev_private;
 	u32 in_msg = 0;
 
 	if (ixgbe_read_mbx(hw, &in_msg, 1, 0))
 		return;
 
 	/* PF reset VF event */
-	if (in_msg == IXGBE_PF_CONTROL_MSG)
+	if (in_msg == IXGBE_PF_CONTROL_MSG) {
 		_rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_RESET);
+		/* Park ops and rx/tx on the fakes. A burst function that is
+		 * already a fake must never be saved as the one to restore. */
+		if (dev->data->dev_started) {
+			PMD_DRV_LOG(DEBUG, "Link up/down event detected.");
+			dev->dev_ops = &ixgbevf_eth_dev_ops_fake;
+			if (dev->rx_pkt_burst != ixgbevf_recv_pkts_fake)
+				adapter->rx_backup = dev->rx_pkt_burst;
+			if (dev->tx_pkt_burst != ixgbevf_xmit_pkts_fake)
+				adapter->tx_backup = dev->tx_pkt_burst;
+			dev->rx_pkt_burst = ixgbevf_recv_pkts_fake;
+			dev->tx_pkt_burst = ixgbevf_xmit_pkts_fake;
+		}
+	}
+}
+
+/* Reset the VF port after a link up/down event; called by the fake rx/tx. */
+void
+ixgbevf_dev_link_up_down_handler(struct rte_eth_dev *dev)
+{
+	struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+	struct ixgbe_adapter *adapter = dev->data->dev_private;
+	int diag;
+	uint32_t vteiam;
+
+	/* Only one working core needs to perform the VF reset. */
+	if (rte_spinlock_trylock(&adapter->vf_reset_lock)) {
+		/**
+		 * A working thread may still be inside the real RX/TX func when
+		 * the fake ones are installed, so wait long enough for all of
+		 * them to exit. Each RX/TX call is assumed to take under 100us.
+		 */
+		rte_delay_us(100);
+
+		do {
+			dev->data->dev_started = 0;
+			ixgbevf_dev_stop(dev);
+			rte_delay_us(1000000);
+
+			diag = ixgbevf_dev_start(dev);
+			if (diag) {
+				PMD_INIT_LOG(ERR, "Ixgbe VF reset: "
+					     "Failed to start device.");
+				/* Drop the lock so a later poll can retry. */
+				rte_spinlock_unlock(&adapter->vf_reset_lock);
+				return;
+			}
+			dev->data->dev_started = 1;
+			ixgbevf_dev_stats_reset(dev);
+			if (dev->data->dev_conf.intr_conf.lsc == 0)
+				diag = ixgbe_dev_link_update(dev, 0);
+			if (diag) {
+				PMD_INIT_LOG(INFO, "Ixgbe VF reset: "
+					     "Failed to update link.");
+			}
+
+			/**
+			 * When the PF link is down, there is a chance that the
+			 * VF cannot operate its registers. Check whether the
+			 * register was written successfully. If not, repeat
+			 * stop/start until the PF link is up, in other words,
+			 * until the registers can be written.
+			 */
+			vteiam = IXGBE_READ_REG(hw, IXGBE_VTEIAM);
+		/* Refer to ixgbevf_intr_enable for this check. */
+		} while (vteiam != IXGBE_VF_IRQ_ENABLE_MASK);
+
+		dev->rx_pkt_burst = adapter->rx_backup;
+		dev->tx_pkt_burst = adapter->tx_backup;
+		dev->dev_ops = &ixgbevf_eth_dev_ops;
+
+		/**
+		 * Wait a while so the other working threads are running the
+		 * real rx/tx func again. This avoids another working thread
+		 * entering here and resetting the device again.
+		 */
+		rte_delay_us(100);
+		rte_spinlock_unlock(&adapter->vf_reset_lock);
+	}
 }
 
 static int
diff --git a/drivers/net/ixgbe/ixgbe_ethdev.h b/drivers/net/ixgbe/ixgbe_ethdev.h
index 4ff6338..daca27c 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.h
+++ b/drivers/net/ixgbe/ixgbe_ethdev.h
@@ -38,6 +38,7 @@
 #include "base/ixgbe_dcb_82598.h"
 #include "ixgbe_bypass.h"
 #include <rte_time.h>
+#include <rte_spinlock.h>
 
 /* need update link, bit flag */
 #define IXGBE_FLAG_NEED_LINK_UPDATE (uint32_t)(1 << 0)
@@ -289,6 +290,9 @@ struct ixgbe_adapter {
 	struct rte_timecounter      systime_tc;
 	struct rte_timecounter      rx_tstamp_tc;
 	struct rte_timecounter      tx_tstamp_tc;
+	eth_rx_burst_t              rx_backup;
+	eth_tx_burst_t              tx_backup;
+	rte_spinlock_t              vf_reset_lock;
 };
 
 #define IXGBE_DEV_PRIVATE_TO_HW(adapter)\
@@ -396,6 +400,14 @@ uint16_t ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 uint16_t ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
 		uint16_t nb_pkts);
 
+uint16_t ixgbevf_recv_pkts_fake(void *rx_queue,
+				struct rte_mbuf **rx_pkts,
+				uint16_t nb_pkts);
+
+uint16_t ixgbevf_xmit_pkts_fake(void *tx_queue,
+				struct rte_mbuf **tx_pkts,
+				uint16_t nb_pkts);
+
 int ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
 			      struct rte_eth_rss_conf *rss_conf);
 
@@ -442,4 +454,6 @@ uint32_t ixgbe_convert_vm_rx_mask_to_val(uint16_t rx_mask, uint32_t orig_val);
 
 int ixgbe_fdir_ctrl_func(struct rte_eth_dev *dev,
 			enum rte_filter_op filter_op, void *arg);
+
+void ixgbevf_dev_link_up_down_handler(struct rte_eth_dev *dev);
 #endif /* _IXGBE_ETHDEV_H_ */
diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
index 9fb38a6..d99e1fe 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx.c
@@ -2344,6 +2344,7 @@ ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
 		return -ENOMEM;
 	}
 
+	txq->dev = dev;
 	txq->nb_tx_desc = nb_desc;
 	txq->tx_rs_thresh = tx_rs_thresh;
 	txq->tx_free_thresh = tx_free_thresh;
@@ -2623,6 +2624,7 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
 				 RTE_CACHE_LINE_SIZE, socket_id);
 	if (rxq == NULL)
 		return -ENOMEM;
+	rxq->dev = dev;
 	rxq->mb_pool = mp;
 	rxq->nb_rx_desc = nb_desc;
 	rxq->rx_free_thresh = rx_conf->rx_free_thresh;
@@ -5245,3 +5247,35 @@ ixgbe_rxq_vec_setup(struct ixgbe_rx_queue __rte_unused *rxq)
 {
 	return -1;
 }
+
+/**
+ * Fake RX burst function used for link up/down.
+ * Runs the link up/down handler for the queue's device; receives nothing.
+ */
+uint16_t
+ixgbevf_recv_pkts_fake(void *rx_queue,
+		       struct rte_mbuf __rte_unused **rx_pkts,
+		       uint16_t __rte_unused nb_pkts)
+{
+	struct ixgbe_rx_queue *queue = rx_queue;
+
+	/* Handle the event in the caller's context; no mbuf is returned. */
+	ixgbevf_dev_link_up_down_handler(queue->dev);
+	return 0;
+}
+
+/**
+ * Fake TX burst function used for link up/down.
+ * Runs the link up/down handler for the queue's device; transmits nothing.
+ */
+uint16_t
+ixgbevf_xmit_pkts_fake(void *tx_queue,
+		       struct rte_mbuf __rte_unused **tx_pkts,
+		       uint16_t __rte_unused nb_pkts)
+{
+	struct ixgbe_tx_queue *queue = tx_queue;
+
+	/* Handle the event in the caller's context; no mbuf is consumed. */
+	ixgbevf_dev_link_up_down_handler(queue->dev);
+	return 0;
+}
diff --git a/drivers/net/ixgbe/ixgbe_rxtx.h b/drivers/net/ixgbe/ixgbe_rxtx.h
index 3691a19..50971e1 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx.h
+++ b/drivers/net/ixgbe/ixgbe_rxtx.h
@@ -125,6 +125,7 @@ struct ixgbe_rx_queue {
 	struct ixgbe_scattered_rx_entry *sw_sc_ring; /**< address of scattered Rx software ring. */
 	struct rte_mbuf *pkt_first_seg; /**< First segment of current packet. */
 	struct rte_mbuf *pkt_last_seg; /**< Last segment of current packet. */
+	struct rte_eth_dev *dev; /**< device this queue belongs to. */
 	uint64_t            mbuf_initializer; /**< value to init mbufs */
 	uint16_t            nb_rx_desc; /**< number of RX descriptors. */
 	uint16_t            rx_tail;  /**< current value of RDT register. */
@@ -212,6 +213,7 @@ struct ixgbe_tx_queue {
 		struct ixgbe_tx_entry_v *sw_ring_v; /**< address of SW ring for vector PMD */
 	};
 	volatile uint32_t   *tdt_reg_addr; /**< Address of TDT register. */
+	struct rte_eth_dev  *dev; /**< device this queue belongs to. */
 	uint16_t            nb_tx_desc;    /**< number of TX descriptors. */
 	uint16_t            tx_tail;       /**< current value of TDT reg. */
 	/**< Start freeing TX buffers if there are less free descriptors than
-- 
1.9.3



More information about the dev mailing list