[dpdk-dev] [PATCH v3 6/8] igb: implement device reset on VF
Zhe Tao
zhe.tao at intel.com
Tue Jun 7 08:12:46 CEST 2016
Implement the device reset function.
1, Add the fake RX/TX functions.
2, The reset function tries to stop RX/TX by replacing
the RX/TX functions with the fake ones and getting the
locks to make sure the regular RX/TX finished.
3, After the RX/TX stopped, reset the VF port, and then
release the locks and restore the RX/TX functions.
BTW: The definition of some structures are moved from .c
file to .h file.
Signed-off-by: Wenzhuo Lu <wenzhuo.lu at intel.com>
---
doc/guides/rel_notes/release_16_07.rst | 2 +-
drivers/net/e1000/e1000_ethdev.h | 116 ++++++++++++++++++++++++++++++
drivers/net/e1000/igb_ethdev.c | 104 +++++++++++++++++++++++++++
drivers/net/e1000/igb_rxtx.c | 128 ++++++---------------------------
4 files changed, 243 insertions(+), 107 deletions(-)
diff --git a/doc/guides/rel_notes/release_16_07.rst b/doc/guides/rel_notes/release_16_07.rst
index d36c4b1..a4c0cc3 100644
--- a/doc/guides/rel_notes/release_16_07.rst
+++ b/doc/guides/rel_notes/release_16_07.rst
@@ -53,7 +53,7 @@ New Features
VF. To handle this link up/down event, add the mailbox interruption
support to receive the message.
-* **Added device reset support for ixgbe VF.**
+* **Added device reset support for ixgbe/igb VF.**
Added the device reset API. APP can call this API to reset the VF port
when it's not working.
diff --git a/drivers/net/e1000/e1000_ethdev.h b/drivers/net/e1000/e1000_ethdev.h
index 6a42994..4ae03ce 100644
--- a/drivers/net/e1000/e1000_ethdev.h
+++ b/drivers/net/e1000/e1000_ethdev.h
@@ -34,6 +34,7 @@
#ifndef _E1000_ETHDEV_H_
#define _E1000_ETHDEV_H_
#include <rte_time.h>
+#include <rte_spinlock.h>
/* need update link, bit flag */
#define E1000_FLAG_NEED_LINK_UPDATE (uint32_t)(1 << 0)
@@ -261,6 +262,113 @@ struct e1000_adapter {
struct rte_timecounter systime_tc;
struct rte_timecounter rx_tstamp_tc;
struct rte_timecounter tx_tstamp_tc;
+ eth_rx_burst_t rx_backup;
+ eth_tx_burst_t tx_backup;
+};
+
+/**
+ * Structure associated with each descriptor of the RX ring of a RX queue.
+ */
+struct igb_rx_entry {
+ struct rte_mbuf *mbuf; /**< mbuf associated with RX descriptor. */
+};
+
+/**
+ * Structure associated with each descriptor of the TX ring of a TX queue.
+ */
+struct igb_tx_entry {
+ struct rte_mbuf *mbuf; /**< mbuf associated with TX desc, if any. */
+ uint16_t next_id; /**< Index of next descriptor in ring. */
+ uint16_t last_id; /**< Index of last scattered descriptor. */
+};
+
+/**
+ * Hardware context number
+ */
+enum igb_advctx_num {
+ IGB_CTX_0 = 0, /**< CTX0 */
+ IGB_CTX_1 = 1, /**< CTX1 */
+ IGB_CTX_NUM = 2, /**< CTX_NUM */
+};
+
+/** Offload features */
+union igb_tx_offload {
+ uint64_t data;
+ struct {
+ uint64_t l3_len:9; /**< L3 (IP) Header Length. */
+ uint64_t l2_len:7; /**< L2 (MAC) Header Length. */
+ uint64_t vlan_tci:16; /**< VLAN Tag Control Identifier(CPU order). */
+ uint64_t l4_len:8; /**< L4 (TCP/UDP) Header Length. */
+ uint64_t tso_segsz:16; /**< TCP TSO segment size. */
+
+ /* uint64_t unused:8; */
+ };
+};
+
+/**
+ * Strucutre to check if new context need be built
+ */
+struct igb_advctx_info {
+ uint64_t flags; /**< ol_flags related to context build. */
+ /** tx offload: vlan, tso, l2-l3-l4 lengths. */
+ union igb_tx_offload tx_offload;
+ /** compare mask for tx offload. */
+ union igb_tx_offload tx_offload_mask;
+};
+
+/**
+ * Structure associated with each RX queue.
+ */
+struct igb_rx_queue {
+ struct rte_mempool *mb_pool; /**< mbuf pool to populate RX ring. */
+ volatile union e1000_adv_rx_desc *rx_ring; /**< RX ring virtual address. */
+ uint64_t rx_ring_phys_addr; /**< RX ring DMA address. */
+ volatile uint32_t *rdt_reg_addr; /**< RDT register address. */
+ volatile uint32_t *rdh_reg_addr; /**< RDH register address. */
+ struct igb_rx_entry *sw_ring; /**< address of RX software ring. */
+ struct rte_mbuf *pkt_first_seg; /**< First segment of current packet. */
+ struct rte_mbuf *pkt_last_seg; /**< Last segment of current packet. */
+ rte_spinlock_t rx_lock; /**< Lock for packet receiption. */
+ uint16_t nb_rx_desc; /**< number of RX descriptors. */
+ uint16_t rx_tail; /**< current value of RDT register. */
+ uint16_t nb_rx_hold; /**< number of held free RX desc. */
+ uint16_t rx_free_thresh; /**< max free RX desc to hold. */
+ uint16_t queue_id; /**< RX queue index. */
+ uint16_t reg_idx; /**< RX queue register index. */
+ uint8_t port_id; /**< Device port identifier. */
+ uint8_t pthresh; /**< Prefetch threshold register. */
+ uint8_t hthresh; /**< Host threshold register. */
+ uint8_t wthresh; /**< Write-back threshold register. */
+ uint8_t crc_len; /**< 0 if CRC stripped, 4 otherwise. */
+ uint8_t drop_en; /**< If not 0, set SRRCTL.Drop_En. */
+};
+
+/**
+ * Structure associated with each TX queue.
+ */
+struct igb_tx_queue {
+ volatile union e1000_adv_tx_desc *tx_ring; /**< TX ring address */
+ uint64_t tx_ring_phys_addr; /**< TX ring DMA address. */
+ struct igb_tx_entry *sw_ring; /**< virtual address of SW ring. */
+ volatile uint32_t *tdt_reg_addr; /**< Address of TDT register. */
+ rte_spinlock_t tx_lock; /**< Lock for packet transmission. */
+ uint32_t txd_type; /**< Device-specific TXD type */
+ uint16_t nb_tx_desc; /**< number of TX descriptors. */
+ uint16_t tx_tail; /**< Current value of TDT register. */
+ uint16_t tx_head;
+ /**< Index of first used TX descriptor. */
+ uint16_t queue_id; /**< TX queue index. */
+ uint16_t reg_idx; /**< TX queue register index. */
+ uint8_t port_id; /**< Device port identifier. */
+ uint8_t pthresh; /**< Prefetch threshold register. */
+ uint8_t hthresh; /**< Host threshold register. */
+ uint8_t wthresh; /**< Write-back threshold register. */
+ uint32_t ctx_curr;
+ /**< Current used hardware descriptor. */
+ uint32_t ctx_start;
+ /**< Start context position for transmit queue. */
+ struct igb_advctx_info ctx_cache[IGB_CTX_NUM];
+ /**< Hardware context history.*/
};
#define E1000_DEV_PRIVATE(adapter) \
@@ -316,6 +424,14 @@ uint16_t eth_igb_xmit_pkts(void *txq, struct rte_mbuf **tx_pkts,
uint16_t eth_igb_recv_pkts(void *rxq, struct rte_mbuf **rx_pkts,
uint16_t nb_pkts);
+uint16_t eth_igbvf_xmit_pkts_fake(void *txq,
+ struct rte_mbuf **tx_pkts,
+ uint16_t nb_pkts);
+
+uint16_t eth_igbvf_recv_pkts_fake(void *rxq,
+ struct rte_mbuf **rx_pkts,
+ uint16_t nb_pkts);
+
uint16_t eth_igb_recv_scattered_pkts(void *rxq,
struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
diff --git a/drivers/net/e1000/igb_ethdev.c b/drivers/net/e1000/igb_ethdev.c
index 8aad741..4b78a25 100644
--- a/drivers/net/e1000/igb_ethdev.c
+++ b/drivers/net/e1000/igb_ethdev.c
@@ -268,6 +268,7 @@ static void eth_igb_configure_msix_intr(struct rte_eth_dev *dev);
static void eth_igbvf_interrupt_handler(struct rte_intr_handle *handle,
void *param);
static void igbvf_mbx_process(struct rte_eth_dev *dev);
+static int igbvf_dev_reset(struct rte_eth_dev *dev);
/*
* Define VF Stats MACRO for Non "cleared on read" register
@@ -409,6 +410,7 @@ static const struct eth_dev_ops igbvf_eth_dev_ops = {
.mac_addr_set = igbvf_default_mac_addr_set,
.get_reg_length = igbvf_get_reg_length,
.get_reg = igbvf_get_regs,
+ .dev_reset = igbvf_dev_reset,
};
/* store statistics names and its offset in stats structure */
@@ -2663,6 +2665,108 @@ void igbvf_mbx_process(struct rte_eth_dev *dev)
}
static int
+igbvf_dev_reset(struct rte_eth_dev *dev)
+{
+ struct e1000_adapter *adapter =
+ (struct e1000_adapter *)dev->data->dev_private;
+ struct e1000_hw *hw =
+ E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+ int diag = 0;
+ uint32_t eiam;
+ uint16_t i;
+ struct igb_rx_queue *rxq;
+ struct igb_tx_queue *txq;
+ /* Reference igbvf_intr_enable */
+ uint32_t eiam_mbx = 1 << E1000_VTIVAR_MISC_MAILBOX;
+
+ /* Nothing needs to be done if the device is not started. */
+ if (!dev->data->dev_started)
+ return 0;
+
+ PMD_DRV_LOG(DEBUG, "Link up/down event detected.");
+
+ /**
+ * Stop RX/TX by fake functions and locks.
+ * Fake functions are used to make RX/TX lock easier.
+ */
+ adapter->rx_backup = dev->rx_pkt_burst;
+ adapter->tx_backup = dev->tx_pkt_burst;
+ dev->rx_pkt_burst = eth_igbvf_recv_pkts_fake;
+ dev->tx_pkt_burst = eth_igbvf_xmit_pkts_fake;
+
+ if (dev->data->rx_queues)
+ for (i = 0; i < dev->data->nb_rx_queues; i++) {
+ rxq = dev->data->rx_queues[i];
+ rte_spinlock_lock(&rxq->rx_lock);
+ }
+
+ if (dev->data->tx_queues)
+ for (i = 0; i < dev->data->nb_tx_queues; i++) {
+ txq = dev->data->tx_queues[i];
+ rte_spinlock_lock(&txq->tx_lock);
+ }
+
+ /* Performance VF reset. */
+ do {
+ dev->data->dev_started = 0;
+ igbvf_dev_stop(dev);
+ if (dev->data->dev_conf.intr_conf.lsc == 0)
+ diag = eth_igb_link_update(dev, 0);
+ if (diag) {
+ PMD_INIT_LOG(INFO, "Igb VF reset: "
+ "Failed to update link.");
+ }
+ rte_delay_ms(1000);
+
+ diag = igbvf_dev_start(dev);
+ if (diag) {
+ PMD_INIT_LOG(ERR, "Igb VF reset: "
+ "Failed to start device.");
+ return diag;
+ }
+ dev->data->dev_started = 1;
+ eth_igbvf_stats_reset(dev);
+ if (dev->data->dev_conf.intr_conf.lsc == 0)
+ diag = eth_igb_link_update(dev, 0);
+ if (diag) {
+ PMD_INIT_LOG(INFO, "Igb VF reset: "
+ "Failed to update link.");
+ }
+
+ /**
+ * When the PF link is down, there has chance
+ * that VF cannot operate its registers. Will
+ * check if the registers is written
+ * successfully. If not, repeat stop/start until
+ * the PF link is up, in other words, until the
+ * registers can be written.
+ */
+ eiam = E1000_READ_REG(hw, E1000_EIAM);
+ } while (!(eiam & eiam_mbx));
+
+ /**
+ * Release the locks for queues.
+ * Restore the RX/TX functions.
+ */
+ if (dev->data->rx_queues)
+ for (i = 0; i < dev->data->nb_rx_queues; i++) {
+ rxq = dev->data->rx_queues[i];
+ rte_spinlock_unlock(&rxq->rx_lock);
+ }
+
+ if (dev->data->tx_queues)
+ for (i = 0; i < dev->data->nb_tx_queues; i++) {
+ txq = dev->data->tx_queues[i];
+ rte_spinlock_unlock(&txq->tx_lock);
+ }
+
+ dev->rx_pkt_burst = adapter->rx_backup;
+ dev->tx_pkt_burst = adapter->tx_backup;
+
+ return 0;
+}
+
+static int
eth_igbvf_interrupt_action(struct rte_eth_dev *dev)
{
struct e1000_interrupt *intr =
diff --git a/drivers/net/e1000/igb_rxtx.c b/drivers/net/e1000/igb_rxtx.c
index 7e97330..5af7173 100644
--- a/drivers/net/e1000/igb_rxtx.c
+++ b/drivers/net/e1000/igb_rxtx.c
@@ -67,7 +67,6 @@
#include <rte_tcp.h>
#include <rte_sctp.h>
#include <rte_string_fns.h>
-#include <rte_spinlock.h>
#include "e1000_logs.h"
#include "base/e1000_api.h"
@@ -80,72 +79,6 @@
PKT_TX_L4_MASK | \
PKT_TX_TCP_SEG)
-/**
- * Structure associated with each descriptor of the RX ring of a RX queue.
- */
-struct igb_rx_entry {
- struct rte_mbuf *mbuf; /**< mbuf associated with RX descriptor. */
-};
-
-/**
- * Structure associated with each descriptor of the TX ring of a TX queue.
- */
-struct igb_tx_entry {
- struct rte_mbuf *mbuf; /**< mbuf associated with TX desc, if any. */
- uint16_t next_id; /**< Index of next descriptor in ring. */
- uint16_t last_id; /**< Index of last scattered descriptor. */
-};
-
-/**
- * Structure associated with each RX queue.
- */
-struct igb_rx_queue {
- struct rte_mempool *mb_pool; /**< mbuf pool to populate RX ring. */
- volatile union e1000_adv_rx_desc *rx_ring; /**< RX ring virtual address. */
- uint64_t rx_ring_phys_addr; /**< RX ring DMA address. */
- volatile uint32_t *rdt_reg_addr; /**< RDT register address. */
- volatile uint32_t *rdh_reg_addr; /**< RDH register address. */
- struct igb_rx_entry *sw_ring; /**< address of RX software ring. */
- struct rte_mbuf *pkt_first_seg; /**< First segment of current packet. */
- struct rte_mbuf *pkt_last_seg; /**< Last segment of current packet. */
- rte_spinlock_t rx_lock; /**< Lock for packet receiption. */
- uint16_t nb_rx_desc; /**< number of RX descriptors. */
- uint16_t rx_tail; /**< current value of RDT register. */
- uint16_t nb_rx_hold; /**< number of held free RX desc. */
- uint16_t rx_free_thresh; /**< max free RX desc to hold. */
- uint16_t queue_id; /**< RX queue index. */
- uint16_t reg_idx; /**< RX queue register index. */
- uint8_t port_id; /**< Device port identifier. */
- uint8_t pthresh; /**< Prefetch threshold register. */
- uint8_t hthresh; /**< Host threshold register. */
- uint8_t wthresh; /**< Write-back threshold register. */
- uint8_t crc_len; /**< 0 if CRC stripped, 4 otherwise. */
- uint8_t drop_en; /**< If not 0, set SRRCTL.Drop_En. */
-};
-
-/**
- * Hardware context number
- */
-enum igb_advctx_num {
- IGB_CTX_0 = 0, /**< CTX0 */
- IGB_CTX_1 = 1, /**< CTX1 */
- IGB_CTX_NUM = 2, /**< CTX_NUM */
-};
-
-/** Offload features */
-union igb_tx_offload {
- uint64_t data;
- struct {
- uint64_t l3_len:9; /**< L3 (IP) Header Length. */
- uint64_t l2_len:7; /**< L2 (MAC) Header Length. */
- uint64_t vlan_tci:16; /**< VLAN Tag Control Identifier(CPU order). */
- uint64_t l4_len:8; /**< L4 (TCP/UDP) Header Length. */
- uint64_t tso_segsz:16; /**< TCP TSO segment size. */
-
- /* uint64_t unused:8; */
- };
-};
-
/*
* Compare mask for igb_tx_offload.data,
* should be in sync with igb_tx_offload layout.
@@ -158,45 +91,6 @@ union igb_tx_offload {
#define TX_TSO_CMP_MASK \
(TX_MACIP_LEN_CMP_MASK | TX_TCP_LEN_CMP_MASK | TX_TSO_MSS_CMP_MASK)
-/**
- * Strucutre to check if new context need be built
- */
-struct igb_advctx_info {
- uint64_t flags; /**< ol_flags related to context build. */
- /** tx offload: vlan, tso, l2-l3-l4 lengths. */
- union igb_tx_offload tx_offload;
- /** compare mask for tx offload. */
- union igb_tx_offload tx_offload_mask;
-};
-
-/**
- * Structure associated with each TX queue.
- */
-struct igb_tx_queue {
- volatile union e1000_adv_tx_desc *tx_ring; /**< TX ring address */
- uint64_t tx_ring_phys_addr; /**< TX ring DMA address. */
- struct igb_tx_entry *sw_ring; /**< virtual address of SW ring. */
- rte_spinlock_t tx_lock; /**< Lock for packet transmission. */
- volatile uint32_t *tdt_reg_addr; /**< Address of TDT register. */
- uint32_t txd_type; /**< Device-specific TXD type */
- uint16_t nb_tx_desc; /**< number of TX descriptors. */
- uint16_t tx_tail; /**< Current value of TDT register. */
- uint16_t tx_head;
- /**< Index of first used TX descriptor. */
- uint16_t queue_id; /**< TX queue index. */
- uint16_t reg_idx; /**< TX queue register index. */
- uint8_t port_id; /**< Device port identifier. */
- uint8_t pthresh; /**< Prefetch threshold register. */
- uint8_t hthresh; /**< Host threshold register. */
- uint8_t wthresh; /**< Write-back threshold register. */
- uint32_t ctx_curr;
- /**< Current used hardware descriptor. */
- uint32_t ctx_start;
- /**< Start context position for transmit queue. */
- struct igb_advctx_info ctx_cache[IGB_CTX_NUM];
- /**< Hardware context history.*/
-};
-
#if 1
#define RTE_PMD_USE_PREFETCH
#endif
@@ -2530,3 +2424,25 @@ igb_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
qinfo->conf.tx_thresh.hthresh = txq->hthresh;
qinfo->conf.tx_thresh.wthresh = txq->wthresh;
}
+
+/**
+ * A fake function to stop transmission.
+ */
+uint16_t
+eth_igbvf_xmit_pkts_fake(void __rte_unused *tx_queue,
+ struct rte_mbuf __rte_unused **tx_pkts,
+ uint16_t __rte_unused nb_pkts)
+{
+ return 0;
+}
+
+/**
+ * A fake function to stop receiption.
+ */
+uint16_t
+eth_igbvf_recv_pkts_fake(void __rte_unused *rx_queue,
+ struct rte_mbuf __rte_unused **rx_pkts,
+ uint16_t __rte_unused nb_pkts)
+{
+ return 0;
+}
--
2.1.4
More information about the dev
mailing list