[dpdk-dev] [PATCH v7 09/10] igb: enable rx queue interrupts for PF
Cunming Liang
cunming.liang at intel.com
Tue May 5 07:39:45 CEST 2015
The patch does below for igb PF:
- Setup NIC to generate MSI-X interrupts
- Set the IVAR register to map interrupt causes to vectors
- Implement interrupt enable/disable functions
Signed-off-by: Danny Zhou <danny.zhou at intel.com>
Signed-off-by: Cunming Liang <cunming.liang at intel.com>
---
v7 changes
- add condition check when intr vector is not enabled
v6 changes
- fill queue-vector mapping table
v5 changes
- Rebase the patchset onto the HEAD
v3 changes
- Remove unnecessary variables in e1000_mac_info
- Remove spinlok from PMD
v2 changes
- Consolidate review comments related to coding style
lib/librte_pmd_e1000/e1000_ethdev.h | 3 +
lib/librte_pmd_e1000/igb_ethdev.c | 256 ++++++++++++++++++++++++++++++++----
2 files changed, 234 insertions(+), 25 deletions(-)
diff --git a/lib/librte_pmd_e1000/e1000_ethdev.h b/lib/librte_pmd_e1000/e1000_ethdev.h
index c451faa..13c4cad 100644
--- a/lib/librte_pmd_e1000/e1000_ethdev.h
+++ b/lib/librte_pmd_e1000/e1000_ethdev.h
@@ -108,6 +108,9 @@
ETH_RSS_IPV6_TCP_EX | \
ETH_RSS_IPV6_UDP_EX)
+/* maximum number of other interrupts besides Rx & Tx interrupts */
+#define E1000_MAX_OTHER_INTR 1
+
/* structure for interrupt relative data */
struct e1000_interrupt {
uint32_t flags;
diff --git a/lib/librte_pmd_e1000/igb_ethdev.c b/lib/librte_pmd_e1000/igb_ethdev.c
index 4415155..d7ec696 100644
--- a/lib/librte_pmd_e1000/igb_ethdev.c
+++ b/lib/librte_pmd_e1000/igb_ethdev.c
@@ -96,6 +96,7 @@ static int eth_igb_flow_ctrl_get(struct rte_eth_dev *dev,
static int eth_igb_flow_ctrl_set(struct rte_eth_dev *dev,
struct rte_eth_fc_conf *fc_conf);
static int eth_igb_lsc_interrupt_setup(struct rte_eth_dev *dev);
+static int eth_igb_rxq_interrupt_setup(struct rte_eth_dev *dev);
static int eth_igb_interrupt_get_status(struct rte_eth_dev *dev);
static int eth_igb_interrupt_action(struct rte_eth_dev *dev);
static void eth_igb_interrupt_handler(struct rte_intr_handle *handle,
@@ -194,6 +195,16 @@ static int eth_igb_filter_ctrl(struct rte_eth_dev *dev,
enum rte_filter_op filter_op,
void *arg);
+static int eth_igb_rx_queue_intr_enable(struct rte_eth_dev *dev,
+ uint16_t queue_id);
+static int eth_igb_rx_queue_intr_disable(struct rte_eth_dev *dev,
+ uint16_t queue_id);
+static void eth_igb_assign_msix_vector(struct e1000_hw *hw, int8_t direction,
+ uint8_t queue, uint8_t msix_vector);
+static void eth_igb_configure_msix_intr(struct rte_eth_dev *dev);
+static void eth_igb_write_ivar(struct e1000_hw *hw, uint8_t msix_vector,
+ uint8_t index, uint8_t offset);
+
/*
* Define VF Stats MACRO for Non "cleared on read" register
*/
@@ -253,6 +264,8 @@ static const struct eth_dev_ops eth_igb_ops = {
.vlan_tpid_set = eth_igb_vlan_tpid_set,
.vlan_offload_set = eth_igb_vlan_offload_set,
.rx_queue_setup = eth_igb_rx_queue_setup,
+ .rx_queue_intr_enable = eth_igb_rx_queue_intr_enable,
+ .rx_queue_intr_disable = eth_igb_rx_queue_intr_disable,
.rx_queue_release = eth_igb_rx_queue_release,
.rx_queue_count = eth_igb_rx_queue_count,
.rx_descriptor_done = eth_igb_rx_descriptor_done,
@@ -463,6 +476,7 @@ eth_igb_dev_init(struct rte_eth_dev *eth_dev)
struct e1000_filter_info *filter_info =
E1000_DEV_PRIVATE_TO_FILTER_INFO(eth_dev->data->dev_private);
uint32_t ctrl_ext;
+ struct rte_eth_dev_info dev_info;
pci_dev = eth_dev->pci_dev;
eth_dev->dev_ops = ð_igb_ops;
@@ -584,6 +598,23 @@ eth_igb_dev_init(struct rte_eth_dev *eth_dev)
eth_dev->data->port_id, pci_dev->id.vendor_id,
pci_dev->id.device_id);
+ /* set max interrupt vfio request */
+ memset(&dev_info, 0, sizeof(dev_info));
+ eth_igb_infos_get(eth_dev, &dev_info);
+
+ if (pci_dev->intr_handle.vec_en) {
+ pci_dev->intr_handle.max_intr = dev_info.max_rx_queues +
+ E1000_MAX_OTHER_INTR;
+ pci_dev->intr_handle.intr_vec =
+ rte_zmalloc("intr_vec",
+ dev_info.max_rx_queues * sizeof(int), 0);
+ if (pci_dev->intr_handle.intr_vec == NULL) {
+ PMD_INIT_LOG(ERR, "Failed to allocate %d rx_queues"
+ " intr_vec\n", dev_info.max_rx_queues);
+ return -ENOMEM;
+ }
+ }
+
rte_intr_callback_register(&(pci_dev->intr_handle),
eth_igb_interrupt_handler, (void *)eth_dev);
@@ -752,7 +783,7 @@ eth_igb_start(struct rte_eth_dev *dev)
{
struct e1000_hw *hw =
E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
- int ret, i, mask;
+ int ret, mask;
uint32_t ctrl_ext;
PMD_INIT_FUNC_TRACE();
@@ -792,6 +823,9 @@ eth_igb_start(struct rte_eth_dev *dev)
/* configure PF module if SRIOV enabled */
igb_pf_host_configure(dev);
+ /* confiugre msix for rx interrupt */
+ eth_igb_configure_msix_intr(dev);
+
/* Configure for OS presence */
igb_init_manageability(hw);
@@ -819,33 +853,9 @@ eth_igb_start(struct rte_eth_dev *dev)
igb_vmdq_vlan_hw_filter_enable(dev);
}
- /*
- * Configure the Interrupt Moderation register (EITR) with the maximum
- * possible value (0xFFFF) to minimize "System Partial Write" issued by
- * spurious [DMA] memory updates of RX and TX ring descriptors.
- *
- * With a EITR granularity of 2 microseconds in the 82576, only 7/8
- * spurious memory updates per second should be expected.
- * ((65535 * 2) / 1000.1000 ~= 0.131 second).
- *
- * Because interrupts are not used at all, the MSI-X is not activated
- * and interrupt moderation is controlled by EITR[0].
- *
- * Note that having [almost] disabled memory updates of RX and TX ring
- * descriptors through the Interrupt Moderation mechanism, memory
- * updates of ring descriptors are now moderated by the configurable
- * value of Write-Back Threshold registers.
- */
if ((hw->mac.type == e1000_82576) || (hw->mac.type == e1000_82580) ||
(hw->mac.type == e1000_i350) || (hw->mac.type == e1000_i210) ||
(hw->mac.type == e1000_i211)) {
- uint32_t ivar;
-
- /* Enable all RX & TX queues in the IVAR registers */
- ivar = (uint32_t) ((E1000_IVAR_VALID << 16) | E1000_IVAR_VALID);
- for (i = 0; i < 8; i++)
- E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, i, ivar);
-
/* Configure EITR with the maximum possible value (0xFFFF) */
E1000_WRITE_REG(hw, E1000_EITR(0), 0xFFFF);
}
@@ -899,6 +909,10 @@ eth_igb_start(struct rte_eth_dev *dev)
if (dev->data->dev_conf.intr_conf.lsc != 0)
ret = eth_igb_lsc_interrupt_setup(dev);
+ /* check if rxq interrupt is enabled */
+ if (dev->data->dev_conf.intr_conf.rxq != 0)
+ eth_igb_rxq_interrupt_setup(dev);
+
/* resume enabled intr since hw reset */
igb_intr_enable(dev);
@@ -987,6 +1001,7 @@ eth_igb_close(struct rte_eth_dev *dev)
{
struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
struct rte_eth_link link;
+ struct rte_pci_device *pci_dev;
eth_igb_stop(dev);
e1000_phy_hw_reset(hw);
@@ -1004,6 +1019,12 @@ eth_igb_close(struct rte_eth_dev *dev)
igb_dev_clear_queues(dev);
+ pci_dev = dev->pci_dev;
+ if (pci_dev->intr_handle.intr_vec) {
+ rte_free(pci_dev->intr_handle.intr_vec);
+ pci_dev->intr_handle.intr_vec = NULL;
+ }
+
memset(&link, 0, sizeof(link));
rte_igb_dev_atomic_write_link_status(dev, &link);
}
@@ -1828,6 +1849,34 @@ eth_igb_lsc_interrupt_setup(struct rte_eth_dev *dev)
}
/*
+ * It clears the interrupt causes and enables the interrupt.
+ * It will be called once only during nic initialized.
+ *
+ * @param dev
+ * Pointer to struct rte_eth_dev.
+ *
+ * @return
+ * - On success, zero.
+ * - On failure, a negative value.
+ */
+static int eth_igb_rxq_interrupt_setup(struct rte_eth_dev *dev)
+{
+ uint32_t mask, regval;
+ struct e1000_hw *hw =
+ E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+ struct rte_eth_dev_info dev_info;
+
+ memset(&dev_info, 0, sizeof(dev_info));
+ eth_igb_infos_get(dev, &dev_info);
+
+ mask = 0xFFFFFFFF >> (32 - dev_info.max_rx_queues);
+ regval = E1000_READ_REG(hw, E1000_EIMS);
+ E1000_WRITE_REG(hw, E1000_EIMS, regval | mask);
+
+ return 0;
+}
+
+/*
* It reads ICR and gets interrupt causes, check it and set a bit flag
* to update link status.
*
@@ -3652,5 +3701,162 @@ static struct rte_driver pmd_igbvf_drv = {
.init = rte_igbvf_pmd_init,
};
+static int
+eth_igb_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id)
+{
+ struct e1000_hw *hw =
+ E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+ uint32_t mask = 1 << queue_id;
+
+ E1000_WRITE_REG(hw, E1000_EIMC, mask);
+ E1000_WRITE_FLUSH(hw);
+
+ return 0;
+}
+
+static int
+eth_igb_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id)
+{
+ struct e1000_hw *hw =
+ E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+ uint32_t mask = 1 << queue_id;
+ uint32_t regval;
+
+ regval = E1000_READ_REG(hw, E1000_EIMS);
+ E1000_WRITE_REG(hw, E1000_EIMS, regval | mask);
+ E1000_WRITE_FLUSH(hw);
+
+ return 0;
+}
+
+static void
+eth_igb_write_ivar(struct e1000_hw *hw, uint8_t msix_vector,
+ uint8_t index, uint8_t offset)
+{
+ uint32_t val = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
+
+ /* clear bits */
+ val &= ~((uint32_t)0xFF << offset);
+
+ /* write vector and valid bit */
+ val |= (msix_vector | E1000_IVAR_VALID) << offset;
+
+ E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, val);
+}
+
+static void
+eth_igb_assign_msix_vector(struct e1000_hw *hw, int8_t direction,
+ uint8_t queue, uint8_t msix_vector)
+{
+ uint32_t tmp = 0;
+ if (hw->mac.type == e1000_82575) {
+ if (direction == 0)
+ tmp = E1000_EICR_RX_QUEUE0 << queue;
+ else if (direction == 1)
+ tmp = E1000_EICR_TX_QUEUE0 << queue;
+ E1000_WRITE_REG(hw, E1000_MSIXBM(msix_vector), tmp);
+ } else if (hw->mac.type == e1000_82576) {
+ if ((direction == 0) || (direction == 1))
+ eth_igb_write_ivar(hw, msix_vector, queue & 0x7,
+ ((queue & 0x8) << 1) + 8 * direction);
+ } else if ((hw->mac.type == e1000_82580) ||
+ (hw->mac.type == e1000_i350) ||
+ (hw->mac.type == e1000_i354) ||
+ (hw->mac.type == e1000_i210) ||
+ (hw->mac.type == e1000_i211)) {
+ if ((direction == 0) || (direction == 1))
+ eth_igb_write_ivar(hw, msix_vector,
+ queue >> 1,
+ ((queue & 0x1) << 4) + 8 * direction);
+ }
+}
+
+/*
+ * Sets up the hardware to generate MSI-X interrupts properly
+ * @hw
+ * board private structure
+ */
+static void
+eth_igb_configure_msix_intr(struct rte_eth_dev *dev)
+{
+ int queue_id;
+ uint32_t tmpval, regval, intr_mask;
+ uint32_t max_rx_queues;
+ struct rte_eth_dev_info dev_info;
+ struct e1000_hw *hw =
+ E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+ struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle;
+ int vec = 0;
+
+ /* won't configure msix register if no mapping is done
+ * between intr vector and event fd */
+ if (!intr_handle->vec_en)
+ return;
+
+ memset(&dev_info, 0, sizeof(dev_info));
+ eth_igb_infos_get(dev, &dev_info);
+ max_rx_queues = dev_info.max_rx_queues;
+
+ /* set interrupt vector for other causes */
+ if (hw->mac.type == e1000_82575) {
+ tmpval = E1000_READ_REG(hw, E1000_CTRL_EXT);
+ /* enable MSI-X PBA support */
+ tmpval |= E1000_CTRL_EXT_PBA_CLR;
+
+ /* Auto-Mask interrupts upon ICR read */
+ tmpval |= E1000_CTRL_EXT_EIAME;
+ tmpval |= E1000_CTRL_EXT_IRCA;
+
+ E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmpval);
+
+ /* enable msix_other interrupt */
+ E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0), 0, E1000_EIMS_OTHER);
+ regval = E1000_READ_REG(hw, E1000_EIAC);
+ E1000_WRITE_REG(hw, E1000_EIAC, regval | E1000_EIMS_OTHER);
+ regval = E1000_READ_REG(hw, E1000_EIAM);
+ E1000_WRITE_REG(hw, E1000_EIMS, regval | E1000_EIMS_OTHER);
+ } else if ((hw->mac.type == e1000_82576) ||
+ (hw->mac.type == e1000_82580) ||
+ (hw->mac.type == e1000_i350) ||
+ (hw->mac.type == e1000_i354) ||
+ (hw->mac.type == e1000_i210) ||
+ (hw->mac.type == e1000_i211)) {
+ /* turn on MSI-X capability first */
+ E1000_WRITE_REG(hw, E1000_GPIE, E1000_GPIE_MSIX_MODE |
+ E1000_GPIE_PBA | E1000_GPIE_EIAME |
+ E1000_GPIE_NSICR);
+
+ /* enable msix_other interrupt */
+ intr_mask = 1 << max_rx_queues;
+ regval = E1000_READ_REG(hw, E1000_EIAC);
+ E1000_WRITE_REG(hw, E1000_EIAC, regval | intr_mask);
+ regval = E1000_READ_REG(hw, E1000_EIMS);
+ E1000_WRITE_REG(hw, E1000_EIMS, regval | intr_mask);
+ tmpval = (max_rx_queues | E1000_IVAR_VALID) << 8;
+
+ E1000_WRITE_REG(hw, E1000_IVAR_MISC, tmpval);
+ }
+
+ /*
+ * use EIAM and EIAC to auto-mask and auto-clear when MSI-X interrupt
+ * is asserted, this saves a register write for every interrupt
+ */
+ intr_mask = 0xFFFFFFFF >> (32 - max_rx_queues);
+ regval = E1000_READ_REG(hw, E1000_EIAC);
+ E1000_WRITE_REG(hw, E1000_EIAC, regval | intr_mask);
+ regval = E1000_READ_REG(hw, E1000_EIAM);
+ E1000_WRITE_REG(hw, E1000_EIAM, regval | intr_mask);
+
+ for (queue_id = 0; queue_id < dev->data->nb_rx_queues; queue_id++) {
+ eth_igb_assign_msix_vector(hw, 0, queue_id, vec);
+ intr_handle->intr_vec[queue_id] = vec;
+ if (vec < RTE_MAX_RXTX_INTR_VEC_ID)
+ vec++;
+ }
+
+ E1000_WRITE_FLUSH(hw);
+}
+
+
PMD_REGISTER_DRIVER(pmd_igb_drv);
PMD_REGISTER_DRIVER(pmd_igbvf_drv);
--
1.8.1.4
More information about the dev
mailing list