[dpdk-dev] [PATCH v6 4/8] eal/linux: add per rx queue interrupt handling based on VFIO
Cunming Liang
cunming.liang at intel.com
Fri Feb 27 05:56:12 CET 2015
This patch does the following:
- Create multiple VFIO eventfd for rx queues.
- Handle per rx queue interrupt.
- Eliminate the unnecessary wakeup mechanism for a suspended DPDK polling
thread on rx interrupts by allowing the polling thread to epoll_wait
directly on rx queue interrupt notifications.
Signed-off-by: Danny Zhou <danny.zhou at intel.com>
Signed-off-by: Cunming Liang <cunming.liang at intel.com>
---
v6 changes
- split rte_intr_wait_rx_pkt into two functions, wait and set.
- rewrite rte_intr_rx_wait/rte_intr_rx_set to remove queue visibility from EAL.
- rte_intr_rx_wait to support multiplexing.
- allow epfd as input to support flexible event fd combination.
v5 changes
- Rebase the patchset onto the HEAD
- Isolate ethdev from EAL for new-added wait-for-rx interrupt function
- Export wait-for-rx interrupt function for shared libraries
v4 changes:
- Adjust position of new-added structure fields
v3 changes:
- Fix review comments
v2 changes:
- Fix compilation issue for a missed header file
- Bug fix: free unreleased resources on the exception path before return
- Consolidate coding style related review comments
lib/librte_eal/linuxapp/eal/eal_interrupts.c | 224 +++++++++++++++++++-----
lib/librte_eal/linuxapp/eal/eal_pci_vfio.c | 23 ++-
lib/librte_eal/linuxapp/eal/rte_eal_version.map | 2 +
3 files changed, 201 insertions(+), 48 deletions(-)
diff --git a/lib/librte_eal/linuxapp/eal/eal_interrupts.c b/lib/librte_eal/linuxapp/eal/eal_interrupts.c
index 8c5b834..f90c2b4 100644
--- a/lib/librte_eal/linuxapp/eal/eal_interrupts.c
+++ b/lib/librte_eal/linuxapp/eal/eal_interrupts.c
@@ -44,6 +44,7 @@
#include <sys/epoll.h>
#include <sys/signalfd.h>
#include <sys/ioctl.h>
+#include <assert.h>
#include <rte_common.h>
#include <rte_interrupts.h>
@@ -70,6 +71,8 @@
#define EAL_INTR_EPOLL_WAIT_FOREVER (-1)
+static RTE_DEFINE_PER_LCORE(int, _epfd) = -1; /**< epoll fd per thread */
+
/**
* union for pipe fds.
*/
@@ -127,6 +130,9 @@ static pthread_t intr_thread;
#ifdef VFIO_PRESENT
#define IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + sizeof(int))
+/* irq set buffer length for queue interrupts and LSC interrupt */
+#define MSIX_IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + \
+ sizeof(int) * (VFIO_MAX_RXTX_INTR_ID + 1))
/* enable legacy (INTx) interrupts */
static int
@@ -218,10 +224,10 @@ vfio_disable_intx(struct rte_intr_handle *intr_handle) {
return 0;
}
-/* enable MSI-X interrupts */
+/* enable MSI interrupts */
static int
vfio_enable_msi(struct rte_intr_handle *intr_handle) {
- int len, ret;
+ int len, ret, max_intr;
char irq_set_buf[IRQ_SET_BUF_LEN];
struct vfio_irq_set *irq_set;
int *fd_ptr;
@@ -230,12 +236,19 @@ vfio_enable_msi(struct rte_intr_handle *intr_handle) {
irq_set = (struct vfio_irq_set *) irq_set_buf;
irq_set->argsz = len;
- irq_set->count = 1;
+ if ((!intr_handle->max_intr) ||
+ (intr_handle->max_intr > VFIO_MAX_RXTX_INTR_ID))
+ max_intr = VFIO_MAX_RXTX_INTR_ID + 1;
+ else
+ max_intr = intr_handle->max_intr;
+
+ irq_set->count = max_intr;
irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
irq_set->index = VFIO_PCI_MSI_IRQ_INDEX;
irq_set->start = 0;
fd_ptr = (int *) &irq_set->data;
- *fd_ptr = intr_handle->fd;
+ memcpy(fd_ptr, intr_handle->efds, sizeof(intr_handle->efds));
+ fd_ptr[max_intr - 1] = intr_handle->fd;
ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
@@ -244,27 +257,10 @@ vfio_enable_msi(struct rte_intr_handle *intr_handle) {
intr_handle->fd);
return -1;
}
-
- /* manually trigger interrupt to enable it */
- memset(irq_set, 0, len);
- len = sizeof(struct vfio_irq_set);
- irq_set->argsz = len;
- irq_set->count = 1;
- irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
- irq_set->index = VFIO_PCI_MSI_IRQ_INDEX;
- irq_set->start = 0;
-
- ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
-
- if (ret) {
- RTE_LOG(ERR, EAL, "Error triggering MSI interrupts for fd %d\n",
- intr_handle->fd);
- return -1;
- }
return 0;
}
-/* disable MSI-X interrupts */
+/* disable MSI interrupts */
static int
vfio_disable_msi(struct rte_intr_handle *intr_handle) {
struct vfio_irq_set *irq_set;
@@ -292,8 +288,8 @@ vfio_disable_msi(struct rte_intr_handle *intr_handle) {
/* enable MSI-X interrupts */
static int
vfio_enable_msix(struct rte_intr_handle *intr_handle) {
- int len, ret;
- char irq_set_buf[IRQ_SET_BUF_LEN];
+ int len, ret, max_intr;
+ char irq_set_buf[MSIX_IRQ_SET_BUF_LEN];
struct vfio_irq_set *irq_set;
int *fd_ptr;
@@ -301,12 +297,19 @@ vfio_enable_msix(struct rte_intr_handle *intr_handle) {
irq_set = (struct vfio_irq_set *) irq_set_buf;
irq_set->argsz = len;
- irq_set->count = 1;
+ if ((!intr_handle->max_intr) ||
+ (intr_handle->max_intr > VFIO_MAX_RXTX_INTR_ID))
+ max_intr = VFIO_MAX_RXTX_INTR_ID + 1;
+ else
+ max_intr = intr_handle->max_intr;
+
+ irq_set->count = max_intr;
irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
irq_set->start = 0;
fd_ptr = (int *) &irq_set->data;
- *fd_ptr = intr_handle->fd;
+ memcpy(fd_ptr, intr_handle->efds, sizeof(intr_handle->efds));
+ fd_ptr[max_intr - 1] = intr_handle->fd;
ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
@@ -316,22 +319,6 @@ vfio_enable_msix(struct rte_intr_handle *intr_handle) {
return -1;
}
- /* manually trigger interrupt to enable it */
- memset(irq_set, 0, len);
- len = sizeof(struct vfio_irq_set);
- irq_set->argsz = len;
- irq_set->count = 1;
- irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
- irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
- irq_set->start = 0;
-
- ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
-
- if (ret) {
- RTE_LOG(ERR, EAL, "Error triggering MSI-X interrupts for fd %d\n",
- intr_handle->fd);
- return -1;
- }
return 0;
}
@@ -339,7 +326,7 @@ vfio_enable_msix(struct rte_intr_handle *intr_handle) {
static int
vfio_disable_msix(struct rte_intr_handle *intr_handle) {
struct vfio_irq_set *irq_set;
- char irq_set_buf[IRQ_SET_BUF_LEN];
+ char irq_set_buf[MSIX_IRQ_SET_BUF_LEN];
int len, ret;
len = sizeof(struct vfio_irq_set);
@@ -860,3 +847,154 @@ rte_eal_intr_init(void)
return -ret;
}
+static void
+eal_intr_process_rxtx_interrupts(struct rte_intr_handle *intr_handle,
+ struct epoll_event *events,
+ uint32_t *vec, int nfds)
+{
+ int i, bytes_read;
+ union rte_intr_read_buffer buf;
+ int fd;
+
+ for (i = 0; i < nfds; i++) {
+ /* set the length to be read for different handle type */
+ switch (intr_handle->type) {
+ case RTE_INTR_HANDLE_UIO:
+ bytes_read = sizeof(buf.uio_intr_count);
+ break;
+ case RTE_INTR_HANDLE_ALARM:
+ bytes_read = sizeof(buf.timerfd_num);
+ break;
+#ifdef VFIO_PRESENT
+ case RTE_INTR_HANDLE_VFIO_MSIX:
+ case RTE_INTR_HANDLE_VFIO_MSI:
+ case RTE_INTR_HANDLE_VFIO_LEGACY:
+ bytes_read = sizeof(buf.vfio_intr_count);
+ break;
+#endif
+ default:
+ bytes_read = 1;
+ break;
+ }
+
+ /**
+ * read out to clear the ready-to-be-read flag
+ * for epoll_wait.
+ */
+ vec[i] = events[i].data.u32;
+ assert(vec[i] < VFIO_MAX_RXTX_INTR_ID);
+
+ fd = intr_handle->efds[vec[i]];
+ bytes_read = read(fd, &buf, bytes_read);
+ if (bytes_read < 0)
+ RTE_LOG(ERR, EAL, "Error reading from file "
+ "descriptor %d: %s\n", fd, strerror(errno));
+ else if (bytes_read == 0)
+ RTE_LOG(ERR, EAL, "Read nothing from file "
+ "descriptor %d\n", fd);
+ }
+}
+
+static int init_tls_epfd(void)
+{
+ int pfd = epoll_create(1);
+ if (pfd < 0) {
+ RTE_LOG(ERR, EAL,
+ "Cannot create epoll instance\n");
+ return -1;
+ }
+ return pfd;
+}
+
+int
+rte_intr_rx_wait(struct rte_intr_handle *intr_handle, int epfd,
+ uint32_t *vec, uint16_t num)
+{
+#define MAX_EVENTS 8
+ struct epoll_event events[MAX_EVENTS];
+ int ret, nfds = 0;
+
+ if (!intr_handle || !vec) {
+ RTE_LOG(ERR, EAL, "invalid input parameter\n");
+ return -1;
+ }
+
+ if (intr_handle->type != RTE_INTR_HANDLE_VFIO_MSIX) {
+ RTE_LOG(ERR, EAL, "intr type should be VFIO_MSIX\n");
+ return -1;
+ }
+
+ if (epfd == RTE_EPOLL_FD_ANY) {
+ /* using per thread epoll fd */
+ if (unlikely(RTE_PER_LCORE(_epfd) == -1))
+ RTE_PER_LCORE(_epfd) = init_tls_epfd();
+ epfd = RTE_PER_LCORE(_epfd);
+ }
+
+ do {
+ ret = epoll_wait(epfd, events,
+ RTE_MIN(num, MAX_EVENTS),
+ EAL_INTR_EPOLL_WAIT_FOREVER);
+ if (unlikely(ret < 0)) {
+ /* epoll_wait fail */
+ RTE_LOG(ERR, EAL, "epoll_wait returns with fail\n");
+ return -1;
+ } else if (ret > 0) {
+ /* epoll_wait has at least one fd ready to read */
+ eal_intr_process_rxtx_interrupts(intr_handle, events,
+ vec, ret);
+ num -= ret;
+ vec += ret;
+ nfds += ret;
+ } else if (nfds > 0)
+ break;
+ } while (num > 0);
+
+ return nfds;
+}
+
+int
+rte_intr_rx_set(struct rte_intr_handle *intr_handle, int epfd,
+ int op, uint32_t vec)
+{
+ struct epoll_event ev;
+
+ if (!intr_handle || vec >= VFIO_MAX_RXTX_INTR_ID) {
+ RTE_LOG(ERR, EAL, "invalid input parameter\n");
+ return -1;
+ }
+
+ if (intr_handle->type != RTE_INTR_HANDLE_VFIO_MSIX) {
+ RTE_LOG(ERR, EAL, "intr type should be VFIO_MSIX\n");
+ return -1;
+ }
+
+ switch (op) {
+ case RTE_INTR_EVENT_ADD:
+ op = EPOLL_CTL_ADD;
+ break;
+ case RTE_INTR_EVENT_DEL:
+ op = EPOLL_CTL_DEL;
+ break;
+ default:
+ RTE_LOG(ERR, EAL, "event op type mismatch\n");
+ return -1;
+ }
+
+ if (epfd == RTE_EPOLL_FD_ANY) {
+ /* using per thread epoll fd */
+ if (RTE_PER_LCORE(_epfd) == -1)
+ RTE_PER_LCORE(_epfd) = init_tls_epfd();
+ epfd = RTE_PER_LCORE(_epfd);
+ }
+
+ ev.data.u32 = vec;
+ ev.events = EPOLLIN | EPOLLPRI;
+ if (epoll_ctl(epfd, op, intr_handle->efds[vec], &ev) < 0) {
+ RTE_LOG(ERR, EAL, "Error op %d fd %d epoll_ctl, %s\n",
+ op, intr_handle->efds[vec], strerror(errno));
+ return -1;
+ }
+
+ return 0;
+}
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
index ee9660f..d90d23c 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
@@ -38,6 +38,7 @@
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
+#include <sys/epoll.h>
#include <rte_log.h>
#include <rte_pci.h>
@@ -274,16 +275,18 @@ pci_vfio_setup_interrupts(struct rte_pci_device *dev, int vfio_dev_fd)
ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_IRQ_INFO, &irq);
if (ret < 0) {
RTE_LOG(ERR, EAL, " cannot get IRQ info, "
- "error %i (%s)\n", errno, strerror(errno));
+ "error %i (%s)\n", errno, strerror(errno));
return -1;
}
/* if this vector cannot be used with eventfd, fail if we explicitly
* specified interrupt type, otherwise continue */
if ((irq.flags & VFIO_IRQ_INFO_EVENTFD) == 0) {
- if (internal_config.vfio_intr_mode != RTE_INTR_MODE_NONE) {
+ if (internal_config.vfio_intr_mode !=
+ RTE_INTR_MODE_NONE) {
RTE_LOG(ERR, EAL,
- " interrupt vector does not support eventfd!\n");
+ " interrupt vector "
+ "does not support eventfd!\n");
return -1;
} else
continue;
@@ -293,17 +296,27 @@ pci_vfio_setup_interrupts(struct rte_pci_device *dev, int vfio_dev_fd)
fd = eventfd(0, 0);
if (fd < 0) {
RTE_LOG(ERR, EAL, " cannot set up eventfd, "
- "error %i (%s)\n", errno, strerror(errno));
+ "error %i (%s)\n", errno, strerror(errno));
return -1;
}
dev->intr_handle.fd = fd;
dev->intr_handle.vfio_dev_fd = vfio_dev_fd;
-
switch (i) {
case VFIO_PCI_MSIX_IRQ_INDEX:
internal_config.vfio_intr_mode = RTE_INTR_MODE_MSIX;
dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_MSIX;
+ for (i = 0; i < VFIO_MAX_RXTX_INTR_ID; i++) {
+ fd = eventfd(0, 0);
+ if (fd < 0) {
+ RTE_LOG(ERR, EAL,
+ "cannot setup eventfd,"
+ "error %i (%s)\n",
+ errno, strerror(errno));
+ return -1;
+ }
+ dev->intr_handle.efds[i] = fd;
+ }
break;
case VFIO_PCI_MSI_IRQ_INDEX:
internal_config.vfio_intr_mode = RTE_INTR_MODE_MSI;
diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
index 5f1857d..892a452 100644
--- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
+++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
@@ -64,6 +64,8 @@ DPDK_2.0 {
rte_intr_callback_unregister;
rte_intr_disable;
rte_intr_enable;
+ rte_intr_rx_set;
+ rte_intr_rx_wait;
rte_log;
rte_log_add_in_history;
rte_log_cur_msg_loglevel;
--
1.8.1.4
More information about the dev
mailing list