[dpdk-dev,v2,5/9] net/virtio: setup rxq interrupts
Checks
Commit Message
This patch mainly allocates a structure to store the queue/irq mapping,
and configures the queue/irq mapping down through PCI ops. It also creates
eventfds for each Rx queue and tells the kernel about the eventfd/intr
binding.
Most importantly, unlike previous NICs (which usually implement
this logic in dev_start()), virtio's interrupt settings should be
configured down to QEMU before sending the DRIVER_OK notification.
Note: We only support 1:1 queue/irq mapping so far, which means, each
rx queue has one exclusive interrupt (corresponding to irqfd in the
qemu/kvm) to get notified when packets are available on that queue.
Signed-off-by: Jianfeng Tan <jianfeng.tan@intel.com>
---
drivers/net/virtio/virtio_ethdev.c | 89 ++++++++++++++++++++++++++++++++++++++
1 file changed, 89 insertions(+)
Comments
On Thu, Dec 29, 2016 at 07:30:39AM +0000, Jianfeng Tan wrote:
> This patch mainly allocates structure to store queue/irq mapping,
> and configure queue/irq mapping down through PCI ops. It also creates
> eventfds for each Rx queue and tell the kernel about the eventfd/intr
> binding.
>
> Mostly importantly, different from previous NICs (usually implements
> these logic in dev_start()), virtio's interrupt settings should be
> configured down to QEMU before sending DRIVER_OK notification.
Isn't it obvious we have to have all driver stuff (including interrupt
settings) configured properly before setting DRIVER_OK? :) That said,
it's meaningless to state the fact that virtio acts differently than other
nics here on dev_start/stop.
> Note: We only support 1:1 queue/irq mapping so far, which means, each
> rx queue has one exclusive interrupt (corresponding to irqfd in the
> qemu/kvm) to get notified when packets are available on that queue.
That means the "vectors=N" option has to be set correctly
in QEMU, otherwise it won't work? If so, you also have to document it somewhere.
> Signed-off-by: Jianfeng Tan <jianfeng.tan@intel.com>
> ---
> drivers/net/virtio/virtio_ethdev.c | 89 ++++++++++++++++++++++++++++++++++++++
> 1 file changed, 89 insertions(+)
>
> diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
> index 3f8b90c..082346b 100644
> --- a/drivers/net/virtio/virtio_ethdev.c
> +++ b/drivers/net/virtio/virtio_ethdev.c
> @@ -1206,6 +1206,76 @@ rx_func_get(struct rte_eth_dev *eth_dev)
> eth_dev->rx_pkt_burst = &virtio_recv_pkts;
> }
>
> +/* Only support 1:1 queue/interrupt mapping so far.
> + * TODO: under below cases, lsc and rxq interrupt share one interrupt.
> + * a) binded to uio, igb_uio, vfio (type1);
> + * b) device only has one vec, see _vectors_ option in -device virtio-net-pci.
> + * TODO: support n:1 queue/interrupt mapping.
Both TODOs are actually the same: supporting n:1 mapping. That said, you
don't have to write 2 TODOs here. Also, please don't be stingy with blank
lines: add some to make the comment easier to read.
> + */
> +static int
> +virtio_queues_bind_intr(struct rte_eth_dev *dev)
> +{
> + uint32_t i;
> + struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle;
> + struct virtio_hw *hw = dev->data->dev_private;
> +
> + PMD_INIT_LOG(INFO, "queue/interrupt binding\n");
> + for (i = 0; i < dev->data->nb_rx_queues; ++i) {
> + intr_handle->intr_vec[i] = i + 1;
> + if (vtpci_irq_queue(hw->vqs[i * VTNET_CQ], i + 1) ==
It's logically wrong to use VTNET_CQ to get the Rx queue index. You
could either use 2 simply, or define a macro for that.
> + VIRTIO_MSI_NO_VECTOR) {
> + PMD_DRV_LOG(ERR, "failed to set queue vector");
> + return -EBUSY;
> + }
> + }
> +
> + return 0;
> +}
> +
> +static int
> +virtio_configure_intr(struct rte_eth_dev *dev)
> +{
> + uint32_t intr_vector;
> + struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle;
> +
> + /* check if rxq interrupt is enabled */
Unnecessary comment: the function name and the error log explain it
well.
> + if (!rte_intr_cap_multiple(intr_handle)) {
> + PMD_INIT_LOG(ERR, "Multiple intr vector not supported");
> + return -ENOTSUP;
> + }
> +
> + intr_vector = dev->data->nb_rx_queues;
> + if (rte_intr_efd_enable(intr_handle, intr_vector)) {
> + PMD_INIT_LOG(ERR, "Fail to create eventfd");
> + return -1;
> + }
> +
> + if (!intr_handle->intr_vec) {
> + intr_handle->intr_vec =
> + rte_zmalloc("intr_vec", intr_vector * sizeof(int), 0);
> + if (!intr_handle->intr_vec) {
> + PMD_INIT_LOG(ERR, "Failed to allocate %d rxq vectors",
> + intr_vector);
> + return -ENOMEM;
> + }
> + }
> +
> + if (virtio_queues_bind_intr(dev) < 0) {
> + PMD_INIT_LOG(ERR, "Failed to bind queue/interrupt");
> + return -1;
You have to free intr_handle->intr_vec, otherwise, memory leak occurs.
> + }
> +
> + /* DO NOT try remove this! This function will enable msix, or QEMU
> + * will encounter SIGSEGV.
> + */
Looks like a QEMU bug to me. I mean, even though the driver is badly
configured, it should not crash QEMU.
> + if (rte_intr_enable(intr_handle) < 0) {
> + PMD_DRV_LOG(ERR, "interrupt enable failed");
> + return -1;
> + }
> +
> + return 0;
> +}
> +
> /* reset device and renegotiate features if needed */
> static int
> virtio_init_device(struct rte_eth_dev *eth_dev, uint64_t req_features)
> @@ -1299,6 +1369,17 @@ virtio_init_device(struct rte_eth_dev *eth_dev, uint64_t req_features)
> ret = virtio_alloc_queues(eth_dev);
> if (ret < 0)
> return ret;
> +
> + /* Make sure rxq interrupt is configured before sending DRIVER_OK,
> + * so that QEMU can properly set those irq into kvm.
> + */
As said, I don't think such comment is needed: for sure, we have to
setup everything properly (about the device) before setting the
DRIVER_OK flag.
> + if (eth_dev->data->dev_conf.intr_conf.rxq) {
> + if (virtio_configure_intr(eth_dev) < 0) {
> + PMD_INIT_LOG(ERR, "failed to configure interrupt");
> + return -1;
> + }
> + }
> +
> vtpci_reinit_complete(hw);
>
> if (pci_dev)
> @@ -1503,7 +1584,15 @@ virtio_dev_start(struct rte_eth_dev *dev)
> PMD_DRV_LOG(ERR, "link status not supported by host");
> return -ENOTSUP;
> }
> + }
>
> + /* Enable uio/vfio intr/eventfd mapping: althrough we already did that
> + * in device configure, but it could be unmapped when device is
> + * stopped.
Well, I didn't see you do that; I did see you do that in next patch though.
That said, the next patch should be merged here, into one patch.
--yliu
> + */
> + if (dev->data->dev_conf.intr_conf.lsc ||
> + dev->data->dev_conf.intr_conf.rxq) {
> + rte_intr_disable(&dev->pci_dev->intr_handle);
> if (rte_intr_enable(&dev->pci_dev->intr_handle) < 0) {
> PMD_DRV_LOG(ERR, "interrupt enable failed");
> return -EIO;
> --
> 2.7.4
On 12/30/2016 2:27 PM, Yuanhan Liu wrote:
> On Thu, Dec 29, 2016 at 07:30:39AM +0000, Jianfeng Tan wrote:
>> This patch mainly allocates structure to store queue/irq mapping,
>> and configure queue/irq mapping down through PCI ops. It also creates
>> eventfds for each Rx queue and tell the kernel about the eventfd/intr
>> binding.
>>
>> Mostly importantly, different from previous NICs (usually implements
>> these logic in dev_start()), virtio's interrupt settings should be
>> configured down to QEMU before sending DRIVER_OK notification.
> Isn't it obvious we have to have all driver stuff (including interrupt
> settings) configured properly before setting DRIVER_OK? :) That said,
> it's meanless to state the fact that virtio acts differently than other
> nics here on dev_start/stop.
>
>> Note: We only support 1:1 queue/irq mapping so far, which means, each
>> rx queue has one exclusive interrupt (corresponding to irqfd in the
>> qemu/kvm) to get notified when packets are available on that queue.
> That means you have to setup the "vectors=N" option has to set correctly
> in QEMU, otherwise it won't work?
Yes, actually, the correct value should be "vectors>=N+1", with N
standing for the number of queue pairs. It's due to the hard coded
mapping logic:
0 -> config irq
1 -> rxq0
2 -> rxq1
...
> If so, you also have to doc it somewhere.
Agreed.
[...]
>> +
>> + if (virtio_queues_bind_intr(dev) < 0) {
>> + PMD_INIT_LOG(ERR, "Failed to bind queue/interrupt");
>> + return -1;
> You have to free intr_handle->intr_vec, otherwise, memory leak occurs.
It's freed at dev_close(). Do you mean freeing and reallocating here? As
nb_rx_queues is not a changeable value, I don't see the necessity here.
Am I missing something?
Thanks,
Jianfeng
On Wed, Jan 04, 2017 at 02:56:50PM +0800, Tan, Jianfeng wrote:
>
>
> On 12/30/2016 2:27 PM, Yuanhan Liu wrote:
> >On Thu, Dec 29, 2016 at 07:30:39AM +0000, Jianfeng Tan wrote:
> >>This patch mainly allocates structure to store queue/irq mapping,
> >>and configure queue/irq mapping down through PCI ops. It also creates
> >>eventfds for each Rx queue and tell the kernel about the eventfd/intr
> >>binding.
> >>
> >>Mostly importantly, different from previous NICs (usually implements
> >>these logic in dev_start()), virtio's interrupt settings should be
> >>configured down to QEMU before sending DRIVER_OK notification.
> >Isn't it obvious we have to have all driver stuff (including interrupt
> >settings) configured properly before setting DRIVER_OK? :) That said,
> >it's meanless to state the fact that virtio acts differently than other
> >nics here on dev_start/stop.
> >
> >>Note: We only support 1:1 queue/irq mapping so far, which means, each
> >>rx queue has one exclusive interrupt (corresponding to irqfd in the
> >>qemu/kvm) to get notified when packets are available on that queue.
> >That means you have to setup the "vectors=N" option has to set correctly
> >in QEMU, otherwise it won't work?
>
> Yes, actually, the correct value should be "vectors>=N+1", with N standing
Yeah, and it's a typo.
> for the number of queue pairs. It's due to the hard coded mapping logic:
> 0 -> config irq
> 1 -> rxq0
> 2 -> rxq1
> ...
>
> > If so, you also have to doc it somewhere.
>
> Agreed.
>
> [...]
> >>+
> >>+ if (virtio_queues_bind_intr(dev) < 0) {
> >>+ PMD_INIT_LOG(ERR, "Failed to bind queue/interrupt");
> >>+ return -1;
> >You have to free intr_handle->intr_vec, otherwise, memory leak occurs.
>
> It's freed at dev_close(). Do you mean freeing and reallocating here? As
The typical way is to free the resources that have been allocated when an
error happens.
> nr_rx_queues is not a changeable value, I don't see the necessity here. I
> miss something?
No. nb_rx_queues does change when people reconfigure the queue number.
However, the maximum number of queues that virtio supports does not change.
You could use that number for allocation.
--yliu
> -----Original Message-----
> From: Yuanhan Liu [mailto:yuanhan.liu@linux.intel.com]
> Sent: Wednesday, January 4, 2017 3:23 PM
> To: Tan, Jianfeng
> Cc: dev@dpdk.org; stephen@networkplumber.org
> Subject: Re: [PATCH v2 5/9] net/virtio: setup rxq interrupts
>
> On Wed, Jan 04, 2017 at 02:56:50PM +0800, Tan, Jianfeng wrote:
> >
> >
> > [...]
> > >>+
> > >>+ if (virtio_queues_bind_intr(dev) < 0) {
> > >>+ PMD_INIT_LOG(ERR, "Failed to bind queue/interrupt");
> > >>+ return -1;
> > >You have to free intr_handle->intr_vec, otherwise, memory leak occurs.
> >
> > It's freed at dev_close(). Do you mean freeing and reallocating here? As
>
> The typical way is free the resources have been allocated when errors
> happens.
>
> > nr_rx_queues is not a changeable value, I don't see the necessity here. I
> > miss something?
>
> No. nb_rx_queues does change, when people reconfigure the queue
> number.
> However, the MAX queues the virito supports does not change. You could
> use that number for allocation.
Oh yes. I will fix it.
Thanks,
Jianfeng
>
> --yliu
@@ -1206,6 +1206,76 @@ rx_func_get(struct rte_eth_dev *eth_dev)
eth_dev->rx_pkt_burst = &virtio_recv_pkts;
}
+/* Only support 1:1 queue/interrupt mapping so far.
+ * TODO: under below cases, lsc and rxq interrupt share one interrupt.
+ * a) binded to uio, igb_uio, vfio (type1);
+ * b) device only has one vec, see _vectors_ option in -device virtio-net-pci.
+ * TODO: support n:1 queue/interrupt mapping.
+ */
+static int
+virtio_queues_bind_intr(struct rte_eth_dev *dev)
+{
+ uint32_t i;
+ struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle;
+ struct virtio_hw *hw = dev->data->dev_private;
+
+ PMD_INIT_LOG(INFO, "queue/interrupt binding\n");
+ for (i = 0; i < dev->data->nb_rx_queues; ++i) {
+ intr_handle->intr_vec[i] = i + 1;
+ if (vtpci_irq_queue(hw->vqs[i * VTNET_CQ], i + 1) ==
+ VIRTIO_MSI_NO_VECTOR) {
+ PMD_DRV_LOG(ERR, "failed to set queue vector");
+ return -EBUSY;
+ }
+ }
+
+ return 0;
+}
+
+static int
+virtio_configure_intr(struct rte_eth_dev *dev)
+{
+ uint32_t intr_vector;
+ struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle;
+
+ /* check if rxq interrupt is enabled */
+ if (!rte_intr_cap_multiple(intr_handle)) {
+ PMD_INIT_LOG(ERR, "Multiple intr vector not supported");
+ return -ENOTSUP;
+ }
+
+ intr_vector = dev->data->nb_rx_queues;
+ if (rte_intr_efd_enable(intr_handle, intr_vector)) {
+ PMD_INIT_LOG(ERR, "Fail to create eventfd");
+ return -1;
+ }
+
+ if (!intr_handle->intr_vec) {
+ intr_handle->intr_vec =
+ rte_zmalloc("intr_vec", intr_vector * sizeof(int), 0);
+ if (!intr_handle->intr_vec) {
+ PMD_INIT_LOG(ERR, "Failed to allocate %d rxq vectors",
+ intr_vector);
+ return -ENOMEM;
+ }
+ }
+
+ if (virtio_queues_bind_intr(dev) < 0) {
+ PMD_INIT_LOG(ERR, "Failed to bind queue/interrupt");
+ return -1;
+ }
+
+ /* DO NOT try remove this! This function will enable msix, or QEMU
+ * will encounter SIGSEGV.
+ */
+ if (rte_intr_enable(intr_handle) < 0) {
+ PMD_DRV_LOG(ERR, "interrupt enable failed");
+ return -1;
+ }
+
+ return 0;
+}
+
/* reset device and renegotiate features if needed */
static int
virtio_init_device(struct rte_eth_dev *eth_dev, uint64_t req_features)
@@ -1299,6 +1369,17 @@ virtio_init_device(struct rte_eth_dev *eth_dev, uint64_t req_features)
ret = virtio_alloc_queues(eth_dev);
if (ret < 0)
return ret;
+
+ /* Make sure rxq interrupt is configured before sending DRIVER_OK,
+ * so that QEMU can properly set those irq into kvm.
+ */
+ if (eth_dev->data->dev_conf.intr_conf.rxq) {
+ if (virtio_configure_intr(eth_dev) < 0) {
+ PMD_INIT_LOG(ERR, "failed to configure interrupt");
+ return -1;
+ }
+ }
+
vtpci_reinit_complete(hw);
if (pci_dev)
@@ -1503,7 +1584,15 @@ virtio_dev_start(struct rte_eth_dev *dev)
PMD_DRV_LOG(ERR, "link status not supported by host");
return -ENOTSUP;
}
+ }
+ /* Enable uio/vfio intr/eventfd mapping: althrough we already did that
+ * in device configure, but it could be unmapped when device is
+ * stopped.
+ */
+ if (dev->data->dev_conf.intr_conf.lsc ||
+ dev->data->dev_conf.intr_conf.rxq) {
+ rte_intr_disable(&dev->pci_dev->intr_handle);
if (rte_intr_enable(&dev->pci_dev->intr_handle) < 0) {
PMD_DRV_LOG(ERR, "interrupt enable failed");
return -EIO;