[dpdk-dev,12/14] vhost: dequeue for packed queues
Commit Message
Implement code to dequeue and process descriptors from
the vring if VIRTIO_F_PACKED is enabled.

Check whether a descriptor was made available by the driver by looking at
the VIRTIO_F_DESC_AVAIL flag in the descriptor. If so, dequeue it and set
the used flag VIRTIO_F_DESC_USED to the current value of the used wrap
counter.

The used ring wrap counter needs to be toggled when the last descriptor is
written out. This allows the host/guest to detect new descriptors even
after the ring has wrapped.
Signed-off-by: Jens Freimann <jfreimann@redhat.com>
---
lib/librte_vhost/vhost.c | 1 +
lib/librte_vhost/vhost.h | 1 +
lib/librte_vhost/virtio_net.c | 194 ++++++++++++++++++++++++++++++++++++++++++
3 files changed, 196 insertions(+)
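
The patch relies on helpers from virtio-1.1.h introduced earlier in this
series: toggle_wrap_counter(), desc_is_avail() and _set_desc_used(). For
readers following along, here is a minimal sketch of how the used wrap
counter drives the AVAIL/USED descriptor flags; the flag bit positions and
the struct layout are assumptions based on the packed virtqueue proposal,
not necessarily the exact contents of virtio-1.1.h:

	/* Sketch only: bit positions follow the packed virtqueue proposal. */
	#define DESC_AVAIL	(1 << 7)
	#define DESC_USED	(1 << 15)

	struct vring_desc_1_1 {
		uint64_t addr;
		uint32_t len;
		uint16_t index;
		uint16_t flags;
	};

	static inline void
	toggle_wrap_counter(struct vhost_virtqueue *vq)
	{
		vq->used_wrap_counter ^= 1;
	}

	static inline int
	desc_is_avail(struct vhost_virtqueue *vq, struct vring_desc_1_1 *desc)
	{
		/* Available: AVAIL bit matches the wrap counter, USED bit does not. */
		if (vq->used_wrap_counter)
			return (desc->flags & DESC_AVAIL) &&
				!(desc->flags & DESC_USED);
		return !(desc->flags & DESC_AVAIL) &&
			(desc->flags & DESC_USED);
	}

	static inline void
	_set_desc_used(struct vring_desc_1_1 *desc, int wrap_counter)
	{
		/* Used: both bits take the value of the used wrap counter. */
		if (wrap_counter)
			desc->flags |= DESC_AVAIL | DESC_USED;
		else
			desc->flags &= ~(DESC_AVAIL | DESC_USED);
	}

With this scheme there is no separate used ring: the dequeue path below
walks descs[] from last_used_idx, checks desc_is_avail(), and writes each
descriptor back with _set_desc_used() once the packet has been copied out,
toggling the wrap counter whenever the index wraps around the ring.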
Comments
On 01/29/2018 03:11 PM, Jens Freimann wrote:
> Implement code to dequeue and process descriptors from
> the vring if VIRTIO_F_PACKED is enabled.
>
> Check if descriptor was made available by driver by looking at
> VIRTIO_F_DESC_AVAIL flag in descriptor. If so dequeue and set
> the used flag VIRTIO_F_DESC_USED to the current value of the
> used wrap counter.
>
> Used ring wrap counter needs to be toggled when last descriptor is
> written out. This allows the host/guest to detect new descriptors even
> after the ring has wrapped.
>
> Signed-off-by: Jens Freimann <jfreimann@redhat.com>
> ---
> lib/librte_vhost/vhost.c | 1 +
> lib/librte_vhost/vhost.h | 1 +
> lib/librte_vhost/virtio_net.c | 194 ++++++++++++++++++++++++++++++++++++++++++
> 3 files changed, 196 insertions(+)
>
> diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
> index 78913912c..e5f58d9c8 100644
> --- a/lib/librte_vhost/vhost.c
> +++ b/lib/librte_vhost/vhost.c
> @@ -191,6 +191,7 @@ init_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
>
> vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD;
> vq->callfd = VIRTIO_UNINITIALIZED_EVENTFD;
> + vq->used_wrap_counter = 1;
>
> vhost_user_iotlb_init(dev, vring_idx);
> /* Backends are set to -1 indicating an inactive device. */
> diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
> index 8554d51d8..a3d4214b6 100644
> --- a/lib/librte_vhost/vhost.h
> +++ b/lib/librte_vhost/vhost.h
> @@ -106,6 +106,7 @@ struct vhost_virtqueue {
>
> struct batch_copy_elem *batch_copy_elems;
> uint16_t batch_copy_nb_elems;
> + uint32_t used_wrap_counter;
>
> rte_rwlock_t iotlb_lock;
> rte_rwlock_t iotlb_pending_lock;
> diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
> index edfab3ba6..5d4cfe8cc 100644
> --- a/lib/librte_vhost/virtio_net.c
> +++ b/lib/librte_vhost/virtio_net.c
> @@ -19,6 +19,7 @@
>
> #include "iotlb.h"
> #include "vhost.h"
> +#include "virtio-1.1.h"
>
> #define MAX_PKT_BURST 32
>
> @@ -1111,6 +1112,199 @@ restore_mbuf(struct rte_mbuf *m)
> }
> }
>
> +static inline uint16_t
> +dequeue_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
> + struct rte_mempool *mbuf_pool, struct rte_mbuf *m,
> + struct vring_desc_1_1 *descs)
> +{
> + struct vring_desc_1_1 *desc;
> + uint64_t desc_addr;
> + uint32_t desc_avail, desc_offset;
> + uint32_t mbuf_avail, mbuf_offset;
> + uint32_t cpy_len;
> + struct rte_mbuf *cur = m, *prev = m;
> + struct virtio_net_hdr *hdr = NULL;
> + uint16_t head_idx = vq->last_used_idx & (vq->size - 1);
> + int wrap_counter = vq->used_wrap_counter;
> +
> + desc = &descs[vq->last_used_idx & (vq->size - 1)];
> + if (unlikely((desc->len < dev->vhost_hlen)) ||
> + (desc->flags & VRING_DESC_F_INDIRECT))
> + rte_panic("INDIRECT not supported yet");
Using rte_panic() may not be a good idea here, because a malicious guest
could easily make the vswitch crash.
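A defensive alternative, sketched here only as an illustration of what the
fix could look like (it also folds both conditions under a single
unlikely()), would be to log the error and return instead of panicking:

	if (unlikely((desc->len < dev->vhost_hlen) ||
		     (desc->flags & VRING_DESC_F_INDIRECT))) {
		RTE_LOG(ERR, VHOST_DATA,
			"(%d) bad descriptor: too short or indirect (unsupported)\n",
			dev->vid);
		return -1;
	}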
> +
> + desc_addr = rte_vhost_gpa_to_vva(dev->mem, desc->addr);
You should use vhost_iova_to_vva() here and everywhere else, otherwise
you break IOMMU support.
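For reference, the split-ring dequeue path already translates guest
addresses through the IOTLB, and the same pattern would presumably apply
here; the exact signature used in this sketch is an assumption based on the
vhost code of that time:

	desc_addr = vhost_iova_to_vva(dev, vq, desc->addr,
				      desc->len, VHOST_ACCESS_RO);
	if (unlikely(!desc_addr))
		return -1;

When VIRTIO_F_IOMMU_PLATFORM has not been negotiated, vhost_iova_to_vva()
falls back to the plain GPA-to-VVA translation, so the change is
transparent for guests without a vIOMMU.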
> + if (unlikely(!desc_addr))
> + return -1;
> +
> + if (virtio_net_with_host_offload(dev)) {
> + hdr = (struct virtio_net_hdr *)((uintptr_t)desc_addr);
> + rte_prefetch0(hdr);
> + }
> +
> + /*
> + * A virtio driver normally uses at least 2 desc buffers
> + * for Tx: the first for storing the header, and others
> + * for storing the data.
> + */
> + if (likely((desc->len == dev->vhost_hlen) &&
> + (desc->flags & VRING_DESC_F_NEXT) != 0)) {
> + if ((++vq->last_used_idx & (vq->size - 1)) == 0)
> + toggle_wrap_counter(vq);
> +
> + desc = &descs[vq->last_used_idx & (vq->size - 1)];
> +
> + if (unlikely(desc->flags & VRING_DESC_F_INDIRECT))
> + rte_panic("INDIRECT not supported yet");
Ditto.
> +
> + desc_addr = rte_vhost_gpa_to_vva(dev->mem, desc->addr);
> + if (unlikely(!desc_addr))
> + return -1;
> +
> + desc_offset = 0;
> + desc_avail = desc->len;
> + } else {
> + desc_avail = desc->len - dev->vhost_hlen;
> + desc_offset = dev->vhost_hlen;
> + }
> +
> + rte_prefetch0((void *)(uintptr_t)(desc_addr + desc_offset));
> +
> + PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset), desc_avail, 0);
> +
> + mbuf_offset = 0;
> + mbuf_avail = m->buf_len - RTE_PKTMBUF_HEADROOM;
> + while (1) {
> + uint64_t hpa;
> +
> + cpy_len = RTE_MIN(desc_avail, mbuf_avail);
> +
> + /*
> + * A desc buf might across two host physical pages that are
> + * not continuous. In such case (gpa_to_hpa returns 0), data
> + * will be copied even though zero copy is enabled.
> + */
> + if (unlikely(dev->dequeue_zero_copy && (hpa = gpa_to_hpa(dev,
> + desc->addr + desc_offset, cpy_len)))) {
> + cur->data_len = cpy_len;
> + cur->data_off = 0;
> + cur->buf_addr = (void *)(uintptr_t)desc_addr;
> + cur->buf_physaddr = hpa;
> +
> + /*
> + * In zero copy mode, one mbuf can only reference data
> + * for one or partial of one desc buff.
> + */
> + mbuf_avail = cpy_len;
> + } else {
> + rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *,
> + mbuf_offset),
> + (void *)((uintptr_t)(desc_addr + desc_offset)),
> + cpy_len);
> + }
> +
> + mbuf_avail -= cpy_len;
> + mbuf_offset += cpy_len;
> + desc_avail -= cpy_len;
> + desc_offset += cpy_len;
> +
> + /* This desc reaches to its end, get the next one */
> + if (desc_avail == 0) {
> + if ((desc->flags & VRING_DESC_F_NEXT) == 0)
> + break;
> +
> + if ((++vq->last_used_idx & (vq->size - 1)) == 0)
> + toggle_wrap_counter(vq);
> +
> + desc = &descs[vq->last_used_idx & (vq->size - 1)];
> + if (unlikely(desc->flags & VRING_DESC_F_INDIRECT))
> + rte_panic("INDIRECT not supported yet");
> +
> + desc_addr = rte_vhost_gpa_to_vva(dev->mem, desc->addr);
> + if (unlikely(!desc_addr))
> + return -1;
> +
> + rte_prefetch0((void *)(uintptr_t)desc_addr);
> +
> + desc_offset = 0;
> + desc_avail = desc->len;
> +
> + PRINT_PACKET(dev, (uintptr_t)desc_addr, desc->len, 0);
> + }
> +
> + /*
> + * This mbuf reaches to its end, get a new one
> + * to hold more data.
> + */
> + if (mbuf_avail == 0) {
> + cur = rte_pktmbuf_alloc(mbuf_pool);
> + if (unlikely(cur == NULL)) {
> + RTE_LOG(ERR, VHOST_DATA, "Failed to "
> + "allocate memory for mbuf.\n");
> + return -1;
> + }
> +
> + prev->next = cur;
> + prev->data_len = mbuf_offset;
> + m->nb_segs += 1;
> + m->pkt_len += mbuf_offset;
> + prev = cur;
> +
> + mbuf_offset = 0;
> + mbuf_avail = cur->buf_len - RTE_PKTMBUF_HEADROOM;
> + }
> + }
> +
> + if (hdr)
> + vhost_dequeue_offload(hdr, m);
> +
> + if ((++vq->last_used_idx & (vq->size - 1)) == 0)
> + toggle_wrap_counter(vq);
> +
> + rte_smp_wmb();
> + _set_desc_used(&descs[head_idx], wrap_counter);
> +
> + prev->data_len = mbuf_offset;
> + m->pkt_len += mbuf_offset;
> +
> + return 0;
> +}
> +
> +static inline uint16_t
> +vhost_dequeue_burst_1_1(struct virtio_net *dev, struct vhost_virtqueue *vq,
> + struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts,
> + uint16_t count)
> +{
> + uint16_t i;
> + uint16_t idx;
> + struct vring_desc_1_1 *desc = vq->desc_1_1;
> + int err;
> +
> + count = RTE_MIN(MAX_PKT_BURST, count);
> + for (i = 0; i < count; i++) {
> + idx = vq->last_used_idx & (vq->size - 1);
> + if (!desc_is_avail(vq, &desc[idx]))
> + break;
> + rte_smp_rmb();
> +
> + pkts[i] = rte_pktmbuf_alloc(mbuf_pool);
> + if (unlikely(pkts[i] == NULL)) {
> + RTE_LOG(ERR, VHOST_DATA,
> + "Failed to allocate memory for mbuf.\n");
> + break;
> + }
> +
> + err = dequeue_desc(dev, vq, mbuf_pool, pkts[i], desc);
> + if (unlikely(err)) {
> + rte_pktmbuf_free(pkts[i]);
> + break;
> + }
> + }
> +
> + rte_spinlock_unlock(&vq->access_lock);
Where is it locked? It looks unbalanced.
> +
> + return i;
> +}
> +
> uint16_t
> rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
> struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)
>
On Thu, Feb 01, 2018 at 10:35:18AM +0100, Maxime Coquelin wrote:
>
>
>On 01/29/2018 03:11 PM, Jens Freimann wrote:
>>Implement code to dequeue and process descriptors from
>>the vring if VIRTIO_F_PACKED is enabled.
>>
>>Check if descriptor was made available by driver by looking at
>>VIRTIO_F_DESC_AVAIL flag in descriptor. If so dequeue and set
>>the used flag VIRTIO_F_DESC_USED to the current value of the
>>used wrap counter.
>>
>>Used ring wrap counter needs to be toggled when last descriptor is
>>written out. This allows the host/guest to detect new descriptors even
>>after the ring has wrapped.
>>
>>Signed-off-by: Jens Freimann <jfreimann@redhat.com>
>>---
>> lib/librte_vhost/vhost.c | 1 +
>> lib/librte_vhost/vhost.h | 1 +
>> lib/librte_vhost/virtio_net.c | 194 ++++++++++++++++++++++++++++++++++++++++++
>> 3 files changed, 196 insertions(+)
>>
>>diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
>>index 78913912c..e5f58d9c8 100644
>>--- a/lib/librte_vhost/vhost.c
>>+++ b/lib/librte_vhost/vhost.c
>>@@ -191,6 +191,7 @@ init_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
>> vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD;
>> vq->callfd = VIRTIO_UNINITIALIZED_EVENTFD;
>>+ vq->used_wrap_counter = 1;
>> vhost_user_iotlb_init(dev, vring_idx);
>> /* Backends are set to -1 indicating an inactive device. */
>>diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
>>index 8554d51d8..a3d4214b6 100644
>>--- a/lib/librte_vhost/vhost.h
>>+++ b/lib/librte_vhost/vhost.h
>>@@ -106,6 +106,7 @@ struct vhost_virtqueue {
>> struct batch_copy_elem *batch_copy_elems;
>> uint16_t batch_copy_nb_elems;
>>+ uint32_t used_wrap_counter;
>> rte_rwlock_t iotlb_lock;
>> rte_rwlock_t iotlb_pending_lock;
>>diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
>>index edfab3ba6..5d4cfe8cc 100644
>>--- a/lib/librte_vhost/virtio_net.c
>>+++ b/lib/librte_vhost/virtio_net.c
>>@@ -19,6 +19,7 @@
>> #include "iotlb.h"
>> #include "vhost.h"
>>+#include "virtio-1.1.h"
>> #define MAX_PKT_BURST 32
>>@@ -1111,6 +1112,199 @@ restore_mbuf(struct rte_mbuf *m)
>> }
>> }
>>+static inline uint16_t
>>+dequeue_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
>>+ struct rte_mempool *mbuf_pool, struct rte_mbuf *m,
>>+ struct vring_desc_1_1 *descs)
>>+{
>>+ struct vring_desc_1_1 *desc;
>>+ uint64_t desc_addr;
>>+ uint32_t desc_avail, desc_offset;
>>+ uint32_t mbuf_avail, mbuf_offset;
>>+ uint32_t cpy_len;
>>+ struct rte_mbuf *cur = m, *prev = m;
>>+ struct virtio_net_hdr *hdr = NULL;
>>+ uint16_t head_idx = vq->last_used_idx & (vq->size - 1);
>>+ int wrap_counter = vq->used_wrap_counter;
>>+
>>+ desc = &descs[vq->last_used_idx & (vq->size - 1)];
>>+ if (unlikely((desc->len < dev->vhost_hlen)) ||
>>+ (desc->flags & VRING_DESC_F_INDIRECT))
>>+ rte_panic("INDIRECT not supported yet");
>
>Using rte_panic() may not be a good idea here, because a malicious guest
>could make the vswitch to crash easily.
Good point. It was for debugging only, I will remove it.
>>+
>>+ desc_addr = rte_vhost_gpa_to_vva(dev->mem, desc->addr);
>
>You should use vhost_iova_to_vva() here and everywhere else, otherwise
>you break IOMMU support.
Yes, I'll change it.
>>+ if (unlikely(!desc_addr))
>>+ return -1;
>>+
>>+ if (virtio_net_with_host_offload(dev)) {
>>+ hdr = (struct virtio_net_hdr *)((uintptr_t)desc_addr);
>>+ rte_prefetch0(hdr);
>>+ }
>>+
>>+ /*
>>+ * A virtio driver normally uses at least 2 desc buffers
>>+ * for Tx: the first for storing the header, and others
>>+ * for storing the data.
>>+ */
>>+ if (likely((desc->len == dev->vhost_hlen) &&
>>+ (desc->flags & VRING_DESC_F_NEXT) != 0)) {
>>+ if ((++vq->last_used_idx & (vq->size - 1)) == 0)
>>+ toggle_wrap_counter(vq);
>>+
>>+ desc = &descs[vq->last_used_idx & (vq->size - 1)];
>>+
>>+ if (unlikely(desc->flags & VRING_DESC_F_INDIRECT))
>>+ rte_panic("INDIRECT not supported yet");
>Ditto.
>
>>+
>>+ desc_addr = rte_vhost_gpa_to_vva(dev->mem, desc->addr);
>>+ if (unlikely(!desc_addr))
>>+ return -1;
>>+
>>+ desc_offset = 0;
>>+ desc_avail = desc->len;
>>+ } else {
>>+ desc_avail = desc->len - dev->vhost_hlen;
>>+ desc_offset = dev->vhost_hlen;
>>+ }
>>+
>>+ rte_prefetch0((void *)(uintptr_t)(desc_addr + desc_offset));
>>+
>>+ PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset), desc_avail, 0);
>>+
>>+ mbuf_offset = 0;
>>+ mbuf_avail = m->buf_len - RTE_PKTMBUF_HEADROOM;
>>+ while (1) {
>>+ uint64_t hpa;
>>+
>>+ cpy_len = RTE_MIN(desc_avail, mbuf_avail);
>>+
>>+ /*
>>+ * A desc buf might across two host physical pages that are
>>+ * not continuous. In such case (gpa_to_hpa returns 0), data
>>+ * will be copied even though zero copy is enabled.
>>+ */
>>+ if (unlikely(dev->dequeue_zero_copy && (hpa = gpa_to_hpa(dev,
>>+ desc->addr + desc_offset, cpy_len)))) {
>>+ cur->data_len = cpy_len;
>>+ cur->data_off = 0;
>>+ cur->buf_addr = (void *)(uintptr_t)desc_addr;
>>+ cur->buf_physaddr = hpa;
>>+
>>+ /*
>>+ * In zero copy mode, one mbuf can only reference data
>>+ * for one or partial of one desc buff.
>>+ */
>>+ mbuf_avail = cpy_len;
>>+ } else {
>>+ rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *,
>>+ mbuf_offset),
>>+ (void *)((uintptr_t)(desc_addr + desc_offset)),
>>+ cpy_len);
>>+ }
>>+
>>+ mbuf_avail -= cpy_len;
>>+ mbuf_offset += cpy_len;
>>+ desc_avail -= cpy_len;
>>+ desc_offset += cpy_len;
>>+
>>+ /* This desc reaches to its end, get the next one */
>>+ if (desc_avail == 0) {
>>+ if ((desc->flags & VRING_DESC_F_NEXT) == 0)
>>+ break;
>>+
>>+ if ((++vq->last_used_idx & (vq->size - 1)) == 0)
>>+ toggle_wrap_counter(vq);
>>+
>>+ desc = &descs[vq->last_used_idx & (vq->size - 1)];
>>+ if (unlikely(desc->flags & VRING_DESC_F_INDIRECT))
>>+ rte_panic("INDIRECT not supported yet");
>>+
>>+ desc_addr = rte_vhost_gpa_to_vva(dev->mem, desc->addr);
>>+ if (unlikely(!desc_addr))
>>+ return -1;
>>+
>>+ rte_prefetch0((void *)(uintptr_t)desc_addr);
>>+
>>+ desc_offset = 0;
>>+ desc_avail = desc->len;
>>+
>>+ PRINT_PACKET(dev, (uintptr_t)desc_addr, desc->len, 0);
>>+ }
>>+
>>+ /*
>>+ * This mbuf reaches to its end, get a new one
>>+ * to hold more data.
>>+ */
>>+ if (mbuf_avail == 0) {
>>+ cur = rte_pktmbuf_alloc(mbuf_pool);
>>+ if (unlikely(cur == NULL)) {
>>+ RTE_LOG(ERR, VHOST_DATA, "Failed to "
>>+ "allocate memory for mbuf.\n");
>>+ return -1;
>>+ }
>>+
>>+ prev->next = cur;
>>+ prev->data_len = mbuf_offset;
>>+ m->nb_segs += 1;
>>+ m->pkt_len += mbuf_offset;
>>+ prev = cur;
>>+
>>+ mbuf_offset = 0;
>>+ mbuf_avail = cur->buf_len - RTE_PKTMBUF_HEADROOM;
>>+ }
>>+ }
>>+
>>+ if (hdr)
>>+ vhost_dequeue_offload(hdr, m);
>>+
>>+ if ((++vq->last_used_idx & (vq->size - 1)) == 0)
>>+ toggle_wrap_counter(vq);
>>+
>>+ rte_smp_wmb();
>>+ _set_desc_used(&descs[head_idx], wrap_counter);
>>+
>>+ prev->data_len = mbuf_offset;
>>+ m->pkt_len += mbuf_offset;
>>+
>>+ return 0;
>>+}
>>+
>>+static inline uint16_t
>>+vhost_dequeue_burst_1_1(struct virtio_net *dev, struct vhost_virtqueue *vq,
>>+ struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts,
>>+ uint16_t count)
>>+{
>>+ uint16_t i;
>>+ uint16_t idx;
>>+ struct vring_desc_1_1 *desc = vq->desc_1_1;
>>+ int err;
>>+
>>+ count = RTE_MIN(MAX_PKT_BURST, count);
>>+ for (i = 0; i < count; i++) {
>>+ idx = vq->last_used_idx & (vq->size - 1);
>>+ if (!desc_is_avail(vq, &desc[idx]))
>>+ break;
>>+ rte_smp_rmb();
>>+
>>+ pkts[i] = rte_pktmbuf_alloc(mbuf_pool);
>>+ if (unlikely(pkts[i] == NULL)) {
>>+ RTE_LOG(ERR, VHOST_DATA,
>>+ "Failed to allocate memory for mbuf.\n");
>>+ break;
>>+ }
>>+
>>+ err = dequeue_desc(dev, vq, mbuf_pool, pkts[i], desc);
>>+ if (unlikely(err)) {
>>+ rte_pktmbuf_free(pkts[i]);
>>+ break;
>>+ }
>>+ }
>>+
>>+ rte_spinlock_unlock(&vq->access_lock);
>
>Where is it locked? It looks unbalanced.
It is locked in the caller, rte_vhost_dequeue_burst(), and we return there
immediately. But I could change it to take and release the lock right here.
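
A balanced variant, sketched here under the assumption that the packed-ring
path keeps using vq->access_lock the same way the split-ring path does,
would take and release the lock inside the function itself:

	static inline uint16_t
	vhost_dequeue_burst_1_1(struct virtio_net *dev, struct vhost_virtqueue *vq,
			struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts,
			uint16_t count)
	{
		uint16_t i = 0;

		rte_spinlock_lock(&vq->access_lock);

		/* ... descriptor dequeue loop as in the patch ... */

		rte_spinlock_unlock(&vq->access_lock);

		return i;
	}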
Thanks for the review!
regards,
Jens