[RFC,04/13] add vhost packed ring fast dequeue function

Message ID 20190708171320.38802-5-yong.liu@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Maxime Coquelin
Headers
Series [RFC,01/13] add vhost normal enqueue function |

Checks

Context Check Description
ci/checkpatch warning coding style issues
ci/Intel-compilation fail Compilation issues

Commit Message

Marvin Liu July 8, 2019, 5:13 p.m. UTC
  Add a fast dequeue function, mirroring the fast enqueue function. The
fast dequeue function supports neither chained nor indirect descriptors;
the normal dequeue function handles those.

Signed-off-by: Marvin Liu <yong.liu@intel.com>
  

Patch

diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index f24026acd..329a7658b 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -41,6 +41,10 @@ 
 
 /* Used in fast packed ring functions */
 #define PACKED_DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_packed_desc))
+
+/* Chained/indirect flags: such descriptors are left to the normal path */
+#define VIRTIO_DESC_NORMAL_FLAG (VRING_DESC_F_NEXT | VRING_DESC_F_INDIRECT)
+
 /**
  * Structure contains buffer address, length and descriptor index
  * from vring to do scatter RX.
diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index 410837122..a62e0feda 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -1613,6 +1613,158 @@  virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
 	return i;
 }
 
+/*
+ * Fast-path dequeue helper for the packed ring: validate one batch of four
+ * descriptors starting at avail_idx, translate their buffer addresses and
+ * allocate one mbuf per descriptor.  No packet data is copied here.
+ *
+ * On success desc_addr[0..3] hold the translated buffer addresses, ids[0..3]
+ * the descriptor ids, and pkts[0..3] newly allocated mbufs whose pkt_len and
+ * data_len are set to the descriptor length minus dev->vhost_hlen.
+ *
+ * Returns 0 on success, 1 if any of the four descriptors is not available,
+ * and -1 if the batch cannot be handled here (unaligned or out-of-range
+ * index, chained/indirect descriptor, bad length or mapping, mbuf allocation
+ * failure, or too little mbuf room).  Mbufs allocated here are freed again
+ * before -1 is returned.
+ */
+static __rte_always_inline int
+vhost_dequeue_fast_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
+	struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t avail_idx,
+	uintptr_t *desc_addr, uint16_t *ids)
+{
+	enum { FAST_BATCH = 4 };	/* descriptors handled per call */
+	bool wrap_counter = vq->avail_wrap_counter;
+	struct vring_packed_desc *descs = vq->desc_packed;
+	uint32_t buf_offset = dev->vhost_hlen;
+	uint64_t lens[FAST_BATCH];
+	uint64_t mapped;
+	uint16_t flags = 0;
+	int bad = 0;
+	uint16_t i;
+
+	/* The batch must start on a four-descriptor boundary. */
+	if (unlikely(avail_idx & (FAST_BATCH - 1)))
+		return -1;
+
+	/* Never index past the ring end (size may not be a multiple of four). */
+	if (unlikely((uint32_t)avail_idx + FAST_BATCH > vq->size))
+		return -1;
+
+	/* Prefetch the next cache line of descriptors; this is the common case. */
+	if (likely((uint32_t)avail_idx + PACKED_DESC_PER_CACHELINE < vq->size))
+		rte_prefetch0((void *)(uintptr_t)&descs[avail_idx +
+			PACKED_DESC_PER_CACHELINE]);
+	else
+		rte_prefetch0((void *)(uintptr_t)&descs[0]);
+
+	/* Results are OR-ed together so the batch is tested with one branch. */
+	for (i = 0; i < FAST_BATCH; i++)
+		bad |= !desc_is_avail(&descs[avail_idx + i], wrap_counter);
+	if (unlikely(bad))
+		return 1;
+
+	/* Chained or indirect descriptors are left to the normal path. */
+	for (i = 0; i < FAST_BATCH; i++)
+		flags |= descs[avail_idx + i].flags;
+	if (unlikely(flags & VIRTIO_DESC_NORMAL_FLAG))
+		return -1;
+
+	/* Order the flag reads above before the len/id/addr reads below. */
+	rte_smp_rmb();
+
+	/*
+	 * Snapshot len once.  It is not re-read from the ring below, so the
+	 * value that gets validated is the value that gets used.
+	 */
+	for (i = 0; i < FAST_BATCH; i++) {
+		lens[i] = descs[avail_idx + i].len;
+		ids[i] = descs[avail_idx + i].id;
+	}
+
+	for (i = 0; i < FAST_BATCH; i++) {
+		mapped = lens[i];
+		desc_addr[i] = vhost_iova_to_vva(dev, vq,
+				descs[avail_idx + i].addr, &mapped,
+				VHOST_ACCESS_RW);
+		/* Reject failed/partial mappings and buffers shorter than hdr. */
+		bad |= (desc_addr[i] == 0) | (mapped != lens[i]) |
+			(lens[i] < buf_offset);
+	}
+	if (unlikely(bad))
+		return -1;
+
+	if (unlikely(rte_pktmbuf_alloc_bulk(mbuf_pool, pkts, FAST_BATCH)))
+		return -1;
+
+	/* Conservative room requirement: descriptor length plus header size. */
+	for (i = 0; i < FAST_BATCH; i++)
+		bad |= ((uint64_t)(pkts[i]->buf_len - pkts[i]->data_off) <
+			(lens[i] + buf_offset));
+	if (unlikely(bad)) {
+		for (i = 0; i < FAST_BATCH; i++)
+			rte_pktmbuf_free(pkts[i]);
+		return -1;
+	}
+
+	/* lens[i] >= buf_offset was checked above, so this cannot wrap. */
+	for (i = 0; i < FAST_BATCH; i++) {
+		pkts[i]->pkt_len = (uint32_t)(lens[i] - buf_offset);
+		pkts[i]->data_len = (uint16_t)pkts[i]->pkt_len;
+	}
+
+	return 0;
+}
+
+/*
+ * Dequeue one batch of four packets from the packed ring through the fast
+ * path: copy each buffer's payload (everything after dev->vhost_hlen bytes)
+ * into the matching mbuf and, when virtio_net_with_host_offload(dev) is
+ * true, apply the virtio-net header found at the start of each buffer.
+ *
+ * Returns 0 after advancing vq->last_avail_idx by four, or the non-zero code
+ * from vhost_dequeue_fast_packed() without advancing the ring.
+ */
+static __rte_always_inline int
+virtio_dev_tx_fast_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
+	struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts)
+{
+	enum { FAST_BATCH = 4 };	/* entries filled by the helper */
+	uint16_t avail_idx = vq->last_avail_idx;
+	uint32_t buf_offset = dev->vhost_hlen;
+	uintptr_t desc_addr[FAST_BATCH];
+	uint16_t ids[FAST_BATCH];	/* filled by the helper, unused here */
+	uint16_t i;
+	int ret;
+
+	ret = vhost_dequeue_fast_packed(dev, vq, mbuf_pool, pkts, avail_idx,
+				desc_addr, ids);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < FAST_BATCH; i++)
+		rte_memcpy(rte_pktmbuf_mtod_offset(pkts[i], void *, 0),
+			(void *)(uintptr_t)(desc_addr[i] + buf_offset),
+			pkts[i]->pkt_len);
+
+	if (virtio_net_with_host_offload(dev)) {
+		for (i = 0; i < FAST_BATCH; i++)
+			vhost_dequeue_offload(
+				(struct virtio_net_hdr *)desc_addr[i], pkts[i]);
+	}
+
+	/*
+	 * Step by the descriptors actually consumed; PACKED_DESC_PER_CACHELINE
+	 * equals four only when a cache line holds exactly four descriptors.
+	 */
+	vq->last_avail_idx += FAST_BATCH;
+	if (vq->last_avail_idx >= vq->size) {
+		vq->last_avail_idx -= vq->size;
+		vq->avail_wrap_counter ^= 1;
+	}
+	return 0;
+}
+
 static __rte_always_inline int
 vhost_dequeue_normal_packed(struct virtio_net *dev,
 		struct vhost_virtqueue *vq,