[dpdk-dev,v2,17/19] vhost-user: iommu: postpone device creation until ring are mapped

Message ID 20170924161921.30010-18-maxime.coquelin@redhat.com (mailing list archive)
State Accepted, archived
Headers

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation fail Compilation issues

Commit Message

Maxime Coquelin Sept. 24, 2017, 4:19 p.m. UTC
  Translating the start addresses of the rings is not enough, we need to
be sure all the ring is made available by the guest.

It depends on the size of the rings, which is not known on SET_VRING_ADDR
reception. Furthermore, we need to be be safe against vring pages
invalidates.

This patch introduces a new access_ok flag per virtqueue, which is set
when all the rings are mapped, and cleared as soon as a page used by a
ring is invalidated. The invalidation part is implemented in a following
patch.

Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 lib/librte_vhost/vhost.c      | 37 ++++++++++++++++++++++++++
 lib/librte_vhost/vhost.h      |  2 ++
 lib/librte_vhost/vhost_user.c | 62 +++++++++++++++++++++++++++++++------------
 lib/librte_vhost/virtio_net.c | 60 +++++++++++++++++++++++++----------------
 4 files changed, 121 insertions(+), 40 deletions(-)
  

Comments

Yuanhan Liu Sept. 29, 2017, 12:34 p.m. UTC | #1
On Sun, Sep 24, 2017 at 06:19:19PM +0200, Maxime Coquelin wrote:
>  static int
> -vhost_user_iotlb_msg(struct virtio_net *dev, struct VhostUserMsg *msg)
> +is_vring_iotlb_update(struct vhost_virtqueue *vq, struct vhost_iotlb_msg *imsg)
>  {
> +	struct vhost_vring_addr *ra;
> +	uint64_t start, end;
> +
> +	start = imsg->iova;
> +	end = start + imsg->size;
> +
> +	ra = &vq->ring_addrs;
> +	if (ra->desc_user_addr >= start && ra->desc_user_addr < end)
> +		return 1;
> +	if (ra->avail_user_addr >= start && ra->avail_user_addr < end)
> +		return 1;
> +	if (ra->used_user_addr >= start && ra->used_user_addr < end)
> +		return 1;
> +
> +	return -1;

It should be "return 0" here.

	--yliu
> +}
  
Maxime Coquelin Sept. 29, 2017, 12:51 p.m. UTC | #2
On 09/29/2017 02:34 PM, Yuanhan Liu wrote:
> On Sun, Sep 24, 2017 at 06:19:19PM +0200, Maxime Coquelin wrote:
>>   static int
>> -vhost_user_iotlb_msg(struct virtio_net *dev, struct VhostUserMsg *msg)
>> +is_vring_iotlb_update(struct vhost_virtqueue *vq, struct vhost_iotlb_msg *imsg)
>>   {
>> +	struct vhost_vring_addr *ra;
>> +	uint64_t start, end;
>> +
>> +	start = imsg->iova;
>> +	end = start + imsg->size;
>> +
>> +	ra = &vq->ring_addrs;
>> +	if (ra->desc_user_addr >= start && ra->desc_user_addr < end)
>> +		return 1;
>> +	if (ra->avail_user_addr >= start && ra->avail_user_addr < end)
>> +		return 1;
>> +	if (ra->used_user_addr >= start && ra->used_user_addr < end)
>> +		return 1;
>> +
>> +	return -1;
> 
> It should be "return 0" here.

Oops, thanks for spotting this.

Maxime

> 	--yliu
>> +}
  

Patch

diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
index 0e2ad3322..ef54835a6 100644
--- a/lib/librte_vhost/vhost.c
+++ b/lib/librte_vhost/vhost.c
@@ -135,6 +135,43 @@  free_device(struct virtio_net *dev)
 	rte_free(dev);
 }
 
+int
+vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq)
+{
+	uint64_t size;
+
+	if (!(dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)))
+		goto out;
+
+	size = sizeof(struct vring_desc) * vq->size;
+	vq->desc = (struct vring_desc *)vhost_iova_to_vva(dev, vq,
+						vq->ring_addrs.desc_user_addr,
+						size, VHOST_ACCESS_RW);
+	if (!vq->desc)
+		return -1;
+
+	size = sizeof(struct vring_avail);
+	size += sizeof(uint16_t) * vq->size;
+	vq->avail = (struct vring_avail *)vhost_iova_to_vva(dev, vq,
+						vq->ring_addrs.avail_user_addr,
+						size, VHOST_ACCESS_RW);
+	if (!vq->avail)
+		return -1;
+
+	size = sizeof(struct vring_used);
+	size += sizeof(struct vring_used_elem) * vq->size;
+	vq->used = (struct vring_used *)vhost_iova_to_vva(dev, vq,
+						vq->ring_addrs.used_user_addr,
+						size, VHOST_ACCESS_RW);
+	if (!vq->used)
+		return -1;
+
+out:
+	vq->access_ok = 1;
+
+	return 0;
+}
+
 static void
 init_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
 {
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index 903da5db5..b3fe6bb8e 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -113,6 +113,7 @@  struct vhost_virtqueue {
 	/* Currently unused as polling mode is enabled */
 	int			kickfd;
 	int			enabled;
+	int			access_ok;
 
 	/* Physical address of used ring, for logging */
 	uint64_t		log_guest_addr;
@@ -378,6 +379,7 @@  void vhost_backend_cleanup(struct virtio_net *dev);
 
 uint64_t __vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq,
 			uint64_t iova, uint64_t size, uint8_t perm);
+int vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq);
 
 static __rte_always_inline uint64_t
 vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq,
diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index 90b209764..6d2c4d191 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -391,6 +391,12 @@  vhost_user_set_vring_addr(struct virtio_net *dev, VhostUserMsg *msg)
 	 */
 	memcpy(&vq->ring_addrs, addr, sizeof(*addr));
 
+	vq->desc = NULL;
+	vq->avail = NULL;
+	vq->used = NULL;
+
+	vq->access_ok = 0;
+
 	return 0;
 }
 
@@ -407,10 +413,10 @@  static struct virtio_net *translate_ring_addresses(struct virtio_net *dev,
 	vq->desc = (struct vring_desc *)(uintptr_t)ring_addr_to_vva(dev,
 			vq, addr->desc_user_addr, sizeof(struct vring_desc));
 	if (vq->desc == 0) {
-		RTE_LOG(ERR, VHOST_CONFIG,
+		RTE_LOG(DEBUG, VHOST_CONFIG,
 			"(%d) failed to find desc ring address.\n",
 			dev->vid);
-		return NULL;
+		return dev;
 	}
 
 	dev = numa_realloc(dev, vq_index);
@@ -419,19 +425,19 @@  static struct virtio_net *translate_ring_addresses(struct virtio_net *dev,
 	vq->avail = (struct vring_avail *)(uintptr_t)ring_addr_to_vva(dev,
 			vq, addr->avail_user_addr, sizeof(struct vring_avail));
 	if (vq->avail == 0) {
-		RTE_LOG(ERR, VHOST_CONFIG,
+		RTE_LOG(DEBUG, VHOST_CONFIG,
 			"(%d) failed to find avail ring address.\n",
 			dev->vid);
-		return NULL;
+		return dev;
 	}
 
 	vq->used = (struct vring_used *)(uintptr_t)ring_addr_to_vva(dev,
 			vq, addr->used_user_addr, sizeof(struct vring_used));
 	if (vq->used == 0) {
-		RTE_LOG(ERR, VHOST_CONFIG,
+		RTE_LOG(DEBUG, VHOST_CONFIG,
 			"(%d) failed to find used ring address.\n",
 			dev->vid);
-		return NULL;
+		return dev;
 	}
 
 	if (vq->last_used_idx != vq->used->idx) {
@@ -677,7 +683,7 @@  vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg)
 static int
 vq_is_ready(struct vhost_virtqueue *vq)
 {
-	return vq && vq->desc   &&
+	return vq && vq->desc && vq->avail && vq->used &&
 	       vq->kickfd != VIRTIO_UNINITIALIZED_EVENTFD &&
 	       vq->callfd != VIRTIO_UNINITIALIZED_EVENTFD;
 }
@@ -986,8 +992,29 @@  vhost_user_set_req_fd(struct virtio_net *dev, struct VhostUserMsg *msg)
 }
 
 static int
-vhost_user_iotlb_msg(struct virtio_net *dev, struct VhostUserMsg *msg)
+is_vring_iotlb_update(struct vhost_virtqueue *vq, struct vhost_iotlb_msg *imsg)
 {
+	struct vhost_vring_addr *ra;
+	uint64_t start, end;
+
+	start = imsg->iova;
+	end = start + imsg->size;
+
+	ra = &vq->ring_addrs;
+	if (ra->desc_user_addr >= start && ra->desc_user_addr < end)
+		return 1;
+	if (ra->avail_user_addr >= start && ra->avail_user_addr < end)
+		return 1;
+	if (ra->used_user_addr >= start && ra->used_user_addr < end)
+		return 1;
+
+	return -1;
+}
+
+static int
+vhost_user_iotlb_msg(struct virtio_net **pdev, struct VhostUserMsg *msg)
+{
+	struct virtio_net *dev = *pdev;
 	struct vhost_iotlb_msg *imsg = &msg->payload.iotlb;
 	uint16_t i;
 	uint64_t vva;
@@ -1003,6 +1030,9 @@  vhost_user_iotlb_msg(struct virtio_net *dev, struct VhostUserMsg *msg)
 
 			vhost_user_iotlb_cache_insert(vq, imsg->iova, vva,
 					imsg->size, imsg->perm);
+
+			if (is_vring_iotlb_update(vq, imsg))
+				*pdev = dev = translate_ring_addresses(dev, i);
 		}
 		break;
 	case VHOST_IOTLB_INVALIDATE:
@@ -1151,8 +1181,12 @@  vhost_user_msg_handler(int vid, int fd)
 	}
 
 	ret = 0;
-	RTE_LOG(INFO, VHOST_CONFIG, "read message %s\n",
-		vhost_message_str[msg.request]);
+	if (msg.request != VHOST_USER_IOTLB_MSG)
+		RTE_LOG(INFO, VHOST_CONFIG, "read message %s\n",
+			vhost_message_str[msg.request]);
+	else
+		RTE_LOG(DEBUG, VHOST_CONFIG, "read message %s\n",
+			vhost_message_str[msg.request]);
 
 	ret = vhost_user_check_and_alloc_queue_pair(dev, &msg);
 	if (ret < 0) {
@@ -1254,7 +1288,7 @@  vhost_user_msg_handler(int vid, int fd)
 		break;
 
 	case VHOST_USER_IOTLB_MSG:
-		ret = vhost_user_iotlb_msg(dev, &msg);
+		ret = vhost_user_iotlb_msg(&dev, &msg);
 		break;
 
 	default:
@@ -1263,12 +1297,6 @@  vhost_user_msg_handler(int vid, int fd)
 
 	}
 
-	/*
-	 * The virtio_net struct might have been reallocated on a different
-	 * NUMA node, so dev pointer might no more be valid.
-	 */
-	dev = get_device(vid);
-
 	if (msg.flags & VHOST_USER_NEED_REPLY) {
 		msg.payload.u64 = !!ret;
 		msg.size = sizeof(msg.payload.u64);
diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index cdfb6f957..b75c93cf1 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -329,13 +329,23 @@  virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 	if (unlikely(vq->enabled == 0))
 		return 0;
 
+	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+		vhost_user_iotlb_rd_lock(vq);
+
+	if (unlikely(vq->access_ok == 0)) {
+		if (unlikely(vring_translate(dev, vq) < 0)) {
+			count = 0;
+			goto out;
+		}
+	}
+
 	avail_idx = *((volatile uint16_t *)&vq->avail->idx);
 	start_idx = vq->last_used_idx;
 	free_entries = avail_idx - start_idx;
 	count = RTE_MIN(count, free_entries);
 	count = RTE_MIN(count, (uint32_t)MAX_PKT_BURST);
 	if (count == 0)
-		return 0;
+		goto out;
 
 	LOG_DEBUG(VHOST_DATA, "(%d) start_idx %d | end_idx %d\n",
 		dev->vid, start_idx, start_idx + count);
@@ -356,10 +366,6 @@  virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 	}
 
 	rte_prefetch0(&vq->desc[desc_indexes[0]]);
-
-	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
-		vhost_user_iotlb_rd_lock(vq);
-
 	for (i = 0; i < count; i++) {
 		uint16_t desc_idx = desc_indexes[i];
 		int err;
@@ -394,9 +400,6 @@  virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 
 	do_data_copy_enqueue(dev, vq);
 
-	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
-		vhost_user_iotlb_rd_unlock(vq);
-
 	rte_smp_wmb();
 
 	*(volatile uint16_t *)&vq->used->idx += count;
@@ -412,6 +415,10 @@  virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 	if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)
 			&& (vq->callfd >= 0))
 		eventfd_write(vq->callfd, (eventfd_t)1);
+out:
+	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+		vhost_user_iotlb_rd_unlock(vq);
+
 	return count;
 }
 
@@ -647,9 +654,16 @@  virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
 	if (unlikely(vq->enabled == 0))
 		return 0;
 
+	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+		vhost_user_iotlb_rd_lock(vq);
+
+	if (unlikely(vq->access_ok == 0))
+		if (unlikely(vring_translate(dev, vq) < 0))
+			goto out;
+
 	count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);
 	if (count == 0)
-		return 0;
+		goto out;
 
 	vq->batch_copy_nb_elems = 0;
 
@@ -657,10 +671,6 @@  virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
 
 	vq->shadow_used_idx = 0;
 	avail_head = *((volatile uint16_t *)&vq->avail->idx);
-
-	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
-		vhost_user_iotlb_rd_lock(vq);
-
 	for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
 		uint32_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen;
 
@@ -689,9 +699,6 @@  virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
 
 	do_data_copy_enqueue(dev, vq);
 
-	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
-		vhost_user_iotlb_rd_unlock(vq);
-
 	if (likely(vq->shadow_used_idx)) {
 		flush_shadow_used_ring(dev, vq);
 
@@ -704,6 +711,10 @@  virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
 			eventfd_write(vq->callfd, (eventfd_t)1);
 	}
 
+out:
+	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+		vhost_user_iotlb_rd_unlock(vq);
+
 	return pkt_idx;
 }
 
@@ -1173,6 +1184,13 @@  rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 
 	vq->batch_copy_nb_elems = 0;
 
+	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+		vhost_user_iotlb_rd_lock(vq);
+
+	if (unlikely(vq->access_ok == 0))
+		if (unlikely(vring_translate(dev, vq) < 0))
+			goto out;
+
 	if (unlikely(dev->dequeue_zero_copy)) {
 		struct zcopy_mbuf *zmbuf, *next;
 		int nr_updated = 0;
@@ -1262,10 +1280,6 @@  rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 
 	/* Prefetch descriptor index. */
 	rte_prefetch0(&vq->desc[desc_indexes[0]]);
-
-	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
-		vhost_user_iotlb_rd_lock(vq);
-
 	for (i = 0; i < count; i++) {
 		struct vring_desc *desc;
 		uint16_t sz, idx;
@@ -1329,9 +1343,6 @@  rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 			TAILQ_INSERT_TAIL(&vq->zmbuf_list, zmbuf, next);
 		}
 	}
-	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
-		vhost_user_iotlb_rd_unlock(vq);
-
 	vq->last_avail_idx += i;
 
 	if (likely(dev->dequeue_zero_copy == 0)) {
@@ -1341,6 +1352,9 @@  rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 	}
 
 out:
+	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+		vhost_user_iotlb_rd_unlock(vq);
+
 	if (unlikely(rarp_mbuf != NULL)) {
 		/*
 		 * Inject it to the head of "pkts" array, so that switch's mac