[dpdk-dev] [PATCH v3 17/19] vhost-user: iommu: postpone device creation until ring are mapped

Yao, Lei A lei.a.yao at intel.com
Thu Nov 2 08:21:45 CET 2017



> -----Original Message-----
> From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Maxime Coquelin
> Sent: Thursday, October 5, 2017 4:36 PM
> To: dev at dpdk.org; Horton, Remy <remy.horton at intel.com>; Bie, Tiwei
> <tiwei.bie at intel.com>; yliu at fridaylinux.org
> Cc: mst at redhat.com; jfreiman at redhat.com; vkaplans at redhat.com;
> jasowang at redhat.com; Maxime Coquelin <maxime.coquelin at redhat.com>
> Subject: [dpdk-dev] [PATCH v3 17/19] vhost-user: iommu: postpone device
> creation until ring are mapped
> 
> Translating the start addresses of the rings is not enough, we need to
> be sure all the ring is made available by the guest.
> 
> It depends on the size of the rings, which is not known on SET_VRING_ADDR
> reception. Furthermore, we need to be be safe against vring pages
> invalidates.
> 
> This patch introduces a new access_ok flag per virtqueue, which is set
> when all the rings are mapped, and cleared as soon as a page used by a
> ring is invalidated. The invalidation part is implemented in a following
> patch.
> 
> Signed-off-by: Maxime Coquelin <maxime.coquelin at redhat.com>
> ---
>  lib/librte_vhost/vhost.c      | 37 ++++++++++++++++++++++++++
>  lib/librte_vhost/vhost.h      |  2 ++
>  lib/librte_vhost/vhost_user.c | 62 +++++++++++++++++++++++++++++++--
> ----------
>  lib/librte_vhost/virtio_net.c | 60 +++++++++++++++++++++++++-------------
> ---
>  4 files changed, 121 insertions(+), 40 deletions(-)
> 
> diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
> index 0e2ad3322..ef54835a6 100644
> --- a/lib/librte_vhost/vhost.c
> +++ b/lib/librte_vhost/vhost.c
> @@ -135,6 +135,43 @@ free_device(struct virtio_net *dev)
>  	rte_free(dev);
>  }
> 
> +int
> +vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq)
> +{
> +	uint64_t size;
> +
> +	if (!(dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)))
> +		goto out;
> +
> +	size = sizeof(struct vring_desc) * vq->size;
> +	vq->desc = (struct vring_desc *)vhost_iova_to_vva(dev, vq,
> +						vq-
> >ring_addrs.desc_user_addr,
> +						size, VHOST_ACCESS_RW);
> +	if (!vq->desc)
> +		return -1;
> +
> +	size = sizeof(struct vring_avail);
> +	size += sizeof(uint16_t) * vq->size;
> +	vq->avail = (struct vring_avail *)vhost_iova_to_vva(dev, vq,
> +						vq-
> >ring_addrs.avail_user_addr,
> +						size, VHOST_ACCESS_RW);
> +	if (!vq->avail)
> +		return -1;
> +
> +	size = sizeof(struct vring_used);
> +	size += sizeof(struct vring_used_elem) * vq->size;
> +	vq->used = (struct vring_used *)vhost_iova_to_vva(dev, vq,
> +						vq-
> >ring_addrs.used_user_addr,
> +						size, VHOST_ACCESS_RW);
> +	if (!vq->used)
> +		return -1;
> +
> +out:
> +	vq->access_ok = 1;
> +
> +	return 0;
> +}
> +
>  static void
>  init_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
>  {
> diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
> index 903da5db5..b3fe6bb8e 100644
> --- a/lib/librte_vhost/vhost.h
> +++ b/lib/librte_vhost/vhost.h
> @@ -113,6 +113,7 @@ struct vhost_virtqueue {
>  	/* Currently unused as polling mode is enabled */
>  	int			kickfd;
>  	int			enabled;
> +	int			access_ok;
> 
>  	/* Physical address of used ring, for logging */
>  	uint64_t		log_guest_addr;
> @@ -378,6 +379,7 @@ void vhost_backend_cleanup(struct virtio_net *dev);
> 
>  uint64_t __vhost_iova_to_vva(struct virtio_net *dev, struct
> vhost_virtqueue *vq,
>  			uint64_t iova, uint64_t size, uint8_t perm);
> +int vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq);
> 
>  static __rte_always_inline uint64_t
>  vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq,
> diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
> index 90b209764..dd6562fd8 100644
> --- a/lib/librte_vhost/vhost_user.c
> +++ b/lib/librte_vhost/vhost_user.c
> @@ -391,6 +391,12 @@ vhost_user_set_vring_addr(struct virtio_net *dev,
> VhostUserMsg *msg)
>  	 */
>  	memcpy(&vq->ring_addrs, addr, sizeof(*addr));
> 
> +	vq->desc = NULL;
> +	vq->avail = NULL;
> +	vq->used = NULL;
> +
> +	vq->access_ok = 0;
> +
>  	return 0;
>  }
> 
> @@ -407,10 +413,10 @@ static struct virtio_net
> *translate_ring_addresses(struct virtio_net *dev,
>  	vq->desc = (struct vring_desc *)(uintptr_t)ring_addr_to_vva(dev,
>  			vq, addr->desc_user_addr, sizeof(struct vring_desc));
>  	if (vq->desc == 0) {
> -		RTE_LOG(ERR, VHOST_CONFIG,
> +		RTE_LOG(DEBUG, VHOST_CONFIG,
>  			"(%d) failed to find desc ring address.\n",
>  			dev->vid);
> -		return NULL;
> +		return dev;
>  	}
> 
>  	dev = numa_realloc(dev, vq_index);
> @@ -419,19 +425,19 @@ static struct virtio_net
> *translate_ring_addresses(struct virtio_net *dev,
>  	vq->avail = (struct vring_avail *)(uintptr_t)ring_addr_to_vva(dev,
>  			vq, addr->avail_user_addr, sizeof(struct vring_avail));
>  	if (vq->avail == 0) {
> -		RTE_LOG(ERR, VHOST_CONFIG,
> +		RTE_LOG(DEBUG, VHOST_CONFIG,
>  			"(%d) failed to find avail ring address.\n",
>  			dev->vid);
> -		return NULL;
> +		return dev;
>  	}
> 
>  	vq->used = (struct vring_used *)(uintptr_t)ring_addr_to_vva(dev,
>  			vq, addr->used_user_addr, sizeof(struct
> vring_used));
>  	if (vq->used == 0) {
> -		RTE_LOG(ERR, VHOST_CONFIG,
> +		RTE_LOG(DEBUG, VHOST_CONFIG,
>  			"(%d) failed to find used ring address.\n",
>  			dev->vid);
> -		return NULL;
> +		return dev;
>  	}
> 
>  	if (vq->last_used_idx != vq->used->idx) {
> @@ -677,7 +683,7 @@ vhost_user_set_mem_table(struct virtio_net *dev,
> struct VhostUserMsg *pmsg)
>  static int
>  vq_is_ready(struct vhost_virtqueue *vq)
>  {
> -	return vq && vq->desc   &&
> +	return vq && vq->desc && vq->avail && vq->used &&
>  	       vq->kickfd != VIRTIO_UNINITIALIZED_EVENTFD &&
>  	       vq->callfd != VIRTIO_UNINITIALIZED_EVENTFD;
>  }
> @@ -986,8 +992,29 @@ vhost_user_set_req_fd(struct virtio_net *dev,
> struct VhostUserMsg *msg)
>  }
> 
>  static int
> -vhost_user_iotlb_msg(struct virtio_net *dev, struct VhostUserMsg *msg)
> +is_vring_iotlb_update(struct vhost_virtqueue *vq, struct vhost_iotlb_msg
> *imsg)
>  {
> +	struct vhost_vring_addr *ra;
> +	uint64_t start, end;
> +
> +	start = imsg->iova;
> +	end = start + imsg->size;
> +
> +	ra = &vq->ring_addrs;
> +	if (ra->desc_user_addr >= start && ra->desc_user_addr < end)
> +		return 1;
> +	if (ra->avail_user_addr >= start && ra->avail_user_addr < end)
> +		return 1;
> +	if (ra->used_user_addr >= start && ra->used_user_addr < end)
> +		return 1;
> +
> +	return 0;
> +}
> +
> +static int
> +vhost_user_iotlb_msg(struct virtio_net **pdev, struct VhostUserMsg *msg)
> +{
> +	struct virtio_net *dev = *pdev;
>  	struct vhost_iotlb_msg *imsg = &msg->payload.iotlb;
>  	uint16_t i;
>  	uint64_t vva;
> @@ -1003,6 +1030,9 @@ vhost_user_iotlb_msg(struct virtio_net *dev,
> struct VhostUserMsg *msg)
> 
>  			vhost_user_iotlb_cache_insert(vq, imsg->iova, vva,
>  					imsg->size, imsg->perm);
> +
> +			if (is_vring_iotlb_update(vq, imsg))
> +				*pdev = dev = translate_ring_addresses(dev,
> i);
>  		}
>  		break;
>  	case VHOST_IOTLB_INVALIDATE:
> @@ -1151,8 +1181,12 @@ vhost_user_msg_handler(int vid, int fd)
>  	}
> 
>  	ret = 0;
> -	RTE_LOG(INFO, VHOST_CONFIG, "read message %s\n",
> -		vhost_message_str[msg.request]);
> +	if (msg.request != VHOST_USER_IOTLB_MSG)
> +		RTE_LOG(INFO, VHOST_CONFIG, "read message %s\n",
> +			vhost_message_str[msg.request]);
> +	else
> +		RTE_LOG(DEBUG, VHOST_CONFIG, "read message %s\n",
> +			vhost_message_str[msg.request]);
> 
>  	ret = vhost_user_check_and_alloc_queue_pair(dev, &msg);
>  	if (ret < 0) {
> @@ -1254,7 +1288,7 @@ vhost_user_msg_handler(int vid, int fd)
>  		break;
> 
>  	case VHOST_USER_IOTLB_MSG:
> -		ret = vhost_user_iotlb_msg(dev, &msg);
> +		ret = vhost_user_iotlb_msg(&dev, &msg);
>  		break;
> 
>  	default:
> @@ -1263,12 +1297,6 @@ vhost_user_msg_handler(int vid, int fd)
> 
>  	}
> 
> -	/*
> -	 * The virtio_net struct might have been reallocated on a different
> -	 * NUMA node, so dev pointer might no more be valid.
> -	 */
> -	dev = get_device(vid);
> -
>  	if (msg.flags & VHOST_USER_NEED_REPLY) {
>  		msg.payload.u64 = !!ret;
>  		msg.size = sizeof(msg.payload.u64);
> diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
> index cdfb6f957..b75c93cf1 100644
> --- a/lib/librte_vhost/virtio_net.c
> +++ b/lib/librte_vhost/virtio_net.c
> @@ -329,13 +329,23 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t
> queue_id,
>  	if (unlikely(vq->enabled == 0))
>  		return 0;
> 
> +	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
> +		vhost_user_iotlb_rd_lock(vq);
> +
> +	if (unlikely(vq->access_ok == 0)) {
> +		if (unlikely(vring_translate(dev, vq) < 0)) {
> +			count = 0;
> +			goto out;
> +		}
> +	}
> +
>  	avail_idx = *((volatile uint16_t *)&vq->avail->idx);
>  	start_idx = vq->last_used_idx;
>  	free_entries = avail_idx - start_idx;
>  	count = RTE_MIN(count, free_entries);
>  	count = RTE_MIN(count, (uint32_t)MAX_PKT_BURST);
>  	if (count == 0)
> -		return 0;
> +		goto out;
> 
>  	LOG_DEBUG(VHOST_DATA, "(%d) start_idx %d | end_idx %d\n",
>  		dev->vid, start_idx, start_idx + count);
> @@ -356,10 +366,6 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t
> queue_id,
>  	}
> 
>  	rte_prefetch0(&vq->desc[desc_indexes[0]]);
> -
> -	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
> -		vhost_user_iotlb_rd_lock(vq);
> -
>  	for (i = 0; i < count; i++) {
>  		uint16_t desc_idx = desc_indexes[i];
>  		int err;
> @@ -394,9 +400,6 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t
> queue_id,
> 
>  	do_data_copy_enqueue(dev, vq);
> 
> -	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
> -		vhost_user_iotlb_rd_unlock(vq);
> -
>  	rte_smp_wmb();
> 
>  	*(volatile uint16_t *)&vq->used->idx += count;
> @@ -412,6 +415,10 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t
> queue_id,
>  	if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)
>  			&& (vq->callfd >= 0))
>  		eventfd_write(vq->callfd, (eventfd_t)1);
> +out:
> +	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
> +		vhost_user_iotlb_rd_unlock(vq);
> +
>  	return count;
>  }
> 
> @@ -647,9 +654,16 @@ virtio_dev_merge_rx(struct virtio_net *dev,
> uint16_t queue_id,
>  	if (unlikely(vq->enabled == 0))
>  		return 0;
> 
> +	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
> +		vhost_user_iotlb_rd_lock(vq);
> +
> +	if (unlikely(vq->access_ok == 0))
> +		if (unlikely(vring_translate(dev, vq) < 0))
> +			goto out;
> +
>  	count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);
>  	if (count == 0)
> -		return 0;
> +		goto out;
> 
>  	vq->batch_copy_nb_elems = 0;
> 
> @@ -657,10 +671,6 @@ virtio_dev_merge_rx(struct virtio_net *dev,
> uint16_t queue_id,
> 
>  	vq->shadow_used_idx = 0;
>  	avail_head = *((volatile uint16_t *)&vq->avail->idx);
> -
> -	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
> -		vhost_user_iotlb_rd_lock(vq);
> -
>  	for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
>  		uint32_t pkt_len = pkts[pkt_idx]->pkt_len + dev-
> >vhost_hlen;
> 
> @@ -689,9 +699,6 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t
> queue_id,
> 
>  	do_data_copy_enqueue(dev, vq);
> 
> -	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
> -		vhost_user_iotlb_rd_unlock(vq);
> -
>  	if (likely(vq->shadow_used_idx)) {
>  		flush_shadow_used_ring(dev, vq);
> 
> @@ -704,6 +711,10 @@ virtio_dev_merge_rx(struct virtio_net *dev,
> uint16_t queue_id,
>  			eventfd_write(vq->callfd, (eventfd_t)1);
>  	}
> 
> +out:
> +	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
> +		vhost_user_iotlb_rd_unlock(vq);
> +
>  	return pkt_idx;
>  }
> 
> @@ -1173,6 +1184,13 @@ rte_vhost_dequeue_burst(int vid, uint16_t
> queue_id,
> 
>  	vq->batch_copy_nb_elems = 0;
> 
> +	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
> +		vhost_user_iotlb_rd_lock(vq);
> +
> +	if (unlikely(vq->access_ok == 0))
> +		if (unlikely(vring_translate(dev, vq) < 0))
> +			goto out;
> +
>  	if (unlikely(dev->dequeue_zero_copy)) {
>  		struct zcopy_mbuf *zmbuf, *next;
>  		int nr_updated = 0;
> @@ -1262,10 +1280,6 @@ rte_vhost_dequeue_burst(int vid, uint16_t
> queue_id,
> 
>  	/* Prefetch descriptor index. */
>  	rte_prefetch0(&vq->desc[desc_indexes[0]]);
> -
> -	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
> -		vhost_user_iotlb_rd_lock(vq);
> -
>  	for (i = 0; i < count; i++) {
>  		struct vring_desc *desc;
>  		uint16_t sz, idx;
> @@ -1329,9 +1343,6 @@ rte_vhost_dequeue_burst(int vid, uint16_t
> queue_id,
>  			TAILQ_INSERT_TAIL(&vq->zmbuf_list, zmbuf, next);
>  		}
>  	}
> -	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
> -		vhost_user_iotlb_rd_unlock(vq);
> -
>  	vq->last_avail_idx += i;
> 
>  	if (likely(dev->dequeue_zero_copy == 0)) {
> @@ -1341,6 +1352,9 @@ rte_vhost_dequeue_burst(int vid, uint16_t
> queue_id,
>  	}
> 
>  out:
> +	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
> +		vhost_user_iotlb_rd_unlock(vq);
> +
>  	if (unlikely(rarp_mbuf != NULL)) {
>  		/*
>  		 * Inject it to the head of "pkts" array, so that switch's mac
> --
> 2.13.6
Hi, Maxime

I ran into an issue with your patch set during v17.11 testing.
The test scenario is as follows:
1.	Bind one NIC and use testpmd to set up vhost-user with 2 queues:
usertools/dpdk-devbind.py --bind=igb_uio 0000:05:00.0
./x86_64-native-linuxapp-gcc/app/testpmd -c 0xe -n 4 --socket-mem 1024,1024 \
--vdev 'net_vhost0,iface=vhost-net,queues=2' -- -i --rxq=2 --txq=2 --nb-cores=2 --rss-ip
2.	Launch QEMU with a virtio device that has 2 queues.
3.	In the VM, launch testpmd with virtio-pmd using only 1 queue:
x86_64-native-linuxapp-gcc/app/testpmd -c 0x07 -n 3 -- -i --txqflags=0xf01 \
--rxq=1 --txq=1 --rss-ip --nb-cores=1

First, 
commit 09927b5249694bad1c094d3068124673722e6b8f
vhost: translate ring addresses when IOMMU enabled
With this patch there is no traffic in the PVP test, but the link status is still up in vhost-user.

Second, 
eefac9536a901a1f0bb52aa3b6fec8f375f09190 
vhost: postpone device creation until rings are mapped
With this patch the link status is "down" in vhost-user.

Could you check this on your side? Thanks.

BRs
Lei



More information about the dev mailing list