[dpdk-dev] [PATCH v3 17/19] vhost-user: iommu: postpone device creation until ring are mapped
Yao, Lei A
lei.a.yao at intel.com
Thu Nov 2 08:21:45 CET 2017
> -----Original Message-----
> From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Maxime Coquelin
> Sent: Thursday, October 5, 2017 4:36 PM
> To: dev at dpdk.org; Horton, Remy <remy.horton at intel.com>; Bie, Tiwei
> <tiwei.bie at intel.com>; yliu at fridaylinux.org
> Cc: mst at redhat.com; jfreiman at redhat.com; vkaplans at redhat.com;
> jasowang at redhat.com; Maxime Coquelin <maxime.coquelin at redhat.com>
> Subject: [dpdk-dev] [PATCH v3 17/19] vhost-user: iommu: postpone device
> creation until ring are mapped
>
> Translating the start addresses of the rings is not enough; we need to
> be sure the whole of each ring is made available by the guest.
>
> It depends on the size of the rings, which is not known on SET_VRING_ADDR
> reception. Furthermore, we need to be safe against vring page
> invalidations.
>
> This patch introduces a new access_ok flag per virtqueue, which is set
> when all the rings are mapped, and cleared as soon as a page used by a
> ring is invalidated. The invalidation part is implemented in a following
> patch.
>
> Signed-off-by: Maxime Coquelin <maxime.coquelin at redhat.com>
> ---
> lib/librte_vhost/vhost.c | 37 ++++++++++++++++++++++++++
> lib/librte_vhost/vhost.h | 2 ++
> lib/librte_vhost/vhost_user.c | 62 +++++++++++++++++++++++++++++++--
> ----------
> lib/librte_vhost/virtio_net.c | 60 +++++++++++++++++++++++++-------------
> ---
> 4 files changed, 121 insertions(+), 40 deletions(-)
>
> diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
> index 0e2ad3322..ef54835a6 100644
> --- a/lib/librte_vhost/vhost.c
> +++ b/lib/librte_vhost/vhost.c
> @@ -135,6 +135,43 @@ free_device(struct virtio_net *dev)
> rte_free(dev);
> }
>
> +int
> +vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq)
> +{
> + uint64_t size;
> +
> + if (!(dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)))
> + goto out;
> +
> + size = sizeof(struct vring_desc) * vq->size;
> + vq->desc = (struct vring_desc *)vhost_iova_to_vva(dev, vq,
> + vq-
> >ring_addrs.desc_user_addr,
> + size, VHOST_ACCESS_RW);
> + if (!vq->desc)
> + return -1;
> +
> + size = sizeof(struct vring_avail);
> + size += sizeof(uint16_t) * vq->size;
> + vq->avail = (struct vring_avail *)vhost_iova_to_vva(dev, vq,
> + vq-
> >ring_addrs.avail_user_addr,
> + size, VHOST_ACCESS_RW);
> + if (!vq->avail)
> + return -1;
> +
> + size = sizeof(struct vring_used);
> + size += sizeof(struct vring_used_elem) * vq->size;
> + vq->used = (struct vring_used *)vhost_iova_to_vva(dev, vq,
> + vq-
> >ring_addrs.used_user_addr,
> + size, VHOST_ACCESS_RW);
> + if (!vq->used)
> + return -1;
> +
> +out:
> + vq->access_ok = 1;
> +
> + return 0;
> +}
> +
> static void
> init_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
> {
> diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
> index 903da5db5..b3fe6bb8e 100644
> --- a/lib/librte_vhost/vhost.h
> +++ b/lib/librte_vhost/vhost.h
> @@ -113,6 +113,7 @@ struct vhost_virtqueue {
> /* Currently unused as polling mode is enabled */
> int kickfd;
> int enabled;
> + int access_ok;
>
> /* Physical address of used ring, for logging */
> uint64_t log_guest_addr;
> @@ -378,6 +379,7 @@ void vhost_backend_cleanup(struct virtio_net *dev);
>
> uint64_t __vhost_iova_to_vva(struct virtio_net *dev, struct
> vhost_virtqueue *vq,
> uint64_t iova, uint64_t size, uint8_t perm);
> +int vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq);
>
> static __rte_always_inline uint64_t
> vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq,
> diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
> index 90b209764..dd6562fd8 100644
> --- a/lib/librte_vhost/vhost_user.c
> +++ b/lib/librte_vhost/vhost_user.c
> @@ -391,6 +391,12 @@ vhost_user_set_vring_addr(struct virtio_net *dev,
> VhostUserMsg *msg)
> */
> memcpy(&vq->ring_addrs, addr, sizeof(*addr));
>
> + vq->desc = NULL;
> + vq->avail = NULL;
> + vq->used = NULL;
> +
> + vq->access_ok = 0;
> +
> return 0;
> }
>
> @@ -407,10 +413,10 @@ static struct virtio_net
> *translate_ring_addresses(struct virtio_net *dev,
> vq->desc = (struct vring_desc *)(uintptr_t)ring_addr_to_vva(dev,
> vq, addr->desc_user_addr, sizeof(struct vring_desc));
> if (vq->desc == 0) {
> - RTE_LOG(ERR, VHOST_CONFIG,
> + RTE_LOG(DEBUG, VHOST_CONFIG,
> "(%d) failed to find desc ring address.\n",
> dev->vid);
> - return NULL;
> + return dev;
> }
>
> dev = numa_realloc(dev, vq_index);
> @@ -419,19 +425,19 @@ static struct virtio_net
> *translate_ring_addresses(struct virtio_net *dev,
> vq->avail = (struct vring_avail *)(uintptr_t)ring_addr_to_vva(dev,
> vq, addr->avail_user_addr, sizeof(struct vring_avail));
> if (vq->avail == 0) {
> - RTE_LOG(ERR, VHOST_CONFIG,
> + RTE_LOG(DEBUG, VHOST_CONFIG,
> "(%d) failed to find avail ring address.\n",
> dev->vid);
> - return NULL;
> + return dev;
> }
>
> vq->used = (struct vring_used *)(uintptr_t)ring_addr_to_vva(dev,
> vq, addr->used_user_addr, sizeof(struct
> vring_used));
> if (vq->used == 0) {
> - RTE_LOG(ERR, VHOST_CONFIG,
> + RTE_LOG(DEBUG, VHOST_CONFIG,
> "(%d) failed to find used ring address.\n",
> dev->vid);
> - return NULL;
> + return dev;
> }
>
> if (vq->last_used_idx != vq->used->idx) {
> @@ -677,7 +683,7 @@ vhost_user_set_mem_table(struct virtio_net *dev,
> struct VhostUserMsg *pmsg)
> static int
> vq_is_ready(struct vhost_virtqueue *vq)
> {
> - return vq && vq->desc &&
> + return vq && vq->desc && vq->avail && vq->used &&
> vq->kickfd != VIRTIO_UNINITIALIZED_EVENTFD &&
> vq->callfd != VIRTIO_UNINITIALIZED_EVENTFD;
> }
> @@ -986,8 +992,29 @@ vhost_user_set_req_fd(struct virtio_net *dev,
> struct VhostUserMsg *msg)
> }
>
> static int
> -vhost_user_iotlb_msg(struct virtio_net *dev, struct VhostUserMsg *msg)
> +is_vring_iotlb_update(struct vhost_virtqueue *vq, struct vhost_iotlb_msg
> *imsg)
> {
> + struct vhost_vring_addr *ra;
> + uint64_t start, end;
> +
> + start = imsg->iova;
> + end = start + imsg->size;
> +
> + ra = &vq->ring_addrs;
> + if (ra->desc_user_addr >= start && ra->desc_user_addr < end)
> + return 1;
> + if (ra->avail_user_addr >= start && ra->avail_user_addr < end)
> + return 1;
> + if (ra->used_user_addr >= start && ra->used_user_addr < end)
> + return 1;
> +
> + return 0;
> +}
> +
> +static int
> +vhost_user_iotlb_msg(struct virtio_net **pdev, struct VhostUserMsg *msg)
> +{
> + struct virtio_net *dev = *pdev;
> struct vhost_iotlb_msg *imsg = &msg->payload.iotlb;
> uint16_t i;
> uint64_t vva;
> @@ -1003,6 +1030,9 @@ vhost_user_iotlb_msg(struct virtio_net *dev,
> struct VhostUserMsg *msg)
>
> vhost_user_iotlb_cache_insert(vq, imsg->iova, vva,
> imsg->size, imsg->perm);
> +
> + if (is_vring_iotlb_update(vq, imsg))
> + *pdev = dev = translate_ring_addresses(dev,
> i);
> }
> break;
> case VHOST_IOTLB_INVALIDATE:
> @@ -1151,8 +1181,12 @@ vhost_user_msg_handler(int vid, int fd)
> }
>
> ret = 0;
> - RTE_LOG(INFO, VHOST_CONFIG, "read message %s\n",
> - vhost_message_str[msg.request]);
> + if (msg.request != VHOST_USER_IOTLB_MSG)
> + RTE_LOG(INFO, VHOST_CONFIG, "read message %s\n",
> + vhost_message_str[msg.request]);
> + else
> + RTE_LOG(DEBUG, VHOST_CONFIG, "read message %s\n",
> + vhost_message_str[msg.request]);
>
> ret = vhost_user_check_and_alloc_queue_pair(dev, &msg);
> if (ret < 0) {
> @@ -1254,7 +1288,7 @@ vhost_user_msg_handler(int vid, int fd)
> break;
>
> case VHOST_USER_IOTLB_MSG:
> - ret = vhost_user_iotlb_msg(dev, &msg);
> + ret = vhost_user_iotlb_msg(&dev, &msg);
> break;
>
> default:
> @@ -1263,12 +1297,6 @@ vhost_user_msg_handler(int vid, int fd)
>
> }
>
> - /*
> - * The virtio_net struct might have been reallocated on a different
> - * NUMA node, so dev pointer might no more be valid.
> - */
> - dev = get_device(vid);
> -
> if (msg.flags & VHOST_USER_NEED_REPLY) {
> msg.payload.u64 = !!ret;
> msg.size = sizeof(msg.payload.u64);
> diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
> index cdfb6f957..b75c93cf1 100644
> --- a/lib/librte_vhost/virtio_net.c
> +++ b/lib/librte_vhost/virtio_net.c
> @@ -329,13 +329,23 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t
> queue_id,
> if (unlikely(vq->enabled == 0))
> return 0;
>
> + if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
> + vhost_user_iotlb_rd_lock(vq);
> +
> + if (unlikely(vq->access_ok == 0)) {
> + if (unlikely(vring_translate(dev, vq) < 0)) {
> + count = 0;
> + goto out;
> + }
> + }
> +
> avail_idx = *((volatile uint16_t *)&vq->avail->idx);
> start_idx = vq->last_used_idx;
> free_entries = avail_idx - start_idx;
> count = RTE_MIN(count, free_entries);
> count = RTE_MIN(count, (uint32_t)MAX_PKT_BURST);
> if (count == 0)
> - return 0;
> + goto out;
>
> LOG_DEBUG(VHOST_DATA, "(%d) start_idx %d | end_idx %d\n",
> dev->vid, start_idx, start_idx + count);
> @@ -356,10 +366,6 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t
> queue_id,
> }
>
> rte_prefetch0(&vq->desc[desc_indexes[0]]);
> -
> - if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
> - vhost_user_iotlb_rd_lock(vq);
> -
> for (i = 0; i < count; i++) {
> uint16_t desc_idx = desc_indexes[i];
> int err;
> @@ -394,9 +400,6 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t
> queue_id,
>
> do_data_copy_enqueue(dev, vq);
>
> - if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
> - vhost_user_iotlb_rd_unlock(vq);
> -
> rte_smp_wmb();
>
> *(volatile uint16_t *)&vq->used->idx += count;
> @@ -412,6 +415,10 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t
> queue_id,
> if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)
> && (vq->callfd >= 0))
> eventfd_write(vq->callfd, (eventfd_t)1);
> +out:
> + if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
> + vhost_user_iotlb_rd_unlock(vq);
> +
> return count;
> }
>
> @@ -647,9 +654,16 @@ virtio_dev_merge_rx(struct virtio_net *dev,
> uint16_t queue_id,
> if (unlikely(vq->enabled == 0))
> return 0;
>
> + if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
> + vhost_user_iotlb_rd_lock(vq);
> +
> + if (unlikely(vq->access_ok == 0))
> + if (unlikely(vring_translate(dev, vq) < 0))
> + goto out;
> +
> count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);
> if (count == 0)
> - return 0;
> + goto out;
>
> vq->batch_copy_nb_elems = 0;
>
> @@ -657,10 +671,6 @@ virtio_dev_merge_rx(struct virtio_net *dev,
> uint16_t queue_id,
>
> vq->shadow_used_idx = 0;
> avail_head = *((volatile uint16_t *)&vq->avail->idx);
> -
> - if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
> - vhost_user_iotlb_rd_lock(vq);
> -
> for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
> uint32_t pkt_len = pkts[pkt_idx]->pkt_len + dev-
> >vhost_hlen;
>
> @@ -689,9 +699,6 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t
> queue_id,
>
> do_data_copy_enqueue(dev, vq);
>
> - if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
> - vhost_user_iotlb_rd_unlock(vq);
> -
> if (likely(vq->shadow_used_idx)) {
> flush_shadow_used_ring(dev, vq);
>
> @@ -704,6 +711,10 @@ virtio_dev_merge_rx(struct virtio_net *dev,
> uint16_t queue_id,
> eventfd_write(vq->callfd, (eventfd_t)1);
> }
>
> +out:
> + if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
> + vhost_user_iotlb_rd_unlock(vq);
> +
> return pkt_idx;
> }
>
> @@ -1173,6 +1184,13 @@ rte_vhost_dequeue_burst(int vid, uint16_t
> queue_id,
>
> vq->batch_copy_nb_elems = 0;
>
> + if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
> + vhost_user_iotlb_rd_lock(vq);
> +
> + if (unlikely(vq->access_ok == 0))
> + if (unlikely(vring_translate(dev, vq) < 0))
> + goto out;
> +
> if (unlikely(dev->dequeue_zero_copy)) {
> struct zcopy_mbuf *zmbuf, *next;
> int nr_updated = 0;
> @@ -1262,10 +1280,6 @@ rte_vhost_dequeue_burst(int vid, uint16_t
> queue_id,
>
> /* Prefetch descriptor index. */
> rte_prefetch0(&vq->desc[desc_indexes[0]]);
> -
> - if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
> - vhost_user_iotlb_rd_lock(vq);
> -
> for (i = 0; i < count; i++) {
> struct vring_desc *desc;
> uint16_t sz, idx;
> @@ -1329,9 +1343,6 @@ rte_vhost_dequeue_burst(int vid, uint16_t
> queue_id,
> TAILQ_INSERT_TAIL(&vq->zmbuf_list, zmbuf, next);
> }
> }
> - if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
> - vhost_user_iotlb_rd_unlock(vq);
> -
> vq->last_avail_idx += i;
>
> if (likely(dev->dequeue_zero_copy == 0)) {
> @@ -1341,6 +1352,9 @@ rte_vhost_dequeue_burst(int vid, uint16_t
> queue_id,
> }
>
> out:
> + if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
> + vhost_user_iotlb_rd_unlock(vq);
> +
> if (unlikely(rarp_mbuf != NULL)) {
> /*
> * Inject it to the head of "pkts" array, so that switch's mac
> --
> 2.13.6
Hi, Maxime
I met one issue with your patch set during the v17.11 test.
The test scenario is as follows:
1. Bind one NIC, use test-pmd set vhost-user with 2 queue
usertools/dpdk-devbind.py --bind=igb_uio 0000:05:00.0
./x86_64-native-linuxapp-gcc/app/testpmd -c 0xe -n 4 --socket-mem 1024,1024 \
--vdev 'net_vhost0,iface=vhost-net,queues=2' - -i --rxq=2 --txq=2 --nb-cores=2 --rss-ip
2. Launch qemu with virtio device which has 2 queue
3. In VM, launch testpmd with virtio-pmd using only 1 queue.
x86_64-native-linuxapp-gcc/app/testpmd -c 0x07 -n 3 - -i --txqflags=0xf01 \
--rxq=1 --txq=1 --rss-ip --nb-cores=1
First,
commit 09927b5249694bad1c094d3068124673722e6b8f
vhost: translate ring addresses when IOMMU enabled
This patch causes no traffic in the PVP test, but the link status is still up in vhost-user.
Second,
eefac9536a901a1f0bb52aa3b6fec8f375f09190
vhost: postpone device creation until rings are mapped
The patch causes link status "down" in vhost-user.
Could you take a look at this on your side? Thanks.
BRs
Lei
More information about the dev
mailing list