[dpdk-dev] [PATCH 3/3] vhost: support VFIO based accelerator

Tiwei Bie tiwei.bie at intel.com
Tue Mar 6 11:43:27 CET 2018


This commit adds VFIO-based accelerator support to vhost.
A new API is provided to ask QEMU to do the further setup
that allows notifications and interrupts to be delivered
directly between the driver in the guest and the vDPA
device in the host.

Signed-off-by: Tiwei Bie <tiwei.bie at intel.com>
---
 lib/librte_vhost/rte_vhost.h           |  28 ++++++
 lib/librte_vhost/rte_vhost_version.map |   1 +
 lib/librte_vhost/vhost_user.c          | 166 +++++++++++++++++++++++++++++++++
 lib/librte_vhost/vhost_user.h          |   9 ++
 4 files changed, 204 insertions(+)

diff --git a/lib/librte_vhost/rte_vhost.h b/lib/librte_vhost/rte_vhost.h
index d5589c543..68842e908 100644
--- a/lib/librte_vhost/rte_vhost.h
+++ b/lib/librte_vhost/rte_vhost.h
@@ -35,6 +35,7 @@ extern "C" {
 #define RTE_VHOST_USER_PROTOCOL_F_REPLY_ACK	3
 #define RTE_VHOST_USER_PROTOCOL_F_NET_MTU	4
 #define RTE_VHOST_USER_PROTOCOL_F_SLAVE_REQ	5
+#define RTE_VHOST_USER_PROTOCOL_F_VFIO		8
 #define RTE_VHOST_USER_F_PROTOCOL_FEATURES	30
 
 /**
@@ -591,6 +592,33 @@ rte_vhost_get_vdpa_eid(int vid);
 int __rte_experimental
 rte_vhost_get_vdpa_did(int vid);
 
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Enable or disable the VFIO based accelerator for vhost-user.
+ *
+ * Ask QEMU to do the extra setup that lets notifications and interrupts
+ * be delivered directly between the driver in the guest and the vDPA
+ * device in the host, provided the platform supports it (e.g. EPT and
+ * posted interrupts). This is an optional optimization, not a
+ * requirement.
+ *
+ * @param vid
+ *  vhost device ID
+ * @param enable
+ *  Non-zero to enable, zero to disable
+ *
+ * @return
+ *   0: success
+ *   -ENODEV: no such vhost device
+ *   -ENOTSUP: device does not support VFIO based accelerator feature
+ *   -EINVAL: there is no accelerator assigned to this vhost device
+ *   -EFAULT: failed to talk with QEMU
+ */
+int __rte_experimental
+rte_vhost_vfio_accelerator_ctrl(int vid, int enable);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/librte_vhost/rte_vhost_version.map b/lib/librte_vhost/rte_vhost_version.map
index 36257e51b..ca970170f 100644
--- a/lib/librte_vhost/rte_vhost_version.map
+++ b/lib/librte_vhost/rte_vhost_version.map
@@ -72,6 +72,7 @@ EXPERIMENTAL {
 	rte_vhost_set_vring_base;
 	rte_vhost_get_vdpa_eid;
 	rte_vhost_get_vdpa_did;
+	rte_vhost_vfio_accelerator_ctrl;
 	rte_vdpa_register_engine;
 	rte_vdpa_unregister_engine;
 	rte_vdpa_find_engine_id;
diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index e3a1dfbfb..a65598d80 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -35,6 +35,7 @@
 #include <rte_common.h>
 #include <rte_malloc.h>
 #include <rte_log.h>
+#include <rte_vhost.h>
 
 #include "iotlb.h"
 #include "vhost.h"
@@ -1628,6 +1629,27 @@ vhost_user_msg_handler(int vid, int fd)
 	return 0;
 }
 
+/* Read the reply to slave request @msg, if one was requested. Returns 0 when
+ * no reply is needed or the reply reports success, -1 on any failure. */
+static int process_slave_message_reply(struct virtio_net *dev,
+				       const VhostUserMsg *msg)
+{
+	VhostUserMsg msg_reply;
+
+	if ((msg->flags & VHOST_USER_NEED_REPLY) == 0)
+		return 0;
+	if (read_vhost_message(dev->slave_req_fd, &msg_reply) < 0)
+		return -1;
+	if (msg_reply.request.slave != msg->request.slave) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"received unexpected msg type (%u), expected %u\n",
+			msg_reply.request.slave, msg->request.slave);
+		return -1;
+	}
+	/* Non-zero means failure; a u64 -> int cast could hide that. */
+	return msg_reply.payload.u64 ? -1 : 0;
+}
+
 int
 vhost_user_iotlb_miss(struct virtio_net *dev, uint64_t iova, uint8_t perm)
 {
@@ -1653,3 +1675,147 @@ vhost_user_iotlb_miss(struct virtio_net *dev, uint64_t iova, uint8_t perm)
 
 	return 0;
 }
+
+/*
+ * Send slave request @request for vring file->index over slave_req_fd and
+ * wait for the reply; file->fd is attached when >= 0, else NOFD is flagged.
+ */
+static int vhost_user_slave_set_vring_file(struct virtio_net *dev,
+					   uint32_t request,
+					   struct vhost_vring_file *file)
+{
+	const int has_fd = file->fd >= 0;
+	struct VhostUserMsg req = {
+		.request.slave = request,
+		.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY,
+		.payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK,
+		.size = sizeof(req.payload.u64),
+	};
+	int rc;
+
+	/* With no descriptor to pass, flag that in the payload instead. */
+	if (!has_fd)
+		req.payload.u64 |= VHOST_USER_VRING_NOFD_MASK;
+
+	rc = send_vhost_message(dev->slave_req_fd, &req,
+				has_fd ? &file->fd : NULL, has_fd ? 1 : 0);
+	if (rc >= 0)
+		return process_slave_message_reply(dev, &req);
+
+	RTE_LOG(ERR, VHOST_CONFIG,
+		"Failed to send slave message %u (%d)\n",
+		request, rc);
+	return rc;
+}
+
+/*
+ * Send the notify area (@offset, @size) of vring @index over slave_req_fd
+ * and wait for the reply. @fd is attached when >= 0; otherwise no
+ * descriptor is sent and the NOFD bit is set in the payload.
+ */
+static int vhost_user_slave_set_vring_notify_area(struct virtio_net *dev,
+						  int index, int fd,
+						  uint64_t offset,
+						  uint64_t size)
+{
+	const int has_fd = fd >= 0;
+	struct VhostUserMsg req = {
+		.request.slave = VHOST_USER_SLAVE_VRING_NOTIFY_AREA_MSG,
+		.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY,
+		.payload.area = {
+			.u64 = index & VHOST_USER_VRING_IDX_MASK,
+			.size = size,
+			.offset = offset,
+		},
+		.size = sizeof(req.payload.area),
+	};
+	int rc;
+
+	if (!has_fd)
+		req.payload.area.u64 |= VHOST_USER_VRING_NOFD_MASK;
+
+	rc = send_vhost_message(dev->slave_req_fd, &req,
+				has_fd ? &fd : NULL, has_fd ? 1 : 0);
+	if (rc >= 0)
+		return process_slave_message_reply(dev, &req);
+
+	RTE_LOG(ERR, VHOST_CONFIG,
+		"Failed to set vring notify area (%d)\n", rc);
+	return rc;
+}
+
+/*
+ * Enable or disable the VFIO based accelerator for vhost device @vid by
+ * sending per-vring VFIO group and notify area requests over slave_req_fd.
+ */
+int __rte_experimental
+rte_vhost_vfio_accelerator_ctrl(int vid, int enable)
+{
+	struct virtio_net *dev = get_device(vid);
+	int groupfd, devicefd, eid, ret = 0;
+	struct rte_vdpa_eng_driver *drv;
+	struct vhost_vring_file file;
+	uint64_t offset, size;
+	unsigned int i;
+
+	if (!dev)
+		return -ENODEV;
+
+	eid = dev->eid;
+	if (eid < 0)
+		return -EINVAL;
+
+	if (!(dev->features & (1ULL << VIRTIO_F_VERSION_1)) ||
+	    !(dev->features & (1ULL << RTE_VHOST_USER_F_PROTOCOL_FEATURES)) ||
+	    !(dev->protocol_features &
+			(1ULL << RTE_VHOST_USER_PROTOCOL_F_VFIO)))
+		return -ENOTSUP;
+	/* The engine's driver must supply the callbacks and both VFIO fds. */
+	drv = vdpa_engines[eid]->eng_drv;
+	RTE_FUNC_PTR_OR_ERR_RET(drv->dev_ops.get_vfio_device_fd, -ENOTSUP);
+	RTE_FUNC_PTR_OR_ERR_RET(drv->dev_ops.get_vfio_group_fd, -ENOTSUP);
+	RTE_FUNC_PTR_OR_ERR_RET(drv->dev_ops.get_notify_area, -ENOTSUP);
+	devicefd = drv->dev_ops.get_vfio_device_fd(vid);
+	if (devicefd < 0)
+		return -ENOTSUP;
+	groupfd = drv->dev_ops.get_vfio_group_fd(vid);
+	if (groupfd < 0)
+		return -ENOTSUP;
+	/* NOTE(review): confirm nr_vring counts queue pairs (loops use * 2). */
+	if (enable) {
+		for (i = 0; i < dev->nr_vring * 2; i++) {
+			file.index = i;
+			file.fd = groupfd;
+			if (drv->dev_ops.get_notify_area(vid, i, &offset,
+					&size) < 0) {
+				ret = -ENOTSUP;
+				goto disable;
+			}
+
+			if (vhost_user_slave_set_vring_file(dev,
+					VHOST_USER_SLAVE_VRING_VFIO_GROUP_MSG,
+					&file) < 0) {
+				ret = -EFAULT;
+				goto disable;
+			}
+			if (vhost_user_slave_set_vring_notify_area(dev, i,
+					devicefd, offset, size) < 0) {
+				ret = -EFAULT;
+				goto disable;
+			}
+		}
+	} else {
+disable:	/* also the rollback path when enabling fails */
+		for (i = 0; i < dev->nr_vring * 2; i++) {
+			file.index = i;
+			file.fd = -1;
+			vhost_user_slave_set_vring_file(dev,
+					VHOST_USER_SLAVE_VRING_VFIO_GROUP_MSG,
+					&file);
+			vhost_user_slave_set_vring_notify_area(dev, i, -1,
+					0, 0);
+		}
+	}
+
+	return ret;
+}
diff --git a/lib/librte_vhost/vhost_user.h b/lib/librte_vhost/vhost_user.h
index 066e772dd..c74d288d4 100644
--- a/lib/librte_vhost/vhost_user.h
+++ b/lib/librte_vhost/vhost_user.h
@@ -52,6 +52,8 @@ typedef enum VhostUserRequest {
 typedef enum VhostUserSlaveRequest {
 	VHOST_USER_SLAVE_NONE = 0,
 	VHOST_USER_SLAVE_IOTLB_MSG = 1,
+	VHOST_USER_SLAVE_VRING_VFIO_GROUP_MSG = 3, /* vring VFIO group fd */
+	VHOST_USER_SLAVE_VRING_NOTIFY_AREA_MSG = 4, /* vring notify area */
 	VHOST_USER_SLAVE_MAX
 } VhostUserSlaveRequest;
 
@@ -73,6 +75,12 @@ typedef struct VhostUserLog {
 	uint64_t mmap_offset;
 } VhostUserLog;
 
+typedef struct VhostUserVringArea {
+	uint64_t u64;		/* vring index, optionally with the NOFD flag */
+	uint64_t size;		/* size of the notify area */
+	uint64_t offset;	/* offset of the notify area */
+} VhostUserVringArea;
+
 typedef struct VhostUserMsg {
 	union {
 		uint32_t master; /* a VhostUserRequest value */
@@ -93,6 +101,7 @@ typedef struct VhostUserMsg {
 		VhostUserMemory memory;
 		VhostUserLog    log;
 		struct vhost_iotlb_msg iotlb;
+		VhostUserVringArea area;
 	} payload;
 	int fds[VHOST_MEMORY_MAX_NREGIONS];
 } __attribute((packed)) VhostUserMsg;
-- 
2.11.0



More information about the dev mailing list