[dpdk-dev] [PATCH RFC v2 08/12] lib/librte_vhost: vhost-user support

Linhaifeng haifeng.lin at huawei.com
Thu Dec 11 06:36:13 CET 2014



On 2014/12/11 5:37, Huawei Xie wrote:
> vhost-user support
> 
> 
> Signed-off-by: Huawei Xie <huawei.xie at intel.com>
> ---
>  lib/librte_vhost/Makefile                     |   5 +-
>  lib/librte_vhost/vhost-net.h                  |   4 +
>  lib/librte_vhost/vhost_cuse/virtio-net-cdev.c |   9 +
>  lib/librte_vhost/vhost_user/vhost-net-user.c  | 422 ++++++++++++++++++++++++++
>  lib/librte_vhost/vhost_user/vhost-net-user.h  | 108 +++++++
>  lib/librte_vhost/vhost_user/virtio-net-user.c | 199 ++++++++++++
>  lib/librte_vhost/vhost_user/virtio-net-user.h |  48 +++
>  lib/librte_vhost/virtio-net.c                 |  16 +-
>  lib/librte_vhost/virtio-net.h                 |  43 +++
>  9 files changed, 842 insertions(+), 12 deletions(-)
>  create mode 100644 lib/librte_vhost/vhost_user/vhost-net-user.c
>  create mode 100644 lib/librte_vhost/vhost_user/vhost-net-user.h
>  create mode 100644 lib/librte_vhost/vhost_user/virtio-net-user.c
>  create mode 100644 lib/librte_vhost/vhost_user/virtio-net-user.h
>  create mode 100644 lib/librte_vhost/virtio-net.h
> 
> diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile
> index e0d0ef6..b2f14a0 100644
> --- a/lib/librte_vhost/Makefile
> +++ b/lib/librte_vhost/Makefile
> @@ -34,10 +34,11 @@ include $(RTE_SDK)/mk/rte.vars.mk
>  # library name
>  LIB = librte_vhost.a
>  
> -CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -I vhost_cuse -O3 -D_FILE_OFFSET_BITS=64 -lfuse
> +CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -I vhost_cuse -I vhost_user -O3 -D_FILE_OFFSET_BITS=64 -lfuse
>  LDFLAGS += -lfuse
>  # all source are stored in SRCS-y
> -SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost_cuse/vhost-net-cdev.c vhost_cuse/virtio-net-cdev.c virtio-net.c vhost_rxtx.c
> +#SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost_cuse/vhost-net-cdev.c vhost_cuse/virtio-net-cdev.c virtio-net.c vhost_rxtx.c
> +SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost_user/vhost-net-user.c vhost_user/virtio-net-user.c vhost_user/fd_man.c virtio-net.c vhost_rxtx.c
>  
>  # install includes
>  SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_virtio_net.h
> diff --git a/lib/librte_vhost/vhost-net.h b/lib/librte_vhost/vhost-net.h
> index f7e96fd..f9ec40b 100644
> --- a/lib/librte_vhost/vhost-net.h
> +++ b/lib/librte_vhost/vhost-net.h
> @@ -41,8 +41,12 @@
>  
>  #include <rte_log.h>
>  
> +#include "rte_virtio_net.h"
> +
>  #define VHOST_MEMORY_MAX_NREGIONS 8
>  
> +extern struct vhost_net_device_ops const *ops;
> +
>  /* Macros for printing using RTE_LOG */
>  #define RTE_LOGTYPE_VHOST_CONFIG RTE_LOGTYPE_USER1
>  #define RTE_LOGTYPE_VHOST_DATA   RTE_LOGTYPE_USER1
> diff --git a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
> index edcbc10..8ac3360 100644
> --- a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
> +++ b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
> @@ -268,6 +268,7 @@ cuse_set_mem_table(struct vhost_device_ctx ctx,
>  	struct vhost_memory_region *mem_regions = (void *)(uintptr_t)
>  		((uint64_t)(uintptr_t)mem_regions_addr + size);
>  	uint64_t base_address = 0, mapped_address, mapped_size;
> +	struct virtio_dev *dev;
>  
>  	for (idx = 0; idx < nregions; idx++) {
>  		regions[idx].guest_phys_address =
> @@ -335,6 +336,14 @@ cuse_set_mem_table(struct vhost_device_ctx ctx,
>  			regions[idx].guest_phys_address;
>  	}
>  
> +	dev = get_device(ctx);
> +	if (dev && dev->mem && dev->mmaped_address) {
> +		munmap((void *)(uintptr_t)dev->mmaped_address,
> +			(size_t)dev->mmaped_size);
> +		free(dev->mem);
> +		dev->mem = NULL;
> +	}
> +
>  	ops->set_mem_table(ctx, &regions[0], valid_regions);
>  	return 0;
>  }
> diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c b/lib/librte_vhost/vhost_user/vhost-net-user.c
> new file mode 100644
> index 0000000..841d7e6
> --- /dev/null
> +++ b/lib/librte_vhost/vhost_user/vhost-net-user.c
> @@ -0,0 +1,422 @@
> +/*-
> + *   BSD LICENSE
> + *
> + *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> + *   All rights reserved.
> + *
> + *   Redistribution and use in source and binary forms, with or without
> + *   modification, are permitted provided that the following conditions
> + *   are met:
> + *
> + *     * Redistributions of source code must retain the above copyright
> + *       notice, this list of conditions and the following disclaimer.
> + *     * Redistributions in binary form must reproduce the above copyright
> + *       notice, this list of conditions and the following disclaimer in
> + *       the documentation and/or other materials provided with the
> + *       distribution.
> + *     * Neither the name of Intel Corporation nor the names of its
> + *       contributors may be used to endorse or promote products derived
> + *       from this software without specific prior written permission.
> + *
> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +#include <stdint.h>
> +#include <stdio.h>
> +#include <limits.h>
> +#include <stdlib.h>
> +#include <unistd.h>
> +#include <string.h>
> +#include <sys/types.h>
> +#include <sys/socket.h>
> +#include <sys/un.h>
> +#include <errno.h>
> +
> +#include <rte_log.h>
> +#include <rte_virtio_net.h>
> +
> +#include "fd_man.h"
> +#include "vhost-net-user.h"
> +#include "vhost-net.h"
> +#include "virtio-net-user.h"
> +
> +static void vserver_new_vq_conn(int fd, uint64_t data);
> +static void vserver_message_handler(int fd, uint64_t dat);
> +struct vhost_net_device_ops const *ops;
> +
> +static struct vhost_server *g_vhost_server;
> +
> +static const char *vhost_message_str[VHOST_USER_MAX] = {
> +	[VHOST_USER_NONE] = "VHOST_USER_NONE",
> +	[VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES",
> +	[VHOST_USER_SET_FEATURES] = "VHOST_USER_SET_FEATURES",
> +	[VHOST_USER_SET_OWNER] = "VHOST_USER_SET_OWNER",
> +	[VHOST_USER_RESET_OWNER] = "VHOST_USER_RESET_OWNER",
> +	[VHOST_USER_SET_MEM_TABLE] = "VHOST_USER_SET_MEM_TABLE",
> +	[VHOST_USER_SET_LOG_BASE] = "VHOST_USER_SET_LOG_BASE",
> +	[VHOST_USER_SET_LOG_FD] = "VHOST_USER_SET_LOG_FD",
> +	[VHOST_USER_SET_VRING_NUM] = "VHOST_USER_SET_VRING_NUM",
> +	[VHOST_USER_SET_VRING_ADDR] = "VHOST_USER_SET_VRING_ADDR",
> +	[VHOST_USER_SET_VRING_BASE] = "VHOST_USER_SET_VRING_BASE",
> +	[VHOST_USER_GET_VRING_BASE] = "VHOST_USER_GET_VRING_BASE",
> +	[VHOST_USER_SET_VRING_KICK] = "VHOST_USER_SET_VRING_KICK",
> +	[VHOST_USER_SET_VRING_CALL] = "VHOST_USER_SET_VRING_CALL",
> +	[VHOST_USER_SET_VRING_ERR]  = "VHOST_USER_SET_VRING_ERR"
> +};
> +
> +/**
> + * Create a unix domain socket, bind to path and listen for connection.
> + * @return
> + *  socket fd or -1 on failure
> + */
> +static int
> +uds_socket(const char *path)
> +{
> +	struct sockaddr_un un;
> +	int sockfd;
> +	int ret;
> +
> +	if (path == NULL)
> +		return -1;
> +
> +	sockfd = socket(AF_UNIX, SOCK_STREAM, 0);
> +	if (sockfd < 0)
> +		return -1;
> +	RTE_LOG(INFO, VHOST_CONFIG, "socket created, fd:%d\n", sockfd);
> +
> +	memset(&un, 0, sizeof(un));
> +	un.sun_family = AF_UNIX;
> +	snprintf(un.sun_path, sizeof(un.sun_path), "%s", path);
> +	ret = bind(sockfd, (struct sockaddr *)&un, sizeof(un));
> +	if (ret == -1)
> +		goto err;
> +	RTE_LOG(INFO, VHOST_CONFIG, "bind to %s\n", path);
> +
> +	ret = listen(sockfd, 1);
> +	if (ret == -1)
> +		goto err;
> +
> +	return sockfd;
> +
> +err:
> +	close(sockfd);
> +	return -1;
> +}
> +
> +/* return bytes# of read on success or negative val on failure. */
> +static int
> +read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
> +{
> +	struct iovec iov;
> +	struct msghdr msgh = { 0 };
> +	size_t fdsize = fd_num * sizeof(int);
> +	char control[CMSG_SPACE(fdsize)];
> +	struct cmsghdr *cmsg;
> +	int ret;
> +
> +	iov.iov_base = buf;
> +	iov.iov_len  = buflen;
> +
> +	msgh.msg_iov = &iov;
> +	msgh.msg_iovlen = 1;
> +	msgh.msg_control = control;
> +	msgh.msg_controllen = sizeof(control);
> +
> +	ret = recvmsg(sockfd, &msgh, 0);
> +	if (ret <= 0) {
> +		RTE_LOG(ERR, VHOST_CONFIG, "recvmsg failed\n");
> +		return ret;
> +	}
> +
> +	if (msgh.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) {
> +		RTE_LOG(ERR, VHOST_CONFIG, "Truncted msg\n");
> +		return -1;
> +	}
> +
> +	for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL;
> +		cmsg = CMSG_NXTHDR(&msgh, cmsg)) {
> +		if ((cmsg->cmsg_level == SOL_SOCKET) &&
> +			(cmsg->cmsg_type == SCM_RIGHTS)) {
> +			memcpy(fds, CMSG_DATA(cmsg), fdsize);
> +			break;
> +		}
> +	}
> +
> +	return ret;
> +}
> +
> +/* return bytes# of read on success or negative val on failure. */
> +static int
> +read_vhost_message(int sockfd, struct VhostUserMsg *msg)
> +{
> +	int ret;
> +
> +	ret = read_fd_message(sockfd, (char *)msg, VHOST_USER_HDR_SIZE,
> +		msg->fds, VHOST_MEMORY_MAX_NREGIONS);
> +	if (ret <= 0)
> +		return ret;
> +
> +	if (msg && msg->size) {
> +		if (msg->size > sizeof(msg->payload)) {
> +			RTE_LOG(ERR, VHOST_CONFIG,
> +				"invalid msg size: %d\n", msg->size);
> +			return -1;
> +		}
> +		ret = read(sockfd, &msg->payload, msg->size);
> +		if (ret <= 0)
> +			return ret;
> +		if (ret != (int)msg->size) {
> +			RTE_LOG(ERR, VHOST_CONFIG,
> +				"read control message failed\n");
> +			return -1;
> +		}
> +	}
> +
> +	return ret;
> +}
> +
> +static int
> +send_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
> +{
> +
> +	struct iovec iov;
> +	struct msghdr msgh = { 0 };
> +	size_t fdsize = fd_num * sizeof(int);
> +	char control[CMSG_SPACE(fdsize)];
> +	struct cmsghdr *cmsg;
> +	int ret;
> +
> +	iov.iov_base = buf;
> +	iov.iov_len = buflen;
> +
> +	msgh.msg_iov = &iov;
> +	msgh.msg_iovlen = 1;
> +
> +	if (fds && fd_num > 0) {
> +		msgh.msg_control = control;
> +		msgh.msg_controllen = sizeof(control);
> +		cmsg = CMSG_FIRSTHDR(&msgh);
> +		cmsg->cmsg_len = CMSG_LEN(fdsize);
> +		cmsg->cmsg_level = SOL_SOCKET;
> +		cmsg->cmsg_type = SCM_RIGHTS;
> +		memcpy(CMSG_DATA(cmsg), fds, fdsize);
> +	} else {
> +		msgh.msg_control = NULL;
> +		msgh.msg_controllen = 0;
> +	}
> +
> +	do {
> +		ret = sendmsg(sockfd, &msgh, 0);
> +	} while (ret < 0 && errno == EINTR);
> +
> +	if (ret < 0) {
> +		RTE_LOG(ERR, VHOST_CONFIG,  "sendmsg error\n");
> +		return ret;
> +	}
> +
> +	return ret;
> +}
> +
> +static int
> +send_vhost_message(int sockfd, struct VhostUserMsg *msg)
> +{
> +	int ret;
> +
> +	if (!msg)
> +		return 0;
> +
> +	msg->flags &= ~VHOST_USER_VERSION_MASK;
> +	msg->flags |= VHOST_USER_VERSION;
> +	msg->flags |= VHOST_USER_REPLY_MASK;
> +
> +	ret = send_fd_message(sockfd, (char *)msg,
> +		VHOST_USER_HDR_SIZE + msg->size, NULL, 0);
> +
> +	return ret;
> +}
> +
> +/* call back when there is new virtio connection.  */
> +static void
> +vserver_new_vq_conn(int fd, uint64_t dat)
> +{
> +	struct vhost_server *vserver = (void *)(uintptr_t)dat;
> +	int conn_fd;
> +	uint32_t fh;
> +	struct vhost_device_ctx vdev_ctx = { 0 };
> +
> +	conn_fd = accept(fd, NULL, NULL);
> +	RTE_LOG(INFO, VHOST_CONFIG,
> +		"new virtio connection is %d\n", conn_fd);
> +	if (conn_fd < 0)
> +		return;
> +
> +	fh = ops->new_device(vdev_ctx);
> +	RTE_LOG(INFO, VHOST_CONFIG, "new device, handle is %d\n", fh);
> +
> +	fdset_add(&vserver->fdset,
> +		conn_fd, vserver_message_handler, NULL, fh);
> +}
> +
> +/* callback when there is message on the connfd */
> +static void
> +vserver_message_handler(int connfd, uint64_t dat)
> +{
> +	struct vhost_device_ctx ctx;
> +	uint32_t fh = (uint32_t)dat;
> +	struct VhostUserMsg msg;
> +	uint64_t features;
> +	int ret;
> +
> +	ctx.fh = fh;
> +	ret = read_vhost_message(connfd, &msg);
> +	if (ret < 0) {
> +		RTE_LOG(ERR, VHOST_CONFIG,
> +			"vhost read message failed\n");
> +
> +		/*TODO: cleanup */
> +		close(connfd);
> +		fdset_del(&g_vhost_server->fdset, connfd);
> +		ops->destroy_device(ctx);
> +
> +		return;
> +	} else if (ret == 0) {
> +		RTE_LOG(INFO, VHOST_CONFIG,
> +			"vhost peer closed\n");
> +
> +		/*TODO: cleanup */
> +		close(connfd);
> +		fdset_del(&g_vhost_server->fdset, connfd);
> +		ops->destroy_device(ctx);
> +
> +		return;
> +	}
> +	if (msg.request > VHOST_USER_MAX) {
> +		RTE_LOG(ERR, VHOST_CONFIG,
> +			"vhost read incorrect message\n");
> +
> +		/*TODO: cleanup */
> +		close(connfd);
> +		fdset_del(&g_vhost_server->fdset, connfd);
> +
> +		return;
> +	}
> +
> +	RTE_LOG(INFO, VHOST_CONFIG, "read message %s\n",
> +		vhost_message_str[msg.request]);
> +	switch (msg.request) {
> +	case VHOST_USER_GET_FEATURES:
> +		ret = ops->get_features(ctx, &features);
> +		msg.payload.u64 = ret;
> +		msg.size = sizeof(msg.payload.u64);
> +		send_vhost_message(connfd, &msg);
> +		break;
> +	case VHOST_USER_SET_FEATURES:
> +		ops->set_features(ctx, &features);
> +		break;
> +
> +	case VHOST_USER_SET_OWNER:
> +		ops->set_owner(ctx);
> +		break;
> +	case VHOST_USER_RESET_OWNER:
> +		ops->reset_owner(ctx);
> +		break;
> +
> +	case VHOST_USER_SET_MEM_TABLE:
> +		user_set_mem_table(ctx, &msg);
> +		break;
> +
> +	case VHOST_USER_SET_LOG_BASE:
> +	case VHOST_USER_SET_LOG_FD:

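Shouldn't the fd that arrives with a VHOST_USER_SET_LOG_FD message be closed here? The request is not implemented, so the received fd is never used or closed, and it leaks.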

> +		RTE_LOG(INFO, VHOST_CONFIG, "not implemented.\n");
> +		break;
> +
> +	case VHOST_USER_SET_VRING_NUM:
> +		ops->set_vring_num(ctx, &msg.payload.state);
> +		break;
> +	case VHOST_USER_SET_VRING_ADDR:
> +		ops->set_vring_addr(ctx, &msg.payload.addr);
> +		break;
> +	case VHOST_USER_SET_VRING_BASE:
> +		ops->set_vring_base(ctx, &msg.payload.state);
> +		break;
> +
> +	case VHOST_USER_GET_VRING_BASE:
> +		ret = user_get_vring_base(ctx, &msg.payload.state);
> +		msg.size = sizeof(msg.payload.state);
> +		send_vhost_message(connfd, &msg);
> +		break;
> +
> +	case VHOST_USER_SET_VRING_KICK:
> +		user_set_vring_kick(ctx, &msg);
> +		break;
> +	case VHOST_USER_SET_VRING_CALL:
> +		user_set_vring_call(ctx, &msg);
> +		break;
> +
> +	case VHOST_USER_SET_VRING_ERR:

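Shouldn't the fd that arrives with a VHOST_USER_SET_VRING_ERR message be closed here? The request is not implemented, so the received fd is never used or closed, and it leaks.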

> +		RTE_LOG(INFO, VHOST_CONFIG, "not implemented\n");
> +		break;
> +
> +	default:
> +		break;
> +
> +	}
> +}
> +
> +
> +/**
> + * Creates and initialise the vhost server.
> + */
> +int
> +rte_vhost_driver_register(const char *path)
> +{
> +
> +	struct vhost_server *vserver;
> +
> +	if (g_vhost_server != NULL)
> +		return -1;
> +
> +	vserver = calloc(sizeof(struct vhost_server), 1);
> +	if (vserver == NULL)
> +		return -1;
> +
> +	fdset_init(&vserver->fdset);
> +
> +	unlink(path);
> +
> +	vserver->listenfd = uds_socket(path);
> +	if (vserver->listenfd < 0) {
> +		free(vserver);
> +		return -1;
> +	}
> +	vserver->path = path;
> +
> +	fdset_add(&vserver->fdset, vserver->listenfd,
> +		vserver_new_vq_conn, NULL,
> +		(uint64_t)(uintptr_t)vserver);
> +
> +	ops = get_virtio_net_callbacks();
> +
> +	g_vhost_server = vserver;
> +
> +	return 0;
> +}
> +
> +
> +int
> +rte_vhost_driver_session_start(void)
> +{
> +	fdset_event_dispatch(&g_vhost_server->fdset);
> +	return 0;
> +}
> +
> diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.h b/lib/librte_vhost/vhost_user/vhost-net-user.h
> new file mode 100644
> index 0000000..c138844
> --- /dev/null
> +++ b/lib/librte_vhost/vhost_user/vhost-net-user.h
> @@ -0,0 +1,108 @@
> +/*-
> + *   BSD LICENSE
> + *
> + *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> + *   All rights reserved.
> + *
> + *   Redistribution and use in source and binary forms, with or without
> + *   modification, are permitted provided that the following conditions
> + *   are met:
> + *
> + *     * Redistributions of source code must retain the above copyright
> + *       notice, this list of conditions and the following disclaimer.
> + *     * Redistributions in binary form must reproduce the above copyright
> + *       notice, this list of conditions and the following disclaimer in
> + *       the documentation and/or other materials provided with the
> + *       distribution.
> + *     * Neither the name of Intel Corporation nor the names of its
> + *       contributors may be used to endorse or promote products derived
> + *       from this software without specific prior written permission.
> + *
> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +#ifndef _VHOST_NET_USER_H
> +#define _VHOST_NET_USER_H
> +
> +#include <stdint.h>
> +#include <linux/vhost.h>
> +
> +#include "fd_man.h"
> +
> +struct vhost_server {
> +	const char *path; /**< The path the uds is bind to. */
> +	int listenfd;     /**< The listener sockfd. */
> +	struct fdset fdset; /**< The fd list this vhost server manages. */
> +};
> +
> +/* refer to hw/virtio/vhost-user.c */
> +
> +#define VHOST_MEMORY_MAX_NREGIONS    8
> +
> +typedef enum VhostUserRequest {
> +	VHOST_USER_NONE = 0,
> +	VHOST_USER_GET_FEATURES = 1,
> +	VHOST_USER_SET_FEATURES = 2,
> +	VHOST_USER_SET_OWNER = 3,
> +	VHOST_USER_RESET_OWNER = 4,
> +	VHOST_USER_SET_MEM_TABLE = 5,
> +	VHOST_USER_SET_LOG_BASE = 6,
> +	VHOST_USER_SET_LOG_FD = 7,
> +	VHOST_USER_SET_VRING_NUM = 8,
> +	VHOST_USER_SET_VRING_ADDR = 9,
> +	VHOST_USER_SET_VRING_BASE = 10,
> +	VHOST_USER_GET_VRING_BASE = 11,
> +	VHOST_USER_SET_VRING_KICK = 12,
> +	VHOST_USER_SET_VRING_CALL = 13,
> +	VHOST_USER_SET_VRING_ERR = 14,
> +	VHOST_USER_MAX
> +} VhostUserRequest;
> +
> +typedef struct VhostUserMemoryRegion {
> +	uint64_t guest_phys_addr;
> +	uint64_t memory_size;
> +	uint64_t userspace_addr;
> +	uint64_t mmap_offset;
> +} VhostUserMemoryRegion;
> +
> +typedef struct VhostUserMemory {
> +	uint32_t nregions;
> +	uint32_t padding;
> +	VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
> +} VhostUserMemory;
> +
> +typedef struct VhostUserMsg {
> +	VhostUserRequest request;
> +
> +#define VHOST_USER_VERSION_MASK     (0x3)
> +#define VHOST_USER_REPLY_MASK       (0x1 << 2)
> +	uint32_t flags;
> +	uint32_t size; /* the following payload size */
> +	union {
> +#define VHOST_USER_VRING_IDX_MASK   (0xff)
> +#define VHOST_USER_VRING_NOFD_MASK  (0x1<<8)
> +		uint64_t u64;
> +		struct vhost_vring_state state;
> +		struct vhost_vring_addr addr;
> +	VhostUserMemory memory;
> +	} payload;
> +	int fds[VHOST_MEMORY_MAX_NREGIONS];
> +} __attribute((packed)) VhostUserMsg;
> +
> +#define VHOST_USER_HDR_SIZE (intptr_t)(&((VhostUserMsg *)0)->payload.u64)
> +
> +/* The version of the protocol we support */
> +#define VHOST_USER_VERSION    (0x1)
> +
> +/*****************************************************************************/
> +#endif
> diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.c b/lib/librte_vhost/vhost_user/virtio-net-user.c
> new file mode 100644
> index 0000000..ad59fcc
> --- /dev/null
> +++ b/lib/librte_vhost/vhost_user/virtio-net-user.c
> @@ -0,0 +1,199 @@
> +/*-
> + *   BSD LICENSE
> + *
> + *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> + *   All rights reserved.
> + *
> + *   Redistribution and use in source and binary forms, with or without
> + *   modification, are permitted provided that the following conditions
> + *   are met:
> + *
> + *     * Redistributions of source code must retain the above copyright
> + *       notice, this list of conditions and the following disclaimer.
> + *     * Redistributions in binary form must reproduce the above copyright
> + *       notice, this list of conditions and the following disclaimer in
> + *       the documentation and/or other materials provided with the
> + *       distribution.
> + *     * Neither the name of Intel Corporation nor the names of its
> + *       contributors may be used to endorse or promote products derived
> + *       from this software without specific prior written permission.
> + *
> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +#include <stdint.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <unistd.h>
> +#include <sys/mman.h>
> +
> +#include <rte_log.h>
> +
> +#include "virtio-net.h"
> +#include "virtio-net-user.h"
> +#include "vhost-net-user.h"
> +#include "vhost-net.h"
> +
> +int
> +user_set_mem_table(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
> +{
> +	unsigned int idx;
> +	struct VhostUserMemory memory = pmsg->payload.memory;
> +	struct virtio_memory_regions regions[VHOST_MEMORY_MAX_NREGIONS];
> +	uint64_t mapped_address, base_address = 0;
> +
> +	for (idx = 0; idx < memory.nregions; idx++) {
> +		if (memory.regions[idx].guest_phys_addr == 0)
> +			base_address = memory.regions[idx].userspace_addr;
> +	}
> +	if (base_address == 0) {
> +		RTE_LOG(ERR, VHOST_CONFIG,
> +			"couldn't find the mem region whose GPA is 0.\n");
> +		return -1;
> +	}
> +
> +	for (idx = 0; idx < memory.nregions; idx++) {
> +		regions[idx].guest_phys_address =
> +			memory.regions[idx].guest_phys_addr;
> +		regions[idx].guest_phys_address_end =
> +			memory.regions[idx].guest_phys_addr +
> +			memory.regions[idx].memory_size;
> +		regions[idx].memory_size = memory.regions[idx].memory_size;
> +		regions[idx].userspace_address =
> +			memory.regions[idx].userspace_addr;
> +
> +		/* This is ugly */
> +		mapped_address = (uint64_t)(uintptr_t)mmap(NULL,
> +			regions[idx].memory_size +
> +				memory.regions[idx].mmap_offset,
> +			PROT_READ | PROT_WRITE, MAP_SHARED,
> +			pmsg->fds[idx],
> +			0);
> +		RTE_LOG(INFO, VHOST_CONFIG,
> +			"mapped region %d to %p\n",
> +			idx, (void *)mapped_address);
> +
> +		if (mapped_address == (uint64_t)(uintptr_t)MAP_FAILED) {
> +			RTE_LOG(ERR, VHOST_CONFIG,
> +				"mmap qemu guest failed.\n");
> +			return -1;
> +		}
> +
> +		mapped_address +=  memory.regions[idx].mmap_offset;
> +
> +		regions[idx].address_offset = mapped_address -
> +			regions[idx].guest_phys_address;
> +		LOG_DEBUG(VHOST_CONFIG,
> +			"REGION: %u GPA: %p QEMU VA: %p SIZE (%"PRIu64")\n",
> +			idx,
> +			(void *)(uintptr_t)regions[idx].guest_phys_address,
> +			(void *)(uintptr_t)regions[idx].userspace_address,
> +			 regions[idx].memory_size);
> +	}
> +	ops->set_mem_table(ctx, regions, memory.nregions);
> +	return 0;
> +}
> +
> +
> +static int
> +virtio_is_ready(struct virtio_net *dev)
> +{
> +	struct vhost_virtqueue *rvq, *tvq;
> +
> +	/* mq support in future.*/
> +	rvq = dev->virtqueue[VIRTIO_RXQ];
> +	tvq = dev->virtqueue[VIRTIO_TXQ];
> +	if (rvq && tvq && rvq->desc && tvq->desc &&
> +		(rvq->kickfd != (eventfd_t)-1) &&
> +		(rvq->callfd != (eventfd_t)-1) &&
> +		(tvq->kickfd != (eventfd_t)-1) &&
> +		(tvq->callfd != (eventfd_t)-1)) {
> +		RTE_LOG(INFO, VHOST_CONFIG,
> +			"virtio is now ready for processing.\n");
> +		return 1;
> +	}
> +	RTE_LOG(INFO, VHOST_CONFIG,
> +		"virtio isn't ready for processing.\n");
> +	return 0;
> +}
> +
> +void
> +user_set_vring_call(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
> +{
> +	struct vhost_vring_file file;
> +
> +	file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
> +	file.fd = pmsg->fds[0];
> +	RTE_LOG(INFO, VHOST_CONFIG,
> +		"vring call idx:%d file:%d\n", file.index, file.fd);
> +	ops->set_vring_call(ctx, &file);
> +}
> +
> +
> +/*
> + *  In vhost-user, when we receive kick message, will test whether virtio
> + *  device is ready for packet processing.
> + */
> +void
> +user_set_vring_kick(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
> +{
> +	struct vhost_vring_file file;
> +	struct virtio_net *dev = get_device(ctx);
> +
> +	file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
> +	file.fd = pmsg->fds[0];
> +	RTE_LOG(INFO, VHOST_CONFIG,
> +		"vring kick idx:%d file:%d\n", file.index, file.fd);
> +	ops->set_vring_kick(ctx, &file);
> +
> +	if (virtio_is_ready(dev) &&
> +		!(dev->flags & VIRTIO_DEV_RUNNING))
> +			notify_ops->new_device(dev);
> +
> +}
> +
> +/*
> + * when virtio is stopped, qemu will send us the GET_VRING_BASE message.
> + */
> +int
> +user_get_vring_base(struct vhost_device_ctx ctx,
> +	struct vhost_vring_state *state)
> +{
> +	struct virtio_net *dev = get_device(ctx);
> +
> +	/* We have to stop the queue (virtio) if it is running. */
> +	if (dev->flags & VIRTIO_DEV_RUNNING)
> +		notify_ops->destroy_device(dev);
> +
> +	/* Here we are safe to get the last used index */
> +	ops->get_vring_base(ctx, state->index, state);
> +
> +	RTE_LOG(INFO, VHOST_CONFIG,
> +		"vring base idx:%d file:%d\n", state->index, state->num);
> +	/*
> +	 * Based on current qemu vhost-user implementation, this message is
> +	 * sent and only sent in vhost_vring_stop.
> +	 * TODO: cleanup the vring, it isn't usable since here.
> +	 */
> +	if (dev->virtqueue[VIRTIO_RXQ]->callfd) {
> +		close(dev->virtqueue[VIRTIO_RXQ]->callfd);
> +		dev->virtqueue[VIRTIO_RXQ]->callfd = (eventfd_t)-1;
> +	}
> +	if (dev->virtqueue[VIRTIO_TXQ]->callfd) {
> +		close(dev->virtqueue[VIRTIO_TXQ]->callfd);
> +		dev->virtqueue[VIRTIO_TXQ]->callfd = (eventfd_t)-1;
> +	}
> +
> +	return 0;
> +
> +}
> diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.h b/lib/librte_vhost/vhost_user/virtio-net-user.h
> new file mode 100644
> index 0000000..0f6a75a
> --- /dev/null
> +++ b/lib/librte_vhost/vhost_user/virtio-net-user.h
> @@ -0,0 +1,48 @@
> +/*-
> + *   BSD LICENSE
> + *
> + *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> + *   All rights reserved.
> + *
> + *   Redistribution and use in source and binary forms, with or without
> + *   modification, are permitted provided that the following conditions
> + *   are met:
> + *
> + *     * Redistributions of source code must retain the above copyright
> + *       notice, this list of conditions and the following disclaimer.
> + *     * Redistributions in binary form must reproduce the above copyright
> + *       notice, this list of conditions and the following disclaimer in
> + *       the documentation and/or other materials provided with the
> + *       distribution.
> + *     * Neither the name of Intel Corporation nor the names of its
> + *       contributors may be used to endorse or promote products derived
> + *       from this software without specific prior written permission.
> + *
> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +#ifndef _VIRTIO_NET_USER_H
> +#define _VIRTIO_NET_USER_H
> +
> +#include "vhost-net.h"
> +#include "vhost-net-user.h"
> +
> +int user_set_mem_table(struct vhost_device_ctx, struct VhostUserMsg *);
> +
> +void user_set_vring_call(struct vhost_device_ctx, struct VhostUserMsg *);
> +
> +void user_set_vring_kick(struct vhost_device_ctx, struct VhostUserMsg *);
> +
> +int user_get_vring_base(struct vhost_device_ctx, struct vhost_vring_state *);
> +
> +#endif
> diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c
> index f81e459..0b49f1b 100644
> --- a/lib/librte_vhost/virtio-net.c
> +++ b/lib/librte_vhost/virtio-net.c
> @@ -46,6 +46,7 @@
>  #include <rte_virtio_net.h>
>  
>  #include "vhost-net.h"
> +#include "virtio-net.h"
>  
>  /*
>   * Device linked list structure for configuration.
> @@ -56,7 +57,7 @@ struct virtio_net_config_ll {
>  };
>  
>  /* device ops to add/remove device to/from data core. */
> -static struct virtio_net_device_ops const *notify_ops;
> +struct virtio_net_device_ops const *notify_ops;
>  /* root address of the linked list of managed virtio devices */
>  static struct virtio_net_config_ll *ll_root;
>  
> @@ -83,8 +84,9 @@ qva_to_vva(struct virtio_net *dev, uint64_t qemu_va)
>  		if ((qemu_va >= region->userspace_address) &&
>  			(qemu_va <= region->userspace_address +
>  			region->memory_size)) {
> -			vhost_va = dev->mem->mapped_address + qemu_va -
> -					dev->mem->base_address;
> +			vhost_va = qemu_va + region->guest_phys_address +
> +				region->address_offset -
> +				region->userspace_address;
>  			break;
>  		}
>  	}
> @@ -114,7 +116,7 @@ get_config_ll_entry(struct vhost_device_ctx ctx)
>   * Searches the configuration core linked list and
>   * retrieves the device if it exists.
>   */
> -static struct virtio_net *
> +struct virtio_net *
>  get_device(struct vhost_device_ctx ctx)
>  {
>  	struct virtio_net_config_ll *ll_dev;
> @@ -450,12 +452,6 @@ set_mem_table(struct vhost_device_ctx ctx,
>  	if (dev == NULL)
>  		return -1;
>  
> -	if (dev->mem) {
> -		munmap((void *)(uintptr_t)dev->mem->mapped_address,
> -			(size_t)dev->mem->mapped_size);
> -		free(dev->mem);
> -	}
> -
>  	/* Malloc the memory structure depending on the number of regions. */
>  	mem = calloc(1, sizeof(struct virtio_memory) +
>  		(sizeof(struct virtio_memory_regions) * nregions));
> diff --git a/lib/librte_vhost/virtio-net.h b/lib/librte_vhost/virtio-net.h
> new file mode 100644
> index 0000000..da4ade0
> --- /dev/null
> +++ b/lib/librte_vhost/virtio-net.h
> @@ -0,0 +1,43 @@
> +/*-
> + *   BSD LICENSE
> + *
> + *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> + *   All rights reserved.
> + *
> + *   Redistribution and use in source and binary forms, with or without
> + *   modification, are permitted provided that the following conditions
> + *   are met:
> + *
> + *     * Redistributions of source code must retain the above copyright
> + *       notice, this list of conditions and the following disclaimer.
> + *     * Redistributions in binary form must reproduce the above copyright
> + *       notice, this list of conditions and the following disclaimer in
> + *       the documentation and/or other materials provided with the
> + *       distribution.
> + *     * Neither the name of Intel Corporation nor the names of its
> + *       contributors may be used to endorse or promote products derived
> + *       from this software without specific prior written permission.
> + *
> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +#ifndef _VIRTIO_NET_H
> +#define _VIRTIO_NET_H
> +
> +#include "vhost-net.h"
> +#include "rte_virtio_net.h"
> +
> +struct virtio_net_device_ops const *notify_ops;
> +struct virtio_net * get_device(struct vhost_device_ctx ctx);
> +
> +#endif
> 

-- 
Regards,
Haifeng



More information about the dev mailing list