[dpdk-dev,RFC,4/4] examples/vdpa: add virtio-net PCI device driver

Message ID 1514570702-154906-5-git-send-email-xiao.w.wang@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Maxime Coquelin
Headers

Checks

Context Check Description
ci/checkpatch warning coding style issues
ci/Intel-compilation fail apply patch file failure

Commit Message

Xiao Wang Dec. 29, 2017, 6:05 p.m. UTC
  This sample shows an instance of vDPA device driver based on vDPA
lib, this driver uses a standard virtio-net PCI device as vDPA
device, which can serve as a backend for a virtio-net pci device
in nested VM.

The key driver ops implemented are:

* vdpa_virtio_eng_init
Prepare a resource pool to be used as vDPA devices for an engine.

* vdpa_virtio_eng_uninit
Reset the vDPA resource pool for an engine.

* vdpa_virtio_dev_init
Allocate a device for corresponding vhost socket.

* vdpa_virtio_dev_uninit
Free a device that is previously allocated.

* vdpa_virtio_dev_conf
With the guest virtio information recorded in virtio_net structure,
driver configures device and IOMMU to set up vhost datapath, which
includes: vring operation, VFIO interrupt, kick relay.

* vdpa_virtio_dev_close
Tear down everything that was configured in dev_conf.

* device capability reporting, e.g. queue number, features.

Below are setup steps for your reference:

1. Make sure your kernel vhost module and QEMU support vIOMMU.
   - OS: CentOS 7.4
   - QEMU: 2.10.1
   - Guest OS: CentOS 7.2
   - Nested VM OS: CentOS 7.2

2. enable VT-x feature for vCPU in VM.
   modprobe kvm_intel nested=1

3. Start a VM with a virtio-net-pci device.
   ./qemu-2.10.1/x86_64-softmmu/qemu-system-x86_64 -enable-kvm -cpu host \
   <snip>
   -machine q35 \
   -device intel-iommu \
   -netdev tap,id=mytap,ifname=vdpa,vhostforce=on \
   -device virtio-net-pci,netdev=mytap,mac=00:aa:bb:cc:dd:ee,\
   disable-modern=off,disable-legacy=on,iommu_platform=on \

4. Bind VFIO-pci to virtio_net_pci device
   a) login to VM;
   b) modprobe vfio-pci
   c) rmmod vfio_iommu_type1
   d) modprobe vfio_iommu_type1 allow_unsafe_interrupts=1
   e) ./usertools/dpdk-devbind.py -b vfio-pci 00:03.0

5. Start vDPA sample
   Based on DPDK 17.11 and the vDPA RFC patch, apply this patch set.
   Sample compilation is just like the other DPDK samples.

   ./examples/vdpa/build/vdpa -c 0x6 -n 4 --socket-mem 512 --no-pci -- \
   --bdf 0000:00:03.0 --devcnt 1 --engine vdpa_virtio_net \
   --iface /tmp/vhost-user- --queue 1

6. Start nested VM
   ./qemu-2.10.1/x86_64-softmmu/qemu-system-x86_64 -cpu host -enable-kvm \
   <snip>
   -mem-prealloc \
   -chardev socket,id=char0,path=/tmp/vhost-user-0 \
   -netdev type=vhost-user,id=vdpa,chardev=char0,vhostforce \
   -device virtio-net-pci,netdev=vdpa,mac=00:aa:bb:cc:dd:ee \

7. Login the nested VM, and verify the virtio in nested VM can communicate
   with tap device on host.

Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
---
 examples/vdpa/Makefile          |   59 ++
 examples/vdpa/main.c            |  321 ++++++++++
 examples/vdpa/vdpa_virtio_net.c | 1274 +++++++++++++++++++++++++++++++++++++++
 examples/vdpa/vdpa_virtio_net.h |  144 +++++
 4 files changed, 1798 insertions(+)
 create mode 100644 examples/vdpa/Makefile
 create mode 100644 examples/vdpa/main.c
 create mode 100644 examples/vdpa/vdpa_virtio_net.c
 create mode 100644 examples/vdpa/vdpa_virtio_net.h
  

Patch

diff --git a/examples/vdpa/Makefile b/examples/vdpa/Makefile
new file mode 100644
index 0000000..6571a05
--- /dev/null
+++ b/examples/vdpa/Makefile
@@ -0,0 +1,59 @@ 
+#   BSD LICENSE
+#
+#   Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+ifneq ($(CONFIG_RTE_EXEC_ENV),"linuxapp")
+$(info This application can only operate in a linuxapp environment, \
+please change the definition of the RTE_TARGET environment variable)
+all:
+else
+
+# binary name
+APP = vdpa
+
+# all source are stored in SRCS-y
+SRCS-y := main.c vdpa_virtio_net.c
+
+CFLAGS += -O2 -D_FILE_OFFSET_BITS=64
+CFLAGS += $(WERROR_FLAGS)
+CFLAGS += -D_GNU_SOURCE
+
+include $(RTE_SDK)/mk/rte.extapp.mk
+
+endif
diff --git a/examples/vdpa/main.c b/examples/vdpa/main.c
new file mode 100644
index 0000000..3cf6c78
--- /dev/null
+++ b/examples/vdpa/main.c
@@ -0,0 +1,321 @@ 
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
#include <errno.h>
#include <getopt.h>
#include <limits.h>
#include <signal.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>

#include <rte_cycles.h>
#include <rte_ethdev.h>
#include <rte_malloc.h>
#include <rte_vhost.h>
#include <rte_vdpa.h>
#include "vdpa_virtio_net.h"
+
+#define MAX_PATH_LEN 128
+#define MAX_VDPA_SAMPLE_PORTS 8
+
+/* Per-port state: the vhost-user socket path plus the ids that bind
+ * the vhost connection to its vDPA engine/device.
+ */
+struct vdpa_port {
+	char ifname[MAX_PATH_LEN];	/* vhost-user socket path */
+	int eid;			/* vDPA engine id */
+	int did;			/* vDPA device id inside the engine */
+	int vid;			/* vhost device id, set in new_device() */
+};
+
+/* one entry per device, indexes 0..devcnt-1 are filled in main() */
+struct vdpa_port vports[MAX_VDPA_SAMPLE_PORTS];
+struct rte_vdpa_eng_id dev_id;	/* PCI address parsed from --bdf */
+char engine[MAX_PATH_LEN];	/* engine name from --engine */
+char iface[MAX_PATH_LEN];	/* socket path prefix from --iface */
+int engid;	/* id returned by rte_vdpa_register_engine() */
+int queue;	/* per-device queue num from --queue */
+int devcnt;	/* device count from --devcnt */
+
/*
 * Parse str as an unsigned integer in the given base.
 * Returns the value on success, -1 on empty input, trailing garbage,
 * range error, or a value that does not fit in the int return type
 * (the original silently truncated unsigned long into int).
 */
static int
get_unsigned(const char *str, int base)
{
	unsigned long num;
	char *end = NULL;

	errno = 0;
	num = strtoul(str, &end, base);
	if ((str[0] == '\0') || (end == NULL) || (*end != '\0') ||
			(errno != 0) || (num > INT_MAX))
		return -1;

	return num;
}
+
+static int
+parse_args(int argc, char **argv)
+{
+	static const char *short_option = "";
+	static struct option long_option[] = {
+		{"bdf", required_argument, NULL, 0},
+		{"engine", required_argument, NULL, 0},
+		{"queue", required_argument, NULL, 0},
+		{"devcnt", required_argument, NULL, 0},
+		{"iface", required_argument, NULL, 0},
+		{NULL, 0, 0, 0},
+	};
+	char str[MAX_PATH_LEN];
+	int opt, idx;
+	int num[4];
+	int i, j;
+
+	while ((opt = getopt_long(argc, argv, short_option, long_option, &idx))
+			!= EOF) {
+		switch (opt) {
+		case 0:
+			if (strncmp(long_option[idx].name, "bdf",
+						MAX_PATH_LEN) == 0) {
+				strcpy(str, optarg);
+				memset(num, 0, 4 * sizeof(num[0]));
+				i = strlen(str) - 1;
+				j = 3;
+				while (i > 0 && j >= 0) {
+					while ((str[i - 1] != ':'
+							&& str[i - 1] != '.')
+							&& i > 0)
+						i--;
+					num[j--] = get_unsigned(&str[i], 16);
+					i--;
+					if (i >= 0)
+						str[i] = '\0';
+				}
+				dev_id.pci_addr.domain = num[0];
+				dev_id.pci_addr.bus = num[1];
+				dev_id.pci_addr.devid = num[2];
+				dev_id.pci_addr.function = num[3];
+				printf("bdf %04x:%02x:%02x.%02x\n",
+						dev_id.pci_addr.domain,
+						dev_id.pci_addr.bus,
+						dev_id.pci_addr.devid,
+						dev_id.pci_addr.function);
+			} else if (strncmp(long_option[idx].name, "queue",
+						MAX_PATH_LEN) == 0) {
+				queue = get_unsigned(optarg, 10);
+				printf("queue %d\n", queue);
+			} else if (strncmp(long_option[idx].name, "devcnt",
+						MAX_PATH_LEN) == 0) {
+				devcnt = get_unsigned(optarg, 10);
+				printf("devcnt %d\n", devcnt);
+			} else if (strncmp(long_option[idx].name, "engine",
+						MAX_PATH_LEN) == 0) {
+				strcpy(engine, optarg);
+				printf("engine %s\n", engine);
+			} else if (strncmp(long_option[idx].name, "iface",
+						MAX_PATH_LEN) == 0) {
+				strcpy(iface, optarg);
+				printf("iface %s\n", iface);
+			}
+			break;
+		default:
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+static int
+register_engine(void)
+{
+	if (strlen(engine) <= 0)
+		return -1;
+
+	engid = rte_vdpa_register_engine(engine, &dev_id);
+
+	return engid;
+}
+
+static int
+unregister_engine(void)
+{
+	if (engid < 0)
+		return -1;
+
+	engid = rte_vdpa_unregister_engine(engid);
+
+	return engid;
+}
+
+static int
+init(void)
+{
+	devcnt = MAX_VDPA_SAMPLE_PORTS;
+	engid = -1;
+	queue = 1;
+	memset(&dev_id, 0, sizeof(dev_id));
+	memset(engine, 0, MAX_PATH_LEN * sizeof(engine[0]));
+	memset(iface, 0, MAX_PATH_LEN * sizeof(iface[0]));
+
+	return 0;
+}
+
+/* SIGINT handler: quit the sample immediately.
+ * NOTE(review): exits without stopping the vDPA devices or
+ * unregistering the engine; cleanup is left to process teardown.
+ */
+static void
+sigint_handler(__rte_unused int signum)
+{
+	exit(0);
+}
+
+static int
+new_device(int vid)
+{
+	char ifname[MAX_PATH_LEN];
+	int i;
+
+	rte_vhost_get_ifname(vid, ifname, sizeof(ifname));
+	for (i = 0; i < MAX_VDPA_SAMPLE_PORTS; i++) {
+		if (strcmp(ifname, vports[i].ifname) == 0) {
+			vports[i].vid = vid;
+			break;
+		}
+	}
+
+	if (i >= MAX_VDPA_SAMPLE_PORTS)
+		return -1;
+
+	rte_vhost_set_vdpa_eid(vid, vports[i].eid);
+	rte_vhost_set_vdpa_did(vid, vports[i].did);
+
+	return 0;
+}
+
+static void
+destroy_device(int vid)
+{
+	int i;
+
+	for (i = 0; i < MAX_VDPA_SAMPLE_PORTS; i++)
+		if (vid == vports[i].vid) {
+			vports[i].vid = -1;
+			break;
+		}
+}
+
+static const struct vhost_device_ops vdpa_devops = {
+	.new_device = new_device,
+	.destroy_device = destroy_device,
+	.vring_state_changed = NULL,
+	.features_changed = NULL,
+	.new_connection = NULL,
+	.destroy_connection = NULL,
+};
+
+int
+main(int argc, char *argv[])
+{
+	char ifname[MAX_PATH_LEN];
+	char ch;
+	int did, ret, i;
+	uint64_t flags = 0;
+
+	signal(SIGINT, sigint_handler);
+	ret = rte_eal_init(argc, argv);
+	if (ret < 0)
+		rte_exit(EXIT_FAILURE, "eal init failed\n");
+
+	argc -= ret;
+	argv += ret;
+	ret = init();
+	if (ret)
+		rte_exit(EXIT_FAILURE, "app init failed\n");
+
+	ret = parse_args(argc, argv);
+	if (ret < 0)
+		rte_exit(EXIT_FAILURE, "invalid argument\n");
+
+	ret = register_engine();
+	if (ret < 0)
+		rte_exit(EXIT_FAILURE, "register engine failed\n");
+
+	if (devcnt > vdpa_virtio_get_device_num(engid))
+		rte_exit(EXIT_FAILURE, "not enough devices in engine\n");
+
+	for (i = 0; i < devcnt; i++) {
+		sprintf(ifname, "%s%d", iface, i);
+		/* for vdpa devices, need to reserve resource via driver */
+		did = vdpa_virtio_dev_init(engid, ifname);
+		vports[i].eid = engid;
+		vports[i].did = did;
+		strcpy(vports[i].ifname, ifname);
+
+		ret = rte_vhost_driver_register(ifname, flags);
+		if (ret != 0)
+			rte_exit(EXIT_FAILURE,
+					"register driver failed: %s\n",
+					ifname);
+
+		rte_vhost_driver_callback_register(ifname, &vdpa_devops);
+		if (ret != 0)
+			rte_exit(EXIT_FAILURE,
+					"register driver ops failed: %s\n",
+					ifname);
+		/* for vdpa devices, need to set capabilities via vhost lib */
+		rte_vhost_driver_set_queue_num(ifname,
+				RTE_MIN(vdpa_virtio_get_queue_num(engid, did),
+					queue));
+		rte_vhost_driver_set_features(ifname,
+				vdpa_virtio_get_features(engid, did));
+		rte_vhost_driver_set_protocol_features(ifname,
+				vdpa_virtio_get_protocol_features(engid, did));
+
+		if (rte_vhost_driver_start(ifname) < 0)
+			rte_exit(EXIT_FAILURE,
+					"start vhost driver failed: %s\n",
+					ifname);
+
+		/* for vdpa devices, need to start device via driver */
+		vdpa_virtio_dev_start(engid, did);
+	}
+
+	printf("enter \'q\' to quit\n");
+	while (scanf("%c", &ch)) {
+		if (ch == 'q')
+			break;
+		while (ch != '\n')
+			scanf("%c", &ch);
+		printf("enter \'q\' to quit\n");
+	}
+
+	/* for vdpa devices, need to free resources via driver */
+	for (i = 0; i < devcnt; i++) {
+		vdpa_virtio_dev_stop(vports[i].eid, vports[i].did);
+		vdpa_virtio_dev_uninit(vports[i].eid, vports[i].did);
+	}
+
+	ret = unregister_engine();
+
+	return ret;
+}
diff --git a/examples/vdpa/vdpa_virtio_net.c b/examples/vdpa/vdpa_virtio_net.c
new file mode 100644
index 0000000..62ab797
--- /dev/null
+++ b/examples/vdpa/vdpa_virtio_net.c
@@ -0,0 +1,1274 @@ 
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <sys/syscall.h>
+#include <linux/pci_regs.h>
+#include <sys/epoll.h>
+#include <sys/eventfd.h>
+#include <sys/socket.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <rte_log.h>
+#include <rte_ethdev.h>
+#include <rte_io.h>
+#include <rte_malloc.h>
+#include <rte_memzone.h>
+#include <rte_cycles.h>
+#include "vdpa_virtio_net.h"
+
+#define True 1
+#define False 0
+
+#define PCI_CAPABILITY_LIST	0x34
+#define PCI_CAP_ID_VNDR		0x09
+
+/*
+ * Maximum number of virtqueues per device.
+ */
+#define VIRTIO_MAX_VIRTQUEUES 1
+
+/* Common configuration */
+#define VIRTIO_PCI_CAP_COMMON_CFG	1
+/* Notifications */
+#define VIRTIO_PCI_CAP_NOTIFY_CFG	2
+/* ISR Status */
+#define VIRTIO_PCI_CAP_ISR_CFG		3
+/* Device specific configuration */
+#define VIRTIO_PCI_CAP_DEVICE_CFG	4
+/* PCI configuration access */
+#define VIRTIO_PCI_CAP_PCI_CFG		5
+
+/* The feature bitmap for virtio net */
+#define VIRTIO_NET_F_CSUM	0	/* Host handles pkts w/ partial csum */
+#define VIRTIO_NET_F_GUEST_CSUM	1	/* Guest handles pkts w/ partial csum */
+#define VIRTIO_NET_F_MAC	5	/* Host has given MAC address. */
+#define VIRTIO_NET_F_GUEST_TSO4	7	/* Guest can handle TSOv4 in. */
+#define VIRTIO_NET_F_GUEST_TSO6	8	/* Guest can handle TSOv6 in. */
+#define VIRTIO_NET_F_GUEST_ECN	9	/* Guest can handle TSO w/ ECN in. */
+#define VIRTIO_NET_F_GUEST_UFO	10	/* Guest can handle UFO in. */
+#define VIRTIO_NET_F_HOST_TSO4	11	/* Host can handle TSOv4 in. */
+#define VIRTIO_NET_F_HOST_TSO6	12	/* Host can handle TSOv6 in. */
+#define VIRTIO_NET_F_HOST_ECN	13	/* Host can handle TSO w/ ECN in. */
+#define VIRTIO_NET_F_HOST_UFO	14	/* Host can handle UFO in. */
+#define VIRTIO_NET_F_MRG_RXBUF	15	/* Host can merge receive buffers. */
+#define VIRTIO_NET_F_STATUS	16	/* virtio_net_config.status available */
+#define VIRTIO_NET_F_CTRL_VQ	17	/* Control channel available */
+#define VIRTIO_NET_F_CTRL_RX	18	/* Control channel RX mode support */
+#define VIRTIO_NET_F_CTRL_VLAN	19	/* Control channel VLAN filtering */
+#define VIRTIO_NET_F_CTRL_RX_EXTRA 20	/* Extra RX mode control support */
+#define VIRTIO_NET_F_GUEST_ANNOUNCE 21	/* Guest can announce device on the network */
+#define VIRTIO_NET_F_MQ		22	/* Device supports Receive Flow Steering */
+#define VIRTIO_NET_F_CTRL_MAC_ADDR 23	/* Set MAC address */
+
+#define VIRTIO_F_NOTIFY_ON_EMPTY	24
+
+#define VIRTIO_F_ANY_LAYOUT		27
+
+/* We support indirect buffer descriptors */
+#define VIRTIO_RING_F_INDIRECT_DESC	28
+
+#define VIRTIO_F_VERSION_1		32
+#define VIRTIO_F_IOMMU_PLATFORM	33
+
+/*
+ * Some VirtIO feature bits (currently bits 28 through 31) are
+ * reserved for the transport being used (eg. virtio_ring), the
+ * rest are per-device feature bits.
+ */
+#define VIRTIO_TRANSPORT_F_START 28
+#define VIRTIO_NET_VIRTIO_TRANSPORT_F_END   34
+
+/*
+ * The Guest publishes the used index for which it expects an interrupt
+ * at the end of the avail ring.
+ * The Host publishes the avail index for which it expects a kick
+ * at the end of the used ring.
+ */
+#define VIRTIO_RING_F_EVENT_IDX		29
+
+/* Status byte for driver to report progress. */
+#define VIRTIO_CONFIG_STATUS_RESET     0x00
+#define VIRTIO_CONFIG_STATUS_ACK       0x01
+#define VIRTIO_CONFIG_STATUS_DRIVER    0x02
+#define VIRTIO_CONFIG_STATUS_DRIVER_OK 0x04
+#define VIRTIO_CONFIG_STATUS_FEATURES_OK 0x08
+#define VIRTIO_CONFIG_STATUS_FAILED    0x80
+#define VIRTIO_MSI_NO_VECTOR 0xFFFF
+
+#define VFIO_GET_REGION_ADDR(x) ((uint64_t) x << 40ULL)
+#define MSIX_IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + \
+		sizeof(int) * (32 + 1))
+#define PAGE_SIZE   (sysconf(_SC_PAGESIZE))
+#define PAGE_MASK   (~(PAGE_SIZE - 1))
+
+static int pool_initiated[MAX_VDPA_ENGINE_NUM] = {0};
+static int total_virtio[MAX_VDPA_ENGINE_NUM] = {0};
+
+struct virtio_net_pci virtio_net_pool[MAX_VDPA_ENGINE_NUM][MAX_VDPA_DEVICE_VIRTIO];
+
+void *pci_find_max_end_va(void);
+int pci_get_kernel_driver_by_path(const char *filename, char *dri_name);
+int pci_parse_sysfs_resource(const char *filename, struct rte_pci_device *dev);
+
+int vfio_get_container_fd(void);
+int vfio_get_group_fd(int iommu_group_no);
+int vfio_get_group_no(const char *sysfs_base,
+		const char *dev_addr, int *iommu_group_no);
+
+int pci_vfio_get_msix_bar(int fd, struct pci_msix_table *msix_table);
+int pci_vfio_setup_interrupts(struct rte_pci_device *dev, int vfio_dev_fd);
+int pci_vfio_set_bus_master(int dev_fd, bool op);
+
+extern void *pci_map_addr;
+
+static int
+read_pci_dev(struct rte_pci_device *dev)
+{
+	char filename[PATH_MAX];
+	char dev_dir[PATH_MAX];
+	char driver[PATH_MAX];
+	int ret;
+
+	snprintf(dev_dir, sizeof(dev_dir), "%s/" PCI_PRI_FMT,
+			rte_pci_get_sysfs_path(),
+			dev->addr.domain, dev->addr.bus,
+			dev->addr.devid, dev->addr.function);
+	if (access(dev_dir, R_OK) != 0) {
+		DEBUG("\n%s: %s not exist\n", __func__, dev_dir);
+		return -1;
+	}
+
+	/* parse resources */
+	snprintf(filename, sizeof(filename), "%s/resource", dev_dir);
+	if (pci_parse_sysfs_resource(filename, dev) < 0) {
+		DEBUG("%s(): cannot parse resource\n", __func__);
+		return -1;
+	}
+
+	/* parse driver */
+	snprintf(filename, sizeof(filename), "%s/driver", dev_dir);
+	ret = pci_get_kernel_driver_by_path(filename, driver);
+	if (ret < 0) {
+		DEBUG("Fail to get kernel driver\n");
+		return -1;
+	}
+
+	if (ret > 0 || strcmp(driver, "vfio-pci") != 0) {
+		DEBUG("Kernel driver is not vfio-pci\n");
+		return -1;
+	}
+	return 0;
+}
+
+static inline int invalid_port(int eid, int did)
+{
+	if (did < 0 || did >= total_virtio[eid])
+		return 1;
+	return 0;
+}
+
/*
 * Extract the numeric suffix after the last '-' in a vhost socket
 * path (e.g. "/tmp/vhost-user-3" -> 3). If there is no '-', the whole
 * string is parsed. Returns the index, or -1 on a malformed suffix.
 */
static int extract_index(char *path)
{
	int i, len, device_id;
	char *str, *end;

	len = strlen(path);
	for (i = len - 1; i >= 0; i--) {
		if (path[i] == '-')
			break;
	}
	str = &path[i + 1];
	/* bug fix: reset errno before strtoul; the original tested a
	 * possibly stale errno and could spuriously return -1
	 */
	errno = 0;
	device_id = strtoul(str, &end, 10);
	if ((str[0] == '\0') || (end == NULL) || (*end != '\0') || (errno != 0))
		return -1;

	return device_id;
}
+
+static int virtio_net_alloc_hw_ele(int eid, char *args)
+{
+	int device_id;
+
+	DEBUG("\n%s: the vhost socket path %s\n", __func__, args);
+	device_id = extract_index(args);
+	if (device_id < 0 || device_id >= total_virtio[eid]) {
+		DEBUG("\n%s: device_id %d must be within the interval 0 ~ %d\n",
+				__func__, device_id, total_virtio[eid] - 1);
+		return -1;
+	}
+
+	if (True == virtio_net_pool[eid][device_id].used) {
+		DEBUG("\n%s: device_id %d has been taken already\n",
+				__func__, device_id);
+		return -1;
+	}
+
+	virtio_net_pool[eid][device_id].used = True;
+	return device_id;
+}
+
+static int virtio_net_free_hw_ele(int eid, int did)
+{
+	if (invalid_port(eid, did))
+		return -1;
+
+	virtio_net_pool[eid][did].used = False;
+	return 0;
+}
+
+/*
+ * Kick-relay thread body.
+ *
+ * Epoll-waits on the kickfd of every vring of the vhost device and,
+ * for each guest kick received, writes the queue id to that queue's
+ * notify register on the real virtio-net PCI device, forwarding the
+ * doorbell to hardware.
+ *
+ * arg is the struct virtio_net * of the vhost device. Returns NULL
+ * on epoll setup/wait failure; otherwise loops forever until
+ * cancelled by unset_notify_relay().
+ */
+static void *notify_relay(void *arg)
+{
+	int i, kickfd, epfd, nfds = 0;
+	struct virtio_net *dev = (struct virtio_net *)arg;
+	struct virtio_net_pci *vpci = &virtio_net_pool[dev->eid][dev->did];
+	struct virtio_net_hw *hw = &virtio_net_pool[dev->eid][dev->did].hw;
+	uint32_t n, qid, q_num = dev->nr_vring;
+	struct epoll_event events[q_num];
+	struct epoll_event ev;
+	struct vhost_virtqueue **vq = dev->virtqueue;
+	uint64_t buf;
+	int nbytes;
+
+	/* NOTE(review): epoll_create() result is not checked; a failure
+	 * here only surfaces as epoll_ctl() errors below.
+	 */
+	epfd = epoll_create(32);
+	vpci->epfd = epfd;
+	/* register every vring's kickfd; data.u32 carries the queue id */
+	for (n = 0; n < dev->nr_vring; n++) {
+		ev.data.u32 = n;
+		ev.events = EPOLLIN | EPOLLPRI;
+		if (epoll_ctl(epfd, EPOLL_CTL_ADD, vq[n]->kickfd, &ev) < 0) {
+			DEBUG("Error epoll add failed, %s\n", strerror(errno));
+			return NULL;
+		}
+	}
+
+	for (;;) {
+		nfds = epoll_wait(epfd, events, q_num, -1);
+		if (nfds < 0) {
+			if (errno == EINTR)
+				continue;
+			DEBUG("epoll_wait return fail\n");
+			return NULL;
+		} else if (nfds == 0)
+			continue;
+
+		for (i = 0; i < nfds; i++) {
+			qid = events[i].data.u32;
+			kickfd = vq[qid]->kickfd;
+
+			/* drain the eventfd counter (8-byte read) before
+			 * forwarding, retrying on EINTR/EWOULDBLOCK/EAGAIN
+			 */
+			do {
+				nbytes = read(kickfd, &buf, 8);
+				if (nbytes < 0) {
+					if (errno == EINTR || errno == EWOULDBLOCK ||
+							errno == EAGAIN)
+						continue;
+					DEBUG("Error reading from kickfd %d: %s\n",
+							kickfd, strerror(errno));
+				} else if (nbytes == 0)
+					DEBUG("Read nothing from kickfd %d\n", kickfd);
+				break;
+			} while (1);
+
+			/* ring the hardware doorbell for this queue */
+			rte_write16(qid, hw->notify_addr[qid]);
+		}
+	}
+
+	return NULL;
+}
+
+static int setup_notify_relay(struct virtio_net *dev)
+{
+	struct virtio_net_pci *vpci = &virtio_net_pool[dev->eid][dev->did];
+	int ret;
+
+	ret = pthread_create(&vpci->tid, NULL, notify_relay, dev);
+	if (ret != 0) {
+		DEBUG("failed to create notify relay pthread\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Stop the kick-relay thread and close its epoll fd.
+ * Safe to call when no relay is running (tid == 0 / epfd < 0).
+ * Always returns 0.
+ */
+static int unset_notify_relay(struct virtio_net *dev)
+{
+	struct virtio_net_pci *vpci = &virtio_net_pool[dev->eid][dev->did];
+	void *status;
+	/* relay thread loops forever: cancel it, then reap it */
+	if (vpci->tid) {
+		pthread_cancel(vpci->tid);
+		pthread_join(vpci->tid, &status);
+		DEBUG("\n%s device_id %d, cancel relay tid %lu\n", __func__,
+				dev->did, vpci->tid);
+	}
+	vpci->tid = 0;
+
+	if (vpci->epfd >= 0) {
+		close(vpci->epfd);
+		DEBUG("\n%s close epfd %d\n", __func__, vpci->epfd);
+	}
+	vpci->epfd = -1;
+
+	return 0;
+}
+
+static void *
+get_cap_addr(struct rte_pci_device *dev, struct virtio_net_pci_cap *cap)
+{
+	uint8_t bar = cap->bar;
+	uint32_t length = cap->length;
+	uint32_t offset = cap->offset;
+	uint8_t *base;
+
+	if (bar > 5) {
+		DEBUG("invalid bar: %u", bar);
+		return NULL;
+	}
+
+	if (offset + length < offset) {
+		DEBUG("offset(%u) + length(%u) overflows",
+			offset, length);
+		return NULL;
+	}
+
+	if (offset + length > dev->mem_resource[bar].len) {
+		DEBUG("invalid cap: overflows bar space: %u > %" PRIu64,
+			offset + length, dev->mem_resource[bar].len);
+		return NULL;
+	}
+
+	base = dev->mem_resource[bar].addr;
+	if (base == NULL) {
+		DEBUG("bar %u base addr is NULL", bar);
+		return NULL;
+	}
+
+	return base + offset;
+}
+
+/*
+ * Open and configure a VFIO device for dev_addr: get a container fd,
+ * resolve and open the IOMMU group, attach it to the container, set
+ * the type-1 IOMMU, then fetch the device fd and its info.
+ * On success *vfio_dev_fd holds the device fd, device_info is filled
+ * in, and hw->vfio_cfg records the container/group fds.
+ * Returns 0 on success, -1 on any failure.
+ */
+static int vfio_setup_device(const char *sysfs_base, const char *dev_addr,
+		int *vfio_dev_fd, struct vfio_device_info *device_info,
+		struct virtio_net_hw *hw)
+{
+	struct vfio_group_status group_status = {
+			.argsz = sizeof(group_status)
+	};
+	int vfio_group_fd;
+	int iommu_group_no;
+	int ret;
+	struct vfio_config *vfio_cfg;
+
+	vfio_cfg = &(hw->vfio_cfg);
+	vfio_cfg->group_fd = -1;
+	vfio_cfg->group_no = -1;
+	vfio_cfg->vfio_container_fd = vfio_get_container_fd();
+
+	/* check if we have VFIO driver enabled */
+	if (vfio_cfg->vfio_container_fd == -1) {
+		DEBUG("VFIO support could not be initialized\n");
+		return -1;
+	}
+
+	/* get group number */
+	ret = vfio_get_group_no(sysfs_base, dev_addr, &iommu_group_no);
+	if (ret <= 0) {
+		DEBUG("%s not managed by VFIO driver\n", dev_addr);
+		return -1;
+	}
+
+	/* get the actual group fd */
+	vfio_group_fd = vfio_get_group_fd(iommu_group_no);
+	DEBUG("\nget group no %u group fd %u\n", iommu_group_no, vfio_group_fd);
+	if (vfio_group_fd <= 0)
+		return -1;
+
+	/* store group fd */
+	vfio_cfg->group_no = iommu_group_no;
+	vfio_cfg->group_fd = vfio_group_fd;
+
+	/* check if the group is viable */
+	ret = ioctl(vfio_group_fd, VFIO_GROUP_GET_STATUS, &group_status);
+	if (ret) {
+		DEBUG("%s cannot get group status, error %i (%s)\n",
+				dev_addr, errno, strerror(errno));
+		close(vfio_group_fd);
+		return -1;
+	} else if (!(group_status.flags & VFIO_GROUP_FLAGS_VIABLE)) {
+		DEBUG("%s VFIO group is not viable!\n", dev_addr);
+		close(vfio_group_fd);
+		return -1;
+	}
+
+	/* check if group does not have a container yet */
+	if (!(group_status.flags & VFIO_GROUP_FLAGS_CONTAINER_SET)) {
+		/* add group to a container */
+		ret = ioctl(vfio_group_fd, VFIO_GROUP_SET_CONTAINER,
+				&vfio_cfg->vfio_container_fd);
+		if (ret) {
+			DEBUG("%s cannot add VFIO group to container, error %i (%s)\n",
+					dev_addr, errno, strerror(errno));
+			close(vfio_group_fd);
+			return -1;
+		}
+		DEBUG("\nvfio_group_fd %u ---> container_fd %u\n",
+				vfio_group_fd, vfio_cfg->vfio_container_fd);
+	}
+
+	/* NOTE(review): the failure paths below leave vfio_group_fd
+	 * open, unlike the paths above which close it.
+	 */
+	ret = ioctl(vfio_cfg->vfio_container_fd, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU);
+	if (ret) {
+		DEBUG("%s set IOMMU type failed, error %i (%s)\n",
+				dev_addr, errno, strerror(errno));
+		return -1;
+	}
+
+	/* get a file descriptor for the device */
+	*vfio_dev_fd = ioctl(vfio_group_fd, VFIO_GROUP_GET_DEVICE_FD, dev_addr);
+	if (*vfio_dev_fd < 0) {
+		DEBUG("%s not managed by VFIO driver\n", dev_addr);
+		return -1;
+	}
+
+	/* test and setup the device */
+	ret = ioctl(*vfio_dev_fd, VFIO_DEVICE_GET_INFO, device_info);
+	if (ret) {
+		DEBUG("%s cannot get device info, error %i (%s)\n",
+				dev_addr, errno, strerror(errno));
+		close(*vfio_dev_fd);
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Map the virtio-net PCI device through VFIO: set up the VFIO device
+ * fd, mmap every mappable memory BAR (skipping I/O-port BARs and the
+ * MSI-X table BAR), configure interrupts and bus mastering, then
+ * reset the device. BAR addresses are stored in pdev->mem_resource
+ * and the device fd in vpci->hw.vfio_cfg.vfio_dev_fd.
+ * Returns 0 on success, -1 on failure (device fd closed).
+ */
+static int
+virtio_net_pci_vfio_map_resource(struct virtio_net_pci *vpci)
+{
+	struct rte_pci_device *pdev = &vpci->pdev;
+	struct vfio_device_info device_info = { .argsz = sizeof(device_info) };
+	char pci_addr[PATH_MAX] = {0};
+	int vfio_dev_fd;
+	struct rte_pci_addr *loc = &pdev->addr;
+	int i, ret, nb_maps;
+
+	uint32_t ioport_bar;
+	struct pci_msix_table msix_table;
+
+	pdev->intr_handle.fd = -1;
+	pdev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
+
+	/* store PCI address string */
+	snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT,
+			loc->domain, loc->bus, loc->devid, loc->function);
+
+	ret = vfio_setup_device(rte_pci_get_sysfs_path(), pci_addr,
+			&vfio_dev_fd, &device_info, &vpci->hw);
+	if (ret)
+		return ret;
+
+	ret = pci_vfio_get_msix_bar(vfio_dev_fd, &msix_table);
+	if (ret < 0) {
+		DEBUG("%s cannot get MSI-X BAR number!\n", pci_addr);
+		close(vfio_dev_fd);
+		return -1;
+	}
+
+	/* get number of regions (up to BAR5) */
+	nb_maps = RTE_MIN((int) device_info.num_regions,
+				VFIO_PCI_BAR5_REGION_INDEX + 1);
+
+	/* map BARs */
+	for (i = 0; i < nb_maps; i++) {
+		struct vfio_region_info reg = { .argsz = sizeof(reg) };
+		void *bar_addr;
+
+		reg.index = i;
+		ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_REGION_INFO, &reg);
+
+		if (ret) {
+			DEBUG("%s cannot get device region info error %i (%s)\n",
+					pci_addr, errno, strerror(errno));
+			goto fail;
+		}
+
+		/* read the raw BAR register from PCI config space to
+		 * detect I/O-port BARs
+		 */
+		ret = pread(vfio_dev_fd, &ioport_bar, sizeof(ioport_bar),
+				VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX)
+				+ PCI_BASE_ADDRESS_0 + i * 4);
+
+		if (ret != sizeof(ioport_bar)) {
+			DEBUG("Cannot read command (%x) from config space!\n",
+				PCI_BASE_ADDRESS_0 + i * 4);
+			goto fail;
+		}
+
+		/* check for io port region */
+		if (ioport_bar & PCI_BASE_ADDRESS_SPACE_IO)
+			continue;
+
+		/* skip non-mmapable BARs */
+		if ((reg.flags & VFIO_REGION_INFO_FLAG_MMAP) == 0)
+			continue;
+
+		/* the MSI-X table BAR must not be mmapped by userspace */
+		if (i == msix_table.bar_index)
+			continue;
+
+		/* try mapping somewhere close to the end of hugepages */
+		if (pci_map_addr == NULL)
+			pci_map_addr = pci_find_max_end_va();
+
+		bar_addr = pci_map_addr;
+		pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg.size);
+
+		/* reserve the address using an inaccessible mapping */
+		bar_addr = mmap(bar_addr, reg.size, 0, MAP_PRIVATE |
+				MAP_ANONYMOUS, -1, 0);
+		if (bar_addr != MAP_FAILED) {
+			void *map_addr = NULL;
+			/* then remap the BAR over the reservation */
+			if (reg.size) {
+				map_addr = pci_map_resource(bar_addr, vfio_dev_fd,
+					reg.offset, reg.size, MAP_FIXED);
+			}
+
+			if (map_addr == MAP_FAILED || !map_addr) {
+				munmap(bar_addr, reg.size);
+				bar_addr = MAP_FAILED;
+			}
+		}
+
+		if (bar_addr == MAP_FAILED) {
+			DEBUG("%s mapping BAR%i failed: %s\n", pci_addr, i,
+					strerror(errno));
+			goto fail;
+		}
+		pdev->mem_resource[i].addr = bar_addr;
+	}
+
+	if (pci_vfio_setup_interrupts(pdev, vfio_dev_fd) != 0) {
+		DEBUG("%s error setting up interrupts!\n", pci_addr);
+		goto fail;
+	}
+
+	/* set bus mastering for the device */
+	if (pci_vfio_set_bus_master(vfio_dev_fd, true)) {
+		DEBUG("%s cannot set up bus mastering!\n", pci_addr);
+		goto fail;
+	}
+
+	/* Reset the device */
+	ioctl(vfio_dev_fd, VFIO_DEVICE_RESET);
+	vpci->hw.vfio_cfg.vfio_dev_fd = vfio_dev_fd;
+
+	return 0;
+
+fail:
+	close(vfio_dev_fd);
+	return -1;
+}
+
+/* With vfio-pci, map config space to virtio_net_hw. */
+static int
+virtio_net_map_pci(struct virtio_net_pci *vpci)
+{
+	uint8_t pos;
+	struct virtio_net_pci_cap cap;
+	struct rte_pci_device *dev = &vpci->pdev;
+	struct virtio_net_hw *hw = &vpci->hw;
+	int ret;
+
+	if (virtio_net_pci_vfio_map_resource(vpci)) {
+		DEBUG("failed to map pci device!\n");
+		return -1;
+	}
+
+	ret = rte_pci_read_config(dev, &pos, 1, PCI_CAPABILITY_LIST);
+	if (ret < 0) {
+		DEBUG("failed to read pci capability list\n");
+		return -1;
+	}
+
+	while (pos) {
+		ret = rte_pci_read_config(dev, &cap, sizeof(cap), pos);
+		if (ret < 0) {
+			DEBUG("failed to read pci cap at pos: %x", pos);
+			break;
+		}
+
+		if (cap.cap_vndr != PCI_CAP_ID_VNDR)
+			goto next;
+
+		DEBUG("[%2x] cfg type: %u, bar: %u, offset: %04x, len: %u\n",
+			pos, cap.cfg_type, cap.bar, cap.offset, cap.length);
+
+		switch (cap.cfg_type) {
+		case VIRTIO_PCI_CAP_COMMON_CFG:
+			hw->common_cfg = get_cap_addr(dev, &cap);
+			break;
+		case VIRTIO_PCI_CAP_NOTIFY_CFG:
+			rte_pci_read_config(dev, &hw->notify_off_multiplier,
+						4, pos + sizeof(cap));
+			hw->notify_base = get_cap_addr(dev, &cap);
+			break;
+		case VIRTIO_PCI_CAP_DEVICE_CFG:
+			hw->dev_cfg = get_cap_addr(dev, &cap);
+			break;
+		case VIRTIO_PCI_CAP_ISR_CFG:
+			hw->isr = get_cap_addr(dev, &cap);
+			break;
+		}
+
+next:
+		pos = cap.cap_next;
+	}
+
+	if (hw->common_cfg == NULL || hw->notify_base == NULL ||
+			hw->dev_cfg == NULL || hw->isr == NULL) {
+		DEBUG("no modern virtio pci device found.\n");
+		return -1;
+	}
+
+	DEBUG("capability mapping:\ncommon cfg: %p\ndevice cfg: %p\n"
+			"isr cfg: %p\nnotify base: %p\nmultiplier: %u\n",
+			hw->common_cfg, hw->dev_cfg,
+			hw->isr, hw->notify_base, hw->notify_off_multiplier);
+
+	return 0;
+}
+
+static uint8_t
+virtio_net_get_status(struct virtio_net_hw *hw)
+{
+	/* Read the device_status register from the common config region. */
+	uint8_t status;
+
+	status = rte_read8(&hw->common_cfg->device_status);
+	return status;
+}
+
+static void
+virtio_net_set_status(struct virtio_net_hw *hw, uint8_t status)
+{
+	/* Write the device_status register in the common config region. */
+	uint8_t *reg = &hw->common_cfg->device_status;
+
+	rte_write8(status, reg);
+}
+
+static void
+virtio_net_vtpci_reset(struct virtio_net_hw *hw)
+{
+	/* Writing 0 (RESET) to device_status resets the device. */
+	virtio_net_set_status(hw, VIRTIO_CONFIG_STATUS_RESET);
+
+	/* Poll until the device reports the reset completed. */
+	for (;;) {
+		if (virtio_net_get_status(hw) == 0)
+			break;
+		rte_delay_ms(1);
+	}
+}
+
+static void
+virtio_net_vtpci_set_status(struct virtio_net_hw *hw, uint8_t status)
+{
+	uint8_t cur;
+
+	/* Status bits accumulate; only a full reset clears them. */
+	if (status != VIRTIO_CONFIG_STATUS_RESET) {
+		cur = virtio_net_get_status(hw);
+		status |= cur;
+	}
+
+	virtio_net_set_status(hw, status);
+	/* Read back to flush the posted MMIO write. */
+	virtio_net_get_status(hw);
+}
+
+static uint64_t
+virtio_net_get_features(struct virtio_net_hw *hw)
+{
+	struct virtio_net_pci_common_cfg *cfg = hw->common_cfg;
+	uint64_t lo, hi;
+
+	/* The 64-bit feature word is exposed via a 32-bit select/read pair. */
+	rte_write32(0, &cfg->device_feature_select);
+	lo = rte_read32(&cfg->device_feature);
+
+	rte_write32(1, &cfg->device_feature_select);
+	hi = rte_read32(&cfg->device_feature);
+
+	return (hi << 32) | lo;
+}
+
+static void
+virtio_net_set_features(struct virtio_net_hw *hw, uint64_t features)
+{
+	struct virtio_net_pci_common_cfg *cfg = hw->common_cfg;
+	uint32_t lo, hi;
+
+	/** vIOMMU to support this virtio device used as vDPA device **/
+	features |= (1ULL << VIRTIO_F_IOMMU_PLATFORM);
+
+	lo = (uint32_t)features;
+	hi = (uint32_t)(features >> 32);
+
+	/* Guest features are written via a 32-bit select/write pair. */
+	rte_write32(0, &cfg->guest_feature_select);
+	rte_write32(lo, &cfg->guest_feature);
+
+	rte_write32(1, &cfg->guest_feature_select);
+	rte_write32(hi, &cfg->guest_feature);
+}
+
+/*
+ * Negotiate features: intersect what qemu/guest requested with what the
+ * physical device offers, program the result and latch FEATURES_OK.
+ *
+ * Returns 0 on success, -1 if the device refuses the feature set.
+ *
+ * Fix: "%lx" is undefined behavior for uint64_t on ILP32 targets; cast
+ * to unsigned long long and use "%llx" so the format always matches.
+ */
+static int
+virtio_net_negotiate_features(struct virtio_net_hw *hw, uint64_t req_features)
+{
+	uint64_t host_features;
+
+	DEBUG("%s: qemu and guest negotiated feature: 0x%llx\n",
+			__func__, (unsigned long long)req_features);
+
+	/* Read device(host) feature bits */
+	host_features = virtio_net_get_features(hw);
+	DEBUG("%s: VIRTIO_NET device supported feature: 0x%llx\n",
+			__func__, (unsigned long long)host_features);
+
+	hw->req_guest_features = req_features;
+	hw->guest_features = req_features & host_features;
+	virtio_net_set_features(hw, hw->guest_features);
+	DEBUG("%s: VIRTIO_NET device configed feature: 0x%llx\n",
+			__func__, (unsigned long long)hw->guest_features);
+
+	/* The device must accept the features before queues are set up. */
+	virtio_net_vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_FEATURES_OK);
+	if (!(virtio_net_get_status(hw) & VIRTIO_CONFIG_STATUS_FEATURES_OK)) {
+		DEBUG("failed to set FEATURES_OK status!\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+/* Split a 64-bit value across two 32-bit MMIO registers. */
+static inline void
+virtio_net_io_write64_twopart(uint64_t val, uint32_t *lo, uint32_t *hi)
+{
+	rte_write32((uint32_t)val, lo);
+	rte_write32((uint32_t)(val >> 32), hi);
+}
+
+/*
+ * Translate a qemu virtual address (host_user_addr space) into the guest
+ * physical address the device must use for DMA.
+ *
+ * Fix: stop scanning once the containing region is found; regions are
+ * disjoint, so continuing the loop was wasted work.
+ *
+ * NOTE(review): a result of 0 is treated as "not found" and panics, which
+ * would mis-fire if a region legitimately starts at GPA 0 -- confirm.
+ */
+static inline uint64_t qva_to_gpa(struct virtio_net *dev, uint64_t qva)
+{
+	struct rte_vhost_mem_region *reg;
+	uint32_t i;
+	uint64_t gpa = 0;
+
+	for (i = 0; i < dev->mem->nregions; i++) {
+		reg = &dev->mem->regions[i];
+
+		if (qva >= reg->host_user_addr &&
+				qva < reg->host_user_addr + reg->size) {
+			gpa = qva - reg->host_user_addr + reg->guest_phys_addr;
+			break;
+		}
+	}
+
+	if (gpa == 0)
+		rte_panic("failed to get gpa\n");
+
+	return gpa;
+}
+
+/*
+ * Program every vring into the physical device: translate the guest's
+ * desc/avail/used addresses to GPAs, write them through the common config
+ * select/write registers, record the per-queue notify address, and enable
+ * the queue. Register write order follows the select-then-program pattern
+ * required by the common config layout. Always returns 0.
+ */
+static int virtio_net_config_queues(struct virtio_net *dev)
+{
+	struct virtio_net_hw *hw = &virtio_net_pool[dev->eid][dev->did].hw;
+	struct virtio_net_pci_common_cfg *cfg = hw->common_cfg;
+	struct vhost_virtqueue *vq;
+	uint64_t desc_addr, avail_addr, used_addr;
+	uint32_t i;
+	uint16_t notify_off;
+
+	for (i = 0; i < dev->nr_vring; i++) {
+		vq = dev->virtqueue[i];
+		/* The device DMAs with guest physical addresses. */
+		desc_addr = qva_to_gpa(dev, (uint64_t)vq->desc);
+		avail_addr = qva_to_gpa(dev, (uint64_t)vq->avail);
+		used_addr = qva_to_gpa(dev, (uint64_t)vq->used);
+
+		/* Select the queue, then program its registers. */
+		rte_write16(i, &cfg->queue_select);
+		virtio_net_io_write64_twopart(desc_addr, &cfg->queue_desc_lo,
+				&cfg->queue_desc_hi);
+		virtio_net_io_write64_twopart(avail_addr, &cfg->queue_avail_lo,
+				&cfg->queue_avail_hi);
+		virtio_net_io_write64_twopart(used_addr, &cfg->queue_used_lo,
+				&cfg->queue_used_hi);
+		rte_write16((uint16_t)vq->size, &cfg->queue_size);
+
+		/* Kick address = notify base + per-queue offset * multiplier. */
+		notify_off = rte_read16(&cfg->queue_notify_off);
+		hw->notify_addr[i] = (void *)((uint8_t *)hw->notify_base +
+				notify_off * hw->notify_off_multiplier);
+		rte_write16(1, &cfg->queue_enable);
+
+		DEBUG("queue %u addresses:\n"
+				"desc_addr: 0x%lx\tavail_addr: 0x%lx\tused_addr: 0x%lx\n"
+				"queue size: %u\t\tnotify addr: %p\tnotify offset: %u\n",
+				i, desc_addr, avail_addr, used_addr,
+				vq->size, hw->notify_addr[i], notify_off);
+	}
+
+	return 0;
+}
+
+/*
+ * Assign MSI-X vectors: vector 0 for config/link-status changes, vector
+ * i+1 for vring i. The device answers VIRTIO_MSI_NO_VECTOR in the
+ * read-back when it cannot allocate a vector.
+ *
+ * Returns 0 on success, -1 on any vector allocation failure.
+ */
+static int virtio_net_config_irqs(struct virtio_net *dev)
+{
+	uint32_t i;
+	struct virtio_net_hw *hw = &virtio_net_pool[dev->eid][dev->did].hw;
+	struct virtio_net_pci_common_cfg *cfg = hw->common_cfg;
+
+	rte_write16(0, &cfg->msix_config);
+	if (rte_read16(&cfg->msix_config) == VIRTIO_MSI_NO_VECTOR) {
+		DEBUG("For LSC, allocate msix vec failed\n");
+		return -1;
+	}
+
+	for (i = 0; i < dev->nr_vring; i++) {
+		/* Select the queue first, then program its vector. */
+		rte_write16(i, &cfg->queue_select);
+		rte_write16(i + 1, &cfg->queue_msix_vector);
+		if (rte_read16(&cfg->queue_msix_vector) == VIRTIO_MSI_NO_VECTOR) {
+			DEBUG("queue id %u, allocate msix vec failed\n", i);
+			return -1;
+		}
+	}
+	DEBUG("\n%s config irqs OK, num of queues %u\n", __func__, i);
+	return 0;
+}
+
+/*
+ * Quiesce the device: detach the config MSI-X vector, then disable each
+ * vring and detach its vector. Mirrors virtio_net_config_queues/irqs.
+ */
+static void virtio_net_stop_queues(struct virtio_net *dev)
+{
+	uint32_t i;
+	struct virtio_net_hw *hw = &virtio_net_pool[dev->eid][dev->did].hw;
+	struct virtio_net_pci_common_cfg *cfg = hw->common_cfg;
+
+	rte_write16(VIRTIO_MSI_NO_VECTOR, &cfg->msix_config);
+	for (i = 0; i < dev->nr_vring; i++) {
+		rte_write16(i, &cfg->queue_select);
+		rte_write16(0, &cfg->queue_enable);
+		rte_write16(VIRTIO_MSI_NO_VECTOR, &cfg->queue_msix_vector);
+	}
+}
+
+/*
+ * Wire up MSI-X delivery through vfio: vector 0 gets the LSC eventfd,
+ * vectors 1..nr_vring get the vrings' callfds, so a device interrupt on
+ * vring i fires the eventfd the vhost front-end is waiting on.
+ *
+ * The variable-length vfio_irq_set (header + one fd per vector) is built
+ * in a stack buffer of MSIX_IRQ_SET_BUF_LEN bytes.
+ *
+ * Returns 0 on success, -1 if the VFIO_DEVICE_SET_IRQS ioctl fails.
+ */
+static int virtio_net_enable_vfio_intr(struct virtio_net *dev)
+{
+	int ret;
+	uint32_t i, len;
+	char irq_set_buf[MSIX_IRQ_SET_BUF_LEN];
+	struct vfio_irq_set *irq_set;
+	int *fd_ptr;
+	struct virtio_net_pci *vpci;
+
+	vpci = &virtio_net_pool[dev->eid][dev->did];
+	len = sizeof(irq_set_buf);
+	irq_set = (struct vfio_irq_set *) irq_set_buf;
+	irq_set->argsz = len;
+	/* One vector per vring plus vector 0 for LSC. */
+	irq_set->count = dev->nr_vring + 1;
+	irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
+	irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
+	irq_set->start = 0;
+	fd_ptr = (int *) &irq_set->data;
+	fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] = vpci->pdev.intr_handle.fd;
+
+	DEBUG("\n%s device_id %d LSC fd %u, vfio_dev_fd %u\n", __func__,
+			dev->did, vpci->pdev.intr_handle.fd,
+			vpci->pdev.intr_handle.vfio_dev_fd);
+	for (i = 0; i < dev->nr_vring; i++)
+		fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = dev->virtqueue[i]->callfd;
+
+	ret = ioctl(vpci->pdev.intr_handle.vfio_dev_fd,
+			VFIO_DEVICE_SET_IRQS, irq_set);
+
+	if (ret) {
+		DEBUG("Error enabling MSI-X interrupts, dev id %u\n", dev->did);
+		return -1;
+	}
+
+	return 0;
+}
+
+/* Tear down all MSI-X vectors that were installed by
+ * virtio_net_enable_vfio_intr().
+ */
+static int virtio_net_disable_vfio_intr(struct virtio_net *dev)
+{
+	char buf[MSIX_IRQ_SET_BUF_LEN];
+	struct vfio_irq_set *irqs = (struct vfio_irq_set *) buf;
+	struct virtio_net_pci *pci_dev = &virtio_net_pool[dev->eid][dev->did];
+
+	/* count == 0 with DATA_NONE asks vfio to remove every vector. */
+	irqs->argsz = sizeof(buf);
+	irqs->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
+	irqs->index = VFIO_PCI_MSIX_IRQ_INDEX;
+	irqs->start = 0;
+	irqs->count = 0;
+
+	if (ioctl(pci_dev->pdev.intr_handle.vfio_dev_fd,
+			VFIO_DEVICE_SET_IRQS, irqs)) {
+		DEBUG("Error disabling MSI-X interrupts, dev id %u\n", dev->did);
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Bring the physical virtio device up for the guest: reset, announce the
+ * driver, negotiate features, program queues and MSI-X vectors, then set
+ * DRIVER_OK.
+ *
+ * Fix: the return values of virtio_net_config_queues() and
+ * virtio_net_config_irqs() were ignored, so DRIVER_OK could be reported
+ * even after an MSI-X vector allocation failure.
+ *
+ * Returns 0 on success, -1 on any setup failure.
+ */
+static int virtio_net_conf_pci(struct virtio_net *dev)
+{
+	struct virtio_net_hw *hw = &virtio_net_pool[dev->eid][dev->did].hw;
+	uint64_t features = dev->features;
+
+	/* Reset the device although not necessary at startup. */
+	virtio_net_vtpci_reset(hw);
+
+	/* Tell the host we've noticed this device. */
+	virtio_net_vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_ACK);
+
+	/* Tell the host we've known how to drive the device. */
+	virtio_net_vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_DRIVER);
+
+	if (virtio_net_negotiate_features(hw, features) < 0)
+		return -1;
+
+	/* Do not report DRIVER_OK unless the datapath is fully set up. */
+	if (virtio_net_config_queues(dev) < 0)
+		return -1;
+	if (virtio_net_config_irqs(dev) < 0)
+		return -1;
+
+	virtio_net_vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_DRIVER_OK);
+	return 0;
+}
+
+/*
+ * Program the vfio/IOMMU container with a GPA -> HVA mapping for every
+ * guest memory region so the device can DMA using guest physical
+ * addresses.
+ *
+ * Fix: ret was declared uint32_t while ioctl() returns int; keep it
+ * signed so negative error returns are not silently converted.
+ *
+ * Returns 0 on success, -1 on the first failed VFIO_IOMMU_MAP_DMA.
+ */
+static int
+virtio_net_pci_dma_map(int vfio_container_fd, struct virtio_net *vdev)
+{
+	uint32_t i;
+	int ret;
+	struct rte_vhost_memory *mem = vdev->mem;
+
+	for (i = 0; i < mem->nregions; i++) {
+		struct vfio_iommu_type1_dma_map dma_map;
+		struct rte_vhost_mem_region *reg;
+		reg = &mem->regions[i];
+
+		DEBUG("\n%s device_id %d vfio_container_fd %d, %u th regison, total %u region\n",
+				__func__, vdev->did, vfio_container_fd,
+				i, mem->nregions);
+		DEBUG("region %u host_user_addr 0x%lx, guest_phys_addr 0x%lx, size 0x%0lx\n",
+				i, reg->host_user_addr,
+				reg->guest_phys_addr, reg->size);
+
+		/* iova = GPA, vaddr = HVA: device-visible address is the GPA. */
+		memset(&dma_map, 0, sizeof(dma_map));
+		dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map);
+		dma_map.vaddr = reg->host_user_addr;
+		dma_map.size = reg->size;
+		dma_map.iova = reg->guest_phys_addr;
+		dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE;
+
+		ret = ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA, &dma_map);
+		if (ret) {
+			DEBUG(" cannot set up DMA remapping, error %i (%s)\n",
+					errno, strerror(errno));
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Remove the GPA -> HVA mappings installed by virtio_net_pci_dma_map().
+ * A NULL mem means VM start failed before memory was registered; nothing
+ * to undo then.
+ *
+ * Fix: ret was declared uint32_t while ioctl() returns int; keep it
+ * signed so negative error returns are not silently converted.
+ *
+ * Returns 0 on success, -1 on the first failed VFIO_IOMMU_UNMAP_DMA.
+ */
+static int
+virtio_net_pci_dma_unmap(int vfio_container_fd, struct virtio_net *vdev)
+{
+	uint32_t i;
+	int ret;
+	struct rte_vhost_memory *mem = vdev->mem;
+
+	/* VM start fails */
+	if (mem == NULL)
+		return 0;
+
+	for (i = 0; i < mem->nregions; i++) {
+		struct vfio_iommu_type1_dma_unmap dma_unmap;
+		struct rte_vhost_mem_region *reg;
+		reg = &mem->regions[i];
+
+		DEBUG("region %u host_user_addr 0x%lx, guest_phys_addr 0x%lx, size 0x%0lx\n",
+				i, reg->host_user_addr,
+				reg->guest_phys_addr, reg->size);
+
+		memset(&dma_unmap, 0, sizeof(dma_unmap));
+		dma_unmap.argsz = sizeof(struct vfio_iommu_type1_dma_unmap);
+		dma_unmap.size = reg->size;
+		dma_unmap.iova = reg->guest_phys_addr;
+		dma_unmap.flags = 0;
+
+		ret = ioctl(vfio_container_fd, VFIO_IOMMU_UNMAP_DMA, &dma_unmap);
+
+		if (ret) {
+			DEBUG(" cannot unset DMA remapping, error %i (%s)\n",
+					errno, strerror(errno));
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+/* Stop all queues and reset the physical device. Always returns 0. */
+static int disable_device(struct virtio_net *dev)
+{
+	struct virtio_net_hw *hw;
+
+	hw = &virtio_net_pool[dev->eid][dev->did].hw;
+	virtio_net_stop_queues(dev);
+	virtio_net_vtpci_reset(hw);
+
+	return 0;
+}
+
+/*
+ * vDPA dev_conf back-end: program DMA remapping, configure the virtio
+ * device, enable vfio MSI-X interrupts and start the notify relay.
+ *
+ * Fix: on an intermediate failure the already-acquired resources (DMA
+ * mappings, MSI-X vectors) were leaked; unwind them in reverse order.
+ *
+ * Returns 0 on success, -1 on any failure.
+ */
+static int virtio_net_dev_config(struct virtio_net *dev)
+{
+	int ret;
+	struct rte_pci_device *pdev;
+	struct virtio_net_hw *hw;
+
+	if (!dev || invalid_port(dev->eid, dev->did)) {
+		DEBUG("Invalid virtio_net struct\n");
+		return -1;
+	}
+
+	pdev = &virtio_net_pool[dev->eid][dev->did].pdev;
+	hw = &virtio_net_pool[dev->eid][dev->did].hw;
+
+	DEBUG("Try to config device: dev id %u bus %02x device %02x function %0x\n",
+			dev->did, pdev->addr.bus,
+			pdev->addr.devid, pdev->addr.function);
+
+	DEBUG("\ndevice_id %d mapped, set DMAR\n", dev->did);
+	ret = virtio_net_pci_dma_map(hw->vfio_cfg.vfio_container_fd, dev);
+	if (ret) {
+		DEBUG("device_id %u DMA remapping failed, error %i (%s)\n",
+				dev->did, errno, strerror(errno));
+		return -1;
+	}
+
+	DEBUG("\ndevice_id %d DMAR set, config it\n", dev->did);
+	ret = virtio_net_conf_pci(dev);
+	if (ret)
+		goto err_unmap;
+
+	ret = virtio_net_enable_vfio_intr(dev);
+	if (ret)
+		goto err_unmap;
+
+	ret = setup_notify_relay(dev);
+	if (ret)
+		goto err_intr;
+
+	return 0;
+
+err_intr:
+	/* Unwind in reverse order of acquisition. */
+	virtio_net_disable_vfio_intr(dev);
+err_unmap:
+	virtio_net_pci_dma_unmap(hw->vfio_cfg.vfio_container_fd, dev);
+	return -1;
+}
+
+/*
+ * vDPA dev_close back-end: stop the device, tear down the notify relay
+ * and MSI-X vectors, and remove the DMA mappings.
+ *
+ * Fix: dev was dereferenced (dev->eid/dev->did) before the NULL /
+ * validity check, so a NULL dev crashed instead of returning -1.
+ *
+ * Returns 0 on success, -1 on invalid input or teardown failure.
+ */
+static int virtio_net_dev_close(struct virtio_net *dev)
+{
+	int ret;
+	struct virtio_net_pci *vpci;
+	int vfio_container_fd;
+
+	/* Validate before touching dev at all. */
+	if (!dev || invalid_port(dev->eid, dev->did)) {
+		DEBUG("Invalid virtio_net struct\n");
+		return -1;
+	}
+
+	vpci = &virtio_net_pool[dev->eid][dev->did];
+	vfio_container_fd = vpci->hw.vfio_cfg.vfio_container_fd;
+
+	disable_device(dev);
+	unset_notify_relay(dev);
+	ret = virtio_net_disable_vfio_intr(dev);
+	if (ret < 0)
+		return -1;
+
+	DEBUG("\n%s: unset DMAR for device_id %d\n", __func__, dev->did);
+	ret = virtio_net_pci_dma_unmap(vfio_container_fd, dev);
+	if (ret) {
+		DEBUG("device_id %u DMA reset DMAR failed, error %i (%s)\n",
+				dev->did, errno, strerror(errno));
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Probe and vfio-map up to MAX_VDPA_DEVICE_VIRTIO virtio-net PCI devices
+ * for engine eid, starting from the engine's configured PCI address and
+ * assuming consecutive device ids on the same bus (00:03.0, 00:04.0, ...).
+ *
+ * Fix: ret was declared uint32_t while read_pci_dev()/virtio_net_map_pci()
+ * return int; keep it signed.
+ *
+ * Returns 0 when at least one device was found and mapped, -1 otherwise.
+ */
+static int virtio_net_pool_init(int eid)
+{
+	uint32_t i;
+	int ret;
+	struct virtio_net_pci *vpci;
+	struct rte_pci_device *pdev;
+	struct rte_pci_addr *eng_addr;
+	char dev_dir[PATH_MAX];
+
+	eng_addr = &(vdpa_engines[eid]->eng_attr.id->pci_addr);
+	snprintf(dev_dir, sizeof(dev_dir), "%s/" PCI_PRI_FMT,
+			rte_pci_get_sysfs_path(),
+			eng_addr->domain, eng_addr->bus,
+			eng_addr->devid, eng_addr->function);
+
+	/* The engine's own PCI device must exist in sysfs. */
+	if (access(dev_dir, R_OK) != 0) {
+		DEBUG("%s: "PCI_PRI_FMT" does not exist\n", __func__,
+				eng_addr->domain, eng_addr->bus,
+				eng_addr->devid, eng_addr->function);
+		return -1;
+	}
+
+	memset((char *)virtio_net_pool[eid], 0, sizeof(virtio_net_pool[0]));
+
+	for (i = 0; i < MAX_VDPA_DEVICE_VIRTIO; i++) {
+		pdev = &virtio_net_pool[eid][i].pdev;
+		pdev->addr.domain = eng_addr->domain;
+		pdev->addr.bus = eng_addr->bus;
+		pdev->addr.devid = eng_addr->devid;
+		pdev->addr.function = eng_addr->function;
+		/*
+		 * Assume that the virtio-net-pci listed is like
+		 * 00:03.0, 00:04.0, 00:05,0, and so on.
+		 * NOTE(review): i < MAX_VDPA_DEVICE_VIRTIO (8), so
+		 * "i % 8" == i here -- confirm the modulo's intent.
+		 */
+		pdev->addr.devid += i % 8;
+		if (read_pci_dev(pdev) < 0) {
+			/* First missing device ends the scan, not an error. */
+			DEBUG("Read PCI device failed, dev id %d\n", i);
+			errno = 0;
+			break;
+		}
+		DEBUG("%s: detected "PCI_PRI_FMT"\n", __func__,
+				pdev->addr.domain, pdev->addr.bus,
+				pdev->addr.devid, pdev->addr.function);
+
+		/* Take control of an device by mapping it with vfio. */
+		vpci = &virtio_net_pool[eid][i];
+		ret = virtio_net_map_pci(vpci);
+		if (ret) {
+			DEBUG("\npci map to userspace failed\n");
+			break;
+		}
+	}
+	total_virtio[eid] = i;
+
+	if (total_virtio[eid] <= 0) {
+		DEBUG("\n%s: find no virtio devices\n", __func__);
+		return -1;
+	}
+
+	pool_initiated[eid] = 1;
+	return 0;
+}
+
+/* Close every mapped device's vfio fds and clear the engine's pool. */
+static int virtio_net_pool_uninit(int eid)
+{
+	struct vfio_config *cfg;
+	int idx;
+
+	for (idx = 0; idx < total_virtio[eid]; idx++) {
+		cfg = &virtio_net_pool[eid][idx].hw.vfio_cfg;
+		close(cfg->vfio_dev_fd);
+		close(cfg->group_fd);
+		close(cfg->vfio_container_fd);
+	}
+
+	total_virtio[eid] = 0;
+	pool_initiated[eid] = 0;
+	memset((char *)virtio_net_pool[eid], 0, sizeof(virtio_net_pool[0]));
+
+	return 0;
+}
+
+/* Engine init hook: build the device pool exactly once per engine. */
+static int vdpa_virtio_eng_init(int eid,
+		struct rte_vdpa_eng_id *id __rte_unused)
+{
+	if (pool_initiated[eid])
+		return -1;
+	if (virtio_net_pool_init(eid) != 0)
+		return -1;
+	return 0;
+}
+
+/*
+ * Engine uninit hook: release the pool if it was initialized.
+ * Fix: eid was tagged __rte_unused although it is used below.
+ */
+static int vdpa_virtio_eng_uninit(int eid)
+{
+	if (pool_initiated[eid] && virtio_net_pool_uninit(eid) == 0)
+		return 0;
+	return -1;
+}
+
+/* vDPA dev_conf op: resolve the vhost device and program the datapath. */
+static int vdpa_virtio_dev_conf(int vid)
+{
+	return virtio_net_dev_config(get_device(vid));
+}
+
+/* vDPA dev_close op: resolve the vhost device and tear the datapath down. */
+static int vdpa_virtio_dev_close(int vid)
+{
+	return virtio_net_dev_close(get_device(vid));
+}
+
+/*
+ * Capability query: number of vDPA devices this engine offers.
+ * NOTE(review): returns the compile-time maximum, not the count actually
+ * detected in virtio_net_pool_init (total_virtio) -- confirm intent.
+ */
+int vdpa_virtio_get_device_num(int eid __rte_unused)
+{
+	/* Assume we have MAX_VDPA_DEVICE_VIRTIO virtio_net_pci devices */
+	return MAX_VDPA_DEVICE_VIRTIO;
+}
+
+/* Capability query: queue pairs supported per device (fixed constant). */
+int vdpa_virtio_get_queue_num(int eid __rte_unused, int did __rte_unused)
+{
+	return MAX_QUEUES_VIRTIO;
+}
+
+/* Virtio feature bits this vDPA sample claims on behalf of the device. */
+#define VDPA_SUPPORTED_FEATURES \
+		((1ULL << VIRTIO_F_ANY_LAYOUT) | \
+		(1ULL << VIRTIO_F_VERSION_1) | \
+		(1ULL << VIRTIO_NET_F_MRG_RXBUF) | \
+		(1ULL << VHOST_USER_F_PROTOCOL_FEATURES))
+
+/* Capability query: supported virtio features (same for every device). */
+uint64_t vdpa_virtio_get_features(int eid __rte_unused, int did __rte_unused)
+{
+	return VDPA_SUPPORTED_FEATURES;
+}
+
+/* vhost-user protocol feature bits this sample supports. */
+#define VDPA_SUPPORTED_PROTOCOL_FEATURES \
+		((1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD) |\
+		(1ULL << VHOST_USER_PROTOCOL_F_REPLY_ACK))
+
+/* Capability query: supported vhost-user protocol features. */
+uint64_t vdpa_virtio_get_protocol_features(int eid __rte_unused,
+		int did __rte_unused)
+{
+	return VDPA_SUPPORTED_PROTOCOL_FEATURES;
+}
+
+/*
+ * Allocate a pool element for a new vhost socket on engine eid.
+ * Delegates to virtio_net_alloc_hw_ele (defined elsewhere in this file).
+ */
+int vdpa_virtio_dev_init(int eid, char *args)
+{
+	return virtio_net_alloc_hw_ele(eid, args);
+}
+
+/*
+ * Release a previously allocated pool element.
+ * Delegates to virtio_net_free_hw_ele (defined elsewhere in this file).
+ */
+int vdpa_virtio_dev_uninit(int eid, int did)
+{
+	return virtio_net_free_hw_ele(eid, did);
+}
+
+/* Datapath start hook: a no-op for this driver (always succeeds). */
+int vdpa_virtio_dev_start(int eid __rte_unused, int did __rte_unused)
+{
+	return 0;
+}
+
+/* Datapath stop hook: a no-op for this driver (always succeeds). */
+int vdpa_virtio_dev_stop(int eid __rte_unused, int did __rte_unused)
+{
+	return 0;
+}
+
+/*
+ * vDPA engine driver descriptor: wires the engine/device lifecycle
+ * callbacks into the vDPA framework. vring_state_set and migration_done
+ * are intentionally left unimplemented in this sample.
+ */
+struct rte_vdpa_eng_driver vdpa_virtio_net_driver = {
+	.name = "vdpa_virtio_net",
+	.eng_ops = {
+		.eng_init = vdpa_virtio_eng_init,
+		.eng_uninit = vdpa_virtio_eng_uninit,
+	},
+	.dev_ops = {
+		.dev_conf = vdpa_virtio_dev_conf,
+		.dev_close = vdpa_virtio_dev_close,
+		.vring_state_set = NULL,
+		.migration_done = NULL,
+	},
+};
+
+/* Register this driver with the vDPA framework at startup. */
+RTE_VDPA_REGISTER_DRIVER(vdpa_virtio_net, vdpa_virtio_net_driver);
diff --git a/examples/vdpa/vdpa_virtio_net.h b/examples/vdpa/vdpa_virtio_net.h
new file mode 100644
index 0000000..b0a386a
--- /dev/null
+++ b/examples/vdpa/vdpa_virtio_net.h
@@ -0,0 +1,144 @@ 
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _VDPA_VIRTIO_NET_H_
+/*
+ * Fix: the guard macro previously lacked the trailing underscore
+ * (_VDPA_VIRTIO_NET_H), so it never matched the #ifndef above and the
+ * header was not protected against double inclusion.
+ */
+#define _VDPA_VIRTIO_NET_H_
+#include <stdint.h>
+#include <stdbool.h>
+#include <pthread.h>
+#include <linux/vfio.h>
+#include <rte_bus_pci.h>
+#include <rte_vdpa.h>
+#include <vhost.h>
+#include <vhost_user.h>
+
+#define MAX_VDPA_DEVICE_VIRTIO	8
+#define MAX_QUEUES_VIRTIO	1
+
+/* This is the PCI capability header: */
+/*
+ * Read straight out of PCI config space (mirrors the spec's
+ * virtio_pci_cap), so the field layout must not be changed.
+ */
+struct virtio_net_pci_cap {
+	uint8_t cap_vndr;    /* Generic PCI field: PCI_CAP_ID_VNDR */
+	uint8_t cap_next;    /* Generic PCI field: next ptr. */
+	uint8_t cap_len;     /* Generic PCI field: capability length */
+	uint8_t cfg_type;    /* Identifies the structure. */
+	uint8_t bar;         /* Where to find it. */
+	uint8_t padding[3];  /* Pad to full dword. */
+	uint32_t offset;     /* Offset within bar. */
+	uint32_t length;     /* Length of the structure, in bytes. */
+};
+
+/* Notify capability: generic cap header plus the notify multiplier. */
+struct virtio_net_pci_notify_cap {
+	struct virtio_net_pci_cap cap;
+	uint32_t notify_off_multiplier;	/* Multiplier for queue_notify_off. */
+};
+
+/* Fields in VIRTIO_PCI_CAP_COMMON_CFG: */
+/*
+ * Overlaid directly on the device's MMIO common-config region; field
+ * order and widths must match the register layout -- do not reorder.
+ */
+struct virtio_net_pci_common_cfg {
+	/* About the whole device. */
+	uint32_t device_feature_select;	/* read-write */
+	uint32_t device_feature;	/* read-only */
+	uint32_t guest_feature_select;	/* read-write */
+	uint32_t guest_feature;		/* read-write */
+	uint16_t msix_config;		/* read-write */
+	uint16_t num_queues;		/* read-only */
+	uint8_t device_status;		/* read-write */
+	uint8_t config_generation;	/* read-only */
+
+	/* About a specific virtqueue. */
+	uint16_t queue_select;		/* read-write */
+	uint16_t queue_size;		/* read-write, power of 2. */
+	uint16_t queue_msix_vector;	/* read-write */
+	uint16_t queue_enable;		/* read-write */
+	uint16_t queue_notify_off;	/* read-only */
+	uint32_t queue_desc_lo;		/* read-write */
+	uint32_t queue_desc_hi;		/* read-write */
+	uint32_t queue_avail_lo;	/* read-write */
+	uint32_t queue_avail_hi;	/* read-write */
+	uint32_t queue_used_lo;		/* read-write */
+	uint32_t queue_used_hi;		/* read-write */
+};
+
+/* Device-specific (network) config region; packed to match the device. */
+struct virtio_net_net_config {
+	/* The config defining mac address (if VIRTIO_NET_F_MAC) */
+	uint8_t    mac[ETHER_ADDR_LEN];
+	/* See VIRTIO_NET_F_STATUS and VIRTIO_NET_S_* above */
+	uint16_t   status;
+	uint16_t   max_virtqueue_pairs;
+} __attribute__((packed));
+
+/* File descriptors and group number backing one vfio-mapped device. */
+struct vfio_config {
+	int vfio_dev_fd;	/* vfio device fd */
+	int vfio_container_fd;	/* IOMMU container fd */
+	int group_no;		/* vfio group number */
+	int group_fd;		/* vfio group fd */
+};
+
+/* Per-device hardware state: mapped config regions, negotiated features
+ * and vfio handles.
+ */
+struct virtio_net_hw {
+	struct virtnet_ctl *cvq;
+	uint64_t    req_guest_features;	/* features the guest requested */
+	uint64_t    guest_features;	/* intersection actually programmed */
+	uint32_t    notify_off_multiplier;
+	uint8_t     *isr;		/* mapped ISR region */
+	uint16_t    *notify_base;	/* mapped notify region base */
+	struct virtio_net_pci_common_cfg *common_cfg;
+	struct virtio_net_net_config *dev_cfg;
+	/* one kick address per vring (queue pairs: 2 per pair) */
+	uint16_t    *notify_addr[MAX_QUEUES_VIRTIO * 2];
+	struct vfio_config vfio_cfg;
+};
+
+/* One pool slot: PCI device, its hardware state and the relay thread. */
+struct virtio_net_pci {
+	bool used;		/* slot allocated to a vhost socket */
+	struct rte_pci_device pdev;
+	struct virtio_net_hw hw;
+	pthread_t tid;	/* thread for notify relay */
+	int epfd;	/* epoll fd used by the notify relay */
+};
+
+/*
+ * NOTE(review): this unconditional #define forces debug logging on and
+ * makes the #else branch dead -- confirm it is intended for the sample,
+ * or drop it for release builds.
+ */
+#define RTE_LIBRTE_VHOST_VIRTIO_NET_DEBUG
+#ifdef RTE_LIBRTE_VHOST_VIRTIO_NET_DEBUG
+	#define DEBUG(fmt, ...) printf(fmt, ##__VA_ARGS__)
+#else
+	#define DEBUG(fmt, ...) do {} while (0)
+#endif
+
+/* Public driver entry points (capability queries and device lifecycle). */
+int vdpa_virtio_get_device_num(int eid);
+int vdpa_virtio_get_queue_num(int eid, int did);
+uint64_t vdpa_virtio_get_features(int eid, int did);
+uint64_t vdpa_virtio_get_protocol_features(int eid, int did);
+int vdpa_virtio_dev_init(int eid, char *args);
+int vdpa_virtio_dev_uninit(int eid, int did);
+int vdpa_virtio_dev_start(int eid, int did);
+int vdpa_virtio_dev_stop(int eid, int did);
+
+#endif /* _VDPA_VIRTIO_NET_H_ */