[dpdk-dev,3/3] examples/vdpa: add a new sample for vdpa

Message ID 20180204145542.38345-4-xiao.w.wang@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Maxime Coquelin
Headers

Checks

Context Check Description
ci/checkpatch warning coding style issues
ci/Intel-compilation fail Compilation issues

Commit Message

Xiao Wang Feb. 4, 2018, 2:55 p.m. UTC
  This patch adds a sample which creates vhost-user socket based on
vdpa driver. vdpa driver can help to set up vhost datapath so this
app doesn't need to spend a dedicated worker thread on vhost
enqueue/dequeue operations.

Below are setup steps for your reference:

1. Make sure your kernnel vhost module and QEMU support vIOMMU.
   - OS: CentOS 7.4
   - QEMU: 2.10.1
   - Guest OS: CentOS 7.2
   - Nested VM OS: CentOS 7.2

2. enable VT-x feature for vCPU in VM.
   modprobe kvm_intel nested=1

3. Start a VM with a virtio-net-pci device.
   ./qemu-2.10.1/x86_64-softmmu/qemu-system-x86_64 -enable-kvm -cpu host \
   <snip>
   -machine q35 \
   -device intel-iommu \
   -netdev tap,id=mytap,ifname=vdpa,vhostforce=on \
   -device virtio-net-pci,netdev=mytap,mac=00:aa:bb:cc:dd:ee,\
   disable-modern=off,disable-legacy=on,iommu_platform=on \

4. Bind VFIO-pci to virtio_net_pci device
   a) login to VM;
   b) modprobe vfio-pci
   c) rmmod vfio_iommu_type1
   d) modprobe vfio_iommu_type1 allow_unsafe_interrupts=1
   e) ./usertools/dpdk-devbind.py -b vfio-pci 00:03.0

5. Start vdpa sample
   ./examples/vdpa/build/vdpa -c 0x2 -n 4 --socket-mem 1024 --no-pci \
    --vdev "net_vdpa_virtio_pci0,bdf=0000:00:03.0" -- --bdf 0000:00:03.0 \
    --iface /tmp/vhost-user- --devcnt 1  --queue 1

6. Start nested VM
   ./qemu-2.10.1/x86_64-softmmu/qemu-system-x86_64 -cpu host -enable-kvm \
   <snip>
   -mem-prealloc \
   -chardev socket,id=char0,path=/tmp/vhost-user-0 \
   -netdev type=vhost-user,id=vdpa,chardev=char0,vhostforce \
   -device virtio-net-pci,netdev=vdpa,mac=00:aa:bb:cc:dd:ee \

7. Login the nested VM, and verify the virtio in nested VM can communicate
   with tap device on the host.

Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
---
 examples/vdpa/Makefile |  32 ++++
 examples/vdpa/main.c   | 387 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 419 insertions(+)
 create mode 100644 examples/vdpa/Makefile
 create mode 100644 examples/vdpa/main.c
  

Patch

diff --git a/examples/vdpa/Makefile b/examples/vdpa/Makefile
new file mode 100644
index 000000000..42672a2bc
--- /dev/null
+++ b/examples/vdpa/Makefile
@@ -0,0 +1,32 @@ 
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Intel Corporation
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+ifneq ($(CONFIG_RTE_EXEC_ENV),"linuxapp")
+$(info This application can only operate in a linuxapp environment, \
+please change the definition of the RTE_TARGET environment variable)
+all:
+else
+
+# binary name
+APP = vdpa
+
+# all source are stored in SRCS-y
+SRCS-y := main.c
+
+CFLAGS += -O2 -D_FILE_OFFSET_BITS=64
+CFLAGS += $(WERROR_FLAGS)
+CFLAGS += -D_GNU_SOURCE
+CFLAGS += -DALLOW_EXPERIMENTAL_API
+
+include $(RTE_SDK)/mk/rte.extapp.mk
+
+endif
diff --git a/examples/vdpa/main.c b/examples/vdpa/main.c
new file mode 100644
index 000000000..1c9143469
--- /dev/null
+++ b/examples/vdpa/main.c
@@ -0,0 +1,387 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <getopt.h>
+#include <signal.h>
+#include <stdint.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <rte_ethdev.h>
+#include <rte_malloc.h>
+#include <rte_vhost.h>
+#include <rte_vdpa.h>
+
+#define NUM_MBUFS 8191
+#define MBUF_CACHE_SIZE 250
+
+#define RX_RING_SIZE 128
+#define TX_RING_SIZE 128
+
+#define MAX_PATH_LEN 128
+#define MAX_VDPA_SAMPLE_PORTS 1024
+
+struct vdpa_port {
+	char ifname[MAX_PATH_LEN];
+	int eid;
+	int did;
+	int vid;
+};
+
+struct vdpa_port vports[MAX_VDPA_SAMPLE_PORTS];
+
+struct rte_vdpa_eng_attr attr;
+struct rte_vdpa_eng_addr dev_id;
+char iface[MAX_PATH_LEN];
+int queue;
+int devcnt;
+
+/* display usage */
+static void
+vdpa_usage(const char *prgname)
+{
+	printf("%s [EAL options]"
+		" -- --bdf B:D:F --iface <path> --devcnt ND  --queue NQ\n"
+		" --bdf B:D:F, the PCI device used for vdpa\n"
+		" --iface <path>: The path of the socket file\n"
+		" --devcnt ND: number of vhost sockets to be created, default 1\n"
+		" --queue NQ: number of queue pairs to be configured, default 1\n",
+		prgname);
+}
+
+static int
+get_unsigned(const char *str, int base)
+{
+	unsigned long num;
+	char *end = NULL;
+
+	errno = 0;
+	num = strtoul(str, &end, base);
+	if (str[0] == '\0' || end == NULL || *end != '\0' || errno != 0)
+		return -1;
+
+	return num;
+}
+
+static int
+parse_args(int argc, char **argv)
+{
+	static const char *short_option = "";
+	static struct option long_option[] = {
+		{"bdf", required_argument, NULL, 0},
+		{"queue", required_argument, NULL, 0},
+		{"devcnt", required_argument, NULL, 0},
+		{"iface", required_argument, NULL, 0},
+		{NULL, 0, 0, 0},
+	};
+	char str[MAX_PATH_LEN];
+	int opt, idx;
+	int num[4] = {0};
+	int i, j;
+	char *prgname = argv[0];
+
+	while ((opt = getopt_long(argc, argv, short_option, long_option, &idx))
+			!= EOF) {
+		switch (opt) {
+		case 0:
+			if (strncmp(long_option[idx].name, "bdf",
+						MAX_PATH_LEN) == 0) {
+				strcpy(str, optarg);
+				i = strlen(str) - 1;
+				j = 3;
+				while (i > 0 && j >= 0) {
+					while ((str[i - 1] != ':'
+							&& str[i - 1] != '.')
+							&& i > 0)
+						i--;
+					num[j--] = get_unsigned(&str[i], 16);
+					i--;
+					if (i >= 0)
+						str[i] = '\0';
+				}
+				dev_id.pci_addr.domain = num[0];
+				dev_id.pci_addr.bus = num[1];
+				dev_id.pci_addr.devid = num[2];
+				dev_id.pci_addr.function = num[3];
+				printf("bdf %04x:%02x:%02x.%02x\n",
+						dev_id.pci_addr.domain,
+						dev_id.pci_addr.bus,
+						dev_id.pci_addr.devid,
+						dev_id.pci_addr.function);
+			} else if (strncmp(long_option[idx].name, "queue",
+						MAX_PATH_LEN) == 0) {
+				queue = get_unsigned(optarg, 10);
+				printf("queue %d\n", queue);
+			} else if (strncmp(long_option[idx].name, "devcnt",
+						MAX_PATH_LEN) == 0) {
+				devcnt = get_unsigned(optarg, 10);
+				printf("devcnt %d\n", devcnt);
+			} else if (strncmp(long_option[idx].name, "iface",
+						MAX_PATH_LEN) == 0) {
+				strncpy(iface, optarg, MAX_PATH_LEN);
+				printf("iface %s\n", iface);
+			}
+
+			break;
+
+		default:
+			vdpa_usage(prgname);
+			return -1;
+		}
+	}
+
+	if (queue <= 0 || devcnt <= 0 || *iface == '\0') {
+		vdpa_usage(prgname);
+		return -1;
+	}
+
+	return 0;
+}
+
+static void
+data_init(void)
+{
+	devcnt = 1;
+	queue = 1;
+	memset(&dev_id, 0, sizeof(dev_id));
+	memset(iface, 0, MAX_PATH_LEN * sizeof(iface[0]));
+	memset(vports, 0, MAX_VDPA_SAMPLE_PORTS * sizeof(vports[0]));
+
+	return;
+}
+
+static void
+signal_handler(int signum)
+{
+	uint16_t portid, nb_ports;
+
+	if (signum == SIGINT || signum == SIGTERM) {
+		printf("\nSignal %d received, preparing to exit...\n",
+				signum);
+		nb_ports = rte_eth_dev_count();
+		for (portid = 0; portid < nb_ports; portid++) {
+			printf("Closing port %d...\n", portid);
+			rte_eth_dev_stop(portid);
+			rte_eth_dev_close(portid);
+		}
+		exit(0);
+	}
+}
+
+static int
+new_device(int vid)
+{
+	char ifname[MAX_PATH_LEN];
+	int i;
+
+	rte_vhost_get_ifname(vid, ifname, sizeof(ifname));
+	for (i = 0; i < MAX_VDPA_SAMPLE_PORTS; i++) {
+		if (strcmp(ifname, vports[i].ifname) == 0) {
+			printf("\nport %s connected, eid: %d, did %d\n",
+					ifname, vports[i].eid, vports[i].did);
+			vports[i].vid = vid;
+			break;
+		}
+	}
+
+	if (i >= MAX_VDPA_SAMPLE_PORTS)
+		return -1;
+
+	return 0;
+}
+
+static void
+destroy_device(int vid)
+{
+	char ifname[MAX_PATH_LEN];
+	int i;
+
+	rte_vhost_get_ifname(vid, ifname, sizeof(ifname));
+	for (i = 0; i < MAX_VDPA_SAMPLE_PORTS; i++) {
+		if (strcmp(ifname, vports[i].ifname) == 0) {
+			printf("\nport %s disconnected, eid: %d, did %d\n",
+					ifname, vports[i].eid, vports[i].did);
+			vports[i].vid = vid;
+			break;
+		}
+	}
+
+	return;
+}
+
+static const struct vhost_device_ops vdpa_sample_devops = {
+	.new_device = new_device,
+	.destroy_device = destroy_device,
+	.vring_state_changed = NULL,
+	.features_changed = NULL,
+	.new_connection = NULL,
+	.destroy_connection = NULL,
+};
+
+static const struct rte_eth_conf port_conf_default = {
+	.rxmode = {
+		.max_rx_pkt_len = ETHER_MAX_LEN,
+		.ignore_offload_bitfield = 1,
+	},
+};
+
+static inline int
+port_init(uint16_t port, struct rte_mempool *mbuf_pool)
+{
+	uint16_t rx_rings = 1, tx_rings = 1;
+	uint16_t nb_rxd = RX_RING_SIZE;
+	uint16_t nb_txd = TX_RING_SIZE;
+	int retval;
+	uint16_t q;
+	struct rte_eth_dev_info dev_info;
+	struct rte_eth_txconf txconf;
+	struct ether_addr addr;
+
+	if (port >= rte_eth_dev_count())
+		return -1;
+
+	rte_eth_dev_info_get(port, &dev_info);
+
+	/* Configure the Ethernet device. */
+	retval = rte_eth_dev_configure(port, rx_rings, tx_rings,
+			&port_conf_default);
+	if (retval < 0)
+		return retval;
+
+	/* Allocate and set up 1 Rx queue per Ethernet port. */
+	for (q = 0; q < rx_rings; q++) {
+		retval = rte_eth_rx_queue_setup(port, q, nb_rxd,
+				rte_eth_dev_socket_id(port), NULL, mbuf_pool);
+		if (retval < 0)
+			return retval;
+	}
+
+	txconf = dev_info.default_txconf;
+	/* Allocate and set up 1 Tx queue per Ethernet port. */
+	for (q = 0; q < tx_rings; q++) {
+		retval = rte_eth_tx_queue_setup(port, q, nb_txd,
+				rte_eth_dev_socket_id(port), &txconf);
+		if (retval < 0)
+			return retval;
+	}
+
+	/* Start the Ethernet port. */
+	retval = rte_eth_dev_start(port);
+	if (retval < 0)
+		return retval;
+
+	/* Display the port MAC address. */
+	rte_eth_macaddr_get(port, &addr);
+	printf("Port %u MAC: %02" PRIx8 " %02" PRIx8 " %02" PRIx8
+			   " %02" PRIx8 " %02" PRIx8 " %02" PRIx8 "\n",
+			port,
+			addr.addr_bytes[0], addr.addr_bytes[1],
+			addr.addr_bytes[2], addr.addr_bytes[3],
+			addr.addr_bytes[4], addr.addr_bytes[5]);
+
+	return 0;
+}
+
+int
+main(int argc, char *argv[])
+{
+	char ifname[MAX_PATH_LEN];
+	uint16_t nb_ports, portid;
+	struct rte_mempool *mbuf_pool;
+	char ch;
+	int i, eid, did;
+	int ret;
+	uint64_t flags = 0;
+
+	ret = rte_eal_init(argc, argv);
+	if (ret < 0)
+		rte_exit(EXIT_FAILURE, "eal init failed\n");
+	argc -= ret;
+	argv += ret;
+
+	signal(SIGINT, signal_handler);
+	signal(SIGTERM, signal_handler);
+
+	nb_ports = rte_eth_dev_count();
+	mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL", NUM_MBUFS * nb_ports,
+		MBUF_CACHE_SIZE, 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
+
+	/* Initialize all ports. */
+	for (portid = 0; portid < nb_ports; portid++)
+		if (port_init(portid, mbuf_pool) != 0)
+			rte_exit(EXIT_FAILURE, "Cannot init port %d\n",
+					portid);
+
+	data_init();
+
+	ret = parse_args(argc, argv);
+	if (ret < 0)
+		rte_exit(EXIT_FAILURE, "invalid argument\n");
+
+	eid = rte_vdpa_find_engine_id(&dev_id);
+	if (eid < 0)
+		rte_exit(EXIT_FAILURE, "no vDPA engine found\n");
+
+	printf("\nuse engine %d to create vhost socket\n", eid);
+	rte_vdpa_info_query(eid, &attr);
+	if (devcnt > (int)attr.dev_num)
+		rte_exit(EXIT_FAILURE, "not enough devices in engine\n");
+
+	if (queue > (int)attr.queue_num)
+		rte_exit(EXIT_FAILURE, "not enough queues in engine\n");
+
+	for (i = 0; i <  RTE_MIN(MAX_VDPA_SAMPLE_PORTS, devcnt); i++) {
+		snprintf(ifname, sizeof(ifname), "%s%d", iface, i);
+		did = i;
+		vports[i].eid = eid;
+		vports[i].did = did;
+		strcpy(vports[i].ifname, ifname);
+
+		ret = rte_vhost_driver_register(ifname, flags);
+		if (ret != 0)
+			rte_exit(EXIT_FAILURE,
+					"register driver failed: %s\n",
+					ifname);
+
+		ret = rte_vhost_driver_callback_register(ifname,
+				&vdpa_sample_devops);
+		if (ret != 0)
+			rte_exit(EXIT_FAILURE,
+					"register driver ops failed: %s\n",
+					ifname);
+
+		rte_vhost_driver_set_vdpa_eid(ifname, eid);
+		rte_vhost_driver_set_vdpa_did(ifname, did);
+		/*
+		 * Configure vhost port with vDPA device's maximum capability.
+		 * App has the flexibility to change the features, queue num.
+		 */
+		rte_vhost_driver_set_queue_num(ifname, attr.queue_num);
+		rte_vhost_driver_set_features(ifname, attr.features);
+		rte_vhost_driver_set_protocol_features(ifname,
+				attr.protocol_features);
+
+		if (rte_vhost_driver_start(ifname) < 0)
+			rte_exit(EXIT_FAILURE,
+					"start vhost driver failed: %s\n",
+					ifname);
+	}
+
+	printf("enter \'q\' to quit\n");
+	while (scanf("%c", &ch)) {
+		if (ch == 'q')
+			break;
+		while (ch != '\n')
+			scanf("%c", &ch);
+		printf("enter \'q\' to quit\n");
+	}
+
+	for (portid = 0; portid < nb_ports; portid++) {
+		printf("Closing port %d...\n", portid);
+		rte_eth_dev_stop(portid);
+		rte_eth_dev_close(portid);
+	}
+
+	return 0;
+}