[dpdk-dev] [PATCH v3 5/6] virtio: Add support for qtest virtio-net PMD

Tan, Jianfeng jianfeng.tan at intel.com
Fri Mar 4 03:18:23 CET 2016


Hi Tetsuya,

Seems that this patch is too long. Is it possible to split into multiple 
commits?

On 2/22/2016 4:17 PM, Tetsuya Mukawa wrote:
> The patch adds a new virtio-net PMD configuration that allows the PMD to
> work on host as if the PMD is in VM.
> Here is new configuration for virtio-net PMD.
>   - CONFIG_RTE_VIRTIO_VDEV_QTEST
> To use this mode, EAL needs map all hugepages as one file. Also the file
> should be mapped between (1 << 31) and (1 << 44). And start address
> should be aligned by EAL memory size.
>
> To allocate like above, use below options.
>   --single-file
>   --range-virtaddr=0x80000000-0x100000000000
>   --align-memsize
> If a free regions isn't found, EAL will return error.
>
> To prepare virtio-net device on host, the users need to invoke QEMU
> process in special qtest mode. This mode is mainly used for testing QEMU
> devices from outer process. In this mode, no guest runs.
> Here is QEMU command line.
>
>   $ qemu-system-x86_64 \
>       -machine pc-i440fx-1.4,accel=qtest \
>       -display none -qtest-log /dev/null \
>       -qtest unix:/tmp/socket,server \
>       -netdev type=tap,script=/etc/qemu-ifup,id=net0,queues=1 \
>       -device virtio-net-pci,netdev=net0,mq=on,disable-modern=false,addr=3 \
>       -chardev socket,id=chr1,path=/tmp/ivshmem,server \
>       -device ivshmem,size=1G,chardev=chr1,vectors=1,addr=4
>
>   * Should use qemu-2.5.1, or above.
>   * QEMU process is needed per port.
>   * virtio-1.0 device are only supported.
>   * The vhost backends like vhost-net and vhost-user can be specified.
>   * In most cases, just using above command is enough, but you can also
>     specify other QEMU virtio-net options.
>   * Only checked "pc-i440fx-1.4" machine, but may work with other
>     machines.
>   * Should not add "--enable-kvm" to QEMU command line.

Correct me if wrong: all control msgs go through qemu process, e.g., tx 
notifications and rx interrupts need follow frontend-qemu-backend path. 
Question: qemu is started without --enable-kvm, as I understand, 
ioeventfd, the basis of kickfd/callfd, will not be available. So how 
does qemu kick backend or be kicked by backend?

>
> After invoking QEMU, the PMD can connect to QEMU process using unix
> domain sockets. Over these sockets, virtio-net, ivshmem and piix3
> device in QEMU are probed by the PMD.
> Here is example of command line.
>
>   $ testpmd -c f -n 1 -m 1024 --no-pci --single-file \
>        --range-virtaddr=0x80000000-0x100000000000 --align-memsize \
>        --vdev="eth_qtest_virtio0,qtest=/tmp/socket,ivshmem=/tmp/ivshmem"\
>        -- --disable-hw-vlan --txqflags=0xf00 -i
>
> Please specify same unix domain sockets and memory size in both QEMU
> and DPDK command lines like above.
> The share memory size should be power of 2, because ivshmem only
> accepts such memory size.
>
> Signed-off-by: Tetsuya Mukawa <mukawa at igel.co.jp>
> ---
>   config/common_linuxapp             |    1 +
>   drivers/net/virtio/Makefile        |    4 +
>   drivers/net/virtio/qtest.c         | 1342 ++++++++++++++++++++++++++++++++++++
>   drivers/net/virtio/qtest.h         |   65 ++
>   drivers/net/virtio/virtio_ethdev.c |  383 +++++++++-
>   drivers/net/virtio/virtio_pci.c    |  364 +++++++++-
>   drivers/net/virtio/virtio_pci.h    |    5 +-
>   7 files changed, 2122 insertions(+), 42 deletions(-)
>   create mode 100644 drivers/net/virtio/qtest.c
>   create mode 100644 drivers/net/virtio/qtest.h
>
> diff --git a/config/common_linuxapp b/config/common_linuxapp
> index 452f39c..f6e53bc 100644
> --- a/config/common_linuxapp
> +++ b/config/common_linuxapp
> @@ -533,3 +533,4 @@ CONFIG_RTE_TEST_PMD_RECORD_BURST_STATS=n
>   # Enable virtio support for container
>   #
>   CONFIG_RTE_VIRTIO_VDEV=y
> +CONFIG_RTE_VIRTIO_VDEV_QTEST=y
> diff --git a/drivers/net/virtio/Makefile b/drivers/net/virtio/Makefile
> index ef920f9..6c11378 100644
> --- a/drivers/net/virtio/Makefile
> +++ b/drivers/net/virtio/Makefile
> @@ -56,6 +56,10 @@ ifeq ($(CONFIG_RTE_VIRTIO_VDEV),y)
>   	SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += vhost_embedded.c
>   endif
>   
> +ifeq ($(CONFIG_RTE_VIRTIO_VDEV_QTEST),y)
> +	SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += qtest.c
> +endif
> +
>   # this lib depends upon:
>   DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_eal lib/librte_ether
>   DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_mempool lib/librte_mbuf
> diff --git a/drivers/net/virtio/qtest.c b/drivers/net/virtio/qtest.c
> new file mode 100644
> index 0000000..061aab5
> --- /dev/null
> +++ b/drivers/net/virtio/qtest.c
> @@ -0,0 +1,1342 @@
> +/*-
> + *   BSD LICENSE
> + *
> + *   Copyright(c) 2016 IGEL Co., Ltd. All rights reserved.
> + *   All rights reserved.
> + *
> + *   Redistribution and use in source and binary forms, with or without
> + *   modification, are permitted provided that the following conditions
> + *   are met:
> + *
> + *     * Redistributions of source code must retain the above copyright
> + *       notice, this list of conditions and the following disclaimer.
> + *     * Redistributions in binary form must reproduce the above copyright
> + *       notice, this list of conditions and the following disclaimer in
> + *       the documentation and/or other materials provided with the
> + *       distribution.
> + *     * Neither the name of IGEL Co., Ltd. nor the names of its
> + *       contributors may be used to endorse or promote products derived
> + *       from this software without specific prior written permission.
> + *
> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +#include <stdint.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <unistd.h>
> +#include <sys/types.h>
> +#include <sys/socket.h>
> +#include <sys/un.h>
> +#include <sys/queue.h>
> +#include <signal.h>
> +#include <pthread.h>
> +#include <sys/stat.h>
> +#include <fcntl.h>
> +#include <sys/eventfd.h>
> +#include <linux/pci_regs.h>
> +
> +#include <rte_memory.h>
> +#include <rte_malloc.h>
> +#include <rte_common.h>
> +#include <rte_interrupts.h>
> +
> +#include "virtio_pci.h"
> +#include "virtio_logs.h"
> +#include "virtio_ethdev.h"
> +#include "qtest.h"
> +
> +#define NB_BAR                          6
> +
> +/* PIIX3 configuration registers */
> +#define PIIX3_REG_ADDR_PIRQA            0x60
> +#define PIIX3_REG_ADDR_PIRQB            0x61
> +#define PIIX3_REG_ADDR_PIRQC            0x62
> +#define PIIX3_REG_ADDR_PIRQD            0x63
> +
> +/* Device information */
> +#define VIRTIO_NET_DEVICE_ID            0x1000
> +#define VIRTIO_NET_VENDOR_ID            0x1af4
> +#define VIRTIO_NET_IRQ_NUM              10
> +#define IVSHMEM_DEVICE_ID               0x1110
> +#define IVSHMEM_VENDOR_ID               0x1af4
> +#define IVSHMEM_PROTOCOL_VERSION        0
> +#define PIIX3_DEVICE_ID                 0x7000
> +#define PIIX3_VENDOR_ID                 0x8086
> +
> +/* ------------------------------------------------------------
> + * IO port mapping of qtest guest
> + * ------------------------------------------------------------
> + * 0x0000 - 0xbfff : not used
> + * 0xc000 - 0xc03f : virtio-net(BAR0)
> + * 0xc040 - 0xffff : not used
> + *
> + * ------------------------------------------------------------
> + * Memory mapping of qtest quest
> + * ------------------------------------------------------------
> + * 0x00000000_00000000 - 0x00000000_3fffffff : not used
> + * 0x00000000_40000000 - 0x00000000_40000fff : virtio-net(BAR1)
> + * 0x00000000_40001000 - 0x00000000_40ffffff : not used
> + * 0x00000000_41000000 - 0x00000000_417fffff : virtio-net(BAR4)
> + * 0x00000000_41800000 - 0x00000000_41ffffff : not used
> + * 0x00000000_42000000 - 0x00000000_420000ff : ivshmem(BAR0)
> + * 0x00000000_42000100 - 0x00000000_42ffffff : not used
> + * 0x00000000_80000000 - 0xffffffff_ffffffff : ivshmem(BAR2)
> + *

Is it possible to arrange multiple virtio-net devices here? What's the 
challenges?

Seems that lots of below code do the same work as libqos. So can we just 
link libqos? Or we need to maintain this code.

> + * We can only specify start address of a region. The region size
> + * will be defined by the device implementation in QEMU.
> + * The size will be pow of 2 according to the PCI specification.
> + * Also, the region start address should be aligned by region size.
> + *
> + * BAR2 of ivshmem will be used to mmap DPDK application memory.
> + * So this address will be dynamically changed, but not to overlap
> + * others, it should be mmaped between above addresses. Such allocation
> + * is done by EAL. Check rte_eal_get_free_region() also.
> + */
> +#define VIRTIO_NET_IO_START             0xc000
> +#define VIRTIO_NET_MEMORY1_START	0x40000000
> +#define VIRTIO_NET_MEMORY2_START	0x41000000
> +#define IVSHMEM_MEMORY_START            0x42000000
> +
> +#define PCI_CONFIG_ADDR(_bus, _device, _function, _offset) ( \
> +	(1 << 31) | ((_bus) & 0xff) << 16 | ((_device) & 0x1f) << 11 | \
> +	((_function) & 0x7) << 8 | ((_offset) & 0xfc))
> +
> +static char interrupt_message[32];
> +
> +enum qtest_pci_bar_type {
> +	QTEST_PCI_BAR_DISABLE = 0,
> +	QTEST_PCI_BAR_IO,
> +	QTEST_PCI_BAR_MEMORY_UNDER_1MB,
> +	QTEST_PCI_BAR_MEMORY_32,
> +	QTEST_PCI_BAR_MEMORY_64
> +};
> +
> +struct qtest_pci_bar {
> +	enum qtest_pci_bar_type type;
> +	uint8_t addr;
> +	uint64_t region_start;
> +	uint64_t region_size;
> +};
> +
> +struct qtest_session;
> +TAILQ_HEAD(qtest_pci_device_list, qtest_pci_device);
> +struct qtest_pci_device {
> +	TAILQ_ENTRY(qtest_pci_device) next;
> +	const char *name;
> +	uint16_t device_id;
> +	uint16_t vendor_id;
> +	uint8_t bus_addr;
> +	uint8_t device_addr;
> +	struct qtest_pci_bar bar[NB_BAR];
> +	int (*init)(struct qtest_session *s, struct qtest_pci_device *dev);
> +};
> +
> +union qtest_pipefds {
> +	struct {
> +		int pipefd[2];
> +	};
> +	struct {
> +		int readfd;
> +		int writefd;
> +	};
> +};
> +
> +struct qtest_session {
> +	int qtest_socket;
> +	pthread_mutex_t qtest_session_lock;
> +
> +	struct qtest_pci_device_list head;
> +	int ivshmem_socket;
> +
> +	pthread_t event_th;
> +	char *evq;
> +	char *evq_dequeue_ptr;
> +	size_t evq_total_len;
> +
> +	union qtest_pipefds msgfds;
> +
> +	pthread_t intr_th;
> +	int eventfd;
> +	rte_atomic16_t enable_intr;
> +	rte_intr_callback_fn cb;
> +	void *cb_arg;
> +	struct rte_eth_dev_data *eth_data;
> +};
> +
> +static int
> +qtest_raw_send(int fd, char *buf, size_t count)
> +{
> +	size_t len = count;
> +	size_t total_len = 0;
> +	int ret = 0;
> +
> +	while (len > 0) {
> +		ret = write(fd, buf, len);
> +		if (ret == -1) {
> +			if (errno == EINTR)
> +				continue;
> +			return ret;
> +		}
> +		if (ret == (int)len)
> +			break;

This _if_ complicates this function, remove this and rely on while 
condition to quit? Same as in the below qtest_raw_recv().

Thanks,
Jianfeng

> +		total_len += ret;
> +		buf += ret;
> +		len -= ret;
> +	}
> +	return total_len + ret;
> +}
> +
> +static int
> +qtest_raw_recv(int fd, char *buf, size_t count)
> +{
> +	size_t len = count;
> +	size_t total_len = 0;
> +	int ret = 0;
> +
> +	while (len > 0) {
> +		ret = read(fd, buf, len);
> +		if (ret <= 0) {
> +			if (errno == EINTR) {
> +				continue;
> +			}
> +			return ret;
> +		}
> +		if (ret == (int)len)
> +			break;
> +		if (*(buf + ret - 1) == '\n')
> +			break;
> +		total_len += ret;
> +		buf += ret;
> +		len -= ret;
> +	}
> +	return total_len + ret;
> +}
> +
[...]


More information about the dev mailing list