[dpdk-dev] [PATCH 2/3] lib/librte_vhost: vhost library support to facilitate integration with DPDK accelerated vswitch

Tetsuya.Mukawa mukawa at igel.co.jp
Wed Sep 3 08:43:09 CEST 2014


(2014/09/03 14:39), Xie, Huawei wrote:
> Thanks Tetsuya:
> 	Some of them are due to 80 character limitation. Is it ok to break the limitation for better indentation?
It's is up to the cording style of dpdk. But, there may be no strict
rule in dpdk.
So please use your original cord. :)

Thanks,
Tetsuya

>> -----Original Message-----
>> From: Tetsuya.Mukawa [mailto:mukawa at igel.co.jp]
>> Sent: Wednesday, September 03, 2014 11:39 AM
>> To: Xie, Huawei; dev at dpdk.org
>> Subject: Re: [dpdk-dev] [PATCH 2/3] lib/librte_vhost: vhost library support to
>> facilitate integration with DPDK accelerated vswitch
>>
>> Hi Huawei,
>>
>> I added few comments. Mainly those comments are about source code indent.
>>
>> (2014/09/02 17:55), Huawei Xie wrote:
>>> +
>>> +/**
>>> + * Structure contains variables relevant to RX/TX virtqueues.
>>> + */
>>> +struct vhost_virtqueue {
>>> +	/**< descriptor ring. */
>>> +	struct vring_desc    *desc;
>>> +	/**< available ring. */
>>> +	struct vring_avail   *avail;
>>> +	/**< used ring. */
>>> +	struct vring_used    *used;
>>> +	/**< Size of descriptor ring. */
>>> +	uint32_t             size;
>>> +	/**< Backend value to determine if device should be started/stopped. */
>>> +	uint32_t             backend;
>>> +	/**< Vhost header length (varies depending on RX merge buffers. */
>>> +	uint16_t             vhost_hlen;
>>> +	/**< Last index used on the available ring. */
>>> +	volatile uint16_t    last_used_idx;
>>> +	/**< Used for multiple devices reserving buffers. */
>>> +	volatile uint16_t    last_used_idx_res;
>>> +	/**< Currently unused as polling mode is enabled. */
>>> +	eventfd_t            callfd;
>>> +	/**< Used to notify the guest (trigger interrupt). */
>>> +	eventfd_t            kickfd;
>>> +} __rte_cache_aligned;
>>> +
>>> +/**
>>> + * Information relating to memory regions including offsets to
>>> + * addresses in QEMUs memory file.
>>> + */
>>> +struct virtio_memory_regions {
>>> +	/**< Base guest physical address of region. */
>>> +	uint64_t    guest_phys_address;
>>> +	/**< End guest physical address of region. */
>>> +	uint64_t    guest_phys_address_end;
>>> +	/**< Size of region. */
>>> +	uint64_t    memory_size;
>>> +	/**< Base userspace address of region. */
>>> +	uint64_t    userspace_address;
>>> +	/**< Offset of region for address translation. */
>>> +	uint64_t    address_offset;
>>> +};
>>> +
>>> +
>>> +/**
>>> + * Memory structure includes region and mapping information.
>>> + */
>>> +struct virtio_memory {
>>> +	/**< Base QEMU userspace address of the memory file. */
>>> +	uint64_t    base_address;
>>> +	/**< Mapped address of memory file in this process's memory space. */
>>> +	uint64_t    mapped_address;
>>> +	/**< Total size of memory file. */
>>> +	uint64_t    mapped_size;
>>> +	/**< Number of memory regions. */
>>> +	uint32_t    nregions;
>>> +	/**< Memory region information. */
>>> +	struct virtio_memory_regions      regions[0];
>>> +};
>>> +
>>> +/**
>>> + * Device structure contains all configuration information relating to the
>> device.
>>> + */
>>> +struct virtio_net {
>>> +	/**< Contains all virtqueue information. */
>>> +	struct vhost_virtqueue  *virtqueue[VIRTIO_QNUM];
>>> +	/**< QEMU memory and memory region information. */
>>> +	struct virtio_memory    *mem;
>>> +	/**< Negotiated feature set. */
>>> +	uint64_t features;
>>> +	/**< Device identifier. */
>>> +	uint64_t device_fh;
>>> +	/**< Device flags, used to check if device is running on data core. */
>>> +	uint32_t flags;
>>> +	void     *priv;
>>> +} __rte_cache_aligned;
>>> +
>> Above comments of 'vhost_virtqueue', 'struct virtio_memory_regions',
>> 'struct virtio-memory'
>> and 'struct virtio-net' will break API documentation created by Doxygen.
>> Not to break, I guess those comment need to be placed after variable.
> Thanks. I will check doxgen rule. Moved the comment above to avoid 80 character limitation warning.
>>> +/**
>>> + * cuse_info is populated and used to register the cuse device.
>>> + * vhost_net_device_ops is passed when the device is registered in main.c.
>>> + */
>>> +int
>>> +rte_vhost_driver_register(const char *dev_name)
>>> +{
>>> +	struct cuse_info cuse_info;
>>> +	char device_name[PATH_MAX] = "";
>>> +	char char_device_name[PATH_MAX] = "";
>>> +	const char *device_argv[] = { device_name };
>>> +
>>> +	char fuse_opt_dummy[] = FUSE_OPT_DUMMY;
>>> +	char fuse_opt_fore[] = FUSE_OPT_FORE;
>>> +	char fuse_opt_nomulti[] = FUSE_OPT_NOMULTI;
>>> +	char *fuse_argv[] = {fuse_opt_dummy, fuse_opt_fore,
>> fuse_opt_nomulti};
>>> +
>>> +	if (access(cuse_device_name, R_OK | W_OK) < 0) {
>>> +		RTE_LOG(ERR, VHOST_CONFIG,
>>> +		"Character device %s can't be accessed, maybe not exist\n",
>>> +		cuse_device_name);
>> Need one more indent?
> Again to avoid 80 char warning for some lengthy RTE_LOG.
>>> +/**
>>> + * Locate the file containing QEMU's memory space and map it to our
>> address space.
>>> + */
>>> +static int
>>> +host_memory_map(struct virtio_net *dev, struct virtio_memory *mem, pid_t
>> pid,
>>> +		uint64_t addr)
>>> +{
>>> +	struct dirent *dptr = NULL;
>>> +	struct procmap procmap;
>>> +	DIR *dp = NULL;
>>> +	int fd;
>>> +	int i;
>>> +	char memfile[PATH_MAX];
>>> +	char mapfile[PATH_MAX];
>>> +	char procdir[PATH_MAX];
>>> +	char resolved_path[PATH_MAX];
>>> +	FILE *fmap;
>>> +	void *map;
>>> +	uint8_t	found = 0;
>>> +	char line[BUFSIZE];
>>> +	char dlm[] = "-   :   ";
>>> +	char *str, *sp, *in[PROCMAP_SZ];
>>> +	char *end = NULL;
>>> +
>>> +	/* Path where mem files are located. */
>>> +	snprintf(procdir, PATH_MAX, "/proc/%u/fd/", pid);
>>> +	/* Maps file used to locate mem file. */
>>> +	snprintf(mapfile, PATH_MAX, "/proc/%u/maps", pid);
>>> +
>>> +	fmap = fopen(mapfile, "r");
>>> +	if (fmap == NULL) {
>>> +		RTE_LOG(ERR, VHOST_CONFIG,
>>> +			"(%"PRIu64") Failed to open maps file for pid %d\n",
>>> +		dev->device_fh, pid);
>>> +		return -1;
>>> +	}
>>> +
>>> +	/* Read through maps file until we find out base_address. */
>>> +	while (fgets(line, BUFSIZE, fmap) != 0) {
>>> +		str = line;
>>> +		errno = 0;
>>> +		/* Split line in to fields. */
>>> +		for (i = 0; i < PROCMAP_SZ; i++) {
>>> +			in[i] = strtok_r(str, &dlm[i], &sp);
>>> +			if ((in[i] == NULL) || (errno != 0)) {
>>> +				fclose(fmap);
>>> +				return -1;
>>> +			}
>>> +			str = NULL;
>>> +		}
>>> +
>>> +		/* Convert/Copy each field as needed. */
>>> +		procmap.va_start = strtoull(in[0], &end, 16);
>>> +		if ((in[0] == '\0') || (end == NULL) || (*end != '\0') ||
>>> +			(errno != 0)) {
>>> +			fclose(fmap);
>>> +			return -1;
>>> +		}
>>> +
>>> +		procmap.len = strtoull(in[1], &end, 16);
>>> +		if ((in[1] == '\0') || (end == NULL) || (*end != '\0') ||
>>> +			(errno != 0)) {
>>> +			fclose(fmap);
>>> +			return -1;
>>> +		}
>>> +
>>> +		procmap.pgoff = strtoull(in[3], &end, 16);
>>> +		if ((in[3] == '\0') || (end == NULL) || (*end != '\0') ||
>>> +			(errno != 0)) {
>>> +			fclose(fmap);
>>> +			return -1;
>>> +		}
>>> +
>>> +		procmap.maj = strtoul(in[4], &end, 16);
>>> +		if ((in[4] == '\0') || (end == NULL) || (*end != '\0') ||
>>> +			(errno != 0)) {
>>> +			fclose(fmap);
>>> +			return -1;
>>> +		}
>>> +
>>> +		procmap.min = strtoul(in[5], &end, 16);
>>> +		if ((in[5] == '\0') || (end == NULL) || (*end != '\0') ||
>>> +			(errno != 0)) {
>>> +			fclose(fmap);
>>> +			return -1;
>>> +		}
>>> +
>>> +		procmap.ino = strtoul(in[6], &end, 16);
>>> +		if ((in[6] == '\0') || (end == NULL) || (*end != '\0') ||
>>> +			(errno != 0)) {
>>> +			fclose(fmap);
>>> +			return -1;
>>> +		}
>>> +
>>> +		memcpy(&procmap.prot, in[2], PROT_SZ);
>>> +		memcpy(&procmap.fname, in[7], PATH_MAX);
>>> +
>>> +		if (procmap.va_start == addr) {
>>> +			procmap.len = procmap.len - procmap.va_start;
>>> +			found = 1;
>>> +			break;
>>> +		}
>>> +	}
>>> +	fclose(fmap);
>>> +
>>> +	if (!found) {
>>> +		RTE_LOG(ERR, VHOST_CONFIG,
>>> +		"(%"PRIu64") Failed to find memory file in pid %d maps file\n",
>>> +		dev->device_fh, pid);
>> Need one more indent?
>>
>>> +/**
>>> + * Searches the configuration core linked list and retrieves the device if it
>> exists.
>>> + */
>>> +static struct virtio_net *
>>> +get_device(struct vhost_device_ctx ctx)
>>> +{
>>> +	struct virtio_net_config_ll *ll_dev;
>>> +
>>> +	ll_dev = get_config_ll_entry(ctx);
>>> +
>>> +	/* If a matching entry is found in the linked list, return the device
>>> +	* in that entry. */
>> Need a blank space at start of comment?
>>
>>> +	if (ll_dev)
>>> +		return &ll_dev->dev;
>>> +
>>> +	RTE_LOG(ERR, VHOST_CONFIG,
>>> +		"(%"PRIu64") Device not found in linked list.\n", ctx.fh);
>>> +	return NULL;
>>> +}
>>> +
>>> +/**
>>> + * Add entry containing a device to the device configuration linked list.
>>> + */
>>> +static void
>>> +add_config_ll_entry(struct virtio_net_config_ll *new_ll_dev)
>>> +{
>>> +	struct virtio_net_config_ll *ll_dev = ll_root;
>>> +
>>> +	/* If ll_dev == NULL then this is the first device so go to else */
>>> +	if (ll_dev) {
>>> +		/* If the 1st device_fh != 0 then we insert our device here. */
>>> +		if (ll_dev->dev.device_fh != 0)	{
>>> +			new_ll_dev->dev.device_fh = 0;
>>> +			new_ll_dev->next = ll_dev;
>>> +			ll_root = new_ll_dev;
>>> +		} else {
>>> +			/* Increment through the ll until we find un unused
>>> +			* device_fh. Insert the device at that entry*/
>> Need a blank space at end of comment?
> Will fix.
>>> +/**
>>> + * Function is called from the CUSE release function. This function will
>> cleanup
>>> + * the device and remove it from device configuration linked list.
>>> + */
>>> +static void
>>> +destroy_device(struct vhost_device_ctx ctx)
>>> +{
>>> +	struct virtio_net_config_ll *ll_dev_cur_ctx, *ll_dev_last = NULL;
>>> +	struct virtio_net_config_ll *ll_dev_cur = ll_root;
>>> +
>>> +	/* Find the linked list entry for the device to be removed. */
>>> +	ll_dev_cur_ctx = get_config_ll_entry(ctx);
>>> +	while (ll_dev_cur != NULL) {
>>> +		/* If the device is found or a device that doesn't exist is
>>> +		* found then it is removed. */
>> Need a blank space?
> Will fix.
>>> +/**
>>> + * Called from CUSE IOCTL: VHOST_SET_FEATURES
>>> + * We receive the negotiated set of features supported by us and the virtio
>>> + * device.
>>> + */
>>> +static int
>>> +set_features(struct vhost_device_ctx ctx, uint64_t *pu)
>>> +{
>>> +	struct virtio_net *dev;
>>> +
>>> +	dev = get_device(ctx);
>>> +	if (dev == NULL)
>>> +		return -1;
>>> +	if (*pu & ~VHOST_FEATURES)
>>> +		return -1;
>>> +
>>> +	/* Store the negotiated feature list for the device. */
>>> +	dev->features = *pu;
>>> +
>>> +	/* Set the vhost_hlen depending on if VIRTIO_NET_F_MRG_RXBUF is set.
>> */
>>> +	if (dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF)) {
>>> +		LOG_DEBUG(VHOST_CONFIG,
>>> +		"(%"PRIu64") Mergeable RX buffers enabled\n", dev->device_fh);
>>> +		dev->virtqueue[VIRTIO_RXQ]->vhost_hlen =
>>> +			sizeof(struct virtio_net_hdr_mrg_rxbuf);
>>> +		dev->virtqueue[VIRTIO_TXQ]->vhost_hlen =
>>> +			sizeof(struct virtio_net_hdr_mrg_rxbuf);
>> One more indent?
>>
>>> +/**
>>> + * Called from CUSE IOCTL: VHOST_SET_MEM_TABLE
>>> + * This function creates and populates the memory structure for the device.
>>> + * This includes storing offsets used to translate buffer addresses.
>>> + */
>>> +static int
>>> +set_mem_table(struct vhost_device_ctx ctx, const void *mem_regions_addr,
>>> +	uint32_t nregions)
>>> +{
>>> +	struct virtio_net *dev;
>>> +	struct vhost_memory_region *mem_regions;
>>> +	struct virtio_memory *mem;
>>> +	uint64_t size = offsetof(struct vhost_memory, regions);
>>> +	uint32_t regionidx, valid_regions;
>>> +
>>> +	dev = get_device(ctx);
>>> +	if (dev == NULL)
>>> +		return -1;
>>> +
>>> +	if (dev->mem) {
>>> +		munmap((void *)(uintptr_t)dev->mem->mapped_address,
>>> +			(size_t)dev->mem->mapped_size);
>>> +		free(dev->mem);
>>> +	}
>>> +
>>> +	/* Malloc the memory structure depending on the number of regions. */
>>> +	mem = calloc(1, sizeof(struct virtio_memory) +
>>> +		(sizeof(struct virtio_memory_regions) * nregions));
>>> +	if (mem == NULL) {
>>> +		RTE_LOG(ERR, VHOST_CONFIG,
>>> +			"(%"PRIu64") Failed to allocate memory for dev-
>>> mem.\n",
>>> +			dev->device_fh);
>>> +		return -1;
>>> +	}
>>> +
>>> +	mem->nregions = nregions;
>>> +
>>> +	mem_regions = (void *)(uintptr_t)
>>> +		((uint64_t)(uintptr_t)mem_regions_addr + size);
>>> +
>>> +	for (regionidx = 0; regionidx < mem->nregions; regionidx++) {
>>> +		/* Populate the region structure for each region. */
>>> +		mem->regions[regionidx].guest_phys_address =
>>> +			mem_regions[regionidx].guest_phys_addr;
>>> +		mem->regions[regionidx].guest_phys_address_end =
>>> +			mem->regions[regionidx].guest_phys_address +
>>> +			mem_regions[regionidx].memory_size;
>>> +		mem->regions[regionidx].memory_size =
>>> +			mem_regions[regionidx].memory_size;
>>> +		mem->regions[regionidx].userspace_address =
>>> +			mem_regions[regionidx].userspace_addr;
>>> +
>>> +		LOG_DEBUG(VHOST_CONFIG,
>>> +			"(%"PRIu64") REGION: %u - GPA: %p - QEMU VA: %p -
>> SIZE "
>>> +			"(%"PRIu64")\n",
>>> +			dev->device_fh, regionidx,
>>> +			(void *)(uintptr_t)
>>> +				mem->regions[regionidx].guest_phys_address,
>>> +			(void *)(uintptr_t)
>>> +				mem->regions[regionidx].userspace_address,
>>> +			mem->regions[regionidx].memory_size);
>>> +
>>> +		/*set the base address mapping*/
>> Need a blank space at start and end of comment?
> Thanks for careful review.
>>> +		if (mem->regions[regionidx].guest_phys_address == 0x0) {
>>> +			mem->base_address =
>>> +				mem->regions[regionidx].userspace_address;
>>> +			/* Map VM memory file */
>>> +			if (host_memory_map(dev, mem, ctx.pid,
>>> +				mem->base_address) != 0) {
>>> +				free(mem);
>>> +				return -1;
>>> +			}
>>> +		}
>>> +	}
>>> +
>>> +	/* Check that we have a valid base address. */
>>> +	if (mem->base_address == 0) {
>>> +		RTE_LOG(ERR, VHOST_CONFIG,
>>> +		"(%"PRIu64") Failed to find base address of qemu memory "
>>> +		"file.\n", dev->device_fh);
>> One more indent?
>>
>>> +		free(mem);
>>> +		return -1;
>>> +	}
>>> +
>>> +	/* Check if all of our regions have valid mappings. Usually one does
>>> +	* not exist in the QEMU memory file. */
>> Need a blank space?
>>
>>> +	valid_regions = mem->nregions;
>>> +	for (regionidx = 0; regionidx < mem->nregions; regionidx++) {
>>> +		if ((mem->regions[regionidx].userspace_address <
>>> +			mem->base_address) ||
>>> +			(mem->regions[regionidx].userspace_address >
>>> +				(mem->base_address + mem->mapped_size)))
>>> +				valid_regions--;
>>> +	}
>>> +
>>> +	/* If a region does not have a valid mapping we rebuild our memory
>>> +	* struct to contain only valid entries. */
>> Need a blank space?
>>> +	if (valid_regions != mem->nregions) {
>>> +		LOG_DEBUG(VHOST_CONFIG,
>>> +		"(%"PRIu64") Not all memory regions exist in the QEMU mem
>> file."
>>> +		"Re-populating mem structure\n",
>> One more indent?
>>
>>> +			dev->device_fh);
>>> +
>>> +		/* Re-populate the memory structure with only valid regions.
>>> +		* Invalid regions are over-written with memmove. */
>> Need a blank space?
>>
>> Thanks,
>> Tetsuya



More information about the dev mailing list