[dpdk-dev] [PATCH 1/6] vhost: simplify memory regions handling

Xu, Qian Q qian.q.xu at intel.com
Wed Aug 24 09:36:01 CEST 2016


OK, it's better to claim that his patchset have the dependency on another one.   

-----Original Message-----
From: Yuanhan Liu [mailto:yuanhan.liu at linux.intel.com] 
Sent: Wednesday, August 24, 2016 3:40 PM
To: Xu, Qian Q <qian.q.xu at intel.com>
Cc: dev at dpdk.org; Maxime Coquelin <maxime.coquelin at redhat.com>
Subject: Re: [dpdk-dev] [PATCH 1/6] vhost: simplify memory regions handling

Yes, it depends on the vhost-cuse removal patchset I sent last week.

	--yliu

On Wed, Aug 24, 2016 at 07:26:07AM +0000, Xu, Qian Q wrote:
> I want to apply the patch on the latest DPDK, see below commit ID but failed since no vhost.h and vhost-user.h files. So do you have any dependency on other patches? 
> 
> commit 28d8abaf250c3fb4dcb6416518f4c54b4ae67205
> Author: Deirdre O'Connor <deirdre.o.connor at intel.com>
> Date:   Mon Aug 22 17:20:08 2016 +0100
> 
>     doc: fix patchwork link
> 
>     Fixes: 58abf6e77c6b ("doc: add contributors guide")
> 
>     Reported-by: Jon Loeliger <jdl at netgate.com>
>     Signed-off-by: Deirdre O'Connor <deirdre.o.connor at intel.com>
>     Acked-by: John McNamara <john.mcnamara at intel.com>
> 
> 
> -----Original Message-----
> From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Yuanhan Liu
> Sent: Tuesday, August 23, 2016 4:11 PM
> To: dev at dpdk.org
> Cc: Maxime Coquelin <maxime.coquelin at redhat.com>; Yuanhan Liu <yuanhan.liu at linux.intel.com>
> Subject: [dpdk-dev] [PATCH 1/6] vhost: simplify memory regions handling
> 
> Due to history reason (that vhost-cuse comes before vhost-user), some fields for maintaining the vhost-user memory mappings (such as mmapped address and size, with those we then can unmap on destroy) are kept in "orig_region_map" struct, a structure that is defined only in vhost-user source file.
> 
> The right way to go is to remove the structure and move all those fields into virtio_memory_region struct. But we simply can't do that before, because it breaks the ABI.
> 
> Now, thanks to the ABI refactoring, it's never been a blocking issue any more. And here it goes: this patch removes orig_region_map and redefines virtio_memory_region, to include all necessary info.
> 
> With that, we can simplify the guest/host address convert a bit.
> 
> Signed-off-by: Yuanhan Liu <yuanhan.liu at linux.intel.com>
> ---
>  lib/librte_vhost/vhost.h      |  49 ++++++------
>  lib/librte_vhost/vhost_user.c | 172 +++++++++++++++++-------------------------
>  2 files changed, 90 insertions(+), 131 deletions(-)
> 
> diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h index c2dfc3c..df2107b 100644
> --- a/lib/librte_vhost/vhost.h
> +++ b/lib/librte_vhost/vhost.h
> @@ -143,12 +143,14 @@ struct virtio_net {
>   * Information relating to memory regions including offsets to
>   * addresses in QEMUs memory file.
>   */
> -struct virtio_memory_regions {
> -	uint64_t guest_phys_address;
> -	uint64_t guest_phys_address_end;
> -	uint64_t memory_size;
> -	uint64_t userspace_address;
> -	uint64_t address_offset;
> +struct virtio_memory_region {
> +	uint64_t guest_phys_addr;
> +	uint64_t guest_user_addr;
> +	uint64_t host_user_addr;
> +	uint64_t size;
> +	void	 *mmap_addr;
> +	uint64_t mmap_size;
> +	int fd;
>  };
>  
>  
> @@ -156,12 +158,8 @@ struct virtio_memory_regions {
>   * Memory structure includes region and mapping information.
>   */
>  struct virtio_memory {
> -	/* Base QEMU userspace address of the memory file. */
> -	uint64_t base_address;
> -	uint64_t mapped_address;
> -	uint64_t mapped_size;
>  	uint32_t nregions;
> -	struct virtio_memory_regions regions[0];
> +	struct virtio_memory_region regions[0];
>  };
>  
>  
> @@ -200,26 +198,23 @@ extern uint64_t VHOST_FEATURES;
>  #define MAX_VHOST_DEVICE	1024
>  extern struct virtio_net *vhost_devices[MAX_VHOST_DEVICE];
>  
> -/**
> - * Function to convert guest physical addresses to vhost virtual addresses.
> - * This is used to convert guest virtio buffer addresses.
> - */
> +/* Convert guest physical Address to host virtual address */
>  static inline uint64_t __attribute__((always_inline)) -gpa_to_vva(struct virtio_net *dev, uint64_t guest_pa)
> +gpa_to_vva(struct virtio_net *dev, uint64_t gpa)
>  {
> -	struct virtio_memory_regions *region;
> -	uint32_t regionidx;
> -	uint64_t vhost_va = 0;
> -
> -	for (regionidx = 0; regionidx < dev->mem->nregions; regionidx++) {
> -		region = &dev->mem->regions[regionidx];
> -		if ((guest_pa >= region->guest_phys_address) &&
> -			(guest_pa <= region->guest_phys_address_end)) {
> -			vhost_va = region->address_offset + guest_pa;
> -			break;
> +	struct virtio_memory_region *reg;
> +	uint32_t i;
> +
> +	for (i = 0; i < dev->mem->nregions; i++) {
> +		reg = &dev->mem->regions[i];
> +		if (gpa >= reg->guest_phys_addr &&
> +		    gpa <  reg->guest_phys_addr + reg->size) {
> +			return gpa - reg->guest_phys_addr +
> +			       reg->host_user_addr;
>  		}
>  	}
> -	return vhost_va;
> +
> +	return 0;
>  }
>  
>  struct virtio_net_device_ops const *notify_ops; diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c index eee99e9..d2071fd 100644
> --- a/lib/librte_vhost/vhost_user.c
> +++ b/lib/librte_vhost/vhost_user.c
> @@ -74,18 +74,6 @@ static const char *vhost_message_str[VHOST_USER_MAX] = {
>  	[VHOST_USER_SEND_RARP]  = "VHOST_USER_SEND_RARP",  };
>  
> -struct orig_region_map {
> -	int fd;
> -	uint64_t mapped_address;
> -	uint64_t mapped_size;
> -	uint64_t blksz;
> -};
> -
> -#define orig_region(ptr, nregions) \
> -	((struct orig_region_map *)RTE_PTR_ADD((ptr), \
> -		sizeof(struct virtio_memory) + \
> -		sizeof(struct virtio_memory_regions) * (nregions)))
> -
>  static uint64_t
>  get_blk_size(int fd)
>  {
> @@ -99,18 +87,17 @@ get_blk_size(int fd)  static void  free_mem_region(struct virtio_net *dev)  {
> -	struct orig_region_map *region;
> -	unsigned int idx;
> +	uint32_t i;
> +	struct virtio_memory_region *reg;
>  
>  	if (!dev || !dev->mem)
>  		return;
>  
> -	region = orig_region(dev->mem, dev->mem->nregions);
> -	for (idx = 0; idx < dev->mem->nregions; idx++) {
> -		if (region[idx].mapped_address) {
> -			munmap((void *)(uintptr_t)region[idx].mapped_address,
> -					region[idx].mapped_size);
> -			close(region[idx].fd);
> +	for (i = 0; i < dev->mem->nregions; i++) {
> +		reg = &dev->mem->regions[i];
> +		if (reg->host_user_addr) {
> +			munmap(reg->mmap_addr, reg->mmap_size);
> +			close(reg->fd);
>  		}
>  	}
>  }
> @@ -120,7 +107,7 @@ vhost_backend_cleanup(struct virtio_net *dev)  {
>  	if (dev->mem) {
>  		free_mem_region(dev);
> -		free(dev->mem);
> +		rte_free(dev->mem);
>  		dev->mem = NULL;
>  	}
>  	if (dev->log_addr) {
> @@ -286,25 +273,23 @@ numa_realloc(struct virtio_net *dev, int index __rte_unused)
>   * used to convert the ring addresses to our address space.
>   */
>  static uint64_t
> -qva_to_vva(struct virtio_net *dev, uint64_t qemu_va)
> +qva_to_vva(struct virtio_net *dev, uint64_t qva)
>  {
> -	struct virtio_memory_regions *region;
> -	uint64_t vhost_va = 0;
> -	uint32_t regionidx = 0;
> +	struct virtio_memory_region *reg;
> +	uint32_t i;
>  
>  	/* Find the region where the address lives. */
> -	for (regionidx = 0; regionidx < dev->mem->nregions; regionidx++) {
> -		region = &dev->mem->regions[regionidx];
> -		if ((qemu_va >= region->userspace_address) &&
> -			(qemu_va <= region->userspace_address +
> -			region->memory_size)) {
> -			vhost_va = qemu_va + region->guest_phys_address +
> -				region->address_offset -
> -				region->userspace_address;
> -			break;
> +	for (i = 0; i < dev->mem->nregions; i++) {
> +		reg = &dev->mem->regions[i];
> +
> +		if (qva >= reg->guest_user_addr &&
> +		    qva <  reg->guest_user_addr + reg->size) {
> +			return qva - reg->guest_user_addr +
> +			       reg->host_user_addr;
>  		}
>  	}
> -	return vhost_va;
> +
> +	return 0;
>  }
>  
>  /*
> @@ -391,11 +376,13 @@ static int
>  vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg)  {
>  	struct VhostUserMemory memory = pmsg->payload.memory;
> -	struct virtio_memory_regions *pregion;
> -	uint64_t mapped_address, mapped_size;
> -	unsigned int idx = 0;
> -	struct orig_region_map *pregion_orig;
> +	struct virtio_memory_region *reg;
> +	void *mmap_addr;
> +	uint64_t mmap_size;
> +	uint64_t mmap_offset;
>  	uint64_t alignment;
> +	uint32_t i;
> +	int fd;
>  
>  	/* Remove from the data plane. */
>  	if (dev->flags & VIRTIO_DEV_RUNNING) { @@ -405,14 +392,12 @@ vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg)
>  
>  	if (dev->mem) {
>  		free_mem_region(dev);
> -		free(dev->mem);
> +		rte_free(dev->mem);
>  		dev->mem = NULL;
>  	}
>  
> -	dev->mem = calloc(1,
> -		sizeof(struct virtio_memory) +
> -		sizeof(struct virtio_memory_regions) * memory.nregions +
> -		sizeof(struct orig_region_map) * memory.nregions);
> +	dev->mem = rte_zmalloc("vhost-mem-table", sizeof(struct virtio_memory) +
> +		sizeof(struct virtio_memory_region) * memory.nregions, 0);
>  	if (dev->mem == NULL) {
>  		RTE_LOG(ERR, VHOST_CONFIG,
>  			"(%d) failed to allocate memory for dev->mem\n", @@ -421,22 +406,17 @@ vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg)
>  	}
>  	dev->mem->nregions = memory.nregions;
>  
> -	pregion_orig = orig_region(dev->mem, memory.nregions);
> -	for (idx = 0; idx < memory.nregions; idx++) {
> -		pregion = &dev->mem->regions[idx];
> -		pregion->guest_phys_address =
> -			memory.regions[idx].guest_phys_addr;
> -		pregion->guest_phys_address_end =
> -			memory.regions[idx].guest_phys_addr +
> -			memory.regions[idx].memory_size;
> -		pregion->memory_size =
> -			memory.regions[idx].memory_size;
> -		pregion->userspace_address =
> -			memory.regions[idx].userspace_addr;
> -
> -		/* This is ugly */
> -		mapped_size = memory.regions[idx].memory_size +
> -			memory.regions[idx].mmap_offset;
> +	for (i = 0; i < memory.nregions; i++) {
> +		fd  = pmsg->fds[i];
> +		reg = &dev->mem->regions[i];
> +
> +		reg->guest_phys_addr = memory.regions[i].guest_phys_addr;
> +		reg->guest_user_addr = memory.regions[i].userspace_addr;
> +		reg->size            = memory.regions[i].memory_size;
> +		reg->fd              = fd;
> +
> +		mmap_offset = memory.regions[i].mmap_offset;
> +		mmap_size   = reg->size + mmap_offset;
>  
>  		/* mmap() without flag of MAP_ANONYMOUS, should be called
>  		 * with length argument aligned with hugepagesz at older @@ -446,67 +426,51 @@ vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg)
>  		 * to avoid failure, make sure in caller to keep length
>  		 * aligned.
>  		 */
> -		alignment = get_blk_size(pmsg->fds[idx]);
> +		alignment = get_blk_size(fd);
>  		if (alignment == (uint64_t)-1) {
>  			RTE_LOG(ERR, VHOST_CONFIG,
>  				"couldn't get hugepage size through fstat\n");
>  			goto err_mmap;
>  		}
> -		mapped_size = RTE_ALIGN_CEIL(mapped_size, alignment);
> +		mmap_size = RTE_ALIGN_CEIL(mmap_size, alignment);
>  
> -		mapped_address = (uint64_t)(uintptr_t)mmap(NULL,
> -			mapped_size,
> -			PROT_READ | PROT_WRITE, MAP_SHARED,
> -			pmsg->fds[idx],
> -			0);
> +		mmap_addr = mmap(NULL, mmap_size,
> +				 PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
>  
> -		RTE_LOG(INFO, VHOST_CONFIG,
> -			"mapped region %d fd:%d to:%p sz:0x%"PRIx64" "
> -			"off:0x%"PRIx64" align:0x%"PRIx64"\n",
> -			idx, pmsg->fds[idx], (void *)(uintptr_t)mapped_address,
> -			mapped_size, memory.regions[idx].mmap_offset,
> -			alignment);
> -
> -		if (mapped_address == (uint64_t)(uintptr_t)MAP_FAILED) {
> +		if (mmap_addr == MAP_FAILED) {
>  			RTE_LOG(ERR, VHOST_CONFIG,
> -				"mmap qemu guest failed.\n");
> +				"mmap region %u failed.\n", i);
>  			goto err_mmap;
>  		}
>  
> -		pregion_orig[idx].mapped_address = mapped_address;
> -		pregion_orig[idx].mapped_size = mapped_size;
> -		pregion_orig[idx].blksz = alignment;
> -		pregion_orig[idx].fd = pmsg->fds[idx];
> -
> -		mapped_address +=  memory.regions[idx].mmap_offset;
> +		reg->mmap_addr = mmap_addr;
> +		reg->mmap_size = mmap_size;
> +		reg->host_user_addr = (uint64_t)(uintptr_t)mmap_addr + mmap_offset;
>  
> -		pregion->address_offset = mapped_address -
> -			pregion->guest_phys_address;
> -
> -		if (memory.regions[idx].guest_phys_addr == 0) {
> -			dev->mem->base_address =
> -				memory.regions[idx].userspace_addr;
> -			dev->mem->mapped_address =
> -				pregion->address_offset;
> -		}
> -
> -		LOG_DEBUG(VHOST_CONFIG,
> -			"REGION: %u GPA: %p QEMU VA: %p SIZE (%"PRIu64")\n",
> -			idx,
> -			(void *)(uintptr_t)pregion->guest_phys_address,
> -			(void *)(uintptr_t)pregion->userspace_address,
> -			 pregion->memory_size);
> +		RTE_LOG(INFO, VHOST_CONFIG,
> +			"guest memory region %u, size: 0x%" PRIx64 "\n"
> +			"\t guest physical addr: 0x%" PRIx64 "\n"
> +			"\t guest virtual  addr: 0x%" PRIx64 "\n"
> +			"\t host  virtual  addr: 0x%" PRIx64 "\n"
> +			"\t mmap addr : 0x%" PRIx64 "\n"
> +			"\t mmap size : 0x%" PRIx64 "\n"
> +			"\t mmap align: 0x%" PRIx64 "\n"
> +			"\t mmap off  : 0x%" PRIx64 "\n",
> +			i, reg->size,
> +			reg->guest_phys_addr,
> +			reg->guest_user_addr,
> +			reg->host_user_addr,
> +			(uint64_t)(uintptr_t)mmap_addr,
> +			mmap_size,
> +			alignment,
> +			mmap_offset);
>  	}
>  
>  	return 0;
>  
>  err_mmap:
> -	while (idx--) {
> -		munmap((void *)(uintptr_t)pregion_orig[idx].mapped_address,
> -				pregion_orig[idx].mapped_size);
> -		close(pregion_orig[idx].fd);
> -	}
> -	free(dev->mem);
> +	free_mem_region(dev);
> +	rte_free(dev->mem);
>  	dev->mem = NULL;
>  	return -1;
>  }
> --
> 1.9.0


More information about the dev mailing list