[dpdk-dev] [RFC 4/5] virtio/container: adjust memory initialization process

Ananyev, Konstantin konstantin.ananyev at intel.com
Fri Nov 6 17:21:55 CET 2015


Hi,

> -----Original Message-----
> From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Jianfeng Tan
> Sent: Thursday, November 05, 2015 6:31 PM
> To: dev at dpdk.org
> Cc: nakajima.yoshihiro at lab.ntt.co.jp; zhbzg at huawei.com; mst at redhat.com; gaoxiaoqiu at huawei.com; oscar.zhangbo at huawei.com;
> ann.zhuangyanying at huawei.com; zhoujingbin at huawei.com; guohongzhen at huawei.com
> Subject: [dpdk-dev] [RFC 4/5] virtio/container: adjust memory initialization process
> 
> When using virtio for container, we should specify --no-huge so
> that in memory initialization, shm_open() is used to alloc memory
> from tmpfs filesystem /dev/shm/.
> 
> Signed-off-by: Huawei Xie <huawei.xie at intel.com>
> Signed-off-by: Jianfeng Tan <jianfeng.tan at intel.com>
> ---
>  lib/librte_eal/common/include/rte_memory.h |  5 +++
>  lib/librte_eal/linuxapp/eal/eal_memory.c   | 58 ++++++++++++++++++++++++++++--
>  lib/librte_mempool/rte_mempool.c           | 16 ++++-----
>  3 files changed, 69 insertions(+), 10 deletions(-)
> 
> diff --git a/lib/librte_eal/common/include/rte_memory.h b/lib/librte_eal/common/include/rte_memory.h
> index 1bed415..9c1effc 100644
> --- a/lib/librte_eal/common/include/rte_memory.h
> +++ b/lib/librte_eal/common/include/rte_memory.h
> @@ -100,6 +100,7 @@ struct rte_memseg {
>  	int32_t socket_id;          /**< NUMA socket ID. */
>  	uint32_t nchannel;          /**< Number of channels. */
>  	uint32_t nrank;             /**< Number of ranks. */
> +	int fd;                     /**< fd used for share this memory */
>  #ifdef RTE_LIBRTE_XEN_DOM0
>  	 /**< store segment MFNs */
>  	uint64_t mfn[DOM0_NUM_MEMBLOCK];
> @@ -128,6 +129,10 @@ int rte_mem_lock_page(const void *virt);
>   */
>  phys_addr_t rte_mem_virt2phy(const void *virt);
> 
> +
> +int
> +rte_memseg_info_get(int index, int *pfd, uint64_t *psize, void **paddr);
> +
>  /**
>   * Get the layout of the available physical memory.
>   *
> diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c
> index ac2745e..9abbfc6 100644
> --- a/lib/librte_eal/linuxapp/eal/eal_memory.c
> +++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
> @@ -80,6 +80,9 @@
>  #include <errno.h>
>  #include <sys/ioctl.h>
>  #include <sys/time.h>
> +#include <mntent.h>
> +#include <sys/mman.h>
> +#include <sys/file.h>
> 
>  #include <rte_log.h>
>  #include <rte_memory.h>
> @@ -143,6 +146,18 @@ rte_mem_lock_page(const void *virt)
>  	return mlock((void*)aligned, page_size);
>  }
> 
> +int
> +rte_memseg_info_get(int index, int *pfd, uint64_t *psize, void **paddr)
> +{
> +	struct rte_mem_config *mcfg;
> +	mcfg = rte_eal_get_configuration()->mem_config;
> +
> +	*pfd = mcfg->memseg[index].fd;
> +	*psize = (uint64_t)mcfg->memseg[index].len;
> +	*paddr = (void *)(uint64_t)mcfg->memseg[index].addr;
> +	return 0;
> +}

I wonder who will use that function?
I can't see any references to it in this patch or the next one.

> +
>  /*
>   * Get physical address of any mapped virtual address in the current process.
>   */
> @@ -1044,6 +1059,42 @@ calc_num_pages_per_socket(uint64_t * memory,
>  	return total_num_pages;
>  }
> 
> +static void *
> +rte_eal_shm_create(int *pfd)
> +{
> +	int ret, fd;
> +	char filepath[256];
> +	void *vaddr;
> +	uint64_t size = internal_config.memory;
> +
> +	sprintf(filepath, "/%s_cvio", internal_config.hugefile_prefix);
> +
> +	fd = shm_open(filepath, O_CREAT | O_RDWR, S_IRUSR | S_IWUSR);
> +	if (fd < 0) {
> +		rte_panic("shm_open %s failed: %s\n", filepath, strerror(errno));
> +	}
> +	ret = flock(fd, LOCK_EX);
> +	if (ret < 0) {
> +		close(fd);
> +		rte_panic("flock %s failed: %s\n", filepath, strerror(errno));
> +	}
> +
> +	ret = ftruncate(fd, size);
> +	if (ret < 0) {
> +		rte_panic("ftruncate failed: %s\n", strerror(errno));
> +	}
> +	/* flag: MAP_HUGETLB */

Could you explain what that comment means here?
Do you plan to use MAP_HUGETLB in the mmap() call below, or ...?

> +	vaddr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
> +	if (vaddr == MAP_FAILED) {
> +		rte_panic("mmap failed: %s\n", strerror(errno));
> +	}
> +	memset(vaddr, 0, size);
> +	*pfd = fd;
> +
> +	return vaddr;
> +}
> +
> +
>  /*
>   * Prepare physical memory mapping: fill configuration structure with
>   * these infos, return 0 on success.
> @@ -1072,7 +1123,9 @@ rte_eal_hugepage_init(void)
>  	int new_pages_count[MAX_HUGEPAGE_SIZES];
>  #endif
> 
> +#ifndef RTE_VIRTIO_VDEV
>  	test_proc_pagemap_readable();
> +#endif
> 
>  	memset(used_hp, 0, sizeof(used_hp));
> 
> @@ -1081,8 +1134,8 @@ rte_eal_hugepage_init(void)
> 
>  	/* hugetlbfs can be disabled */
>  	if (internal_config.no_hugetlbfs) {
> -		addr = mmap(NULL, internal_config.memory, PROT_READ | PROT_WRITE,
> -				MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
> +		int fd;
> +		addr = rte_eal_shm_create(&fd);

Why do you remove the ability to mmap() anonymous (/dev/zero-style) memory here?
Probably not everyone plans to use --no-hugepages only inside containers.


>  		if (addr == MAP_FAILED) {
>  			RTE_LOG(ERR, EAL, "%s: mmap() failed: %s\n", __func__,
>  					strerror(errno));
> @@ -1093,6 +1146,7 @@ rte_eal_hugepage_init(void)
>  		mcfg->memseg[0].hugepage_sz = RTE_PGSIZE_4K;
>  		mcfg->memseg[0].len = internal_config.memory;
>  		mcfg->memseg[0].socket_id = 0;
> +		mcfg->memseg[0].fd = fd;
>  		return 0;
>  	}
> 
> diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
> index e57cbbd..8f8852b 100644
> --- a/lib/librte_mempool/rte_mempool.c
> +++ b/lib/librte_mempool/rte_mempool.c
> @@ -453,13 +453,6 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
>  		rte_errno = EINVAL;
>  		return NULL;
>  	}
> -
> -	/* check that we have both VA and PA */
> -	if (vaddr != NULL && paddr == NULL) {
> -		rte_errno = EINVAL;
> -		return NULL;
> -	}
> -
>  	/* Check that pg_num and pg_shift parameters are valid. */
>  	if (pg_num < RTE_DIM(mp->elt_pa) || pg_shift > MEMPOOL_PG_SHIFT_MAX) {
>  		rte_errno = EINVAL;
> @@ -596,8 +589,15 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
> 
>  	/* mempool elements in a separate chunk of memory. */
>  	} else {
> +		/* when VA is specified, PA should be specified? */
> +		if (rte_eal_has_hugepages()) {
> +			if (paddr == NULL) {
> +				rte_errno = EINVAL;
> +				return NULL;
> +			}
> +			memcpy(mp->elt_pa, paddr, sizeof (mp->elt_pa[0]) * pg_num);
> +		}
>  		mp->elt_va_start = (uintptr_t)vaddr;
> -		memcpy(mp->elt_pa, paddr, sizeof (mp->elt_pa[0]) * pg_num);

Could you explain the reason for that change?
Especially, why is a mempool over external memory now only allowed with the hugepages config?
Konstantin

>  	}
> 
>  	mp->elt_va_end = mp->elt_va_start;
> --
> 2.1.4



More information about the dev mailing list