[dpdk-dev] [PATCH v8] eal: map PCI memory resources after hugepages

Bruce Richardson bruce.richardson at intel.com
Thu Nov 13 14:46:55 CET 2014


On Tue, Nov 11, 2014 at 10:09:25AM +0000, Anatoly Burakov wrote:
> A multi-process DPDK application must mmap hugepages and PCI resources
> into the same virtual address space. By default, the virtual addresses
> are chosen automatically by the primary process when it calls mmap.
> But sometimes the chosen virtual addresses aren't usable in a secondary
> process - for example, when the secondary process is linked with more
> libraries than the primary process, and one of those libraries occupies
> the same address space that the primary process has requested for PCI
> mappings.
> 
> This patch makes EAL try to map PCI BARs right after the hugepages in
> virtual memory (instead of at a location chosen by mmap), so that PCI
> BARs have less chance of ending up in random places in virtual memory.
> 
> Signed-off-by: Liang Xu <liang.xu at cinfotech.cn>
> Signed-off-by: Anatoly Burakov <anatoly.burakov at intel.com>

Acked-by: Bruce Richardson <bruce.richardson at intel.com>

> ---
>  lib/librte_eal/linuxapp/eal/eal_pci.c              | 30 ++++++++++++++++------
>  lib/librte_eal/linuxapp/eal/eal_pci_uio.c          | 13 ++++++++--
>  lib/librte_eal/linuxapp/eal/eal_pci_vfio.c         | 19 +++++++++++---
>  lib/librte_eal/linuxapp/eal/include/eal_pci_init.h |  6 +++++
>  4 files changed, 55 insertions(+), 13 deletions(-)
> 
> diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c
> index 5fe3961..79fbbb8 100644
> --- a/lib/librte_eal/linuxapp/eal/eal_pci.c
> +++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
> @@ -97,6 +97,25 @@ error:
>  	return -1;
>  }
>  
> +void *
> +pci_find_max_end_va(void)
> +{
> +	const struct rte_memseg *seg = rte_eal_get_physmem_layout();
> +	const struct rte_memseg *last = seg;
> +	unsigned i = 0;
> +
> +	for (i = 0; i < RTE_MAX_MEMSEG; i++, seg++) {
> +		if (seg->addr == NULL)
> +			break;
> +
> +		if (seg->addr > last->addr)
> +			last = seg;
> +
> +	}
> +	return RTE_PTR_ADD(last->addr, last->len);
> +}
> +
> +
>  /* map a particular resource from a file */
>  void *
>  pci_map_resource(void *requested_addr, int fd, off_t offset, size_t size)
> @@ -106,21 +125,16 @@ pci_map_resource(void *requested_addr, int fd, off_t offset, size_t size)
>  	/* Map the PCI memory resource of device */
>  	mapaddr = mmap(requested_addr, size, PROT_READ | PROT_WRITE,
>  			MAP_SHARED, fd, offset);
> -	if (mapaddr == MAP_FAILED ||
> -			(requested_addr != NULL && mapaddr != requested_addr)) {
> +	if (mapaddr == MAP_FAILED) {
>  		RTE_LOG(ERR, EAL, "%s(): cannot mmap(%d, %p, 0x%lx, 0x%lx): %s (%p)\n",
>  			__func__, fd, requested_addr,
>  			(unsigned long)size, (unsigned long)offset,
>  			strerror(errno), mapaddr);
> -		goto fail;
> +	} else {
> +		RTE_LOG(DEBUG, EAL, "  PCI memory mapped at %p\n", mapaddr);
>  	}
>  
> -	RTE_LOG(DEBUG, EAL, "  PCI memory mapped at %p\n", mapaddr);
> -
>  	return mapaddr;
> -
> -fail:
> -	return NULL;
>  }
>  
>  /* parse the "resource" sysfs file */
> diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
> index 7e62266..e53f06b 100644
> --- a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
> +++ b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
> @@ -35,6 +35,7 @@
>  #include <fcntl.h>
>  #include <dirent.h>
>  #include <sys/stat.h>
> +#include <sys/mman.h>
>  
>  #include <rte_log.h>
>  #include <rte_pci.h>
> @@ -48,6 +49,8 @@
>  
>  static int pci_parse_sysfs_value(const char *filename, uint64_t *val);
>  
> +void *pci_map_addr = NULL;
> +
>  
>  #define OFF_MAX              ((uint64_t)(off_t)-1)
>  static int
> @@ -371,10 +374,16 @@ pci_uio_map_resource(struct rte_pci_device *dev)
>  			if (maps[j].addr != NULL)
>  				fail = 1;
>  			else {
> -				mapaddr = pci_map_resource(NULL, fd, (off_t)offset,
> +				/* try mapping somewhere close to the end of hugepages */
> +				if (pci_map_addr == NULL)
> +					pci_map_addr = pci_find_max_end_va();
> +
> +				mapaddr = pci_map_resource(pci_map_addr, fd, (off_t)offset,
>  						(size_t)maps[j].size);
> -				if (mapaddr == NULL)
> +				if (mapaddr == MAP_FAILED)
>  					fail = 1;
> +
> +				pci_map_addr = RTE_PTR_ADD(mapaddr, (size_t) maps[j].size);
>  			}
>  
>  			if (fail) {
> diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> index c776ddc..c1246e8 100644
> --- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> +++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> @@ -37,6 +37,7 @@
>  #include <sys/eventfd.h>
>  #include <sys/socket.h>
>  #include <sys/ioctl.h>
> +#include <sys/mman.h>
>  
>  #include <rte_log.h>
>  #include <rte_pci.h>
> @@ -720,10 +721,22 @@ pci_vfio_map_resource(struct rte_pci_device *dev)
>  		if (i == msix_bar)
>  			continue;
>  
> -		bar_addr = pci_map_resource(maps[i].addr, vfio_dev_fd, reg.offset,
> -				reg.size);
> +		if (internal_config.process_type == RTE_PROC_PRIMARY) {
> +			/* try mapping somewhere close to the end of hugepages */
> +			if (pci_map_addr == NULL)
> +				pci_map_addr = pci_find_max_end_va();
> +
> +			bar_addr = pci_map_resource(pci_map_addr, vfio_dev_fd, reg.offset,
> +					reg.size);
> +			pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg.size);
> +		} else {
> +			bar_addr = pci_map_resource(maps[i].addr, vfio_dev_fd, reg.offset,
> +					reg.size);
> +		}
>  
> -		if (bar_addr == NULL) {
> +		if (bar_addr == MAP_FAILED ||
> +				(internal_config.process_type == RTE_PROC_SECONDARY &&
> +						bar_addr != maps[i].addr)) {
>  			RTE_LOG(ERR, EAL, "  %s mapping BAR%i failed: %s\n", pci_addr, i,
>  					strerror(errno));
>  			close(vfio_dev_fd);
> diff --git a/lib/librte_eal/linuxapp/eal/include/eal_pci_init.h b/lib/librte_eal/linuxapp/eal/include/eal_pci_init.h
> index d758bee..1070eb8 100644
> --- a/lib/librte_eal/linuxapp/eal/include/eal_pci_init.h
> +++ b/lib/librte_eal/linuxapp/eal/include/eal_pci_init.h
> @@ -59,6 +59,12 @@ struct mapped_pci_resource {
>  TAILQ_HEAD(mapped_pci_res_list, mapped_pci_resource);
>  extern struct mapped_pci_res_list *pci_res_list;
>  
> +/*
> + * Helper function to map PCI resources right after hugepages in virtual memory
> + */
> +extern void *pci_map_addr;
> +void *pci_find_max_end_va(void);
> +
>  void *pci_map_resource(void *requested_addr, int fd, off_t offset,
>  		size_t size);
>  
> -- 
> 1.8.1.4
> 


More information about the dev mailing list