[dpdk-dev] [PATCH v2 27/41] eal: add multiprocess init with memory hotplug
Anatoly Burakov
anatoly.burakov at intel.com
Wed Mar 7 17:56:55 CET 2018
For legacy memory mode, attach to the primary's memseg lists and
map hugepages as before.
For non-legacy mode, preallocate all VA space and then sync the
local memory map with the primary process.
Signed-off-by: Anatoly Burakov <anatoly.burakov at intel.com>
---
lib/librte_eal/bsdapp/eal/eal_hugepage_info.c | 7 ++
lib/librte_eal/common/eal_common_memory.c | 99 +++++++++++++++++++++----
lib/librte_eal/common/eal_hugepages.h | 5 ++
lib/librte_eal/linuxapp/eal/eal.c | 18 +++--
lib/librte_eal/linuxapp/eal/eal_hugepage_info.c | 53 ++++++++-----
lib/librte_eal/linuxapp/eal/eal_memory.c | 24 ++++--
6 files changed, 159 insertions(+), 47 deletions(-)
diff --git a/lib/librte_eal/bsdapp/eal/eal_hugepage_info.c b/lib/librte_eal/bsdapp/eal/eal_hugepage_info.c
index be2dbf0..18e6e5e 100644
--- a/lib/librte_eal/bsdapp/eal/eal_hugepage_info.c
+++ b/lib/librte_eal/bsdapp/eal/eal_hugepage_info.c
@@ -103,3 +103,10 @@ eal_hugepage_info_init(void)
return 0;
}
+
+/* memory hotplug is not supported in FreeBSD, so no need to implement this */
+int
+eal_hugepage_info_read(void)
+{
+ return 0;
+}
diff --git a/lib/librte_eal/common/eal_common_memory.c b/lib/librte_eal/common/eal_common_memory.c
index 457e239..a571e24 100644
--- a/lib/librte_eal/common/eal_common_memory.c
+++ b/lib/librte_eal/common/eal_common_memory.c
@@ -20,6 +20,7 @@
#include <rte_errno.h>
#include <rte_log.h>
+#include "eal_memalloc.h"
#include "eal_private.h"
#include "eal_internal_cfg.h"
@@ -147,19 +148,11 @@ alloc_memseg_list(struct rte_memseg_list *msl, uint64_t page_sz,
char name[RTE_FBARRAY_NAME_LEN];
int max_pages;
uint64_t mem_amount;
- void *addr;
if (!internal_config.legacy_mem) {
mem_amount = get_mem_amount(page_sz);
max_pages = mem_amount / page_sz;
-
- addr = eal_get_virtual_area(NULL, &mem_amount, page_sz, 0, 0);
- if (addr == NULL) {
- RTE_LOG(ERR, EAL, "Cannot reserve memory\n");
- return -1;
- }
} else {
- addr = NULL;
/* numer of memsegs in each list, these will not be single-page
* segments, so RTE_MAX_LEGACY_MEMSEG is like old default.
*/
@@ -177,7 +170,7 @@ alloc_memseg_list(struct rte_memseg_list *msl, uint64_t page_sz,
msl->hugepage_sz = page_sz;
msl->socket_id = socket_id;
- msl->base_va = addr;
+ msl->base_va = NULL;
RTE_LOG(DEBUG, EAL, "Memseg list allocated: 0x%zxkB at socket %i\n",
page_sz >> 10, socket_id);
@@ -186,16 +179,46 @@ alloc_memseg_list(struct rte_memseg_list *msl, uint64_t page_sz,
}
static int
-memseg_init(void)
+alloc_va_space(struct rte_memseg_list *msl)
+{
+ uint64_t mem_sz, page_sz;
+ void *addr;
+ int flags = 0;
+
+#ifdef RTE_ARCH_PPC_64
+ flags |= MAP_HUGETLB;
+#endif
+
+ page_sz = msl->hugepage_sz;
+ mem_sz = page_sz * msl->memseg_arr.len;
+
+ addr = eal_get_virtual_area(msl->base_va, &mem_sz, page_sz, 0, flags);
+ if (addr == NULL) {
+ if (rte_errno == EADDRNOTAVAIL)
+ RTE_LOG(ERR, EAL, "Could not mmap %llu bytes at [%p] - please use '--base-virtaddr' option\n",
+ (unsigned long long)mem_sz, msl->base_va);
+ else
+ RTE_LOG(ERR, EAL, "Cannot reserve memory\n");
+ return -1;
+ }
+ msl->base_va = addr;
+
+ return 0;
+}
+
+
+static int
+memseg_primary_init(void)
{
struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
int socket_id, hpi_idx, msl_idx = 0;
struct rte_memseg_list *msl;
- if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
- RTE_LOG(ERR, EAL, "Secondary process not supported\n");
- return -1;
- }
+ /* if we start allocating memory segments for pages straight away, VA
+ * space will become fragmented, reducing chances of success when
+ * secondary process maps the same addresses. to fix this, allocate
+ * fbarrays first, and then allocate VA space for them.
+ */
/* create memseg lists */
for (hpi_idx = 0; hpi_idx < (int) internal_config.num_hugepage_sizes;
@@ -235,12 +258,55 @@ memseg_init(void)
total_segs += msl->memseg_arr.len;
total_mem = total_segs * msl->hugepage_sz;
type_msl_idx++;
+
+ /* no need to preallocate VA in legacy mode */
+ if (internal_config.legacy_mem)
+ continue;
+
+ if (alloc_va_space(msl)) {
+ RTE_LOG(ERR, EAL, "Cannot allocate VA space for memseg list\n");
+ return -1;
+ }
}
}
}
return 0;
}
+static int
+memseg_secondary_init(void)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int msl_idx = 0;
+ struct rte_memseg_list *msl;
+
+ for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS; msl_idx++) {
+
+ msl = &mcfg->memsegs[msl_idx];
+
+ /* skip empty memseg lists */
+ if (msl->memseg_arr.len == 0)
+ continue;
+
+ if (rte_fbarray_attach(&msl->memseg_arr)) {
+ RTE_LOG(ERR, EAL, "Cannot attach to primary process memseg lists\n");
+ return -1;
+ }
+
+ /* no need to preallocate VA space in legacy mode */
+ if (internal_config.legacy_mem)
+ continue;
+
+ /* preallocate VA space */
+ if (alloc_va_space(msl)) {
+ RTE_LOG(ERR, EAL, "Cannot preallocate VA space for hugepage memory\n");
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
static struct rte_memseg *
virt2memseg(const void *addr, const struct rte_memseg_list *msl)
{
@@ -480,7 +546,10 @@ rte_eal_memory_init(void)
int retval;
RTE_LOG(DEBUG, EAL, "Setting up physically contiguous memory...\n");
- retval = memseg_init();
+ retval = rte_eal_process_type() == RTE_PROC_PRIMARY ?
+ memseg_primary_init() :
+ memseg_secondary_init();
+
if (retval < 0)
return -1;
diff --git a/lib/librte_eal/common/eal_hugepages.h b/lib/librte_eal/common/eal_hugepages.h
index f963ae5..38d0b04 100644
--- a/lib/librte_eal/common/eal_hugepages.h
+++ b/lib/librte_eal/common/eal_hugepages.h
@@ -34,4 +34,9 @@ struct hugepage_file {
*/
int eal_hugepage_info_init(void);
+/**
+ * Read information about hugepages on Linux, but don't clear them out.
+ */
+int eal_hugepage_info_read(void);
+
#endif /* EAL_HUGEPAGES_H */
diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c
index d336c96..7a0d742 100644
--- a/lib/librte_eal/linuxapp/eal/eal.c
+++ b/lib/librte_eal/linuxapp/eal/eal.c
@@ -805,13 +805,17 @@ rte_eal_init(int argc, char **argv)
"KNI module inserted\n");
}
- if (internal_config.no_hugetlbfs == 0 &&
- internal_config.process_type != RTE_PROC_SECONDARY &&
- eal_hugepage_info_init() < 0) {
- rte_eal_init_alert("Cannot get hugepage information.");
- rte_errno = EACCES;
- rte_atomic32_clear(&run_once);
- return -1;
+ if (internal_config.no_hugetlbfs == 0) {
+ /* rte_config isn't initialized yet */
+ ret = internal_config.process_type == RTE_PROC_PRIMARY ?
+ eal_hugepage_info_init() :
+ eal_hugepage_info_read();
+ if (ret < 0) {
+ rte_eal_init_alert("Cannot get hugepage information.");
+ rte_errno = EACCES;
+ rte_atomic32_clear(&run_once);
+ return -1;
+ }
}
if (internal_config.memory == 0 && internal_config.force_sockets == 0) {
diff --git a/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c b/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c
index 7e2475f..7a4adce 100644
--- a/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c
+++ b/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c
@@ -6,6 +6,7 @@
#include <sys/types.h>
#include <sys/file.h>
#include <dirent.h>
+#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
@@ -299,15 +300,9 @@ compare_hpi(const void *a, const void *b)
return hpi_b->hugepage_sz - hpi_a->hugepage_sz;
}
-/*
- * when we initialize the hugepage info, everything goes
- * to socket 0 by default. it will later get sorted by memory
- * initialization procedure.
- */
-int
-eal_hugepage_info_init(void)
-{
- const char dirent_start_text[] = "hugepages-";
+static int
+hugepage_info_init(bool clear_hugepages)
+{ const char dirent_start_text[] = "hugepages-";
const size_t dirent_start_len = sizeof(dirent_start_text) - 1;
unsigned int i, total_pages, num_sizes = 0;
DIR *dir;
@@ -350,18 +345,20 @@ eal_hugepage_info_init(void)
continue;
}
- /* try to obtain a writelock */
- hpi->lock_descriptor = open(hpi->hugedir, O_RDONLY);
+ if (clear_hugepages) {
+ /* try to obtain a writelock */
+ hpi->lock_descriptor = open(hpi->hugedir, O_RDONLY);
- /* if blocking lock failed */
- if (flock(hpi->lock_descriptor, LOCK_EX) == -1) {
- RTE_LOG(CRIT, EAL,
- "Failed to lock hugepage directory!\n");
- break;
+ /* if blocking lock failed */
+ if (flock(hpi->lock_descriptor, LOCK_EX) == -1) {
+ RTE_LOG(CRIT, EAL,
+ "Failed to lock hugepage directory!\n");
+ break;
+ }
+ /* clear out the hugepages dir from unused pages */
+ if (clear_hugedir(hpi->hugedir) == -1)
+ break;
}
- /* clear out the hugepages dir from unused pages */
- if (clear_hugedir(hpi->hugedir) == -1)
- break;
/*
* first, try to put all hugepages into relevant sockets, but
@@ -417,10 +414,26 @@ eal_hugepage_info_init(void)
num_pages += hpi->num_pages[j];
}
if (internal_config.hugepage_info[i].hugedir != NULL &&
- num_pages > 0)
+ (num_pages > 0 || !clear_hugepages))
return 0;
}
/* no valid hugepage mounts available, return error */
return -1;
}
+
+int eal_hugepage_info_read(void)
+{
+ return hugepage_info_init(false);
+}
+
+/*
+ * when we initialize the hugepage info, everything goes
+ * to socket 0 by default. it will later get sorted by memory
+ * initialization procedure.
+ */
+int
+eal_hugepage_info_init(void)
+{
+ return hugepage_info_init(true);
+}
diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c
index e0b4988..f74291f 100644
--- a/lib/librte_eal/linuxapp/eal/eal_memory.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
@@ -1569,6 +1569,22 @@ eal_legacy_hugepage_attach(void)
return -1;
}
+static int
+eal_hugepage_attach(void)
+{
+ if (eal_memalloc_sync_with_primary()) {
+ RTE_LOG(ERR, EAL, "Could not map memory from primary process\n");
+ if (aslr_enabled() > 0) {
+ RTE_LOG(ERR, EAL, "It is recommended to "
+ "disable ASLR in the kernel "
+ "and retry running both primary "
+ "and secondary processes\n");
+ }
+ return -1;
+ }
+ return 0;
+}
+
int
rte_eal_hugepage_init(void)
{
@@ -1580,11 +1596,9 @@ rte_eal_hugepage_init(void)
int
rte_eal_hugepage_attach(void)
{
- if (internal_config.legacy_mem)
- return eal_legacy_hugepage_attach();
- else
- RTE_LOG(ERR, EAL, "Secondary processes aren't supported yet\n");
- return -1;
+ return internal_config.legacy_mem ?
+ eal_legacy_hugepage_attach() :
+ eal_hugepage_attach();
}
int
--
2.7.4
More information about the dev
mailing list