[dpdk-dev] [PATCH] eal: use get_mempolicy(2) to find numa socket on Linux

Gregory Etelson gregory at weka.io
Sun Jun 18 17:52:02 CEST 2017


Use get_mempolicy(2) to find numa socket on Linux
instead of parsing /proc/self/numa_maps.
When process maps around 1K hugepages
numa_maps file can miss huge records in older Linux kernels.
get_mempolicy() proved more reliable

Requires numactl dev package 

Signed-off-by: Gregory Etelson <gregory at weka.io>
---
 lib/librte_eal/linuxapp/eal/eal_memory.c | 85 ++++----------------------------
 mk/exec-env/linuxapp/rte.vars.mk         |  1 +
 mk/internal/rte.build-pre.mk             |  1 +
 3 files changed, 11 insertions(+), 76 deletions(-)

diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c
index e17c9cb..48b71bc 100644
--- a/lib/librte_eal/linuxapp/eal/eal_memory.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
@@ -54,6 +54,7 @@
 #include <sys/time.h>
 #include <signal.h>
 #include <setjmp.h>
+#include <numaif.h>
 
 #include <rte_log.h>
 #include <rte_memory.h>
@@ -495,86 +496,20 @@ unmap_all_hugepages_orig(struct hugepage_file *hugepg_tbl, struct hugepage_info
         return 0;
 }
 
-/*
- * Parse /proc/self/numa_maps to get the NUMA socket ID for each huge
- * page.
- */
 static int
 find_numasocket(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
 {
-	int socket_id;
-	char *end, *nodestr;
-	unsigned i, hp_count = 0;
-	uint64_t virt_addr;
-	char buf[BUFSIZ];
-	char hugedir_str[PATH_MAX];
-	FILE *f;
-
-	f = fopen("/proc/self/numa_maps", "r");
-	if (f == NULL) {
-		RTE_LOG(NOTICE, EAL, "cannot open /proc/self/numa_maps,"
-				" consider that all memory is in socket_id 0\n");
-		return 0;
-	}
-
-	snprintf(hugedir_str, sizeof(hugedir_str),
-			"%s/%s", hpi->hugedir, internal_config.hugefile_prefix);
-
-	/* parse numa map */
-	while (fgets(buf, sizeof(buf), f) != NULL) {
-
-		/* ignore non huge page */
-		if (strstr(buf, " huge ") == NULL &&
-				strstr(buf, hugedir_str) == NULL)
-			continue;
-
-		/* get zone addr */
-		virt_addr = strtoull(buf, &end, 16);
-		if (virt_addr == 0 || end == buf) {
-			RTE_LOG(ERR, EAL, "%s(): error in numa_maps parsing\n", __func__);
-			goto error;
-		}
-
-		/* get node id (socket id) */
-		nodestr = strstr(buf, " N");
-		if (nodestr == NULL) {
-			RTE_LOG(ERR, EAL, "%s(): error in numa_maps parsing\n", __func__);
-			goto error;
-		}
-		nodestr += 2;
-		end = strstr(nodestr, "=");
-		if (end == NULL) {
-			RTE_LOG(ERR, EAL, "%s(): error in numa_maps parsing\n", __func__);
-			goto error;
-		}
-		end[0] = '\0';
-		end = NULL;
-
-		socket_id = strtoul(nodestr, &end, 0);
-		if ((nodestr[0] == '\0') || (end == NULL) || (*end != '\0')) {
-			RTE_LOG(ERR, EAL, "%s(): error in numa_maps parsing\n", __func__);
-			goto error;
-		}
-
-		/* if we find this page in our mappings, set socket_id */
-		for (i = 0; i < hpi->num_pages[0]; i++) {
-			void *va = (void *)(unsigned long)virt_addr;
-			if (hugepg_tbl[i].orig_va == va) {
-				hugepg_tbl[i].socket_id = socket_id;
-				hp_count++;
-			}
+	unsigned int i;
+	for (i = 0; i < hpi->num_pages[0]; i++) {
+		if (get_mempolicy(&hugepg_tbl[i].socket_id,
+			NULL, 0, hugepg_tbl[i].orig_va,
+			MPOL_F_NODE | MPOL_F_ADDR) < 0) {
+			RTE_LOG(ERR, EAL, "Failed to find NUMA socket for %p\n",
+				hugepg_tbl[i].orig_va);
+			return -1;
 		}
 	}
-
-	if (hp_count < hpi->num_pages[0])
-		goto error;
-
-	fclose(f);
 	return 0;
-
-error:
-	fclose(f);
-	return -1;
 }
 
 static int
@@ -1051,8 +986,6 @@ rte_eal_hugepage_init(void)
 		}
 
 		if (find_numasocket(&tmp_hp[hp_offset], hpi) < 0){
-			RTE_LOG(DEBUG, EAL, "Failed to find NUMA socket for %u MB pages\n",
-					(unsigned)(hpi->hugepage_sz / 0x100000));
 			goto fail;
 		}
 
diff --git a/mk/exec-env/linuxapp/rte.vars.mk b/mk/exec-env/linuxapp/rte.vars.mk
index 9a71699..0da977a 100644
--- a/mk/exec-env/linuxapp/rte.vars.mk
+++ b/mk/exec-env/linuxapp/rte.vars.mk
@@ -59,5 +59,6 @@ LINK_USING_CC := 1
 EXECENV_LDFLAGS += -export-dynamic
 # Add library to the group to resolve symbols
 EXECENV_LDLIBS  += -ldl
+EXECENV_LDLIBS  += -lnuma
 
 export EXECENV_CFLAGS EXECENV_LDFLAGS EXECENV_ASFLAGS EXECENV_LDLIBS
diff --git a/mk/internal/rte.build-pre.mk b/mk/internal/rte.build-pre.mk
index 560cf82..9bc0ffd 100644
--- a/mk/internal/rte.build-pre.mk
+++ b/mk/internal/rte.build-pre.mk
@@ -29,6 +29,7 @@
 #   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 #   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+LDLIBS += -lnuma
 _BUILD_TARGETS := _prebuild _build _postbuild
 
 comma := ,
-- 
2.9.3




More information about the dev mailing list