[dpdk-dev,RFC,v2,14/23] eal: add support for dynamic unmapping of pages

Message ID bc77dcd45d3d4ca301fc4753bec25e5db93e94ec.1513681966.git.anatoly.burakov@intel.com (mailing list archive)
State Superseded, archived

Checks

Context               Check     Description
ci/checkpatch         warning   coding style issues
ci/Intel-compilation  fail      Compilation issues

Commit Message

Burakov, Anatoly Dec. 19, 2017, 11:14 a.m. UTC
  This isn't used anywhere yet, but the support is now there. Also,
add cleanup to the allocation procedure: if we fail to allocate
everything we asked for, free back whatever was already allocated.
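
A minimal sketch of how the alloc/free pair from this series might be
exercised from EAL-internal code (illustrative only; error handling and
locking are elided, and the caller name is made up):

	#include <rte_memory.h>
	#include "eal_memalloc.h"

	static int
	alloc_and_release_one_page(uint64_t page_sz, int socket)
	{
		struct rte_memseg *ms;

		/* allocate a single hugepage on the requested socket */
		ms = eal_memalloc_alloc_page(page_sz, socket);
		if (ms == NULL)
			return -1;

		/* ... use ms->addr ... */

		/* unmap the page and punch a hole in (or unlink) the
		 * backing hugetlbfs file
		 */
		return eal_memalloc_free_page(ms);
	}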

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 lib/librte_eal/common/eal_memalloc.h       |   3 +
 lib/librte_eal/linuxapp/eal/eal_memalloc.c | 131 ++++++++++++++++++++++++++++-
 2 files changed, 133 insertions(+), 1 deletion(-)
  

Patch

diff --git a/lib/librte_eal/common/eal_memalloc.h b/lib/librte_eal/common/eal_memalloc.h
index 59fd330..47e4367 100755
--- a/lib/librte_eal/common/eal_memalloc.h
+++ b/lib/librte_eal/common/eal_memalloc.h
@@ -44,4 +44,7 @@  int
 eal_memalloc_alloc_page_bulk(struct rte_memseg **ms, int n, uint64_t size,
 		int socket, bool exact);
 
+int
+eal_memalloc_free_page(struct rte_memseg *ms);
+
 #endif // EAL_MEMALLOC_H
diff --git a/lib/librte_eal/linuxapp/eal/eal_memalloc.c b/lib/librte_eal/linuxapp/eal/eal_memalloc.c
index 527c2f6..13172a0 100755
--- a/lib/librte_eal/linuxapp/eal/eal_memalloc.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memalloc.c
@@ -109,6 +109,18 @@  huge_recover_sigbus(void)
 	}
 }
 
+/*
+ * use fstat to check whether the file still has any blocks allocated on disk
+ */
+static bool
+is_zero_length(int fd)
+{
+	struct stat st;
+	if (fstat(fd, &st) < 0)
+		return false;
+	return st.st_blocks == 0;
+}
+
 #ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES
 static bool
 prepare_numa(int *oldpolicy, struct bitmask *oldmask, int socket_id) {
@@ -267,6 +279,61 @@  alloc_page(struct rte_memseg *ms, void *addr, uint64_t size, int socket_id,
 	return ret;
 }
 
+static int
+free_page(struct rte_memseg *ms, struct hugepage_info *hi, unsigned list_idx,
+		unsigned seg_idx) {
+	uint64_t fa_offset;
+	char path[PATH_MAX];
+	int fd;
+
+	fa_offset = seg_idx * ms->hugepage_sz;
+
+	if (internal_config.single_file_segments) {
+		eal_get_hugefile_path(path, sizeof(path), hi->hugedir, list_idx);
+	} else {
+		eal_get_hugefile_path(path, sizeof(path), hi->hugedir,
+				list_idx * RTE_MAX_MEMSEG_PER_LIST + seg_idx);
+	}
+
+	munmap(ms->addr, ms->hugepage_sz);
+
+	/* TODO: race condition? */
+
+	if (mmap(ms->addr, ms->hugepage_sz, PROT_READ,
+			MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0) ==
+				MAP_FAILED) {
+		RTE_LOG(DEBUG, EAL, "couldn't unmap page\n");
+		return -1;
+	}
+
+	if (internal_config.single_file_segments) {
+		/* now, truncate or remove the original file */
+		fd = open(path, O_RDWR, 0600);
+		if (fd < 0) {
+			RTE_LOG(DEBUG, EAL, "%s(): open failed: %s\n", __func__,
+					strerror(errno));
+			/* TODO: proper error handling */
+			return -1;
+		}
+
+		if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+				fa_offset, ms->hugepage_sz)) {
+			RTE_LOG(DEBUG, EAL, "Page deallocation failed: %s\n",
+				strerror(errno));
+		}
+		if (is_zero_length(fd)) {
+			unlink(path);
+		}
+		close(fd);
+	} else {
+		unlink(path);
+	}
+
+	memset(ms, 0, sizeof(*ms));
+
+	return 0;
+}
+
 int
 eal_memalloc_alloc_page_bulk(struct rte_memseg **ms, int n,
 		uint64_t size, int socket, bool exact) {
@@ -274,7 +341,7 @@  eal_memalloc_alloc_page_bulk(struct rte_memseg **ms, int n,
 	struct rte_memseg_list *msl = NULL;
 	void *addr;
 	unsigned msl_idx;
-	int cur_idx, next_idx, end_idx, i, ret = 0;
+	int cur_idx, next_idx, start_idx, end_idx, i, j, ret = 0;
 #ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES
 	bool have_numa;
 	int oldpolicy;
@@ -366,6 +433,7 @@  eal_memalloc_alloc_page_bulk(struct rte_memseg **ms, int n,
 	}
 
 	end_idx = cur_idx + n;
+	start_idx = cur_idx;
 
 #ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES
 	have_numa = prepare_numa(&oldpolicy, oldmask, socket);
@@ -387,6 +455,20 @@  eal_memalloc_alloc_page_bulk(struct rte_memseg **ms, int n,
 				ret = i;
 				goto restore_numa;
 			}
+			RTE_LOG(DEBUG, EAL, "exact number of pages was requested, but only %i could be allocated, so freeing them\n",
+				i);
+
+			/* clean up */
+			for (j = start_idx; j < cur_idx; j++) {
+				struct rte_memseg *tmp;
+				struct rte_fbarray *arr = &msl->memseg_arr;
+
+				tmp = rte_fbarray_get(arr, j);
+				if (free_page(tmp, hi, msl_idx, j))
+					rte_panic("Cannot free page\n");
+
+				rte_fbarray_set_used(arr, j, false);
+			}
 			if (ms)
 				memset(ms, 0, sizeof(struct rte_memseg*) * n);
 			ret = -1;
@@ -414,3 +496,50 @@  eal_memalloc_alloc_page(uint64_t size, int socket) {
 		return NULL;
 	return ms;
 }
+
+int
+eal_memalloc_free_page(struct rte_memseg *ms) {
+	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+	struct rte_memseg_list *msl = NULL;
+	unsigned msl_idx, seg_idx;
+	struct hugepage_info *hi = NULL;
+
+	/* dynamic free not supported in legacy mode */
+	if (internal_config.legacy_mem)
+		return -1;
+
+	for (int i = 0; i < (int) RTE_DIM(internal_config.hugepage_info); i++) {
+		if (ms->hugepage_sz ==
+				internal_config.hugepage_info[i].hugepage_sz) {
+			hi = &internal_config.hugepage_info[i];
+			break;
+		}
+	}
+	if (!hi) {
+		RTE_LOG(ERR, EAL, "Can't find relevant hugepage_info entry\n");
+		return -1;
+	}
+
+	for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS; msl_idx++) {
+		uintptr_t start_addr, end_addr;
+		struct rte_memseg_list *cur = &mcfg->memsegs[msl_idx];
+
+		start_addr = (uintptr_t) cur->base_va;
+		end_addr = start_addr +
+				cur->memseg_arr.capacity * cur->hugepage_sz;
+
+		if ((uintptr_t) ms->addr < start_addr ||
+				(uintptr_t) ms->addr >= end_addr) {
+			continue;
+		}
+		msl = cur;
+		seg_idx = RTE_PTR_DIFF(ms->addr, start_addr) / ms->hugepage_sz;
+		break;
+	}
+	if (!msl) {
+		RTE_LOG(ERR, EAL, "Couldn't find memseg list\n");
+		return -1;
+	}
+	rte_fbarray_set_used(&msl->memseg_arr, seg_idx, false);
+	return free_page(ms, hi, msl_idx, seg_idx);
+}
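
For reference, the single-file-segments free path above boils down to
punching a hole over the page's range and unlinking the backing file once
nothing is left allocated in it. A self-contained sketch of that scheme
(helper names here are illustrative, not part of the patch):

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <stdbool.h>
	#include <stdint.h>
	#include <sys/stat.h>
	#include <unistd.h>

	/* like is_zero_length(): the file is "empty" once no blocks remain
	 * allocated - st_size is unaffected because of FALLOC_FL_KEEP_SIZE
	 */
	static bool
	file_has_no_blocks(int fd)
	{
		struct stat st;

		if (fstat(fd, &st) < 0)
			return false;
		return st.st_blocks == 0;
	}

	/* punch out one page-sized hole, drop the file once it is empty */
	static int
	punch_page(const char *path, uint64_t offset, uint64_t page_sz)
	{
		int fd, ret = 0;

		fd = open(path, O_RDWR, 0600);
		if (fd < 0)
			return -1;
		if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
				offset, page_sz) < 0)
			ret = -1;
		else if (file_has_no_blocks(fd))
			unlink(path);
		close(fd);
		return ret;
	}

Note that hole punching on hugetlbfs needs kernel-side fallocate support;
without it the fallocate() call fails, which the patch only logs at DEBUG
level before carrying on.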