[dpdk-dev,15/41] eal: add support for unmapping pages at runtime

Message ID 421887e2d97e0bf14106cf23ad65e99cad670845.1520083504.git.anatoly.burakov@intel.com (mailing list archive)
State Superseded, archived
Checks

Context               Check     Description
ci/checkpatch         success   coding style OK
ci/Intel-compilation  fail      apply patch file failure

Commit Message

Anatoly Burakov March 3, 2018, 1:46 p.m. UTC
This isn't used anywhere yet, but the support is now there. Also,
add cleanup to the allocation procedures, so that if we fail to
allocate everything we asked for, we can free it all back.

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 lib/librte_eal/common/eal_memalloc.h       |   3 +
 lib/librte_eal/linuxapp/eal/eal_memalloc.c | 148 ++++++++++++++++++++++++++++-
 2 files changed, 146 insertions(+), 5 deletions(-)
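
The new eal_memalloc_free_page() has no callers yet; it is expected to be
driven by the EAL allocator later in this series. A minimal sketch of how an
EAL-internal caller might combine it with the bulk allocator (the header is
internal to EAL, and grow_and_shrink() below is a hypothetical helper, not
part of this patch):

	#include <stdio.h>
	#include <stdint.h>
	#include <stdbool.h>
	#include <rte_memory.h>		/* struct rte_memseg */
	#include "eal_memalloc.h"

	static int
	grow_and_shrink(uint64_t page_sz, int socket)
	{
		struct rte_memseg *ms[4];
		int i, n;

		/* exact == true: allocate all 4 pages or fail with -1; on
		 * failure the allocator now frees whatever it had grabbed
		 */
		n = eal_memalloc_alloc_page_bulk(ms, 4, page_sz, socket, true);
		if (n < 0)
			return -1;

		/* ... hand the segments to the malloc heap ... */

		/* pages can now also be returned one at a time */
		for (i = 0; i < n; i++)
			if (eal_memalloc_free_page(ms[i]) < 0)
				printf("cannot free page %d\n", i);
		return 0;
	}

With exact == true, a partial failure now rolls back every page grabbed so
far instead of leaving the memseg list half-populated.
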
  

Patch

diff --git a/lib/librte_eal/common/eal_memalloc.h b/lib/librte_eal/common/eal_memalloc.h
index c1076cf..adf59c4 100644
--- a/lib/librte_eal/common/eal_memalloc.h
+++ b/lib/librte_eal/common/eal_memalloc.h
@@ -16,4 +16,7 @@  int
 eal_memalloc_alloc_page_bulk(struct rte_memseg **ms, int n, uint64_t size,
 		int socket, bool exact);
 
+int
+eal_memalloc_free_page(struct rte_memseg *ms);
+
 #endif // EAL_MEMALLOC_H
diff --git a/lib/librte_eal/linuxapp/eal/eal_memalloc.c b/lib/librte_eal/linuxapp/eal/eal_memalloc.c
index 1ba1201..bbeeeba 100644
--- a/lib/librte_eal/linuxapp/eal/eal_memalloc.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memalloc.c
@@ -499,6 +499,64 @@  alloc_page(struct rte_memseg *ms, void *addr, uint64_t size, int socket_id,
 	return -1;
 }
 
+static int
+free_page(struct rte_memseg *ms, struct hugepage_info *hi,
+		unsigned int list_idx, unsigned int seg_idx)
+{
+	uint64_t map_offset;
+	char path[PATH_MAX];
+	int fd, ret;
+
+	if (mmap(ms->addr, ms->hugepage_sz, PROT_READ,
+			MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0) ==
+				MAP_FAILED) {
+		RTE_LOG(DEBUG, EAL, "couldn't unmap page\n");
+		return -1;
+	}
+
+	fd = get_page_fd(path, sizeof(path), hi, list_idx, seg_idx);
+	if (fd < 0)
+		return -1;
+
+	if (internal_config.single_file_segments) {
+		map_offset = seg_idx * ms->hugepage_sz;
+		if (resize_hugefile(fd, map_offset, ms->hugepage_sz, false))
+			return -1;
+		/* if file is zero-length, we've already shrunk it, so it's
+		 * safe to remove.
+		 */
+		if (is_zero_length(fd)) {
+			struct msl_entry *te = get_msl_entry_by_idx(list_idx);
+			if (te != NULL && te->fd >= 0) {
+				close(te->fd);
+				te->fd = -1;
+			}
+			unlink(path);
+		}
+		ret = 0;
+	} else {
+		/* if we're able to take out a write lock, we're the last one
+		 * holding onto this page.
+		 */
+
+		ret = lock(fd, 0, ms->hugepage_sz, F_WRLCK);
+		if (ret >= 0) {
+			/* no one else is using this page */
+			if (ret == 1)
+				unlink(path);
+			if (lock(fd, 0, ms->hugepage_sz, F_UNLCK) != 1)
+				RTE_LOG(ERR, EAL, "%s(): unable to unlock file %s\n",
+					__func__, path);
+			ret = 0; /* success whether or not the file was removed */
+		}
+		close(fd);
+	}
+
+	memset(ms, 0, sizeof(*ms));
+
+	return ret;
+}
+
 int
 eal_memalloc_alloc_page_bulk(struct rte_memseg **ms, int n,
 		uint64_t size, int socket, bool exact)
@@ -507,7 +565,7 @@  eal_memalloc_alloc_page_bulk(struct rte_memseg **ms, int n,
 	struct rte_memseg_list *msl = NULL;
 	void *addr;
 	unsigned int msl_idx;
-	int cur_idx, end_idx, i, ret = -1;
+	int cur_idx, start_idx, end_idx, i, j, ret = -1;
 #ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES
 	bool have_numa;
 	int oldpolicy;
@@ -557,6 +615,7 @@  eal_memalloc_alloc_page_bulk(struct rte_memseg **ms, int n,
 			continue;
 
 		end_idx = cur_idx + n;
+		start_idx = cur_idx;
 
 		for (i = 0; cur_idx < end_idx; cur_idx++, i++) {
 			struct rte_memseg *cur;
@@ -567,25 +626,56 @@  eal_memalloc_alloc_page_bulk(struct rte_memseg **ms, int n,
 
 			if (alloc_page(cur, addr, size, socket, hi, msl_idx,
 					cur_idx)) {
+
 				RTE_LOG(DEBUG, EAL, "attempted to allocate %i pages, but only %i were allocated\n",
 					n, i);
 
-				/* if exact number wasn't requested, stop */
-				if (!exact)
+				/* if exact number of pages wasn't requested,
+				 * failing to allocate is not an error. we could
+				 * of course try other lists to see if there are
+				 * better fits, but a bird in the hand...
+				 */
+				if (!exact) {
 					ret = i;
-				goto restore_numa;
+					goto restore_numa;
+				}
+				RTE_LOG(DEBUG, EAL, "exact number of pages was requested, so freeing %i already allocated pages\n",
+					i);
+
+				/* clean up */
+				for (j = start_idx; j < cur_idx; j++) {
+					struct rte_memseg *tmp;
+					struct rte_fbarray *arr =
+							&msl->memseg_arr;
+
+					tmp = rte_fbarray_get(arr, j);
+					if (free_page(tmp, hi, msl_idx,
+							j))
+						rte_panic("Cannot free page\n");
+
+					rte_fbarray_set_free(arr, j);
+				}
+				/* clear the list */
+				if (ms)
+					memset(ms, 0, sizeof(*ms) * n);
+
+				/* try next list */
+				goto next_list;
 			}
 			if (ms)
 				ms[i] = cur;
 
 			rte_fbarray_set_used(&msl->memseg_arr, cur_idx);
 		}
+		/* we allocated all pages */
 		ret = n;
 
 		break;
+next_list:
+		/* dummy semi-colon to make label work */;
 	}
 	/* we didn't break */
-	if (!msl) {
+	if (msl_idx == RTE_MAX_MEMSEG_LISTS) {
 		RTE_LOG(ERR, EAL, "%s(): couldn't find suitable memseg_list\n",
 			__func__);
 	}
@@ -607,3 +697,51 @@  eal_memalloc_alloc_page(uint64_t size, int socket)
 	/* return pointer to newly allocated memseg */
 	return ms;
 }
+
+int
+eal_memalloc_free_page(struct rte_memseg *ms)
+{
+	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+	struct rte_memseg_list *msl = NULL;
+	unsigned int msl_idx, seg_idx;
+	struct hugepage_info *hi = NULL;
+	int i;
+
+	/* dynamic free not supported in legacy mode */
+	if (internal_config.legacy_mem)
+		return -1;
+
+	for (i = 0; i < (int) RTE_DIM(internal_config.hugepage_info); i++) {
+		if (ms->hugepage_sz ==
+				internal_config.hugepage_info[i].hugepage_sz) {
+			hi = &internal_config.hugepage_info[i];
+			break;
+		}
+	}
+	if (!hi) {
+		RTE_LOG(ERR, EAL, "Can't find relevant hugepage_info entry\n");
+		return -1;
+	}
+
+	for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS; msl_idx++) {
+		uintptr_t start_addr, end_addr;
+		struct rte_memseg_list *cur = &mcfg->memsegs[msl_idx];
+
+		start_addr = (uintptr_t) cur->base_va;
+		end_addr = start_addr + cur->memseg_arr.len * cur->hugepage_sz;
+
+		if ((uintptr_t) ms->addr < start_addr ||
+				(uintptr_t) ms->addr >= end_addr) {
+			continue;
+		}
+		msl = cur;
+		seg_idx = RTE_PTR_DIFF(ms->addr, start_addr) / ms->hugepage_sz;
+		break;
+	}
+	if (!msl) {
+		RTE_LOG(ERR, EAL, "Couldn't find memseg list\n");
+		return -1;
+	}
+	rte_fbarray_set_free(&msl->memseg_arr, seg_idx);
+	return free_page(ms, hi, msl_idx, seg_idx);
+}
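
The write-lock test in free_page() assumes that every process mapping a page
keeps a (read) lock on the corresponding hugetlbfs file, so successfully
taking a write lock proves the page has no remaining users and its backing
file can be unlinked. A self-contained sketch of that test with plain fcntl()
locks, under that assumption (illustration only, not the internal lock()
helper used by the patch):

	#include <errno.h>
	#include <fcntl.h>

	/* Try to take an exclusive (write) lock on the whole hugepage file.
	 * Returns 1 if we got it (no other process holds a lock, so the page
	 * is unused and the file may be unlinked), 0 if another process still
	 * holds a lock, -1 on unexpected error.
	 */
	static int
	try_exclusive_lock(int fd, off_t page_sz)
	{
		struct flock fl = {
			.l_type = F_WRLCK,
			.l_whence = SEEK_SET,
			.l_start = 0,
			.l_len = page_sz,
		};

		if (fcntl(fd, F_SETLK, &fl) == 0)
			return 1;
		if (errno == EAGAIN || errno == EACCES)
			return 0;	/* locked by another process */
		return -1;
	}

When such a test returns 1, free_page() can safely unlink the file; in every
case the lock is dropped again before the descriptor is closed.
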