[dpdk-dev] [PATCH 30/41] eal: enable callbacks on malloc/free and mp sync

Anatoly Burakov anatoly.burakov at intel.com
Sat Mar 3 14:46:18 CET 2018


Also, rewrite VFIO to rely on memory callbacks instead of manually
registering memory with VFIO. Callbacks will only be registered if
VFIO is enabled.

Signed-off-by: Anatoly Burakov <anatoly.burakov at intel.com>
---
 lib/librte_eal/common/malloc_heap.c        | 21 +++++++++++++++++
 lib/librte_eal/linuxapp/eal/eal_memalloc.c | 37 +++++++++++++++++++++---------
 lib/librte_eal/linuxapp/eal/eal_vfio.c     | 35 ++++++++++++++++++++++++++++
 3 files changed, 82 insertions(+), 11 deletions(-)

diff --git a/lib/librte_eal/common/malloc_heap.c b/lib/librte_eal/common/malloc_heap.c
index 9109555..9d055c8 100644
--- a/lib/librte_eal/common/malloc_heap.c
+++ b/lib/librte_eal/common/malloc_heap.c
@@ -223,6 +223,7 @@ try_expand_heap_primary(struct malloc_heap *heap, uint64_t pg_sz,
 	void *map_addr;
 	size_t map_len;
 	int n_pages;
+	bool callback_triggered = false;
 
 	map_len = RTE_ALIGN_CEIL(align + elt_size +
 			MALLOC_ELEM_TRAILER_LEN, pg_sz);
@@ -242,14 +243,25 @@ try_expand_heap_primary(struct malloc_heap *heap, uint64_t pg_sz,
 
 	map_addr = ms[0]->addr;
 
+	/* notify user about changes in memory map */
+	eal_memalloc_notify(RTE_MEM_EVENT_ALLOC, map_addr, map_len);
+
 	/* notify other processes that this has happened */
 	if (request_sync()) {
 		/* we couldn't ensure all processes have mapped memory,
 		 * so free it back and notify everyone that it's been
 		 * freed back.
+		 *
+		 * technically, we could've avoided adding memory addresses to
+		 * the map, but that would've led to inconsistent behavior
+		 * between primary and secondary processes, as those get
+		 * callbacks during sync. therefore, force primary process to
+		 * do alloc-and-rollback syncs as well.
 		 */
+		callback_triggered = true;
 		goto free_elem;
 	}
+
 	heap->total_size += map_len;
 
 	RTE_LOG(DEBUG, EAL, "Heap on socket %d was expanded by %zdMB\n",
@@ -260,6 +272,9 @@ try_expand_heap_primary(struct malloc_heap *heap, uint64_t pg_sz,
 	return 0;
 
 free_elem:
+	if (callback_triggered)
+		eal_memalloc_notify(RTE_MEM_EVENT_FREE, map_addr, map_len);
+
 	rollback_expand_heap(ms, n_pages, elem, map_addr, map_len);
 
 	request_sync();
@@ -615,6 +630,10 @@ malloc_heap_free(struct malloc_elem *elem)
 	heap->total_size -= n_pages * msl->hugepage_sz;
 
 	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+		/* notify user about changes in memory map */
+		eal_memalloc_notify(RTE_MEM_EVENT_FREE,
+				aligned_start, aligned_len);
+
 		/* don't care if any of this fails */
 		malloc_heap_free_pages(aligned_start, aligned_len);
 
@@ -637,6 +656,8 @@ malloc_heap_free(struct malloc_elem *elem)
 		 * already removed from the heap, so it is, for all intents and
 		 * purposes, hidden from the rest of DPDK even if some other
 		 * process (including this one) may have these pages mapped.
+		 *
+		 * notifications about deallocated memory happen during sync.
 		 */
 		request_to_primary(&req);
 	}
diff --git a/lib/librte_eal/linuxapp/eal/eal_memalloc.c b/lib/librte_eal/linuxapp/eal/eal_memalloc.c
index 227d703..1008fae 100644
--- a/lib/librte_eal/linuxapp/eal/eal_memalloc.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memalloc.c
@@ -34,7 +34,6 @@
 #include <rte_eal.h>
 #include <rte_memory.h>
 #include <rte_spinlock.h>
-#include <rte_vfio.h>
 
 #include "eal_filesystem.h"
 #include "eal_internal_cfg.h"
@@ -480,10 +479,6 @@ alloc_page(struct rte_memseg *ms, void *addr, uint64_t size, int socket_id,
 	ms->iova = iova;
 	ms->socket_id = socket_id;
 
-	/* map the segment so that VFIO has access to it */
-	if (rte_eal_iova_mode() == RTE_IOVA_VA &&
-			rte_vfio_dma_map(ms->addr_64, iova, size))
-		RTE_LOG(DEBUG, EAL, "Cannot register segment with VFIO\n");
 	return 0;
 
 mapped:
@@ -515,12 +510,6 @@ free_page(struct rte_memseg *ms, struct hugepage_info *hi,
 	char path[PATH_MAX];
 	int fd, ret;
 
-	/* unmap the segment from VFIO */
-	if (rte_eal_iova_mode() == RTE_IOVA_VA &&
-			rte_vfio_dma_unmap(ms->addr_64, ms->iova, ms->len)) {
-		RTE_LOG(DEBUG, EAL, "Cannot unregister segment with VFIO\n");
-	}
-
 	if (mmap(ms->addr, ms->hugepage_sz, PROT_READ,
 			MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0) ==
 				MAP_FAILED) {
@@ -808,6 +797,19 @@ sync_chunk(struct rte_memseg_list *primary_msl,
 
 	diff_len = RTE_MIN(chunk_len, diff_len);
 
+	/* if we are freeing memory, notify the application */
+	if (!used) {
+		struct rte_memseg *ms;
+		void *start_va;
+		size_t len;
+
+		ms = rte_fbarray_get(l_arr, start);
+		start_va = ms->addr;
+		len = ms->len * diff_len;
+
+		eal_memalloc_notify(RTE_MEM_EVENT_FREE, start_va, len);
+	}
+
 	for (i = 0; i < diff_len; i++) {
 		struct rte_memseg *p_ms, *l_ms;
 		int seg_idx = start + i;
@@ -834,6 +836,19 @@ sync_chunk(struct rte_memseg_list *primary_msl,
 		}
 	}
 
+	/* if we just allocated memory, notify the application */
+	if (used) {
+		struct rte_memseg *ms;
+		void *start_va;
+		size_t len;
+
+		ms = rte_fbarray_get(l_arr, start);
+		start_va = ms->addr;
+		len = ms->len * diff_len;
+
+		eal_memalloc_notify(RTE_MEM_EVENT_ALLOC, start_va, len);
+	}
+
 	/* calculate how much we can advance until next chunk */
 	diff_len = used ?
 			rte_fbarray_find_contig_used(l_arr, start) :
diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.c b/lib/librte_eal/linuxapp/eal/eal_vfio.c
index 8fe8984..d3c3b70 100644
--- a/lib/librte_eal/linuxapp/eal/eal_vfio.c
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio.c
@@ -214,6 +214,37 @@ vfio_group_device_count(int vfio_group_fd)
 	return vfio_cfg.vfio_groups[i].devices;
 }
 
+static void
+vfio_mem_event_callback(enum rte_mem_event type, const void *addr, size_t len)
+{
+	struct rte_memseg_list *msl;
+	struct rte_memseg *ms;
+	size_t cur_len = 0;
+	uint64_t pgsz;
+
+	msl = rte_mem_virt2memseg_list(addr);
+	pgsz = msl->hugepage_sz;
+
+	while (cur_len < len) {
+		const void *va = RTE_PTR_ADD(addr, cur_len);
+		uint64_t vfio_va, iova;
+
+		ms = rte_mem_virt2memseg(va, msl);
+		vfio_va = (uint64_t) (uintptr_t) va;
+		iova = ms->iova;
+
+		/* this never gets called in legacy mode, so we can be sure that
+		 * each segment is a single page.
+		 */
+		if (type == RTE_MEM_EVENT_ALLOC)
+			rte_vfio_dma_map(vfio_va, iova, pgsz);
+		else
+			rte_vfio_dma_unmap(vfio_va, iova, pgsz);
+
+		cur_len += pgsz;
+	}
+}
+
 int
 rte_vfio_clear_group(int vfio_group_fd)
 {
@@ -507,6 +538,10 @@ rte_vfio_enable(const char *modname)
 	if (vfio_cfg.vfio_container_fd != -1) {
 		RTE_LOG(NOTICE, EAL, "VFIO support initialized\n");
 		vfio_cfg.vfio_enabled = 1;
+
+		/* register callback for mem events */
+		rte_mem_event_register_callback("vfio_mem_event_clb",
+				vfio_mem_event_callback);
 	} else {
 		RTE_LOG(NOTICE, EAL, "VFIO support could not be initialized\n");
 	}
-- 
2.7.4


More information about the dev mailing list