@@ -90,6 +90,10 @@ Memory-related options
Unlink hugepage files after creating them (implies no secondary process
support).
+* ``--match-allocations``
+
+ Free hugepages back to system exactly as they were originally allocated.
+
Other options
~~~~~~~~~~~~~
@@ -169,6 +169,20 @@ not allow acquiring or releasing hugepages from the system at runtime.
If neither ``-m`` nor ``--socket-mem`` were specified, the entire available
hugepage memory will be preallocated.
++ Hugepage allocation matching
+
+This behavior is enabled by specifying the ``--match-allocations`` command-line
+switch to the EAL. This switch is Linux-only and not supported with
+``--legacy-mem`` nor ``--no-huge``.
+
+Some applications using memory event callbacks may require that hugepages be
+freed exactly as they were allocated. These applications may also require
+that any allocation from the malloc heap not span across allocations
+associated with two different memory event callbacks. Hugepage allocation
+matching can be used by these types of applications to satisfy both of these
+requirements. This can result in some increased memory usage which is
+very dependent on the memory allocation patterns of the application.
+
+ 32-bit support
Additional restrictions are present when running in 32-bit mode. In dynamic
@@ -60,6 +60,14 @@ New Features
* Added the handler to get firmware version string.
* Added support for multicast filtering.
+* **Added support to free hugepages exactly as originally allocated.**
+
+ Some applications using memory event callbacks (especially for managing
+ RDMA memory regions) require that memory be freed back to the system
+ exactly as it was originally allocated. These applications typically
+ also require that a malloc allocation not span across two separate
+ hugepage allocations. A new ``--match-allocations`` EAL init flag has
+ been added to fulfill both of these requirements.
Removed Items
-------------
@@ -79,6 +79,7 @@ eal_long_options[] = {
{OPT_VMWARE_TSC_MAP, 0, NULL, OPT_VMWARE_TSC_MAP_NUM },
{OPT_LEGACY_MEM, 0, NULL, OPT_LEGACY_MEM_NUM },
{OPT_SINGLE_FILE_SEGMENTS, 0, NULL, OPT_SINGLE_FILE_SEGMENTS_NUM},
+ {OPT_MATCH_ALLOCATIONS, 0, NULL, OPT_MATCH_ALLOCATIONS_NUM},
{0, 0, NULL, 0 }
};
@@ -1424,6 +1425,16 @@ eal_check_common_options(struct internal_config *internal_cfg)
"with --"OPT_IN_MEMORY"\n");
return -1;
}
+ if (internal_cfg->legacy_mem && internal_cfg->match_allocations) {
+ RTE_LOG(ERR, EAL, "Option --"OPT_LEGACY_MEM" is not compatible "
+ "with --"OPT_MATCH_ALLOCATIONS"\n");
+ return -1;
+ }
+ if (internal_cfg->no_hugetlbfs && internal_cfg->match_allocations) {
+ RTE_LOG(ERR, EAL, "Option --"OPT_NO_HUGE" is not compatible "
+ "with --"OPT_MATCH_ALLOCATIONS"\n");
+ return -1;
+ }
return 0;
}
@@ -57,6 +57,8 @@ struct internal_config {
/**< true to enable legacy memory behavior (no dynamic allocation,
* IOVA-contiguous segments).
*/
+ volatile unsigned match_allocations;
+ /**< true to free hugepages exactly as allocated */
volatile unsigned single_file_segments;
/**< true if storing all pages within single files (per-page-size,
* per-node) non-legacy mode only.
@@ -65,6 +65,8 @@ enum {
OPT_SINGLE_FILE_SEGMENTS_NUM,
#define OPT_IOVA_MODE "iova-mode"
OPT_IOVA_MODE_NUM,
+#define OPT_MATCH_ALLOCATIONS "match-allocations"
+ OPT_MATCH_ALLOCATIONS_NUM,
OPT_LONG_MAX_NUM
};
@@ -110,7 +110,8 @@ malloc_elem_find_max_iova_contig(struct malloc_elem *elem, size_t align)
*/
void
malloc_elem_init(struct malloc_elem *elem, struct malloc_heap *heap,
- struct rte_memseg_list *msl, size_t size)
+ struct rte_memseg_list *msl, size_t size,
+ struct malloc_elem *orig_elem, size_t orig_size)
{
elem->heap = heap;
elem->msl = msl;
@@ -120,6 +121,8 @@ malloc_elem_init(struct malloc_elem *elem, struct malloc_heap *heap,
elem->state = ELEM_FREE;
elem->size = size;
elem->pad = 0;
+ elem->orig_elem = orig_elem;
+ elem->orig_size = orig_size;
set_header(elem);
set_trailer(elem);
}
@@ -278,7 +281,8 @@ split_elem(struct malloc_elem *elem, struct malloc_elem *split_pt)
const size_t old_elem_size = (uintptr_t)split_pt - (uintptr_t)elem;
const size_t new_elem_size = elem->size - old_elem_size;
- malloc_elem_init(split_pt, elem->heap, elem->msl, new_elem_size);
+ malloc_elem_init(split_pt, elem->heap, elem->msl, new_elem_size,
+ elem->orig_elem, elem->orig_size);
split_pt->prev = elem;
split_pt->next = next_elem;
if (next_elem)
@@ -317,14 +321,18 @@ static int
next_elem_is_adjacent(struct malloc_elem *elem)
{
return elem->next == RTE_PTR_ADD(elem, elem->size) &&
- elem->next->msl == elem->msl;
+ elem->next->msl == elem->msl &&
+ (!internal_config.match_allocations ||
+ elem->orig_elem == elem->next->orig_elem);
}
static int
prev_elem_is_adjacent(struct malloc_elem *elem)
{
return elem == RTE_PTR_ADD(elem->prev, elem->prev->size) &&
- elem->prev->msl == elem->msl;
+ elem->prev->msl == elem->msl &&
+ (!internal_config.match_allocations ||
+ elem->orig_elem == elem->prev->orig_elem);
}
/*
@@ -32,6 +32,8 @@ struct malloc_elem {
volatile enum elem_state state;
uint32_t pad;
size_t size;
+ struct malloc_elem *orig_elem;
+ size_t orig_size;
#ifdef RTE_MALLOC_DEBUG
uint64_t header_cookie; /* Cookie marking start of data */
/* trailer cookie at start + size */
@@ -116,7 +118,9 @@ void
malloc_elem_init(struct malloc_elem *elem,
struct malloc_heap *heap,
struct rte_memseg_list *msl,
- size_t size);
+ size_t size,
+ struct malloc_elem *orig_elem,
+ size_t orig_size);
void
malloc_elem_insert(struct malloc_elem *elem);
@@ -94,7 +94,7 @@ malloc_heap_add_memory(struct malloc_heap *heap, struct rte_memseg_list *msl,
{
struct malloc_elem *elem = start;
- malloc_elem_init(elem, heap, msl, len);
+ malloc_elem_init(elem, heap, msl, len, elem, len);
malloc_elem_insert(elem);
@@ -857,6 +857,13 @@ malloc_heap_free(struct malloc_elem *elem)
if (elem->size < page_sz)
goto free_unlock;
+ /* if user requested to match allocations, the sizes must match - if not,
+ * we will defer freeing these hugepages until the entire original allocation
+ * can be freed
+ */
+ if (internal_config.match_allocations && elem->size != elem->orig_size)
+ goto free_unlock;
+
/* probably, but let's make sure, as we may not be using up full page */
start = elem;
len = elem->size;
@@ -1259,6 +1266,10 @@ rte_eal_malloc_heap_init(void)
struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
unsigned int i;
+ if (internal_config.match_allocations) {
+ RTE_LOG(DEBUG, EAL, "Hugepages will be freed exactly as allocated.\n");
+ }
+
if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
/* assign min socket ID to external heaps */
mcfg->next_socket_id = EXTERNAL_HEAP_MIN_SOCKET_ID;
@@ -431,6 +431,7 @@ eal_usage(const char *prgname)
" --"OPT_VFIO_INTR" Interrupt mode for VFIO (legacy|msi|msix)\n"
" --"OPT_LEGACY_MEM" Legacy memory mode (no dynamic allocation, contiguous segments)\n"
" --"OPT_SINGLE_FILE_SEGMENTS" Put all hugepage memory in single files\n"
+ " --"OPT_MATCH_ALLOCATIONS" Free hugepages exactly as allocated\n"
"\n");
/* Allow the application to print its usage message too if hook is set */
if ( rte_application_usage_hook ) {
@@ -699,6 +700,10 @@ eal_parse_args(int argc, char **argv)
strdup(optarg);
break;
+ case OPT_MATCH_ALLOCATIONS_NUM:
+ internal_config.match_allocations = 1;
+ break;
+
default:
if (opt < OPT_LONG_MIN_NUM && isprint(opt)) {
RTE_LOG(ERR, EAL, "Option %c is not supported "
@@ -262,13 +262,26 @@ test_multi_alloc_statistics(void)
struct rte_malloc_socket_stats pre_stats, post_stats ,first_stats, second_stats;
size_t size = 2048;
int align = 1024;
-#ifndef RTE_MALLOC_DEBUG
- int trailer_size = 0;
-#else
- int trailer_size = RTE_CACHE_LINE_SIZE;
-#endif
- int overhead = RTE_CACHE_LINE_SIZE + trailer_size;
+ int overhead = 0;
+
+ /* Dynamically calculate the overhead by allocating one cacheline and
+ * then comparing what was allocated from the heap.
+ */
+ rte_malloc_get_socket_stats(socket, &pre_stats);
+
+ void *dummy = rte_malloc_socket(NULL, RTE_CACHE_LINE_SIZE, 0, socket);
+ if (dummy == NULL)
+ return -1;
+
+ rte_malloc_get_socket_stats(socket, &post_stats);
+
+ /* after subtracting cache line, remainder is overhead */
+ overhead = post_stats.heap_allocsz_bytes - pre_stats.heap_allocsz_bytes;
+ overhead -= RTE_CACHE_LINE_SIZE;
+
+ rte_free(dummy);
+ /* Now start the real tests */
rte_malloc_get_socket_stats(socket, &pre_stats);
void *p1 = rte_malloc_socket("stats", size , align, socket);