@@ -11,21 +11,6 @@ API and ABI deprecation notices are to be posted here.
Deprecation Notices
-------------------
-* eal: certain structures will change in EAL on account of upcoming external
- memory support. Aside from internal changes leading to an ABI break, the
- following externally visible changes will also be implemented:
-
- - ``rte_memseg_list`` will change to include a boolean flag indicating
- whether a particular memseg list is externally allocated. This will have
- implications for any users of memseg-walk-related functions, as they will
- now have to skip externally allocated segments in most cases if the intent
- is to only iterate over internal DPDK memory.
- - ``socket_id`` parameter across the entire DPDK will gain additional meaning,
- as some socket ID's will now be representing externally allocated memory. No
- changes will be required for existing code as backwards compatibility will
- be kept, and those who do not use this feature will not see these extra
- socket ID's.
-
* eal: both declaring and identifying devices will be streamlined in v18.11.
New functions will appear to query a specific port from buses, classes of
device and device drivers. Device declaration will be made coherent with the
@@ -102,6 +102,12 @@ API Changes
Also, make sure to start the actual text at the margin.
=========================================================
+* eal: ``rte_memseg_list`` structure now has an additional flag indicating
+ whether the memseg list is externally allocated. This will have implications
+ for any users of memseg-walk-related functions, as they will now have to skip
+ externally allocated segments in most cases if the intent is to only iterate
+ over internal DPDK memory.
+
* mbuf: The ``__rte_mbuf_raw_free()`` and ``__rte_pktmbuf_prefree_seg()``
functions were deprecated since 17.05 and are replaced by
``rte_mbuf_raw_free()`` and ``rte_pktmbuf_prefree_seg()``.
@@ -118,7 +124,6 @@ API Changes
To request keeping CRC, application should set ``DEV_RX_OFFLOAD_KEEP_CRC`` Rx
offload.
-
ABI Changes
-----------
@@ -138,6 +143,8 @@ ABI Changes
supporting external memory in DPDK:
- structure ``rte_memseg_list`` now has a new field indicating length
of memory addressed by the segment list
+ - structure ``rte_memseg_list`` now has a new flag indicating whether
+ the memseg list refers to external memory
Removed Items
@@ -318,11 +318,15 @@ fslmc_unmap_dma(uint64_t vaddr, uint64_t iovaddr __rte_unused, size_t len)
static int
fslmc_dmamap_seg(const struct rte_memseg_list *msl __rte_unused,
- const struct rte_memseg *ms, void *arg)
+ const struct rte_memseg *ms, void *arg)
{
int *n_segs = arg;
int ret;
+ /* if IOVA address is invalid, skip */
+ if (ms->iova == RTE_BAD_IOVA)
+ return 0;
+
ret = fslmc_map_dma(ms->addr_64, ms->iova, ms->len);
if (ret)
DPAA2_BUS_ERR("Unable to VFIO map (addr=%p, len=%zu)",
@@ -568,11 +568,13 @@ static struct rte_pci_driver mlx5_driver;
static void *uar_base;
static int
-find_lower_va_bound(const struct rte_memseg_list *msl __rte_unused,
+find_lower_va_bound(const struct rte_memseg_list *msl,
const struct rte_memseg *ms, void *arg)
{
void **addr = arg;
+ if (msl->external)
+ return 0;
if (*addr == NULL)
*addr = ms->addr;
else
@@ -78,6 +78,9 @@ add_memseg_list(const struct rte_memseg_list *msl, void *arg)
void *start_addr;
uint64_t len;
+ if (msl->external)
+ return 0;
+
if (vm->nregions >= max_regions)
return -1;
@@ -502,6 +502,9 @@ check_socket(const struct rte_memseg_list *msl, void *arg)
{
int *socket_id = arg;
+ if (msl->external)
+ return 0;
+
if (msl->socket_id == *socket_id && msl->memseg_arr.count != 0)
return 1;
@@ -236,12 +236,15 @@ struct attach_walk_args {
int seg_idx;
};
static int
-attach_segment(const struct rte_memseg_list *msl __rte_unused,
- const struct rte_memseg *ms, void *arg)
+attach_segment(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
+ void *arg)
{
struct attach_walk_args *wa = arg;
void *addr;
+ if (msl->external)
+ return 0;
+
addr = mmap(ms->addr, ms->len, PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_FIXED, wa->fd_hugepage,
wa->seg_idx * EAL_PAGE_SIZE);
@@ -272,6 +272,9 @@ physmem_size(const struct rte_memseg_list *msl, void *arg)
{
uint64_t *total_len = arg;
+ if (msl->external)
+ return 0;
+
*total_len += msl->memseg_arr.count * msl->page_sz;
return 0;
@@ -33,6 +33,7 @@ struct rte_memseg_list {
size_t len; /**< Length of memory area covered by this memseg list. */
int socket_id; /**< Socket ID for all memsegs in this list. */
uint64_t page_sz; /**< Page size for all memsegs in this list. */
+ unsigned int external; /**< 1 if this list points to external memory */
volatile uint32_t version; /**< version number for multiprocess sync. */
struct rte_fbarray memseg_arr;
};
@@ -215,6 +215,9 @@ typedef int (*rte_memseg_list_walk_t)(const struct rte_memseg_list *msl,
* @note This function read-locks the memory hotplug subsystem, and thus cannot
* be used within memory-related callback functions.
*
+ * @note This function will also walk through externally allocated segments. It
+ * is up to the user to decide whether to skip through these segments.
+ *
* @param func
* Iterator function
* @param arg
@@ -233,6 +236,9 @@ rte_memseg_walk(rte_memseg_walk_t func, void *arg);
* @note This function read-locks the memory hotplug subsystem, and thus cannot
* be used within memory-related callback functions.
*
+ * @note This function will also walk through externally allocated segments. It
+ * is up to the user to decide whether to skip through these segments.
+ *
* @param func
* Iterator function
* @param arg
@@ -251,6 +257,9 @@ rte_memseg_contig_walk(rte_memseg_contig_walk_t func, void *arg);
* @note This function read-locks the memory hotplug subsystem, and thus cannot
* be used within memory-related callback functions.
*
+ * @note This function will also walk through externally allocated segments. It
+ * is up to the user to decide whether to skip through these segments.
+ *
* @param func
* Iterator function
* @param arg
@@ -39,10 +39,14 @@ malloc_elem_find_max_iova_contig(struct malloc_elem *elem, size_t align)
contig_seg_start = RTE_PTR_ALIGN_CEIL(data_start, align);
/* if we're in IOVA as VA mode, or if we're in legacy mode with
- * hugepages, all elements are IOVA-contiguous.
+ * hugepages, all elements are IOVA-contiguous. however, we can only
+ * make these assumptions about internal memory - externally allocated
+ * segments have to be checked.
*/
- if (rte_eal_iova_mode() == RTE_IOVA_VA ||
- (internal_config.legacy_mem && rte_eal_has_hugepages()))
+ if (!elem->msl->external &&
+ (rte_eal_iova_mode() == RTE_IOVA_VA ||
+ (internal_config.legacy_mem &&
+ rte_eal_has_hugepages())))
return RTE_PTR_DIFF(data_end, contig_seg_start);
cur_page = RTE_PTR_ALIGN_FLOOR(contig_seg_start, page_sz);
@@ -95,6 +95,9 @@ malloc_add_seg(const struct rte_memseg_list *msl,
struct malloc_heap *heap;
int msl_idx;
+ if (msl->external)
+ return 0;
+
heap = &mcfg->malloc_heaps[msl->socket_id];
/* msl is const, so find it */
@@ -754,8 +757,10 @@ malloc_heap_free(struct malloc_elem *elem)
/* anything after this is a bonus */
ret = 0;
- /* ...of which we can't avail if we are in legacy mode */
- if (internal_config.legacy_mem)
+ /* ...of which we can't avail if we are in legacy mode, or if this is an
+ * externally allocated segment.
+ */
+ if (internal_config.legacy_mem || msl->external)
goto free_unlock;
/* check if we can free any memory back to the system */
@@ -223,7 +223,7 @@ rte_malloc_virt2iova(const void *addr)
if (elem == NULL)
return RTE_BAD_IOVA;
- if (rte_eal_iova_mode() == RTE_IOVA_VA)
+ if (!elem->msl->external && rte_eal_iova_mode() == RTE_IOVA_VA)
return (uintptr_t) addr;
ms = rte_mem_virt2memseg(addr, elem->msl);
@@ -725,6 +725,9 @@ check_socket(const struct rte_memseg_list *msl, void *arg)
{
int *socket_id = arg;
+ if (msl->external)
+ return 0;
+
return *socket_id == msl->socket_id;
}
@@ -1059,7 +1062,12 @@ mark_freeable(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
void *arg __rte_unused)
{
/* ms is const, so find this memseg */
- struct rte_memseg *found = rte_mem_virt2memseg(ms->addr, msl);
+ struct rte_memseg *found;
+
+ if (msl->external)
+ return 0;
+
+ found = rte_mem_virt2memseg(ms->addr, msl);
found->flags &= ~RTE_MEMSEG_FLAG_DO_NOT_FREE;
@@ -1408,6 +1408,9 @@ sync_walk(const struct rte_memseg_list *msl, void *arg __rte_unused)
unsigned int i;
int msl_idx;
+ if (msl->external)
+ return 0;
+
msl_idx = msl - mcfg->memsegs;
primary_msl = &mcfg->memsegs[msl_idx];
local_msl = &local_memsegs[msl_idx];
@@ -1456,6 +1459,9 @@ secondary_msl_create_walk(const struct rte_memseg_list *msl,
char name[PATH_MAX];
int msl_idx, ret;
+ if (msl->external)
+ return 0;
+
msl_idx = msl - mcfg->memsegs;
primary_msl = &mcfg->memsegs[msl_idx];
local_msl = &local_memsegs[msl_idx];
@@ -1509,6 +1515,9 @@ fd_list_create_walk(const struct rte_memseg_list *msl,
unsigned int len;
int msl_idx;
+ if (msl->external)
+ return 0;
+
msl_idx = msl - mcfg->memsegs;
len = msl->memseg_arr.len;
@@ -1082,11 +1082,14 @@ rte_vfio_get_group_num(const char *sysfs_base,
}
static int
-type1_map(const struct rte_memseg_list *msl __rte_unused,
- const struct rte_memseg *ms, void *arg)
+type1_map(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
+ void *arg)
{
int *vfio_container_fd = arg;
+ if (msl->external)
+ return 0;
+
return vfio_type1_dma_mem_map(*vfio_container_fd, ms->addr_64, ms->iova,
ms->len, 1);
}
@@ -1196,11 +1199,14 @@ vfio_spapr_dma_do_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
}
static int
-vfio_spapr_map_walk(const struct rte_memseg_list *msl __rte_unused,
+vfio_spapr_map_walk(const struct rte_memseg_list *msl,
const struct rte_memseg *ms, void *arg)
{
int *vfio_container_fd = arg;
+ if (msl->external)
+ return 0;
+
return vfio_spapr_dma_mem_map(*vfio_container_fd, ms->addr_64, ms->iova,
ms->len, 1);
}
@@ -1210,12 +1216,15 @@ struct spapr_walk_param {
uint64_t hugepage_sz;
};
static int
-vfio_spapr_window_size_walk(const struct rte_memseg_list *msl __rte_unused,
+vfio_spapr_window_size_walk(const struct rte_memseg_list *msl,
const struct rte_memseg *ms, void *arg)
{
struct spapr_walk_param *param = arg;
uint64_t max = ms->iova + ms->len;
+ if (msl->external)
+ return 0;
+
if (max > param->window_size) {
param->hugepage_sz = ms->hugepage_sz;
param->window_size = max;
@@ -99,25 +99,44 @@ static unsigned optimize_object_size(unsigned obj_size)
return new_obj_size * RTE_MEMPOOL_ALIGN;
}
+struct pagesz_walk_arg {
+ int socket_id;
+ size_t min;
+};
+
static int
find_min_pagesz(const struct rte_memseg_list *msl, void *arg)
{
- size_t *min = arg;
+ struct pagesz_walk_arg *wa = arg;
+ bool valid;
- if (msl->page_sz < *min)
- *min = msl->page_sz;
+ /*
+ * we need to only look at page sizes available for a particular socket
+ * ID. so, we either need an exact match on socket ID (can match both
+ * native and external memory), or, if SOCKET_ID_ANY was specified as a
+ * socket ID argument, we must only look at native memory and ignore any
+ * page sizes associated with external memory.
+ */
+ valid = msl->socket_id == wa->socket_id;
+ valid |= wa->socket_id == SOCKET_ID_ANY && msl->external == 0;
+
+ if (valid && msl->page_sz < wa->min)
+ wa->min = msl->page_sz;
return 0;
}
static size_t
-get_min_page_size(void)
+get_min_page_size(int socket_id)
{
- size_t min_pagesz = SIZE_MAX;
+ struct pagesz_walk_arg wa;
- rte_memseg_list_walk(find_min_pagesz, &min_pagesz);
+ wa.min = SIZE_MAX;
+ wa.socket_id = socket_id;
- return min_pagesz == SIZE_MAX ? (size_t) getpagesize() : min_pagesz;
+ rte_memseg_list_walk(find_min_pagesz, &wa);
+
+ return wa.min == SIZE_MAX ? (size_t) getpagesize() : wa.min;
}
@@ -470,7 +489,7 @@ rte_mempool_populate_default(struct rte_mempool *mp)
pg_sz = 0;
pg_shift = 0;
} else if (try_contig) {
- pg_sz = get_min_page_size();
+ pg_sz = get_min_page_size(mp->socket_id);
pg_shift = rte_bsf32(pg_sz);
} else {
pg_sz = getpagesize();
@@ -711,6 +711,9 @@ check_socket_mem(const struct rte_memseg_list *msl, void *arg)
{
int32_t *socket = arg;
+ if (msl->external)
+ return 0;
+
return *socket == msl->socket_id;
}
@@ -115,6 +115,9 @@ find_available_pagesz(const struct rte_memseg_list *msl, void *arg)
{
struct walk_arg *wa = arg;
+ if (msl->external)
+ return 0;
+
if (msl->page_sz == RTE_PGSIZE_2M)
wa->hugepage_2MB_avail = 1;
if (msl->page_sz == RTE_PGSIZE_1G)