[dpdk-dev] [PATCH v4 04/11] mempool: add op to calculate memory size to be allocated

Andrew Rybchenko arybchenko at solarflare.com
Mon Apr 16 15:24:33 CEST 2018


The size of the memory chunk required to populate mempool objects
depends on how the objects are stored in memory. Different mempool
drivers may have different requirements, and the new operation makes
it possible to calculate the memory size in accordance with driver
requirements and to advertise the minimum memory chunk size and
alignment requirements in a generic way.
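
For illustration, a driver which requires the whole objects area to be
IOVA-contiguous might implement the callback along the following lines
(a hypothetical sketch, not part of this patch; the function name is
illustrative):

    /* assumes <rte_mempool.h> (which pulls in rte_common.h) is included */
    static ssize_t
    custom_calc_mem_size(const struct rte_mempool *mp, uint32_t obj_num,
                         __rte_unused uint32_t pg_shift,
                         size_t *min_chunk_size, size_t *align)
    {
            size_t total_elt_sz;

            total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size;

            /* the whole objects area must be one contiguous chunk, so
             * page boundaries (pg_shift) are irrelevant here
             */
            *min_chunk_size = total_elt_sz * obj_num;
            *align = RTE_CACHE_LINE_SIZE;

            return *min_chunk_size;
    }

The driver would set this function as the calc_mem_size field of its
rte_mempool_ops before registering the ops with MEMPOOL_REGISTER_OPS.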

Bump ABI version since the patch breaks it.

Suggested-by: Olivier Matz <olivier.matz at 6wind.com>
Signed-off-by: Andrew Rybchenko <arybchenko at solarflare.com>
---
v3 -> v4:
 - rebased on top of memory rework
 - dropped previous acks since the rebase is not trivial
 - check size calculation failure in rte_mempool_populate_anon() and
   rte_mempool_memchunk_anon_free()

v2 -> v3:
 - none

v1 -> v2:
 - clarify min_chunk_size meaning
 - rebase on top of patch series which fixes library version in meson
   build

RFCv2 -> v1:
 - move default calc_mem_size callback to rte_mempool_ops_default.c
 - add ABI changes to release notes
 - name default callback consistently: rte_mempool_op_<callback>_default()
 - bump ABI version since it is the first patch which breaks ABI
 - describe default callback behaviour in detail
 - avoid introducing an internal function to cope with deprecation
   (defer it to the deprecation patch)
 - move cache-line or page boundary chunk alignment to default callback
 - highlight that min_chunk_size and align parameters are output only

 doc/guides/rel_notes/deprecation.rst         |   3 +-
 doc/guides/rel_notes/release_18_05.rst       |   8 +-
 lib/librte_mempool/Makefile                  |   3 +-
 lib/librte_mempool/meson.build               |   5 +-
 lib/librte_mempool/rte_mempool.c             | 114 +++++++++++++++------------
 lib/librte_mempool/rte_mempool.h             |  86 +++++++++++++++++++-
 lib/librte_mempool/rte_mempool_ops.c         |  18 +++++
 lib/librte_mempool/rte_mempool_ops_default.c |  38 +++++++++
 lib/librte_mempool/rte_mempool_version.map   |   7 ++
 9 files changed, 225 insertions(+), 57 deletions(-)
 create mode 100644 lib/librte_mempool/rte_mempool_ops_default.c

diff --git a/doc/guides/rel_notes/deprecation.rst b/doc/guides/rel_notes/deprecation.rst
index c929dcc..2aa5ef3 100644
--- a/doc/guides/rel_notes/deprecation.rst
+++ b/doc/guides/rel_notes/deprecation.rst
@@ -60,8 +60,7 @@ Deprecation Notices
 
   - removal of ``get_capabilities`` mempool ops and related flags.
   - substitute ``register_memory_area`` with ``populate`` ops.
-  - addition of new ops to customize required memory chunk calculation,
-    customize objects population and allocate contiguous
+  - addition of new ops to customize object population and allocate contiguous
     block of objects if underlying driver supports it.
 
 * mbuf: The opaque ``mbuf->hash.sched`` field will be updated to support generic
diff --git a/doc/guides/rel_notes/release_18_05.rst b/doc/guides/rel_notes/release_18_05.rst
index 84295e4..7dbe7ac 100644
--- a/doc/guides/rel_notes/release_18_05.rst
+++ b/doc/guides/rel_notes/release_18_05.rst
@@ -195,6 +195,12 @@ ABI Changes
   type ``uint16_t``: ``burst_size``, ``ring_size``, and ``nb_queues``. These
   are parameter values recommended for use by the PMD.
 
+* **Changed rte_mempool_ops structure.**
+
+  A new callback ``calc_mem_size`` has been added to ``rte_mempool_ops``
+  to allow customization of the required memory size calculation.
+
+
 Removed Items
 -------------
 
@@ -267,7 +273,7 @@ The libraries prepended with a plus sign were incremented in this version.
      librte_latencystats.so.1
      librte_lpm.so.2
    + librte_mbuf.so.4
-     librte_mempool.so.3
+   + librte_mempool.so.4
    + librte_meter.so.2
      librte_metrics.so.1
      librte_net.so.1
diff --git a/lib/librte_mempool/Makefile b/lib/librte_mempool/Makefile
index 1f85d34..421e2a7 100644
--- a/lib/librte_mempool/Makefile
+++ b/lib/librte_mempool/Makefile
@@ -11,7 +11,7 @@ LDLIBS += -lrte_eal -lrte_ring
 
 EXPORT_MAP := rte_mempool_version.map
 
-LIBABIVER := 3
+LIBABIVER := 4
 
 # memseg walk is not yet part of stable API
 CFLAGS += -DALLOW_EXPERIMENTAL_API
@@ -19,6 +19,7 @@ CFLAGS += -DALLOW_EXPERIMENTAL_API
 # all source are stored in SRCS-y
 SRCS-$(CONFIG_RTE_LIBRTE_MEMPOOL) +=  rte_mempool.c
 SRCS-$(CONFIG_RTE_LIBRTE_MEMPOOL) +=  rte_mempool_ops.c
+SRCS-$(CONFIG_RTE_LIBRTE_MEMPOOL) +=  rte_mempool_ops_default.c
 # install includes
 SYMLINK-$(CONFIG_RTE_LIBRTE_MEMPOOL)-include := rte_mempool.h
 
diff --git a/lib/librte_mempool/meson.build b/lib/librte_mempool/meson.build
index 89506c5..6181ad8 100644
--- a/lib/librte_mempool/meson.build
+++ b/lib/librte_mempool/meson.build
@@ -1,8 +1,9 @@
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright(c) 2017 Intel Corporation
 
-version = 3
-sources = files('rte_mempool.c', 'rte_mempool_ops.c')
+version = 4
+sources = files('rte_mempool.c', 'rte_mempool_ops.c',
+		'rte_mempool_ops_default.c')
 headers = files('rte_mempool.h')
 deps += ['ring']
 
diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
index b15b79b..fdcee05 100644
--- a/lib/librte_mempool/rte_mempool.c
+++ b/lib/librte_mempool/rte_mempool.c
@@ -574,12 +574,12 @@ rte_mempool_populate_default(struct rte_mempool *mp)
 	unsigned int mz_flags = RTE_MEMZONE_1GB|RTE_MEMZONE_SIZE_HINT_ONLY;
 	char mz_name[RTE_MEMZONE_NAMESIZE];
 	const struct rte_memzone *mz;
-	size_t size, total_elt_sz, align, pg_sz, pg_shift;
+	ssize_t mem_size;
+	size_t align, pg_sz, pg_shift;
 	rte_iova_t iova;
 	unsigned mz_id, n;
-	unsigned int mp_flags;
 	int ret;
-	bool force_contig, no_contig, try_contig, no_pageshift;
+	bool no_contig, try_contig, no_pageshift;
 
 	ret = mempool_ops_alloc_once(mp);
 	if (ret != 0)
@@ -589,22 +589,12 @@ rte_mempool_populate_default(struct rte_mempool *mp)
 	if (mp->nb_mem_chunks != 0)
 		return -EEXIST;
 
-	/* Get mempool capabilities */
-	mp_flags = 0;
-	ret = rte_mempool_ops_get_capabilities(mp, &mp_flags);
-	if ((ret < 0) && (ret != -ENOTSUP))
-		return ret;
-
-	/* update mempool capabilities */
-	mp->flags |= mp_flags;
-
 	no_contig = mp->flags & MEMPOOL_F_NO_IOVA_CONTIG;
-	force_contig = mp->flags & MEMPOOL_F_CAPA_PHYS_CONTIG;
 
 	/*
 	 * the following section calculates page shift and page size values.
 	 *
-	 * these values impact the result of rte_mempool_xmem_size(), which
+	 * these values impact the result of the calc_mem_size operation, which
 	 * returns the amount of memory that should be allocated to store the
 	 * desired number of objects. when not zero, it allocates more memory
 	 * for the padding between objects, to ensure that an object does not
@@ -625,7 +615,7 @@ rte_mempool_populate_default(struct rte_mempool *mp)
 	 *
 	 * if our IO addresses are virtual, not actual physical (IOVA as VA
 	 * case), then no page shift needed - our memory allocation will give us
-	 * contiguous physical memory as far as the hardware is concerned, so
+	 * contiguous IO memory as far as the hardware is concerned, so
 	 * act as if we're getting contiguous memory.
 	 *
 	 * if our IO addresses are physical, we may get memory from bigger
@@ -643,39 +633,35 @@ rte_mempool_populate_default(struct rte_mempool *mp)
 	 * 1G page on a 10MB memzone). If we fail to get enough contiguous
 	 * memory, then we'll go and reserve space page-by-page.
 	 */
-	no_pageshift = no_contig || force_contig ||
-			rte_eal_iova_mode() == RTE_IOVA_VA;
+	no_pageshift = no_contig || rte_eal_iova_mode() == RTE_IOVA_VA;
 	try_contig = !no_contig && !no_pageshift && rte_eal_has_hugepages();
-	if (force_contig)
-		mz_flags |= RTE_MEMZONE_IOVA_CONTIG;
 
 	if (no_pageshift) {
 		pg_sz = 0;
 		pg_shift = 0;
-		align = RTE_CACHE_LINE_SIZE;
 	} else if (try_contig) {
 		pg_sz = get_min_page_size();
 		pg_shift = rte_bsf32(pg_sz);
-		/* we're trying to reserve contiguous memzone first, so try
-		 * align to cache line; if we fail to reserve a contiguous
-		 * memzone, we'll adjust alignment to equal pagesize later.
-		 */
-		align = RTE_CACHE_LINE_SIZE;
 	} else {
 		pg_sz = getpagesize();
 		pg_shift = rte_bsf32(pg_sz);
-		align = pg_sz;
 	}
 
-	total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size;
 	for (mz_id = 0, n = mp->size; n > 0; mz_id++, n -= ret) {
+		size_t min_chunk_size;
 		unsigned int flags;
+
 		if (try_contig || no_pageshift)
-			size = rte_mempool_xmem_size(n, total_elt_sz, 0,
-				mp->flags);
+			mem_size = rte_mempool_ops_calc_mem_size(mp, n,
+					0, &min_chunk_size, &align);
 		else
-			size = rte_mempool_xmem_size(n, total_elt_sz, pg_shift,
-				mp->flags);
+			mem_size = rte_mempool_ops_calc_mem_size(mp, n,
+					pg_shift, &min_chunk_size, &align);
+
+		if (mem_size < 0) {
+			ret = mem_size;
+			goto fail;
+		}
 
 		ret = snprintf(mz_name, sizeof(mz_name),
 			RTE_MEMPOOL_MZ_FORMAT "_%d", mp->name, mz_id);
@@ -692,27 +678,31 @@ rte_mempool_populate_default(struct rte_mempool *mp)
 		if (try_contig)
 			flags |= RTE_MEMZONE_IOVA_CONTIG;
 
-		mz = rte_memzone_reserve_aligned(mz_name, size, mp->socket_id,
-				flags, align);
+		mz = rte_memzone_reserve_aligned(mz_name, mem_size,
+				mp->socket_id, flags, align);
 
-		/* if we were trying to allocate contiguous memory, adjust
-		 * memzone size and page size to fit smaller page sizes, and
-		 * try again.
+		/* if we were trying to allocate contiguous memory and failed,
+		 * and the minimum required chunk fits within the minimum page
+		 * size, adjust the memzone size to the page size and try again.
 		 */
-		if (mz == NULL && try_contig) {
+		if (mz == NULL && try_contig && min_chunk_size <= pg_sz) {
 			try_contig = false;
 			flags &= ~RTE_MEMZONE_IOVA_CONTIG;
-			align = pg_sz;
-			size = rte_mempool_xmem_size(n, total_elt_sz,
-				pg_shift, mp->flags);
 
-			mz = rte_memzone_reserve_aligned(mz_name, size,
+			mem_size = rte_mempool_ops_calc_mem_size(mp, n,
+					pg_shift, &min_chunk_size, &align);
+			if (mem_size < 0) {
+				ret = mem_size;
+				goto fail;
+			}
+
+			mz = rte_memzone_reserve_aligned(mz_name, mem_size,
 				mp->socket_id, flags, align);
 		}
 		/* don't try reserving with 0 size if we were asked to reserve
 		 * IOVA-contiguous memory.
 		 */
-		if (!force_contig && mz == NULL) {
+		if (min_chunk_size < (size_t)mem_size && mz == NULL) {
 			/* not enough memory, retry with the biggest zone we
 			 * have
 			 */
@@ -724,6 +714,12 @@ rte_mempool_populate_default(struct rte_mempool *mp)
 			goto fail;
 		}
 
+		if (mz->len < min_chunk_size) {
+			rte_memzone_free(mz);
+			ret = -ENOMEM;
+			goto fail;
+		}
+
 		if (no_contig)
 			iova = RTE_BAD_IOVA;
 		else
@@ -753,16 +749,18 @@ rte_mempool_populate_default(struct rte_mempool *mp)
 }
 
 /* return the memory size required for mempool objects in anonymous mem */
-static size_t
+static ssize_t
 get_anon_size(const struct rte_mempool *mp)
 {
-	size_t size, total_elt_sz, pg_sz, pg_shift;
+	ssize_t size;
+	size_t pg_sz, pg_shift;
+	size_t min_chunk_size;
+	size_t align;
 
 	pg_sz = getpagesize();
 	pg_shift = rte_bsf32(pg_sz);
-	total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size;
-	size = rte_mempool_xmem_size(mp->size, total_elt_sz, pg_shift,
-					mp->flags);
+	size = rte_mempool_ops_calc_mem_size(mp, mp->size, pg_shift,
+					     &min_chunk_size, &align);
 
 	return size;
 }
@@ -772,14 +770,25 @@ static void
 rte_mempool_memchunk_anon_free(struct rte_mempool_memhdr *memhdr,
 	void *opaque)
 {
-	munmap(opaque, get_anon_size(memhdr->mp));
+	ssize_t size;
+
+	/*
+	 * Calculate the size since memhdr->len holds the contiguous chunk
+	 * length, which may be smaller if the anonymous mapping is split into
+	 * many chunks. The result must match what was calculated on populate.
+	 */
+	size = get_anon_size(memhdr->mp);
+	if (size < 0)
+		return;
+
+	munmap(opaque, size);
 }
 
 /* populate the mempool with an anonymous mapping */
 int
 rte_mempool_populate_anon(struct rte_mempool *mp)
 {
-	size_t size;
+	ssize_t size;
 	int ret;
 	char *addr;
 
@@ -793,8 +802,13 @@ rte_mempool_populate_anon(struct rte_mempool *mp)
 	if (ret != 0)
 		return ret;
 
-	/* get chunk of virtually continuous memory */
 	size = get_anon_size(mp);
+	if (size < 0) {
+		rte_errno = -size;
+		return 0;
+	}
+
+	/* get chunk of virtually contiguous memory */
 	addr = mmap(NULL, size, PROT_READ | PROT_WRITE,
 		MAP_SHARED | MAP_ANONYMOUS, -1, 0);
 	if (addr == MAP_FAILED) {
diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
index e531a15..191255d 100644
--- a/lib/librte_mempool/rte_mempool.h
+++ b/lib/librte_mempool/rte_mempool.h
@@ -400,6 +400,62 @@ typedef int (*rte_mempool_get_capabilities_t)(const struct rte_mempool *mp,
 typedef int (*rte_mempool_ops_register_memory_area_t)
 (const struct rte_mempool *mp, char *vaddr, rte_iova_t iova, size_t len);
 
+/**
+ * Calculate the memory size required to store a given number of objects.
+ *
+ * If mempool objects are not required to be IOVA-contiguous
+ * (the flag MEMPOOL_F_NO_IOVA_CONTIG is set), min_chunk_size defines
+ * the virtually contiguous chunk size. Otherwise, if mempool objects
+ * must be IOVA-contiguous (the flag MEMPOOL_F_NO_IOVA_CONTIG is
+ * clear), min_chunk_size defines the IOVA-contiguous chunk size.
+ *
+ * @param[in] mp
+ *   Pointer to the memory pool.
+ * @param[in] obj_num
+ *   Number of objects.
+ * @param[in] pg_shift
+ *   LOG2 of the physical page size. If set to 0, ignore page boundaries.
+ * @param[out] min_chunk_size
+ *   Location for minimum size of the memory chunk which may be used to
+ *   store memory pool objects.
+ * @param[out] align
+ *   Location for required memory chunk alignment.
+ * @return
+ *   Required memory size aligned at page boundary.
+ */
+typedef ssize_t (*rte_mempool_calc_mem_size_t)(const struct rte_mempool *mp,
+		uint32_t obj_num, uint32_t pg_shift,
+		size_t *min_chunk_size, size_t *align);
+
+/**
+ * Default way to calculate the memory size required to store a given
+ * number of objects.
+ *
+ * If page boundaries may be ignored, it is the product of the total
+ * object size (header and trailer included) and the number of objects.
+ * Otherwise, it is a number of pages required to store given number of
+ * objects without crossing page boundary.
+ *
+ * Note that if the object size is bigger than the page size, it is
+ * assumed that pages are grouped in subsets of physically contiguous
+ * pages big enough to store at least one object.
+ *
+ * If mempool driver requires object addresses to be block size aligned
+ * (MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS), space for one extra element is
+ * reserved to be able to meet the requirement.
+ *
+ * The minimum size of the memory chunk is either all required space,
+ * if capabilities say that the whole memory area must be physically
+ * contiguous (MEMPOOL_F_CAPA_PHYS_CONTIG), or the maximum of the page
+ * size and the total element size.
+ *
+ * The required memory chunk alignment is the maximum of the page size
+ * and the cache line size.
+ */
+ssize_t rte_mempool_op_calc_mem_size_default(const struct rte_mempool *mp,
+		uint32_t obj_num, uint32_t pg_shift,
+		size_t *min_chunk_size, size_t *align);
+
 /** Structure defining mempool operations structure */
 struct rte_mempool_ops {
 	char name[RTE_MEMPOOL_OPS_NAMESIZE]; /**< Name of mempool ops struct. */
@@ -416,6 +472,11 @@ struct rte_mempool_ops {
 	 * Notify new memory area to mempool
 	 */
 	rte_mempool_ops_register_memory_area_t register_memory_area;
+	/**
+	 * Optional callback to calculate the memory size required to
+	 * store the specified number of objects.
+	 */
+	rte_mempool_calc_mem_size_t calc_mem_size;
 } __rte_cache_aligned;
 
 #define RTE_MEMPOOL_MAX_OPS_IDX 16  /**< Max registered ops structs */
@@ -565,6 +626,29 @@ rte_mempool_ops_register_memory_area(const struct rte_mempool *mp,
 				char *vaddr, rte_iova_t iova, size_t len);
 
 /**
+ * @internal wrapper for mempool_ops calc_mem_size callback.
+ * API to calculate the size of memory required to store the specified
+ * number of objects.
+ *
+ * @param[in] mp
+ *   Pointer to the memory pool.
+ * @param[in] obj_num
+ *   Number of objects.
+ * @param[in] pg_shift
+ *   LOG2 of the physical page size. If set to 0, ignore page boundaries.
+ * @param[out] min_chunk_size
+ *   Location for minimum size of the memory chunk which may be used to
+ *   store memory pool objects.
+ * @param[out] align
+ *   Location for required memory chunk alignment.
+ * @return
+ *   Required memory size aligned at page boundary.
+ */
+ssize_t rte_mempool_ops_calc_mem_size(const struct rte_mempool *mp,
+				      uint32_t obj_num, uint32_t pg_shift,
+				      size_t *min_chunk_size, size_t *align);
+
+/**
  * @internal wrapper for mempool_ops free callback.
  *
  * @param mp
@@ -1534,7 +1618,7 @@ uint32_t rte_mempool_calc_obj_size(uint32_t elt_size, uint32_t flags,
  * of objects. Assume that the memory buffer will be aligned at page
  * boundary.
  *
- * Note that if object size is bigger then page size, then it assumes
+ * Note that if object size is bigger than page size, then it assumes
  * that pages are grouped in subsets of physically continuous pages big
  * enough to store at least one object.
  *
diff --git a/lib/librte_mempool/rte_mempool_ops.c b/lib/librte_mempool/rte_mempool_ops.c
index 0732255..26908cc 100644
--- a/lib/librte_mempool/rte_mempool_ops.c
+++ b/lib/librte_mempool/rte_mempool_ops.c
@@ -59,6 +59,7 @@ rte_mempool_register_ops(const struct rte_mempool_ops *h)
 	ops->get_count = h->get_count;
 	ops->get_capabilities = h->get_capabilities;
 	ops->register_memory_area = h->register_memory_area;
+	ops->calc_mem_size = h->calc_mem_size;
 
 	rte_spinlock_unlock(&rte_mempool_ops_table.sl);
 
@@ -123,6 +124,23 @@ rte_mempool_ops_register_memory_area(const struct rte_mempool *mp, char *vaddr,
 	return ops->register_memory_area(mp, vaddr, iova, len);
 }
 
+/* wrapper to calculate memory size required to store given number of objects */
+ssize_t
+rte_mempool_ops_calc_mem_size(const struct rte_mempool *mp,
+				uint32_t obj_num, uint32_t pg_shift,
+				size_t *min_chunk_size, size_t *align)
+{
+	struct rte_mempool_ops *ops;
+
+	ops = rte_mempool_get_ops(mp->ops_index);
+
+	if (ops->calc_mem_size == NULL)
+		return rte_mempool_op_calc_mem_size_default(mp, obj_num,
+				pg_shift, min_chunk_size, align);
+
+	return ops->calc_mem_size(mp, obj_num, pg_shift, min_chunk_size, align);
+}
+
 /* sets mempool ops previously registered by rte_mempool_register_ops. */
 int
 rte_mempool_set_ops_byname(struct rte_mempool *mp, const char *name,
diff --git a/lib/librte_mempool/rte_mempool_ops_default.c b/lib/librte_mempool/rte_mempool_ops_default.c
new file mode 100644
index 0000000..57fe79b
--- /dev/null
+++ b/lib/librte_mempool/rte_mempool_ops_default.c
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2016 Intel Corporation.
+ * Copyright(c) 2016 6WIND S.A.
+ * Copyright(c) 2018 Solarflare Communications Inc.
+ */
+
+#include <rte_mempool.h>
+
+ssize_t
+rte_mempool_op_calc_mem_size_default(const struct rte_mempool *mp,
+				     uint32_t obj_num, uint32_t pg_shift,
+				     size_t *min_chunk_size, size_t *align)
+{
+	unsigned int mp_flags;
+	int ret;
+	size_t total_elt_sz;
+	size_t mem_size;
+
+	/* Get mempool capabilities */
+	mp_flags = 0;
+	ret = rte_mempool_ops_get_capabilities(mp, &mp_flags);
+	if ((ret < 0) && (ret != -ENOTSUP))
+		return ret;
+
+	total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size;
+
+	mem_size = rte_mempool_xmem_size(obj_num, total_elt_sz, pg_shift,
+					 mp->flags | mp_flags);
+
+	if (mp_flags & MEMPOOL_F_CAPA_PHYS_CONTIG)
+		*min_chunk_size = mem_size;
+	else
+		*min_chunk_size = RTE_MAX((size_t)1 << pg_shift, total_elt_sz);
+
+	*align = RTE_MAX((size_t)RTE_CACHE_LINE_SIZE, (size_t)1 << pg_shift);
+
+	return mem_size;
+}
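
As a rough model of what the default callback returns when no capability
flags are set (a simplified sketch; the authoritative arithmetic lives
in rte_mempool_xmem_size()):

    /* assumes <rte_mempool.h> (which pulls in rte_common.h) is included */
    static ssize_t
    calc_mem_size_model(uint32_t obj_num, size_t total_elt_sz,
                        uint32_t pg_shift)
    {
            size_t pg_sz = (size_t)1 << pg_shift;
            size_t obj_per_page, pg_num;

            if (pg_shift == 0) /* page boundaries are ignored */
                    return total_elt_sz * obj_num;

            if (total_elt_sz > pg_sz)
                    /* object spans pages: round it up to a page multiple */
                    return RTE_ALIGN_CEIL(total_elt_sz, pg_sz) * obj_num;

            /* objects must not cross page boundaries */
            obj_per_page = pg_sz / total_elt_sz;
            pg_num = (obj_num + obj_per_page - 1) / obj_per_page;
            return pg_num << pg_shift;
    }

For example, with 2176-byte elements and 4 KiB pages (pg_shift = 12),
only one object fits per page, so 512 objects need 512 pages, i.e. 2 MiB.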
diff --git a/lib/librte_mempool/rte_mempool_version.map b/lib/librte_mempool/rte_mempool_version.map
index 62b76f9..cb38189 100644
--- a/lib/librte_mempool/rte_mempool_version.map
+++ b/lib/librte_mempool/rte_mempool_version.map
@@ -51,3 +51,10 @@ DPDK_17.11 {
 	rte_mempool_populate_iova_tab;
 
 } DPDK_16.07;
+
+DPDK_18.05 {
+	global:
+
+	rte_mempool_op_calc_mem_size_default;
+
+} DPDK_17.11;
-- 
2.7.4
