[dpdk-dev] custom align for mempool elements

Message ID 4486793.msUSBAJSWm@polaris (mailing list archive)
State Changes Requested, archived
Delegated to: Thomas Monjalon
Checks

Context              Check    Description
ci/checkpatch        warning  coding style issues
ci/Intel-compilation success  Compilation OK

Commit Message

Gregory Etelson April 26, 2017, 4 a.m. UTC
  Signed-off-by: Gregory Etelson <gregory@weka.io>
---
 lib/librte_mempool/rte_mempool.c | 27 ++++++++++++++++++++-------
 lib/librte_mempool/rte_mempool.h |  1 +
 2 files changed, 21 insertions(+), 7 deletions(-)
  

Comments

Olivier Matz May 5, 2017, 11:26 a.m. UTC | #1
Hi Gregory,

On Wed, 26 Apr 2017 07:00:49 +0300, Gregory Etelson <gregory@weka.io> wrote:
> Signed-off-by: Gregory Etelson <gregory@weka.io>
> ---
>  lib/librte_mempool/rte_mempool.c | 27 ++++++++++++++++++++-------
>  lib/librte_mempool/rte_mempool.h |  1 +
>  2 files changed, 21 insertions(+), 7 deletions(-)
> 
> [patch body trimmed; the full diff is in the Patch section below]

It looks like the patch will break the ABI (the mempool structure), so it
has to follow the ABI deprecation process (= a notice in 17.05 and
the patch for 17.08).

Could you give us some details about why you need such a feature and how
you use it (since no API is added)?

Thanks,
Olivier
  
Gregory Etelson May 5, 2017, 5:23 p.m. UTC | #2
Hello Olivier,

Our application writes data from incoming MBUFs to a storage device.
For performance reasons we use O_DIRECT storage access and work in
'zero copy' data mode. To achieve zero copy, the data in all MBUFs
MUST be 512-byte aligned. With a pre-calculated custom pool element
alignment and the right RTE_PKTMBUF_HEADROOM value we can set the MBUF
data alignment to any value; in our case, 512 bytes.
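
For concreteness, the arithmetic behind the headroom choice (an
illustrative sketch; the 128-byte struct rte_mbuf size and the empty
private area below are assumptions, not values from the patch):

    #include <rte_mbuf.h>

    /* If each mbuf object starts on a DATA_ALIGN boundary, its data area
     * begins sizeof(struct rte_mbuf) + priv_size + RTE_PKTMBUF_HEADROOM
     * bytes into the object. Picking a headroom that rounds that sum up
     * to a multiple of DATA_ALIGN therefore aligns the data pointer too:
     * a 128 B struct rte_mbuf, no private area and a 384 B headroom put
     * the data at offset 512 exactly. */
    #define DATA_ALIGN 512

    static inline uint16_t
    headroom_for_align(uint16_t priv_size)
    {
        size_t fixed = sizeof(struct rte_mbuf) + priv_size;

        return (uint16_t)(RTE_ALIGN_CEIL(fixed, DATA_ALIGN) - fixed);
    }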

The current implementation sets the custom mempool alignment like this:

    struct rte_mempool *mp = rte_mempool_create_empty("MBUF pool",
        mbufs_count, elt_size, cache_size,
        sizeof(struct rte_pktmbuf_pool_private), rte_socket_id(), 0);

    rte_pktmbuf_pool_init(mp, &mbp_priv);
    mp->elt_align = align;
    rte_mempool_populate_default(mp);
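
After populating, a quick sanity check of the result (an illustrative
sketch continuing the snippet above; it assumes RTE_ASSERT is enabled):

    /* Allocate one mbuf and confirm its data pointer honours the
     * requested 512-byte alignment. */
    struct rte_mbuf *m = rte_pktmbuf_alloc(mp);

    RTE_ASSERT(m != NULL);
    RTE_ASSERT(((uintptr_t)rte_pktmbuf_mtod(m, void *) & (512 - 1)) == 0);
    rte_pktmbuf_free(m);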

Regards,
Gregory

On Fri, May 5, 2017 at 2:26 PM, Olivier Matz <olivier.matz@6wind.com> wrote:

> [quoted text trimmed; see comment #1]
  
Olivier Matz May 10, 2017, 3:01 p.m. UTC | #3
Hi Gregory,

On Fri, 5 May 2017 20:23:15 +0300, Gregory Etelson <gregory@weka.io> wrote:
> Hello Olivier,
> 
> Our application writes data from incoming MBUFs to a storage device.
> For performance reasons we use O_DIRECT storage access and work in
> 'zero copy' data mode. To achieve zero copy, the data in all MBUFs
> MUST be 512-byte aligned. With a pre-calculated custom pool element
> alignment and the right RTE_PKTMBUF_HEADROOM value we can set the
> MBUF data alignment to any value; in our case, 512 bytes.
> 
> The current implementation sets the custom mempool alignment like this:
> 
>     struct rte_mempool *mp = rte_mempool_create_empty("MBUF pool",
>         mbufs_count, elt_size, cache_size,
>         sizeof(struct rte_pktmbuf_pool_private), rte_socket_id(), 0);
> 
>     rte_pktmbuf_pool_init(mp, &mbp_priv);
>     mp->elt_align = align;
>     rte_mempool_populate_default(mp);

I think we should try to avoid modifying mp->elt_align directly.
A new API could be added for that, maybe something like:

  int rte_mempool_set_obj_align(struct rte_mempool *mp, size_t align)
  size_t rte_mempool_get_obj_align(struct rte_mempool *mp)

The set() function would have to be called before mempool_populate().
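
A minimal sketch of how those accessors could look (hypothetical code,
not part of the patch; the error codes and the power-of-two requirement
are assumptions):

    #include <rte_mempool.h>
    #include <rte_common.h>
    #include <errno.h>

    /* Refuse to change the alignment once the pool holds objects, and
     * require a power-of-two value, as memzone alignment does. */
    int
    rte_mempool_set_obj_align(struct rte_mempool *mp, size_t align)
    {
        if (mp->populated_size != 0)
            return -EEXIST;
        if (!rte_is_power_of_2(align))
            return -EINVAL;
        mp->elt_align = (uint32_t)align;
        return 0;
    }

    size_t
    rte_mempool_get_obj_align(struct rte_mempool *mp)
    {
        return mp->elt_align;
    }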

We need to take care of the conflict with the MEMPOOL_F_NO_CACHE_ALIGN
flag. I think we should keep compatibility for users of this flag; the
flag could be deprecated and removed later. One way to preserve its
behaviour is sketched below. There may also be some conflicts with
MEMPOOL_F_NO_SPREAD.
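
For instance (an illustrative sketch only; the constants 8 and
RTE_CACHE_LINE_SIZE come from the existing populate code, the function
name is hypothetical):

    /* Keep MEMPOOL_F_NO_CACHE_ALIGN behaviour by folding the flag into
     * the default object alignment instead of testing it at populate
     * time. */
    static uint32_t
    mempool_default_obj_align(const struct rte_mempool *mp)
    {
        if (mp->flags & MEMPOOL_F_NO_CACHE_ALIGN)
            return 8;               /* legacy minimal alignment */
        return RTE_CACHE_LINE_SIZE;
    }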

As I said in my previous mail, if the patch breaks the ABI (the mempool
structure), it has to follow the ABI deprecation process (= a notice in
17.05 and the patch for 17.08).

I'm afraid it would be quite late for the notice though.

Regards,
Olivier


> [remainder of the quoted mail trimmed]
  

Patch

diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
index f65310f..c780df3 100644
--- a/lib/librte_mempool/rte_mempool.c
+++ b/lib/librte_mempool/rte_mempool.c
@@ -382,7 +382,7 @@  rte_mempool_populate_phys(struct rte_mempool *mp, char *vaddr,
 	if (mp->flags & MEMPOOL_F_NO_CACHE_ALIGN)
 		off = RTE_PTR_ALIGN_CEIL(vaddr, 8) - vaddr;
 	else
-		off = RTE_PTR_ALIGN_CEIL(vaddr, RTE_CACHE_LINE_SIZE) - vaddr;
+		off = RTE_PTR_ALIGN_CEIL(vaddr, mp->elt_align) - vaddr;
 
 	while (off + total_elt_sz <= len && mp->populated_size < mp->size) {
 		off += mp->header_size;
@@ -392,6 +392,7 @@  rte_mempool_populate_phys(struct rte_mempool *mp, char *vaddr,
 		else
 			mempool_add_elem(mp, (char *)vaddr + off, paddr + off);
 		off += mp->elt_size + mp->trailer_size;
+		off = RTE_ALIGN_CEIL(off, mp->elt_align);
 		i++;
 	}
 
@@ -508,6 +509,20 @@  rte_mempool_populate_virt(struct rte_mempool *mp, char *addr,
 	return ret;
 }
 
+static uint32_t
+mempool_default_elt_alignment(void)
+{
+	uint32_t align;
+	if (rte_xen_dom0_supported()) {
+		align = RTE_PGSIZE_2M;
+	} else if (rte_eal_has_hugepages()) {
+		align = RTE_CACHE_LINE_SIZE;
+	} else {
+		align = getpagesize();
+	}
+	return align;
+}
+
 /* Default function to populate the mempool: allocate memory in memzones,
  * and populate them. Return the number of objects added, or a negative
  * value on error.
@@ -518,7 +533,7 @@  rte_mempool_populate_default(struct rte_mempool *mp)
 	int mz_flags = RTE_MEMZONE_1GB|RTE_MEMZONE_SIZE_HINT_ONLY;
 	char mz_name[RTE_MEMZONE_NAMESIZE];
 	const struct rte_memzone *mz;
-	size_t size, total_elt_sz, align, pg_sz, pg_shift;
+	size_t size, total_elt_sz, pg_sz, pg_shift;
 	phys_addr_t paddr;
 	unsigned mz_id, n;
 	int ret;
@@ -530,15 +545,12 @@  rte_mempool_populate_default(struct rte_mempool *mp)
 	if (rte_xen_dom0_supported()) {
 		pg_sz = RTE_PGSIZE_2M;
 		pg_shift = rte_bsf32(pg_sz);
-		align = pg_sz;
 	} else if (rte_eal_has_hugepages()) {
 		pg_shift = 0; /* not needed, zone is physically contiguous */
 		pg_sz = 0;
-		align = RTE_CACHE_LINE_SIZE;
 	} else {
 		pg_sz = getpagesize();
 		pg_shift = rte_bsf32(pg_sz);
-		align = pg_sz;
 	}
 
 	total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size;
@@ -553,11 +565,11 @@  rte_mempool_populate_default(struct rte_mempool *mp)
 		}
 
 		mz = rte_memzone_reserve_aligned(mz_name, size,
-			mp->socket_id, mz_flags, align);
+			mp->socket_id, mz_flags, mp->elt_align);
 		/* not enough memory, retry with the biggest zone we have */
 		if (mz == NULL)
 			mz = rte_memzone_reserve_aligned(mz_name, 0,
-				mp->socket_id, mz_flags, align);
+				mp->socket_id, mz_flags, mp->elt_align);
 		if (mz == NULL) {
 			ret = -rte_errno;
 			goto fail;
@@ -827,6 +839,7 @@  rte_mempool_create_empty(const char *name, unsigned n, unsigned elt_size,
 	/* Size of default caches, zero means disabled. */
 	mp->cache_size = cache_size;
 	mp->private_data_size = private_data_size;
+	mp->elt_align = mempool_default_elt_alignment();
 	STAILQ_INIT(&mp->elt_list);
 	STAILQ_INIT(&mp->mem_list);
 
diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
index 48bc8ea..6631973 100644
--- a/lib/librte_mempool/rte_mempool.h
+++ b/lib/librte_mempool/rte_mempool.h
@@ -245,6 +245,7 @@  struct rte_mempool {
 	 * this mempool.
 	 */
 	int32_t ops_index;
+	uint32_t elt_align;
 
 	struct rte_mempool_cache *local_cache; /**< Per-lcore local cache */