[dpdk-dev,v6,2/3] eal: add u64 bit variant for reciprocal

Message ID 1504693294-2100-2-git-send-email-pbhagavatula@caviumnetworks.com (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Headers

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK

Commit Message

Pavan Nikhilesh Sept. 6, 2017, 10:21 a.m. UTC
  From: Pavan Bhagavatula <pbhagavatula@caviumnetworks.com>

Currently, rte_reciprocal only supports unsigned 32bit divisors. This
commit adds support for unsigned 64bit divisors.

Rename unsigned 32bit specific functions appropriately and update
librte_sched accordingly.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@caviumnetworks.com>
---
 lib/librte_eal/bsdapp/eal/rte_eal_version.map   |   3 +-
 lib/librte_eal/common/include/rte_reciprocal.h  | 109 ++++++++++++++++++++--
 lib/librte_eal/common/rte_reciprocal.c          | 116 +++++++++++++++++++++---
 lib/librte_eal/linuxapp/eal/rte_eal_version.map |   3 +-
 lib/librte_sched/Makefile                       |   4 +-
 lib/librte_sched/rte_sched.c                    |   9 +-
 6 files changed, 219 insertions(+), 25 deletions(-)
  

Comments

Kevin Traynor Sept. 6, 2017, 12:28 p.m. UTC | #1
On 09/06/2017 11:21 AM, Pavan Nikhilesh wrote:
> From: Pavan Bhagavatula <pbhagavatula@caviumnetworks.com>
> 
> Currently, rte_reciprocal only supports unsigned 32bit divisors. This
> commit adds support for unsigned 64bit divisors.
> 
> Rename unsigned 32bit specific functions appropriately and update
> librte_sched accordingly.
> 
> Signed-off-by: Pavan Nikhilesh <pbhagavatula@caviumnetworks.com>
> ---
>  lib/librte_eal/bsdapp/eal/rte_eal_version.map   |   3 +-
>  lib/librte_eal/common/include/rte_reciprocal.h  | 109 ++++++++++++++++++++--
>  lib/librte_eal/common/rte_reciprocal.c          | 116 +++++++++++++++++++++---
>  lib/librte_eal/linuxapp/eal/rte_eal_version.map |   3 +-
>  lib/librte_sched/Makefile                       |   4 +-
>  lib/librte_sched/rte_sched.c                    |   9 +-
>  6 files changed, 219 insertions(+), 25 deletions(-)
> 
> diff --git a/lib/librte_eal/bsdapp/eal/rte_eal_version.map b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
> index 90d7258..59a85bb 100644
> --- a/lib/librte_eal/bsdapp/eal/rte_eal_version.map
> +++ b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
> @@ -241,6 +241,7 @@ EXPERIMENTAL {
>  DPDK_17.11 {
>  	global:
>  
> -	rte_reciprocal_value;
> +	rte_reciprocal_value_u32;
> +	rte_reciprocal_value_u64;
>  
>  } DPDK_17.08;
> diff --git a/lib/librte_eal/common/include/rte_reciprocal.h b/lib/librte_eal/common/include/rte_reciprocal.h
> index b6d752f..85599e6 100644
> --- a/lib/librte_eal/common/include/rte_reciprocal.h
> +++ b/lib/librte_eal/common/include/rte_reciprocal.h

Hi Pavan, sorry for commenting late but the license in v1 of this file
states it cannot be removed. It is not included in later versions - can
you explain why?

+/*
+ * libdivide
+ * Copyright (C) 2010 ridiculous_fish
+ * This software is provided 'as-is', without any express or implied
+ * warranty.  In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ *    claim that you wrote the original software. If you use this software
+ *    in a product, an acknowledgment in the product documentation would be
+ *    appreciated but is not required.
+ *
+ * 2. Altered source versions must be plainly marked as such, and must
not be
+ *    misrepresented as being the original software.
+ *
+ * 3. This notice may not be removed or altered from any source
distribution.
+ *
+ * libdivide@ridiculousfish.com
+ *
+ */
+



> @@ -22,22 +22,117 @@
>  #ifndef _RTE_RECIPROCAL_H_
>  #define _RTE_RECIPROCAL_H_
>  
> -#include <stdint.h>
> +#include <rte_memory.h>
>  
> -struct rte_reciprocal {
> +/**
> + * Unsigned 32-bit divisor structure.
> + */
> +struct rte_reciprocal_u32 {
>  	uint32_t m;
>  	uint8_t sh1, sh2;
>  };
>  
> +/**
> + * Unsigned 64-bit divisor structure.
> + */
> +struct rte_reciprocal_u64 {
> +	uint64_t m;
> +	uint8_t sh1;
> +};
> +
> +/**
> + * Divide given unsigned 32-bit integer with pre calculated divisor.
> + *
> + * @param a
> + *   The 32-bit dividend.
> + * @param R
> + *   The pointer to pre calculated divisor reciprocal structure.
> + *
> + * @return
> + *   The result of the division
> + */
>  static inline uint32_t
> -rte_reciprocal_divide(uint32_t a, struct rte_reciprocal R)
> +rte_reciprocal_divide_u32(uint32_t a, struct rte_reciprocal_u32 *R)
>  {
> -	uint32_t t = (uint32_t)(((uint64_t)a * R.m) >> 32);
> +	uint32_t t = (((uint64_t)a * R->m) >> 32);
>  
> -	return (t + ((a - t) >> R.sh1)) >> R.sh2;
> +	return (t + ((a - t) >> R->sh1)) >> R->sh2;
>  }
>  
> -struct rte_reciprocal
> -rte_reciprocal_value(uint32_t d);
> +static inline uint64_t
> +mullhi_u64(uint64_t x, uint64_t y)
> +{
> +#ifdef __SIZEOF_INT128__
> +	__uint128_t xl = x;
> +	__uint128_t rl = xl * y;
> +
> +	return (rl >> 64);
> +#else
> +	uint64_t u0, u1, v0, v1, k, t;
> +	uint64_t w1, w2;
> +	uint64_t whi;
> +
> +	u1 = x >> 32; u0 = x & 0xFFFFFFFF;
> +	v1 = y >> 32; v0 = y & 0xFFFFFFFF;
> +
> +	t = u0*v0;
> +	k = t >> 32;
> +
> +	t = u1*v0 + k;
> +	w1 = t & 0xFFFFFFFF;
> +	w2 = t >> 32;
> +
> +	t = u0*v1 + w1;
> +	k = t >> 32;
> +
> +	whi = u1*v1 + w2 + k;
> +
> +	return whi;
> +#endif
> +}
> +
> +/**
> + * Divide given unsigned 64-bit integer with pre calculated divisor.
> + *
> + * @param a
> + *   The 64-bit dividend.
> + * @param R
> + *   The pointer to pre calculated divisor reciprocal structure.
> + *
> + * @return
> + *   The result of the division
> + */
> +static inline uint64_t
> +rte_reciprocal_divide_u64(uint64_t a, struct rte_reciprocal_u64 *R)
> +{
> +	uint64_t q = mullhi_u64(R->m, a);
> +	uint64_t t = ((a - q) >> 1) + q;
> +
> +	return t >> R->sh1;
> +}
> +
> +/**
> + * Generate pre calculated divisor structure.
> + *
> + * @param d
> + *   The unsigned 32-bit divisor.
> + *
> + * @return
> + *   Divisor structure.
> + */
> +struct rte_reciprocal_u32
> +rte_reciprocal_value_u32(uint32_t d);
> +
> +/**
> + * Generate pre calculated divisor structure.
> + *
> + * @param d
> + *   The unsigned 64-bit divisor.
> + *
> + * @return
> + *   Divisor structure.
> + */
> +struct rte_reciprocal_u64
> +rte_reciprocal_value_u64(uint64_t d);
>  
>  #endif /* _RTE_RECIPROCAL_H_ */
> diff --git a/lib/librte_eal/common/rte_reciprocal.c b/lib/librte_eal/common/rte_reciprocal.c
> index 7ab99b4..2024e62 100644
> --- a/lib/librte_eal/common/rte_reciprocal.c
> +++ b/lib/librte_eal/common/rte_reciprocal.c
> @@ -31,18 +31,13 @@
>   *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
>   */
>  
> -#include <stdio.h>
> -#include <stdint.h>
> -
> -#include <rte_common.h>
> -
> -#include "rte_reciprocal.h"
> +#include <rte_reciprocal.h>
>  
>  /* find largest set bit.
>   * portable and slow but does not matter for this usage.
>   */
>  static inline int
> -fls(uint32_t x)
> +fls_u32(uint32_t x)
>  {
>  	int b;
>  
> @@ -54,14 +49,14 @@ fls(uint32_t x)
>  	return 0;
>  }
>  
> -struct rte_reciprocal
> -rte_reciprocal_value(uint32_t d)
> +struct rte_reciprocal_u32
> +rte_reciprocal_value_u32(uint32_t d)
>  {
> -	struct rte_reciprocal R;
> +	struct rte_reciprocal_u32 R;
>  	uint64_t m;
>  	int l;
>  
> -	l = fls(d - 1);
> +	l = fls_u32(d - 1);
>  	m = ((1ULL << 32) * ((1ULL << l) - d));
>  	m /= d;
>  
> @@ -72,3 +67,102 @@ rte_reciprocal_value(uint32_t d)
>  
>  	return R;
>  }
> +
> +/* Code taken from Hacker's Delight:
> + * http://www.hackersdelight.org/HDcode/divlu.c.
> + * License permits inclusion here per:
> + * http://www.hackersdelight.org/permissions.htm
> + */
> +static inline uint64_t
> +divide_128_div_64_to_64(uint64_t u1, uint64_t u0, uint64_t v, uint64_t *r)
> +{
> +	const uint64_t b = (1ULL << 32); /* Number base (16 bits). */
> +	uint64_t un1, un0,           /* Norm. dividend LSD's. */
> +			 vn1, vn0,           /* Norm. divisor digits. */
> +			 q1, q0,             /* Quotient digits. */
> +			 un64, un21, un10,   /* Dividend digit pairs. */
> +			 rhat;               /* A remainder. */
> +	int s;                       /* Shift amount for norm. */
> +
> +    /* If overflow, set rem. to an impossible value. */
> +	if (u1 >= v) {
> +		if (r != NULL)
> +			*r = (uint64_t) -1;
> +		return (uint64_t) -1;
> +	}
> +
> +	/* Count leading zeros. */
> +	s = __builtin_clzll(v);
> +	if (s > 0) {
> +		v = v << s;
> +		un64 = (u1 << s) | ((u0 >> (64 - s)) & (-s >> 31));
> +		un10 = u0 << s;
> +	} else {
> +
> +		un64 = u1 | u0;
> +		un10 = u0;
> +	}
> +
> +	vn1 = v >> 32;
> +	vn0 = v & 0xFFFFFFFF;
> +
> +	un1 = un10 >> 32;
> +	un0 = un10 & 0xFFFFFFFF;
> +
> +	q1 = un64/vn1;
> +	rhat = un64 - q1*vn1;
> +again1:
> +	if (q1 >= b || q1*vn0 > b*rhat + un1) {
> +		q1 = q1 - 1;
> +		rhat = rhat + vn1;
> +		if (rhat < b)
> +			goto again1;
> +	}
> +
> +	un21 = un64*b + un1 - q1*v;
> +
> +	q0 = un21/vn1;
> +	rhat = un21 - q0*vn1;
> +again2:
> +	if (q0 >= b || q0*vn0 > b*rhat + un0) {
> +		q0 = q0 - 1;
> +		rhat = rhat + vn1;
> +		if (rhat < b)
> +			goto again2;
> +	}
> +
> +	if (r != NULL)
> +		*r = (un21*b + un0 - q0*v) >> s;
> +	return q1*b + q0;
> +}
> +
> +struct rte_reciprocal_u64
> +rte_reciprocal_value_u64(uint64_t d)
> +{
> +	struct rte_reciprocal_u64 R;
> +
> +	const uint32_t fld = 63 - __builtin_clzll(d);
> +
> +	if ((d & (d - 1)) == 0) {
> +		R.m = 0;
> +		R.sh1 = (fld - 1) | 0x40;
> +	} else {
> +		uint64_t rem;
> +		uint64_t multiplier;
> +		uint8_t more;
> +
> +		multiplier = divide_128_div_64_to_64(1ULL << fld, 0, d, &rem);
> +		multiplier += multiplier;
> +
> +		const uint64_t twice_rem = rem + rem;
> +		if (twice_rem >= d || twice_rem < rem)
> +			multiplier += 1;
> +		more = fld;
> +		R.m = 1 + multiplier;
> +		R.sh1 = more | 0x40;
> +	}
> +
> +	R.sh1 &= 0x3F;
> +
> +	return R;
> +}
> diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> index 2070cba..2671627 100644
> --- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> +++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> @@ -246,6 +246,7 @@ EXPERIMENTAL {
>  DPDK_17.11 {
>  	global:
>  
> -	rte_reciprocal_value;
> +	rte_reciprocal_value_u32;
> +	rte_reciprocal_value_u64;
>  
>  } DPDK_17.08;
> diff --git a/lib/librte_sched/Makefile b/lib/librte_sched/Makefile
> index 569656b..a2fd6f3 100644
> --- a/lib/librte_sched/Makefile
> +++ b/lib/librte_sched/Makefile
> @@ -54,6 +54,8 @@ LIBABIVER := 1
>  SRCS-$(CONFIG_RTE_LIBRTE_SCHED) += rte_sched.c rte_red.c rte_approx.c
>  
>  # install includes
> -SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include := rte_sched.h rte_bitmap.h rte_sched_common.h rte_red.h rte_approx.h
> +SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include := rte_sched.h rte_bitmap.h
> +SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include += rte_sched_common.h rte_red.h
> +SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include += rte_approx.h
>  
>  include $(RTE_SDK)/mk/rte.lib.mk
> diff --git a/lib/librte_sched/rte_sched.c b/lib/librte_sched/rte_sched.c
> index 3b8ccaa..7bb6d51 100644
> --- a/lib/librte_sched/rte_sched.c
> +++ b/lib/librte_sched/rte_sched.c
> @@ -228,7 +228,7 @@ struct rte_sched_port {
>  	uint64_t time_cpu_cycles;     /* Current CPU time measured in CPU cyles */
>  	uint64_t time_cpu_bytes;      /* Current CPU time measured in bytes */
>  	uint64_t time;                /* Current NIC TX time measured in bytes */
> -	struct rte_reciprocal inv_cycles_per_byte; /* CPU cycles per byte */
> +	struct rte_reciprocal_u32 inv_cycles_per_byte; /* CPU cycles per byte */
>  
>  	/* Scheduling loop detection */
>  	uint32_t pipe_loop;
> @@ -677,7 +677,7 @@ rte_sched_port_config(struct rte_sched_port_params *params)
>  
>  	cycles_per_byte = (rte_get_tsc_hz() << RTE_SCHED_TIME_SHIFT)
>  		/ params->rate;
> -	port->inv_cycles_per_byte = rte_reciprocal_value(cycles_per_byte);
> +	port->inv_cycles_per_byte = rte_reciprocal_value_u32(cycles_per_byte);
>  
>  	/* Scheduling loop detection */
>  	port->pipe_loop = RTE_SCHED_PIPE_INVALID;
> @@ -2147,8 +2147,9 @@ rte_sched_port_time_resync(struct rte_sched_port *port)
>  	uint64_t bytes_diff;
>  
>  	/* Compute elapsed time in bytes */
> -	bytes_diff = rte_reciprocal_divide(cycles_diff << RTE_SCHED_TIME_SHIFT,
> -					   port->inv_cycles_per_byte);
> +	bytes_diff = rte_reciprocal_divide_u32(
> +			cycles_diff << RTE_SCHED_TIME_SHIFT,
> +			&port->inv_cycles_per_byte);
>  
>  	/* Advance port time */
>  	port->time_cpu_cycles = cycles;
>
  
Pavan Nikhilesh Sept. 6, 2017, 2:41 p.m. UTC | #2
On Wed, Sep 06, 2017 at 01:28:24PM +0100, Kevin Traynor wrote:
> On 09/06/2017 11:21 AM, Pavan Nikhilesh wrote:
> > From: Pavan Bhagavatula <pbhagavatula@caviumnetworks.com>
> >
> > Currently, rte_reciprocal only supports unsigned 32bit divisors. This
> > commit adds support for unsigned 64bit divisors.
> >
> > Rename unsigned 32bit specific functions appropriately and update
> > librte_sched accordingly.
> >
> > Signed-off-by: Pavan Nikhilesh <pbhagavatula@caviumnetworks.com>
> > ---
> >  lib/librte_eal/bsdapp/eal/rte_eal_version.map   |   3 +-
> >  lib/librte_eal/common/include/rte_reciprocal.h  | 109 ++++++++++++++++++++--
> >  lib/librte_eal/common/rte_reciprocal.c          | 116 +++++++++++++++++++++---
> >  lib/librte_eal/linuxapp/eal/rte_eal_version.map |   3 +-
> >  lib/librte_sched/Makefile                       |   4 +-
> >  lib/librte_sched/rte_sched.c                    |   9 +-
> >  6 files changed, 219 insertions(+), 25 deletions(-)
> >
> > diff --git a/lib/librte_eal/bsdapp/eal/rte_eal_version.map b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
> > index 90d7258..59a85bb 100644
> > --- a/lib/librte_eal/bsdapp/eal/rte_eal_version.map
> > +++ b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
> > @@ -241,6 +241,7 @@ EXPERIMENTAL {
> >  DPDK_17.11 {
> >  	global:
> >
> > -	rte_reciprocal_value;
> > +	rte_reciprocal_value_u32;
> > +	rte_reciprocal_value_u64;
> >
> >  } DPDK_17.08;
> > diff --git a/lib/librte_eal/common/include/rte_reciprocal.h b/lib/librte_eal/common/include/rte_reciprocal.h
> > index b6d752f..85599e6 100644
> > --- a/lib/librte_eal/common/include/rte_reciprocal.h
> > +++ b/lib/librte_eal/common/include/rte_reciprocal.h
>
> Hi Pavan, sorry for commenting late but the license in v1 of this file
> states it cannot be removed. It is not included in later versions - can
> you explain why?
>
Hi Kevin,

I have misinterpreted this mail
http://dpdk.org/ml/archives/dev/2017-August/073781.html,
any suggestion on how to proceed on this further?

Thanks,
Pavan

> +/*
> + * libdivide
> + * Copyright (C) 2010 ridiculous_fish
> + * This software is provided 'as-is', without any express or implied
> + * warranty.  In no event will the authors be held liable for any damages
> + * arising from the use of this software.
> + * Permission is granted to anyone to use this software for any purpose,
> + * including commercial applications, and to alter it and redistribute it
> + * freely, subject to the following restrictions:
> + *
> + * 1. The origin of this software must not be misrepresented; you must not
> + *    claim that you wrote the original software. If you use this software
> + *    in a product, an acknowledgment in the product documentation would be
> + *    appreciated but is not required.
> + *
> + * 2. Altered source versions must be plainly marked as such, and must
> not be
> + *    misrepresented as being the original software.
> + *
> + * 3. This notice may not be removed or altered from any source
> distribution.
> + *
> + * libdivide@ridiculousfish.com
> + *
> + */
> +
>
>
>
> > @@ -22,22 +22,117 @@
> >  #ifndef _RTE_RECIPROCAL_H_
> >  #define _RTE_RECIPROCAL_H_
> >
> > -#include <stdint.h>
> > +#include <rte_memory.h>
> >
> > -struct rte_reciprocal {
> > +/**
> > + * Unsigned 32-bit divisor structure.
> > + */
> > +struct rte_reciprocal_u32 {
> >  	uint32_t m;
> >  	uint8_t sh1, sh2;
> >  };
> >
> > +/**
> > + * Unsigned 64-bit divisor structure.
> > + */
> > +struct rte_reciprocal_u64 {
> > +	uint64_t m;
> > +	uint8_t sh1;
> > +};
> > +
> > +/**
> > + * Divide given unsigned 32-bit integer with pre calculated divisor.
> > + *
> > + * @param a
> > + *   The 32-bit dividend.
> > + * @param R
> > + *   The pointer to pre calculated divisor reciprocal structure.
> > + *
> > + * @return
> > + *   The result of the division
> > + */
> >  static inline uint32_t
> > -rte_reciprocal_divide(uint32_t a, struct rte_reciprocal R)
> > +rte_reciprocal_divide_u32(uint32_t a, struct rte_reciprocal_u32 *R)
> >  {
> > -	uint32_t t = (uint32_t)(((uint64_t)a * R.m) >> 32);
> > +	uint32_t t = (((uint64_t)a * R->m) >> 32);
> >
> > -	return (t + ((a - t) >> R.sh1)) >> R.sh2;
> > +	return (t + ((a - t) >> R->sh1)) >> R->sh2;
> >  }
> >
> > -struct rte_reciprocal
> > -rte_reciprocal_value(uint32_t d);
> > +static inline uint64_t
> > +mullhi_u64(uint64_t x, uint64_t y)
> > +{
> > +#ifdef __SIZEOF_INT128__
> > +	__uint128_t xl = x;
> > +	__uint128_t rl = xl * y;
> > +
> > +	return (rl >> 64);
> > +#else
> > +	uint64_t u0, u1, v0, v1, k, t;
> > +	uint64_t w1, w2;
> > +	uint64_t whi;
> > +
> > +	u1 = x >> 32; u0 = x & 0xFFFFFFFF;
> > +	v1 = y >> 32; v0 = y & 0xFFFFFFFF;
> > +
> > +	t = u0*v0;
> > +	k = t >> 32;
> > +
> > +	t = u1*v0 + k;
> > +	w1 = t & 0xFFFFFFFF;
> > +	w2 = t >> 32;
> > +
> > +	t = u0*v1 + w1;
> > +	k = t >> 32;
> > +
> > +	whi = u1*v1 + w2 + k;
> > +
> > +	return whi;
> > +#endif
> > +}
> > +
> > +/**
> > + * Divide given unsigned 64-bit integer with pre calculated divisor.
> > + *
> > + * @param a
> > + *   The 64-bit dividend.
> > + * @param R
> > + *   The pointer to pre calculated divisor reciprocal structure.
> > + *
> > + * @return
> > + *   The result of the division
> > + */
> > +static inline uint64_t
> > +rte_reciprocal_divide_u64(uint64_t a, struct rte_reciprocal_u64 *R)
> > +{
> > +	uint64_t q = mullhi_u64(R->m, a);
> > +	uint64_t t = ((a - q) >> 1) + q;
> > +
> > +	return t >> R->sh1;
> > +}
> > +
> > +/**
> > + * Generate pre calculated divisor structure.
> > + *
> > + * @param d
> > + *   The unsigned 32-bit divisor.
> > + *
> > + * @return
> > + *   Divisor structure.
> > + */
> > +struct rte_reciprocal_u32
> > +rte_reciprocal_value_u32(uint32_t d);
> > +
> > +/**
> > + * Generate pre calculated divisor structure.
> > + *
> > + * @param d
> > + *   The unsigned 64-bit divisor.
> > + *
> > + * @return
> > + *   Divisor structure.
> > + */
> > +struct rte_reciprocal_u64
> > +rte_reciprocal_value_u64(uint64_t d);
> >
> >  #endif /* _RTE_RECIPROCAL_H_ */
> > diff --git a/lib/librte_eal/common/rte_reciprocal.c b/lib/librte_eal/common/rte_reciprocal.c
> > index 7ab99b4..2024e62 100644
> > --- a/lib/librte_eal/common/rte_reciprocal.c
> > +++ b/lib/librte_eal/common/rte_reciprocal.c
> > @@ -31,18 +31,13 @@
> >   *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> >   */
> >
> > -#include <stdio.h>
> > -#include <stdint.h>
> > -
> > -#include <rte_common.h>
> > -
> > -#include "rte_reciprocal.h"
> > +#include <rte_reciprocal.h>
> >
> >  /* find largest set bit.
> >   * portable and slow but does not matter for this usage.
> >   */
> >  static inline int
> > -fls(uint32_t x)
> > +fls_u32(uint32_t x)
> >  {
> >  	int b;
> >
> > @@ -54,14 +49,14 @@ fls(uint32_t x)
> >  	return 0;
> >  }
> >
> > -struct rte_reciprocal
> > -rte_reciprocal_value(uint32_t d)
> > +struct rte_reciprocal_u32
> > +rte_reciprocal_value_u32(uint32_t d)
> >  {
> > -	struct rte_reciprocal R;
> > +	struct rte_reciprocal_u32 R;
> >  	uint64_t m;
> >  	int l;
> >
> > -	l = fls(d - 1);
> > +	l = fls_u32(d - 1);
> >  	m = ((1ULL << 32) * ((1ULL << l) - d));
> >  	m /= d;
> >
> > @@ -72,3 +67,102 @@ rte_reciprocal_value(uint32_t d)
> >
> >  	return R;
> >  }
> > +
> > +/* Code taken from Hacker's Delight:
> > + * http://www.hackersdelight.org/HDcode/divlu.c.
> > + * License permits inclusion here per:
> > + * http://www.hackersdelight.org/permissions.htm
> > + */
> > +static inline uint64_t
> > +divide_128_div_64_to_64(uint64_t u1, uint64_t u0, uint64_t v, uint64_t *r)
> > +{
> > +	const uint64_t b = (1ULL << 32); /* Number base (16 bits). */
> > +	uint64_t un1, un0,           /* Norm. dividend LSD's. */
> > +			 vn1, vn0,           /* Norm. divisor digits. */
> > +			 q1, q0,             /* Quotient digits. */
> > +			 un64, un21, un10,   /* Dividend digit pairs. */
> > +			 rhat;               /* A remainder. */
> > +	int s;                       /* Shift amount for norm. */
> > +
> > +    /* If overflow, set rem. to an impossible value. */
> > +	if (u1 >= v) {
> > +		if (r != NULL)
> > +			*r = (uint64_t) -1;
> > +		return (uint64_t) -1;
> > +	}
> > +
> > +	/* Count leading zeros. */
> > +	s = __builtin_clzll(v);
> > +	if (s > 0) {
> > +		v = v << s;
> > +		un64 = (u1 << s) | ((u0 >> (64 - s)) & (-s >> 31));
> > +		un10 = u0 << s;
> > +	} else {
> > +
> > +		un64 = u1 | u0;
> > +		un10 = u0;
> > +	}
> > +
> > +	vn1 = v >> 32;
> > +	vn0 = v & 0xFFFFFFFF;
> > +
> > +	un1 = un10 >> 32;
> > +	un0 = un10 & 0xFFFFFFFF;
> > +
> > +	q1 = un64/vn1;
> > +	rhat = un64 - q1*vn1;
> > +again1:
> > +	if (q1 >= b || q1*vn0 > b*rhat + un1) {
> > +		q1 = q1 - 1;
> > +		rhat = rhat + vn1;
> > +		if (rhat < b)
> > +			goto again1;
> > +	}
> > +
> > +	un21 = un64*b + un1 - q1*v;
> > +
> > +	q0 = un21/vn1;
> > +	rhat = un21 - q0*vn1;
> > +again2:
> > +	if (q0 >= b || q0*vn0 > b*rhat + un0) {
> > +		q0 = q0 - 1;
> > +		rhat = rhat + vn1;
> > +		if (rhat < b)
> > +			goto again2;
> > +	}
> > +
> > +	if (r != NULL)
> > +		*r = (un21*b + un0 - q0*v) >> s;
> > +	return q1*b + q0;
> > +}
> > +
> > +struct rte_reciprocal_u64
> > +rte_reciprocal_value_u64(uint64_t d)
> > +{
> > +	struct rte_reciprocal_u64 R;
> > +
> > +	const uint32_t fld = 63 - __builtin_clzll(d);
> > +
> > +	if ((d & (d - 1)) == 0) {
> > +		R.m = 0;
> > +		R.sh1 = (fld - 1) | 0x40;
> > +	} else {
> > +		uint64_t rem;
> > +		uint64_t multiplier;
> > +		uint8_t more;
> > +
> > +		multiplier = divide_128_div_64_to_64(1ULL << fld, 0, d, &rem);
> > +		multiplier += multiplier;
> > +
> > +		const uint64_t twice_rem = rem + rem;
> > +		if (twice_rem >= d || twice_rem < rem)
> > +			multiplier += 1;
> > +		more = fld;
> > +		R.m = 1 + multiplier;
> > +		R.sh1 = more | 0x40;
> > +	}
> > +
> > +	R.sh1 &= 0x3F;
> > +
> > +	return R;
> > +}
> > diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> > index 2070cba..2671627 100644
> > --- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> > +++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> > @@ -246,6 +246,7 @@ EXPERIMENTAL {
> >  DPDK_17.11 {
> >  	global:
> >
> > -	rte_reciprocal_value;
> > +	rte_reciprocal_value_u32;
> > +	rte_reciprocal_value_u64;
> >
> >  } DPDK_17.08;
> > diff --git a/lib/librte_sched/Makefile b/lib/librte_sched/Makefile
> > index 569656b..a2fd6f3 100644
> > --- a/lib/librte_sched/Makefile
> > +++ b/lib/librte_sched/Makefile
> > @@ -54,6 +54,8 @@ LIBABIVER := 1
> >  SRCS-$(CONFIG_RTE_LIBRTE_SCHED) += rte_sched.c rte_red.c rte_approx.c
> >
> >  # install includes
> > -SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include := rte_sched.h rte_bitmap.h rte_sched_common.h rte_red.h rte_approx.h
> > +SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include := rte_sched.h rte_bitmap.h
> > +SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include += rte_sched_common.h rte_red.h
> > +SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include += rte_approx.h
> >
> >  include $(RTE_SDK)/mk/rte.lib.mk
> > diff --git a/lib/librte_sched/rte_sched.c b/lib/librte_sched/rte_sched.c
> > index 3b8ccaa..7bb6d51 100644
> > --- a/lib/librte_sched/rte_sched.c
> > +++ b/lib/librte_sched/rte_sched.c
> > @@ -228,7 +228,7 @@ struct rte_sched_port {
> >  	uint64_t time_cpu_cycles;     /* Current CPU time measured in CPU cyles */
> >  	uint64_t time_cpu_bytes;      /* Current CPU time measured in bytes */
> >  	uint64_t time;                /* Current NIC TX time measured in bytes */
> > -	struct rte_reciprocal inv_cycles_per_byte; /* CPU cycles per byte */
> > +	struct rte_reciprocal_u32 inv_cycles_per_byte; /* CPU cycles per byte */
> >
> >  	/* Scheduling loop detection */
> >  	uint32_t pipe_loop;
> > @@ -677,7 +677,7 @@ rte_sched_port_config(struct rte_sched_port_params *params)
> >
> >  	cycles_per_byte = (rte_get_tsc_hz() << RTE_SCHED_TIME_SHIFT)
> >  		/ params->rate;
> > -	port->inv_cycles_per_byte = rte_reciprocal_value(cycles_per_byte);
> > +	port->inv_cycles_per_byte = rte_reciprocal_value_u32(cycles_per_byte);
> >
> >  	/* Scheduling loop detection */
> >  	port->pipe_loop = RTE_SCHED_PIPE_INVALID;
> > @@ -2147,8 +2147,9 @@ rte_sched_port_time_resync(struct rte_sched_port *port)
> >  	uint64_t bytes_diff;
> >
> >  	/* Compute elapsed time in bytes */
> > -	bytes_diff = rte_reciprocal_divide(cycles_diff << RTE_SCHED_TIME_SHIFT,
> > -					   port->inv_cycles_per_byte);
> > +	bytes_diff = rte_reciprocal_divide_u32(
> > +			cycles_diff << RTE_SCHED_TIME_SHIFT,
> > +			&port->inv_cycles_per_byte);
> >
> >  	/* Advance port time */
> >  	port->time_cpu_cycles = cycles;
> >
>
  
Kevin Traynor Sept. 6, 2017, 3:35 p.m. UTC | #3
On 09/06/2017 03:41 PM, Pavan Nikhilesh Bhagavatula wrote:
> On Wed, Sep 06, 2017 at 01:28:24PM +0100, Kevin Traynor wrote:
>> On 09/06/2017 11:21 AM, Pavan Nikhilesh wrote:
>>> From: Pavan Bhagavatula <pbhagavatula@caviumnetworks.com>
>>>
>>> Currently, rte_reciprocal only supports unsigned 32bit divisors. This
>>> commit adds support for unsigned 64bit divisors.
>>>
>>> Rename unsigned 32bit specific functions appropriately and update
>>> librte_sched accordingly.
>>>
>>> Signed-off-by: Pavan Nikhilesh <pbhagavatula@caviumnetworks.com>
>>> ---
>>>  lib/librte_eal/bsdapp/eal/rte_eal_version.map   |   3 +-
>>>  lib/librte_eal/common/include/rte_reciprocal.h  | 109 ++++++++++++++++++++--
>>>  lib/librte_eal/common/rte_reciprocal.c          | 116 +++++++++++++++++++++---
>>>  lib/librte_eal/linuxapp/eal/rte_eal_version.map |   3 +-
>>>  lib/librte_sched/Makefile                       |   4 +-
>>>  lib/librte_sched/rte_sched.c                    |   9 +-
>>>  6 files changed, 219 insertions(+), 25 deletions(-)
>>>
>>> diff --git a/lib/librte_eal/bsdapp/eal/rte_eal_version.map b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
>>> index 90d7258..59a85bb 100644
>>> --- a/lib/librte_eal/bsdapp/eal/rte_eal_version.map
>>> +++ b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
>>> @@ -241,6 +241,7 @@ EXPERIMENTAL {
>>>  DPDK_17.11 {
>>>  	global:
>>>
>>> -	rte_reciprocal_value;
>>> +	rte_reciprocal_value_u32;
>>> +	rte_reciprocal_value_u64;
>>>
>>>  } DPDK_17.08;
>>> diff --git a/lib/librte_eal/common/include/rte_reciprocal.h b/lib/librte_eal/common/include/rte_reciprocal.h
>>> index b6d752f..85599e6 100644
>>> --- a/lib/librte_eal/common/include/rte_reciprocal.h
>>> +++ b/lib/librte_eal/common/include/rte_reciprocal.h
>>
>> Hi Pavan, sorry for commenting late but the license in v1 of this file
>> states it cannot be removed. It is not included in later versions - can
>> you explain why?
>>
> Hi Kevin,
> 
> I have misinterpreted this mail
> http://dpdk.org/ml/archives/dev/2017-August/073781.html,
> any suggestion on how to proceed on this further?
> 
> Thanks,
> Pavan
> 

no, not really :( maybe someone else has a suggestion.

The DPDK charter [1] (6.IP Policy) lists the incoming licenses that are
accepted. There is a note about getting governing board approval for an
exception, but I don't think it's applicable to a relatively small
optimization like this.

Kevin.

[1] http://dpdk.org/about/charter

>> +/*
>> + * libdivide
>> + * Copyright (C) 2010 ridiculous_fish
>> + * This software is provided 'as-is', without any express or implied
>> + * warranty.  In no event will the authors be held liable for any damages
>> + * arising from the use of this software.
>> + * Permission is granted to anyone to use this software for any purpose,
>> + * including commercial applications, and to alter it and redistribute it
>> + * freely, subject to the following restrictions:
>> + *
>> + * 1. The origin of this software must not be misrepresented; you must not
>> + *    claim that you wrote the original software. If you use this software
>> + *    in a product, an acknowledgment in the product documentation would be
>> + *    appreciated but is not required.
>> + *
>> + * 2. Altered source versions must be plainly marked as such, and must
>> not be
>> + *    misrepresented as being the original software.
>> + *
>> + * 3. This notice may not be removed or altered from any source
>> distribution.
>> + *
>> + * libdivide@ridiculousfish.com
>> + *
>> + */
>> +
>>
>>
>>
>>> @@ -22,22 +22,117 @@
>>>  #ifndef _RTE_RECIPROCAL_H_
>>>  #define _RTE_RECIPROCAL_H_
>>>
>>> -#include <stdint.h>
>>> +#include <rte_memory.h>
>>>
>>> -struct rte_reciprocal {
>>> +/**
>>> + * Unsigned 32-bit divisor structure.
>>> + */
>>> +struct rte_reciprocal_u32 {
>>>  	uint32_t m;
>>>  	uint8_t sh1, sh2;
>>>  };
>>>
>>> +/**
>>> + * Unsigned 64-bit divisor structure.
>>> + */
>>> +struct rte_reciprocal_u64 {
>>> +	uint64_t m;
>>> +	uint8_t sh1;
>>> +};
>>> +
>>> +/**
>>> + * Divide given unsigned 32-bit integer with pre calculated divisor.
>>> + *
>>> + * @param a
>>> + *   The 32-bit dividend.
>>> + * @param R
>>> + *   The pointer to pre calculated divisor reciprocal structure.
>>> + *
>>> + * @return
>>> + *   The result of the division
>>> + */
>>>  static inline uint32_t
>>> -rte_reciprocal_divide(uint32_t a, struct rte_reciprocal R)
>>> +rte_reciprocal_divide_u32(uint32_t a, struct rte_reciprocal_u32 *R)
>>>  {
>>> -	uint32_t t = (uint32_t)(((uint64_t)a * R.m) >> 32);
>>> +	uint32_t t = (((uint64_t)a * R->m) >> 32);
>>>
>>> -	return (t + ((a - t) >> R.sh1)) >> R.sh2;
>>> +	return (t + ((a - t) >> R->sh1)) >> R->sh2;
>>>  }
>>>
>>> -struct rte_reciprocal
>>> -rte_reciprocal_value(uint32_t d);
>>> +static inline uint64_t
>>> +mullhi_u64(uint64_t x, uint64_t y)
>>> +{
>>> +#ifdef __SIZEOF_INT128__
>>> +	__uint128_t xl = x;
>>> +	__uint128_t rl = xl * y;
>>> +
>>> +	return (rl >> 64);
>>> +#else
>>> +	uint64_t u0, u1, v0, v1, k, t;
>>> +	uint64_t w1, w2;
>>> +	uint64_t whi;
>>> +
>>> +	u1 = x >> 32; u0 = x & 0xFFFFFFFF;
>>> +	v1 = y >> 32; v0 = y & 0xFFFFFFFF;
>>> +
>>> +	t = u0*v0;
>>> +	k = t >> 32;
>>> +
>>> +	t = u1*v0 + k;
>>> +	w1 = t & 0xFFFFFFFF;
>>> +	w2 = t >> 32;
>>> +
>>> +	t = u0*v1 + w1;
>>> +	k = t >> 32;
>>> +
>>> +	whi = u1*v1 + w2 + k;
>>> +
>>> +	return whi;
>>> +#endif
>>> +}
>>> +
>>> +/**
>>> + * Divide given unsigned 64-bit integer with pre calculated divisor.
>>> + *
>>> + * @param a
>>> + *   The 64-bit dividend.
>>> + * @param R
>>> + *   The pointer to pre calculated divisor reciprocal structure.
>>> + *
>>> + * @return
>>> + *   The result of the division
>>> + */
>>> +static inline uint64_t
>>> +rte_reciprocal_divide_u64(uint64_t a, struct rte_reciprocal_u64 *R)
>>> +{
>>> +	uint64_t q = mullhi_u64(R->m, a);
>>> +	uint64_t t = ((a - q) >> 1) + q;
>>> +
>>> +	return t >> R->sh1;
>>> +}
>>> +
>>> +/**
>>> + * Generate pre calculated divisor structure.
>>> + *
>>> + * @param d
>>> + *   The unsigned 32-bit divisor.
>>> + *
>>> + * @return
>>> + *   Divisor structure.
>>> + */
>>> +struct rte_reciprocal_u32
>>> +rte_reciprocal_value_u32(uint32_t d);
>>> +
>>> +/**
>>> + * Generate pre calculated divisor structure.
>>> + *
>>> + * @param d
>>> + *   The unsigned 64-bit divisor.
>>> + *
>>> + * @return
>>> + *   Divisor structure.
>>> + */
>>> +struct rte_reciprocal_u64
>>> +rte_reciprocal_value_u64(uint64_t d);
>>>
>>>  #endif /* _RTE_RECIPROCAL_H_ */
>>> diff --git a/lib/librte_eal/common/rte_reciprocal.c b/lib/librte_eal/common/rte_reciprocal.c
>>> index 7ab99b4..2024e62 100644
>>> --- a/lib/librte_eal/common/rte_reciprocal.c
>>> +++ b/lib/librte_eal/common/rte_reciprocal.c
>>> @@ -31,18 +31,13 @@
>>>   *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
>>>   */
>>>
>>> -#include <stdio.h>
>>> -#include <stdint.h>
>>> -
>>> -#include <rte_common.h>
>>> -
>>> -#include "rte_reciprocal.h"
>>> +#include <rte_reciprocal.h>
>>>
>>>  /* find largest set bit.
>>>   * portable and slow but does not matter for this usage.
>>>   */
>>>  static inline int
>>> -fls(uint32_t x)
>>> +fls_u32(uint32_t x)
>>>  {
>>>  	int b;
>>>
>>> @@ -54,14 +49,14 @@ fls(uint32_t x)
>>>  	return 0;
>>>  }
>>>
>>> -struct rte_reciprocal
>>> -rte_reciprocal_value(uint32_t d)
>>> +struct rte_reciprocal_u32
>>> +rte_reciprocal_value_u32(uint32_t d)
>>>  {
>>> -	struct rte_reciprocal R;
>>> +	struct rte_reciprocal_u32 R;
>>>  	uint64_t m;
>>>  	int l;
>>>
>>> -	l = fls(d - 1);
>>> +	l = fls_u32(d - 1);
>>>  	m = ((1ULL << 32) * ((1ULL << l) - d));
>>>  	m /= d;
>>>
>>> @@ -72,3 +67,102 @@ rte_reciprocal_value(uint32_t d)
>>>
>>>  	return R;
>>>  }
>>> +
>>> +/* Code taken from Hacker's Delight:
>>> + * http://www.hackersdelight.org/HDcode/divlu.c.
>>> + * License permits inclusion here per:
>>> + * http://www.hackersdelight.org/permissions.htm
>>> + */
>>> +static inline uint64_t
>>> +divide_128_div_64_to_64(uint64_t u1, uint64_t u0, uint64_t v, uint64_t *r)
>>> +{
>>> +	const uint64_t b = (1ULL << 32); /* Number base (16 bits). */
>>> +	uint64_t un1, un0,           /* Norm. dividend LSD's. */
>>> +			 vn1, vn0,           /* Norm. divisor digits. */
>>> +			 q1, q0,             /* Quotient digits. */
>>> +			 un64, un21, un10,   /* Dividend digit pairs. */
>>> +			 rhat;               /* A remainder. */
>>> +	int s;                       /* Shift amount for norm. */
>>> +
>>> +    /* If overflow, set rem. to an impossible value. */
>>> +	if (u1 >= v) {
>>> +		if (r != NULL)
>>> +			*r = (uint64_t) -1;
>>> +		return (uint64_t) -1;
>>> +	}
>>> +
>>> +	/* Count leading zeros. */
>>> +	s = __builtin_clzll(v);
>>> +	if (s > 0) {
>>> +		v = v << s;
>>> +		un64 = (u1 << s) | ((u0 >> (64 - s)) & (-s >> 31));
>>> +		un10 = u0 << s;
>>> +	} else {
>>> +
>>> +		un64 = u1 | u0;
>>> +		un10 = u0;
>>> +	}
>>> +
>>> +	vn1 = v >> 32;
>>> +	vn0 = v & 0xFFFFFFFF;
>>> +
>>> +	un1 = un10 >> 32;
>>> +	un0 = un10 & 0xFFFFFFFF;
>>> +
>>> +	q1 = un64/vn1;
>>> +	rhat = un64 - q1*vn1;
>>> +again1:
>>> +	if (q1 >= b || q1*vn0 > b*rhat + un1) {
>>> +		q1 = q1 - 1;
>>> +		rhat = rhat + vn1;
>>> +		if (rhat < b)
>>> +			goto again1;
>>> +	}
>>> +
>>> +	un21 = un64*b + un1 - q1*v;
>>> +
>>> +	q0 = un21/vn1;
>>> +	rhat = un21 - q0*vn1;
>>> +again2:
>>> +	if (q0 >= b || q0*vn0 > b*rhat + un0) {
>>> +		q0 = q0 - 1;
>>> +		rhat = rhat + vn1;
>>> +		if (rhat < b)
>>> +			goto again2;
>>> +	}
>>> +
>>> +	if (r != NULL)
>>> +		*r = (un21*b + un0 - q0*v) >> s;
>>> +	return q1*b + q0;
>>> +}
>>> +
>>> +struct rte_reciprocal_u64
>>> +rte_reciprocal_value_u64(uint64_t d)
>>> +{
>>> +	struct rte_reciprocal_u64 R;
>>> +
>>> +	const uint32_t fld = 63 - __builtin_clzll(d);
>>> +
>>> +	if ((d & (d - 1)) == 0) {
>>> +		R.m = 0;
>>> +		R.sh1 = (fld - 1) | 0x40;
>>> +	} else {
>>> +		uint64_t rem;
>>> +		uint64_t multiplier;
>>> +		uint8_t more;
>>> +
>>> +		multiplier = divide_128_div_64_to_64(1ULL << fld, 0, d, &rem);
>>> +		multiplier += multiplier;
>>> +
>>> +		const uint64_t twice_rem = rem + rem;
>>> +		if (twice_rem >= d || twice_rem < rem)
>>> +			multiplier += 1;
>>> +		more = fld;
>>> +		R.m = 1 + multiplier;
>>> +		R.sh1 = more | 0x40;
>>> +	}
>>> +
>>> +	R.sh1 &= 0x3F;
>>> +
>>> +	return R;
>>> +}
>>> diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
>>> index 2070cba..2671627 100644
>>> --- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
>>> +++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
>>> @@ -246,6 +246,7 @@ EXPERIMENTAL {
>>>  DPDK_17.11 {
>>>  	global:
>>>
>>> -	rte_reciprocal_value;
>>> +	rte_reciprocal_value_u32;
>>> +	rte_reciprocal_value_u64;
>>>
>>>  } DPDK_17.08;
>>> diff --git a/lib/librte_sched/Makefile b/lib/librte_sched/Makefile
>>> index 569656b..a2fd6f3 100644
>>> --- a/lib/librte_sched/Makefile
>>> +++ b/lib/librte_sched/Makefile
>>> @@ -54,6 +54,8 @@ LIBABIVER := 1
>>>  SRCS-$(CONFIG_RTE_LIBRTE_SCHED) += rte_sched.c rte_red.c rte_approx.c
>>>
>>>  # install includes
>>> -SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include := rte_sched.h rte_bitmap.h rte_sched_common.h rte_red.h rte_approx.h
>>> +SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include := rte_sched.h rte_bitmap.h
>>> +SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include += rte_sched_common.h rte_red.h
>>> +SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include += rte_approx.h
>>>
>>>  include $(RTE_SDK)/mk/rte.lib.mk
>>> diff --git a/lib/librte_sched/rte_sched.c b/lib/librte_sched/rte_sched.c
>>> index 3b8ccaa..7bb6d51 100644
>>> --- a/lib/librte_sched/rte_sched.c
>>> +++ b/lib/librte_sched/rte_sched.c
>>> @@ -228,7 +228,7 @@ struct rte_sched_port {
>>>  	uint64_t time_cpu_cycles;     /* Current CPU time measured in CPU cyles */
>>>  	uint64_t time_cpu_bytes;      /* Current CPU time measured in bytes */
>>>  	uint64_t time;                /* Current NIC TX time measured in bytes */
>>> -	struct rte_reciprocal inv_cycles_per_byte; /* CPU cycles per byte */
>>> +	struct rte_reciprocal_u32 inv_cycles_per_byte; /* CPU cycles per byte */
>>>
>>>  	/* Scheduling loop detection */
>>>  	uint32_t pipe_loop;
>>> @@ -677,7 +677,7 @@ rte_sched_port_config(struct rte_sched_port_params *params)
>>>
>>>  	cycles_per_byte = (rte_get_tsc_hz() << RTE_SCHED_TIME_SHIFT)
>>>  		/ params->rate;
>>> -	port->inv_cycles_per_byte = rte_reciprocal_value(cycles_per_byte);
>>> +	port->inv_cycles_per_byte = rte_reciprocal_value_u32(cycles_per_byte);
>>>
>>>  	/* Scheduling loop detection */
>>>  	port->pipe_loop = RTE_SCHED_PIPE_INVALID;
>>> @@ -2147,8 +2147,9 @@ rte_sched_port_time_resync(struct rte_sched_port *port)
>>>  	uint64_t bytes_diff;
>>>
>>>  	/* Compute elapsed time in bytes */
>>> -	bytes_diff = rte_reciprocal_divide(cycles_diff << RTE_SCHED_TIME_SHIFT,
>>> -					   port->inv_cycles_per_byte);
>>> +	bytes_diff = rte_reciprocal_divide_u32(
>>> +			cycles_diff << RTE_SCHED_TIME_SHIFT,
>>> +			&port->inv_cycles_per_byte);
>>>
>>>  	/* Advance port time */
>>>  	port->time_cpu_cycles = cycles;
>>>
>>
  
Stephen Hemminger Sept. 6, 2017, 3:37 p.m. UTC | #4
On Wed, 6 Sep 2017 20:11:34 +0530
Pavan Nikhilesh Bhagavatula <pbhagavatula@caviumnetworks.com> wrote:

> On Wed, Sep 06, 2017 at 01:28:24PM +0100, Kevin Traynor wrote:
> > On 09/06/2017 11:21 AM, Pavan Nikhilesh wrote:  
> > > From: Pavan Bhagavatula <pbhagavatula@caviumnetworks.com>
> > >
> > > Currently, rte_reciprocal only supports unsigned 32bit divisors. This
> > > commit adds support for unsigned 64bit divisors.
> > >
> > > Rename unsigned 32bit specific functions appropriately and update
> > > librte_sched accordingly.
> > >
> > > Signed-off-by: Pavan Nikhilesh <pbhagavatula@caviumnetworks.com>
> > > ---
> > >  lib/librte_eal/bsdapp/eal/rte_eal_version.map   |   3 +-
> > >  lib/librte_eal/common/include/rte_reciprocal.h  | 109 ++++++++++++++++++++--
> > >  lib/librte_eal/common/rte_reciprocal.c          | 116 +++++++++++++++++++++---
> > >  lib/librte_eal/linuxapp/eal/rte_eal_version.map |   3 +-
> > >  lib/librte_sched/Makefile                       |   4 +-
> > >  lib/librte_sched/rte_sched.c                    |   9 +-
> > >  6 files changed, 219 insertions(+), 25 deletions(-)
> > >
> > > diff --git a/lib/librte_eal/bsdapp/eal/rte_eal_version.map b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
> > > index 90d7258..59a85bb 100644
> > > --- a/lib/librte_eal/bsdapp/eal/rte_eal_version.map
> > > +++ b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
> > > @@ -241,6 +241,7 @@ EXPERIMENTAL {
> > >  DPDK_17.11 {
> > >  	global:
> > >
> > > -	rte_reciprocal_value;
> > > +	rte_reciprocal_value_u32;
> > > +	rte_reciprocal_value_u64;
> > >
> > >  } DPDK_17.08;
> > > diff --git a/lib/librte_eal/common/include/rte_reciprocal.h b/lib/librte_eal/common/include/rte_reciprocal.h
> > > index b6d752f..85599e6 100644
> > > --- a/lib/librte_eal/common/include/rte_reciprocal.h
> > > +++ b/lib/librte_eal/common/include/rte_reciprocal.h  
> >
> > Hi Pavan, sorry for commenting late but the license in v1 of this file
> > states it cannot be removed. It is not included in later versions - can
> > you explain why?
> >  
> Hi Kevin,
> 
> I have misinterpreted this mail
> http://dpdk.org/ml/archives/dev/2017-August/073781.html,
> any suggestion on how to proceed on this further?
> 
> Thanks,
> Pavan

License issues need legal advice (TAB could ask LF if required).
Sorry, I am a cynic engineer not a lawyer.

Easiest solution is to find equivalent code in FreeBSD or some other project
which does not have the restrictions.
  
Cristian Dumitrescu Sept. 6, 2017, 5:05 p.m. UTC | #5
> -----Original Message-----
> From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> Sent: Wednesday, September 6, 2017 4:37 PM
> To: Pavan Nikhilesh Bhagavatula <pbhagavatula@caviumnetworks.com>
> Cc: Kevin Traynor <ktraynor@redhat.com>; Dumitrescu, Cristian
> <cristian.dumitrescu@intel.com>; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v6 2/3] eal: add u64 bit variant for reciprocal
> 
> On Wed, 6 Sep 2017 20:11:34 +0530
> Pavan Nikhilesh Bhagavatula <pbhagavatula@caviumnetworks.com> wrote:
> 
> > On Wed, Sep 06, 2017 at 01:28:24PM +0100, Kevin Traynor wrote:
> > > On 09/06/2017 11:21 AM, Pavan Nikhilesh wrote:
> > > > From: Pavan Bhagavatula <pbhagavatula@caviumnetworks.com>
> > > >
> > > > Currently, rte_reciprocal only supports unsigned 32bit divisors. This
> > > > commit adds support for unsigned 64bit divisors.
> > > >
> > > > Rename unsigned 32bit specific functions appropriately and update
> > > > librte_sched accordingly.
> > > >
> > > > Signed-off-by: Pavan Nikhilesh <pbhagavatula@caviumnetworks.com>
> > > > ---
> > > >  lib/librte_eal/bsdapp/eal/rte_eal_version.map   |   3 +-
> > > >  lib/librte_eal/common/include/rte_reciprocal.h  | 109
> ++++++++++++++++++++--
> > > >  lib/librte_eal/common/rte_reciprocal.c          | 116
> +++++++++++++++++++++---
> > > >  lib/librte_eal/linuxapp/eal/rte_eal_version.map |   3 +-
> > > >  lib/librte_sched/Makefile                       |   4 +-
> > > >  lib/librte_sched/rte_sched.c                    |   9 +-
> > > >  6 files changed, 219 insertions(+), 25 deletions(-)
> > > >
> > > > diff --git a/lib/librte_eal/bsdapp/eal/rte_eal_version.map
> b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
> > > > index 90d7258..59a85bb 100644
> > > > --- a/lib/librte_eal/bsdapp/eal/rte_eal_version.map
> > > > +++ b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
> > > > @@ -241,6 +241,7 @@ EXPERIMENTAL {
> > > >  DPDK_17.11 {
> > > >  	global:
> > > >
> > > > -	rte_reciprocal_value;
> > > > +	rte_reciprocal_value_u32;
> > > > +	rte_reciprocal_value_u64;
> > > >
> > > >  } DPDK_17.08;
> > > > diff --git a/lib/librte_eal/common/include/rte_reciprocal.h
> b/lib/librte_eal/common/include/rte_reciprocal.h
> > > > index b6d752f..85599e6 100644
> > > > --- a/lib/librte_eal/common/include/rte_reciprocal.h
> > > > +++ b/lib/librte_eal/common/include/rte_reciprocal.h
> > >
> > > Hi Pavan, sorry for commenting late but the license in v1 of this file
> > > states it cannot be removed. It is not included in later versions - can
> > > you explain why?
> > >
> > Hi Kevin,
> >
> > I have misinterpreted this mail
> > http://dpdk.org/ml/archives/dev/2017-August/073781.html,
> > any suggestion on how to proceed on this further?
> >
> > Thanks,
> > Pavan
> 
> License issues need legal advice (TAB could ask LF if required).
> Sorry, I am a cynic engineer not a lawyer.
> 
> Easiest solution is to find equivalent code in FreeBSD or some other project
> which does not have the restrictions.

How about just adding your copyright on the existing BSD license?
  
Cristian Dumitrescu Sept. 20, 2017, 1:15 p.m. UTC | #6
Hi Pavan,

Same ask as for the first patch:
-Do not change existing code in rte_reciprocal.[hc]: no _u32 suffix, please
-Do not add lots of CR+LF to existing code

> -----Original Message-----
> From: Pavan Nikhilesh [mailto:pbhagavatula@caviumnetworks.com]
> Sent: Wednesday, September 6, 2017 11:22 AM
> To: Dumitrescu, Cristian <cristian.dumitrescu@intel.com>;
> stephen@networkplumber.org
> Cc: dev@dpdk.org; Pavan Bhagavatula
> <pbhagavatula@caviumnetworks.com>
> Subject: [dpdk-dev] [PATCH v6 2/3] eal: add u64 bit variant for reciprocal
> 
> From: Pavan Bhagavatula <pbhagavatula@caviumnetworks.com>
> 
> Currently, rte_reciprocal only supports unsigned 32bit divisors. This
> commit adds support for unsigned 64bit divisors.
> 
> Rename unsigned 32bit specific functions appropriately and update
> librte_sched accordingly.
> 
> Signed-off-by: Pavan Nikhilesh <pbhagavatula@caviumnetworks.com>
> ---
>  lib/librte_eal/bsdapp/eal/rte_eal_version.map   |   3 +-
>  lib/librte_eal/common/include/rte_reciprocal.h  | 109
> ++++++++++++++++++++--
>  lib/librte_eal/common/rte_reciprocal.c          | 116
> +++++++++++++++++++++---
>  lib/librte_eal/linuxapp/eal/rte_eal_version.map |   3 +-
>  lib/librte_sched/Makefile                       |   4 +-
>  lib/librte_sched/rte_sched.c                    |   9 +-
>  6 files changed, 219 insertions(+), 25 deletions(-)
> 
> diff --git a/lib/librte_eal/bsdapp/eal/rte_eal_version.map
> b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
> index 90d7258..59a85bb 100644
> --- a/lib/librte_eal/bsdapp/eal/rte_eal_version.map
> +++ b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
> @@ -241,6 +241,7 @@ EXPERIMENTAL {
>  DPDK_17.11 {
>  	global:
> 
> -	rte_reciprocal_value;
> +	rte_reciprocal_value_u32;
> +	rte_reciprocal_value_u64;
> 
>  } DPDK_17.08;
> diff --git a/lib/librte_eal/common/include/rte_reciprocal.h
> b/lib/librte_eal/common/include/rte_reciprocal.h
> index b6d752f..85599e6 100644
> --- a/lib/librte_eal/common/include/rte_reciprocal.h
> +++ b/lib/librte_eal/common/include/rte_reciprocal.h
> @@ -22,22 +22,117 @@
>  #ifndef _RTE_RECIPROCAL_H_
>  #define _RTE_RECIPROCAL_H_
> 
> -#include <stdint.h>
> +#include <rte_memory.h>
> 
> -struct rte_reciprocal {
> +/**
> + * Unsigned 32-bit divisor structure.
> + */
> +struct rte_reciprocal_u32 {
>  	uint32_t m;
>  	uint8_t sh1, sh2;
>  };
> 

Do not add _32 suffix for exiting function, no name change, please.

> +/**
> + * Unsigned 64-bit divisor structure.
> + */
> +struct rte_reciprocal_u64 {
> +	uint64_t m;
> +	uint8_t sh1;
> +};
> +

I am OK with adding +64 suffix for new API.

> +/**
> + * Divide given unsigned 32-bit integer with pre calculated divisor.
> + *
> + * @param a
> + *   The 32-bit dividend.
> + * @param R
> + *   The pointer to pre calculated divisor reciprocal structure.
> + *
> + * @return
> + *   The result of the division
> + */
>  static inline uint32_t
> -rte_reciprocal_divide(uint32_t a, struct rte_reciprocal R)
> +rte_reciprocal_divide_u32(uint32_t a, struct rte_reciprocal_u32 *R)
>  {
> -	uint32_t t = (uint32_t)(((uint64_t)a * R.m) >> 32);
> +	uint32_t t = (((uint64_t)a * R->m) >> 32);
> 
> -	return (t + ((a - t) >> R.sh1)) >> R.sh2;
> +	return (t + ((a - t) >> R->sh1)) >> R->sh2;
>  }

Do not add _u32 suffix for existing API.

> 
> -struct rte_reciprocal
> -rte_reciprocal_value(uint32_t d);
> +static inline uint64_t
> +mullhi_u64(uint64_t x, uint64_t y)
> +{
> +#ifdef __SIZEOF_INT128__
> +	__uint128_t xl = x;
> +	__uint128_t rl = xl * y;
> +
> +	return (rl >> 64);
> +#else
> +	uint64_t u0, u1, v0, v1, k, t;
> +	uint64_t w1, w2;
> +	uint64_t whi;
> +
> +	u1 = x >> 32; u0 = x & 0xFFFFFFFF;
> +	v1 = y >> 32; v0 = y & 0xFFFFFFFF;
> +
> +	t = u0*v0;
> +	k = t >> 32;
> +
> +	t = u1*v0 + k;
> +	w1 = t & 0xFFFFFFFF;
> +	w2 = t >> 32;
> +
> +	t = u0*v1 + w1;
> +	k = t >> 32;
> +
> +	whi = u1*v1 + w2 + k;
> +
> +	return whi;
> +#endif
> +}
> +
> +/**
> + * Divide given unsigned 64-bit integer with pre calculated divisor.
> + *
> + * @param a
> + *   The 64-bit dividend.
> + * @param R
> + *   The pointer to pre calculated divisor reciprocal structure.
> + *
> + * @return
> + *   The result of the division
> + */
> +static inline uint64_t
> +rte_reciprocal_divide_u64(uint64_t a, struct rte_reciprocal_u64 *R)
> +{
> +	uint64_t q = mullhi_u64(R->m, a);
> +	uint64_t t = ((a - q) >> 1) + q;
> +
> +	return t >> R->sh1;
> +}
> +
> +/**
> + * Generate pre calculated divisor structure.
> + *
> + * @param d
> + *   The unsigned 32-bit divisor.
> + *
> + * @return
> + *   Divisor structure.
> + */
> +struct rte_reciprocal_u32
> +rte_reciprocal_value_u32(uint32_t d);
> +

Do not add _u32 suffix for existing API.

> +/**
> + * Generate pre calculated divisor structure.
> + *
> + * @param d
> + *   The unsigned 64-bit divisor.
> + *
> + * @return
> + *   Divisor structure.
> + */
> +struct rte_reciprocal_u64
> +rte_reciprocal_value_u64(uint64_t d);
> 
>  #endif /* _RTE_RECIPROCAL_H_ */
> diff --git a/lib/librte_eal/common/rte_reciprocal.c
> b/lib/librte_eal/common/rte_reciprocal.c
> index 7ab99b4..2024e62 100644
> --- a/lib/librte_eal/common/rte_reciprocal.c
> +++ b/lib/librte_eal/common/rte_reciprocal.c
> @@ -31,18 +31,13 @@
>   *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
> DAMAGE.
>   */
> 
> -#include <stdio.h>
> -#include <stdint.h>
> -
> -#include <rte_common.h>
> -
> -#include "rte_reciprocal.h"
> +#include <rte_reciprocal.h>
> 
>  /* find largest set bit.
>   * portable and slow but does not matter for this usage.
>   */
>  static inline int
> -fls(uint32_t x)
> +fls_u32(uint32_t x)
>  {
>  	int b;
> 
> @@ -54,14 +49,14 @@ fls(uint32_t x)
>  	return 0;
>  }
> 

Do not add _u32 suffix for existing API.

> -struct rte_reciprocal
> -rte_reciprocal_value(uint32_t d)
> +struct rte_reciprocal_u32
> +rte_reciprocal_value_u32(uint32_t d)

Do not add _u32 suffix for existing API.

>  {
> -	struct rte_reciprocal R;
> +	struct rte_reciprocal_u32 R;
>  	uint64_t m;
>  	int l;
> 
> -	l = fls(d - 1);
> +	l = fls_u32(d - 1);
>  	m = ((1ULL << 32) * ((1ULL << l) - d));
>  	m /= d;
> 
> @@ -72,3 +67,102 @@ rte_reciprocal_value(uint32_t d)
> 
>  	return R;
>  }
> +
> +/* Code taken from Hacker's Delight:
> + * http://www.hackersdelight.org/HDcode/divlu.c.
> + * License permits inclusion here per:
> + * http://www.hackersdelight.org/permissions.htm
> + */
> +static inline uint64_t
> +divide_128_div_64_to_64(uint64_t u1, uint64_t u0, uint64_t v, uint64_t *r)
> +{
> +	const uint64_t b = (1ULL << 32); /* Number base (16 bits). */
> +	uint64_t un1, un0,           /* Norm. dividend LSD's. */
> +			 vn1, vn0,           /* Norm. divisor digits. */
> +			 q1, q0,             /* Quotient digits. */
> +			 un64, un21, un10,   /* Dividend digit pairs. */
> +			 rhat;               /* A remainder. */
> +	int s;                       /* Shift amount for norm. */
> +
> +    /* If overflow, set rem. to an impossible value. */
> +	if (u1 >= v) {
> +		if (r != NULL)
> +			*r = (uint64_t) -1;
> +		return (uint64_t) -1;
> +	}
> +
> +	/* Count leading zeros. */
> +	s = __builtin_clzll(v);
> +	if (s > 0) {
> +		v = v << s;
> +		un64 = (u1 << s) | ((u0 >> (64 - s)) & (-s >> 31));
> +		un10 = u0 << s;
> +	} else {
> +
> +		un64 = u1 | u0;
> +		un10 = u0;
> +	}
> +
> +	vn1 = v >> 32;
> +	vn0 = v & 0xFFFFFFFF;
> +
> +	un1 = un10 >> 32;
> +	un0 = un10 & 0xFFFFFFFF;
> +
> +	q1 = un64/vn1;
> +	rhat = un64 - q1*vn1;
> +again1:
> +	if (q1 >= b || q1*vn0 > b*rhat + un1) {
> +		q1 = q1 - 1;
> +		rhat = rhat + vn1;
> +		if (rhat < b)
> +			goto again1;
> +	}
> +
> +	un21 = un64*b + un1 - q1*v;
> +
> +	q0 = un21/vn1;
> +	rhat = un21 - q0*vn1;
> +again2:
> +	if (q0 >= b || q0*vn0 > b*rhat + un0) {
> +		q0 = q0 - 1;
> +		rhat = rhat + vn1;
> +		if (rhat < b)
> +			goto again2;
> +	}
> +
> +	if (r != NULL)
> +		*r = (un21*b + un0 - q0*v) >> s;
> +	return q1*b + q0;
> +}
> +
> +struct rte_reciprocal_u64
> +rte_reciprocal_value_u64(uint64_t d)
> +{
> +	struct rte_reciprocal_u64 R;
> +
> +	const uint32_t fld = 63 - __builtin_clzll(d);
> +
> +	if ((d & (d - 1)) == 0) {
> +		R.m = 0;
> +		R.sh1 = (fld - 1) | 0x40;
> +	} else {
> +		uint64_t rem;
> +		uint64_t multiplier;
> +		uint8_t more;
> +
> +		multiplier = divide_128_div_64_to_64(1ULL << fld, 0, d,
> &rem);
> +		multiplier += multiplier;
> +
> +		const uint64_t twice_rem = rem + rem;
> +		if (twice_rem >= d || twice_rem < rem)
> +			multiplier += 1;
> +		more = fld;
> +		R.m = 1 + multiplier;
> +		R.sh1 = more | 0x40;
> +	}
> +
> +	R.sh1 &= 0x3F;
> +
> +	return R;
> +}
> diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> index 2070cba..2671627 100644
> --- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> +++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> @@ -246,6 +246,7 @@ EXPERIMENTAL {
>  DPDK_17.11 {
>  	global:
> 
> -	rte_reciprocal_value;
> +	rte_reciprocal_value_u32;

Do not add _u32 suffix for existing API.

> +	rte_reciprocal_value_u64;
> 
>  } DPDK_17.08;
> diff --git a/lib/librte_sched/Makefile b/lib/librte_sched/Makefile
> index 569656b..a2fd6f3 100644
> --- a/lib/librte_sched/Makefile
> +++ b/lib/librte_sched/Makefile
> @@ -54,6 +54,8 @@ LIBABIVER := 1
>  SRCS-$(CONFIG_RTE_LIBRTE_SCHED) += rte_sched.c rte_red.c rte_approx.c
> 
>  # install includes
> -SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include := rte_sched.h
> rte_bitmap.h rte_sched_common.h rte_red.h rte_approx.h
> +SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include := rte_sched.h
> rte_bitmap.h
> +SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include += rte_sched_common.h
> rte_red.h
> +SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include += rte_approx.h
> 
>  include $(RTE_SDK)/mk/rte.lib.mk
> diff --git a/lib/librte_sched/rte_sched.c b/lib/librte_sched/rte_sched.c
> index 3b8ccaa..7bb6d51 100644
> --- a/lib/librte_sched/rte_sched.c
> +++ b/lib/librte_sched/rte_sched.c
> @@ -228,7 +228,7 @@ struct rte_sched_port {
>  	uint64_t time_cpu_cycles;     /* Current CPU time measured in CPU
> cyles */
>  	uint64_t time_cpu_bytes;      /* Current CPU time measured in bytes
> */
>  	uint64_t time;                /* Current NIC TX time measured in bytes */
> -	struct rte_reciprocal inv_cycles_per_byte; /* CPU cycles per byte */
> +	struct rte_reciprocal_u32 inv_cycles_per_byte; /* CPU cycles per
> byte */
> 
>  	/* Scheduling loop detection */
>  	uint32_t pipe_loop;
> @@ -677,7 +677,7 @@ rte_sched_port_config(struct
> rte_sched_port_params *params)
> 
>  	cycles_per_byte = (rte_get_tsc_hz() << RTE_SCHED_TIME_SHIFT)
>  		/ params->rate;
> -	port->inv_cycles_per_byte = rte_reciprocal_value(cycles_per_byte);
> +	port->inv_cycles_per_byte =
> rte_reciprocal_value_u32(cycles_per_byte);
> 

Do not add _u32 suffix for existing API.

>  	/* Scheduling loop detection */
>  	port->pipe_loop = RTE_SCHED_PIPE_INVALID;
> @@ -2147,8 +2147,9 @@ rte_sched_port_time_resync(struct
> rte_sched_port *port)
>  	uint64_t bytes_diff;
> 
>  	/* Compute elapsed time in bytes */
> -	bytes_diff = rte_reciprocal_divide(cycles_diff <<
> RTE_SCHED_TIME_SHIFT,
> -					   port->inv_cycles_per_byte);
> +	bytes_diff = rte_reciprocal_divide_u32(

Do not add _u32 suffix for existing API.

> +			cycles_diff << RTE_SCHED_TIME_SHIFT,
> +			&port->inv_cycles_per_byte);
> 
>  	/* Advance port time */
>  	port->time_cpu_cycles = cycles;
> --
> 2.7.4
  

Patch

diff --git a/lib/librte_eal/bsdapp/eal/rte_eal_version.map b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
index 90d7258..59a85bb 100644
--- a/lib/librte_eal/bsdapp/eal/rte_eal_version.map
+++ b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
@@ -241,6 +241,7 @@  EXPERIMENTAL {
 DPDK_17.11 {
 	global:
 
-	rte_reciprocal_value;
+	rte_reciprocal_value_u32;
+	rte_reciprocal_value_u64;
 
 } DPDK_17.08;
diff --git a/lib/librte_eal/common/include/rte_reciprocal.h b/lib/librte_eal/common/include/rte_reciprocal.h
index b6d752f..85599e6 100644
--- a/lib/librte_eal/common/include/rte_reciprocal.h
+++ b/lib/librte_eal/common/include/rte_reciprocal.h
@@ -22,22 +22,117 @@ 
 #ifndef _RTE_RECIPROCAL_H_
 #define _RTE_RECIPROCAL_H_
 
-#include <stdint.h>
+#include <rte_memory.h>
 
-struct rte_reciprocal {
+/**
+ * Unsigned 32-bit divisor structure.
+ */
+struct rte_reciprocal_u32 {
 	uint32_t m;
 	uint8_t sh1, sh2;
 };
 
+/**
+ * Unsigned 64-bit divisor structure.
+ */
+struct rte_reciprocal_u64 {
+	uint64_t m;
+	uint8_t sh1;
+};
+
+/**
+ * Divide given unsigned 32-bit integer with pre calculated divisor.
+ *
+ * @param a
+ *   The 32-bit dividend.
+ * @param R
+ *   The pointer to pre calculated divisor reciprocal structure.
+ *
+ * @return
+ *   The result of the division
+ */
 static inline uint32_t
-rte_reciprocal_divide(uint32_t a, struct rte_reciprocal R)
+rte_reciprocal_divide_u32(uint32_t a, struct rte_reciprocal_u32 *R)
 {
-	uint32_t t = (uint32_t)(((uint64_t)a * R.m) >> 32);
+	uint32_t t = (((uint64_t)a * R->m) >> 32);
 
-	return (t + ((a - t) >> R.sh1)) >> R.sh2;
+	return (t + ((a - t) >> R->sh1)) >> R->sh2;
 }
 
-struct rte_reciprocal
-rte_reciprocal_value(uint32_t d);
+static inline uint64_t
+mullhi_u64(uint64_t x, uint64_t y)
+{
+#ifdef __SIZEOF_INT128__
+	__uint128_t xl = x;
+	__uint128_t rl = xl * y;
+
+	return (rl >> 64);
+#else
+	uint64_t u0, u1, v0, v1, k, t;
+	uint64_t w1, w2;
+	uint64_t whi;
+
+	u1 = x >> 32; u0 = x & 0xFFFFFFFF;
+	v1 = y >> 32; v0 = y & 0xFFFFFFFF;
+
+	t = u0*v0;
+	k = t >> 32;
+
+	t = u1*v0 + k;
+	w1 = t & 0xFFFFFFFF;
+	w2 = t >> 32;
+
+	t = u0*v1 + w1;
+	k = t >> 32;
+
+	whi = u1*v1 + w2 + k;
+
+	return whi;
+#endif
+}
+
+/**
+ * Divide given unsigned 64-bit integer with pre calculated divisor.
+ *
+ * @param a
+ *   The 64-bit dividend.
+ * @param R
+ *   The pointer to pre calculated divisor reciprocal structure.
+ *
+ * @return
+ *   The result of the division
+ */
+static inline uint64_t
+rte_reciprocal_divide_u64(uint64_t a, struct rte_reciprocal_u64 *R)
+{
+	uint64_t q = mullhi_u64(R->m, a);
+	uint64_t t = ((a - q) >> 1) + q;
+
+	return t >> R->sh1;
+}
+
+/**
+ * Generate pre calculated divisor structure.
+ *
+ * @param d
+ *   The unsigned 32-bit divisor.
+ *
+ * @return
+ *   Divisor structure.
+ */
+struct rte_reciprocal_u32
+rte_reciprocal_value_u32(uint32_t d);
+
+/**
+ * Generate pre calculated divisor structure.
+ *
+ * @param d
+ *   The unsigned 64-bit divisor.
+ *
+ * @return
+ *   Divisor structure.
+ */
+struct rte_reciprocal_u64
+rte_reciprocal_value_u64(uint64_t d);
 
 #endif /* _RTE_RECIPROCAL_H_ */
diff --git a/lib/librte_eal/common/rte_reciprocal.c b/lib/librte_eal/common/rte_reciprocal.c
index 7ab99b4..2024e62 100644
--- a/lib/librte_eal/common/rte_reciprocal.c
+++ b/lib/librte_eal/common/rte_reciprocal.c
@@ -31,18 +31,13 @@ 
  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-#include <stdio.h>
-#include <stdint.h>
-
-#include <rte_common.h>
-
-#include "rte_reciprocal.h"
+#include <rte_reciprocal.h>
 
 /* find largest set bit.
  * portable and slow but does not matter for this usage.
  */
 static inline int
-fls(uint32_t x)
+fls_u32(uint32_t x)
 {
 	int b;
 
@@ -54,14 +49,14 @@  fls(uint32_t x)
 	return 0;
 }
 
-struct rte_reciprocal
-rte_reciprocal_value(uint32_t d)
+struct rte_reciprocal_u32
+rte_reciprocal_value_u32(uint32_t d)
 {
-	struct rte_reciprocal R;
+	struct rte_reciprocal_u32 R;
 	uint64_t m;
 	int l;
 
-	l = fls(d - 1);
+	l = fls_u32(d - 1);
 	m = ((1ULL << 32) * ((1ULL << l) - d));
 	m /= d;
 
@@ -72,3 +67,102 @@  rte_reciprocal_value(uint32_t d)
 
 	return R;
 }
+
+/* Code taken from Hacker's Delight:
+ * http://www.hackersdelight.org/HDcode/divlu.c.
+ * License permits inclusion here per:
+ * http://www.hackersdelight.org/permissions.htm
+ */
+static inline uint64_t
+divide_128_div_64_to_64(uint64_t u1, uint64_t u0, uint64_t v, uint64_t *r)
+{
+	const uint64_t b = (1ULL << 32); /* Number base (16 bits). */
+	uint64_t un1, un0,           /* Norm. dividend LSD's. */
+			 vn1, vn0,           /* Norm. divisor digits. */
+			 q1, q0,             /* Quotient digits. */
+			 un64, un21, un10,   /* Dividend digit pairs. */
+			 rhat;               /* A remainder. */
+	int s;                       /* Shift amount for norm. */
+
+    /* If overflow, set rem. to an impossible value. */
+	if (u1 >= v) {
+		if (r != NULL)
+			*r = (uint64_t) -1;
+		return (uint64_t) -1;
+	}
+
+	/* Count leading zeros. */
+	s = __builtin_clzll(v);
+	if (s > 0) {
+		v = v << s;
+		un64 = (u1 << s) | ((u0 >> (64 - s)) & (-s >> 31));
+		un10 = u0 << s;
+	} else {
+
+		un64 = u1 | u0;
+		un10 = u0;
+	}
+
+	vn1 = v >> 32;
+	vn0 = v & 0xFFFFFFFF;
+
+	un1 = un10 >> 32;
+	un0 = un10 & 0xFFFFFFFF;
+
+	q1 = un64/vn1;
+	rhat = un64 - q1*vn1;
+again1:
+	if (q1 >= b || q1*vn0 > b*rhat + un1) {
+		q1 = q1 - 1;
+		rhat = rhat + vn1;
+		if (rhat < b)
+			goto again1;
+	}
+
+	un21 = un64*b + un1 - q1*v;
+
+	q0 = un21/vn1;
+	rhat = un21 - q0*vn1;
+again2:
+	if (q0 >= b || q0*vn0 > b*rhat + un0) {
+		q0 = q0 - 1;
+		rhat = rhat + vn1;
+		if (rhat < b)
+			goto again2;
+	}
+
+	if (r != NULL)
+		*r = (un21*b + un0 - q0*v) >> s;
+	return q1*b + q0;
+}
+
+struct rte_reciprocal_u64
+rte_reciprocal_value_u64(uint64_t d)
+{
+	struct rte_reciprocal_u64 R;
+
+	const uint32_t fld = 63 - __builtin_clzll(d);
+
+	if ((d & (d - 1)) == 0) {
+		R.m = 0;
+		R.sh1 = (fld - 1) | 0x40;
+	} else {
+		uint64_t rem;
+		uint64_t multiplier;
+		uint8_t more;
+
+		multiplier = divide_128_div_64_to_64(1ULL << fld, 0, d, &rem);
+		multiplier += multiplier;
+
+		const uint64_t twice_rem = rem + rem;
+		if (twice_rem >= d || twice_rem < rem)
+			multiplier += 1;
+		more = fld;
+		R.m = 1 + multiplier;
+		R.sh1 = more | 0x40;
+	}
+
+	R.sh1 &= 0x3F;
+
+	return R;
+}
diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
index 2070cba..2671627 100644
--- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
+++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
@@ -246,6 +246,7 @@  EXPERIMENTAL {
 DPDK_17.11 {
 	global:
 
-	rte_reciprocal_value;
+	rte_reciprocal_value_u32;
+	rte_reciprocal_value_u64;
 
 } DPDK_17.08;
diff --git a/lib/librte_sched/Makefile b/lib/librte_sched/Makefile
index 569656b..a2fd6f3 100644
--- a/lib/librte_sched/Makefile
+++ b/lib/librte_sched/Makefile
@@ -54,6 +54,8 @@  LIBABIVER := 1
 SRCS-$(CONFIG_RTE_LIBRTE_SCHED) += rte_sched.c rte_red.c rte_approx.c
 
 # install includes
-SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include := rte_sched.h rte_bitmap.h rte_sched_common.h rte_red.h rte_approx.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include := rte_sched.h rte_bitmap.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include += rte_sched_common.h rte_red.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include += rte_approx.h
 
 include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_sched/rte_sched.c b/lib/librte_sched/rte_sched.c
index 3b8ccaa..7bb6d51 100644
--- a/lib/librte_sched/rte_sched.c
+++ b/lib/librte_sched/rte_sched.c
@@ -228,7 +228,7 @@  struct rte_sched_port {
 	uint64_t time_cpu_cycles;     /* Current CPU time measured in CPU cyles */
 	uint64_t time_cpu_bytes;      /* Current CPU time measured in bytes */
 	uint64_t time;                /* Current NIC TX time measured in bytes */
-	struct rte_reciprocal inv_cycles_per_byte; /* CPU cycles per byte */
+	struct rte_reciprocal_u32 inv_cycles_per_byte; /* CPU cycles per byte */
 
 	/* Scheduling loop detection */
 	uint32_t pipe_loop;
@@ -677,7 +677,7 @@  rte_sched_port_config(struct rte_sched_port_params *params)
 
 	cycles_per_byte = (rte_get_tsc_hz() << RTE_SCHED_TIME_SHIFT)
 		/ params->rate;
-	port->inv_cycles_per_byte = rte_reciprocal_value(cycles_per_byte);
+	port->inv_cycles_per_byte = rte_reciprocal_value_u32(cycles_per_byte);
 
 	/* Scheduling loop detection */
 	port->pipe_loop = RTE_SCHED_PIPE_INVALID;
@@ -2147,8 +2147,9 @@  rte_sched_port_time_resync(struct rte_sched_port *port)
 	uint64_t bytes_diff;
 
 	/* Compute elapsed time in bytes */
-	bytes_diff = rte_reciprocal_divide(cycles_diff << RTE_SCHED_TIME_SHIFT,
-					   port->inv_cycles_per_byte);
+	bytes_diff = rte_reciprocal_divide_u32(
+			cycles_diff << RTE_SCHED_TIME_SHIFT,
+			&port->inv_cycles_per_byte);
 
 	/* Advance port time */
 	port->time_cpu_cycles = cycles;