[dpdk-dev] [PATCH v3 18/18] lpm: choose vector path at runtime

Medvedkin, Vladimir vladimir.medvedkin at intel.com
Wed Sep 30 15:54:30 CEST 2020


Hi Ciara,


On 30/09/2020 14:04, Ciara Power wrote:
> When choosing the vector path, max SIMD bitwidth is now checked to
> ensure a vector path is allowable. To do this, rather than the vector
> lookup functions being called directly from apps, a generic lookup
> function is called which will call the vector functions if suitable.
> 
> Signed-off-by: Ciara Power <ciara.power at intel.com>
> ---
>   lib/librte_lpm/rte_lpm.h         | 57 ++++++++++++++++++++++++++------
>   lib/librte_lpm/rte_lpm_altivec.h |  2 +-
>   lib/librte_lpm/rte_lpm_neon.h    |  2 +-
>   lib/librte_lpm/rte_lpm_sse.h     |  2 +-
>   4 files changed, 50 insertions(+), 13 deletions(-)
> 
> diff --git a/lib/librte_lpm/rte_lpm.h b/lib/librte_lpm/rte_lpm.h
> index 03da2d37e0..edba7cafd5 100644
> --- a/lib/librte_lpm/rte_lpm.h
> +++ b/lib/librte_lpm/rte_lpm.h
> @@ -397,8 +397,18 @@ rte_lpm_lookup_bulk_func(const struct rte_lpm *lpm, const uint32_t *ips,
>   /* Mask four results. */
>   #define	 RTE_LPM_MASKX4_RES	UINT64_C(0x00ffffff00ffffff)
>   
> +#if defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64)
> +#include "rte_lpm_neon.h"
> +#elif defined(RTE_ARCH_PPC_64)
> +#include "rte_lpm_altivec.h"
> +#else
> +#include "rte_lpm_sse.h"
> +#endif
> +
>   /**
> - * Lookup four IP addresses in an LPM table.
> + * Lookup four IP addresses in an LPM table individually by calling the
> + * lookup function for each IP. This is used when lookupx4 is called but
> + * the vector path is not suitable.
>    *
>    * @param lpm
>    *   LPM object handle
> @@ -417,16 +427,43 @@ rte_lpm_lookup_bulk_func(const struct rte_lpm *lpm, const uint32_t *ips,
>    *   if lookup would fail.
>    */
>   static inline void
> -rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
> -	uint32_t defv);
> +rte_lpm_lookupx4_scalar(struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
> +	uint32_t defv)
> +{
> +	int i;
> +	for (i = 0; i < 4; i++)
> +		if (rte_lpm_lookup(lpm, ((uint32_t *) &ip)[i], &hop[i]) < 0)
> +			hop[i] = defv; /* lookupx4 expected to set on failure */
> +}
>   
> -#if defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64)
> -#include "rte_lpm_neon.h"
> -#elif defined(RTE_ARCH_PPC_64)
> -#include "rte_lpm_altivec.h"
> -#else
> -#include "rte_lpm_sse.h"
> -#endif
> +/**
> + * Lookup four IP addresses in an LPM table.
> + *
> + * @param lpm
> + *   LPM object handle
> + * @param ip
> + *   Four IPs to be looked up in the LPM table
> + * @param hop
> + *   Next hop of the most specific rule found for IP (valid on lookup hit only).
> + *   This is a 4-element array of two-byte values.
> + *   If the lookup was successful for the given IP, then the least significant
> + *   byte of the corresponding element is the actual next hop and the most
> + *   significant byte is zero.
> + *   If the lookup for the given IP failed, then the corresponding element
> + *   will contain the default value; see the description of the next parameter.
> + * @param defv
> + *   Default value to populate into corresponding element of hop[] array,
> + *   if lookup would fail.
> + */
> +static inline void
> +rte_lpm_lookupx4(struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
> +	uint32_t defv)
> +{
> +	if (rte_get_max_simd_bitwidth() >= RTE_MAX_128_SIMD)
> +		rte_lpm_lookupx4_vec(lpm, ip, hop, defv);
> +	else
> +		rte_lpm_lookupx4_scalar(lpm, ip, hop, defv);
> +}

I'm afraid this will lead to a drop in performance. rte_lpm_lookupx4 is 
used in the hot path, and the bulk size is too small to amortize the cost 
of adding this extra logic.

>   
>   #ifdef __cplusplus
>   }
> diff --git a/lib/librte_lpm/rte_lpm_altivec.h b/lib/librte_lpm/rte_lpm_altivec.h
> index 228c41b38e..82142d3351 100644
> --- a/lib/librte_lpm/rte_lpm_altivec.h
> +++ b/lib/librte_lpm/rte_lpm_altivec.h
> @@ -16,7 +16,7 @@ extern "C" {
>   #endif
>   
>   static inline void
> -rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
> +rte_lpm_lookupx4_vec(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
>   	uint32_t defv)
>   {
>   	vector signed int i24;
> diff --git a/lib/librte_lpm/rte_lpm_neon.h b/lib/librte_lpm/rte_lpm_neon.h
> index 6c131d3125..14b184515d 100644
> --- a/lib/librte_lpm/rte_lpm_neon.h
> +++ b/lib/librte_lpm/rte_lpm_neon.h
> @@ -16,7 +16,7 @@ extern "C" {
>   #endif
>   
>   static inline void
> -rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
> +rte_lpm_lookupx4_vec(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
>   	uint32_t defv)
>   {
>   	uint32x4_t i24;
> diff --git a/lib/librte_lpm/rte_lpm_sse.h b/lib/librte_lpm/rte_lpm_sse.h
> index 44770b6ff8..cb5477c6cf 100644
> --- a/lib/librte_lpm/rte_lpm_sse.h
> +++ b/lib/librte_lpm/rte_lpm_sse.h
> @@ -15,7 +15,7 @@ extern "C" {
>   #endif
>   
>   static inline void
> -rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
> +rte_lpm_lookupx4_vec(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
>   	uint32_t defv)
>   {
>   	__m128i i24;
> 

-- 
Regards,
Vladimir


More information about the dev mailing list