[dpdk-dev] [PATCH v3 18/18] lpm: choose vector path at runtime
Medvedkin, Vladimir
vladimir.medvedkin at intel.com
Wed Sep 30 15:54:30 CEST 2020
Hi Ciara,
On 30/09/2020 14:04, Ciara Power wrote:
> When choosing the vector path, max SIMD bitwidth is now checked to
> ensure a vector path is allowable. To do this, rather than the vector
> lookup functions being called directly from apps, a generic lookup
> function is called which will call the vector functions if suitable.
>
> Signed-off-by: Ciara Power <ciara.power at intel.com>
> ---
> lib/librte_lpm/rte_lpm.h | 57 ++++++++++++++++++++++++++------
> lib/librte_lpm/rte_lpm_altivec.h | 2 +-
> lib/librte_lpm/rte_lpm_neon.h | 2 +-
> lib/librte_lpm/rte_lpm_sse.h | 2 +-
> 4 files changed, 50 insertions(+), 13 deletions(-)
>
> diff --git a/lib/librte_lpm/rte_lpm.h b/lib/librte_lpm/rte_lpm.h
> index 03da2d37e0..edba7cafd5 100644
> --- a/lib/librte_lpm/rte_lpm.h
> +++ b/lib/librte_lpm/rte_lpm.h
> @@ -397,8 +397,18 @@ rte_lpm_lookup_bulk_func(const struct rte_lpm *lpm, const uint32_t *ips,
> /* Mask four results. */
> #define RTE_LPM_MASKX4_RES UINT64_C(0x00ffffff00ffffff)
>
> +#if defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64)
> +#include "rte_lpm_neon.h"
> +#elif defined(RTE_ARCH_PPC_64)
> +#include "rte_lpm_altivec.h"
> +#else
> +#include "rte_lpm_sse.h"
> +#endif
> +
> /**
> - * Lookup four IP addresses in an LPM table.
> + * Lookup four IP addresses in an LPM table individually by calling the
> + * lookup function for each IP. This is used when lookupx4 is called but
> + * the vector path is not suitable.
> *
> * @param lpm
> * LPM object handle
> @@ -417,16 +427,43 @@ rte_lpm_lookup_bulk_func(const struct rte_lpm *lpm, const uint32_t *ips,
> * if lookup would fail.
> */
> static inline void
> -rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
> - uint32_t defv);
> +rte_lpm_lookupx4_scalar(struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
> + uint32_t defv)
> +{
> + int i;
> + for (i = 0; i < 4; i++)
> + if (rte_lpm_lookup(lpm, ((uint32_t *) &ip)[i], &hop[i]) < 0)
> + hop[i] = defv; /* lookupx4 expected to set on failure */
> +}
>
> -#if defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64)
> -#include "rte_lpm_neon.h"
> -#elif defined(RTE_ARCH_PPC_64)
> -#include "rte_lpm_altivec.h"
> -#else
> -#include "rte_lpm_sse.h"
> -#endif
> +/**
> + * Lookup four IP addresses in an LPM table.
> + *
> + * @param lpm
> + * LPM object handle
> + * @param ip
> + * Four IPs to be looked up in the LPM table
> + * @param hop
> + * Next hop of the most specific rule found for IP (valid on lookup hit only).
> + *   This is a 4-element array of two-byte values.
> + * If the lookup was successful for the given IP, then least significant byte
> + * of the corresponding element is the actual next hop and the most
> + * significant byte is zero.
> + * If the lookup for the given IP failed, then corresponding element would
> + *   contain the default value, see description of the next parameter.
> + * @param defv
> + * Default value to populate into corresponding element of hop[] array,
> + * if lookup would fail.
> + */
> +static inline void
> +rte_lpm_lookupx4(struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
> + uint32_t defv)
> +{
> + if (rte_get_max_simd_bitwidth() >= RTE_MAX_128_SIMD)
> + rte_lpm_lookupx4_vec(lpm, ip, hop, defv);
> + else
> + rte_lpm_lookupx4_scalar(lpm, ip, hop, defv);
> +}
I'm afraid this will lead to a drop in performance. rte_lpm_lookupx4 is
used in the hot path, and a bulk size of only four lookups is too small
to amortize the cost of adding this extra runtime branch.
>
> #ifdef __cplusplus
> }
> diff --git a/lib/librte_lpm/rte_lpm_altivec.h b/lib/librte_lpm/rte_lpm_altivec.h
> index 228c41b38e..82142d3351 100644
> --- a/lib/librte_lpm/rte_lpm_altivec.h
> +++ b/lib/librte_lpm/rte_lpm_altivec.h
> @@ -16,7 +16,7 @@ extern "C" {
> #endif
>
> static inline void
> -rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
> +rte_lpm_lookupx4_vec(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
> uint32_t defv)
> {
> vector signed int i24;
> diff --git a/lib/librte_lpm/rte_lpm_neon.h b/lib/librte_lpm/rte_lpm_neon.h
> index 6c131d3125..14b184515d 100644
> --- a/lib/librte_lpm/rte_lpm_neon.h
> +++ b/lib/librte_lpm/rte_lpm_neon.h
> @@ -16,7 +16,7 @@ extern "C" {
> #endif
>
> static inline void
> -rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
> +rte_lpm_lookupx4_vec(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
> uint32_t defv)
> {
> uint32x4_t i24;
> diff --git a/lib/librte_lpm/rte_lpm_sse.h b/lib/librte_lpm/rte_lpm_sse.h
> index 44770b6ff8..cb5477c6cf 100644
> --- a/lib/librte_lpm/rte_lpm_sse.h
> +++ b/lib/librte_lpm/rte_lpm_sse.h
> @@ -15,7 +15,7 @@ extern "C" {
> #endif
>
> static inline void
> -rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
> +rte_lpm_lookupx4_vec(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
> uint32_t defv)
> {
> __m128i i24;
>
--
Regards,
Vladimir
More information about the dev
mailing list