[dpdk-dev] [PATCH v2 15/16] lpm/arm: implement rte_lpm_lookupx4 using rte_lpm_lookup_bulk on for-x86
Ananyev, Konstantin
konstantin.ananyev at intel.com
Tue Oct 27 16:31:44 CET 2015
Hi Jan,
> -----Original Message-----
> From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Jan Viktorin
> Sent: Monday, October 26, 2015 4:38 PM
> To: Thomas Monjalon; Hunt, David; dev at dpdk.org
> Cc: Vlastimil Kosar
> Subject: [dpdk-dev] [PATCH v2 15/16] lpm/arm: implement rte_lpm_lookupx4 using rte_lpm_lookup_bulk on for-x86
>
> From: Vlastimil Kosar <kosar at rehivetech.com>
>
> LPM function rte_lpm_lookupx4() uses i686/x86_64 SIMD intrinsics. Therefore,
> the function is reimplemented using non-vector operations for non-x86
> architectures. In the future, each architecture should have vectorized code.
> This patch includes rudimentary emulation of intrinsic functions _mm_set_epi32(),
> _mm_loadu_si128() and _mm_load_si128() for easy portability of existing
> applications.
>
> LPM now builds on ARM.
>
> FIXME: to be reworked
>
> Signed-off-by: Vlastimil Kosar <kosar at rehivetech.com>
> Signed-off-by: Jan Viktorin <viktorin at rehivetech.com>
> ---
> config/defconfig_arm-armv7-a-linuxapp-gcc | 1 -
> lib/librte_lpm/rte_lpm.h | 71 +++++++++++++++++++++++++++++++
> 2 files changed, 71 insertions(+), 1 deletion(-)
>
> diff --git a/config/defconfig_arm-armv7-a-linuxapp-gcc b/config/defconfig_arm-armv7-a-linuxapp-gcc
> index 5b582a8..33afb33 100644
> --- a/config/defconfig_arm-armv7-a-linuxapp-gcc
> +++ b/config/defconfig_arm-armv7-a-linuxapp-gcc
> @@ -58,7 +58,6 @@ CONFIG_XMM_SIZE=16
>
> # fails to compile on ARM
> CONFIG_RTE_LIBRTE_ACL=n
> -CONFIG_RTE_LIBRTE_LPM=n
>
> # cannot use those on ARM
> CONFIG_RTE_KNI_KMOD=n
> diff --git a/lib/librte_lpm/rte_lpm.h b/lib/librte_lpm/rte_lpm.h
> index c299ce2..4619992 100644
> --- a/lib/librte_lpm/rte_lpm.h
> +++ b/lib/librte_lpm/rte_lpm.h
> @@ -47,7 +47,9 @@
> #include <rte_byteorder.h>
> #include <rte_memory.h>
> #include <rte_common.h>
> +#if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_I686)
> #include <rte_vect.h>
> +#endif
>
> #ifdef __cplusplus
> extern "C" {
> @@ -358,6 +360,7 @@ rte_lpm_lookup_bulk_func(const struct rte_lpm *lpm, const uint32_t * ips,
> return 0;
> }
>
> +#if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_I686)
> /* Mask four results. */
> #define RTE_LPM_MASKX4_RES UINT64_C(0x00ff00ff00ff00ff)
>
> @@ -472,6 +475,74 @@ rte_lpm_lookupx4(const struct rte_lpm *lpm, __m128i ip, uint16_t hop[4],
> hop[2] = (tbl[2] & RTE_LPM_LOOKUP_SUCCESS) ? (uint8_t)tbl[2] : defv;
> hop[3] = (tbl[3] & RTE_LPM_LOOKUP_SUCCESS) ? (uint8_t)tbl[3] : defv;
> }
> +#else
It would probably be better to create lib/librte_eal/common/include/arch/arm/rte_vect.h
and move all of this x86 vector support emulation there?
Konstantin
> +// TODO: this code should be reworked.
> +
> +typedef struct {
> + union uint128 {
> + uint8_t uint8[16];
> + uint32_t uint32[4];
> + } val;
> +} __m128i;
> +
> +static inline __m128i
> +_mm_set_epi32(uint32_t v0, uint32_t v1, uint32_t v2, uint32_t v3)
> +{
> + __m128i res;
> + res.val.uint32[0] = v0;
> + res.val.uint32[1] = v1;
> + res.val.uint32[2] = v2;
> + res.val.uint32[3] = v3;
> + return res;
> +}
> +
> +static inline __m128i
> +_mm_loadu_si128(__m128i * v)
> +{
> + __m128i res;
> + res = *v;
> + return res;
> +}
> +
> +static inline __m128i
> +_mm_load_si128(__m128i * v)
> +{
> + __m128i res;
> + res = *v;
> + return res;
> +}
> +
> +/**
> + * Lookup four IP addresses in an LPM table.
> + *
> + * @param lpm
> + * LPM object handle
> + * @param ip
> + * Four IPs to be looked up in the LPM table
> + * @param hop
> + * Next hop of the most specific rule found for IP (valid on lookup hit only).
> + * This is a 4-element array of two-byte values.
> + * If the lookup was successful for the given IP, then the least significant byte
> + * of the corresponding element is the actual next hop and the most
> + * significant byte is zero.
> + * If the lookup for the given IP failed, then the corresponding element will
> + * contain the default value; see the description of the next parameter.
> + * @param defv
> + * Default value to populate into corresponding element of hop[] array,
> + * if lookup would fail.
> + */
> +static inline void
> +rte_lpm_lookupx4(const struct rte_lpm *lpm, __m128i ip, uint16_t hop[4],
> + uint16_t defv)
> +{
> + rte_lpm_lookup_bulk(lpm, ip.val.uint32, hop, 4);
> +
> + hop[0] = (hop[0] & RTE_LPM_LOOKUP_SUCCESS) ? (uint8_t)hop[0] : defv;
> + hop[1] = (hop[1] & RTE_LPM_LOOKUP_SUCCESS) ? (uint8_t)hop[1] : defv;
> + hop[2] = (hop[2] & RTE_LPM_LOOKUP_SUCCESS) ? (uint8_t)hop[2] : defv;
> + hop[3] = (hop[3] & RTE_LPM_LOOKUP_SUCCESS) ? (uint8_t)hop[3] : defv;
> +}
> +#endif
>
> #ifdef __cplusplus
> }
> --
> 2.6.1
More information about the dev
mailing list