[v5,17/17] node: choose vector path at runtime
Checks
Commit Message
When choosing the vector path, max SIMD bitwidth is now checked to
ensure the vector path is suitable. To do this, rather than the
scalar/vector lookup functions being called directly from the apps, a
generic function is called which will then call the scalar or vector
lookup function.
Cc: Nithin Dabilpuram <ndabilpuram@marvell.com>
Cc: Pavan Nikhilesh <pbhagavatula@marvell.com>
Cc: Jerin Jacob <jerinj@marvell.com>
Cc: Kiran Kumar K <kirankumark@marvell.com>
Signed-off-by: Ciara Power <ciara.power@intel.com>
---
lib/librte_node/ip4_lookup.c | 13 +++++++++++--
lib/librte_node/ip4_lookup_neon.h | 2 +-
lib/librte_node/ip4_lookup_sse.h | 2 +-
3 files changed, 13 insertions(+), 4 deletions(-)
Comments
> When choosing the vector path, max SIMD bitwidth is now checked to
> ensure the vector path is suitable. To do this, rather than the
> scalar/vector lookup functions being called directly from the apps, a
> generic function is called which will then call the scalar or vector
> lookup function.
>
> Cc: Nithin Dabilpuram <ndabilpuram@marvell.com>
> Cc: Pavan Nikhilesh <pbhagavatula@marvell.com>
> Cc: Jerin Jacob <jerinj@marvell.com>
> Cc: Kiran Kumar K <kirankumark@marvell.com>
>
> Signed-off-by: Ciara Power <ciara.power@intel.com>
> ---
> lib/librte_node/ip4_lookup.c | 13 +++++++++++--
> lib/librte_node/ip4_lookup_neon.h | 2 +-
> lib/librte_node/ip4_lookup_sse.h | 2 +-
> 3 files changed, 13 insertions(+), 4 deletions(-)
>
> diff --git a/lib/librte_node/ip4_lookup.c b/lib/librte_node/ip4_lookup.c
> index 293c77f39e..b3edbc1f4d 100644
> --- a/lib/librte_node/ip4_lookup.c
> +++ b/lib/librte_node/ip4_lookup.c
> @@ -34,10 +34,10 @@ static struct ip4_lookup_node_main ip4_lookup_nm;
> #include "ip4_lookup_neon.h"
> #elif defined(RTE_ARCH_X86)
> #include "ip4_lookup_sse.h"
> -#else
> +#endif
>
> static uint16_t
> -ip4_lookup_node_process(struct rte_graph *graph, struct rte_node *node,
> +ip4_lookup_node_process_scalar(struct rte_graph *graph, struct rte_node *node,
> void **objs, uint16_t nb_objs)
> {
> struct rte_ipv4_hdr *ipv4_hdr;
> @@ -109,7 +109,16 @@ ip4_lookup_node_process(struct rte_graph *graph, struct rte_node *node,
> return nb_objs;
> }
>
> +static uint16_t
> +ip4_lookup_node_process(struct rte_graph *graph, struct rte_node *node,
> + void **objs, uint16_t nb_objs)
> +{
> +#if defined(RTE_MACHINE_CPUFLAG_NEON) || defined(RTE_ARCH_X86)
> + if (rte_get_max_simd_bitwidth() >= RTE_SIMD_128)
> + return ip4_lookup_node_process_vec(graph, node, objs, nb_objs);
> #endif
> + return ip4_lookup_node_process_scalar(graph, node, objs, nb_objs);
> +}
Just wonder can't this selection be done once at ip4_lookup_node_init()?
>
> int
> rte_node_ip4_route_add(uint32_t ip, uint8_t depth, uint16_t next_hop,
> diff --git a/lib/librte_node/ip4_lookup_neon.h b/lib/librte_node/ip4_lookup_neon.h
> index 5e5a7d87be..0ad2763b82 100644
> --- a/lib/librte_node/ip4_lookup_neon.h
> +++ b/lib/librte_node/ip4_lookup_neon.h
> @@ -7,7 +7,7 @@
>
> /* ARM64 NEON */
> static uint16_t
> -ip4_lookup_node_process(struct rte_graph *graph, struct rte_node *node,
> +ip4_lookup_node_process_vec(struct rte_graph *graph, struct rte_node *node,
> void **objs, uint16_t nb_objs)
> {
> struct rte_mbuf *mbuf0, *mbuf1, *mbuf2, *mbuf3, **pkts;
> diff --git a/lib/librte_node/ip4_lookup_sse.h b/lib/librte_node/ip4_lookup_sse.h
> index a071cc5919..264c986071 100644
> --- a/lib/librte_node/ip4_lookup_sse.h
> +++ b/lib/librte_node/ip4_lookup_sse.h
> @@ -7,7 +7,7 @@
>
> /* X86 SSE */
> static uint16_t
> -ip4_lookup_node_process(struct rte_graph *graph, struct rte_node *node,
> +ip4_lookup_node_process_vec(struct rte_graph *graph, struct rte_node *node,
> void **objs, uint16_t nb_objs)
> {
> struct rte_mbuf *mbuf0, *mbuf1, *mbuf2, *mbuf3, **pkts;
> --
> 2.22.0
> -----Original Message-----
> From: Ciara Power <ciara.power@intel.com>
> Sent: Tuesday, October 13, 2020 7:05 PM
> To: dev@dpdk.org
> Cc: viktorin@rehivetech.com; Ruifeng Wang <Ruifeng.Wang@arm.com>;
> jerinj@marvell.com; drc@linux.vnet.ibm.com; bruce.richardson@intel.com;
> konstantin.ananyev@intel.com; Ciara Power <ciara.power@intel.com>;
> Nithin Dabilpuram <ndabilpuram@marvell.com>; Pavan Nikhilesh
> <pbhagavatula@marvell.com>; Kiran Kumar K <kirankumark@marvell.com>
> Subject: [PATCH v5 17/17] node: choose vector path at runtime
>
> When choosing the vector path, max SIMD bitwidth is now checked to
> ensure the vector path is suitable. To do this, rather than the scalar/vector
> lookup functions being called directly from the apps, a generic function is
> called which will then call the scalar or vector lookup function.
>
> Cc: Nithin Dabilpuram <ndabilpuram@marvell.com>
> Cc: Pavan Nikhilesh <pbhagavatula@marvell.com>
> Cc: Jerin Jacob <jerinj@marvell.com>
> Cc: Kiran Kumar K <kirankumark@marvell.com>
>
> Signed-off-by: Ciara Power <ciara.power@intel.com>
> ---
> lib/librte_node/ip4_lookup.c | 13 +++++++++++--
> lib/librte_node/ip4_lookup_neon.h | 2 +-
> lib/librte_node/ip4_lookup_sse.h | 2 +-
> 3 files changed, 13 insertions(+), 4 deletions(-)
>
> diff --git a/lib/librte_node/ip4_lookup.c b/lib/librte_node/ip4_lookup.c
> index 293c77f39e..b3edbc1f4d 100644
> --- a/lib/librte_node/ip4_lookup.c
> +++ b/lib/librte_node/ip4_lookup.c
> @@ -34,10 +34,10 @@ static struct ip4_lookup_node_main ip4_lookup_nm;
> #include "ip4_lookup_neon.h"
> #elif defined(RTE_ARCH_X86)
> #include "ip4_lookup_sse.h"
> -#else
> +#endif
>
> static uint16_t
> -ip4_lookup_node_process(struct rte_graph *graph, struct rte_node *node,
> +ip4_lookup_node_process_scalar(struct rte_graph *graph, struct rte_node
> +*node,
> void **objs, uint16_t nb_objs)
> {
> struct rte_ipv4_hdr *ipv4_hdr;
> @@ -109,7 +109,16 @@ ip4_lookup_node_process(struct rte_graph *graph,
> struct rte_node *node,
> return nb_objs;
> }
>
> +static uint16_t
> +ip4_lookup_node_process(struct rte_graph *graph, struct rte_node *node,
> + void **objs, uint16_t nb_objs)
> +{
> +#if defined(RTE_MACHINE_CPUFLAG_NEON) || defined(RTE_ARCH_X86)
Machine flags has been removed recently. RTE_MACHINE_CPUFLAG_NEON is replaced by __ARM_NEON.
Looks like the patch set needs a rebase.
> + if (rte_get_max_simd_bitwidth() >= RTE_SIMD_128)
> + return ip4_lookup_node_process_vec(graph, node, objs,
> nb_objs);
> #endif
> + return ip4_lookup_node_process_scalar(graph, node, objs,
> nb_objs); }
>
> int
> rte_node_ip4_route_add(uint32_t ip, uint8_t depth, uint16_t next_hop, diff
> --git a/lib/librte_node/ip4_lookup_neon.h
> b/lib/librte_node/ip4_lookup_neon.h
> index 5e5a7d87be..0ad2763b82 100644
> --- a/lib/librte_node/ip4_lookup_neon.h
> +++ b/lib/librte_node/ip4_lookup_neon.h
> @@ -7,7 +7,7 @@
>
> /* ARM64 NEON */
> static uint16_t
> -ip4_lookup_node_process(struct rte_graph *graph, struct rte_node *node,
> +ip4_lookup_node_process_vec(struct rte_graph *graph, struct rte_node
> +*node,
> void **objs, uint16_t nb_objs)
> {
> struct rte_mbuf *mbuf0, *mbuf1, *mbuf2, *mbuf3, **pkts; diff --git
> a/lib/librte_node/ip4_lookup_sse.h b/lib/librte_node/ip4_lookup_sse.h
> index a071cc5919..264c986071 100644
> --- a/lib/librte_node/ip4_lookup_sse.h
> +++ b/lib/librte_node/ip4_lookup_sse.h
> @@ -7,7 +7,7 @@
>
> /* X86 SSE */
> static uint16_t
> -ip4_lookup_node_process(struct rte_graph *graph, struct rte_node *node,
> +ip4_lookup_node_process_vec(struct rte_graph *graph, struct rte_node
> +*node,
> void **objs, uint16_t nb_objs)
> {
> struct rte_mbuf *mbuf0, *mbuf1, *mbuf2, *mbuf3, **pkts;
> --
> 2.22.0
On Tue, Oct 13, 2020 at 7:12 PM Ananyev, Konstantin
<konstantin.ananyev@intel.com> wrote:
>
> > When choosing the vector path, max SIMD bitwidth is now checked to
> > ensure the vector path is suitable. To do this, rather than the
> > scalar/vector lookup functions being called directly from the apps, a
> > generic function is called which will then call the scalar or vector
> > lookup function.
> >
> > Cc: Nithin Dabilpuram <ndabilpuram@marvell.com>
> > Cc: Pavan Nikhilesh <pbhagavatula@marvell.com>
> > Cc: Jerin Jacob <jerinj@marvell.com>
> > Cc: Kiran Kumar K <kirankumark@marvell.com>
> >
> > Signed-off-by: Ciara Power <ciara.power@intel.com>
> > ---
> > lib/librte_node/ip4_lookup.c | 13 +++++++++++--
> > lib/librte_node/ip4_lookup_neon.h | 2 +-
> > lib/librte_node/ip4_lookup_sse.h | 2 +-
> > 3 files changed, 13 insertions(+), 4 deletions(-)
> >
> > +static uint16_t
> > +ip4_lookup_node_process(struct rte_graph *graph, struct rte_node *node,
> > + void **objs, uint16_t nb_objs)
> > +{
> > +#if defined(RTE_MACHINE_CPUFLAG_NEON) || defined(RTE_ARCH_X86)
> > + if (rte_get_max_simd_bitwidth() >= RTE_SIMD_128)
> > + return ip4_lookup_node_process_vec(graph, node, objs, nb_objs);
> > #endif
> > + return ip4_lookup_node_process_scalar(graph, node, objs, nb_objs);
> > +}
>
> Just wonder can't this selection be done once at ip4_lookup_node_init()?
Yes. It is better to move ..process() selection in ip4_lookup_node_init()
@@ -34,10 +34,10 @@ static struct ip4_lookup_node_main ip4_lookup_nm;
#include "ip4_lookup_neon.h"
#elif defined(RTE_ARCH_X86)
#include "ip4_lookup_sse.h"
-#else
+#endif
static uint16_t
-ip4_lookup_node_process(struct rte_graph *graph, struct rte_node *node,
+ip4_lookup_node_process_scalar(struct rte_graph *graph, struct rte_node *node,
void **objs, uint16_t nb_objs)
{
struct rte_ipv4_hdr *ipv4_hdr;
@@ -109,7 +109,16 @@ ip4_lookup_node_process(struct rte_graph *graph, struct rte_node *node,
return nb_objs;
}
+static uint16_t
+ip4_lookup_node_process(struct rte_graph *graph, struct rte_node *node,
+ void **objs, uint16_t nb_objs)
+{
+#if defined(RTE_MACHINE_CPUFLAG_NEON) || defined(RTE_ARCH_X86)
+ if (rte_get_max_simd_bitwidth() >= RTE_SIMD_128)
+ return ip4_lookup_node_process_vec(graph, node, objs, nb_objs);
#endif
+ return ip4_lookup_node_process_scalar(graph, node, objs, nb_objs);
+}
int
rte_node_ip4_route_add(uint32_t ip, uint8_t depth, uint16_t next_hop,
@@ -7,7 +7,7 @@
/* ARM64 NEON */
static uint16_t
-ip4_lookup_node_process(struct rte_graph *graph, struct rte_node *node,
+ip4_lookup_node_process_vec(struct rte_graph *graph, struct rte_node *node,
void **objs, uint16_t nb_objs)
{
struct rte_mbuf *mbuf0, *mbuf1, *mbuf2, *mbuf3, **pkts;
@@ -7,7 +7,7 @@
/* X86 SSE */
static uint16_t
-ip4_lookup_node_process(struct rte_graph *graph, struct rte_node *node,
+ip4_lookup_node_process_vec(struct rte_graph *graph, struct rte_node *node,
void **objs, uint16_t nb_objs)
{
struct rte_mbuf *mbuf0, *mbuf1, *mbuf2, *mbuf3, **pkts;