[v5,17/17] node: choose vector path at runtime

Message ID: 20201013110437.309110-18-ciara.power@intel.com (mailing list archive)
State: Superseded, archived
Delegated to: David Marchand
Series: add max SIMD bitwidth to EAL

Checks

Context                      Check    Description
ci/checkpatch                success  coding style OK
ci/iol-broadcom-Performance  success  Performance Testing PASS
ci/iol-broadcom-Functional   success  Functional Testing PASS
ci/iol-testing               fail     Testing issues
ci/iol-intel-Performance     success  Performance Testing PASS
ci/iol-mellanox-Performance  success  Performance Testing PASS
ci/Intel-compilation         fail     apply issues

Commit Message

Power, Ciara Oct. 13, 2020, 11:04 a.m. UTC
  When choosing the vector path, max SIMD bitwidth is now checked to
ensure the vector path is suitable. To do this, rather than the
scalar/vector lookup functions being called directly from the apps, a
generic function is called which will then call the scalar or vector
lookup function.

Cc: Nithin Dabilpuram <ndabilpuram@marvell.com>
Cc: Pavan Nikhilesh <pbhagavatula@marvell.com>
Cc: Jerin Jacob <jerinj@marvell.com>
Cc: Kiran Kumar K <kirankumark@marvell.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>
---
 lib/librte_node/ip4_lookup.c      | 13 +++++++++++--
 lib/librte_node/ip4_lookup_neon.h |  2 +-
 lib/librte_node/ip4_lookup_sse.h  |  2 +-
 3 files changed, 13 insertions(+), 4 deletions(-)
  

Comments

Ananyev, Konstantin Oct. 13, 2020, 1:42 p.m. UTC | #1
> When choosing the vector path, max SIMD bitwidth is now checked to
> ensure the vector path is suitable. To do this, rather than the
> scalar/vector lookup functions being called directly from the apps, a
> generic function is called which will then call the scalar or vector
> lookup function.
> 
> Cc: Nithin Dabilpuram <ndabilpuram@marvell.com>
> Cc: Pavan Nikhilesh <pbhagavatula@marvell.com>
> Cc: Jerin Jacob <jerinj@marvell.com>
> Cc: Kiran Kumar K <kirankumark@marvell.com>
> 
> Signed-off-by: Ciara Power <ciara.power@intel.com>
> ---
>  lib/librte_node/ip4_lookup.c      | 13 +++++++++++--
>  lib/librte_node/ip4_lookup_neon.h |  2 +-
>  lib/librte_node/ip4_lookup_sse.h  |  2 +-
>  3 files changed, 13 insertions(+), 4 deletions(-)
> 
> diff --git a/lib/librte_node/ip4_lookup.c b/lib/librte_node/ip4_lookup.c
> index 293c77f39e..b3edbc1f4d 100644
> --- a/lib/librte_node/ip4_lookup.c
> +++ b/lib/librte_node/ip4_lookup.c
> @@ -34,10 +34,10 @@ static struct ip4_lookup_node_main ip4_lookup_nm;
>  #include "ip4_lookup_neon.h"
>  #elif defined(RTE_ARCH_X86)
>  #include "ip4_lookup_sse.h"
> -#else
> +#endif
> 
>  static uint16_t
> -ip4_lookup_node_process(struct rte_graph *graph, struct rte_node *node,
> +ip4_lookup_node_process_scalar(struct rte_graph *graph, struct rte_node *node,
>  			void **objs, uint16_t nb_objs)
>  {
>  	struct rte_ipv4_hdr *ipv4_hdr;
> @@ -109,7 +109,16 @@ ip4_lookup_node_process(struct rte_graph *graph, struct rte_node *node,
>  	return nb_objs;
>  }
> 
> +static uint16_t
> +ip4_lookup_node_process(struct rte_graph *graph, struct rte_node *node,
> +			void **objs, uint16_t nb_objs)
> +{
> +#if defined(RTE_MACHINE_CPUFLAG_NEON) || defined(RTE_ARCH_X86)
> +	if (rte_get_max_simd_bitwidth() >= RTE_SIMD_128)
> +		return ip4_lookup_node_process_vec(graph, node, objs, nb_objs);
>  #endif
> +	return ip4_lookup_node_process_scalar(graph, node, objs, nb_objs);
> +}

Just wondering, can't this selection be done once at ip4_lookup_node_init()? (See the sketch below this message.)

> 
>  int
>  rte_node_ip4_route_add(uint32_t ip, uint8_t depth, uint16_t next_hop,
> diff --git a/lib/librte_node/ip4_lookup_neon.h b/lib/librte_node/ip4_lookup_neon.h
> index 5e5a7d87be..0ad2763b82 100644
> --- a/lib/librte_node/ip4_lookup_neon.h
> +++ b/lib/librte_node/ip4_lookup_neon.h
> @@ -7,7 +7,7 @@
> 
>  /* ARM64 NEON */
>  static uint16_t
> -ip4_lookup_node_process(struct rte_graph *graph, struct rte_node *node,
> +ip4_lookup_node_process_vec(struct rte_graph *graph, struct rte_node *node,
>  			void **objs, uint16_t nb_objs)
>  {
>  	struct rte_mbuf *mbuf0, *mbuf1, *mbuf2, *mbuf3, **pkts;
> diff --git a/lib/librte_node/ip4_lookup_sse.h b/lib/librte_node/ip4_lookup_sse.h
> index a071cc5919..264c986071 100644
> --- a/lib/librte_node/ip4_lookup_sse.h
> +++ b/lib/librte_node/ip4_lookup_sse.h
> @@ -7,7 +7,7 @@
> 
>  /* X86 SSE */
>  static uint16_t
> -ip4_lookup_node_process(struct rte_graph *graph, struct rte_node *node,
> +ip4_lookup_node_process_vec(struct rte_graph *graph, struct rte_node *node,
>  			void **objs, uint16_t nb_objs)
>  {
>  	struct rte_mbuf *mbuf0, *mbuf1, *mbuf2, *mbuf3, **pkts;
> --
> 2.22.0
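
For context, a minimal sketch of the init-time selection Konstantin suggests. This is not part of the patch: it assumes the graph framework allows the node init callback to overwrite node->process, it elides the existing LPM setup that ip4_lookup_node_init() performs, and it reuses the function and macro names from the patch; everything else is illustrative.

static int
ip4_lookup_node_init(const struct rte_graph *graph, struct rte_node *node)
{
	/* ... existing per-node LPM setup elided ... */

	/*
	 * Pick the process callback once at init, instead of testing the
	 * max SIMD bitwidth on every burst in a generic dispatcher.
	 */
#if defined(RTE_MACHINE_CPUFLAG_NEON) || defined(RTE_ARCH_X86)
	if (rte_get_max_simd_bitwidth() >= RTE_SIMD_128)
		node->process = ip4_lookup_node_process_vec;
	else
		node->process = ip4_lookup_node_process_scalar;
#else
	node->process = ip4_lookup_node_process_scalar;
#endif

	return 0;
}

With this approach, the scalar function can remain the default .process callback in the node registration, and the vector path is installed once per node instance rather than being re-checked per call.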
  
Ruifeng Wang Oct. 14, 2020, 8:28 a.m. UTC | #2
> -----Original Message-----
> From: Ciara Power <ciara.power@intel.com>
> Sent: Tuesday, October 13, 2020 7:05 PM
> To: dev@dpdk.org
> Cc: viktorin@rehivetech.com; Ruifeng Wang <Ruifeng.Wang@arm.com>;
> jerinj@marvell.com; drc@linux.vnet.ibm.com; bruce.richardson@intel.com;
> konstantin.ananyev@intel.com; Ciara Power <ciara.power@intel.com>;
> Nithin Dabilpuram <ndabilpuram@marvell.com>; Pavan Nikhilesh
> <pbhagavatula@marvell.com>; Kiran Kumar K <kirankumark@marvell.com>
> Subject: [PATCH v5 17/17] node: choose vector path at runtime
> 
> When choosing the vector path, max SIMD bitwidth is now checked to
> ensure the vector path is suitable. To do this, rather than the scalar/vector
> lookup functions being called directly from the apps, a generic function is
> called which will then call the scalar or vector lookup function.
> 
> Cc: Nithin Dabilpuram <ndabilpuram@marvell.com>
> Cc: Pavan Nikhilesh <pbhagavatula@marvell.com>
> Cc: Jerin Jacob <jerinj@marvell.com>
> Cc: Kiran Kumar K <kirankumark@marvell.com>
> 
> Signed-off-by: Ciara Power <ciara.power@intel.com>
> ---
>  lib/librte_node/ip4_lookup.c      | 13 +++++++++++--
>  lib/librte_node/ip4_lookup_neon.h |  2 +-
> lib/librte_node/ip4_lookup_sse.h  |  2 +-
>  3 files changed, 13 insertions(+), 4 deletions(-)
> 
> diff --git a/lib/librte_node/ip4_lookup.c b/lib/librte_node/ip4_lookup.c
> index 293c77f39e..b3edbc1f4d 100644
> --- a/lib/librte_node/ip4_lookup.c
> +++ b/lib/librte_node/ip4_lookup.c
> @@ -34,10 +34,10 @@ static struct ip4_lookup_node_main ip4_lookup_nm;
> #include "ip4_lookup_neon.h"
>  #elif defined(RTE_ARCH_X86)
>  #include "ip4_lookup_sse.h"
> -#else
> +#endif
> 
>  static uint16_t
> -ip4_lookup_node_process(struct rte_graph *graph, struct rte_node *node,
> +ip4_lookup_node_process_scalar(struct rte_graph *graph, struct rte_node
> +*node,
>  			void **objs, uint16_t nb_objs)
>  {
>  	struct rte_ipv4_hdr *ipv4_hdr;
> @@ -109,7 +109,16 @@ ip4_lookup_node_process(struct rte_graph *graph,
> struct rte_node *node,
>  	return nb_objs;
>  }
> 
> +static uint16_t
> +ip4_lookup_node_process(struct rte_graph *graph, struct rte_node *node,
> +			void **objs, uint16_t nb_objs)
> +{
> +#if defined(RTE_MACHINE_CPUFLAG_NEON) || defined(RTE_ARCH_X86)
The machine CPU flags have been removed recently; RTE_MACHINE_CPUFLAG_NEON is replaced by __ARM_NEON (see the rebased guard sketched below this message).
It looks like the patch set needs a rebase.

> +	if (rte_get_max_simd_bitwidth() >= RTE_SIMD_128)
> +		return ip4_lookup_node_process_vec(graph, node, objs,
> nb_objs);
>  #endif
> +	return ip4_lookup_node_process_scalar(graph, node, objs,
> nb_objs); }
> 
>  int
>  rte_node_ip4_route_add(uint32_t ip, uint8_t depth, uint16_t next_hop, diff
> --git a/lib/librte_node/ip4_lookup_neon.h
> b/lib/librte_node/ip4_lookup_neon.h
> index 5e5a7d87be..0ad2763b82 100644
> --- a/lib/librte_node/ip4_lookup_neon.h
> +++ b/lib/librte_node/ip4_lookup_neon.h
> @@ -7,7 +7,7 @@
> 
>  /* ARM64 NEON */
>  static uint16_t
> -ip4_lookup_node_process(struct rte_graph *graph, struct rte_node *node,
> +ip4_lookup_node_process_vec(struct rte_graph *graph, struct rte_node
> +*node,
>  			void **objs, uint16_t nb_objs)
>  {
>  	struct rte_mbuf *mbuf0, *mbuf1, *mbuf2, *mbuf3, **pkts; diff --git
> a/lib/librte_node/ip4_lookup_sse.h b/lib/librte_node/ip4_lookup_sse.h
> index a071cc5919..264c986071 100644
> --- a/lib/librte_node/ip4_lookup_sse.h
> +++ b/lib/librte_node/ip4_lookup_sse.h
> @@ -7,7 +7,7 @@
> 
>  /* X86 SSE */
>  static uint16_t
> -ip4_lookup_node_process(struct rte_graph *graph, struct rte_node *node,
> +ip4_lookup_node_process_vec(struct rte_graph *graph, struct rte_node
> +*node,
>  			void **objs, uint16_t nb_objs)
>  {
>  	struct rte_mbuf *mbuf0, *mbuf1, *mbuf2, *mbuf3, **pkts;
> --
> 2.22.0
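
A minimal sketch of the rebase Ruifeng describes: the dispatcher from the patch is unchanged except that the removed RTE_MACHINE_CPUFLAG_NEON guard is swapped for the compiler-provided __ARM_NEON macro he names.

static uint16_t
ip4_lookup_node_process(struct rte_graph *graph, struct rte_node *node,
			void **objs, uint16_t nb_objs)
{
#if defined(__ARM_NEON) || defined(RTE_ARCH_X86)
	/* __ARM_NEON replaces the removed RTE_MACHINE_CPUFLAG_NEON flag. */
	if (rte_get_max_simd_bitwidth() >= RTE_SIMD_128)
		return ip4_lookup_node_process_vec(graph, node, objs, nb_objs);
#endif
	return ip4_lookup_node_process_scalar(graph, node, objs, nb_objs);
}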
  
Jerin Jacob Oct. 14, 2020, 10:05 a.m. UTC | #3
On Tue, Oct 13, 2020 at 7:12 PM Ananyev, Konstantin
<konstantin.ananyev@intel.com> wrote:
>
> > When choosing the vector path, max SIMD bitwidth is now checked to
> > ensure the vector path is suitable. To do this, rather than the
> > scalar/vector lookup functions being called directly from the apps, a
> > generic function is called which will then call the scalar or vector
> > lookup function.
> >
> > Cc: Nithin Dabilpuram <ndabilpuram@marvell.com>
> > Cc: Pavan Nikhilesh <pbhagavatula@marvell.com>
> > Cc: Jerin Jacob <jerinj@marvell.com>
> > Cc: Kiran Kumar K <kirankumark@marvell.com>
> >
> > Signed-off-by: Ciara Power <ciara.power@intel.com>
> > ---
> >  lib/librte_node/ip4_lookup.c      | 13 +++++++++++--
> >  lib/librte_node/ip4_lookup_neon.h |  2 +-
> >  lib/librte_node/ip4_lookup_sse.h  |  2 +-
> >  3 files changed, 13 insertions(+), 4 deletions(-)
> >

> > +static uint16_t
> > +ip4_lookup_node_process(struct rte_graph *graph, struct rte_node *node,
> > +                     void **objs, uint16_t nb_objs)
> > +{
> > +#if defined(RTE_MACHINE_CPUFLAG_NEON) || defined(RTE_ARCH_X86)
> > +     if (rte_get_max_simd_bitwidth() >= RTE_SIMD_128)
> > +             return ip4_lookup_node_process_vec(graph, node, objs, nb_objs);
> >  #endif
> > +     return ip4_lookup_node_process_scalar(graph, node, objs, nb_objs);
> > +}
>
> Just wonder can't this selection be done once at ip4_lookup_node_init()?

Yes. It is better to move the ..process() selection into ip4_lookup_node_init().
  

Patch

diff --git a/lib/librte_node/ip4_lookup.c b/lib/librte_node/ip4_lookup.c
index 293c77f39e..b3edbc1f4d 100644
--- a/lib/librte_node/ip4_lookup.c
+++ b/lib/librte_node/ip4_lookup.c
@@ -34,10 +34,10 @@  static struct ip4_lookup_node_main ip4_lookup_nm;
 #include "ip4_lookup_neon.h"
 #elif defined(RTE_ARCH_X86)
 #include "ip4_lookup_sse.h"
-#else
+#endif
 
 static uint16_t
-ip4_lookup_node_process(struct rte_graph *graph, struct rte_node *node,
+ip4_lookup_node_process_scalar(struct rte_graph *graph, struct rte_node *node,
 			void **objs, uint16_t nb_objs)
 {
 	struct rte_ipv4_hdr *ipv4_hdr;
@@ -109,7 +109,16 @@  ip4_lookup_node_process(struct rte_graph *graph, struct rte_node *node,
 	return nb_objs;
 }
 
+static uint16_t
+ip4_lookup_node_process(struct rte_graph *graph, struct rte_node *node,
+			void **objs, uint16_t nb_objs)
+{
+#if defined(RTE_MACHINE_CPUFLAG_NEON) || defined(RTE_ARCH_X86)
+	if (rte_get_max_simd_bitwidth() >= RTE_SIMD_128)
+		return ip4_lookup_node_process_vec(graph, node, objs, nb_objs);
 #endif
+	return ip4_lookup_node_process_scalar(graph, node, objs, nb_objs);
+}
 
 int
 rte_node_ip4_route_add(uint32_t ip, uint8_t depth, uint16_t next_hop,
diff --git a/lib/librte_node/ip4_lookup_neon.h b/lib/librte_node/ip4_lookup_neon.h
index 5e5a7d87be..0ad2763b82 100644
--- a/lib/librte_node/ip4_lookup_neon.h
+++ b/lib/librte_node/ip4_lookup_neon.h
@@ -7,7 +7,7 @@ 
 
 /* ARM64 NEON */
 static uint16_t
-ip4_lookup_node_process(struct rte_graph *graph, struct rte_node *node,
+ip4_lookup_node_process_vec(struct rte_graph *graph, struct rte_node *node,
 			void **objs, uint16_t nb_objs)
 {
 	struct rte_mbuf *mbuf0, *mbuf1, *mbuf2, *mbuf3, **pkts;
diff --git a/lib/librte_node/ip4_lookup_sse.h b/lib/librte_node/ip4_lookup_sse.h
index a071cc5919..264c986071 100644
--- a/lib/librte_node/ip4_lookup_sse.h
+++ b/lib/librte_node/ip4_lookup_sse.h
@@ -7,7 +7,7 @@ 
 
 /* X86 SSE */
 static uint16_t
-ip4_lookup_node_process(struct rte_graph *graph, struct rte_node *node,
+ip4_lookup_node_process_vec(struct rte_graph *graph, struct rte_node *node,
 			void **objs, uint16_t nb_objs)
 {
 	struct rte_mbuf *mbuf0, *mbuf1, *mbuf2, *mbuf3, **pkts;