diff mbox series

[v5,17/17] node: choose vector path at runtime

Message ID	20201013110437.309110-18-ciara.power@intel.com (mailing list archive)
State	Superseded, archived
Delegated to:	David Marchand
Headers	IronPort-SDR: fT5zB/iZbRwvVREiZbgmCYqRq7litChNjZd3xVCD4+U6UgQFz4C0qEfmBFcU4n0nU9GVjNLAot c5jm9VId0TQQ== IronPort-SDR: BjIP+sWBpmsV97Kc5ifitHaG5YeDhlI3bdcWwcAgtY3F+MbYertXQBl8D+VUYEPEGbAqSom1VE N43HTPyheNfw== From: Ciara Power <ciara.power@intel.com> To: dev@dpdk.org Cc: viktorin@rehivetech.com, ruifeng.wang@arm.com, jerinj@marvell.com, drc@linux.vnet.ibm.com, bruce.richardson@intel.com, konstantin.ananyev@intel.com, Ciara Power <ciara.power@intel.com>, Nithin Dabilpuram <ndabilpuram@marvell.com>, Pavan Nikhilesh <pbhagavatula@marvell.com>, Kiran Kumar K <kirankumark@marvell.com> Date: Tue, 13 Oct 2020 12:04:37 +0100 Message-Id: <20201013110437.309110-18-ciara.power@intel.com> In-Reply-To: <20201013110437.309110-1-ciara.power@intel.com> References: <20200807155859.63888-1-ciara.power@intel.com> <20201013110437.309110-1-ciara.power@intel.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Subject: [dpdk-dev] [PATCH v5 17/17] node: choose vector path at runtime Precedence: list Errors-To: dev-bounces@dpdk.org Sender: "dev" <dev-bounces@dpdk.org>
Series	add max SIMD bitwidth to EAL \| [v5,00/17] add max SIMD bitwidth to EAL [v5,01/17] eal: add max SIMD bitwidth [v5,02/17] doc: add detail on using max SIMD bitwidth [v5,03/17] net/i40e: add checks for max SIMD bitwidth [v5,04/17] net/axgbe: add checks for max SIMD bitwidth [v5,05/17] net/bnxt: add checks for max SIMD bitwidth [v5,06/17] net/enic: add checks for max SIMD bitwidth [v5,07/17] net/fm10k: add checks for max SIMD bitwidth [v5,08/17] net/iavf: add checks for max SIMD bitwidth [v5,09/17] net/ice: add checks for max SIMD bitwidth [v5,10/17] net/ixgbe: add checks for max SIMD bitwidth [v5,11/17] net/mlx5: add checks for max SIMD bitwidth [v5,12/17] net/virtio: add checks for max SIMD bitwidth [v5,13/17] distributor: add checks for max SIMD bitwidth [v5,14/17] member: add checks for max SIMD bitwidth [v5,15/17] efd: add checks for max SIMD bitwidth [v5,16/17] net: add checks for max SIMD bitwidth [v5,17/17] node: choose vector path at runtime

Checks

Context	Check	Description
ci/checkpatch	success	coding style OK
ci/iol-broadcom-Performance	success	Performance Testing PASS
ci/iol-broadcom-Functional	success	Functional Testing PASS
ci/iol-testing	fail	Testing issues
ci/iol-intel-Performance	success	Performance Testing PASS
ci/iol-mellanox-Performance	success	Performance Testing PASS
ci/Intel-compilation	fail	apply issues

Commit Message

Power, Ciara Oct. 13, 2020, 11:04 a.m. UTC

  When choosing the vector path, max SIMD bitwidth is now checked to
ensure the vector path is suitable. To do this, rather than the
scalar/vector lookup functions being called directly from the apps, a
generic function is called which will then call the scalar or vector
lookup function.

Cc: Nithin Dabilpuram <ndabilpuram@marvell.com>
Cc: Pavan Nikhilesh <pbhagavatula@marvell.com>
Cc: Jerin Jacob <jerinj@marvell.com>
Cc: Kiran Kumar K <kirankumark@marvell.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>
---
 lib/librte_node/ip4_lookup.c      | 13 +++++++++++--
 lib/librte_node/ip4_lookup_neon.h |  2 +-
 lib/librte_node/ip4_lookup_sse.h  |  2 +-
 3 files changed, 13 insertions(+), 4 deletions(-)

Comments

Ananyev, Konstantin Oct. 13, 2020, 1:42 p.m. UTC | #1

> When choosing the vector path, max SIMD bitwidth is now checked to
> ensure the vector path is suitable. To do this, rather than the
> scalar/vector lookup functions being called directly from the apps, a
> generic function is called which will then call the scalar or vector
> lookup function.
> 
> Cc: Nithin Dabilpuram <ndabilpuram@marvell.com>
> Cc: Pavan Nikhilesh <pbhagavatula@marvell.com>
> Cc: Jerin Jacob <jerinj@marvell.com>
> Cc: Kiran Kumar K <kirankumark@marvell.com>
> 
> Signed-off-by: Ciara Power <ciara.power@intel.com>
> ---
>  lib/librte_node/ip4_lookup.c      | 13 +++++++++++--
>  lib/librte_node/ip4_lookup_neon.h |  2 +-
>  lib/librte_node/ip4_lookup_sse.h  |  2 +-
>  3 files changed, 13 insertions(+), 4 deletions(-)
> 
> diff --git a/lib/librte_node/ip4_lookup.c b/lib/librte_node/ip4_lookup.c
> index 293c77f39e..b3edbc1f4d 100644
> --- a/lib/librte_node/ip4_lookup.c
> +++ b/lib/librte_node/ip4_lookup.c
> @@ -34,10 +34,10 @@ static struct ip4_lookup_node_main ip4_lookup_nm;
>  #include "ip4_lookup_neon.h"
>  #elif defined(RTE_ARCH_X86)
>  #include "ip4_lookup_sse.h"
> -#else
> +#endif
> 
>  static uint16_t
> -ip4_lookup_node_process(struct rte_graph *graph, struct rte_node *node,
> +ip4_lookup_node_process_scalar(struct rte_graph *graph, struct rte_node *node,
>  			void **objs, uint16_t nb_objs)
>  {
>  	struct rte_ipv4_hdr *ipv4_hdr;
> @@ -109,7 +109,16 @@ ip4_lookup_node_process(struct rte_graph *graph, struct rte_node *node,
>  	return nb_objs;
>  }
> 
> +static uint16_t
> +ip4_lookup_node_process(struct rte_graph *graph, struct rte_node *node,
> +			void **objs, uint16_t nb_objs)
> +{
> +#if defined(RTE_MACHINE_CPUFLAG_NEON) || defined(RTE_ARCH_X86)
> +	if (rte_get_max_simd_bitwidth() >= RTE_SIMD_128)
> +		return ip4_lookup_node_process_vec(graph, node, objs, nb_objs);
>  #endif
> +	return ip4_lookup_node_process_scalar(graph, node, objs, nb_objs);
> +}

Just wonder can't this selection be done once at ip4_lookup_node_init()?

> 
>  int
>  rte_node_ip4_route_add(uint32_t ip, uint8_t depth, uint16_t next_hop,
> diff --git a/lib/librte_node/ip4_lookup_neon.h b/lib/librte_node/ip4_lookup_neon.h
> index 5e5a7d87be..0ad2763b82 100644
> --- a/lib/librte_node/ip4_lookup_neon.h
> +++ b/lib/librte_node/ip4_lookup_neon.h
> @@ -7,7 +7,7 @@
> 
>  /* ARM64 NEON */
>  static uint16_t
> -ip4_lookup_node_process(struct rte_graph *graph, struct rte_node *node,
> +ip4_lookup_node_process_vec(struct rte_graph *graph, struct rte_node *node,
>  			void **objs, uint16_t nb_objs)
>  {
>  	struct rte_mbuf *mbuf0, *mbuf1, *mbuf2, *mbuf3, **pkts;
> diff --git a/lib/librte_node/ip4_lookup_sse.h b/lib/librte_node/ip4_lookup_sse.h
> index a071cc5919..264c986071 100644
> --- a/lib/librte_node/ip4_lookup_sse.h
> +++ b/lib/librte_node/ip4_lookup_sse.h
> @@ -7,7 +7,7 @@
> 
>  /* X86 SSE */
>  static uint16_t
> -ip4_lookup_node_process(struct rte_graph *graph, struct rte_node *node,
> +ip4_lookup_node_process_vec(struct rte_graph *graph, struct rte_node *node,
>  			void **objs, uint16_t nb_objs)
>  {
>  	struct rte_mbuf *mbuf0, *mbuf1, *mbuf2, *mbuf3, **pkts;
> --
> 2.22.0

Ruifeng Wang Oct. 14, 2020, 8:28 a.m. UTC | #2

> -----Original Message-----
> From: Ciara Power <ciara.power@intel.com>
> Sent: Tuesday, October 13, 2020 7:05 PM
> To: dev@dpdk.org
> Cc: viktorin@rehivetech.com; Ruifeng Wang <Ruifeng.Wang@arm.com>;
> jerinj@marvell.com; drc@linux.vnet.ibm.com; bruce.richardson@intel.com;
> konstantin.ananyev@intel.com; Ciara Power <ciara.power@intel.com>;
> Nithin Dabilpuram <ndabilpuram@marvell.com>; Pavan Nikhilesh
> <pbhagavatula@marvell.com>; Kiran Kumar K <kirankumark@marvell.com>
> Subject: [PATCH v5 17/17] node: choose vector path at runtime
> 
> When choosing the vector path, max SIMD bitwidth is now checked to
> ensure the vector path is suitable. To do this, rather than the scalar/vector
> lookup functions being called directly from the apps, a generic function is
> called which will then call the scalar or vector lookup function.
> 
> Cc: Nithin Dabilpuram <ndabilpuram@marvell.com>
> Cc: Pavan Nikhilesh <pbhagavatula@marvell.com>
> Cc: Jerin Jacob <jerinj@marvell.com>
> Cc: Kiran Kumar K <kirankumark@marvell.com>
> 
> Signed-off-by: Ciara Power <ciara.power@intel.com>
> ---
>  lib/librte_node/ip4_lookup.c      | 13 +++++++++++--
>  lib/librte_node/ip4_lookup_neon.h |  2 +-
> lib/librte_node/ip4_lookup_sse.h  |  2 +-
>  3 files changed, 13 insertions(+), 4 deletions(-)
> 
> diff --git a/lib/librte_node/ip4_lookup.c b/lib/librte_node/ip4_lookup.c
> index 293c77f39e..b3edbc1f4d 100644
> --- a/lib/librte_node/ip4_lookup.c
> +++ b/lib/librte_node/ip4_lookup.c
> @@ -34,10 +34,10 @@ static struct ip4_lookup_node_main ip4_lookup_nm;
> #include "ip4_lookup_neon.h"
>  #elif defined(RTE_ARCH_X86)
>  #include "ip4_lookup_sse.h"
> -#else
> +#endif
> 
>  static uint16_t
> -ip4_lookup_node_process(struct rte_graph *graph, struct rte_node *node,
> +ip4_lookup_node_process_scalar(struct rte_graph *graph, struct rte_node
> +*node,
>  			void **objs, uint16_t nb_objs)
>  {
>  	struct rte_ipv4_hdr *ipv4_hdr;
> @@ -109,7 +109,16 @@ ip4_lookup_node_process(struct rte_graph *graph,
> struct rte_node *node,
>  	return nb_objs;
>  }
> 
> +static uint16_t
> +ip4_lookup_node_process(struct rte_graph *graph, struct rte_node *node,
> +			void **objs, uint16_t nb_objs)
> +{
> +#if defined(RTE_MACHINE_CPUFLAG_NEON) || defined(RTE_ARCH_X86)
Machine flags has been removed recently. RTE_MACHINE_CPUFLAG_NEON is replaced by __ARM_NEON.
Looks like the patch set needs a rebase.

> +	if (rte_get_max_simd_bitwidth() >= RTE_SIMD_128)
> +		return ip4_lookup_node_process_vec(graph, node, objs,
> nb_objs);
>  #endif
> +	return ip4_lookup_node_process_scalar(graph, node, objs,
> nb_objs); }
> 
>  int
>  rte_node_ip4_route_add(uint32_t ip, uint8_t depth, uint16_t next_hop, diff
> --git a/lib/librte_node/ip4_lookup_neon.h
> b/lib/librte_node/ip4_lookup_neon.h
> index 5e5a7d87be..0ad2763b82 100644
> --- a/lib/librte_node/ip4_lookup_neon.h
> +++ b/lib/librte_node/ip4_lookup_neon.h
> @@ -7,7 +7,7 @@
> 
>  /* ARM64 NEON */
>  static uint16_t
> -ip4_lookup_node_process(struct rte_graph *graph, struct rte_node *node,
> +ip4_lookup_node_process_vec(struct rte_graph *graph, struct rte_node
> +*node,
>  			void **objs, uint16_t nb_objs)
>  {
>  	struct rte_mbuf *mbuf0, *mbuf1, *mbuf2, *mbuf3, **pkts; diff --git
> a/lib/librte_node/ip4_lookup_sse.h b/lib/librte_node/ip4_lookup_sse.h
> index a071cc5919..264c986071 100644
> --- a/lib/librte_node/ip4_lookup_sse.h
> +++ b/lib/librte_node/ip4_lookup_sse.h
> @@ -7,7 +7,7 @@
> 
>  /* X86 SSE */
>  static uint16_t
> -ip4_lookup_node_process(struct rte_graph *graph, struct rte_node *node,
> +ip4_lookup_node_process_vec(struct rte_graph *graph, struct rte_node
> +*node,
>  			void **objs, uint16_t nb_objs)
>  {
>  	struct rte_mbuf *mbuf0, *mbuf1, *mbuf2, *mbuf3, **pkts;
> --
> 2.22.0

Jerin Jacob Oct. 14, 2020, 10:05 a.m. UTC | #3

On Tue, Oct 13, 2020 at 7:12 PM Ananyev, Konstantin
<konstantin.ananyev@intel.com> wrote:
>
> > When choosing the vector path, max SIMD bitwidth is now checked to
> > ensure the vector path is suitable. To do this, rather than the
> > scalar/vector lookup functions being called directly from the apps, a
> > generic function is called which will then call the scalar or vector
> > lookup function.
> >
> > Cc: Nithin Dabilpuram <ndabilpuram@marvell.com>
> > Cc: Pavan Nikhilesh <pbhagavatula@marvell.com>
> > Cc: Jerin Jacob <jerinj@marvell.com>
> > Cc: Kiran Kumar K <kirankumark@marvell.com>
> >
> > Signed-off-by: Ciara Power <ciara.power@intel.com>
> > ---
> >  lib/librte_node/ip4_lookup.c      | 13 +++++++++++--
> >  lib/librte_node/ip4_lookup_neon.h |  2 +-
> >  lib/librte_node/ip4_lookup_sse.h  |  2 +-
> >  3 files changed, 13 insertions(+), 4 deletions(-)
> >

> > +static uint16_t
> > +ip4_lookup_node_process(struct rte_graph *graph, struct rte_node *node,
> > +                     void **objs, uint16_t nb_objs)
> > +{
> > +#if defined(RTE_MACHINE_CPUFLAG_NEON) || defined(RTE_ARCH_X86)
> > +     if (rte_get_max_simd_bitwidth() >= RTE_SIMD_128)
> > +             return ip4_lookup_node_process_vec(graph, node, objs, nb_objs);
> >  #endif
> > +     return ip4_lookup_node_process_scalar(graph, node, objs, nb_objs);
> > +}
>
> Just wonder can't this selection be done once at ip4_lookup_node_init()?

Yes. It is better to move ..process() selection in ip4_lookup_node_init()

diff mbox series

Patch

diff --git a/lib/librte_node/ip4_lookup.c b/lib/librte_node/ip4_lookup.c
index 293c77f39e..b3edbc1f4d 100644
--- a/lib/librte_node/ip4_lookup.c
+++ b/lib/librte_node/ip4_lookup.c
@@ -34,10 +34,10 @@  static struct ip4_lookup_node_main ip4_lookup_nm;
 #include "ip4_lookup_neon.h"
 #elif defined(RTE_ARCH_X86)
 #include "ip4_lookup_sse.h"
-#else
+#endif
 
 static uint16_t
-ip4_lookup_node_process(struct rte_graph *graph, struct rte_node *node,
+ip4_lookup_node_process_scalar(struct rte_graph *graph, struct rte_node *node,
 			void **objs, uint16_t nb_objs)
 {
 	struct rte_ipv4_hdr *ipv4_hdr;
@@ -109,7 +109,16 @@  ip4_lookup_node_process(struct rte_graph *graph, struct rte_node *node,
 	return nb_objs;
 }
 
+static uint16_t
+ip4_lookup_node_process(struct rte_graph *graph, struct rte_node *node,
+			void **objs, uint16_t nb_objs)
+{
+#if defined(RTE_MACHINE_CPUFLAG_NEON) || defined(RTE_ARCH_X86)
+	if (rte_get_max_simd_bitwidth() >= RTE_SIMD_128)
+		return ip4_lookup_node_process_vec(graph, node, objs, nb_objs);
 #endif
+	return ip4_lookup_node_process_scalar(graph, node, objs, nb_objs);
+}
 
 int
 rte_node_ip4_route_add(uint32_t ip, uint8_t depth, uint16_t next_hop,
diff --git a/lib/librte_node/ip4_lookup_neon.h b/lib/librte_node/ip4_lookup_neon.h
index 5e5a7d87be..0ad2763b82 100644
--- a/lib/librte_node/ip4_lookup_neon.h
+++ b/lib/librte_node/ip4_lookup_neon.h
@@ -7,7 +7,7 @@ 
 
 /* ARM64 NEON */
 static uint16_t
-ip4_lookup_node_process(struct rte_graph *graph, struct rte_node *node,
+ip4_lookup_node_process_vec(struct rte_graph *graph, struct rte_node *node,
 			void **objs, uint16_t nb_objs)
 {
 	struct rte_mbuf *mbuf0, *mbuf1, *mbuf2, *mbuf3, **pkts;
diff --git a/lib/librte_node/ip4_lookup_sse.h b/lib/librte_node/ip4_lookup_sse.h
index a071cc5919..264c986071 100644
--- a/lib/librte_node/ip4_lookup_sse.h
+++ b/lib/librte_node/ip4_lookup_sse.h
@@ -7,7 +7,7 @@ 
 
 /* X86 SSE */
 static uint16_t
-ip4_lookup_node_process(struct rte_graph *graph, struct rte_node *node,
+ip4_lookup_node_process_vec(struct rte_graph *graph, struct rte_node *node,
 			void **objs, uint16_t nb_objs)
 {
 	struct rte_mbuf *mbuf0, *mbuf1, *mbuf2, *mbuf3, **pkts;