[RFC PATCH] ethdev: advertise flow restore in mbuf

Slava Ovsiienko viacheslavo at nvidia.com
Wed Jun 14 18:46:34 CEST 2023


Hi, David

It looks like a good application datapath optimization, as for me.
But I see some concerns:

1. Are we sure the PMD should register the flag, not application?
IIRC, usually application registers needed flags/fields and PMDs just follow.

+       if (!sh->tunnel_hub && sh->config.dv_miss_info) {
+               err = mlx5_flow_restore_info_register();
+               if (err) {
+                       DRV_LOG(ERR, "Could not register mbuf dynflag for rte_flow_get_restore_info");
+                       goto error;
+               }

2. This might have some general mlx5 Rx datapath performance impact (very minor though)
It introduces extra memory access (instead of immed value). We should test thoroughly.
 @@ -857,7 +857,7 @@ rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt,
                if (MLX5_FLOW_MARK_IS_VALID(mark)) {
                        pkt->ol_flags |= RTE_MBUF_F_RX_FDIR;
                        if (mark != RTE_BE32(MLX5_FLOW_MARK_DEFAULT)) {
-                               pkt->ol_flags |= RTE_MBUF_F_RX_FDIR_ID;
+                               pkt->ol_flags |= rxq->mark_flag;
                                pkt->hash.fdir.hi = mlx5_flow_mark_get(mark);
                        }
                }

3. RTE_MBUF_F_RX_FDIR_ID is also handled in vectorized rx_burst() routines.
Please, see:
 - mlx5_rxtx_vec_altivec.h
 - mlx5_rxtx_vec_neon.h
 - mlx5_rxtx_vec_sse.h
This code must be updated, and it also might have some general (regardless of using tunnel offload 
- for all cases) performance impact.

With best regards,
Slava

> -----Original Message-----
> From: David Marchand <david.marchand at redhat.com>
> Sent: Friday, May 5, 2023 1:31 PM
> To: dev at dpdk.org
> Cc: NBU-Contact-Thomas Monjalon (EXTERNAL) <thomas at monjalon.net>;
> i.maximets at ovn.org; Aman Singh <aman.deep.singh at intel.com>; Yuying
> Zhang <yuying.zhang at intel.com>; Matan Azrad <matan at nvidia.com>; Slava
> Ovsiienko <viacheslavo at nvidia.com>; Andrew Rybchenko
> <andrew.rybchenko at oktetlabs.ru>; Ferruh Yigit <ferruh.yigit at amd.com>; Ori
> Kam <orika at nvidia.com>
> Subject: [RFC PATCH] ethdev: advertise flow restore in mbuf
> 
> As reported by Ilya [1], unconditionally calling
> rte_flow_get_restore_info() impacts an application performance for drivers
> that do not provide this ops.
> It could also impact processing of packets that require no call to
> rte_flow_get_restore_info() at all.
> 
> Advertise in mbuf (via a dynamic flag) whether the driver has more metadata
> to provide via rte_flow_get_restore_info().
> The application can then call it only when required.
> 
> Link: http://inbox.dpdk.org/dev/5248c2ca-f2a6-3fb0-38b8-
> 7f659bfa40de at ovn.org/
> Signed-off-by: David Marchand <david.marchand at redhat.com>
> ---
> Note: I did not test this RFC patch yet but I hope we can resume and maybe
> conclude on the discussion for the tunnel offloading API.
> 
> ---
>  app/test-pmd/util.c              |  9 +++++----
>  drivers/net/mlx5/linux/mlx5_os.c | 14 ++++++++++----
>  drivers/net/mlx5/mlx5.h          |  3 ++-
>  drivers/net/mlx5/mlx5_flow.c     | 26 ++++++++++++++++++++------
>  drivers/net/mlx5/mlx5_rx.c       |  2 +-
>  drivers/net/mlx5/mlx5_rx.h       |  1 +
>  drivers/net/mlx5/mlx5_trigger.c  |  4 ++--
>  drivers/net/sfc/sfc_dp.c         |  9 ++-------
>  lib/ethdev/ethdev_driver.h       |  8 ++++++++
>  lib/ethdev/rte_flow.c            | 29 +++++++++++++++++++++++++++++
>  lib/ethdev/rte_flow.h            | 19 ++++++++++++++++++-
>  lib/ethdev/version.map           |  4 ++++
>  12 files changed, 102 insertions(+), 26 deletions(-)
> 
> diff --git a/app/test-pmd/util.c b/app/test-pmd/util.c index
> f9df5f69ef..5aa69ed545 100644
> --- a/app/test-pmd/util.c
> +++ b/app/test-pmd/util.c
> @@ -88,18 +88,20 @@ dump_pkt_burst(uint16_t port_id, uint16_t queue,
> struct rte_mbuf *pkts[],
>  	char print_buf[MAX_STRING_LEN];
>  	size_t buf_size = MAX_STRING_LEN;
>  	size_t cur_len = 0;
> +	uint64_t restore_info_dynflag;
> 
>  	if (!nb_pkts)
>  		return;
> +	restore_info_dynflag = rte_flow_restore_info_dynflag();
>  	MKDUMPSTR(print_buf, buf_size, cur_len,
>  		  "port %u/queue %u: %s %u packets\n", port_id, queue,
>  		  is_rx ? "received" : "sent", (unsigned int) nb_pkts);
>  	for (i = 0; i < nb_pkts; i++) {
> -		int ret;
>  		struct rte_flow_error error;
>  		struct rte_flow_restore_info info = { 0, };
> 
>  		mb = pkts[i];
> +		ol_flags = mb->ol_flags;
>  		if (rxq_share > 0)
>  			MKDUMPSTR(print_buf, buf_size, cur_len, "port %u, ",
>  				  mb->port);
> @@ -107,8 +109,8 @@ dump_pkt_burst(uint16_t port_id, uint16_t queue,
> struct rte_mbuf *pkts[],
>  		eth_type = RTE_BE_TO_CPU_16(eth_hdr->ether_type);
>  		packet_type = mb->packet_type;
>  		is_encapsulation = RTE_ETH_IS_TUNNEL_PKT(packet_type);
> -		ret = rte_flow_get_restore_info(port_id, mb, &info, &error);
> -		if (!ret) {
> +		if ((ol_flags & restore_info_dynflag) != 0 &&
> +				rte_flow_get_restore_info(port_id, mb, &info,
> &error) == 0) {
>  			MKDUMPSTR(print_buf, buf_size, cur_len,
>  				  "restore info:");
>  			if (info.flags & RTE_FLOW_RESTORE_INFO_TUNNEL) {
> @@ -153,7 +155,6 @@ dump_pkt_burst(uint16_t port_id, uint16_t queue,
> struct rte_mbuf *pkts[],
>  			  " - pool=%s - type=0x%04x - length=%u -
> nb_segs=%d",
>  			  mb->pool->name, eth_type, (unsigned int) mb-
> >pkt_len,
>  			  (int)mb->nb_segs);
> -		ol_flags = mb->ol_flags;
>  		if (ol_flags & RTE_MBUF_F_RX_RSS_HASH) {
>  			MKDUMPSTR(print_buf, buf_size, cur_len,
>  				  " - RSS hash=0x%x",
> diff --git a/drivers/net/mlx5/linux/mlx5_os.c
> b/drivers/net/mlx5/linux/mlx5_os.c
> index 980234e2ac..e6e3784013 100644
> --- a/drivers/net/mlx5/linux/mlx5_os.c
> +++ b/drivers/net/mlx5/linux/mlx5_os.c
> @@ -575,11 +575,17 @@ mlx5_alloc_shared_dr(struct mlx5_priv *priv)
>  		goto error;
>  	}
>  #endif
> -	if (!sh->tunnel_hub && sh->config.dv_miss_info)
> +	if (!sh->tunnel_hub && sh->config.dv_miss_info) {
> +		err = mlx5_flow_restore_info_register();
> +		if (err) {
> +			DRV_LOG(ERR, "Could not register mbuf dynflag for
> rte_flow_get_restore_info");
> +			goto error;
> +		}
>  		err = mlx5_alloc_tunnel_hub(sh);
> -	if (err) {
> -		DRV_LOG(ERR, "mlx5_alloc_tunnel_hub failed err=%d", err);
> -		goto error;
> +		if (err) {
> +			DRV_LOG(ERR, "mlx5_alloc_tunnel_hub failed
> err=%d", err);
> +			goto error;
> +		}
>  	}
>  	if (sh->config.reclaim_mode == MLX5_RCM_AGGR) {
>  		mlx5_glue->dr_reclaim_domain_memory(sh->rx_domain, 1);
> diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index
> 9eae692037..77cdc802da 100644
> --- a/drivers/net/mlx5/mlx5.h
> +++ b/drivers/net/mlx5/mlx5.h
> @@ -2116,7 +2116,8 @@ int mlx5_flow_query_counter(struct rte_eth_dev
> *dev, struct rte_flow *flow,  int mlx5_flow_dev_dump_ipool(struct
> rte_eth_dev *dev, struct rte_flow *flow,
>  		FILE *file, struct rte_flow_error *error);  #endif -void
> mlx5_flow_rxq_dynf_metadata_set(struct rte_eth_dev *dev);
> +int mlx5_flow_restore_info_register(void);
> +void mlx5_flow_rxq_dynf_set(struct rte_eth_dev *dev);
>  int mlx5_flow_get_aged_flows(struct rte_eth_dev *dev, void **contexts,
>  			uint32_t nb_contexts, struct rte_flow_error *error);
> int mlx5_validate_action_ct(struct rte_eth_dev *dev, diff --git
> a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c index
> d0275fdd00..715b7d327d 100644
> --- a/drivers/net/mlx5/mlx5_flow.c
> +++ b/drivers/net/mlx5/mlx5_flow.c
> @@ -1779,6 +1779,20 @@ flow_rxq_flags_clear(struct rte_eth_dev *dev)
>  	priv->sh->shared_mark_enabled = 0;
>  }
> 
> +static uint64_t mlx5_restore_info_dynflag;
> +
> +int
> +mlx5_flow_restore_info_register(void)
> +{
> +	int err = 0;
> +
> +	if (mlx5_restore_info_dynflag == 0) {
> +		if
> (rte_flow_restore_info_dynflag_register(&mlx5_restore_info_dynflag) < 0)
> +			err = ENOMEM;
> +	}
> +	return err;
> +}
> +
>  /**
>   * Set the Rx queue dynamic metadata (mask and offset) for a flow
>   *
> @@ -1786,7 +1800,7 @@ flow_rxq_flags_clear(struct rte_eth_dev *dev)
>   *   Pointer to the Ethernet device structure.
>   */
>  void
> -mlx5_flow_rxq_dynf_metadata_set(struct rte_eth_dev *dev)
> +mlx5_flow_rxq_dynf_set(struct rte_eth_dev *dev)
>  {
>  	struct mlx5_priv *priv = dev->data->dev_private;
>  	unsigned int i;
> @@ -1809,6 +1823,9 @@ mlx5_flow_rxq_dynf_metadata_set(struct
> rte_eth_dev *dev)
>  			data->flow_meta_offset =
> rte_flow_dynf_metadata_offs;
>  			data->flow_meta_port_mask = priv->sh-
> >dv_meta_mask;
>  		}
> +		data->mark_flag = RTE_MBUF_F_RX_FDIR_ID;
> +		if (is_tunnel_offload_active(dev))
> +			data->mark_flag |= mlx5_restore_info_dynflag;
>  	}
>  }
> 
> @@ -11453,11 +11470,8 @@ mlx5_flow_tunnel_get_restore_info(struct
> rte_eth_dev *dev,
>  	const struct mlx5_flow_tbl_data_entry *tble;
>  	const uint64_t mask = RTE_MBUF_F_RX_FDIR |
> RTE_MBUF_F_RX_FDIR_ID;
> 
> -	if (!is_tunnel_offload_active(dev)) {
> -		info->flags = 0;
> -		return 0;
> -	}
> -
> +	if ((ol_flags & mlx5_restore_info_dynflag) == 0)
> +		goto err;
>  	if ((ol_flags & mask) != mask)
>  		goto err;
>  	tble = tunnel_mark_decode(dev, m->hash.fdir.hi); diff --git
> a/drivers/net/mlx5/mlx5_rx.c b/drivers/net/mlx5/mlx5_rx.c index
> a2be523e9e..71c4638251 100644
> --- a/drivers/net/mlx5/mlx5_rx.c
> +++ b/drivers/net/mlx5/mlx5_rx.c
> @@ -857,7 +857,7 @@ rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct
> rte_mbuf *pkt,
>  		if (MLX5_FLOW_MARK_IS_VALID(mark)) {
>  			pkt->ol_flags |= RTE_MBUF_F_RX_FDIR;
>  			if (mark != RTE_BE32(MLX5_FLOW_MARK_DEFAULT)) {
> -				pkt->ol_flags |= RTE_MBUF_F_RX_FDIR_ID;
> +				pkt->ol_flags |= rxq->mark_flag;
>  				pkt->hash.fdir.hi =
> mlx5_flow_mark_get(mark);
>  			}
>  		}
> diff --git a/drivers/net/mlx5/mlx5_rx.h b/drivers/net/mlx5/mlx5_rx.h index
> 52c35c83f8..3514edd84e 100644
> --- a/drivers/net/mlx5/mlx5_rx.h
> +++ b/drivers/net/mlx5/mlx5_rx.h
> @@ -136,6 +136,7 @@ struct mlx5_rxq_data {
>  	struct mlx5_uar_data uar_data; /* CQ doorbell. */
>  	uint32_t cqn; /* CQ number. */
>  	uint8_t cq_arm_sn; /* CQ arm seq number. */
> +	uint64_t mark_flag; /* ol_flags to set with marks. */
>  	uint32_t tunnel; /* Tunnel information. */
>  	int timestamp_offset; /* Dynamic mbuf field for timestamp. */
>  	uint64_t timestamp_rx_flag; /* Dynamic mbuf flag for timestamp. */
> diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
> index bbaa7d2aa0..7bdb897612 100644
> --- a/drivers/net/mlx5/mlx5_trigger.c
> +++ b/drivers/net/mlx5/mlx5_trigger.c
> @@ -1282,8 +1282,8 @@ mlx5_dev_start(struct rte_eth_dev *dev)
>  			dev->data->port_id);
>  		goto error;
>  	}
> -	/* Set a mask and offset of dynamic metadata flows into Rx queues.
> */
> -	mlx5_flow_rxq_dynf_metadata_set(dev);
> +	/* Set dynamic fields and flags into Rx queues. */
> +	mlx5_flow_rxq_dynf_set(dev);
>  	/* Set flags and context to convert Rx timestamps. */
>  	mlx5_rxq_timestamp_set(dev);
>  	/* Set a mask and offset of scheduling on timestamp into Tx queues.
> */ diff --git a/drivers/net/sfc/sfc_dp.c b/drivers/net/sfc/sfc_dp.c index
> 9f2093b353..8b2dbea325 100644
> --- a/drivers/net/sfc/sfc_dp.c
> +++ b/drivers/net/sfc/sfc_dp.c
> @@ -11,6 +11,7 @@
>  #include <string.h>
>  #include <errno.h>
> 
> +#include <ethdev_driver.h>
>  #include <rte_log.h>
>  #include <rte_mbuf_dyn.h>
> 
> @@ -135,12 +136,8 @@ sfc_dp_ft_ctx_id_register(void)
>  		.size = sizeof(uint8_t),
>  		.align = __alignof__(uint8_t),
>  	};
> -	static const struct rte_mbuf_dynflag ft_ctx_id_valid = {
> -		.name = "rte_net_sfc_dynflag_ft_ctx_id_valid",
> -	};
> 
>  	int field_offset;
> -	int flag;
> 
>  	SFC_GENERIC_LOG(INFO, "%s() entry", __func__);
> 
> @@ -156,15 +153,13 @@ sfc_dp_ft_ctx_id_register(void)
>  		return -1;
>  	}
> 
> -	flag = rte_mbuf_dynflag_register(&ft_ctx_id_valid);
> -	if (flag < 0) {
> +	if (rte_flow_restore_info_dynflag_register(&sfc_dp_ft_ctx_id_valid) <
> +0) {
>  		SFC_GENERIC_LOG(ERR, "%s() failed to register ft_ctx_id
> dynflag",
>  				__func__);
>  		return -1;
>  	}
> 
>  	sfc_dp_ft_ctx_id_offset = field_offset;
> -	sfc_dp_ft_ctx_id_valid = UINT64_C(1) << flag;
> 
>  	SFC_GENERIC_LOG(INFO, "%s() done", __func__);
> 
> diff --git a/lib/ethdev/ethdev_driver.h b/lib/ethdev/ethdev_driver.h index
> 2c9d615fb5..6c17d84d1b 100644
> --- a/lib/ethdev/ethdev_driver.h
> +++ b/lib/ethdev/ethdev_driver.h
> @@ -1949,6 +1949,14 @@ __rte_internal
>  int
>  rte_eth_ip_reassembly_dynfield_register(int *field_offset, int *flag);
> 
> +/**
> + * @internal
> + * Register mbuf dynamic flag for rte_flow_get_restore_info.
> + */
> +__rte_internal
> +int
> +rte_flow_restore_info_dynflag_register(uint64_t *flag);
> +
> 
>  /*
>   * Legacy ethdev API used internally by drivers.
> diff --git a/lib/ethdev/rte_flow.c b/lib/ethdev/rte_flow.c index
> 69e6e749f7..10cd9d12ba 100644
> --- a/lib/ethdev/rte_flow.c
> +++ b/lib/ethdev/rte_flow.c
> @@ -1401,6 +1401,35 @@ rte_flow_get_restore_info(uint16_t port_id,
>  				  NULL, rte_strerror(ENOTSUP));
>  }
> 
> +static struct {
> +	const struct rte_mbuf_dynflag desc;
> +	uint64_t value;
> +} flow_restore_info_dynflag = {
> +	.desc = { .name = "RTE_MBUF_F_RX_RESTORE_INFO", }, };
> +
> +uint64_t
> +rte_flow_restore_info_dynflag(void)
> +{
> +	return flow_restore_info_dynflag.value; }
> +
> +int
> +rte_flow_restore_info_dynflag_register(uint64_t *flag) {
> +	if (flow_restore_info_dynflag.value == 0) {
> +		int offset =
> +rte_mbuf_dynflag_register(&flow_restore_info_dynflag.desc);
> +
> +		if (offset < 0)
> +			return -1;
> +		flow_restore_info_dynflag.value = RTE_BIT64(offset);
> +	}
> +	if (*flag)
> +		*flag = rte_flow_restore_info_dynflag();
> +
> +	return 0;
> +}
> +
>  int
>  rte_flow_tunnel_action_decap_release(uint16_t port_id,
>  				     struct rte_flow_action *actions, diff --git
> a/lib/ethdev/rte_flow.h b/lib/ethdev/rte_flow.h index
> 713ba8b65c..5ce2db4bbd 100644
> --- a/lib/ethdev/rte_flow.h
> +++ b/lib/ethdev/rte_flow.h
> @@ -4918,7 +4918,24 @@ rte_flow_tunnel_match(uint16_t port_id,
>  		      struct rte_flow_error *error);
> 
>  /**
> - * Populate the current packet processing state, if exists, for the given mbuf.
> + * On reception of a mbuf from HW, a call to
> +rte_flow_get_restore_info() may be
> + * required to retrieve some metadata.
> + * This function returns the associated mbuf ol_flags.
> + *
> + * Note: the dynamic flag is registered during the probing of the first
> +device
> + * that requires it. If this function returns a non 0 value, this value
> +won't
> + * change for the rest of the life of the application.
> + *
> + * @return
> + *   The offload flag indicating rte_flow_get_restore_info() must be called.
> + */
> +__rte_experimental
> +uint64_t
> +rte_flow_restore_info_dynflag(void);
> +
> +/**
> + * If a mbuf contains the rte_flow_restore_info_dynflag() flag in
> +ol_flags,
> + * populate the current packet processing state.
>   *
>   * One should negotiate tunnel metadata delivery from the NIC to the HW.
>   * @see rte_eth_rx_metadata_negotiate() diff --git a/lib/ethdev/version.map
> b/lib/ethdev/version.map index 357d1a88c0..bf5668e928 100644
> --- a/lib/ethdev/version.map
> +++ b/lib/ethdev/version.map
> @@ -299,6 +299,9 @@ EXPERIMENTAL {
>  	rte_flow_action_handle_query_update;
>  	rte_flow_async_action_handle_query_update;
>  	rte_flow_async_create_by_index;
> +
> +	# added in 23.07
> +	rte_flow_restore_info_dynflag;
>  };
> 
>  INTERNAL {
> @@ -328,4 +331,5 @@ INTERNAL {
>  	rte_eth_representor_id_get;
>  	rte_eth_switch_domain_alloc;
>  	rte_eth_switch_domain_free;
> +	rte_flow_restore_info_dynflag_register;
>  };
> --
> 2.40.0



More information about the dev mailing list