[dpdk-dev,v3,01/14] net/mlx5: support 16 hardware priorities

Message ID 20180413112023.106420-2-xuemingl@mellanox.com (mailing list archive)
State Superseded, archived
Delegated to: Shahaf Shuler
Headers

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation fail apply patch file failure

Commit Message

Xueming Li April 13, 2018, 11:20 a.m. UTC
  This patch supports new 16 Verbs flow priorities by trying to create a
simple flow of priority 15. If 16 priorities not available, fallback to
traditional 8 priorities.

Verb priority mapping:
			8 priorities	>=16 priorities
Control flow:		4-7		8-15
User normal flow:	1-3		4-7
User tunnel flow:	0-2		0-3

Signed-off-by: Xueming Li <xuemingl@mellanox.com>
---
 drivers/net/mlx5/mlx5.c         |  18 +++++++
 drivers/net/mlx5/mlx5.h         |   5 ++
 drivers/net/mlx5/mlx5_flow.c    | 112 +++++++++++++++++++++++++++++++++-------
 drivers/net/mlx5/mlx5_trigger.c |   8 ---
 4 files changed, 115 insertions(+), 28 deletions(-)
  

Comments

Nélio Laranjeiro April 13, 2018, 11:58 a.m. UTC | #1
Hi Xueming,

Small nips and documentation issues,

On Fri, Apr 13, 2018 at 07:20:10PM +0800, Xueming Li wrote:
> This patch supports new 16 Verbs flow priorities by trying to create a
> simple flow of priority 15. If 16 priorities not available, fallback to
> traditional 8 priorities.
> 
> Verb priority mapping:
> 			8 priorities	>=16 priorities
> Control flow:		4-7		8-15
> User normal flow:	1-3		4-7
> User tunnel flow:	0-2		0-3

There is an overlap between tunnel and normal flows it is expected?

> 
> Signed-off-by: Xueming Li <xuemingl@mellanox.com>
> ---
>  drivers/net/mlx5/mlx5.c         |  18 +++++++
>  drivers/net/mlx5/mlx5.h         |   5 ++
>  drivers/net/mlx5/mlx5_flow.c    | 112 +++++++++++++++++++++++++++++++++-------
>  drivers/net/mlx5/mlx5_trigger.c |   8 ---
>  4 files changed, 115 insertions(+), 28 deletions(-)
> 
> diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
> index cfab55897..38118e524 100644
> --- a/drivers/net/mlx5/mlx5.c
> +++ b/drivers/net/mlx5/mlx5.c
> @@ -197,6 +197,7 @@ mlx5_dev_close(struct rte_eth_dev *dev)
>  		priv->txqs_n = 0;
>  		priv->txqs = NULL;
>  	}
> +	mlx5_flow_delete_drop_queue(dev);
>  	if (priv->pd != NULL) {
>  		assert(priv->ctx != NULL);
>  		claim_zero(mlx5_glue->dealloc_pd(priv->pd));
> @@ -612,6 +613,7 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
>  	unsigned int mps;
>  	unsigned int cqe_comp;
>  	unsigned int tunnel_en = 0;
> +	unsigned int verb_priorities = 0;
>  	int idx;
>  	int i;
>  	struct mlx5dv_context attrs_out = {0};
> @@ -993,6 +995,22 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
>  		mlx5_set_link_up(eth_dev);
>  		/* Store device configuration on private structure. */
>  		priv->config = config;
> +		/* Create drop queue. */
> +		err = mlx5_flow_create_drop_queue(eth_dev);
> +		if (err) {
> +			DRV_LOG(ERR, "port %u drop queue allocation failed: %s",
> +				eth_dev->data->port_id, strerror(rte_errno));
> +			goto port_error;
> +		}
> +		/* Supported Verbs flow priority number detection. */
> +		if (verb_priorities == 0)
> +			verb_priorities = priv_get_max_verbs_prio(eth_dev);

No more priv*() rename it to mlx5_get_max_verbs_prio()

> +		if (verb_priorities < MLX5_VERBS_FLOW_PRIO_8) {
> +			DRV_LOG(ERR, "port %u wrong Verbs flow priorities: %u",
> +				eth_dev->data->port_id, verb_priorities);
> +			goto port_error;
> +		}
> +		priv->config.max_verb_prio = verb_priorities;

s/verb/verbs/

>  		continue;
>  port_error:
>  		if (priv)
> diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
> index 63b24e6bb..6e4613fe0 100644
> --- a/drivers/net/mlx5/mlx5.h
> +++ b/drivers/net/mlx5/mlx5.h
> @@ -89,6 +89,7 @@ struct mlx5_dev_config {
>  	unsigned int rx_vec_en:1; /* Rx vector is enabled. */
>  	unsigned int mpw_hdr_dseg:1; /* Enable DSEGs in the title WQEBB. */
>  	unsigned int vf_nl_en:1; /* Enable Netlink requests in VF mode. */
> +	unsigned int max_verb_prio; /* Number of Verb flow priorities. */
>  	unsigned int tso_max_payload_sz; /* Maximum TCP payload for TSO. */
>  	unsigned int ind_table_max_size; /* Maximum indirection table size. */
>  	int txq_inline; /* Maximum packet size for inlining. */
> @@ -105,6 +106,9 @@ enum mlx5_verbs_alloc_type {
>  	MLX5_VERBS_ALLOC_TYPE_RX_QUEUE,
>  };
>  
> +/* 8 Verbs priorities. */
> +#define MLX5_VERBS_FLOW_PRIO_8 8
> +
>  /**
>   * Verbs allocator needs a context to know in the callback which kind of
>   * resources it is allocating.
> @@ -253,6 +257,7 @@ int mlx5_traffic_restart(struct rte_eth_dev *dev);
>  
>  /* mlx5_flow.c */
>  
> +unsigned int priv_get_max_verbs_prio(struct rte_eth_dev *dev);
>  int mlx5_flow_validate(struct rte_eth_dev *dev,
>  		       const struct rte_flow_attr *attr,
>  		       const struct rte_flow_item items[],
> diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
> index 288610620..5c4f0b586 100644
> --- a/drivers/net/mlx5/mlx5_flow.c
> +++ b/drivers/net/mlx5/mlx5_flow.c
> @@ -32,8 +32,8 @@
>  #include "mlx5_prm.h"
>  #include "mlx5_glue.h"
>  
> -/* Define minimal priority for control plane flows. */
> -#define MLX5_CTRL_FLOW_PRIORITY 4
> +/* Flow priority for control plane flows. */
> +#define MLX5_CTRL_FLOW_PRIORITY 1
>  
>  /* Internet Protocol versions. */
>  #define MLX5_IPV4 4
> @@ -129,7 +129,7 @@ const struct hash_rxq_init hash_rxq_init[] = {
>  				IBV_RX_HASH_SRC_PORT_TCP |
>  				IBV_RX_HASH_DST_PORT_TCP),
>  		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
> -		.flow_priority = 1,
> +		.flow_priority = 0,
>  		.ip_version = MLX5_IPV4,
>  	},
>  	[HASH_RXQ_UDPV4] = {
> @@ -138,7 +138,7 @@ const struct hash_rxq_init hash_rxq_init[] = {
>  				IBV_RX_HASH_SRC_PORT_UDP |
>  				IBV_RX_HASH_DST_PORT_UDP),
>  		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
> -		.flow_priority = 1,
> +		.flow_priority = 0,
>  		.ip_version = MLX5_IPV4,
>  	},
>  	[HASH_RXQ_IPV4] = {
> @@ -146,7 +146,7 @@ const struct hash_rxq_init hash_rxq_init[] = {
>  				IBV_RX_HASH_DST_IPV4),
>  		.dpdk_rss_hf = (ETH_RSS_IPV4 |
>  				ETH_RSS_FRAG_IPV4),
> -		.flow_priority = 2,
> +		.flow_priority = 1,
>  		.ip_version = MLX5_IPV4,
>  	},
>  	[HASH_RXQ_TCPV6] = {
> @@ -155,7 +155,7 @@ const struct hash_rxq_init hash_rxq_init[] = {
>  				IBV_RX_HASH_SRC_PORT_TCP |
>  				IBV_RX_HASH_DST_PORT_TCP),
>  		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
> -		.flow_priority = 1,
> +		.flow_priority = 0,
>  		.ip_version = MLX5_IPV6,
>  	},
>  	[HASH_RXQ_UDPV6] = {
> @@ -164,7 +164,7 @@ const struct hash_rxq_init hash_rxq_init[] = {
>  				IBV_RX_HASH_SRC_PORT_UDP |
>  				IBV_RX_HASH_DST_PORT_UDP),
>  		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
> -		.flow_priority = 1,
> +		.flow_priority = 0,
>  		.ip_version = MLX5_IPV6,
>  	},
>  	[HASH_RXQ_IPV6] = {
> @@ -172,13 +172,13 @@ const struct hash_rxq_init hash_rxq_init[] = {
>  				IBV_RX_HASH_DST_IPV6),
>  		.dpdk_rss_hf = (ETH_RSS_IPV6 |
>  				ETH_RSS_FRAG_IPV6),
> -		.flow_priority = 2,
> +		.flow_priority = 1,
>  		.ip_version = MLX5_IPV6,
>  	},
>  	[HASH_RXQ_ETH] = {
>  		.hash_fields = 0,
>  		.dpdk_rss_hf = 0,
> -		.flow_priority = 3,
> +		.flow_priority = 2,
>  	},
>  };
>  
> @@ -900,30 +900,50 @@ mlx5_flow_convert_allocate(unsigned int size, struct rte_flow_error *error)
>   * Make inner packet matching with an higher priority from the non Inner
>   * matching.
>   *
> + * @param dev
> + *   Pointer to Ethernet device.
>   * @param[in, out] parser
>   *   Internal parser structure.
>   * @param attr
>   *   User flow attribute.
>   */
>  static void
> -mlx5_flow_update_priority(struct mlx5_flow_parse *parser,
> +mlx5_flow_update_priority(struct rte_eth_dev *dev,
> +			  struct mlx5_flow_parse *parser,
>  			  const struct rte_flow_attr *attr)
>  {
> +	struct priv *priv = dev->data->dev_private;
>  	unsigned int i;
> +	uint16_t priority;
>  
> +	/*			8 priorities	>= 16 priorities
> +	 * Control flow:	4-7		8-15
> +	 * User normal flow:	1-3		4-7
> +	 * User tunnel flow:	0-2		0-3
> +	 */

Same comment here, the tunnel flow overlap when there are only 8
priorities.

> +	priority = attr->priority * MLX5_VERBS_FLOW_PRIO_8;
> +	if (priv->config.max_verb_prio == MLX5_VERBS_FLOW_PRIO_8)
> +		priority /= 2;
> +	/*
> +	 * Lower non-tunnel flow Verbs priority 1 if only support 8 Verbs
> +	 * priorities, lower 4 otherwise.
> +	 */
> +	if (!parser->inner) {
> +		if (priv->config.max_verb_prio == MLX5_VERBS_FLOW_PRIO_8)
> +			priority += 1;
> +		else
> +			priority += MLX5_VERBS_FLOW_PRIO_8 / 2;
> +	}
>  	if (parser->drop) {
> -		parser->queue[HASH_RXQ_ETH].ibv_attr->priority =
> -			attr->priority +
> -			hash_rxq_init[HASH_RXQ_ETH].flow_priority;
> +		parser->queue[HASH_RXQ_ETH].ibv_attr->priority = priority +
> +				hash_rxq_init[HASH_RXQ_ETH].flow_priority;
>  		return;
>  	}
>  	for (i = 0; i != hash_rxq_init_n; ++i) {
> -		if (parser->queue[i].ibv_attr) {
> -			parser->queue[i].ibv_attr->priority =
> -				attr->priority +
> -				hash_rxq_init[i].flow_priority -
> -				(parser->inner ? 1 : 0);
> -		}
> +		if (!parser->queue[i].ibv_attr)
> +			continue;
> +		parser->queue[i].ibv_attr->priority = priority +
> +				hash_rxq_init[i].flow_priority;
>  	}
>  }
>  
> @@ -1158,7 +1178,7 @@ mlx5_flow_convert(struct rte_eth_dev *dev,
>  	 */
>  	if (!parser->drop)
>  		mlx5_flow_convert_finalise(parser);
> -	mlx5_flow_update_priority(parser, attr);
> +	mlx5_flow_update_priority(dev, parser, attr);
>  exit_free:
>  	/* Only verification is expected, all resources should be released. */
>  	if (!parser->create) {
> @@ -3161,3 +3181,55 @@ mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
>  	}
>  	return 0;
>  }
> +
> +/**
> + * Detect number of Verbs flow priorities supported.
> + *
> + * @param dev
> + *   Pointer to Ethernet device.
> + *
> + * @return
> + *   number of supported Verbs flow priority.
> + */
> +unsigned int
> +priv_get_max_verbs_prio(struct rte_eth_dev *dev)
> +{
> +	struct priv *priv = dev->data->dev_private;
> +	unsigned int verb_priorities = MLX5_VERBS_FLOW_PRIO_8;
> +	struct {
> +		struct ibv_flow_attr attr;
> +		struct ibv_flow_spec_eth eth;
> +		struct ibv_flow_spec_action_drop drop;
> +	} flow_attr = {
> +		.attr = {
> +			.num_of_specs = 2,
> +		},
> +		.eth = {
> +			.type = IBV_FLOW_SPEC_ETH,
> +			.size = sizeof(struct ibv_flow_spec_eth),
> +		},
> +		.drop = {
> +			.size = sizeof(struct ibv_flow_spec_action_drop),
> +			.type = IBV_FLOW_SPEC_ACTION_DROP,
> +		},
> +	};
> +	struct ibv_flow *flow;
> +
> +	do {
> +		flow_attr.attr.priority = verb_priorities - 1;
> +		flow = mlx5_glue->create_flow(priv->flow_drop_queue->qp,
> +					      &flow_attr.attr);
> +		if (flow) {
> +			claim_zero(mlx5_glue->destroy_flow(flow));
> +			/* Try more priorities. */
> +			verb_priorities *= 2;
> +		} else {
> +			/* Failed, restore last right number. */
> +			verb_priorities /= 2;
> +			break;
> +		}
> +	} while (1);
> +	DRV_LOG(INFO, "port %u Verbs flow priorities: %d",
> +		dev->data->port_id, verb_priorities);

Please remove this developer log, it will confuse the user who will
believe he have N priorities which is absolutely not the case.

> +	return verb_priorities;
> +}
> diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
> index 6bb4ffb14..d80a2e688 100644
> --- a/drivers/net/mlx5/mlx5_trigger.c
> +++ b/drivers/net/mlx5/mlx5_trigger.c
> @@ -148,12 +148,6 @@ mlx5_dev_start(struct rte_eth_dev *dev)
>  	int ret;
>  
>  	dev->data->dev_started = 1;
> -	ret = mlx5_flow_create_drop_queue(dev);
> -	if (ret) {
> -		DRV_LOG(ERR, "port %u drop queue allocation failed: %s",
> -			dev->data->port_id, strerror(rte_errno));
> -		goto error;
> -	}
>  	DRV_LOG(DEBUG, "port %u allocating and configuring hash Rx queues",
>  		dev->data->port_id);
>  	rte_mempool_walk(mlx5_mp2mr_iter, priv);
> @@ -202,7 +196,6 @@ mlx5_dev_start(struct rte_eth_dev *dev)
>  	mlx5_traffic_disable(dev);
>  	mlx5_txq_stop(dev);
>  	mlx5_rxq_stop(dev);
> -	mlx5_flow_delete_drop_queue(dev);
>  	rte_errno = ret; /* Restore rte_errno. */
>  	return -rte_errno;
>  }
> @@ -237,7 +230,6 @@ mlx5_dev_stop(struct rte_eth_dev *dev)
>  	mlx5_rxq_stop(dev);
>  	for (mr = LIST_FIRST(&priv->mr); mr; mr = LIST_FIRST(&priv->mr))
>  		mlx5_mr_release(mr);
> -	mlx5_flow_delete_drop_queue(dev);
>  }
>  
>  /**
> -- 
> 2.13.3

Thanks,
  
Xueming Li April 13, 2018, 1:10 p.m. UTC | #2
> -----Original Message-----

> From: Nélio Laranjeiro <nelio.laranjeiro@6wind.com>

> Sent: Friday, April 13, 2018 7:59 PM

> To: Xueming(Steven) Li <xuemingl@mellanox.com>

> Cc: Shahaf Shuler <shahafs@mellanox.com>; dev@dpdk.org

> Subject: Re: [PATCH v3 01/14] net/mlx5: support 16 hardware priorities

> 

> Hi Xueming,

> 

> Small nips and documentation issues,

> 

> On Fri, Apr 13, 2018 at 07:20:10PM +0800, Xueming Li wrote:

> > This patch supports new 16 Verbs flow priorities by trying to create a

> > simple flow of priority 15. If 16 priorities not available, fallback

> > to traditional 8 priorities.

> >

> > Verb priority mapping:

> > 			8 priorities	>=16 priorities

> > Control flow:		4-7		8-15

> > User normal flow:	1-3		4-7

> > User tunnel flow:	0-2		0-3

> 

> There is an overlap between tunnel and normal flows it is expected?


For 8 priorities, (4-7), (1-3) and (0-2) are the behavior of today, 
1 Verbs shift to make tunnel flow higher priority, please refer to 
commit #74936571

> 

> >

> > Signed-off-by: Xueming Li <xuemingl@mellanox.com>

> > ---

> >  drivers/net/mlx5/mlx5.c         |  18 +++++++

> >  drivers/net/mlx5/mlx5.h         |   5 ++

> >  drivers/net/mlx5/mlx5_flow.c    | 112

> +++++++++++++++++++++++++++++++++-------

> >  drivers/net/mlx5/mlx5_trigger.c |   8 ---

> >  4 files changed, 115 insertions(+), 28 deletions(-)

> >

> > diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index

> > cfab55897..38118e524 100644

> > --- a/drivers/net/mlx5/mlx5.c

> > +++ b/drivers/net/mlx5/mlx5.c

> > @@ -197,6 +197,7 @@ mlx5_dev_close(struct rte_eth_dev *dev)

> >  		priv->txqs_n = 0;

> >  		priv->txqs = NULL;

> >  	}

> > +	mlx5_flow_delete_drop_queue(dev);

> >  	if (priv->pd != NULL) {

> >  		assert(priv->ctx != NULL);

> >  		claim_zero(mlx5_glue->dealloc_pd(priv->pd));

> > @@ -612,6 +613,7 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv

> __rte_unused,

> >  	unsigned int mps;

> >  	unsigned int cqe_comp;

> >  	unsigned int tunnel_en = 0;

> > +	unsigned int verb_priorities = 0;

> >  	int idx;

> >  	int i;

> >  	struct mlx5dv_context attrs_out = {0}; @@ -993,6 +995,22 @@

> > mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,

> >  		mlx5_set_link_up(eth_dev);

> >  		/* Store device configuration on private structure. */

> >  		priv->config = config;

> > +		/* Create drop queue. */

> > +		err = mlx5_flow_create_drop_queue(eth_dev);

> > +		if (err) {

> > +			DRV_LOG(ERR, "port %u drop queue allocation failed: %s",

> > +				eth_dev->data->port_id, strerror(rte_errno));

> > +			goto port_error;

> > +		}

> > +		/* Supported Verbs flow priority number detection. */

> > +		if (verb_priorities == 0)

> > +			verb_priorities = priv_get_max_verbs_prio(eth_dev);

> 

> No more priv*() rename it to mlx5_get_max_verbs_prio()

> 

> > +		if (verb_priorities < MLX5_VERBS_FLOW_PRIO_8) {

> > +			DRV_LOG(ERR, "port %u wrong Verbs flow priorities: %u",

> > +				eth_dev->data->port_id, verb_priorities);

> > +			goto port_error;

> > +		}

> > +		priv->config.max_verb_prio = verb_priorities;

> 

> s/verb/verbs/

> 

> >  		continue;

> >  port_error:

> >  		if (priv)

> > diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index

> > 63b24e6bb..6e4613fe0 100644

> > --- a/drivers/net/mlx5/mlx5.h

> > +++ b/drivers/net/mlx5/mlx5.h

> > @@ -89,6 +89,7 @@ struct mlx5_dev_config {

> >  	unsigned int rx_vec_en:1; /* Rx vector is enabled. */

> >  	unsigned int mpw_hdr_dseg:1; /* Enable DSEGs in the title WQEBB. */

> >  	unsigned int vf_nl_en:1; /* Enable Netlink requests in VF mode. */

> > +	unsigned int max_verb_prio; /* Number of Verb flow priorities. */

> >  	unsigned int tso_max_payload_sz; /* Maximum TCP payload for TSO. */

> >  	unsigned int ind_table_max_size; /* Maximum indirection table size.

> */

> >  	int txq_inline; /* Maximum packet size for inlining. */ @@ -105,6

> > +106,9 @@ enum mlx5_verbs_alloc_type {

> >  	MLX5_VERBS_ALLOC_TYPE_RX_QUEUE,

> >  };

> >

> > +/* 8 Verbs priorities. */

> > +#define MLX5_VERBS_FLOW_PRIO_8 8

> > +

> >  /**

> >   * Verbs allocator needs a context to know in the callback which kind

> of

> >   * resources it is allocating.

> > @@ -253,6 +257,7 @@ int mlx5_traffic_restart(struct rte_eth_dev *dev);

> >

> >  /* mlx5_flow.c */

> >

> > +unsigned int priv_get_max_verbs_prio(struct rte_eth_dev *dev);

> >  int mlx5_flow_validate(struct rte_eth_dev *dev,

> >  		       const struct rte_flow_attr *attr,

> >  		       const struct rte_flow_item items[], diff --git

> > a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c index

> > 288610620..5c4f0b586 100644

> > --- a/drivers/net/mlx5/mlx5_flow.c

> > +++ b/drivers/net/mlx5/mlx5_flow.c

> > @@ -32,8 +32,8 @@

> >  #include "mlx5_prm.h"

> >  #include "mlx5_glue.h"

> >

> > -/* Define minimal priority for control plane flows. */ -#define

> > MLX5_CTRL_FLOW_PRIORITY 4

> > +/* Flow priority for control plane flows. */ #define

> > +MLX5_CTRL_FLOW_PRIORITY 1

> >

> >  /* Internet Protocol versions. */

> >  #define MLX5_IPV4 4

> > @@ -129,7 +129,7 @@ const struct hash_rxq_init hash_rxq_init[] = {

> >  				IBV_RX_HASH_SRC_PORT_TCP |

> >  				IBV_RX_HASH_DST_PORT_TCP),

> >  		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,

> > -		.flow_priority = 1,

> > +		.flow_priority = 0,

> >  		.ip_version = MLX5_IPV4,

> >  	},

> >  	[HASH_RXQ_UDPV4] = {

> > @@ -138,7 +138,7 @@ const struct hash_rxq_init hash_rxq_init[] = {

> >  				IBV_RX_HASH_SRC_PORT_UDP |

> >  				IBV_RX_HASH_DST_PORT_UDP),

> >  		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,

> > -		.flow_priority = 1,

> > +		.flow_priority = 0,

> >  		.ip_version = MLX5_IPV4,

> >  	},

> >  	[HASH_RXQ_IPV4] = {

> > @@ -146,7 +146,7 @@ const struct hash_rxq_init hash_rxq_init[] = {

> >  				IBV_RX_HASH_DST_IPV4),

> >  		.dpdk_rss_hf = (ETH_RSS_IPV4 |

> >  				ETH_RSS_FRAG_IPV4),

> > -		.flow_priority = 2,

> > +		.flow_priority = 1,

> >  		.ip_version = MLX5_IPV4,

> >  	},

> >  	[HASH_RXQ_TCPV6] = {

> > @@ -155,7 +155,7 @@ const struct hash_rxq_init hash_rxq_init[] = {

> >  				IBV_RX_HASH_SRC_PORT_TCP |

> >  				IBV_RX_HASH_DST_PORT_TCP),

> >  		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,

> > -		.flow_priority = 1,

> > +		.flow_priority = 0,

> >  		.ip_version = MLX5_IPV6,

> >  	},

> >  	[HASH_RXQ_UDPV6] = {

> > @@ -164,7 +164,7 @@ const struct hash_rxq_init hash_rxq_init[] = {

> >  				IBV_RX_HASH_SRC_PORT_UDP |

> >  				IBV_RX_HASH_DST_PORT_UDP),

> >  		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,

> > -		.flow_priority = 1,

> > +		.flow_priority = 0,

> >  		.ip_version = MLX5_IPV6,

> >  	},

> >  	[HASH_RXQ_IPV6] = {

> > @@ -172,13 +172,13 @@ const struct hash_rxq_init hash_rxq_init[] = {

> >  				IBV_RX_HASH_DST_IPV6),

> >  		.dpdk_rss_hf = (ETH_RSS_IPV6 |

> >  				ETH_RSS_FRAG_IPV6),

> > -		.flow_priority = 2,

> > +		.flow_priority = 1,

> >  		.ip_version = MLX5_IPV6,

> >  	},

> >  	[HASH_RXQ_ETH] = {

> >  		.hash_fields = 0,

> >  		.dpdk_rss_hf = 0,

> > -		.flow_priority = 3,

> > +		.flow_priority = 2,

> >  	},

> >  };

> >

> > @@ -900,30 +900,50 @@ mlx5_flow_convert_allocate(unsigned int size,

> struct rte_flow_error *error)

> >   * Make inner packet matching with an higher priority from the non

> Inner

> >   * matching.

> >   *

> > + * @param dev

> > + *   Pointer to Ethernet device.

> >   * @param[in, out] parser

> >   *   Internal parser structure.

> >   * @param attr

> >   *   User flow attribute.

> >   */

> >  static void

> > -mlx5_flow_update_priority(struct mlx5_flow_parse *parser,

> > +mlx5_flow_update_priority(struct rte_eth_dev *dev,

> > +			  struct mlx5_flow_parse *parser,

> >  			  const struct rte_flow_attr *attr)  {

> > +	struct priv *priv = dev->data->dev_private;

> >  	unsigned int i;

> > +	uint16_t priority;

> >

> > +	/*			8 priorities	>= 16 priorities

> > +	 * Control flow:	4-7		8-15

> > +	 * User normal flow:	1-3		4-7

> > +	 * User tunnel flow:	0-2		0-3

> > +	 */

> 

> Same comment here, the tunnel flow overlap when there are only 8

> priorities.

> 

> > +	priority = attr->priority * MLX5_VERBS_FLOW_PRIO_8;

> > +	if (priv->config.max_verb_prio == MLX5_VERBS_FLOW_PRIO_8)

> > +		priority /= 2;

> > +	/*

> > +	 * Lower non-tunnel flow Verbs priority 1 if only support 8 Verbs

> > +	 * priorities, lower 4 otherwise.

> > +	 */

> > +	if (!parser->inner) {

> > +		if (priv->config.max_verb_prio == MLX5_VERBS_FLOW_PRIO_8)

> > +			priority += 1;

> > +		else

> > +			priority += MLX5_VERBS_FLOW_PRIO_8 / 2;

> > +	}

> >  	if (parser->drop) {

> > -		parser->queue[HASH_RXQ_ETH].ibv_attr->priority =

> > -			attr->priority +

> > -			hash_rxq_init[HASH_RXQ_ETH].flow_priority;

> > +		parser->queue[HASH_RXQ_ETH].ibv_attr->priority = priority +

> > +				hash_rxq_init[HASH_RXQ_ETH].flow_priority;

> >  		return;

> >  	}

> >  	for (i = 0; i != hash_rxq_init_n; ++i) {

> > -		if (parser->queue[i].ibv_attr) {

> > -			parser->queue[i].ibv_attr->priority =

> > -				attr->priority +

> > -				hash_rxq_init[i].flow_priority -

> > -				(parser->inner ? 1 : 0);

> > -		}

> > +		if (!parser->queue[i].ibv_attr)

> > +			continue;

> > +		parser->queue[i].ibv_attr->priority = priority +

> > +				hash_rxq_init[i].flow_priority;

> >  	}

> >  }

> >

> > @@ -1158,7 +1178,7 @@ mlx5_flow_convert(struct rte_eth_dev *dev,

> >  	 */

> >  	if (!parser->drop)

> >  		mlx5_flow_convert_finalise(parser);

> > -	mlx5_flow_update_priority(parser, attr);

> > +	mlx5_flow_update_priority(dev, parser, attr);

> >  exit_free:

> >  	/* Only verification is expected, all resources should be released.

> */

> >  	if (!parser->create) {

> > @@ -3161,3 +3181,55 @@ mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,

> >  	}

> >  	return 0;

> >  }

> > +

> > +/**

> > + * Detect number of Verbs flow priorities supported.

> > + *

> > + * @param dev

> > + *   Pointer to Ethernet device.

> > + *

> > + * @return

> > + *   number of supported Verbs flow priority.

> > + */

> > +unsigned int

> > +priv_get_max_verbs_prio(struct rte_eth_dev *dev) {

> > +	struct priv *priv = dev->data->dev_private;

> > +	unsigned int verb_priorities = MLX5_VERBS_FLOW_PRIO_8;

> > +	struct {

> > +		struct ibv_flow_attr attr;

> > +		struct ibv_flow_spec_eth eth;

> > +		struct ibv_flow_spec_action_drop drop;

> > +	} flow_attr = {

> > +		.attr = {

> > +			.num_of_specs = 2,

> > +		},

> > +		.eth = {

> > +			.type = IBV_FLOW_SPEC_ETH,

> > +			.size = sizeof(struct ibv_flow_spec_eth),

> > +		},

> > +		.drop = {

> > +			.size = sizeof(struct ibv_flow_spec_action_drop),

> > +			.type = IBV_FLOW_SPEC_ACTION_DROP,

> > +		},

> > +	};

> > +	struct ibv_flow *flow;

> > +

> > +	do {

> > +		flow_attr.attr.priority = verb_priorities - 1;

> > +		flow = mlx5_glue->create_flow(priv->flow_drop_queue->qp,

> > +					      &flow_attr.attr);

> > +		if (flow) {

> > +			claim_zero(mlx5_glue->destroy_flow(flow));

> > +			/* Try more priorities. */

> > +			verb_priorities *= 2;

> > +		} else {

> > +			/* Failed, restore last right number. */

> > +			verb_priorities /= 2;

> > +			break;

> > +		}

> > +	} while (1);

> > +	DRV_LOG(INFO, "port %u Verbs flow priorities: %d",

> > +		dev->data->port_id, verb_priorities);

> 

> Please remove this developer log, it will confuse the user who will

> believe he have N priorities which is absolutely not the case.


How about change it to DEBUG level, this should be very useful in real deployment
trouble shooting.

I could append something like "user flow priorities: %d" to avoid confusion..

> 

> > +	return verb_priorities;

> > +}

> > diff --git a/drivers/net/mlx5/mlx5_trigger.c

> > b/drivers/net/mlx5/mlx5_trigger.c index 6bb4ffb14..d80a2e688 100644

> > --- a/drivers/net/mlx5/mlx5_trigger.c

> > +++ b/drivers/net/mlx5/mlx5_trigger.c

> > @@ -148,12 +148,6 @@ mlx5_dev_start(struct rte_eth_dev *dev)

> >  	int ret;

> >

> >  	dev->data->dev_started = 1;

> > -	ret = mlx5_flow_create_drop_queue(dev);

> > -	if (ret) {

> > -		DRV_LOG(ERR, "port %u drop queue allocation failed: %s",

> > -			dev->data->port_id, strerror(rte_errno));

> > -		goto error;

> > -	}

> >  	DRV_LOG(DEBUG, "port %u allocating and configuring hash Rx queues",

> >  		dev->data->port_id);

> >  	rte_mempool_walk(mlx5_mp2mr_iter, priv); @@ -202,7 +196,6 @@

> > mlx5_dev_start(struct rte_eth_dev *dev)

> >  	mlx5_traffic_disable(dev);

> >  	mlx5_txq_stop(dev);

> >  	mlx5_rxq_stop(dev);

> > -	mlx5_flow_delete_drop_queue(dev);

> >  	rte_errno = ret; /* Restore rte_errno. */

> >  	return -rte_errno;

> >  }

> > @@ -237,7 +230,6 @@ mlx5_dev_stop(struct rte_eth_dev *dev)

> >  	mlx5_rxq_stop(dev);

> >  	for (mr = LIST_FIRST(&priv->mr); mr; mr = LIST_FIRST(&priv->mr))

> >  		mlx5_mr_release(mr);

> > -	mlx5_flow_delete_drop_queue(dev);

> >  }

> >

> >  /**

> > --

> > 2.13.3

> 

> Thanks,

> 

> --

> Nélio Laranjeiro

> 6WIND
  
Nélio Laranjeiro April 13, 2018, 1:46 p.m. UTC | #3
On Fri, Apr 13, 2018 at 01:10:07PM +0000, Xueming(Steven) Li wrote:
> 
> 
> > -----Original Message-----
> > From: Nélio Laranjeiro <nelio.laranjeiro@6wind.com>
> > Sent: Friday, April 13, 2018 7:59 PM
> > To: Xueming(Steven) Li <xuemingl@mellanox.com>
> > Cc: Shahaf Shuler <shahafs@mellanox.com>; dev@dpdk.org
> > Subject: Re: [PATCH v3 01/14] net/mlx5: support 16 hardware priorities
> > 
> > Hi Xueming,
> > 
> > Small nips and documentation issues,
> > 
> > On Fri, Apr 13, 2018 at 07:20:10PM +0800, Xueming Li wrote:
> > > This patch supports new 16 Verbs flow priorities by trying to create a
> > > simple flow of priority 15. If 16 priorities not available, fallback
> > > to traditional 8 priorities.
> > >
> > > Verb priority mapping:
> > > 			8 priorities	>=16 priorities
> > > Control flow:		4-7		8-15
> > > User normal flow:	1-3		4-7
> > > User tunnel flow:	0-2		0-3
> > 
> > There is an overlap between tunnel and normal flows it is expected?
> 
> For 8 priorities, (4-7), (1-3) and (0-2) are the behavior of today, 
> 1 Verbs shift to make tunnel flow higher priority, please refer to 
> commit #74936571

This is a little confusing wrote like this, tunnel is normal priority
less 1 according to the inner pattern. 
Documenting it like this seems in such situation it will overlap with
the normal rule.

> > >
> > > Signed-off-by: Xueming Li <xuemingl@mellanox.com>
> > > ---
> > >  drivers/net/mlx5/mlx5.c         |  18 +++++++
> > >  drivers/net/mlx5/mlx5.h         |   5 ++
> > >  drivers/net/mlx5/mlx5_flow.c    | 112
> > +++++++++++++++++++++++++++++++++-------
> > >  drivers/net/mlx5/mlx5_trigger.c |   8 ---
> > >  4 files changed, 115 insertions(+), 28 deletions(-)
> > >
> > > diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index
> > > cfab55897..38118e524 100644
> > > --- a/drivers/net/mlx5/mlx5.c
> > > +++ b/drivers/net/mlx5/mlx5.c
> > > @@ -197,6 +197,7 @@ mlx5_dev_close(struct rte_eth_dev *dev)
> > >  		priv->txqs_n = 0;
> > >  		priv->txqs = NULL;
> > >  	}
> > > +	mlx5_flow_delete_drop_queue(dev);
> > >  	if (priv->pd != NULL) {
> > >  		assert(priv->ctx != NULL);
> > >  		claim_zero(mlx5_glue->dealloc_pd(priv->pd));
> > > @@ -612,6 +613,7 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv
> > __rte_unused,
> > >  	unsigned int mps;
> > >  	unsigned int cqe_comp;
> > >  	unsigned int tunnel_en = 0;
> > > +	unsigned int verb_priorities = 0;
> > >  	int idx;
> > >  	int i;
> > >  	struct mlx5dv_context attrs_out = {0}; @@ -993,6 +995,22 @@
> > > mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
> > >  		mlx5_set_link_up(eth_dev);
> > >  		/* Store device configuration on private structure. */
> > >  		priv->config = config;
> > > +		/* Create drop queue. */
> > > +		err = mlx5_flow_create_drop_queue(eth_dev);
> > > +		if (err) {
> > > +			DRV_LOG(ERR, "port %u drop queue allocation failed: %s",
> > > +				eth_dev->data->port_id, strerror(rte_errno));
> > > +			goto port_error;
> > > +		}
> > > +		/* Supported Verbs flow priority number detection. */
> > > +		if (verb_priorities == 0)
> > > +			verb_priorities = priv_get_max_verbs_prio(eth_dev);
> > 
> > No more priv*() rename it to mlx5_get_max_verbs_prio()
> > 
> > > +		if (verb_priorities < MLX5_VERBS_FLOW_PRIO_8) {
> > > +			DRV_LOG(ERR, "port %u wrong Verbs flow priorities: %u",
> > > +				eth_dev->data->port_id, verb_priorities);
> > > +			goto port_error;
> > > +		}
> > > +		priv->config.max_verb_prio = verb_priorities;
> > 
> > s/verb/verbs/
> > 
> > >  		continue;
> > >  port_error:
> > >  		if (priv)
> > > diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index
> > > 63b24e6bb..6e4613fe0 100644
> > > --- a/drivers/net/mlx5/mlx5.h
> > > +++ b/drivers/net/mlx5/mlx5.h
> > > @@ -89,6 +89,7 @@ struct mlx5_dev_config {
> > >  	unsigned int rx_vec_en:1; /* Rx vector is enabled. */
> > >  	unsigned int mpw_hdr_dseg:1; /* Enable DSEGs in the title WQEBB. */
> > >  	unsigned int vf_nl_en:1; /* Enable Netlink requests in VF mode. */
> > > +	unsigned int max_verb_prio; /* Number of Verb flow priorities. */
> > >  	unsigned int tso_max_payload_sz; /* Maximum TCP payload for TSO. */
> > >  	unsigned int ind_table_max_size; /* Maximum indirection table size.
> > */
> > >  	int txq_inline; /* Maximum packet size for inlining. */ @@ -105,6
> > > +106,9 @@ enum mlx5_verbs_alloc_type {
> > >  	MLX5_VERBS_ALLOC_TYPE_RX_QUEUE,
> > >  };
> > >
> > > +/* 8 Verbs priorities. */
> > > +#define MLX5_VERBS_FLOW_PRIO_8 8
> > > +
> > >  /**
> > >   * Verbs allocator needs a context to know in the callback which kind
> > of
> > >   * resources it is allocating.
> > > @@ -253,6 +257,7 @@ int mlx5_traffic_restart(struct rte_eth_dev *dev);
> > >
> > >  /* mlx5_flow.c */
> > >
> > > +unsigned int priv_get_max_verbs_prio(struct rte_eth_dev *dev);
> > >  int mlx5_flow_validate(struct rte_eth_dev *dev,
> > >  		       const struct rte_flow_attr *attr,
> > >  		       const struct rte_flow_item items[], diff --git
> > > a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c index
> > > 288610620..5c4f0b586 100644
> > > --- a/drivers/net/mlx5/mlx5_flow.c
> > > +++ b/drivers/net/mlx5/mlx5_flow.c
> > > @@ -32,8 +32,8 @@
> > >  #include "mlx5_prm.h"
> > >  #include "mlx5_glue.h"
> > >
> > > -/* Define minimal priority for control plane flows. */ -#define
> > > MLX5_CTRL_FLOW_PRIORITY 4
> > > +/* Flow priority for control plane flows. */ #define
> > > +MLX5_CTRL_FLOW_PRIORITY 1
> > >
> > >  /* Internet Protocol versions. */
> > >  #define MLX5_IPV4 4
> > > @@ -129,7 +129,7 @@ const struct hash_rxq_init hash_rxq_init[] = {
> > >  				IBV_RX_HASH_SRC_PORT_TCP |
> > >  				IBV_RX_HASH_DST_PORT_TCP),
> > >  		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
> > > -		.flow_priority = 1,
> > > +		.flow_priority = 0,
> > >  		.ip_version = MLX5_IPV4,
> > >  	},
> > >  	[HASH_RXQ_UDPV4] = {
> > > @@ -138,7 +138,7 @@ const struct hash_rxq_init hash_rxq_init[] = {
> > >  				IBV_RX_HASH_SRC_PORT_UDP |
> > >  				IBV_RX_HASH_DST_PORT_UDP),
> > >  		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
> > > -		.flow_priority = 1,
> > > +		.flow_priority = 0,
> > >  		.ip_version = MLX5_IPV4,
> > >  	},
> > >  	[HASH_RXQ_IPV4] = {
> > > @@ -146,7 +146,7 @@ const struct hash_rxq_init hash_rxq_init[] = {
> > >  				IBV_RX_HASH_DST_IPV4),
> > >  		.dpdk_rss_hf = (ETH_RSS_IPV4 |
> > >  				ETH_RSS_FRAG_IPV4),
> > > -		.flow_priority = 2,
> > > +		.flow_priority = 1,
> > >  		.ip_version = MLX5_IPV4,
> > >  	},
> > >  	[HASH_RXQ_TCPV6] = {
> > > @@ -155,7 +155,7 @@ const struct hash_rxq_init hash_rxq_init[] = {
> > >  				IBV_RX_HASH_SRC_PORT_TCP |
> > >  				IBV_RX_HASH_DST_PORT_TCP),
> > >  		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
> > > -		.flow_priority = 1,
> > > +		.flow_priority = 0,
> > >  		.ip_version = MLX5_IPV6,
> > >  	},
> > >  	[HASH_RXQ_UDPV6] = {
> > > @@ -164,7 +164,7 @@ const struct hash_rxq_init hash_rxq_init[] = {
> > >  				IBV_RX_HASH_SRC_PORT_UDP |
> > >  				IBV_RX_HASH_DST_PORT_UDP),
> > >  		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
> > > -		.flow_priority = 1,
> > > +		.flow_priority = 0,
> > >  		.ip_version = MLX5_IPV6,
> > >  	},
> > >  	[HASH_RXQ_IPV6] = {
> > > @@ -172,13 +172,13 @@ const struct hash_rxq_init hash_rxq_init[] = {
> > >  				IBV_RX_HASH_DST_IPV6),
> > >  		.dpdk_rss_hf = (ETH_RSS_IPV6 |
> > >  				ETH_RSS_FRAG_IPV6),
> > > -		.flow_priority = 2,
> > > +		.flow_priority = 1,
> > >  		.ip_version = MLX5_IPV6,
> > >  	},
> > >  	[HASH_RXQ_ETH] = {
> > >  		.hash_fields = 0,
> > >  		.dpdk_rss_hf = 0,
> > > -		.flow_priority = 3,
> > > +		.flow_priority = 2,
> > >  	},
> > >  };
> > >
> > > @@ -900,30 +900,50 @@ mlx5_flow_convert_allocate(unsigned int size,
> > struct rte_flow_error *error)
> > >   * Make inner packet matching with an higher priority from the non
> > Inner
> > >   * matching.
> > >   *
> > > + * @param dev
> > > + *   Pointer to Ethernet device.
> > >   * @param[in, out] parser
> > >   *   Internal parser structure.
> > >   * @param attr
> > >   *   User flow attribute.
> > >   */
> > >  static void
> > > -mlx5_flow_update_priority(struct mlx5_flow_parse *parser,
> > > +mlx5_flow_update_priority(struct rte_eth_dev *dev,
> > > +			  struct mlx5_flow_parse *parser,
> > >  			  const struct rte_flow_attr *attr)  {
> > > +	struct priv *priv = dev->data->dev_private;
> > >  	unsigned int i;
> > > +	uint16_t priority;
> > >
> > > +	/*			8 priorities	>= 16 priorities
> > > +	 * Control flow:	4-7		8-15
> > > +	 * User normal flow:	1-3		4-7
> > > +	 * User tunnel flow:	0-2		0-3
> > > +	 */
> > 
> > Same comment here, the tunnel flow overlap when there are only 8
> > priorities.
> > 
> > > +	priority = attr->priority * MLX5_VERBS_FLOW_PRIO_8;
> > > +	if (priv->config.max_verb_prio == MLX5_VERBS_FLOW_PRIO_8)
> > > +		priority /= 2;
> > > +	/*
> > > +	 * Lower non-tunnel flow Verbs priority 1 if only support 8 Verbs
> > > +	 * priorities, lower 4 otherwise.
> > > +	 */
> > > +	if (!parser->inner) {
> > > +		if (priv->config.max_verb_prio == MLX5_VERBS_FLOW_PRIO_8)
> > > +			priority += 1;
> > > +		else
> > > +			priority += MLX5_VERBS_FLOW_PRIO_8 / 2;
> > > +	}
> > >  	if (parser->drop) {
> > > -		parser->queue[HASH_RXQ_ETH].ibv_attr->priority =
> > > -			attr->priority +
> > > -			hash_rxq_init[HASH_RXQ_ETH].flow_priority;
> > > +		parser->queue[HASH_RXQ_ETH].ibv_attr->priority = priority +
> > > +				hash_rxq_init[HASH_RXQ_ETH].flow_priority;
> > >  		return;
> > >  	}
> > >  	for (i = 0; i != hash_rxq_init_n; ++i) {
> > > -		if (parser->queue[i].ibv_attr) {
> > > -			parser->queue[i].ibv_attr->priority =
> > > -				attr->priority +
> > > -				hash_rxq_init[i].flow_priority -
> > > -				(parser->inner ? 1 : 0);
> > > -		}
> > > +		if (!parser->queue[i].ibv_attr)
> > > +			continue;
> > > +		parser->queue[i].ibv_attr->priority = priority +
> > > +				hash_rxq_init[i].flow_priority;
> > >  	}
> > >  }
> > >
> > > @@ -1158,7 +1178,7 @@ mlx5_flow_convert(struct rte_eth_dev *dev,
> > >  	 */
> > >  	if (!parser->drop)
> > >  		mlx5_flow_convert_finalise(parser);
> > > -	mlx5_flow_update_priority(parser, attr);
> > > +	mlx5_flow_update_priority(dev, parser, attr);
> > >  exit_free:
> > >  	/* Only verification is expected, all resources should be released.
> > */
> > >  	if (!parser->create) {
> > > @@ -3161,3 +3181,55 @@ mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
> > >  	}
> > >  	return 0;
> > >  }
> > > +
> > > +/**
> > > + * Detect number of Verbs flow priorities supported.
> > > + *
> > > + * @param dev
> > > + *   Pointer to Ethernet device.
> > > + *
> > > + * @return
> > > + *   number of supported Verbs flow priority.
> > > + */
> > > +unsigned int
> > > +priv_get_max_verbs_prio(struct rte_eth_dev *dev) {
> > > +	struct priv *priv = dev->data->dev_private;
> > > +	unsigned int verb_priorities = MLX5_VERBS_FLOW_PRIO_8;
> > > +	struct {
> > > +		struct ibv_flow_attr attr;
> > > +		struct ibv_flow_spec_eth eth;
> > > +		struct ibv_flow_spec_action_drop drop;
> > > +	} flow_attr = {
> > > +		.attr = {
> > > +			.num_of_specs = 2,
> > > +		},
> > > +		.eth = {
> > > +			.type = IBV_FLOW_SPEC_ETH,
> > > +			.size = sizeof(struct ibv_flow_spec_eth),
> > > +		},
> > > +		.drop = {
> > > +			.size = sizeof(struct ibv_flow_spec_action_drop),
> > > +			.type = IBV_FLOW_SPEC_ACTION_DROP,
> > > +		},
> > > +	};
> > > +	struct ibv_flow *flow;
> > > +
> > > +	do {
> > > +		flow_attr.attr.priority = verb_priorities - 1;
> > > +		flow = mlx5_glue->create_flow(priv->flow_drop_queue->qp,
> > > +					      &flow_attr.attr);
> > > +		if (flow) {
> > > +			claim_zero(mlx5_glue->destroy_flow(flow));
> > > +			/* Try more priorities. */
> > > +			verb_priorities *= 2;
> > > +		} else {
> > > +			/* Failed, restore last right number. */
> > > +			verb_priorities /= 2;
> > > +			break;
> > > +		}
> > > +	} while (1);
> > > +	DRV_LOG(INFO, "port %u Verbs flow priorities: %d",
> > > +		dev->data->port_id, verb_priorities);
> > 
> > Please remove this developer log, it will confuse the user who will
> > believe he have N priorities which is absolutely not the case.
> 
> How about change it to DEBUG level, this should be very useful in real deployment
> trouble shooting.

I agree in using the DEBUG() instead.

> I could append something like "user flow priorities: %d" to avoid confusion..
> 
> > 
> > > +	return verb_priorities;
> > > +}
> > > diff --git a/drivers/net/mlx5/mlx5_trigger.c
> > > b/drivers/net/mlx5/mlx5_trigger.c index 6bb4ffb14..d80a2e688 100644
> > > --- a/drivers/net/mlx5/mlx5_trigger.c
> > > +++ b/drivers/net/mlx5/mlx5_trigger.c
> > > @@ -148,12 +148,6 @@ mlx5_dev_start(struct rte_eth_dev *dev)
> > >  	int ret;
> > >
> > >  	dev->data->dev_started = 1;
> > > -	ret = mlx5_flow_create_drop_queue(dev);
> > > -	if (ret) {
> > > -		DRV_LOG(ERR, "port %u drop queue allocation failed: %s",
> > > -			dev->data->port_id, strerror(rte_errno));
> > > -		goto error;
> > > -	}
> > >  	DRV_LOG(DEBUG, "port %u allocating and configuring hash Rx queues",
> > >  		dev->data->port_id);
> > >  	rte_mempool_walk(mlx5_mp2mr_iter, priv); @@ -202,7 +196,6 @@
> > > mlx5_dev_start(struct rte_eth_dev *dev)
> > >  	mlx5_traffic_disable(dev);
> > >  	mlx5_txq_stop(dev);
> > >  	mlx5_rxq_stop(dev);
> > > -	mlx5_flow_delete_drop_queue(dev);
> > >  	rte_errno = ret; /* Restore rte_errno. */
> > >  	return -rte_errno;
> > >  }
> > > @@ -237,7 +230,6 @@ mlx5_dev_stop(struct rte_eth_dev *dev)
> > >  	mlx5_rxq_stop(dev);
> > >  	for (mr = LIST_FIRST(&priv->mr); mr; mr = LIST_FIRST(&priv->mr))
> > >  		mlx5_mr_release(mr);
> > > -	mlx5_flow_delete_drop_queue(dev);
> > >  }
> > >
> > >  /**
> > > --
> > > 2.13.3

Thanks,
  

Patch

diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index cfab55897..38118e524 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -197,6 +197,7 @@  mlx5_dev_close(struct rte_eth_dev *dev)
 		priv->txqs_n = 0;
 		priv->txqs = NULL;
 	}
+	mlx5_flow_delete_drop_queue(dev);
 	if (priv->pd != NULL) {
 		assert(priv->ctx != NULL);
 		claim_zero(mlx5_glue->dealloc_pd(priv->pd));
@@ -612,6 +613,7 @@  mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
 	unsigned int mps;
 	unsigned int cqe_comp;
 	unsigned int tunnel_en = 0;
+	unsigned int verb_priorities = 0;
 	int idx;
 	int i;
 	struct mlx5dv_context attrs_out = {0};
@@ -993,6 +995,22 @@  mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
 		mlx5_set_link_up(eth_dev);
 		/* Store device configuration on private structure. */
 		priv->config = config;
+		/* Create drop queue. */
+		err = mlx5_flow_create_drop_queue(eth_dev);
+		if (err) {
+			DRV_LOG(ERR, "port %u drop queue allocation failed: %s",
+				eth_dev->data->port_id, strerror(rte_errno));
+			goto port_error;
+		}
+		/* Supported Verbs flow priority number detection. */
+		if (verb_priorities == 0)
+			verb_priorities = priv_get_max_verbs_prio(eth_dev);
+		if (verb_priorities < MLX5_VERBS_FLOW_PRIO_8) {
+			DRV_LOG(ERR, "port %u wrong Verbs flow priorities: %u",
+				eth_dev->data->port_id, verb_priorities);
+			goto port_error;
+		}
+		priv->config.max_verb_prio = verb_priorities;
 		continue;
 port_error:
 		if (priv)
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 63b24e6bb..6e4613fe0 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -89,6 +89,7 @@  struct mlx5_dev_config {
 	unsigned int rx_vec_en:1; /* Rx vector is enabled. */
 	unsigned int mpw_hdr_dseg:1; /* Enable DSEGs in the title WQEBB. */
 	unsigned int vf_nl_en:1; /* Enable Netlink requests in VF mode. */
+	unsigned int max_verb_prio; /* Number of Verb flow priorities. */
 	unsigned int tso_max_payload_sz; /* Maximum TCP payload for TSO. */
 	unsigned int ind_table_max_size; /* Maximum indirection table size. */
 	int txq_inline; /* Maximum packet size for inlining. */
@@ -105,6 +106,9 @@  enum mlx5_verbs_alloc_type {
 	MLX5_VERBS_ALLOC_TYPE_RX_QUEUE,
 };
 
+/* 8 Verbs priorities. */
+#define MLX5_VERBS_FLOW_PRIO_8 8
+
 /**
  * Verbs allocator needs a context to know in the callback which kind of
  * resources it is allocating.
@@ -253,6 +257,7 @@  int mlx5_traffic_restart(struct rte_eth_dev *dev);
 
 /* mlx5_flow.c */
 
+unsigned int priv_get_max_verbs_prio(struct rte_eth_dev *dev);
 int mlx5_flow_validate(struct rte_eth_dev *dev,
 		       const struct rte_flow_attr *attr,
 		       const struct rte_flow_item items[],
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 288610620..5c4f0b586 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -32,8 +32,8 @@ 
 #include "mlx5_prm.h"
 #include "mlx5_glue.h"
 
-/* Define minimal priority for control plane flows. */
-#define MLX5_CTRL_FLOW_PRIORITY 4
+/* Flow priority for control plane flows. */
+#define MLX5_CTRL_FLOW_PRIORITY 1
 
 /* Internet Protocol versions. */
 #define MLX5_IPV4 4
@@ -129,7 +129,7 @@  const struct hash_rxq_init hash_rxq_init[] = {
 				IBV_RX_HASH_SRC_PORT_TCP |
 				IBV_RX_HASH_DST_PORT_TCP),
 		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
-		.flow_priority = 1,
+		.flow_priority = 0,
 		.ip_version = MLX5_IPV4,
 	},
 	[HASH_RXQ_UDPV4] = {
@@ -138,7 +138,7 @@  const struct hash_rxq_init hash_rxq_init[] = {
 				IBV_RX_HASH_SRC_PORT_UDP |
 				IBV_RX_HASH_DST_PORT_UDP),
 		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
-		.flow_priority = 1,
+		.flow_priority = 0,
 		.ip_version = MLX5_IPV4,
 	},
 	[HASH_RXQ_IPV4] = {
@@ -146,7 +146,7 @@  const struct hash_rxq_init hash_rxq_init[] = {
 				IBV_RX_HASH_DST_IPV4),
 		.dpdk_rss_hf = (ETH_RSS_IPV4 |
 				ETH_RSS_FRAG_IPV4),
-		.flow_priority = 2,
+		.flow_priority = 1,
 		.ip_version = MLX5_IPV4,
 	},
 	[HASH_RXQ_TCPV6] = {
@@ -155,7 +155,7 @@  const struct hash_rxq_init hash_rxq_init[] = {
 				IBV_RX_HASH_SRC_PORT_TCP |
 				IBV_RX_HASH_DST_PORT_TCP),
 		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
-		.flow_priority = 1,
+		.flow_priority = 0,
 		.ip_version = MLX5_IPV6,
 	},
 	[HASH_RXQ_UDPV6] = {
@@ -164,7 +164,7 @@  const struct hash_rxq_init hash_rxq_init[] = {
 				IBV_RX_HASH_SRC_PORT_UDP |
 				IBV_RX_HASH_DST_PORT_UDP),
 		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
-		.flow_priority = 1,
+		.flow_priority = 0,
 		.ip_version = MLX5_IPV6,
 	},
 	[HASH_RXQ_IPV6] = {
@@ -172,13 +172,13 @@  const struct hash_rxq_init hash_rxq_init[] = {
 				IBV_RX_HASH_DST_IPV6),
 		.dpdk_rss_hf = (ETH_RSS_IPV6 |
 				ETH_RSS_FRAG_IPV6),
-		.flow_priority = 2,
+		.flow_priority = 1,
 		.ip_version = MLX5_IPV6,
 	},
 	[HASH_RXQ_ETH] = {
 		.hash_fields = 0,
 		.dpdk_rss_hf = 0,
-		.flow_priority = 3,
+		.flow_priority = 2,
 	},
 };
 
@@ -900,30 +900,50 @@  mlx5_flow_convert_allocate(unsigned int size, struct rte_flow_error *error)
  * Make inner packet matching with an higher priority from the non Inner
  * matching.
  *
+ * @param dev
+ *   Pointer to Ethernet device.
  * @param[in, out] parser
  *   Internal parser structure.
  * @param attr
  *   User flow attribute.
  */
 static void
-mlx5_flow_update_priority(struct mlx5_flow_parse *parser,
+mlx5_flow_update_priority(struct rte_eth_dev *dev,
+			  struct mlx5_flow_parse *parser,
 			  const struct rte_flow_attr *attr)
 {
+	struct priv *priv = dev->data->dev_private;
 	unsigned int i;
+	uint16_t priority;
 
+	/*			8 priorities	>= 16 priorities
+	 * Control flow:	4-7		8-15
+	 * User normal flow:	1-3		4-7
+	 * User tunnel flow:	0-2		0-3
+	 */
+	priority = attr->priority * MLX5_VERBS_FLOW_PRIO_8;
+	if (priv->config.max_verb_prio == MLX5_VERBS_FLOW_PRIO_8)
+		priority /= 2;
+	/*
+	 * Lower non-tunnel flow Verbs priority 1 if only support 8 Verbs
+	 * priorities, lower 4 otherwise.
+	 */
+	if (!parser->inner) {
+		if (priv->config.max_verb_prio == MLX5_VERBS_FLOW_PRIO_8)
+			priority += 1;
+		else
+			priority += MLX5_VERBS_FLOW_PRIO_8 / 2;
+	}
 	if (parser->drop) {
-		parser->queue[HASH_RXQ_ETH].ibv_attr->priority =
-			attr->priority +
-			hash_rxq_init[HASH_RXQ_ETH].flow_priority;
+		parser->queue[HASH_RXQ_ETH].ibv_attr->priority = priority +
+				hash_rxq_init[HASH_RXQ_ETH].flow_priority;
 		return;
 	}
 	for (i = 0; i != hash_rxq_init_n; ++i) {
-		if (parser->queue[i].ibv_attr) {
-			parser->queue[i].ibv_attr->priority =
-				attr->priority +
-				hash_rxq_init[i].flow_priority -
-				(parser->inner ? 1 : 0);
-		}
+		if (!parser->queue[i].ibv_attr)
+			continue;
+		parser->queue[i].ibv_attr->priority = priority +
+				hash_rxq_init[i].flow_priority;
 	}
 }
 
@@ -1158,7 +1178,7 @@  mlx5_flow_convert(struct rte_eth_dev *dev,
 	 */
 	if (!parser->drop)
 		mlx5_flow_convert_finalise(parser);
-	mlx5_flow_update_priority(parser, attr);
+	mlx5_flow_update_priority(dev, parser, attr);
 exit_free:
 	/* Only verification is expected, all resources should be released. */
 	if (!parser->create) {
@@ -3161,3 +3181,55 @@  mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
 	}
 	return 0;
 }
+
+/**
+ * Detect number of Verbs flow priorities supported.
+ *
+ * @param dev
+ *   Pointer to Ethernet device.
+ *
+ * @return
+ *   number of supported Verbs flow priority.
+ */
+unsigned int
+priv_get_max_verbs_prio(struct rte_eth_dev *dev)
+{
+	struct priv *priv = dev->data->dev_private;
+	unsigned int verb_priorities = MLX5_VERBS_FLOW_PRIO_8;
+	struct {
+		struct ibv_flow_attr attr;
+		struct ibv_flow_spec_eth eth;
+		struct ibv_flow_spec_action_drop drop;
+	} flow_attr = {
+		.attr = {
+			.num_of_specs = 2,
+		},
+		.eth = {
+			.type = IBV_FLOW_SPEC_ETH,
+			.size = sizeof(struct ibv_flow_spec_eth),
+		},
+		.drop = {
+			.size = sizeof(struct ibv_flow_spec_action_drop),
+			.type = IBV_FLOW_SPEC_ACTION_DROP,
+		},
+	};
+	struct ibv_flow *flow;
+
+	do {
+		flow_attr.attr.priority = verb_priorities - 1;
+		flow = mlx5_glue->create_flow(priv->flow_drop_queue->qp,
+					      &flow_attr.attr);
+		if (flow) {
+			claim_zero(mlx5_glue->destroy_flow(flow));
+			/* Try more priorities. */
+			verb_priorities *= 2;
+		} else {
+			/* Failed, restore last right number. */
+			verb_priorities /= 2;
+			break;
+		}
+	} while (1);
+	DRV_LOG(INFO, "port %u Verbs flow priorities: %d",
+		dev->data->port_id, verb_priorities);
+	return verb_priorities;
+}
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index 6bb4ffb14..d80a2e688 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -148,12 +148,6 @@  mlx5_dev_start(struct rte_eth_dev *dev)
 	int ret;
 
 	dev->data->dev_started = 1;
-	ret = mlx5_flow_create_drop_queue(dev);
-	if (ret) {
-		DRV_LOG(ERR, "port %u drop queue allocation failed: %s",
-			dev->data->port_id, strerror(rte_errno));
-		goto error;
-	}
 	DRV_LOG(DEBUG, "port %u allocating and configuring hash Rx queues",
 		dev->data->port_id);
 	rte_mempool_walk(mlx5_mp2mr_iter, priv);
@@ -202,7 +196,6 @@  mlx5_dev_start(struct rte_eth_dev *dev)
 	mlx5_traffic_disable(dev);
 	mlx5_txq_stop(dev);
 	mlx5_rxq_stop(dev);
-	mlx5_flow_delete_drop_queue(dev);
 	rte_errno = ret; /* Restore rte_errno. */
 	return -rte_errno;
 }
@@ -237,7 +230,6 @@  mlx5_dev_stop(struct rte_eth_dev *dev)
 	mlx5_rxq_stop(dev);
 	for (mr = LIST_FIRST(&priv->mr); mr; mr = LIST_FIRST(&priv->mr))
 		mlx5_mr_release(mr);
-	mlx5_flow_delete_drop_queue(dev);
 }
 
 /**