[dpdk-dev,v2,04/15] net/mlx5: support Rx tunnel type identification

Message ID 20180410133415.189905-5-xuemingl@mellanox.com (mailing list archive)
State Superseded, archived
Delegated to: Shahaf Shuler

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation fail apply patch file failure

Commit Message

Xueming Li April 10, 2018, 1:34 p.m. UTC
  This patch introduces tunnel type identification based on flow rules.
If flows of multiple tunnel types are built on the same queue,
RTE_PTYPE_TUNNEL_MASK is returned; bits in the flow mark can be used
as a tunnel type identifier.

Signed-off-by: Xueming Li <xuemingl@mellanox.com>
---
 drivers/net/mlx5/mlx5_flow.c          | 125 +++++++++++++++++++++++++++++-----
 drivers/net/mlx5/mlx5_rxq.c           |  11 ++-
 drivers/net/mlx5/mlx5_rxtx.c          |  12 ++--
 drivers/net/mlx5/mlx5_rxtx.h          |   9 ++-
 drivers/net/mlx5/mlx5_rxtx_vec_neon.h |  21 +++---
 drivers/net/mlx5/mlx5_rxtx_vec_sse.h  |  17 +++--
 6 files changed, 157 insertions(+), 38 deletions(-)
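
As the commit message notes, when several tunnel types share a Rx queue only
RTE_PTYPE_TUNNEL_MASK can be reported and the flow mark can disambiguate the
tunnel. A minimal sketch of how an application could consume this (the mark
values are hypothetical and would be chosen by the application in its own flow
rules; this is not part of the patch):

    #include <rte_mbuf.h>

    #define APP_MARK_VXLAN 1 /* hypothetical application-defined mark values */
    #define APP_MARK_GRE   2

    static uint32_t
    app_tunnel_type(const struct rte_mbuf *m)
    {
        uint32_t tun = m->packet_type & RTE_PTYPE_TUNNEL_MASK;

        if (tun != RTE_PTYPE_TUNNEL_MASK)
            return tun; /* a single tunnel type is bound to this queue */
        /* Several tunnel types share the queue: fall back to the flow mark. */
        if (m->ol_flags & PKT_RX_FDIR_ID) {
            if (m->hash.fdir.hi == APP_MARK_VXLAN)
                return RTE_PTYPE_TUNNEL_VXLAN;
            if (m->hash.fdir.hi == APP_MARK_GRE)
                return RTE_PTYPE_TUNNEL_GRE;
        }
        return 0; /* unknown tunnel type */
    }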
  

Comments

Nélio Laranjeiro April 10, 2018, 3:17 p.m. UTC | #1
On Tue, Apr 10, 2018 at 09:34:04PM +0800, Xueming Li wrote:
> This patch introduced tunnel type identification based on flow rules.
> If flows of multiple tunnel types built on same queue,
> RTE_PTYPE_TUNNEL_MASK will be returned, bits in flow mark could be used
> as tunnel type identifier.

I don't see anywhere in this patch where bits are reserved to
identify a flow, nor values which can help to identify it.

Is this missing?

Anyway, we already have very few bits in the mark, making it difficult
for the user to use; reserving some of them again may lead to removing
the mark support from the flows.

> Signed-off-by: Xueming Li <xuemingl@mellanox.com>
> ---
>  drivers/net/mlx5/mlx5_flow.c          | 125 +++++++++++++++++++++++++++++-----
>  drivers/net/mlx5/mlx5_rxq.c           |  11 ++-
>  drivers/net/mlx5/mlx5_rxtx.c          |  12 ++--
>  drivers/net/mlx5/mlx5_rxtx.h          |   9 ++-
>  drivers/net/mlx5/mlx5_rxtx_vec_neon.h |  21 +++---
>  drivers/net/mlx5/mlx5_rxtx_vec_sse.h  |  17 +++--
>  6 files changed, 157 insertions(+), 38 deletions(-)
> 
> diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
> index 870d05250..65d7a9b62 100644
> --- a/drivers/net/mlx5/mlx5_flow.c
> +++ b/drivers/net/mlx5/mlx5_flow.c
> @@ -222,6 +222,7 @@ struct rte_flow {
>  	struct rte_flow_action_rss rss_conf; /**< RSS configuration */
>  	uint16_t (*queues)[]; /**< Queues indexes to use. */
>  	uint8_t rss_key[40]; /**< copy of the RSS key. */
> +	uint32_t tunnel; /**< Tunnel type of RTE_PTYPE_TUNNEL_XXX. */
>  	struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
>  	struct mlx5_flow_counter_stats counter_stats;/**<The counter stats. */
>  	struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
> @@ -238,6 +239,19 @@ struct rte_flow {
>  	(type) == RTE_FLOW_ITEM_TYPE_VXLAN || \
>  	(type) == RTE_FLOW_ITEM_TYPE_GRE)
>  
> +const uint32_t flow_ptype[] = {
> +	[RTE_FLOW_ITEM_TYPE_VXLAN] = RTE_PTYPE_TUNNEL_VXLAN,
> +	[RTE_FLOW_ITEM_TYPE_GRE] = RTE_PTYPE_TUNNEL_GRE,
> +};
> +
> +#define PTYPE_IDX(t) ((RTE_PTYPE_TUNNEL_MASK & (t)) >> 12)
> +
> +const uint32_t ptype_ext[] = {
> +	[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)] = RTE_PTYPE_TUNNEL_VXLAN |
> +					      RTE_PTYPE_L4_UDP,
> +	[PTYPE_IDX(RTE_PTYPE_TUNNEL_GRE)] = RTE_PTYPE_TUNNEL_GRE,
> +};
> +
>  /** Structure to generate a simple graph of layers supported by the NIC. */
>  struct mlx5_flow_items {
>  	/** List of possible actions for these items. */
> @@ -437,6 +451,7 @@ struct mlx5_flow_parse {
>  	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queues indexes to use. */
>  	uint8_t rss_key[40]; /**< copy of the RSS key. */
>  	enum hash_rxq_type layer; /**< Last pattern layer detected. */
> +	uint32_t tunnel; /**< Tunnel type of RTE_PTYPE_TUNNEL_XXX. */
>  	struct ibv_counter_set *cs; /**< Holds the counter set for the rule */
>  	struct {
>  		struct ibv_flow_attr *ibv_attr;
> @@ -860,7 +875,7 @@ mlx5_flow_convert_items_validate(const struct rte_flow_item items[],
>  		if (ret)
>  			goto exit_item_not_supported;
>  		if (IS_TUNNEL(items->type)) {
> -			if (parser->inner) {
> +			if (parser->tunnel) {
>  				rte_flow_error_set(error, ENOTSUP,
>  						   RTE_FLOW_ERROR_TYPE_ITEM,
>  						   items,
> @@ -869,6 +884,7 @@ mlx5_flow_convert_items_validate(const struct rte_flow_item items[],
>  				return -rte_errno;
>  			}
>  			parser->inner = IBV_FLOW_SPEC_INNER;
> +			parser->tunnel = flow_ptype[items->type];
>  		}
>  		if (parser->drop) {
>  			parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
> @@ -1165,6 +1181,7 @@ mlx5_flow_convert(struct rte_eth_dev *dev,
>  	}
>  	/* Third step. Conversion parse, fill the specifications. */
>  	parser->inner = 0;
> +	parser->tunnel = 0;
>  	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
>  		struct mlx5_flow_data data = {
>  			.parser = parser,
> @@ -1633,6 +1650,7 @@ mlx5_flow_create_vxlan(const struct rte_flow_item *item,
>  
>  	id.vni[0] = 0;
>  	parser->inner = IBV_FLOW_SPEC_INNER;
> +	parser->tunnel = ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)];
>  	if (spec) {
>  		if (!mask)
>  			mask = default_mask;
> @@ -1686,6 +1704,7 @@ mlx5_flow_create_gre(const struct rte_flow_item *item __rte_unused,
>  	};
>  
>  	parser->inner = IBV_FLOW_SPEC_INNER;
> +	parser->tunnel = ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_GRE)];
>  	mlx5_flow_create_copy(parser, &tunnel, size);
>  	return 0;
>  }
> @@ -1864,7 +1883,8 @@ mlx5_flow_create_action_queue_rss(struct rte_eth_dev *dev,
>  				      parser->rss_conf.key_len,
>  				      hash_fields,
>  				      parser->rss_conf.queue,
> -				      parser->rss_conf.queue_num);
> +				      parser->rss_conf.queue_num,
> +				      parser->tunnel);
>  		if (flow->frxq[i].hrxq)
>  			continue;
>  		flow->frxq[i].hrxq =
> @@ -1873,7 +1893,8 @@ mlx5_flow_create_action_queue_rss(struct rte_eth_dev *dev,
>  				      parser->rss_conf.key_len,
>  				      hash_fields,
>  				      parser->rss_conf.queue,
> -				      parser->rss_conf.queue_num);
> +				      parser->rss_conf.queue_num,
> +				      parser->tunnel);
>  		if (!flow->frxq[i].hrxq) {
>  			return rte_flow_error_set(error, ENOMEM,
>  						  RTE_FLOW_ERROR_TYPE_HANDLE,
> @@ -1885,6 +1906,40 @@ mlx5_flow_create_action_queue_rss(struct rte_eth_dev *dev,
>  }
>  
>  /**
> + * RXQ update after flow rule creation.
> + *
> + * @param dev
> + *   Pointer to Ethernet device.
> + * @param flow
> + *   Pointer to the flow rule.
> + */
> +static void
> +mlx5_flow_create_update_rxqs(struct rte_eth_dev *dev, struct rte_flow *flow)
> +{
> +	struct priv *priv = dev->data->dev_private;
> +	unsigned int i;
> +
> +	if (!dev->data->dev_started)
> +		return;
> +	for (i = 0; i != flow->rss_conf.queue_num; ++i) {
> +		struct mlx5_rxq_data *rxq_data = (*priv->rxqs)
> +						 [(*flow->queues)[i]];
> +		struct mlx5_rxq_ctrl *rxq_ctrl =
> +			container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
> +		uint8_t tunnel = PTYPE_IDX(flow->tunnel);
> +
> +		rxq_data->mark |= flow->mark;
> +		if (!tunnel)
> +			continue;
> +		rxq_ctrl->tunnel_types[tunnel] += 1;

I don't understand why you need such an array; the NIC is unable to return
the tunnel type as it returns only one bit saying "tunnel".
Why not store the currently configured tunnel in the priv structure?

> +		if (rxq_data->tunnel != flow->tunnel)
> +			rxq_data->tunnel = rxq_data->tunnel ?
> +					   RTE_PTYPE_TUNNEL_MASK :
> +					   flow->tunnel;
> +	}
> +}
> +
> +/**
>   * Complete flow rule creation.
>   *
>   * @param dev
> @@ -1944,12 +1999,7 @@ mlx5_flow_create_action_queue(struct rte_eth_dev *dev,
>  				   NULL, "internal error in flow creation");
>  		goto error;
>  	}
> -	for (i = 0; i != parser->rss_conf.queue_num; ++i) {
> -		struct mlx5_rxq_data *q =
> -			(*priv->rxqs)[parser->rss_conf.queue[i]];
> -
> -		q->mark |= parser->mark;
> -	}
> +	mlx5_flow_create_update_rxqs(dev, flow);
>  	return 0;
>  error:
>  	ret = rte_errno; /* Save rte_errno before cleanup. */
> @@ -2022,6 +2072,7 @@ mlx5_flow_list_create(struct rte_eth_dev *dev,
>  	}
>  	/* Copy configuration. */
>  	flow->queues = (uint16_t (*)[])(flow + 1);
> +	flow->tunnel = parser.tunnel;
>  	flow->rss_conf = (struct rte_flow_action_rss){
>  		.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
>  		.level = 0,
> @@ -2113,9 +2164,38 @@ mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
>  	struct priv *priv = dev->data->dev_private;
>  	unsigned int i;
>  
> -	if (flow->drop || !flow->mark)
> +	if (flow->drop || !dev->data->dev_started)
>  		goto free;
> -	for (i = 0; i != flow->rss_conf.queue_num; ++i) {
> +	for (i = 0; flow->tunnel && i != flow->rss_conf.queue_num; ++i) {
> +		/* Update queue tunnel type. */
> +		struct mlx5_rxq_data *rxq_data = (*priv->rxqs)
> +						 [(*flow->queues)[i]];
> +		struct mlx5_rxq_ctrl *rxq_ctrl =
> +			container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
> +		uint8_t tunnel = PTYPE_IDX(flow->tunnel);
> +
> +		RTE_ASSERT(rxq_ctrl->tunnel_types[tunnel] > 0);

use assert() not RTE_ASSERT() or make a patch to make such move in the
whole PMD.

> +		rxq_ctrl->tunnel_types[tunnel] -= 1;
> +		if (!rxq_ctrl->tunnel_types[tunnel]) {
> +			/* Update tunnel type. */
> +			uint8_t j;
> +			uint8_t types = 0;
> +			uint8_t last;
> +
> +			for (j = 0; j < RTE_DIM(rxq_ctrl->tunnel_types); j++)
> +				if (rxq_ctrl->tunnel_types[j]) {
> +					types += 1;
> +					last = j;
> +				}
> +			/* Keep same if more than one tunnel types left. */
> +			if (types == 1)
> +				rxq_data->tunnel = ptype_ext[last];
> +			else if (types == 0)
> +				/* No tunnel type left. */
> +				rxq_data->tunnel = 0;
> +		}
> +	}
> +	for (i = 0; flow->mark && i != flow->rss_conf.queue_num; ++i) {
>  		struct rte_flow *tmp;
>  		int mark = 0;
>  
> @@ -2334,9 +2414,9 @@ mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
>  {
>  	struct priv *priv = dev->data->dev_private;
>  	struct rte_flow *flow;
> +	unsigned int i;
>  
>  	TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
> -		unsigned int i;
>  		struct mlx5_ind_table_ibv *ind_tbl = NULL;
>  
>  		if (flow->drop) {
> @@ -2382,6 +2462,16 @@ mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
>  		DRV_LOG(DEBUG, "port %u flow %p removed", dev->data->port_id,
>  			(void *)flow);
>  	}
> +	/* Cleanup Rx queue tunnel info. */
> +	for (i = 0; i != priv->rxqs_n; ++i) {
> +		struct mlx5_rxq_data *q = (*priv->rxqs)[i];
> +		struct mlx5_rxq_ctrl *rxq_ctrl =
> +			container_of(q, struct mlx5_rxq_ctrl, rxq);
> +
> +		memset((void *)rxq_ctrl->tunnel_types, 0,
> +		       sizeof(rxq_ctrl->tunnel_types));
> +		q->tunnel = 0;
> +	}
>  }

This hunk does not handle the fact the Rx queue array may have some
holes i.e. the application is allowed to ask for 10 queues and only
initialise some.  In such situation this code will segfault.

It should only memset the Rx queues making part of the flow not the
others.

>  /**
> @@ -2429,7 +2519,8 @@ mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
>  					      flow->rss_conf.key_len,
>  					      hash_rxq_init[i].hash_fields,
>  					      flow->rss_conf.queue,
> -					      flow->rss_conf.queue_num);
> +					      flow->rss_conf.queue_num,
> +					      flow->tunnel);
>  			if (flow->frxq[i].hrxq)
>  				goto flow_create;
>  			flow->frxq[i].hrxq =
> @@ -2437,7 +2528,8 @@ mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
>  					      flow->rss_conf.key_len,
>  					      hash_rxq_init[i].hash_fields,
>  					      flow->rss_conf.queue,
> -					      flow->rss_conf.queue_num);
> +					      flow->rss_conf.queue_num,
> +					      flow->tunnel);
>  			if (!flow->frxq[i].hrxq) {
>  				DRV_LOG(DEBUG,
>  					"port %u flow %p cannot be applied",
> @@ -2459,10 +2551,7 @@ mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
>  			DRV_LOG(DEBUG, "port %u flow %p applied",
>  				dev->data->port_id, (void *)flow);
>  		}
> -		if (!flow->mark)
> -			continue;
> -		for (i = 0; i != flow->rss_conf.queue_num; ++i)
> -			(*priv->rxqs)[flow->rss_conf.queue[i]]->mark = 1;
> +		mlx5_flow_create_update_rxqs(dev, flow);
>  	}
>  	return 0;
>  }
> diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
> index 1e4354ab3..351acfc0f 100644
> --- a/drivers/net/mlx5/mlx5_rxq.c
> +++ b/drivers/net/mlx5/mlx5_rxq.c
> @@ -1386,6 +1386,8 @@ mlx5_ind_table_ibv_verify(struct rte_eth_dev *dev)
>   *   first queue index will be taken for the indirection table.
>   * @param queues_n
>   *   Number of queues.
> + * @param tunnel
> + *   Tunnel type.
>   *
>   * @return
>   *   The Verbs object initialised, NULL otherwise and rte_errno is set.
> @@ -1394,7 +1396,7 @@ struct mlx5_hrxq *
>  mlx5_hrxq_new(struct rte_eth_dev *dev,
>  	      const uint8_t *rss_key, uint32_t rss_key_len,
>  	      uint64_t hash_fields,
> -	      const uint16_t *queues, uint32_t queues_n)
> +	      const uint16_t *queues, uint32_t queues_n, uint32_t tunnel)
>  {
>  	struct priv *priv = dev->data->dev_private;
>  	struct mlx5_hrxq *hrxq;
> @@ -1438,6 +1440,7 @@ mlx5_hrxq_new(struct rte_eth_dev *dev,
>  	hrxq->qp = qp;
>  	hrxq->rss_key_len = rss_key_len;
>  	hrxq->hash_fields = hash_fields;
> +	hrxq->tunnel = tunnel;
>  	memcpy(hrxq->rss_key, rss_key, rss_key_len);
>  	rte_atomic32_inc(&hrxq->refcnt);
>  	LIST_INSERT_HEAD(&priv->hrxqs, hrxq, next);
> @@ -1466,6 +1469,8 @@ mlx5_hrxq_new(struct rte_eth_dev *dev,
>   *   first queue index will be taken for the indirection table.
>   * @param queues_n
>   *   Number of queues.
> + * @param tunnel
> + *   Tunnel type.
>   *
>   * @return
>   *   An hash Rx queue on success.
> @@ -1474,7 +1479,7 @@ struct mlx5_hrxq *
>  mlx5_hrxq_get(struct rte_eth_dev *dev,
>  	      const uint8_t *rss_key, uint32_t rss_key_len,
>  	      uint64_t hash_fields,
> -	      const uint16_t *queues, uint32_t queues_n)
> +	      const uint16_t *queues, uint32_t queues_n, uint32_t tunnel)
>  {
>  	struct priv *priv = dev->data->dev_private;
>  	struct mlx5_hrxq *hrxq;
> @@ -1489,6 +1494,8 @@ mlx5_hrxq_get(struct rte_eth_dev *dev,
>  			continue;
>  		if (hrxq->hash_fields != hash_fields)
>  			continue;
> +		if (hrxq->tunnel != tunnel)
> +			continue;
>  		ind_tbl = mlx5_ind_table_ibv_get(dev, queues, queues_n);
>  		if (!ind_tbl)
>  			continue;
> diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
> index 1f422c70b..d061dfc8a 100644
> --- a/drivers/net/mlx5/mlx5_rxtx.c
> +++ b/drivers/net/mlx5/mlx5_rxtx.c
> @@ -34,7 +34,7 @@
>  #include "mlx5_prm.h"
>  
>  static __rte_always_inline uint32_t
> -rxq_cq_to_pkt_type(volatile struct mlx5_cqe *cqe);
> +rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe);
>  
>  static __rte_always_inline int
>  mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
> @@ -125,12 +125,14 @@ mlx5_set_ptype_table(void)
>  	(*p)[0x8a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
>  		     RTE_PTYPE_L4_UDP;
>  	/* Tunneled - L3 */
> +	(*p)[0x40] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
>  	(*p)[0x41] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
>  		     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
>  		     RTE_PTYPE_INNER_L4_NONFRAG;
>  	(*p)[0x42] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
>  		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
>  		     RTE_PTYPE_INNER_L4_NONFRAG;
> +	(*p)[0xc0] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
>  	(*p)[0xc1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
>  		     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
>  		     RTE_PTYPE_INNER_L4_NONFRAG;
> @@ -1577,6 +1579,8 @@ mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
>  /**
>   * Translate RX completion flags to packet type.
>   *
> + * @param[in] rxq
> + *   Pointer to RX queue structure.
>   * @param[in] cqe
>   *   Pointer to CQE.
>   *
> @@ -1586,7 +1590,7 @@ mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
>   *   Packet type for struct rte_mbuf.
>   */
>  static inline uint32_t
> -rxq_cq_to_pkt_type(volatile struct mlx5_cqe *cqe)
> +rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe)
>  {
>  	uint8_t idx;
>  	uint8_t pinfo = cqe->pkt_info;
> @@ -1601,7 +1605,7 @@ rxq_cq_to_pkt_type(volatile struct mlx5_cqe *cqe)
>  	 * bit[7] = outer_l3_type
>  	 */
>  	idx = ((pinfo & 0x3) << 6) | ((ptype & 0xfc00) >> 10);
> -	return mlx5_ptype_table[idx];
> +	return mlx5_ptype_table[idx] | rxq->tunnel * !!(idx & (1 << 6));
>  }
>  
>  /**
> @@ -1833,7 +1837,7 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
>  			pkt = seg;
>  			assert(len >= (rxq->crc_present << 2));
>  			/* Update packet information. */
> -			pkt->packet_type = rxq_cq_to_pkt_type(cqe);
> +			pkt->packet_type = rxq_cq_to_pkt_type(rxq, cqe);
>  			pkt->ol_flags = 0;
>  			if (rss_hash_res && rxq->rss_hash) {
>  				pkt->hash.rss = rss_hash_res;
> diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
> index a702cb603..6866f6818 100644
> --- a/drivers/net/mlx5/mlx5_rxtx.h
> +++ b/drivers/net/mlx5/mlx5_rxtx.h
> @@ -104,6 +104,7 @@ struct mlx5_rxq_data {
>  	void *cq_uar; /* CQ user access region. */
>  	uint32_t cqn; /* CQ number. */
>  	uint8_t cq_arm_sn; /* CQ arm seq number. */
> +	uint32_t tunnel; /* Tunnel information. */
>  } __rte_cache_aligned;
>  
>  /* Verbs Rx queue elements. */
> @@ -125,6 +126,7 @@ struct mlx5_rxq_ctrl {
>  	struct mlx5_rxq_ibv *ibv; /* Verbs elements. */
>  	struct mlx5_rxq_data rxq; /* Data path structure. */
>  	unsigned int socket; /* CPU socket ID for allocations. */
> +	uint32_t tunnel_types[16]; /* Tunnel type counter. */
>  	unsigned int irq:1; /* Whether IRQ is enabled. */
>  	uint16_t idx; /* Queue index. */
>  };
> @@ -145,6 +147,7 @@ struct mlx5_hrxq {
>  	struct mlx5_ind_table_ibv *ind_table; /* Indirection table. */
>  	struct ibv_qp *qp; /* Verbs queue pair. */
>  	uint64_t hash_fields; /* Verbs Hash fields. */
> +	uint32_t tunnel; /* Tunnel type. */
>  	uint32_t rss_key_len; /* Hash key length in bytes. */
>  	uint8_t rss_key[]; /* Hash key. */
>  };
> @@ -248,11 +251,13 @@ int mlx5_ind_table_ibv_verify(struct rte_eth_dev *dev);
>  struct mlx5_hrxq *mlx5_hrxq_new(struct rte_eth_dev *dev,
>  				const uint8_t *rss_key, uint32_t rss_key_len,
>  				uint64_t hash_fields,
> -				const uint16_t *queues, uint32_t queues_n);
> +				const uint16_t *queues, uint32_t queues_n,
> +				uint32_t tunnel);
>  struct mlx5_hrxq *mlx5_hrxq_get(struct rte_eth_dev *dev,
>  				const uint8_t *rss_key, uint32_t rss_key_len,
>  				uint64_t hash_fields,
> -				const uint16_t *queues, uint32_t queues_n);
> +				const uint16_t *queues, uint32_t queues_n,
> +				uint32_t tunnel);
>  int mlx5_hrxq_release(struct rte_eth_dev *dev, struct mlx5_hrxq *hxrq);
>  int mlx5_hrxq_ibv_verify(struct rte_eth_dev *dev);
>  uint64_t mlx5_get_rx_port_offloads(void);
> diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
> index bbe1818ef..9f9136108 100644
> --- a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
> +++ b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
> @@ -551,6 +551,7 @@ rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq,
>  	const uint64x1_t mbuf_init = vld1_u64(&rxq->mbuf_initializer);
>  	const uint64x1_t r32_mask = vcreate_u64(0xffffffff);
>  	uint64x2_t rearm0, rearm1, rearm2, rearm3;
> +	uint8_t pt_idx0, pt_idx1, pt_idx2, pt_idx3;
>  
>  	if (rxq->mark) {
>  		const uint32x4_t ft_def = vdupq_n_u32(MLX5_FLOW_MARK_DEFAULT);
> @@ -583,14 +584,18 @@ rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq,
>  	ptype = vshrn_n_u32(ptype_info, 10);
>  	/* Errored packets will have RTE_PTYPE_ALL_MASK. */
>  	ptype = vorr_u16(ptype, op_err);
> -	pkts[0]->packet_type =
> -		mlx5_ptype_table[vget_lane_u8(vreinterpret_u8_u16(ptype), 6)];
> -	pkts[1]->packet_type =
> -		mlx5_ptype_table[vget_lane_u8(vreinterpret_u8_u16(ptype), 4)];
> -	pkts[2]->packet_type =
> -		mlx5_ptype_table[vget_lane_u8(vreinterpret_u8_u16(ptype), 2)];
> -	pkts[3]->packet_type =
> -		mlx5_ptype_table[vget_lane_u8(vreinterpret_u8_u16(ptype), 0)];
> +	pt_idx0 = vget_lane_u8(vreinterpret_u8_u16(ptype), 6);
> +	pt_idx1 = vget_lane_u8(vreinterpret_u8_u16(ptype), 4);
> +	pt_idx2 = vget_lane_u8(vreinterpret_u8_u16(ptype), 2);
> +	pt_idx3 = vget_lane_u8(vreinterpret_u8_u16(ptype), 0);
> +	pkts[0]->packet_type = mlx5_ptype_table[pt_idx0] |
> +			       !!(pt_idx0 & (1 << 6)) * rxq->tunnel;
> +	pkts[1]->packet_type = mlx5_ptype_table[pt_idx1] |
> +			       !!(pt_idx1 & (1 << 6)) * rxq->tunnel;
> +	pkts[2]->packet_type = mlx5_ptype_table[pt_idx2] |
> +			       !!(pt_idx2 & (1 << 6)) * rxq->tunnel;
> +	pkts[3]->packet_type = mlx5_ptype_table[pt_idx3] |
> +			       !!(pt_idx3 & (1 << 6)) * rxq->tunnel;
>  	/* Fill flags for checksum and VLAN. */
>  	pinfo = vandq_u32(ptype_info, ptype_ol_mask);
>  	pinfo = vreinterpretq_u32_u8(
> diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
> index c088bcb51..d2492481d 100644
> --- a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
> +++ b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
> @@ -542,6 +542,7 @@ rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq, __m128i cqes[4],
>  	const __m128i mbuf_init =
>  		_mm_loadl_epi64((__m128i *)&rxq->mbuf_initializer);
>  	__m128i rearm0, rearm1, rearm2, rearm3;
> +	uint8_t pt_idx0, pt_idx1, pt_idx2, pt_idx3;
>  
>  	/* Extract pkt_info field. */
>  	pinfo0 = _mm_unpacklo_epi32(cqes[0], cqes[1]);
> @@ -595,10 +596,18 @@ rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq, __m128i cqes[4],
>  	/* Errored packets will have RTE_PTYPE_ALL_MASK. */
>  	op_err = _mm_srli_epi16(op_err, 8);
>  	ptype = _mm_or_si128(ptype, op_err);
> -	pkts[0]->packet_type = mlx5_ptype_table[_mm_extract_epi8(ptype, 0)];
> -	pkts[1]->packet_type = mlx5_ptype_table[_mm_extract_epi8(ptype, 2)];
> -	pkts[2]->packet_type = mlx5_ptype_table[_mm_extract_epi8(ptype, 4)];
> -	pkts[3]->packet_type = mlx5_ptype_table[_mm_extract_epi8(ptype, 6)];
> +	pt_idx0 = _mm_extract_epi8(ptype, 0);
> +	pt_idx1 = _mm_extract_epi8(ptype, 2);
> +	pt_idx2 = _mm_extract_epi8(ptype, 4);
> +	pt_idx3 = _mm_extract_epi8(ptype, 6);
> +	pkts[0]->packet_type = mlx5_ptype_table[pt_idx0] |
> +			       !!(pt_idx0 & (1 << 6)) * rxq->tunnel;
> +	pkts[1]->packet_type = mlx5_ptype_table[pt_idx1] |
> +			       !!(pt_idx1 & (1 << 6)) * rxq->tunnel;
> +	pkts[2]->packet_type = mlx5_ptype_table[pt_idx2] |
> +			       !!(pt_idx2 & (1 << 6)) * rxq->tunnel;
> +	pkts[3]->packet_type = mlx5_ptype_table[pt_idx3] |
> +			       !!(pt_idx3 & (1 << 6)) * rxq->tunnel;
>  	/* Fill flags for checksum and VLAN. */
>  	pinfo = _mm_and_si128(pinfo, ptype_ol_mask);
>  	pinfo = _mm_shuffle_epi8(cv_flag_sel, pinfo);
> -- 
> 2.13.3

Regards,
  
Xueming Li April 11, 2018, 8:11 a.m. UTC | #2
Hi Nelio,

> -----Original Message-----
> From: Nélio Laranjeiro <nelio.laranjeiro@6wind.com>
> Sent: Tuesday, April 10, 2018 11:17 PM
> To: Xueming(Steven) Li <xuemingl@mellanox.com>
> Cc: Shahaf Shuler <shahafs@mellanox.com>; dev@dpdk.org
> Subject: Re: [PATCH v2 04/15] net/mlx5: support Rx tunnel type
> identification
> 
> On Tue, Apr 10, 2018 at 09:34:04PM +0800, Xueming Li wrote:
> > This patch introduced tunnel type identification based on flow rules.
> > If flows of multiple tunnel types built on same queue,
> > RTE_PTYPE_TUNNEL_MASK will be returned, bits in flow mark could be
> > used as tunnel type identifier.
> 
> I don't see anywhere in this patch where the bits are reserved to identify
> a flow, nor values which can help to identify it.
> 
> Is this missing?
> 
> Anyway we have already very few bits in the mark making it difficult to be
> used by the user, reserving again some to may lead to remove the mark
> support from the flows.

Not all users will use multiple tunnel types; this is not included in this patch
set and is left to the user's decision. I'll update the comments to make this clear.

> 
> > Signed-off-by: Xueming Li <xuemingl@mellanox.com>
<snip/>
> > +		rxq_data->mark |= flow->mark;
> > +		if (!tunnel)
> > +			continue;
> > +		rxq_ctrl->tunnel_types[tunnel] += 1;
> 
> I don't understand why you need such array, the NIC is unable to return
> the tunnel type has it returns only one bit saying tunnel.
> Why don't it store in the priv structure the current configured tunnel?

This array is used to count the tunnel types bound to a queue: if only one
tunnel type is bound, ptype will report that tunnel type; TUNNEL MASK (max
value) will be returned if multiple types are bound to a queue.

The flow RSS action specifies the queues bound to the tunnel, thus we can't
assume all queues have the same tunnel types, so this is a per-queue structure.
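
A minimal standalone sketch of this per-queue accounting (the names and the
16-slot array mirror the patch hunks quoted in this thread; it is not the PMD
code itself):

    #include <stdint.h>

    #define TUNNEL_SLOTS 16 /* matches tunnel_types[16] in mlx5_rxq_ctrl */

    struct rxq_tunnel_state {
        uint32_t counters[TUNNEL_SLOTS]; /* flows per tunnel type on the queue */
        uint32_t ptype;                  /* tunnel ptype reported on this queue */
    };

    /* A flow with tunnel type index 'idx' and packet type 'ptype' is attached. */
    void
    tunnel_attach(struct rxq_tunnel_state *q, unsigned int idx, uint32_t ptype,
                  uint32_t ptype_mask)
    {
        q->counters[idx] += 1;
        if (!q->ptype)
            q->ptype = ptype;      /* single tunnel type: report it exactly */
        else if (q->ptype != ptype)
            q->ptype = ptype_mask; /* several tunnel types: report the mask */
    }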


> 
<snip/>
> > +	/* Cleanup Rx queue tunnel info. */
> > +	for (i = 0; i != priv->rxqs_n; ++i) {
> > +		struct mlx5_rxq_data *q = (*priv->rxqs)[i];
> > +		struct mlx5_rxq_ctrl *rxq_ctrl =
> > +			container_of(q, struct mlx5_rxq_ctrl, rxq);
> > +
> > +		memset((void *)rxq_ctrl->tunnel_types, 0,
> > +		       sizeof(rxq_ctrl->tunnel_types));
> > +		q->tunnel = 0;
> > +	}
> >  }
> 
> This hunk does not handle the fact the Rx queue array may have some holes
> i.e. the application is allowed to ask for 10 queues and only initialise
> some.  In such situation this code will segfault.

In other words, "q" could be NULL, correct? I'll add a check for this.
BTW, there should be an action item to add such a check in RSS/queue flow creation.
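
For illustration, a sketch of that cleanup loop with the NULL check added
(based on the hunk quoted above; the final code may differ):

    /* Cleanup Rx queue tunnel info, skipping queues never set up by the app. */
    for (i = 0; i != priv->rxqs_n; ++i) {
        struct mlx5_rxq_data *q = (*priv->rxqs)[i];
        struct mlx5_rxq_ctrl *rxq_ctrl;

        if (q == NULL)
            continue;
        rxq_ctrl = container_of(q, struct mlx5_rxq_ctrl, rxq);
        memset((void *)rxq_ctrl->tunnel_types, 0,
               sizeof(rxq_ctrl->tunnel_types));
        q->tunnel = 0;
    }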

> 
> It should only memset the Rx queues making part of the flow not the others.

Cleaning this (decreasing the tunnel_types counter of each queue) from each flow
would be time consuming. If an error happened, the counter would not be cleared
and such a state would impact the tunnel type after the port is started again.

> 
<snip/>
> Regards,
> 
> --
> Nélio Laranjeiro
> 6WIND
  
Nélio Laranjeiro April 12, 2018, 9:50 a.m. UTC | #3
On Wed, Apr 11, 2018 at 08:11:50AM +0000, Xueming(Steven) Li wrote:
> Hi Nelio,
> 
> > -----Original Message-----
> > From: Nélio Laranjeiro <nelio.laranjeiro@6wind.com>
> > Sent: Tuesday, April 10, 2018 11:17 PM
> > To: Xueming(Steven) Li <xuemingl@mellanox.com>
> > Cc: Shahaf Shuler <shahafs@mellanox.com>; dev@dpdk.org
> > Subject: Re: [PATCH v2 04/15] net/mlx5: support Rx tunnel type
> > identification
> > 
> > On Tue, Apr 10, 2018 at 09:34:04PM +0800, Xueming Li wrote:
> > > This patch introduced tunnel type identification based on flow rules.
> > > If flows of multiple tunnel types built on same queue,
> > > RTE_PTYPE_TUNNEL_MASK will be returned, bits in flow mark could be
> > > used as tunnel type identifier.
> > 
> > I don't see anywhere in this patch where the bits are reserved to identify
> > a flow, nor values which can help to identify it.
> > 
> > Is this missing?
> > 
> > Anyway we have already very few bits in the mark making it difficult to be
> > used by the user, reserving again some to may lead to remove the mark
> > support from the flows.
> 
> Not all users will use multiple tunnel types, this is not included in this patch
> set and left to user decision. I'll update comments to make this clear.

Thanks,

> > > Signed-off-by: Xueming Li <xuemingl@mellanox.com>
<snip/>
> > >  /**
> > > + * RXQ update after flow rule creation.
> > > + *
> > > + * @param dev
> > > + *   Pointer to Ethernet device.
> > > + * @param flow
> > > + *   Pointer to the flow rule.
> > > + */
> > > +static void
> > > +mlx5_flow_create_update_rxqs(struct rte_eth_dev *dev, struct rte_flow
> > > +*flow) {
> > > +	struct priv *priv = dev->data->dev_private;
> > > +	unsigned int i;
> > > +
> > > +	if (!dev->data->dev_started)
> > > +		return;
> > > +	for (i = 0; i != flow->rss_conf.queue_num; ++i) {
> > > +		struct mlx5_rxq_data *rxq_data = (*priv->rxqs)
> > > +						 [(*flow->queues)[i]];
> > > +		struct mlx5_rxq_ctrl *rxq_ctrl =
> > > +			container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
> > > +		uint8_t tunnel = PTYPE_IDX(flow->tunnel);
> > > +
> > > +		rxq_data->mark |= flow->mark;
> > > +		if (!tunnel)
> > > +			continue;
> > > +		rxq_ctrl->tunnel_types[tunnel] += 1;
> > 
> > I don't understand why you need such array, the NIC is unable to return
> > the tunnel type has it returns only one bit saying tunnel.
> > Why don't it store in the priv structure the current configured tunnel?
> 
> This array is used to count tunnel types bound to queue, if only one tunnel type,
> ptype will report that tunnel type, TUNNEL MASK(max value) will be returned if 
> multiple types bound to a queue.
> 
> Flow rss action specifies queues that binding to tunnel, thus we can't assume
> all queues have same tunnel types, so this is a per queue structure.

There is something I am missing here, how in the dataplane the PMD can
understand from 1 bit which kind of tunnel the packet is matching?

<snip/>
> > > @@ -2334,9 +2414,9 @@ mlx5_flow_stop(struct rte_eth_dev *dev, struct
> > > mlx5_flows *list)  {
> > >  	struct priv *priv = dev->data->dev_private;
> > >  	struct rte_flow *flow;
> > > +	unsigned int i;
> > >
> > >  	TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
> > > -		unsigned int i;
> > >  		struct mlx5_ind_table_ibv *ind_tbl = NULL;
> > >
> > >  		if (flow->drop) {
> > > @@ -2382,6 +2462,16 @@ mlx5_flow_stop(struct rte_eth_dev *dev, struct
> > mlx5_flows *list)
> > >  		DRV_LOG(DEBUG, "port %u flow %p removed", dev->data->port_id,
> > >  			(void *)flow);
> > >  	}
> > > +	/* Cleanup Rx queue tunnel info. */
> > > +	for (i = 0; i != priv->rxqs_n; ++i) {
> > > +		struct mlx5_rxq_data *q = (*priv->rxqs)[i];
> > > +		struct mlx5_rxq_ctrl *rxq_ctrl =
> > > +			container_of(q, struct mlx5_rxq_ctrl, rxq);
> > > +
> > > +		memset((void *)rxq_ctrl->tunnel_types, 0,
> > > +		       sizeof(rxq_ctrl->tunnel_types));
> > > +		q->tunnel = 0;
> > > +	}
> > >  }
> > 
> > This hunk does not handle the fact the Rx queue array may have some holes
> > i.e. the application is allowed to ask for 10 queues and only initialise
> > some.  In such situation this code will segfault.
> 
> In other words, "q" could be NULL, correct? I'll add check for this.

Correct.

> BTW, there should be an action item to add such check in rss/queue flow creation.

As it is the responsibility of the application/user to make rule according
to what it has configured, it has not been added.  It can still be
added, but it cannot be considered as a fix.

> > It should only memset the Rx queues making part of the flow not the others.
> 
> Clean this(decrease tunnel_types counter of each queue) from each flow would be time 
> consuming.

Considering flows are already relying on syscall to communicate with
the kernel, the extra cycles consumption to only clear the queues making
part of this flow is neglectable.  

By the way in the same function the mark is cleared only for the queues
making part of the flow, the same loop can be used to clear those tunnel
informations at the same time.

> If an error happened, counter will not be cleared and such state will
> impact tunnel type after port start again.

Unless an implementation error which other kind of them do you fear to
happen?

Thanks,
  
Xueming Li April 12, 2018, 2:27 p.m. UTC | #4
> -----Original Message-----
> From: Nélio Laranjeiro <nelio.laranjeiro@6wind.com>
> Sent: Thursday, April 12, 2018 5:51 PM
> To: Xueming(Steven) Li <xuemingl@mellanox.com>
> Cc: Shahaf Shuler <shahafs@mellanox.com>; dev@dpdk.org
> Subject: Re: [PATCH v2 04/15] net/mlx5: support Rx tunnel type
> identification
> 
> On Wed, Apr 11, 2018 at 08:11:50AM +0000, Xueming(Steven) Li wrote:
<snip/>
> There is something I am missing here, how in the dataplane the PMD can
> understand from 1 bit which kind of tunnel the packet is matching?

The code right under this line is the answer, let me post it here:
		if (rxq_data->tunnel != flow->tunnel)
			rxq_data->tunnel = rxq_data->tunnel ?
					   RTE_PTYPE_TUNNEL_MASK :
					   flow->tunnel;
If no tunnel type is associated with the rxq yet, the tunnel type from the flow is used.
If a different tunnel type comes from a new flow, RTE_PTYPE_TUNNEL_MASK is used.
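
As a worked example (values from the ptype_ext table in the patch): a queue
starts with rxq_data->tunnel == 0; the first VXLAN flow sets it to
RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP; a later GRE flow on the same queue
carries a different tunnel value, so it is widened to RTE_PTYPE_TUNNEL_MASK.
The data path therefore reports the exact tunnel type only while a single
tunnel type is bound to the queue.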

> 
<snip/>
> Considering flows are already relying on syscall to communicate with the
> kernel, the extra cycles consumption to only clear the queues making part
> of this flow is neglectable.
> 
> By the way in the same function the mark is cleared only for the queues
> making part of the flow, the same loop can be used to clear those tunnel
> informations at the same time.
> 
> > If an error happened, counter will not be cleared and such state will
> > impact tunnel type after port start again.
> 
> Unless an implementation error which other kind of them do you fear to
> happen?

The mark of the rxq is simply reset to 0, while this field is a counter; the
final target is to clear the field value, so my code should be straightforward
and error free 😊

From a quick look, this function could be much simpler than what it is today
(a rough sketch follows below):
1. clean the verbs flow and hrxq where possible, regardless of flow type.
2. clean the rxq state: mark and tunnel_types.
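
A rough sketch of what such a simplified cleanup could look like, reusing the
structures from the patch and adding the NULL check for holes in the Rx queue
array discussed above; the helper name is made up for illustration:

    /* Clear per-queue flow state (mark, tunnel type and tunnel counters)
     * for all initialised Rx queues; queues the application never set up
     * are skipped. */
    static void
    mlx5_flow_rxqs_clean(struct rte_eth_dev *dev)
    {
        struct priv *priv = dev->data->dev_private;
        unsigned int i;

        for (i = 0; i != priv->rxqs_n; ++i) {
            struct mlx5_rxq_data *q = (*priv->rxqs)[i];
            struct mlx5_rxq_ctrl *rxq_ctrl;

            if (!q)
                continue;
            rxq_ctrl = container_of(q, struct mlx5_rxq_ctrl, rxq);
            q->mark = 0;
            q->tunnel = 0;
            memset(rxq_ctrl->tunnel_types, 0,
                   sizeof(rxq_ctrl->tunnel_types));
        }
    }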

> 
> Thanks,
> 
> --
> Nélio Laranjeiro
> 6WIND
  
Nélio Laranjeiro April 13, 2018, 8:37 a.m. UTC | #5
On Thu, Apr 12, 2018 at 02:27:45PM +0000, Xueming(Steven) Li wrote:
> > -----Original Message-----
> > From: Nélio Laranjeiro <nelio.laranjeiro@6wind.com>
> > Sent: Thursday, April 12, 2018 5:51 PM
> > To: Xueming(Steven) Li <xuemingl@mellanox.com>
> > Cc: Shahaf Shuler <shahafs@mellanox.com>; dev@dpdk.org
> > Subject: Re: [PATCH v2 04/15] net/mlx5: support Rx tunnel type
> > identification
> > 
> > On Wed, Apr 11, 2018 at 08:11:50AM +0000, Xueming(Steven) Li wrote:
> > > Hi Nelio,
> > >
> > > > -----Original Message-----
> > > > From: Nélio Laranjeiro <nelio.laranjeiro@6wind.com>
> > > > Sent: Tuesday, April 10, 2018 11:17 PM
> > > > To: Xueming(Steven) Li <xuemingl@mellanox.com>
> > > > Cc: Shahaf Shuler <shahafs@mellanox.com>; dev@dpdk.org
> > > > Subject: Re: [PATCH v2 04/15] net/mlx5: support Rx tunnel type
> > > > identification
> > > >
> > > > On Tue, Apr 10, 2018 at 09:34:04PM +0800, Xueming Li wrote:
> > > > > This patch introduced tunnel type identification based on flow rules.
> > > > > If flows of multiple tunnel types built on same queue,
> > > > > RTE_PTYPE_TUNNEL_MASK will be returned, bits in flow mark could be
> > > > > used as tunnel type identifier.
> > > >
> > > > I don't see anywhere in this patch where the bits are reserved to
> > > > identify a flow, nor values which can help to identify it.
> > > >
> > > > Is this missing?
> > > >
> > > > Anyway we have already very few bits in the mark making it difficult
> > > > to be used by the user, reserving again some to may lead to remove
> > > > the mark support from the flows.
> > >
> > > Not all users will use multiple tunnel types, this is not included in
> > > this patch set and left to user decision. I'll update comments to make
> > this clear.
> > 
> > Thanks,
> > 
> > > > > Signed-off-by: Xueming Li <xuemingl@mellanox.com>
> > <snip/>
> > > > >  /**
> > > > > + * RXQ update after flow rule creation.
> > > > > + *
> > > > > + * @param dev
> > > > > + *   Pointer to Ethernet device.
> > > > > + * @param flow
> > > > > + *   Pointer to the flow rule.
> > > > > + */
> > > > > +static void
> > > > > +mlx5_flow_create_update_rxqs(struct rte_eth_dev *dev, struct
> > > > > +rte_flow
> > > > > +*flow) {
> > > > > +	struct priv *priv = dev->data->dev_private;
> > > > > +	unsigned int i;
> > > > > +
> > > > > +	if (!dev->data->dev_started)
> > > > > +		return;
> > > > > +	for (i = 0; i != flow->rss_conf.queue_num; ++i) {
> > > > > +		struct mlx5_rxq_data *rxq_data = (*priv->rxqs)
> > > > > +						 [(*flow->queues)[i]];
> > > > > +		struct mlx5_rxq_ctrl *rxq_ctrl =
> > > > > +			container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
> > > > > +		uint8_t tunnel = PTYPE_IDX(flow->tunnel);
> > > > > +
> > > > > +		rxq_data->mark |= flow->mark;
> > > > > +		if (!tunnel)
> > > > > +			continue;
> > > > > +		rxq_ctrl->tunnel_types[tunnel] += 1;
> > > >
> > > > I don't understand why you need such array, the NIC is unable to
> > > > return the tunnel type has it returns only one bit saying tunnel.
> > > > Why don't it store in the priv structure the current configured tunnel?
> > >
> > > This array is used to count tunnel types bound to queue, if only one
> > > tunnel type, ptype will report that tunnel type, TUNNEL MASK(max
> > > value) will be returned if multiple types bound to a queue.
> > >
> > > Flow rss action specifies queues that binding to tunnel, thus we can't
> > > assume all queues have same tunnel types, so this is a per queue
> > structure.
> > 
> > There is something I am missing here, how in the dataplane the PMD can
> > understand from 1 bit which kind of tunnel the packet is matching?
> 
> The code under this line is answer, let me post here: 
> 		if (rxq_data->tunnel != flow->tunnel)
> 			rxq_data->tunnel = rxq_data->tunnel ?
> 					   RTE_PTYPE_TUNNEL_MASK :
> 					   flow->tunnel;
> If no tunnel type associated to rxq, use tunnel type from flow.
> If a new tunnel type from flow, use RTE_PTYPE_TUNNEL_MASK.

From my understanding, when in the same queue there are several tunnel
offloads, the mbuf ptype will contain RTE_PTYPE_TUNNEL_MASK:

   @@ -1601,7 +1605,7 @@ rxq_cq_to_pkt_type(volatile struct mlx5_cqe *cqe)
           * bit[7] = outer_l3_type
           */
          idx = ((pinfo & 0x3) << 6) | ((ptype & 0xfc00) >> 10);
  -       return mlx5_ptype_table[idx];
  +       return mlx5_ptype_table[idx] | rxq->tunnel * !!(idx & (1 << 6));
   }


Used by Rx burst functions,

/* Update packet information. */
 pkt->packet_type = rxq_cq_to_pkt_type(cqe);

Is this correct?

There is another strange point here, 

 +       [PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)] = RTE_PTYPE_TUNNEL_VXLAN |
 +                                             RTE_PTYPE_L4_UDP,

According to the RFC 7348 [1] having a VXLAN with an outer IPv6 is
possible.  How do you handle it?

> > <snip/>
> > > > > @@ -2334,9 +2414,9 @@ mlx5_flow_stop(struct rte_eth_dev *dev,
> > > > > struct mlx5_flows *list)  {
> > > > >  	struct priv *priv = dev->data->dev_private;
> > > > >  	struct rte_flow *flow;
> > > > > +	unsigned int i;
> > > > >
> > > > >  	TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
> > > > > -		unsigned int i;
> > > > >  		struct mlx5_ind_table_ibv *ind_tbl = NULL;
> > > > >
> > > > >  		if (flow->drop) {
> > > > > @@ -2382,6 +2462,16 @@ mlx5_flow_stop(struct rte_eth_dev *dev,
> > > > > struct
> > > > mlx5_flows *list)
> > > > >  		DRV_LOG(DEBUG, "port %u flow %p removed", dev->data-
> > >port_id,
> > > > >  			(void *)flow);
> > > > >  	}
> > > > > +	/* Cleanup Rx queue tunnel info. */
> > > > > +	for (i = 0; i != priv->rxqs_n; ++i) {
> > > > > +		struct mlx5_rxq_data *q = (*priv->rxqs)[i];
> > > > > +		struct mlx5_rxq_ctrl *rxq_ctrl =
> > > > > +			container_of(q, struct mlx5_rxq_ctrl, rxq);
> > > > > +
> > > > > +		memset((void *)rxq_ctrl->tunnel_types, 0,
> > > > > +		       sizeof(rxq_ctrl->tunnel_types));
> > > > > +		q->tunnel = 0;
> > > > > +	}
> > > > >  }
> > > >
> > > > This hunk does not handle the fact the Rx queue array may have some
> > > > holes i.e. the application is allowed to ask for 10 queues and only
> > > > initialise some.  In such situation this code will segfault.
> > >
> > > In other words, "q" could be NULL, correct? I'll add check for this.
> > 
> > Correct.
> > 
> > > BTW, there should be an action item to add such check in rss/queue flow
> > creation.
> > 
> > As it is the responsibility of the application/user to make rule according
> > to what it has configured, it has not been added.  It can still be added,
> > but it cannot be considered as a fix.
> > 
> > > > It should only memset the Rx queues making part of the flow not the
> > others.
> > >
> > > Clean this(decrease tunnel_types counter of each queue) from each flow
> > > would be time consuming.
> > 
> > Considering flows are already relying on syscall to communicate with the
> > kernel, the extra cycles consumption to only clear the queues making part
> > of this flow is neglectable.
> > 
> > By the way in the same function the mark is cleared only for the queues
> > making part of the flow, the same loop can be used to clear those tunnel
> > informations at the same time.
> > 
> > > If an error happened, counter will not be cleared and such state will
> > > impact tunnel type after port start again.
> > 
> > Unless an implementation error which other kind of them do you fear to
> > happen?
> 
> Mark of rxq simply reset to 0, this field is counter, the final target is to 
> clear field value, so my code should be straight forward and error free 😊
> 
> From a quick look, this function could be much simple that what it is today:
> 1. clean verb flow and hrex where possible, despite of flow type.
> 2. clean rxq state: mark and tunnel_types.

Ok.

Thanks,

[1] https://dpdk.org/patch/37965
  
Xueming Li April 13, 2018, 12:09 p.m. UTC | #6
> -----Original Message-----
> From: Nélio Laranjeiro <nelio.laranjeiro@6wind.com>
> Sent: Friday, April 13, 2018 4:38 PM
> To: Xueming(Steven) Li <xuemingl@mellanox.com>
> Cc: Shahaf Shuler <shahafs@mellanox.com>; dev@dpdk.org
> Subject: Re: [PATCH v2 04/15] net/mlx5: support Rx tunnel type
> identification
> 
> On Thu, Apr 12, 2018 at 02:27:45PM +0000, Xueming(Steven) Li wrote:
> > > -----Original Message-----
> > > From: Nélio Laranjeiro <nelio.laranjeiro@6wind.com>
> > > Sent: Thursday, April 12, 2018 5:51 PM
> > > To: Xueming(Steven) Li <xuemingl@mellanox.com>
> > > Cc: Shahaf Shuler <shahafs@mellanox.com>; dev@dpdk.org
> > > Subject: Re: [PATCH v2 04/15] net/mlx5: support Rx tunnel type
> > > identification
> > >
> > > On Wed, Apr 11, 2018 at 08:11:50AM +0000, Xueming(Steven) Li wrote:
> > > > Hi Nelio,
> > > >
> > > > > -----Original Message-----
> > > > > From: Nélio Laranjeiro <nelio.laranjeiro@6wind.com>
> > > > > Sent: Tuesday, April 10, 2018 11:17 PM
> > > > > To: Xueming(Steven) Li <xuemingl@mellanox.com>
> > > > > Cc: Shahaf Shuler <shahafs@mellanox.com>; dev@dpdk.org
> > > > > Subject: Re: [PATCH v2 04/15] net/mlx5: support Rx tunnel type
> > > > > identification
> > > > >
> > > > > On Tue, Apr 10, 2018 at 09:34:04PM +0800, Xueming Li wrote:
> > > > > > This patch introduced tunnel type identification based on flow
> rules.
> > > > > > If flows of multiple tunnel types built on same queue,
> > > > > > RTE_PTYPE_TUNNEL_MASK will be returned, bits in flow mark
> > > > > > could be used as tunnel type identifier.
> > > > >
> > > > > I don't see anywhere in this patch where the bits are reserved
> > > > > to identify a flow, nor values which can help to identify it.
> > > > >
> > > > > Is this missing?
> > > > >
> > > > > Anyway we have already very few bits in the mark making it
> > > > > difficult to be used by the user, reserving again some to may
> > > > > lead to remove the mark support from the flows.
> > > >
> > > > Not all users will use multiple tunnel types, this is not included
> > > > in this patch set and left to user decision. I'll update comments
> > > > to make
> > > this clear.
> > >
> > > Thanks,
> > >
> > > > > > Signed-off-by: Xueming Li <xuemingl@mellanox.com>
> > > <snip/>
> > > > > >  /**
> > > > > > + * RXQ update after flow rule creation.
> > > > > > + *
> > > > > > + * @param dev
> > > > > > + *   Pointer to Ethernet device.
> > > > > > + * @param flow
> > > > > > + *   Pointer to the flow rule.
> > > > > > + */
> > > > > > +static void
> > > > > > +mlx5_flow_create_update_rxqs(struct rte_eth_dev *dev, struct
> > > > > > +rte_flow
> > > > > > +*flow) {
> > > > > > +	struct priv *priv = dev->data->dev_private;
> > > > > > +	unsigned int i;
> > > > > > +
> > > > > > +	if (!dev->data->dev_started)
> > > > > > +		return;
> > > > > > +	for (i = 0; i != flow->rss_conf.queue_num; ++i) {
> > > > > > +		struct mlx5_rxq_data *rxq_data = (*priv->rxqs)
> > > > > > +						 [(*flow->queues)[i]];
> > > > > > +		struct mlx5_rxq_ctrl *rxq_ctrl =
> > > > > > +			container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
> > > > > > +		uint8_t tunnel = PTYPE_IDX(flow->tunnel);
> > > > > > +
> > > > > > +		rxq_data->mark |= flow->mark;
> > > > > > +		if (!tunnel)
> > > > > > +			continue;
> > > > > > +		rxq_ctrl->tunnel_types[tunnel] += 1;
> > > > >
> > > > > I don't understand why you need such array, the NIC is unable to
> > > > > return the tunnel type has it returns only one bit saying tunnel.
> > > > > Why don't it store in the priv structure the current configured
> tunnel?
> > > >
> > > > This array is used to count tunnel types bound to queue, if only
> > > > one tunnel type, ptype will report that tunnel type, TUNNEL
> > > > MASK(max
> > > > value) will be returned if multiple types bound to a queue.
> > > >
> > > > Flow rss action specifies queues that binding to tunnel, thus we
> > > > can't assume all queues have same tunnel types, so this is a per
> > > > queue
> > > structure.
> > >
> > > There is something I am missing here, how in the dataplane the PMD
> > > can understand from 1 bit which kind of tunnel the packet is matching?
> >
> > The code under this line is answer, let me post here:
> > 		if (rxq_data->tunnel != flow->tunnel)
> > 			rxq_data->tunnel = rxq_data->tunnel ?
> > 					   RTE_PTYPE_TUNNEL_MASK :
> > 					   flow->tunnel;
> > If no tunnel type associated to rxq, use tunnel type from flow.
> > If a new tunnel type from flow, use RTE_PTYPE_TUNNEL_MASK.
> 
> From my understanding, when in the same queue there are several tunnel
> offloads, the mbuf ptype will contains RTE_PTYPE_TUNNEL_MASK:
> 
>    @@ -1601,7 +1605,7 @@ rxq_cq_to_pkt_type(volatile struct mlx5_cqe *cqe)
>            * bit[7] = outer_l3_type
>            */
>           idx = ((pinfo & 0x3) << 6) | ((ptype & 0xfc00) >> 10);
>   -       return mlx5_ptype_table[idx];
>   +       return mlx5_ptype_table[idx] | rxq->tunnel * !!(idx & (1 << 6));
>    }
> 
> Used by Rx burst functions,
> 
> /* Update packet information. */
>  pkt->packet_type = rxq_cq_to_pkt_type(cqe);
> 
> Is this correct?

You got the point.

> 
> There is another strange point here,
> 
>  +       [PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)] = RTE_PTYPE_TUNNEL_VXLAN |
>  +                                             RTE_PTYPE_L4_UDP,
> 
> According to the RFC 7348 [1] having a VXLAN with an outer IPv6 is
> possible.  How do you handle it?

The answer is hidden in the code you pasted:

    @@ -1601,7 +1605,7 @@ rxq_cq_to_pkt_type(volatile struct mlx5_cqe *cqe)
            * bit[7] = outer_l3_type
            */
           idx = ((pinfo & 0x3) << 6) | ((ptype & 0xfc00) >> 10);
   -       return mlx5_ptype_table[idx];
   +       return mlx5_ptype_table[idx] | rxq->tunnel * !!(idx & (1 << 6));

As the comment says, bit 7 of the index is the outer L3 type from the CQE, so
the outer IPv4/IPv6 PTYPE is already retrieved from the mlx5_ptype_table lookup.
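
To illustrate the point outside the driver, here is a toy version of that
lookup; the table contents and names are made up, only the index layout and
the branchless OR match the real code quoted above:

    #include <stdint.h>

    static uint32_t toy_ptype_table[256]; /* stand-in for mlx5_ptype_table */

    /* Index layout, as in the CQE conversion above:
     * bit[6] = tunneled, bit[7] = outer_l3_type.
     * The table entry already encodes the outer IPv4/IPv6 header, and the
     * per-queue tunnel value is OR'ed in only when bit 6 says "tunneled";
     * the multiply by 0/1 keeps the datapath branchless. */
    static uint32_t
    toy_cq_to_pkt_type(uint32_t rxq_tunnel, uint8_t idx)
    {
        return toy_ptype_table[idx] | rxq_tunnel * !!(idx & (1 << 6));
    }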

> 
> > > <snip/>
> > > > > > @@ -2334,9 +2414,9 @@ mlx5_flow_stop(struct rte_eth_dev *dev,
> > > > > > struct mlx5_flows *list)  {
> > > > > >  	struct priv *priv = dev->data->dev_private;
> > > > > >  	struct rte_flow *flow;
> > > > > > +	unsigned int i;
> > > > > >
> > > > > >  	TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
> > > > > > -		unsigned int i;
> > > > > >  		struct mlx5_ind_table_ibv *ind_tbl = NULL;
> > > > > >
> > > > > >  		if (flow->drop) {
> > > > > > @@ -2382,6 +2462,16 @@ mlx5_flow_stop(struct rte_eth_dev *dev,
> > > > > > struct
> > > > > mlx5_flows *list)
> > > > > >  		DRV_LOG(DEBUG, "port %u flow %p removed", dev->data-
> > > >port_id,
> > > > > >  			(void *)flow);
> > > > > >  	}
> > > > > > +	/* Cleanup Rx queue tunnel info. */
> > > > > > +	for (i = 0; i != priv->rxqs_n; ++i) {
> > > > > > +		struct mlx5_rxq_data *q = (*priv->rxqs)[i];
> > > > > > +		struct mlx5_rxq_ctrl *rxq_ctrl =
> > > > > > +			container_of(q, struct mlx5_rxq_ctrl, rxq);
> > > > > > +
> > > > > > +		memset((void *)rxq_ctrl->tunnel_types, 0,
> > > > > > +		       sizeof(rxq_ctrl->tunnel_types));
> > > > > > +		q->tunnel = 0;
> > > > > > +	}
> > > > > >  }
> > > > >
> > > > > This hunk does not handle the fact the Rx queue array may have
> > > > > some holes i.e. the application is allowed to ask for 10 queues
> > > > > and only initialise some.  In such situation this code will
> segfault.
> > > >
> > > > In other words, "q" could be NULL, correct? I'll add check for this.
> > >
> > > Correct.
> > >
> > > > BTW, there should be an action item to add such check in rss/queue
> > > > flow
> > > creation.
> > >
> > > As it is the responsibility of the application/user to make rule
> > > according to what it has configured, it has not been added.  It can
> > > still be added, but it cannot be considered as a fix.
> > >
> > > > > It should only memset the Rx queues making part of the flow not
> > > > > the
> > > others.
> > > >
> > > > Clean this(decrease tunnel_types counter of each queue) from each
> > > > flow would be time consuming.
> > >
> > > Considering flows are already relying on syscall to communicate with
> > > the kernel, the extra cycles consumption to only clear the queues
> > > making part of this flow is neglectable.
> > >
> > > By the way in the same function the mark is cleared only for the
> > > queues making part of the flow, the same loop can be used to clear
> > > those tunnel informations at the same time.
> > >
> > > > If an error happened, counter will not be cleared and such state
> > > > will impact tunnel type after port start again.
> > >
> > > Unless an implementation error which other kind of them do you fear
> > > to happen?
> >
> > Mark of rxq simply reset to 0, this field is counter, the final target
> > is to clear field value, so my code should be straight forward and
> > error free 😊
> >
> > From a quick look, this function could be much simple that what it is
> today:
> > 1. clean verb flow and hrex where possible, despite of flow type.
> > 2. clean rxq state: mark and tunnel_types.
> 
> Ok.
> 
> Thanks,
> 
> [1] https://dpdk.org/patch/37965
> 
> --
> Nélio Laranjeiro
> 6WIND
  

Patch

diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 870d05250..65d7a9b62 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -222,6 +222,7 @@  struct rte_flow {
 	struct rte_flow_action_rss rss_conf; /**< RSS configuration */
 	uint16_t (*queues)[]; /**< Queues indexes to use. */
 	uint8_t rss_key[40]; /**< copy of the RSS key. */
+	uint32_t tunnel; /**< Tunnel type of RTE_PTYPE_TUNNEL_XXX. */
 	struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
 	struct mlx5_flow_counter_stats counter_stats;/**<The counter stats. */
 	struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
@@ -238,6 +239,19 @@  struct rte_flow {
 	(type) == RTE_FLOW_ITEM_TYPE_VXLAN || \
 	(type) == RTE_FLOW_ITEM_TYPE_GRE)
 
+const uint32_t flow_ptype[] = {
+	[RTE_FLOW_ITEM_TYPE_VXLAN] = RTE_PTYPE_TUNNEL_VXLAN,
+	[RTE_FLOW_ITEM_TYPE_GRE] = RTE_PTYPE_TUNNEL_GRE,
+};
+
+#define PTYPE_IDX(t) ((RTE_PTYPE_TUNNEL_MASK & (t)) >> 12)
+
+const uint32_t ptype_ext[] = {
+	[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)] = RTE_PTYPE_TUNNEL_VXLAN |
+					      RTE_PTYPE_L4_UDP,
+	[PTYPE_IDX(RTE_PTYPE_TUNNEL_GRE)] = RTE_PTYPE_TUNNEL_GRE,
+};
+
 /** Structure to generate a simple graph of layers supported by the NIC. */
 struct mlx5_flow_items {
 	/** List of possible actions for these items. */
@@ -437,6 +451,7 @@  struct mlx5_flow_parse {
 	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queues indexes to use. */
 	uint8_t rss_key[40]; /**< copy of the RSS key. */
 	enum hash_rxq_type layer; /**< Last pattern layer detected. */
+	uint32_t tunnel; /**< Tunnel type of RTE_PTYPE_TUNNEL_XXX. */
 	struct ibv_counter_set *cs; /**< Holds the counter set for the rule */
 	struct {
 		struct ibv_flow_attr *ibv_attr;
@@ -860,7 +875,7 @@  mlx5_flow_convert_items_validate(const struct rte_flow_item items[],
 		if (ret)
 			goto exit_item_not_supported;
 		if (IS_TUNNEL(items->type)) {
-			if (parser->inner) {
+			if (parser->tunnel) {
 				rte_flow_error_set(error, ENOTSUP,
 						   RTE_FLOW_ERROR_TYPE_ITEM,
 						   items,
@@ -869,6 +884,7 @@  mlx5_flow_convert_items_validate(const struct rte_flow_item items[],
 				return -rte_errno;
 			}
 			parser->inner = IBV_FLOW_SPEC_INNER;
+			parser->tunnel = flow_ptype[items->type];
 		}
 		if (parser->drop) {
 			parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
@@ -1165,6 +1181,7 @@  mlx5_flow_convert(struct rte_eth_dev *dev,
 	}
 	/* Third step. Conversion parse, fill the specifications. */
 	parser->inner = 0;
+	parser->tunnel = 0;
 	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
 		struct mlx5_flow_data data = {
 			.parser = parser,
@@ -1633,6 +1650,7 @@  mlx5_flow_create_vxlan(const struct rte_flow_item *item,
 
 	id.vni[0] = 0;
 	parser->inner = IBV_FLOW_SPEC_INNER;
+	parser->tunnel = ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)];
 	if (spec) {
 		if (!mask)
 			mask = default_mask;
@@ -1686,6 +1704,7 @@  mlx5_flow_create_gre(const struct rte_flow_item *item __rte_unused,
 	};
 
 	parser->inner = IBV_FLOW_SPEC_INNER;
+	parser->tunnel = ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_GRE)];
 	mlx5_flow_create_copy(parser, &tunnel, size);
 	return 0;
 }
@@ -1864,7 +1883,8 @@  mlx5_flow_create_action_queue_rss(struct rte_eth_dev *dev,
 				      parser->rss_conf.key_len,
 				      hash_fields,
 				      parser->rss_conf.queue,
-				      parser->rss_conf.queue_num);
+				      parser->rss_conf.queue_num,
+				      parser->tunnel);
 		if (flow->frxq[i].hrxq)
 			continue;
 		flow->frxq[i].hrxq =
@@ -1873,7 +1893,8 @@  mlx5_flow_create_action_queue_rss(struct rte_eth_dev *dev,
 				      parser->rss_conf.key_len,
 				      hash_fields,
 				      parser->rss_conf.queue,
-				      parser->rss_conf.queue_num);
+				      parser->rss_conf.queue_num,
+				      parser->tunnel);
 		if (!flow->frxq[i].hrxq) {
 			return rte_flow_error_set(error, ENOMEM,
 						  RTE_FLOW_ERROR_TYPE_HANDLE,
@@ -1885,6 +1906,40 @@  mlx5_flow_create_action_queue_rss(struct rte_eth_dev *dev,
 }
 
 /**
+ * RXQ update after flow rule creation.
+ *
+ * @param dev
+ *   Pointer to Ethernet device.
+ * @param flow
+ *   Pointer to the flow rule.
+ */
+static void
+mlx5_flow_create_update_rxqs(struct rte_eth_dev *dev, struct rte_flow *flow)
+{
+	struct priv *priv = dev->data->dev_private;
+	unsigned int i;
+
+	if (!dev->data->dev_started)
+		return;
+	for (i = 0; i != flow->rss_conf.queue_num; ++i) {
+		struct mlx5_rxq_data *rxq_data = (*priv->rxqs)
+						 [(*flow->queues)[i]];
+		struct mlx5_rxq_ctrl *rxq_ctrl =
+			container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
+		uint8_t tunnel = PTYPE_IDX(flow->tunnel);
+
+		rxq_data->mark |= flow->mark;
+		if (!tunnel)
+			continue;
+		rxq_ctrl->tunnel_types[tunnel] += 1;
+		if (rxq_data->tunnel != flow->tunnel)
+			rxq_data->tunnel = rxq_data->tunnel ?
+					   RTE_PTYPE_TUNNEL_MASK :
+					   flow->tunnel;
+	}
+}
+
+/**
  * Complete flow rule creation.
  *
  * @param dev
@@ -1944,12 +1999,7 @@  mlx5_flow_create_action_queue(struct rte_eth_dev *dev,
 				   NULL, "internal error in flow creation");
 		goto error;
 	}
-	for (i = 0; i != parser->rss_conf.queue_num; ++i) {
-		struct mlx5_rxq_data *q =
-			(*priv->rxqs)[parser->rss_conf.queue[i]];
-
-		q->mark |= parser->mark;
-	}
+	mlx5_flow_create_update_rxqs(dev, flow);
 	return 0;
 error:
 	ret = rte_errno; /* Save rte_errno before cleanup. */
@@ -2022,6 +2072,7 @@  mlx5_flow_list_create(struct rte_eth_dev *dev,
 	}
 	/* Copy configuration. */
 	flow->queues = (uint16_t (*)[])(flow + 1);
+	flow->tunnel = parser.tunnel;
 	flow->rss_conf = (struct rte_flow_action_rss){
 		.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
 		.level = 0,
@@ -2113,9 +2164,38 @@  mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
 	struct priv *priv = dev->data->dev_private;
 	unsigned int i;
 
-	if (flow->drop || !flow->mark)
+	if (flow->drop || !dev->data->dev_started)
 		goto free;
-	for (i = 0; i != flow->rss_conf.queue_num; ++i) {
+	for (i = 0; flow->tunnel && i != flow->rss_conf.queue_num; ++i) {
+		/* Update queue tunnel type. */
+		struct mlx5_rxq_data *rxq_data = (*priv->rxqs)
+						 [(*flow->queues)[i]];
+		struct mlx5_rxq_ctrl *rxq_ctrl =
+			container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
+		uint8_t tunnel = PTYPE_IDX(flow->tunnel);
+
+		RTE_ASSERT(rxq_ctrl->tunnel_types[tunnel] > 0);
+		rxq_ctrl->tunnel_types[tunnel] -= 1;
+		if (!rxq_ctrl->tunnel_types[tunnel]) {
+			/* Update tunnel type. */
+			uint8_t j;
+			uint8_t types = 0;
+			uint8_t last;
+
+			for (j = 0; j < RTE_DIM(rxq_ctrl->tunnel_types); j++)
+				if (rxq_ctrl->tunnel_types[j]) {
+					types += 1;
+					last = j;
+				}
+			/* Keep same if more than one tunnel types left. */
+			if (types == 1)
+				rxq_data->tunnel = ptype_ext[last];
+			else if (types == 0)
+				/* No tunnel type left. */
+				rxq_data->tunnel = 0;
+		}
+	}
+	for (i = 0; flow->mark && i != flow->rss_conf.queue_num; ++i) {
 		struct rte_flow *tmp;
 		int mark = 0;
 
@@ -2334,9 +2414,9 @@  mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
 {
 	struct priv *priv = dev->data->dev_private;
 	struct rte_flow *flow;
+	unsigned int i;
 
 	TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
-		unsigned int i;
 		struct mlx5_ind_table_ibv *ind_tbl = NULL;
 
 		if (flow->drop) {
@@ -2382,6 +2462,16 @@  mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
 		DRV_LOG(DEBUG, "port %u flow %p removed", dev->data->port_id,
 			(void *)flow);
 	}
+	/* Cleanup Rx queue tunnel info. */
+	for (i = 0; i != priv->rxqs_n; ++i) {
+		struct mlx5_rxq_data *q = (*priv->rxqs)[i];
+		struct mlx5_rxq_ctrl *rxq_ctrl =
+			container_of(q, struct mlx5_rxq_ctrl, rxq);
+
+		memset((void *)rxq_ctrl->tunnel_types, 0,
+		       sizeof(rxq_ctrl->tunnel_types));
+		q->tunnel = 0;
+	}
 }
 
 /**
@@ -2429,7 +2519,8 @@  mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
 					      flow->rss_conf.key_len,
 					      hash_rxq_init[i].hash_fields,
 					      flow->rss_conf.queue,
-					      flow->rss_conf.queue_num);
+					      flow->rss_conf.queue_num,
+					      flow->tunnel);
 			if (flow->frxq[i].hrxq)
 				goto flow_create;
 			flow->frxq[i].hrxq =
@@ -2437,7 +2528,8 @@  mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
 					      flow->rss_conf.key_len,
 					      hash_rxq_init[i].hash_fields,
 					      flow->rss_conf.queue,
-					      flow->rss_conf.queue_num);
+					      flow->rss_conf.queue_num,
+					      flow->tunnel);
 			if (!flow->frxq[i].hrxq) {
 				DRV_LOG(DEBUG,
 					"port %u flow %p cannot be applied",
@@ -2459,10 +2551,7 @@  mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
 			DRV_LOG(DEBUG, "port %u flow %p applied",
 				dev->data->port_id, (void *)flow);
 		}
-		if (!flow->mark)
-			continue;
-		for (i = 0; i != flow->rss_conf.queue_num; ++i)
-			(*priv->rxqs)[flow->rss_conf.queue[i]]->mark = 1;
+		mlx5_flow_create_update_rxqs(dev, flow);
 	}
 	return 0;
 }
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 1e4354ab3..351acfc0f 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -1386,6 +1386,8 @@  mlx5_ind_table_ibv_verify(struct rte_eth_dev *dev)
  *   first queue index will be taken for the indirection table.
  * @param queues_n
  *   Number of queues.
+ * @param tunnel
+ *   Tunnel type.
  *
  * @return
  *   The Verbs object initialised, NULL otherwise and rte_errno is set.
@@ -1394,7 +1396,7 @@  struct mlx5_hrxq *
 mlx5_hrxq_new(struct rte_eth_dev *dev,
 	      const uint8_t *rss_key, uint32_t rss_key_len,
 	      uint64_t hash_fields,
-	      const uint16_t *queues, uint32_t queues_n)
+	      const uint16_t *queues, uint32_t queues_n, uint32_t tunnel)
 {
 	struct priv *priv = dev->data->dev_private;
 	struct mlx5_hrxq *hrxq;
@@ -1438,6 +1440,7 @@  mlx5_hrxq_new(struct rte_eth_dev *dev,
 	hrxq->qp = qp;
 	hrxq->rss_key_len = rss_key_len;
 	hrxq->hash_fields = hash_fields;
+	hrxq->tunnel = tunnel;
 	memcpy(hrxq->rss_key, rss_key, rss_key_len);
 	rte_atomic32_inc(&hrxq->refcnt);
 	LIST_INSERT_HEAD(&priv->hrxqs, hrxq, next);
@@ -1466,6 +1469,8 @@  mlx5_hrxq_new(struct rte_eth_dev *dev,
  *   first queue index will be taken for the indirection table.
  * @param queues_n
  *   Number of queues.
+ * @param tunnel
+ *   Tunnel type.
  *
  * @return
  *   An hash Rx queue on success.
@@ -1474,7 +1479,7 @@  struct mlx5_hrxq *
 mlx5_hrxq_get(struct rte_eth_dev *dev,
 	      const uint8_t *rss_key, uint32_t rss_key_len,
 	      uint64_t hash_fields,
-	      const uint16_t *queues, uint32_t queues_n)
+	      const uint16_t *queues, uint32_t queues_n, uint32_t tunnel)
 {
 	struct priv *priv = dev->data->dev_private;
 	struct mlx5_hrxq *hrxq;
@@ -1489,6 +1494,8 @@  mlx5_hrxq_get(struct rte_eth_dev *dev,
 			continue;
 		if (hrxq->hash_fields != hash_fields)
 			continue;
+		if (hrxq->tunnel != tunnel)
+			continue;
 		ind_tbl = mlx5_ind_table_ibv_get(dev, queues, queues_n);
 		if (!ind_tbl)
 			continue;
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 1f422c70b..d061dfc8a 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -34,7 +34,7 @@ 
 #include "mlx5_prm.h"
 
 static __rte_always_inline uint32_t
-rxq_cq_to_pkt_type(volatile struct mlx5_cqe *cqe);
+rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe);
 
 static __rte_always_inline int
 mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
@@ -125,12 +125,14 @@  mlx5_set_ptype_table(void)
 	(*p)[0x8a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
 		     RTE_PTYPE_L4_UDP;
 	/* Tunneled - L3 */
+	(*p)[0x40] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
 	(*p)[0x41] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
 		     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
 		     RTE_PTYPE_INNER_L4_NONFRAG;
 	(*p)[0x42] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
 		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
 		     RTE_PTYPE_INNER_L4_NONFRAG;
+	(*p)[0xc0] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
 	(*p)[0xc1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
 		     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
 		     RTE_PTYPE_INNER_L4_NONFRAG;
@@ -1577,6 +1579,8 @@  mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 /**
  * Translate RX completion flags to packet type.
  *
+ * @param[in] rxq
+ *   Pointer to RX queue structure.
  * @param[in] cqe
  *   Pointer to CQE.
  *
@@ -1586,7 +1590,7 @@  mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
  *   Packet type for struct rte_mbuf.
  */
 static inline uint32_t
-rxq_cq_to_pkt_type(volatile struct mlx5_cqe *cqe)
+rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe)
 {
 	uint8_t idx;
 	uint8_t pinfo = cqe->pkt_info;
@@ -1601,7 +1605,7 @@  rxq_cq_to_pkt_type(volatile struct mlx5_cqe *cqe)
 	 * bit[7] = outer_l3_type
 	 */
 	idx = ((pinfo & 0x3) << 6) | ((ptype & 0xfc00) >> 10);
-	return mlx5_ptype_table[idx];
+	return mlx5_ptype_table[idx] | rxq->tunnel * !!(idx & (1 << 6));
 }
 
 /**
@@ -1833,7 +1837,7 @@  mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 			pkt = seg;
 			assert(len >= (rxq->crc_present << 2));
 			/* Update packet information. */
-			pkt->packet_type = rxq_cq_to_pkt_type(cqe);
+			pkt->packet_type = rxq_cq_to_pkt_type(rxq, cqe);
 			pkt->ol_flags = 0;
 			if (rss_hash_res && rxq->rss_hash) {
 				pkt->hash.rss = rss_hash_res;
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index a702cb603..6866f6818 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -104,6 +104,7 @@  struct mlx5_rxq_data {
 	void *cq_uar; /* CQ user access region. */
 	uint32_t cqn; /* CQ number. */
 	uint8_t cq_arm_sn; /* CQ arm seq number. */
+	uint32_t tunnel; /* Tunnel information. */
 } __rte_cache_aligned;
 
 /* Verbs Rx queue elements. */
@@ -125,6 +126,7 @@  struct mlx5_rxq_ctrl {
 	struct mlx5_rxq_ibv *ibv; /* Verbs elements. */
 	struct mlx5_rxq_data rxq; /* Data path structure. */
 	unsigned int socket; /* CPU socket ID for allocations. */
+	uint32_t tunnel_types[16]; /* Tunnel type counter. */
 	unsigned int irq:1; /* Whether IRQ is enabled. */
 	uint16_t idx; /* Queue index. */
 };
@@ -145,6 +147,7 @@  struct mlx5_hrxq {
 	struct mlx5_ind_table_ibv *ind_table; /* Indirection table. */
 	struct ibv_qp *qp; /* Verbs queue pair. */
 	uint64_t hash_fields; /* Verbs Hash fields. */
+	uint32_t tunnel; /* Tunnel type. */
 	uint32_t rss_key_len; /* Hash key length in bytes. */
 	uint8_t rss_key[]; /* Hash key. */
 };
@@ -248,11 +251,13 @@  int mlx5_ind_table_ibv_verify(struct rte_eth_dev *dev);
 struct mlx5_hrxq *mlx5_hrxq_new(struct rte_eth_dev *dev,
 				const uint8_t *rss_key, uint32_t rss_key_len,
 				uint64_t hash_fields,
-				const uint16_t *queues, uint32_t queues_n);
+				const uint16_t *queues, uint32_t queues_n,
+				uint32_t tunnel);
 struct mlx5_hrxq *mlx5_hrxq_get(struct rte_eth_dev *dev,
 				const uint8_t *rss_key, uint32_t rss_key_len,
 				uint64_t hash_fields,
-				const uint16_t *queues, uint32_t queues_n);
+				const uint16_t *queues, uint32_t queues_n,
+				uint32_t tunnel);
 int mlx5_hrxq_release(struct rte_eth_dev *dev, struct mlx5_hrxq *hxrq);
 int mlx5_hrxq_ibv_verify(struct rte_eth_dev *dev);
 uint64_t mlx5_get_rx_port_offloads(void);
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
index bbe1818ef..9f9136108 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
@@ -551,6 +551,7 @@  rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq,
 	const uint64x1_t mbuf_init = vld1_u64(&rxq->mbuf_initializer);
 	const uint64x1_t r32_mask = vcreate_u64(0xffffffff);
 	uint64x2_t rearm0, rearm1, rearm2, rearm3;
+	uint8_t pt_idx0, pt_idx1, pt_idx2, pt_idx3;
 
 	if (rxq->mark) {
 		const uint32x4_t ft_def = vdupq_n_u32(MLX5_FLOW_MARK_DEFAULT);
@@ -583,14 +584,18 @@  rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq,
 	ptype = vshrn_n_u32(ptype_info, 10);
 	/* Errored packets will have RTE_PTYPE_ALL_MASK. */
 	ptype = vorr_u16(ptype, op_err);
-	pkts[0]->packet_type =
-		mlx5_ptype_table[vget_lane_u8(vreinterpret_u8_u16(ptype), 6)];
-	pkts[1]->packet_type =
-		mlx5_ptype_table[vget_lane_u8(vreinterpret_u8_u16(ptype), 4)];
-	pkts[2]->packet_type =
-		mlx5_ptype_table[vget_lane_u8(vreinterpret_u8_u16(ptype), 2)];
-	pkts[3]->packet_type =
-		mlx5_ptype_table[vget_lane_u8(vreinterpret_u8_u16(ptype), 0)];
+	pt_idx0 = vget_lane_u8(vreinterpret_u8_u16(ptype), 6);
+	pt_idx1 = vget_lane_u8(vreinterpret_u8_u16(ptype), 4);
+	pt_idx2 = vget_lane_u8(vreinterpret_u8_u16(ptype), 2);
+	pt_idx3 = vget_lane_u8(vreinterpret_u8_u16(ptype), 0);
+	pkts[0]->packet_type = mlx5_ptype_table[pt_idx0] |
+			       !!(pt_idx0 & (1 << 6)) * rxq->tunnel;
+	pkts[1]->packet_type = mlx5_ptype_table[pt_idx1] |
+			       !!(pt_idx1 & (1 << 6)) * rxq->tunnel;
+	pkts[2]->packet_type = mlx5_ptype_table[pt_idx2] |
+			       !!(pt_idx2 & (1 << 6)) * rxq->tunnel;
+	pkts[3]->packet_type = mlx5_ptype_table[pt_idx3] |
+			       !!(pt_idx3 & (1 << 6)) * rxq->tunnel;
 	/* Fill flags for checksum and VLAN. */
 	pinfo = vandq_u32(ptype_info, ptype_ol_mask);
 	pinfo = vreinterpretq_u32_u8(
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
index c088bcb51..d2492481d 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
@@ -542,6 +542,7 @@  rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq, __m128i cqes[4],
 	const __m128i mbuf_init =
 		_mm_loadl_epi64((__m128i *)&rxq->mbuf_initializer);
 	__m128i rearm0, rearm1, rearm2, rearm3;
+	uint8_t pt_idx0, pt_idx1, pt_idx2, pt_idx3;
 
 	/* Extract pkt_info field. */
 	pinfo0 = _mm_unpacklo_epi32(cqes[0], cqes[1]);
@@ -595,10 +596,18 @@  rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq, __m128i cqes[4],
 	/* Errored packets will have RTE_PTYPE_ALL_MASK. */
 	op_err = _mm_srli_epi16(op_err, 8);
 	ptype = _mm_or_si128(ptype, op_err);
-	pkts[0]->packet_type = mlx5_ptype_table[_mm_extract_epi8(ptype, 0)];
-	pkts[1]->packet_type = mlx5_ptype_table[_mm_extract_epi8(ptype, 2)];
-	pkts[2]->packet_type = mlx5_ptype_table[_mm_extract_epi8(ptype, 4)];
-	pkts[3]->packet_type = mlx5_ptype_table[_mm_extract_epi8(ptype, 6)];
+	pt_idx0 = _mm_extract_epi8(ptype, 0);
+	pt_idx1 = _mm_extract_epi8(ptype, 2);
+	pt_idx2 = _mm_extract_epi8(ptype, 4);
+	pt_idx3 = _mm_extract_epi8(ptype, 6);
+	pkts[0]->packet_type = mlx5_ptype_table[pt_idx0] |
+			       !!(pt_idx0 & (1 << 6)) * rxq->tunnel;
+	pkts[1]->packet_type = mlx5_ptype_table[pt_idx1] |
+			       !!(pt_idx1 & (1 << 6)) * rxq->tunnel;
+	pkts[2]->packet_type = mlx5_ptype_table[pt_idx2] |
+			       !!(pt_idx2 & (1 << 6)) * rxq->tunnel;
+	pkts[3]->packet_type = mlx5_ptype_table[pt_idx3] |
+			       !!(pt_idx3 & (1 << 6)) * rxq->tunnel;
 	/* Fill flags for checksum and VLAN. */
 	pinfo = _mm_and_si128(pinfo, ptype_ol_mask);
 	pinfo = _mm_shuffle_epi8(cv_flag_sel, pinfo);
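
For completeness, a hedged sketch of how an application might consume the
packet types produced by this patch; handle_rx_pkt() is an illustrative
helper, not something added by the series:

    #include <rte_mbuf.h>
    #include <rte_mbuf_ptype.h>

    static void
    handle_rx_pkt(struct rte_mbuf *pkt)
    {
        uint32_t tun = pkt->packet_type & RTE_PTYPE_TUNNEL_MASK;

        if (tun == RTE_PTYPE_TUNNEL_VXLAN) {
            /* Only VXLAN flows are bound to this Rx queue. */
        } else if (tun == RTE_PTYPE_TUNNEL_GRE) {
            /* Only GRE flows are bound to this Rx queue. */
        } else if (tun == RTE_PTYPE_TUNNEL_MASK) {
            /* Several tunnel types share the queue: only "tunneled" is
             * known here; a flow mark can disambiguate, as noted in the
             * commit message. */
        }
    }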