[dpdk-dev] [PATCH v2] net/mlx5: support MPLS-in-GRE and MPLS-in-UDP

Nélio Laranjeiro nelio.laranjeiro at 6wind.com
Tue May 15 14:04:16 CEST 2018


On Tue, May 15, 2018 at 11:07:14AM +0000, Matan Azrad wrote:
> Add support for MPLS over GRE and MPLS over UDP tunnel types as
> described in the next RFCs:
> 1. https://tools.ietf.org/html/rfc4023
> 2. https://tools.ietf.org/html/rfc7510
> 3. https://tools.ietf.org/html/rfc4385
> 
> Signed-off-by: Matan Azrad <matan at mellanox.com>

Acked-by: Nelio Laranjeiro <nelio.laranjeiro at 6wind.com>

> ---
>  doc/guides/nics/mlx5.rst     |   4 +-
>  drivers/net/mlx5/Makefile    |   5 ++
>  drivers/net/mlx5/mlx5.c      |  13 ++++
>  drivers/net/mlx5/mlx5.h      |   1 +
>  drivers/net/mlx5/mlx5_flow.c | 161 +++++++++++++++++++++++++++++++++++++++++--
>  5 files changed, 176 insertions(+), 8 deletions(-)
> 
> 
> V2:
> Ignore void items between GRE and MPLS tunnels (Nelio's suggestion).
> 
> 
> diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
> index a7d5c90..2b110f4 100644
> --- a/doc/guides/nics/mlx5.rst
> +++ b/doc/guides/nics/mlx5.rst
> @@ -74,7 +74,7 @@ Features
>  - RX interrupts.
>  - Statistics query including Basic, Extended and per queue.
>  - Rx HW timestamp.
> -- Tunnel types: VXLAN, L3 VXLAN, VXLAN-GPE, GRE.
> +- Tunnel types: VXLAN, L3 VXLAN, VXLAN-GPE, GRE, MPLSoGRE, MPLSoUDP.
>  - Tunnel HW offloads: packet type, inner/outer RSS, IP and UDP checksum verification.
>  
>  Limitations
> @@ -113,6 +113,8 @@ Limitations
>  
>  - VXLAN TSO and checksum offloads are not supported on VM.
>  
> +- L3 VXLAN and VXLAN-GPE tunnels cannot be supported together with MPLSoGRE and MPLSoUDP.
> +
>  - VF: flow rules created on VF devices can only match traffic targeted at the
>    configured MAC addresses (see ``rte_eth_dev_mac_addr_add()``).
>  
> diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
> index 8d64d4c..293144e 100644
> --- a/drivers/net/mlx5/Makefile
> +++ b/drivers/net/mlx5/Makefile
> @@ -108,6 +108,11 @@ mlx5_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh
>  		enum MLX5DV_CONTEXT_MASK_TUNNEL_OFFLOADS \
>  		$(AUTOCONF_OUTPUT)
>  	$Q sh -- '$<' '$@' \
> +		HAVE_IBV_DEVICE_MPLS_SUPPORT \
> +		infiniband/verbs.h \
> +		enum IBV_FLOW_SPEC_MPLS \
> +		$(AUTOCONF_OUTPUT)
> +	$Q sh -- '$<' '$@' \
>  		HAVE_IBV_WQ_FLAG_RX_END_PADDING \
>  		infiniband/verbs.h \
>  		enum IBV_WQ_FLAG_RX_END_PADDING \
> diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
> index 8aa91cc..225ebd4 100644
> --- a/drivers/net/mlx5/mlx5.c
> +++ b/drivers/net/mlx5/mlx5.c
> @@ -690,6 +690,7 @@
>  	unsigned int mps;
>  	unsigned int cqe_comp;
>  	unsigned int tunnel_en = 0;
> +	unsigned int mpls_en = 0;
>  	unsigned int swp = 0;
>  	unsigned int verb_priorities = 0;
>  	unsigned int mprq = 0;
> @@ -850,6 +851,17 @@
>  	DRV_LOG(WARNING,
>  		"tunnel offloading disabled due to old OFED/rdma-core version");
>  #endif
> +#ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
> +	mpls_en = ((attrs_out.tunnel_offloads_caps &
> +		    MLX5DV_RAW_PACKET_CAP_TUNNELED_OFFLOAD_CW_MPLS_OVER_GRE) &&
> +		   (attrs_out.tunnel_offloads_caps &
> +		    MLX5DV_RAW_PACKET_CAP_TUNNELED_OFFLOAD_CW_MPLS_OVER_UDP));
> +	DRV_LOG(DEBUG, "MPLS over GRE/UDP tunnel offloading is %ssupported",
> +		mpls_en ? "" : "not ");
> +#else
> +	DRV_LOG(WARNING, "MPLS over GRE/UDP tunnel offloading disabled due to"
> +		" old OFED/rdma-core version or firmware configuration");
> +#endif
>  	err = mlx5_glue->query_device_ex(attr_ctx, NULL, &device_attr);
>  	if (err) {
>  		DEBUG("ibv_query_device_ex() failed");
> @@ -873,6 +885,7 @@
>  			.cqe_comp = cqe_comp,
>  			.mps = mps,
>  			.tunnel_en = tunnel_en,
> +			.mpls_en = mpls_en,
>  			.tx_vec_en = 1,
>  			.rx_vec_en = 1,
>  			.mpw_hdr_dseg = 0,
> diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
> index c4c962b..7750832 100644
> --- a/drivers/net/mlx5/mlx5.h
> +++ b/drivers/net/mlx5/mlx5.h
> @@ -93,6 +93,7 @@ struct mlx5_dev_config {
>  	unsigned int mps:2; /* Multi-packet send supported mode. */
>  	unsigned int tunnel_en:1;
>  	/* Whether tunnel stateless offloads are supported. */
> +	unsigned int mpls_en:1; /* MPLS over GRE/UDP is enabled. */
>  	unsigned int flow_counter_en:1; /* Whether flow counter is supported. */
>  	unsigned int cqe_comp:1; /* CQE compression is enabled. */
>  	unsigned int tso:1; /* Whether TSO is supported. */
> diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
> index 7af1dfa..3af9524 100644
> --- a/drivers/net/mlx5/mlx5_flow.c
> +++ b/drivers/net/mlx5/mlx5_flow.c
> @@ -101,6 +101,11 @@ struct mlx5_flow_data {
>  		     const void *default_mask,
>  		     struct mlx5_flow_data *data);
>  
> +static int
> +mlx5_flow_create_mpls(const struct rte_flow_item *item,
> +		      const void *default_mask,
> +		      struct mlx5_flow_data *data);
> +
>  struct mlx5_flow_parse;
>  
>  static void
> @@ -248,12 +253,14 @@ struct rte_flow {
>  #define IS_TUNNEL(type) ( \
>  	(type) == RTE_FLOW_ITEM_TYPE_VXLAN || \
>  	(type) == RTE_FLOW_ITEM_TYPE_VXLAN_GPE || \
> -	(type) == RTE_FLOW_ITEM_TYPE_GRE)
> +	(type) == RTE_FLOW_ITEM_TYPE_GRE || \
> +	(type) == RTE_FLOW_ITEM_TYPE_MPLS)
>  
>  const uint32_t flow_ptype[] = {
>  	[RTE_FLOW_ITEM_TYPE_VXLAN] = RTE_PTYPE_TUNNEL_VXLAN,
>  	[RTE_FLOW_ITEM_TYPE_VXLAN_GPE] = RTE_PTYPE_TUNNEL_VXLAN_GPE,
>  	[RTE_FLOW_ITEM_TYPE_GRE] = RTE_PTYPE_TUNNEL_GRE,
> +	[RTE_FLOW_ITEM_TYPE_MPLS] = RTE_PTYPE_TUNNEL_MPLS_IN_GRE,
>  };
>  
>  #define PTYPE_IDX(t) ((RTE_PTYPE_TUNNEL_MASK & (t)) >> 12)
> @@ -264,6 +271,10 @@ struct rte_flow {
>  	[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN_GPE)]	= RTE_PTYPE_TUNNEL_VXLAN_GPE |
>  						  RTE_PTYPE_L4_UDP,
>  	[PTYPE_IDX(RTE_PTYPE_TUNNEL_GRE)] = RTE_PTYPE_TUNNEL_GRE,
> +	[PTYPE_IDX(RTE_PTYPE_TUNNEL_MPLS_IN_GRE)] =
> +		RTE_PTYPE_TUNNEL_MPLS_IN_GRE,
> +	[PTYPE_IDX(RTE_PTYPE_TUNNEL_MPLS_IN_UDP)] =
> +		RTE_PTYPE_TUNNEL_MPLS_IN_GRE | RTE_PTYPE_L4_UDP,
>  };
>  
>  /** Structure to generate a simple graph of layers supported by the NIC. */
> @@ -400,7 +411,8 @@ struct mlx5_flow_items {
>  	},
>  	[RTE_FLOW_ITEM_TYPE_UDP] = {
>  		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN,
> -			       RTE_FLOW_ITEM_TYPE_VXLAN_GPE),
> +			       RTE_FLOW_ITEM_TYPE_VXLAN_GPE,
> +			       RTE_FLOW_ITEM_TYPE_MPLS),
>  		.actions = valid_actions,
>  		.mask = &(const struct rte_flow_item_udp){
>  			.hdr = {
> @@ -429,7 +441,8 @@ struct mlx5_flow_items {
>  	[RTE_FLOW_ITEM_TYPE_GRE] = {
>  		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
>  			       RTE_FLOW_ITEM_TYPE_IPV4,
> -			       RTE_FLOW_ITEM_TYPE_IPV6),
> +			       RTE_FLOW_ITEM_TYPE_IPV6,
> +			       RTE_FLOW_ITEM_TYPE_MPLS),
>  		.actions = valid_actions,
>  		.mask = &(const struct rte_flow_item_gre){
>  			.protocol = -1,
> @@ -437,7 +450,26 @@ struct mlx5_flow_items {
>  		.default_mask = &rte_flow_item_gre_mask,
>  		.mask_sz = sizeof(struct rte_flow_item_gre),
>  		.convert = mlx5_flow_create_gre,
> +#ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
> +		.dst_sz = sizeof(struct ibv_flow_spec_gre),
> +#else
>  		.dst_sz = sizeof(struct ibv_flow_spec_tunnel),
> +#endif
> +	},
> +	[RTE_FLOW_ITEM_TYPE_MPLS] = {
> +		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
> +			       RTE_FLOW_ITEM_TYPE_IPV4,
> +			       RTE_FLOW_ITEM_TYPE_IPV6),
> +		.actions = valid_actions,
> +		.mask = &(const struct rte_flow_item_mpls){
> +			.label_tc_s = "\xff\xff\xf0",
> +		},
> +		.default_mask = &rte_flow_item_mpls_mask,
> +		.mask_sz = sizeof(struct rte_flow_item_mpls),
> +		.convert = mlx5_flow_create_mpls,
> +#ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
> +		.dst_sz = sizeof(struct ibv_flow_spec_mpls),
> +#endif
>  	},
>  	[RTE_FLOW_ITEM_TYPE_VXLAN] = {
>  		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
> @@ -865,6 +897,7 @@ struct ibv_spec_header {
>  	struct priv *priv = dev->data->dev_private;
>  	const struct mlx5_flow_items *cur_item = mlx5_flow_items;
>  	unsigned int i;
> +	unsigned int last_voids = 0;
>  	int ret = 0;
>  
>  	/* Initialise the offsets to start after verbs attribute. */
> @@ -874,8 +907,10 @@ struct ibv_spec_header {
>  		const struct mlx5_flow_items *token = NULL;
>  		unsigned int n;
>  
> -		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
> +		if (items->type == RTE_FLOW_ITEM_TYPE_VOID) {
> +			last_voids++;
>  			continue;
> +		}
>  		for (i = 0;
>  		     cur_item->items &&
>  		     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
> @@ -896,7 +931,10 @@ struct ibv_spec_header {
>  		if (ret)
>  			goto exit_item_not_supported;
>  		if (IS_TUNNEL(items->type)) {
> -			if (parser->tunnel) {
> +			if (parser->tunnel &&
> +			    !((items - last_voids - 1)->type ==
> +			      RTE_FLOW_ITEM_TYPE_GRE && items->type ==
> +			      RTE_FLOW_ITEM_TYPE_MPLS)) {
>  				rte_flow_error_set(error, ENOTSUP,
>  						   RTE_FLOW_ERROR_TYPE_ITEM,
>  						   items,
> @@ -904,6 +942,16 @@ struct ibv_spec_header {
>  						   " tunnel encapsulations.");
>  				return -rte_errno;
>  			}
> +			if (items->type == RTE_FLOW_ITEM_TYPE_MPLS &&
> +			    !priv->config.mpls_en) {
> +				rte_flow_error_set(error, ENOTSUP,
> +						   RTE_FLOW_ERROR_TYPE_ITEM,
> +						   items,
> +						   "MPLS not supported or"
> +						   " disabled in firmware"
> +						   " configuration.");
> +				return -rte_errno;
> +			}
>  			if (!priv->config.tunnel_en &&
>  			    parser->rss_conf.level > 1) {
>  				rte_flow_error_set(error, ENOTSUP,
> @@ -921,6 +969,7 @@ struct ibv_spec_header {
>  			for (n = 0; n != hash_rxq_init_n; ++n)
>  				parser->queue[n].offset += cur_item->dst_sz;
>  		}
> +		last_voids = 0;
>  	}
>  	if (parser->drop) {
>  		parser->queue[HASH_RXQ_ETH].offset +=
> @@ -1878,16 +1927,27 @@ struct ibv_spec_header {
>   *   0 on success, a negative errno value otherwise and rte_errno is set.
>   */
>  static int
> -mlx5_flow_create_gre(const struct rte_flow_item *item __rte_unused,
> -		     const void *default_mask __rte_unused,
> +mlx5_flow_create_gre(const struct rte_flow_item *item,
> +		     const void *default_mask,
>  		     struct mlx5_flow_data *data)
>  {
>  	struct mlx5_flow_parse *parser = data->parser;
> +#ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
> +	(void)default_mask;
>  	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
>  	struct ibv_flow_spec_tunnel tunnel = {
>  		.type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
>  		.size = size,
>  	};
> +#else
> +	const struct rte_flow_item_gre *spec = item->spec;
> +	const struct rte_flow_item_gre *mask = item->mask;
> +	unsigned int size = sizeof(struct ibv_flow_spec_gre);
> +	struct ibv_flow_spec_gre tunnel = {
> +		.type = parser->inner | IBV_FLOW_SPEC_GRE,
> +		.size = size,
> +	};
> +#endif
>  	struct ibv_flow_spec_ipv4_ext *ipv4;
>  	struct ibv_flow_spec_ipv6 *ipv6;
>  	unsigned int i;
> @@ -1899,6 +1959,20 @@ struct ibv_spec_header {
>  	/* Default GRE to inner RSS. */
>  	if (!parser->rss_conf.level)
>  		parser->rss_conf.level = 2;
> +#ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
> +	if (spec) {
> +		if (!mask)
> +			mask = default_mask;
> +		tunnel.val.c_ks_res0_ver = spec->c_rsvd0_ver;
> +		tunnel.val.protocol = spec->protocol;
> +		tunnel.mask.c_ks_res0_ver = mask->c_rsvd0_ver;
> +		tunnel.mask.protocol = mask->protocol;
> +		/* Remove unwanted bits from values. */
> +		tunnel.val.c_ks_res0_ver &= tunnel.mask.c_ks_res0_ver;
> +		tunnel.val.protocol &= tunnel.mask.protocol;
> +		tunnel.val.key &= tunnel.mask.key;
> +	}
> +#endif
>  	/* Update encapsulation IP layer protocol. */
>  	for (i = 0; i != hash_rxq_init_n; ++i) {
>  		if (!parser->queue[i].ibv_attr)
> @@ -1932,6 +2006,79 @@ struct ibv_spec_header {
>  }
>  
>  /**
> + * Convert MPLS item to Verbs specification.
> + * MPLS tunnel types currently supported are MPLS-in-GRE and MPLS-in-UDP.
> + *
> + * @param item[in]
> + *   Item specification.
> + * @param default_mask[in]
> + *   Default bit-masks to use when item->mask is not provided.
> + * @param data[in, out]
> + *   User structure.
> + *
> + * @return
> + *   0 on success, a negative errno value otherwise and rte_errno is set.
> + */
> +static int
> +mlx5_flow_create_mpls(const struct rte_flow_item *item,
> +		      const void *default_mask,
> +		      struct mlx5_flow_data *data)
> +{
> +#ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
> +	(void)default_mask;
> +	return rte_flow_error_set(data->error, ENOTSUP,
> +				  RTE_FLOW_ERROR_TYPE_ITEM,
> +				  item,
> +				  "MPLS is not supported by driver");
> +#else
> +	const struct rte_flow_item_mpls *spec = item->spec;
> +	const struct rte_flow_item_mpls *mask = item->mask;
> +	struct mlx5_flow_parse *parser = data->parser;
> +	unsigned int size = sizeof(struct ibv_flow_spec_mpls);
> +	struct ibv_flow_spec_mpls mpls = {
> +		.type = IBV_FLOW_SPEC_MPLS,
> +		.size = size,
> +	};
> +
> +	parser->inner = IBV_FLOW_SPEC_INNER;
> +	if (parser->layer == HASH_RXQ_UDPV4 ||
> +	    parser->layer == HASH_RXQ_UDPV6) {
> +		parser->tunnel =
> +			ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_MPLS_IN_UDP)];
> +		parser->out_layer = parser->layer;
> +	} else {
> +		parser->tunnel =
> +			ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_MPLS_IN_GRE)];
> +		/* parser->out_layer stays as in GRE out_layer. */
> +	}
> +	parser->layer = HASH_RXQ_TUNNEL;
> +	/*
> +	 * For MPLS-in-GRE, RSS level should have been set.
> +	 * For MPLS-in-UDP, use outer RSS.
> +	 */
> +	if (!parser->rss_conf.level)
> +		parser->rss_conf.level = 1;
> +	if (spec) {
> +		if (!mask)
> +			mask = default_mask;
> +		/*
> +		 * The verbs label field includes the entire MPLS header:
> +		 * bits 0:19 - label value field.
> +		 * bits 20:22 - traffic class field.
> +		 * bit 23 - bottom of stack bit.
> +		 * bits 24:31 - ttl field.
> +		 */
> +		mpls.val.label = *(const uint32_t *)spec;
> +		mpls.mask.label = *(const uint32_t *)mask;
> +		/* Remove unwanted bits from values. */
> +		mpls.val.label &= mpls.mask.label;
> +	}
> +	mlx5_flow_create_copy(parser, &mpls, size);
> +	return 0;
> +#endif
> +}
> +
> +/**
>   * Convert mark/flag action to Verbs specification.
>   *
>   * @param parser
> -- 
> 1.9.5
> 

-- 
Nélio Laranjeiro
6WIND


More information about the dev mailing list