[2/4] net/mlx5: add tos and ttl fields support on E-Switch

Message ID 1546109501-24865-3-git-send-email-viacheslavo@mellanox.com (mailing list archive)
State Superseded, archived
Headers
Series net/mlx5: add tos and ttl flower match and tunnel keys |

Checks

Context Check Description
ci/checkpatch warning coding style issues
ci/Intel-compilation success Compilation OK

Commit Message

Slava Ovsiienko Dec. 29, 2018, 6:51 p.m. UTC
This patch adds support for the type-of-service and time-to-live
IP header fields on E-Switch. Match patterns with masking are added
for both fields. These fields can also be set in the VXLAN
tunnel encapsulation header.

This is critical for some Open vSwitch configurations
on overlay (tunneled) networks, where the TOS field of the
inner header can be inherited from the outer header.

Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
---
 drivers/net/mlx5/mlx5_flow_tcf.c | 166 +++++++++++++++++++++++++++++++++++++--
 1 file changed, 160 insertions(+), 6 deletions(-)
  

Patch

diff --git a/drivers/net/mlx5/mlx5_flow_tcf.c b/drivers/net/mlx5/mlx5_flow_tcf.c
index 87585ed..ca8ea0b 100644
--- a/drivers/net/mlx5/mlx5_flow_tcf.c
+++ b/drivers/net/mlx5/mlx5_flow_tcf.c
@@ -372,6 +372,8 @@  enum flow_tcf_tunact_type {
 #define FLOW_TCF_ENCAP_UDP_SRC (1u << 6)
 #define FLOW_TCF_ENCAP_UDP_DST (1u << 7)
 #define FLOW_TCF_ENCAP_VXLAN_VNI (1u << 8)
+#define FLOW_TCF_ENCAP_IP_TTL (1u << 9)
+#define FLOW_TCF_ENCAP_IP_TOS (1u << 10)
 
 /**
  * Structure for holding netlink context.
@@ -457,6 +459,8 @@  struct flow_tcf_vxlan_decap {
 struct flow_tcf_vxlan_encap {
 	struct flow_tcf_tunnel_hdr hdr;
 	uint32_t mask;
+	uint8_t ip_tos;
+	uint8_t ip_ttl_hop;
 	struct {
 		struct ether_addr dst;
 		struct ether_addr src;
@@ -1303,6 +1307,20 @@  struct pedit_parser {
 					  " must be specified for"
 					  " vxlan encapsulation");
 	}
+	if (mask->hdr.type_of_service &&
+	    mask->hdr.type_of_service != 0xff)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
+					  "no support for partial mask on"
+					  " \"ipv4.hdr.type_of_service\" field"
+					  " for vxlan encapsulation");
+	if (mask->hdr.time_to_live &&
+	    mask->hdr.time_to_live != 0xff)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
+					  "no support for partial mask on"
+					  " \"ipv4.hdr.time_to_live\" field"
+					  " for vxlan encapsulation");
 	return 0;
 }
 
@@ -1324,6 +1342,7 @@  struct pedit_parser {
 {
 	const struct rte_flow_item_ipv6 *spec = item->spec;
 	const struct rte_flow_item_ipv6 *mask = item->mask;
+	uint8_t msk6;
 
 	if (!spec) {
 		/*
@@ -1389,6 +1408,20 @@  struct pedit_parser {
 					  " must be specified for"
 					  " vxlan encapsulation");
 	}
+	msk6 = (rte_be_to_cpu_32(mask->hdr.vtc_flow) >>
+		IPV6_HDR_TC_SHIFT) & 0xff;
+	if (msk6 && msk6 != 0xff)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
+					  "no support for partial mask on"
+					  " \"ipv6.hdr.vtc_flow.tos\" field"
+					  " for vxlan encapsulation");
+	if (mask->hdr.hop_limits && mask->hdr.hop_limits != 0xff)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
+					  "no support for partial mask on"
+					  " \"ipv6.hdr.hop_limits\" field"
+					  " for vxlan encapsulation");
 	return 0;
 }
 
@@ -2476,16 +2509,31 @@  struct pedit_parser {
 				SZ_NLATTR_TYPE_OF(uint8_t) + /* VLAN prio. */
 				SZ_NLATTR_TYPE_OF(uint16_t); /* VLAN ID. */
 			break;
-		case RTE_FLOW_ITEM_TYPE_IPV4:
+		case RTE_FLOW_ITEM_TYPE_IPV4: {
+			const struct rte_flow_item_ipv4 *ipv4 = items->mask;
+
 			size +=	SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
 				SZ_NLATTR_TYPE_OF(uint32_t) * 4;
 				/* dst/src IP addr and mask. */
+			if (ipv4 && ipv4->hdr.time_to_live)
+				size += SZ_NLATTR_TYPE_OF(uint8_t) * 2;
+			if (ipv4 && ipv4->hdr.type_of_service)
+				size += SZ_NLATTR_TYPE_OF(uint8_t) * 2;
 			break;
-		case RTE_FLOW_ITEM_TYPE_IPV6:
+		}
+		case RTE_FLOW_ITEM_TYPE_IPV6: {
+			const struct rte_flow_item_ipv6 *ipv6 = items->mask;
+
 			size +=	SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
 				SZ_NLATTR_DATA_OF(IPV6_ADDR_LEN) * 4;
 				/* dst/src IP addr and mask. */
+			if (ipv6 && ipv6->hdr.hop_limits)
+				size += SZ_NLATTR_TYPE_OF(uint8_t) * 2;
+			if (ipv6 && (rte_be_to_cpu_32(ipv6->hdr.vtc_flow) &
+				     (0xfful << IPV6_HDR_TC_SHIFT)))
+				size += SZ_NLATTR_TYPE_OF(uint8_t) * 2;
 			break;
+		}
 		case RTE_FLOW_ITEM_TYPE_UDP:
 			size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
 				SZ_NLATTR_TYPE_OF(uint16_t) * 4;
@@ -2553,12 +2601,27 @@  struct pedit_parser {
 		case RTE_FLOW_ITEM_TYPE_ETH:
 			/* This item does not require message buffer. */
 			break;
-		case RTE_FLOW_ITEM_TYPE_IPV4:
+		case RTE_FLOW_ITEM_TYPE_IPV4: {
+			const struct rte_flow_item_ipv4 *ipv4 = items->mask;
+
 			size += SZ_NLATTR_DATA_OF(IPV4_ADDR_LEN) * 2;
+			if (ipv4 && ipv4->hdr.time_to_live)
+				size += SZ_NLATTR_TYPE_OF(uint8_t) * 2;
+			if (ipv4 && ipv4->hdr.type_of_service)
+				size += SZ_NLATTR_TYPE_OF(uint8_t) * 2;
 			break;
-		case RTE_FLOW_ITEM_TYPE_IPV6:
+		}
+		case RTE_FLOW_ITEM_TYPE_IPV6: {
+			const struct rte_flow_item_ipv6 *ipv6 = items->mask;
+
 			size += SZ_NLATTR_DATA_OF(IPV6_ADDR_LEN) * 2;
+			if (ipv6 && ipv6->hdr.hop_limits)
+				size += SZ_NLATTR_TYPE_OF(uint8_t) * 2;
+			if (ipv6 && (rte_be_to_cpu_32(ipv6->hdr.vtc_flow) &
+				     (0xfful << IPV6_HDR_TC_SHIFT)))
+				size += SZ_NLATTR_TYPE_OF(uint8_t) * 2;
 			break;
+		}
 		case RTE_FLOW_ITEM_TYPE_UDP: {
 			const struct rte_flow_item_udp *udp = items->mask;
 
@@ -2936,11 +2999,14 @@  struct pedit_parser {
  *
  * @param[in] spec
  *   RTE_FLOW_ITEM_TYPE_IPV4 entry specification.
+ * @param[in] mask
+ *   RTE_FLOW_ITEM_TYPE_IPV4 entry mask.
  * @param[out] encap
  *   Structure to fill the gathered IPV4 address data.
  */
 static void
 flow_tcf_parse_vxlan_encap_ipv4(const struct rte_flow_item_ipv4 *spec,
+				const struct rte_flow_item_ipv4 *mask,
 				struct flow_tcf_vxlan_encap *encap)
 {
 	/* Item must be validated before. No redundant checks. */
@@ -2949,6 +3015,14 @@  struct pedit_parser {
 	encap->ipv4.src = spec->hdr.src_addr;
 	encap->mask |= FLOW_TCF_ENCAP_IPV4_SRC |
 		       FLOW_TCF_ENCAP_IPV4_DST;
+	if (mask && mask->hdr.type_of_service) {
+		encap->mask |= FLOW_TCF_ENCAP_IP_TOS;
+		encap->ip_tos = spec->hdr.type_of_service;
+	}
+	if (mask && mask->hdr.time_to_live) {
+		encap->mask |= FLOW_TCF_ENCAP_IP_TTL;
+		encap->ip_ttl_hop = spec->hdr.time_to_live;
+	}
 }
 
 /**
@@ -2959,11 +3033,14 @@  struct pedit_parser {
  *
  * @param[in] spec
  *   RTE_FLOW_ITEM_TYPE_IPV6 entry specification.
+ * @param[in] mask
+ *   RTE_FLOW_ITEM_TYPE_IPV6 entry mask.
  * @param[out] encap
  *   Structure to fill the gathered IPV6 address data.
  */
 static void
 flow_tcf_parse_vxlan_encap_ipv6(const struct rte_flow_item_ipv6 *spec,
+				const struct rte_flow_item_ipv6 *mask,
 				struct flow_tcf_vxlan_encap *encap)
 {
 	/* Item must be validated before. No redundant checks. */
@@ -2972,6 +3049,19 @@  struct pedit_parser {
 	memcpy(encap->ipv6.src, spec->hdr.src_addr, IPV6_ADDR_LEN);
 	encap->mask |= FLOW_TCF_ENCAP_IPV6_SRC |
 		       FLOW_TCF_ENCAP_IPV6_DST;
+	if (mask) {
+		if ((rte_be_to_cpu_32(mask->hdr.vtc_flow) >>
+		    IPV6_HDR_TC_SHIFT) & 0xff) {
+			encap->mask |= FLOW_TCF_ENCAP_IP_TOS;
+			encap->ip_tos = (rte_be_to_cpu_32
+						(spec->hdr.vtc_flow) >>
+						 IPV6_HDR_TC_SHIFT) & 0xff;
+		}
+		if (mask->hdr.hop_limits) {
+			encap->mask |= FLOW_TCF_ENCAP_IP_TTL;
+			encap->ip_ttl_hop = spec->hdr.hop_limits;
+		}
+	}
 }
 
 /**
@@ -3066,11 +3156,15 @@  struct pedit_parser {
 			break;
 		case RTE_FLOW_ITEM_TYPE_IPV4:
 			spec.ipv4 = items->spec;
-			flow_tcf_parse_vxlan_encap_ipv4(spec.ipv4, encap);
+			mask.ipv4 = items->mask;
+			flow_tcf_parse_vxlan_encap_ipv4(spec.ipv4, mask.ipv4,
+							encap);
 			break;
 		case RTE_FLOW_ITEM_TYPE_IPV6:
 			spec.ipv6 = items->spec;
-			flow_tcf_parse_vxlan_encap_ipv6(spec.ipv6, encap);
+			mask.ipv6 = items->mask;
+			flow_tcf_parse_vxlan_encap_ipv6(spec.ipv6, mask.ipv6,
+							encap);
 			break;
 		case RTE_FLOW_ITEM_TYPE_UDP:
 			mask.udp = items->mask;
@@ -3383,10 +3477,35 @@  struct pedit_parser {
 					 TCA_FLOWER_KEY_IPV4_DST_MASK,
 					 mask.ipv4->hdr.dst_addr);
 			}
+			if (mask.ipv4->hdr.time_to_live) {
+				mnl_attr_put_u8
+					(nlh, tunnel_outer ?
+					 TCA_FLOWER_KEY_ENC_IP_TTL :
+					 TCA_FLOWER_KEY_IP_TTL,
+					 spec.ipv4->hdr.time_to_live);
+				mnl_attr_put_u8
+					(nlh, tunnel_outer ?
+					 TCA_FLOWER_KEY_ENC_IP_TTL_MASK :
+					 TCA_FLOWER_KEY_IP_TTL_MASK,
+					 mask.ipv4->hdr.time_to_live);
+			}
+			if (mask.ipv4->hdr.type_of_service) {
+				mnl_attr_put_u8
+					(nlh, tunnel_outer ?
+					 TCA_FLOWER_KEY_ENC_IP_TOS :
+					 TCA_FLOWER_KEY_IP_TOS,
+					 spec.ipv4->hdr.type_of_service);
+				mnl_attr_put_u8
+					(nlh, tunnel_outer ?
+					 TCA_FLOWER_KEY_ENC_IP_TOS_MASK :
+					 TCA_FLOWER_KEY_IP_TOS_MASK,
+					 mask.ipv4->hdr.type_of_service);
+			}
 			assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len);
 			break;
 		case RTE_FLOW_ITEM_TYPE_IPV6: {
 			bool ipv6_src, ipv6_dst;
+			uint8_t msk6, tos6;
 
 			item_flags |= (item_flags & MLX5_FLOW_LAYER_TUNNEL) ?
 				      MLX5_FLOW_LAYER_INNER_L3_IPV6 :
@@ -3472,6 +3591,33 @@  struct pedit_parser {
 					     IPV6_ADDR_LEN,
 					     mask.ipv6->hdr.dst_addr);
 			}
+			if (mask.ipv6->hdr.hop_limits) {
+				mnl_attr_put_u8
+					(nlh, tunnel_outer ?
+					 TCA_FLOWER_KEY_ENC_IP_TTL :
+					 TCA_FLOWER_KEY_IP_TTL,
+					 spec.ipv6->hdr.hop_limits);
+				mnl_attr_put_u8
+					(nlh, tunnel_outer ?
+					 TCA_FLOWER_KEY_ENC_IP_TTL_MASK :
+					 TCA_FLOWER_KEY_IP_TTL_MASK,
+					 mask.ipv6->hdr.hop_limits);
+			}
+			msk6 = (rte_be_to_cpu_32(mask.ipv6->hdr.vtc_flow) >>
+				IPV6_HDR_TC_SHIFT) & 0xff;
+			if (msk6) {
+				tos6 = (rte_be_to_cpu_32
+					(spec.ipv6->hdr.vtc_flow) >>
+						IPV6_HDR_TC_SHIFT) & 0xff;
+				mnl_attr_put_u8
+					(nlh, tunnel_outer ?
+					 TCA_FLOWER_KEY_ENC_IP_TOS :
+					 TCA_FLOWER_KEY_IP_TOS, tos6);
+				mnl_attr_put_u8
+					(nlh, tunnel_outer ?
+					 TCA_FLOWER_KEY_ENC_IP_TOS_MASK :
+					 TCA_FLOWER_KEY_IP_TOS_MASK, msk6);
+			}
 			assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len);
 			break;
 		}
@@ -3824,6 +3970,14 @@  struct pedit_parser {
 					 TCA_TUNNEL_KEY_ENC_IPV6_DST,
 					 sizeof(encap.vxlan->ipv6.dst),
 					 &encap.vxlan->ipv6.dst);
+			if (encap.vxlan->mask & FLOW_TCF_ENCAP_IP_TTL)
+				mnl_attr_put_u8(nlh,
+					 TCA_TUNNEL_KEY_ENC_TTL,
+					 encap.vxlan->ip_ttl_hop);
+			if (encap.vxlan->mask & FLOW_TCF_ENCAP_IP_TOS)
+				mnl_attr_put_u8(nlh,
+					 TCA_TUNNEL_KEY_ENC_TOS,
+					 encap.vxlan->ip_tos);
 			if (encap.vxlan->mask & FLOW_TCF_ENCAP_VXLAN_VNI)
 				mnl_attr_put_u32(nlh,
 					 TCA_TUNNEL_KEY_ENC_KEY_ID,