[dpdk-dev] [PATCH] ethdev: Additions to rte_flows to support vTEP encap/decap offload

Declan Doherty declan.doherty at intel.com
Sat Mar 10 01:25:35 CET 2018


This V1 patchset contains the revised proposal to manage hardware
acceleration of virtual tunnel endpoints (vTEP), based on community
feedback on the RFC
(http://dpdk.org/ml/archives/dev/2017-December/084676.html). This
proposal is enabled purely through the rte_flow APIs, with the
addition of some new features which were previously implemented
by the rte_tep APIs proposed in the original RFC. This patchset
ultimately aims to enable the configuration of inline data path
encapsulation and decapsulation of tunnel endpoint network overlays
on accelerated IO devices.

The summary of the additions to the rte_flow are as follows:

- Add new flow actions RTE_FLOW_ACTION_TYPE_VTEP_ENCAP and
RTE_FLOW_ACTION_TYPE_VTEP_DECAP to rte_flow to support specification
of encapsulation and decapsulation of a virtual tunnel endpoint on
hardware.

- Updates the matching pattern item definition
description to specify that all actions which modify a packet
must be specified in the explicit order in which they are to be executed.

- Introduces support for the use of pipeline metadata in
the flow pattern definition and the population of metadata fields
from flow actions.

- Adds group counters to enable statistics to be kept on groups of
flows such as all ingress/egress flows of a vTEP

- Adds group_action to allow a flow's termination to be a group/table
within the device.

A high level summary of the proposed usage model is as follows:

1. Decapsulation

1.1. Decapsulation of vTEP outer headers and forwarding of all traffic
     to the same queue/s or port would have the following flow
     parameters (pseudo-code used here).

struct rte_flow_attr attr = { .ingress = 1 };

struct rte_flow_item pattern[] = {
	{ .type = RTE_FLOW_ITEM_TYPE_ETH,  .spec = &eth_item },
	{ .type = RTE_FLOW_ITEM_TYPE_IPV4, .spec = &ipv4_item },
	{ .type = RTE_FLOW_ITEM_TYPE_UDP, .spec = &udp_item },
	{ .type = RTE_FLOW_ITEM_TYPE_VxLAN, .spec = &vxlan_item },
	{ .type = RTE_FLOW_ITEM_TYPE_END }
};

struct rte_flow_action actions[] = {
	{ .type = RTE_FLOW_ACTION_TYPE_VTEP_DECAP, .conf = VxLAN },
	{ .type = RTE_FLOW_ACTION_TYPE_VF, .conf = &vf_action  },
	{ .type = RTE_FLOW_ACTION_TYPE_END }

}

1.2.
Decapsulation of vTEP outer headers and matching on inner
headers, and forwarding to the same queue/s or port.

1.2.1.
The same scenario as above, but either the application
or hardware requires configuration as 2 logically independent
operations (viewing it as 2 logical tables). The first stage
is the flow rule which defines the pattern to match the vTEP
and the action to decapsulate the packet, and the second
stage table matches the inner header and defines the actions,
forward to port etc.

flow rule for outer header on table 0

struct rte_flow_attr attr = { .ingress = 1, .table = 0 };

struct rte_flow_item pattern[] = {
	{ .type = RTE_FLOW_ITEM_TYPE_ETH,  .spec = &eth_item },
	{ .type = RTE_FLOW_ITEM_TYPE_IPV4, .spec = &ipv4_item },
	{ .type = RTE_FLOW_ITEM_TYPE_UDP, .spec = &udp_item },
	{ .type = RTE_FLOW_ITEM_TYPE_VxLAN, .spec = &vxlan_item },
	{ .type = RTE_FLOW_ITEM_TYPE_END }
};

struct rte_flow_action actions[] = {
	{ .type = RTE_FLOW_ACTION_TYPE_GROUP_COUNT, .conf = &vtep_counter },
	{ .type = RTE_FLOW_ACTION_TYPE_METADATA, .conf = &metadata_action },
	{ .type = RTE_FLOW_ACTION_TYPE_VTEP_DECAP, .conf = VxLAN },
	{ .type = RTE_FLOW_ACTION_TYPE_GROUP, .conf = &group_action = { .id = 1 } },
	{ .type = RTE_FLOW_ACTION_TYPE_END }
}

flow rule for inner header on table 1

struct rte_flow_attr attr = { .ingress = 1, .table = 1 };

struct rte_flow_item pattern[] = {
	{ .type = RTE_FLOW_ITEM_TYPE_METADATA,  .spec = &metadata_item },
	{ .type = RTE_FLOW_ITEM_TYPE_ETH,  .spec = &eth_item },
	{ .type = RTE_FLOW_ITEM_TYPE_IPV4, .spec = &ipv4_item },
	{ .type = RTE_FLOW_ITEM_TYPE_TCP, .spec = &tcp_item },
	{ .type = RTE_FLOW_ITEM_TYPE_END }
};

struct rte_flow_action actions[] = {
	{ .type = RTE_FLOW_ACTION_TYPE_PORT, .conf = &port_action = { port_id } },
	{ .type = RTE_FLOW_ACTION_TYPE_END }
}

Note that the metadata action in the flow rule in table 0 is generating
the metadata in the pipeline which is then used as part of the flow
pattern in table 1 to specify the exact flow to match against. In the
case where exact match rules are being provided by the application,
this metadata could be provided by the application in both rules.
If there was wildcard matching happening at the first table then this
metadata could be generated by hw, but this would require an extension to
the currently proposed API to allow specification of how the metadata
should be generated.

2. Encapsulation

Encapsulation of all traffic matching a specific flow pattern to a
specified vTEP and egressing to a particular port.

struct rte_flow_attr attr = { .egress = 1 };

struct rte_flow_item pattern[] = {
	{ .type = RTE_FLOW_ITEM_TYPE_ETH, .spec = &eth_item },
	{ .type = RTE_FLOW_ITEM_TYPE_IPV4, .spec = &ipv4_item },
	{ .type = RTE_FLOW_ITEM_TYPE_TCP, .spec = &tcp_item },
	{ .type = RTE_FLOW_ITEM_TYPE_END }
};

struct rte_flow_action_vtep_encap encap_action = {
	.patterns = {
		{ .type=eth, .item = {} },
		{ .type=ipv4, .item = {} },
		{ .type=udp, .item = {} },
		{ .type=vxlan, .item = {} } }
};

struct rte_flow_action actions[] = {
	{ .type = RTE_FLOW_ACTION_TYPE_GROUP_COUNT, .conf = &group_count } },
	{ .type = RTE_FLOW_ACTION_TYPE_VTEP_ENCAP, .conf = &encap_action } },
	{ .type = RTE_FLOW_ACTION_TYPE_PORT, .conf = &port_action = { port_id } },
	{ .type = RTE_FLOW_ACTION_TYPE_END }
}

Signed-off-by: Declan Doherty <declan.doherty at intel.com>
---
 lib/librte_ether/rte_flow.h | 166 +++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 163 insertions(+), 3 deletions(-)

diff --git a/lib/librte_ether/rte_flow.h b/lib/librte_ether/rte_flow.h
index 13e420218..8eb838c99 100644
--- a/lib/librte_ether/rte_flow.h
+++ b/lib/librte_ether/rte_flow.h
@@ -323,6 +323,13 @@ enum rte_flow_item_type {
 	 * See struct rte_flow_item_geneve.
 	 */
 	RTE_FLOW_ITEM_TYPE_GENEVE,
+
+	/**
+	 * Matches specified pipeline metadata field.
+	 *
+	 * See struct rte_flow_item_metadata.
+	 */
+	RTE_FLOW_ITEM_TYPE_METADATA
 };
 
 /**
@@ -814,6 +821,17 @@ static const struct rte_flow_item_geneve rte_flow_item_geneve_mask = {
 };
 #endif
 
+/**
+ * RTE_FLOW_ITEM_TYPE_METADATA
+ *
+ * Allows arbitrary pipeline metadata to be used in a flow pattern.
+ */
+struct rte_flow_item_metadata {
+	uint32_t id;		/**< field identifier */
+	uint32_t size;		/**< field size */
+	uint8_t bytes[];	/**< field value */
+};
+
 /**
  * Matching pattern item definition.
  *
@@ -859,9 +877,11 @@ struct rte_flow_item {
  *
  * Each possible action is represented by a type. Some have associated
  * configuration structures. Several actions combined in a list can be
- * affected to a flow rule. That list is not ordered.
+ * affected to a flow rule. That list is not ordered, with the exception of
+ * actions which modify the packet itself, these packet modification actions
+ * must be specified in the explicit order in which they are to be executed.
  *
- * They fall in three categories:
+ * They fall in four categories:
  *
  * - Terminating actions (such as QUEUE, DROP, RSS, PF, VF) that prevent
  *   processing matched packets by subsequent flow rules, unless overridden
@@ -870,6 +890,10 @@ struct rte_flow_item {
  * - Non terminating actions (PASSTHRU, DUP) that leave matched packets up
  *   for additional processing by subsequent flow rules.
  *
+ * - Non terminating meta actions that do not affect the fate of
+ *   packets but result in modification of the packet itself (SECURITY,
+ *   VTEP_ENCAP, VTEP_DECAP).
+ *
  * - Other non terminating meta actions that do not affect the fate of
  *   packets (END, VOID, MARK, FLAG, COUNT).
  *
@@ -879,6 +903,11 @@ struct rte_flow_item {
  * Only the last action of a given type is taken into account. PMDs still
  * perform error checking on the entire list.
  *
+ * Note that it may be possible for some packet modification actions to be
+ * specified more than once on a single flow rule. For example an action which
+ * modifies an IP header could be specified for both inner and outer IP headers
+ * on a tunneled packet.
+ *
  * Note that PASSTHRU is the only action able to override a terminating
  * rule.
  */
@@ -1010,7 +1039,50 @@ enum rte_flow_action_type {
 	 *
 	 * See struct rte_flow_action_security.
 	 */
-	RTE_FLOW_ACTION_TYPE_SECURITY
+	RTE_FLOW_ACTION_TYPE_SECURITY,
+
+	/**
+	 * Enable flow group counter for flow.
+	 *
+	 * Group counters can be retrieved through
+	 * rte_flow_query_group_count().
+	 *
+	 * See struct rte_flow_action_group_count.
+	 */
+	RTE_FLOW_ACTION_TYPE_GROUP_COUNT,
+
+	/**
+	 *
+	 * See struct rte_flow_action_vtep_encap.
+	 */
+	RTE_FLOW_ACTION_TYPE_VTEP_ENCAP,
+
+	/**
+	 * Decapsulate all the headers of the vTEP
+	 *
+	 * See struct rte_flow_action_vtep_decap.
+	 */
+	RTE_FLOW_ACTION_TYPE_VTEP_DECAP,
+
+	/**
+	 * [META]
+	 *
+	 * Set specific metadata field associated with packet which is then
+	 * available to further pipeline stages.
+	 *
+	 * See struct rte_flow_action_metadata.
+	 */
+	RTE_FLOW_ACTION_TYPE_METADATA,
+	
+	/**
+	 * Send packet to a group. In a logical hierarchy of groups (flow
+	 * tables) this allows the terminating action to be a next stage table,
+	 * which can match on further elements of the packet or on metadata
+	 * generated from previous group stages.
+	 * 
+	 * See struct rte_flow_action_group.
+	 */
+	RTE_FLOW_ACTION_TYPE_GROUP
 };
 
 /**
@@ -1103,6 +1175,26 @@ struct rte_flow_action_vf {
 	uint32_t id; /**< VF ID to redirect packets to. */
 };
 
+/**
+ * RTE_FLOW_ACTION_TYPE_GROUP
+ * 
+ * Redirects packets to the group of flows on the current device.
+ *
+ * This action may be used to enable a number of functions. On hw
+ * devices which support multiple tables chained together, this
+ * action allows the output of one table to be the input of
+ * another. For applications which need to program flow rules for
+ * operations on a tunnel packet on the inner and outer header
+ * separately, the groups can be used to enable a PMD to support
+ * two levels of virtual tables which it can then flatten into
+ * a single rule before programming to hardware.
+ * 
+ * Terminating by default.
+ */
+struct rte_flow_action_group {
+	uint32_t id;
+};
+
 /**
  * RTE_FLOW_ACTION_TYPE_METER
  *
@@ -1148,6 +1240,54 @@ struct rte_flow_action_security {
 	void *security_session; /**< Pointer to security session structure. */
 };
 
+/**
+ * RTE_FLOW_ACTION_TYPE_GROUP_COUNT
+ *
+ * A packet/byte counter which can be shared across a group of flows programmed
+ * on the same port/switch domain.
+ *
+ * Non-terminating by default.
+ */
+struct rte_flow_action_group_count {
+	uint32_t id;
+};
+
+/**
+ * RTE_FLOW_ACTION_TYPE_METADATA
+ *
+ * Set a specified metadata field in hw pipeline with value for consumption
+ * on further pipeline stages or on host interface.
+ *
+ * Non-terminating by default.
+ */
+struct rte_flow_action_metadata {
+	uint32_t id;		/**< field identifier */
+	uint32_t size;		/**< field size */
+	uint8_t bytes[];	/**< field value */
+};
+
+/**
+ * RTE_FLOW_ACTION_TYPE_VTEP_ENCAP
+ *
+ * Non-terminating by default.
+ */
+struct rte_flow_action_vtep_encap {
+	struct rte_flow_action_item {
+		enum rte_flow_item_type type;	/**< Item type. */
+		const void *item; 		/**< Item definition. */
+	} *pattern;
+	/**< vTEP definition */
+};
+
+/**
+ * RTE_FLOW_ACTION_TYPE_VTEP_DECAP
+ *
+ * Non-terminating by default.
+ */
+struct rte_flow_action_vtep_decap {
+	enum rte_flow_item_type type; /**< Item type. */
+};
+
 /**
  * Definition of a single action.
  *
@@ -1476,6 +1616,26 @@ rte_flow_copy(struct rte_flow_desc *fd, size_t len,
 	      const struct rte_flow_item *items,
 	      const struct rte_flow_action *actions);
 
+/**
+ * Get packet/byte counts for a group counter. A group counter is a
+ * counter which can be shared among multiple flows.
+ *
+ * @param[in]	port_id
+ *
+ * @param[in]	group_count_id
+ * @param[out]	packets
+ * @param[out]	bytes
+ * @param[out]	error
+ *
+ * @return
+ */
+int
+rte_flow_query_group_count(uint16_t port_id,
+		uint32_t group_count_id,
+		uint64_t *packets, uint64_t *bytes,
+		struct rte_flow_error *error);
+
+
 #ifdef __cplusplus
 }
 #endif
-- 
2.14.3



More information about the dev mailing list