[dpdk-dev] [PATCH 3/5] lib/gso: add VxLAN GSO support

Jiayu Hu jiayu.hu at intel.com
Thu Aug 24 16:15:42 CEST 2017


From: Mark Kavanagh <mark.b.kavanagh at intel.com>

This patch adds GSO support for VxLAN-encapsulated packets. Supported
VxLAN packets must have an outer IPv4 header (optionally preceded by a
VLAN tag) and contain an inner TCP/IPv4 packet (optionally with an inner
VLAN tag).

VxLAN GSO assumes that all input packets have correct checksums and
doesn't update checksums for output packets. Additionally, it doesn't
process IP fragmented packets.

As with TCP/IPv4 GSO, VxLAN GSO uses a two-segment MBUF to organize each
output packet, which mandates support for multi-segment mbufs in the TX
functions of the NIC driver. Also, if a packet is GSOed, VxLAN GSO
reduces its MBUF refcnt by 1. As a result, when all of its GSOed
segments are freed, the packet is freed automatically.

Signed-off-by: Mark Kavanagh <mark.b.kavanagh at intel.com>
Signed-off-by: Jiayu Hu <jiayu.hu at intel.com>
---
 lib/librte_gso/Makefile     |   1 +
 lib/librte_gso/gso_common.c | 109 ++++++++++++++++++++++++++++++++++++++++++--
 lib/librte_gso/gso_common.h |  41 ++++++++++++++++-
 lib/librte_gso/gso_tunnel.c |  62 +++++++++++++++++++++++++
 lib/librte_gso/gso_tunnel.h |  46 +++++++++++++++++++
 lib/librte_gso/rte_gso.c    |  12 ++++-
 lib/librte_gso/rte_gso.h    |   4 ++
 7 files changed, 268 insertions(+), 7 deletions(-)
 create mode 100644 lib/librte_gso/gso_tunnel.c
 create mode 100644 lib/librte_gso/gso_tunnel.h

diff --git a/lib/librte_gso/Makefile b/lib/librte_gso/Makefile
index 0f8e38f..a4d1a81 100644
--- a/lib/librte_gso/Makefile
+++ b/lib/librte_gso/Makefile
@@ -44,6 +44,7 @@ LIBABIVER := 1
 SRCS-$(CONFIG_RTE_LIBRTE_GSO) += rte_gso.c
 SRCS-$(CONFIG_RTE_LIBRTE_GSO) += gso_common.c
 SRCS-$(CONFIG_RTE_LIBRTE_GSO) += gso_tcp.c
+SRCS-$(CONFIG_RTE_LIBRTE_GSO) += gso_tunnel.c
 
 # install this header file
 SYMLINK-$(CONFIG_RTE_LIBRTE_GSO)-include += rte_gso.h
diff --git a/lib/librte_gso/gso_common.c b/lib/librte_gso/gso_common.c
index 2b54fbd..65cec44 100644
--- a/lib/librte_gso/gso_common.c
+++ b/lib/librte_gso/gso_common.c
@@ -39,6 +39,7 @@
 #include <rte_ether.h>
 #include <rte_ip.h>
 #include <rte_tcp.h>
+#include <rte_udp.h>
 
 #include "gso_common.h"
 
@@ -156,18 +157,60 @@ gso_do_segment(struct rte_mbuf *pkt,
 	return nb_segs;
 }
 
+static inline void parse_ethernet(struct ether_hdr *eth_hdr,
+		struct rte_mbuf *pkt);
+
+/* Parse the inner frame of a VxLAN packet, given its outer UDP header. */
+static inline void
+parse_vxlan(struct udp_hdr *udp_hdr, struct rte_mbuf *pkt)
+{
+	/* The inner Ethernet frame follows the UDP and VxLAN headers. */
+	char *inner = (char *)(udp_hdr + 1) + sizeof(struct vxlan_hdr);
+
+	/* Save the outer L2 length: parse_ethernet() overwrites l2_len. */
+	pkt->outer_l2_len = pkt->l2_len;
+	/* Flag the tunnel first: the nested parsers test IS_VXLAN_PKT(). */
+	pkt->packet_type |= RTE_PTYPE_TUNNEL_VXLAN;
+	parse_ethernet((struct ether_hdr *)inner, pkt);
+	pkt->l2_len += ETHER_VXLAN_HLEN; /* add udp + vxlan */
+}
+
+/* Classify a UDP header: VxLAN tunnel (by destination port) or plain UDP. */
+static inline void
+parse_udp(struct udp_hdr *udp_hdr, struct rte_mbuf *pkt)
+{
+	const uint16_t vxlan_port = rte_cpu_to_be_16(VXLAN_DEFAULT_PORT);
+
+	pkt->packet_type |= RTE_PTYPE_L4_UDP; /* set on both paths */
+	if (udp_hdr->dst_port != vxlan_port) {
+		pkt->l4_len = sizeof(struct udp_hdr); /* plain IPv4/UDP */
+		return;
+	}
+	parse_vxlan(udp_hdr, pkt); /* outer UDP header of a VxLAN packet */
+}
+
 static inline void
 parse_ipv4(struct ipv4_hdr *ipv4_hdr, struct rte_mbuf *pkt)
 {
 	struct tcp_hdr *tcp_hdr;
+	struct udp_hdr *udp_hdr;
 
 	switch (ipv4_hdr->next_proto_id) {
 	case IPPROTO_TCP:
-		pkt->packet_type |= RTE_PTYPE_L4_TCP;
+		if (IS_VXLAN_PKT(pkt)) { /* inner TCP of a VxLAN packet */
+			pkt->outer_l3_len = pkt->l3_len; /* still outer len */
+			pkt->packet_type |= RTE_PTYPE_INNER_L4_TCP;
+		} else /* not tunnelled */
+			pkt->packet_type |= RTE_PTYPE_L4_TCP;
 		pkt->l3_len = IPv4_HDR_LEN(ipv4_hdr);
 		tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr + pkt->l3_len);
 		pkt->l4_len = TCP_HDR_LEN(tcp_hdr);
 		break;
+	case IPPROTO_UDP: /* plain UDP or VxLAN outer UDP */
+		pkt->l3_len = IPv4_HDR_LEN(ipv4_hdr);
+		udp_hdr = (struct udp_hdr *)((char *)ipv4_hdr + pkt->l3_len);
+		parse_udp(udp_hdr, pkt);
+		break;
 	}
 }
 
@@ -182,13 +225,21 @@ parse_ethernet(struct ether_hdr *eth_hdr, struct rte_mbuf *pkt)
 	if (ethertype == ETHER_TYPE_VLAN) {
 		vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);
 		pkt->l2_len = sizeof(struct vlan_hdr);
-		pkt->packet_type |= RTE_PTYPE_L2_ETHER_VLAN;
+		if (IS_VXLAN_PKT(pkt))
+			pkt->packet_type |= RTE_PTYPE_INNER_L2_ETHER_VLAN;
+		else
+			pkt->packet_type |= RTE_PTYPE_L2_ETHER_VLAN;
 		ethertype = rte_be_to_cpu_16(vlan_hdr->eth_proto);
-	}
+	} else
+		pkt->l2_len = 0;
 
 	switch (ethertype) {
 	case ETHER_TYPE_IPv4:
-		if (IS_VLAN_PKT(pkt)) {
+		if (IS_VXLAN_PKT(pkt)) {
+			if (!IS_INNER_VLAN_PKT(pkt))
+				pkt->packet_type |= RTE_PTYPE_INNER_L2_ETHER;
+			pkt->packet_type |= RTE_PTYPE_INNER_L3_IPV4;
+		} else if (IS_VLAN_PKT(pkt)) {
 			pkt->packet_type |= RTE_PTYPE_L3_IPV4;
 		} else {
 			pkt->packet_type |= RTE_PTYPE_L2_ETHER;
@@ -236,14 +287,62 @@ void
 gso_update_pkt_headers(struct rte_mbuf *pkt, uint16_t nb_segments,
 		struct rte_mbuf **out_segments)
 {
-	struct ipv4_hdr *ipv4_hdr;
+	struct ipv4_hdr *ipv4_hdr, *outer_ipv4_hdr;
 	struct tcp_hdr *tcp_hdr;
+	struct udp_hdr *udp_hdr;
 	struct rte_mbuf *seg;
 	uint32_t sent_seq;
 	uint16_t offset, i;
 	uint16_t tail_seg_idx = nb_segments - 1, id;
+	uint16_t outer_id;
 
 	switch (pkt->packet_type) {
+	case ETHER_VLAN_IPv4_UDP_VXLAN_VLAN_IPv4_TCP_PKT:
+	case ETHER_VLAN_IPv4_UDP_VXLAN_IPv4_TCP_PKT:
+	case ETHER_IPv4_UDP_VXLAN_VLAN_IPv4_TCP_PKT:
+	case ETHER_IPv4_UDP_VXLAN_IPv4_TCP_PKT:
+		/* *_len fields count bytes, so offset via char pointers;
+		 * l3_len is used so inner IPv4 options are skipped too. */
+		outer_ipv4_hdr = (struct ipv4_hdr *)(
+			rte_pktmbuf_mtod(pkt, char *) + pkt->outer_l2_len);
+		ipv4_hdr = (struct ipv4_hdr *)((char *)outer_ipv4_hdr +
+				pkt->outer_l3_len + pkt->l2_len);
+		tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr + pkt->l3_len);
+
+		outer_id = rte_be_to_cpu_16(outer_ipv4_hdr->packet_id);
+		id = rte_be_to_cpu_16(ipv4_hdr->packet_id);
+		sent_seq = rte_be_to_cpu_32(tcp_hdr->sent_seq);
+
+		for (i = 0; i < nb_segments; i++) {
+			seg = out_segments[i];
+
+			/* Update outer IPv4 header */
+			offset = seg->outer_l2_len;
+			update_ipv4_header(rte_pktmbuf_mtod(seg, char *),
+					offset, seg->pkt_len, outer_id);
+			outer_id++;
+
+			/* Update outer UDP header */
+			offset += seg->outer_l3_len;
+			udp_hdr = (struct udp_hdr *)(
+				rte_pktmbuf_mtod(seg, char *) + offset);
+			udp_hdr->dgram_len =
+				rte_cpu_to_be_16(seg->pkt_len - offset);
+
+			/* Update inner IPv4 header */
+			offset += seg->l2_len;
+			update_ipv4_header(rte_pktmbuf_mtod(seg, char *),
+					offset, seg->pkt_len, id);
+			id++;
+
+			/* Update inner TCP header */
+			offset += seg->l3_len;
+			update_tcp_header(rte_pktmbuf_mtod(seg, char *),
+					offset, sent_seq, i < tail_seg_idx);
+
+			sent_seq += seg->next->data_len;
+		}
+		break;
 	case ETHER_VLAN_IPv4_TCP_PKT:
 	case ETHER_IPv4_TCP_PKT:
 		ipv4_hdr = (struct ipv4_hdr *)(rte_pktmbuf_mtod(pkt, char *) +
diff --git a/lib/librte_gso/gso_common.h b/lib/librte_gso/gso_common.h
index d750041..0ad95d3 100644
--- a/lib/librte_gso/gso_common.h
+++ b/lib/librte_gso/gso_common.h
@@ -46,6 +46,8 @@
 #define TCP_HDR_LEN(tcph) ((tcph->data_off & 0xf0) >> 2)
 
 #define ETHER_IPv4_PKT (RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4)
+#define INNER_ETHER_IPv4_TCP_PKT (RTE_PTYPE_INNER_L2_ETHER |\
+		RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_TCP)
 /* Supported packet types */
 /* TCP/IPv4 packet. */
 #define ETHER_IPv4_TCP_PKT (ETHER_IPv4_PKT | RTE_PTYPE_L4_TCP)
@@ -54,9 +56,46 @@
 #define ETHER_VLAN_IPv4_TCP_PKT (RTE_PTYPE_L2_ETHER_VLAN | \
 		RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP)
 
+/* VxLAN packet */
+#define ETHER_IPv4_UDP_VXLAN_IPv4_TCP_PKT (ETHER_IPv4_PKT | \
+		RTE_PTYPE_L4_UDP | \
+		RTE_PTYPE_TUNNEL_VXLAN | \
+		INNER_ETHER_IPv4_TCP_PKT)
+
+/* VxLAN packet with outer VLAN tag. */
+#define ETHER_VLAN_IPv4_UDP_VXLAN_IPv4_TCP_PKT (RTE_PTYPE_L2_ETHER_VLAN | \
+		RTE_PTYPE_L3_IPV4 | \
+		RTE_PTYPE_L4_UDP | \
+		RTE_PTYPE_TUNNEL_VXLAN | \
+		INNER_ETHER_IPv4_TCP_PKT)
+
+/* VxLAN packet with inner VLAN tag. */
+#define ETHER_IPv4_UDP_VXLAN_VLAN_IPv4_TCP_PKT (ETHER_IPv4_PKT | \
+		RTE_PTYPE_L4_UDP | \
+		RTE_PTYPE_TUNNEL_VXLAN | \
+		RTE_PTYPE_INNER_L2_ETHER_VLAN | \
+		RTE_PTYPE_INNER_L3_IPV4  | \
+		RTE_PTYPE_INNER_L4_TCP)
+
+/* VxLAN packet with both outer and inner VLAN tags. */
+#define ETHER_VLAN_IPv4_UDP_VXLAN_VLAN_IPv4_TCP_PKT (\
+		RTE_PTYPE_L2_ETHER_VLAN | \
+		RTE_PTYPE_L3_IPV4 | \
+		RTE_PTYPE_L4_UDP | \
+		RTE_PTYPE_TUNNEL_VXLAN | \
+		RTE_PTYPE_INNER_L2_ETHER_VLAN | \
+		RTE_PTYPE_INNER_L3_IPV4 | \
+		RTE_PTYPE_INNER_L4_TCP)
+
 #define IS_VLAN_PKT(pkt) ((pkt->packet_type & RTE_PTYPE_L2_ETHER_VLAN) == \
 		RTE_PTYPE_L2_ETHER_VLAN)
+/* Inner L2 ptypes are enumerated values: compare the whole masked field. */
+#define IS_INNER_VLAN_PKT(pkt) (((pkt)->packet_type & \
+		RTE_PTYPE_INNER_L2_MASK) == RTE_PTYPE_INNER_L2_ETHER_VLAN)
 
+#define VXLAN_DEFAULT_PORT 4789 /* IANA-assigned VxLAN UDP port (RFC 7348) */
+#define IS_VXLAN_PKT(pkt) (((pkt)->packet_type & RTE_PTYPE_TUNNEL_MASK) == \
+		RTE_PTYPE_TUNNEL_VXLAN) /* whole field: ptypes are enumerated */
 /**
  * Internal function which parses a packet, setting outer_l2/l3_len and
  * l2/l3/l4_len and packet_type.
@@ -92,7 +131,7 @@ void gso_update_pkt_headers(struct rte_mbuf *pkt, uint16_t nb_segments,
  * @param pkt
  *  Packet to segment.
  * @param pkt_hdr_offset
- *  Packet header offset, measured in byte.
+ *  Packet header offset, measured in bytes.
  * @param pyld_unit_size
  *  The max payload length of a GSO segment.
  * @param direct_pool
diff --git a/lib/librte_gso/gso_tunnel.c b/lib/librte_gso/gso_tunnel.c
new file mode 100644
index 0000000..6a04697
--- /dev/null
+++ b/lib/librte_gso/gso_tunnel.c
@@ -0,0 +1,62 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2017 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rte_ether.h>
+
+#include "gso_common.h"
+#include "gso_tunnel.h"
+
+/* Segment a VxLAN-encapsulated packet. Returns gso_do_segment()'s result, or
+ * 1 without segmenting when gso_size cannot hold headers + CRC + payload. */
+int
+gso_tunnel_segment(struct rte_mbuf *pkt, uint16_t gso_size,
+		struct rte_mempool *direct_pool,
+		struct rte_mempool *indirect_pool,
+		struct rte_mbuf **pkts_out, uint16_t nb_pkts_out)
+{
+	uint16_t pyld_unit_size, hdr_offset;
+	int ret;
+
+	hdr_offset = pkt->outer_l2_len + pkt->outer_l3_len + pkt->l2_len +
+		pkt->l3_len + pkt->l4_len;
+	/* Guard the uint16_t subtraction below against wrap-around. */
+	if (gso_size <= hdr_offset + ETHER_CRC_LEN)
+		return 1;
+	pyld_unit_size = gso_size - hdr_offset - ETHER_CRC_LEN;
+
+	ret = gso_do_segment(pkt, hdr_offset, pyld_unit_size, direct_pool,
+			indirect_pool, pkts_out, nb_pkts_out);
+	if (ret > 1)
+		gso_update_pkt_headers(pkt, ret, pkts_out);
+	return ret;
+}
diff --git a/lib/librte_gso/gso_tunnel.h b/lib/librte_gso/gso_tunnel.h
new file mode 100644
index 0000000..a9b2363
--- /dev/null
+++ b/lib/librte_gso/gso_tunnel.h
@@ -0,0 +1,46 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2017 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _GSO_TUNNEL_H_
+#define _GSO_TUNNEL_H_
+
+#include <stdint.h>
+#include <rte_mbuf.h>
+
+int gso_tunnel_segment(struct rte_mbuf *pkt, /* VxLAN packet to segment */
+		uint16_t gso_size, /* max output size, headers + CRC included */
+		struct rte_mempool *direct_pool, /* passed to gso_do_segment() */
+		struct rte_mempool *indirect_pool, /* passed to gso_do_segment() */
+		struct rte_mbuf **pkts_out, /* receives the output segments */
+		uint16_t nb_pkts_out); /* presumably capacity of pkts_out; confirm */
+#endif
diff --git a/lib/librte_gso/rte_gso.c b/lib/librte_gso/rte_gso.c
index fac95f2..f110f18 100644
--- a/lib/librte_gso/rte_gso.c
+++ b/lib/librte_gso/rte_gso.c
@@ -36,6 +36,7 @@
 #include "rte_gso.h"
 #include "gso_common.h"
 #include "gso_tcp.h"
+#include "gso_tunnel.h"
 
 int
 rte_gso_segment(struct rte_mbuf *pkt,
@@ -51,7 +52,8 @@ rte_gso_segment(struct rte_mbuf *pkt,
 			NULL || gso_ctx.indirect_pool == NULL)
 		return -EINVAL;
 
-	if ((gso_ctx.gso_types & RTE_GSO_TCP_IPV4) == 0 ||
+	if ((gso_ctx.gso_types & (RTE_GSO_TCP_IPV4 |
+					RTE_GSO_IPV4_VXLAN_TCP_IPV4)) == 0 ||
 			gso_ctx.gso_size >= pkt->pkt_len ||
 			gso_ctx.gso_size == 0)
 		return 1;
@@ -71,6 +73,14 @@ rte_gso_segment(struct rte_mbuf *pkt,
 				direct_pool, indirect_pool,
 				pkts_out, nb_pkts_out);
 		break;
+	case ETHER_VLAN_IPv4_UDP_VXLAN_VLAN_IPv4_TCP_PKT:
+	case ETHER_VLAN_IPv4_UDP_VXLAN_IPv4_TCP_PKT:
+	case ETHER_IPv4_UDP_VXLAN_VLAN_IPv4_TCP_PKT:
+	case ETHER_IPv4_UDP_VXLAN_IPv4_TCP_PKT:
+		nb_segments = gso_tunnel_segment(pkt, gso_size,
+				direct_pool, indirect_pool,
+				pkts_out, nb_pkts_out);
+		break;
 	default:
 		RTE_LOG(WARNING, GSO, "Unsupported packet type\n");
 		nb_segments = 1;
diff --git a/lib/librte_gso/rte_gso.h b/lib/librte_gso/rte_gso.h
index 77853fa..e1b2c23 100644
--- a/lib/librte_gso/rte_gso.h
+++ b/lib/librte_gso/rte_gso.h
@@ -48,6 +48,10 @@ extern "C" {
 
 #define RTE_GSO_TCP_IPV4 (1ULL << 0)
 /**< GSO flag for TCP/IPv4 packets (containing optional VLAN tag) */
+#define RTE_GSO_IPV4_VXLAN_TCP_IPV4 (1ULL << 1)
+/**< GSO flag for VxLAN packets that contain outer IPv4, and inner
+ * TCP/IPv4 headers (plus optional inner and/or outer VLAN tags).
+ */
 
 /**
  * GSO context structure.
-- 
2.7.4



More information about the dev mailing list