[dpdk-dev,v3,3/5] gso: add VxLAN GSO support

Message ID 1505184211-36728-4-git-send-email-jiayu.hu@intel.com
State Superseded, archived
Headers show

Checks

Context Check Description
checkpatch success coding style OK
Intel-compilation fail Compilation issues

Commit Message

Jiayu Hu Sept. 12, 2017, 2:43 a.m.
From: Mark Kavanagh <mark.b.kavanagh@intel.com>

This patch adds GSO support for VxLAN-encapsulated packets. Supported
VxLAN packets must have an outer IPv4 header (prepended by an optional
VLAN tag), and contain an inner TCP/IPv4 packet (with an optional inner
VLAN tag).

VxLAN GSO assumes that all input packets have correct checksums and
doesn't update checksums for output packets. Additionally, it doesn't
process IP fragmented packets.

As with TCP/IPv4 GSO, VxLAN GSO uses a two-segment MBUF to organize each
output packet, which mandates support for multi-segment mbufs in the TX
functions of the NIC driver. Also, if a packet is GSOed, VxLAN GSO
reduces its MBUF refcnt by 1. As a result, when all of its GSOed
segments are freed, the packet is freed automatically.

Signed-off-by: Mark Kavanagh <mark.b.kavanagh@intel.com>
Signed-off-by: Jiayu Hu <jiayu.hu@intel.com>
---
 lib/librte_gso/Makefile          |  1 +
 lib/librte_gso/gso_common.c      | 48 ++++++++++++++++++++++-
 lib/librte_gso/gso_common.h      | 33 ++++++++++++++++
 lib/librte_gso/gso_tunnel_tcp4.c | 85 ++++++++++++++++++++++++++++++++++++++++
 lib/librte_gso/gso_tunnel_tcp4.h | 76 +++++++++++++++++++++++++++++++++++
 lib/librte_gso/rte_gso.c         |  7 +++-
 6 files changed, 248 insertions(+), 2 deletions(-)
 create mode 100644 lib/librte_gso/gso_tunnel_tcp4.c
 create mode 100644 lib/librte_gso/gso_tunnel_tcp4.h

Patch hide | download patch | download mbox

diff --git a/lib/librte_gso/Makefile b/lib/librte_gso/Makefile
index 2be64d1..e6d41df 100644
--- a/lib/librte_gso/Makefile
+++ b/lib/librte_gso/Makefile
@@ -44,6 +44,7 @@  LIBABIVER := 1
 SRCS-$(CONFIG_RTE_LIBRTE_GSO) += rte_gso.c
 SRCS-$(CONFIG_RTE_LIBRTE_GSO) += gso_common.c
 SRCS-$(CONFIG_RTE_LIBRTE_GSO) += gso_tcp4.c
+SRCS-$(CONFIG_RTE_LIBRTE_GSO) += gso_tunnel_tcp4.c
 
 # install this header file
 SYMLINK-$(CONFIG_RTE_LIBRTE_GSO)-include += rte_gso.h
diff --git a/lib/librte_gso/gso_common.c b/lib/librte_gso/gso_common.c
index 7c32e03..c6779d0 100644
--- a/lib/librte_gso/gso_common.c
+++ b/lib/librte_gso/gso_common.c
@@ -39,6 +39,7 @@ 
 #include <rte_ether.h>
 #include <rte_ip.h>
 #include <rte_tcp.h>
+#include <rte_udp.h>
 
 #include "gso_common.h"
 
@@ -193,10 +194,55 @@  update_inner_tcp4_header(struct rte_mbuf *pkt, uint8_t ipid_delta,
 	}
 }
 
+/* Set outer IPv4 total_length (pkt_len minus outer L2) and packet_id (id). */
+static inline void
+update_outer_ipv4_header(struct rte_mbuf *pkt, uint16_t id)
+{
+	const uint16_t l3_off = pkt->outer_l2_len;
+	struct ipv4_hdr *outer_ip;
+
+	outer_ip = rte_pktmbuf_mtod_offset(pkt, struct ipv4_hdr *, l3_off);
+	outer_ip->packet_id = rte_cpu_to_be_16(id);
+	outer_ip->total_length = rte_cpu_to_be_16(pkt->pkt_len - l3_off);
+}
+
+/* Set the outer UDP dgram_len to pkt_len minus the outer L2 + L3 headers. */
+static inline void
+update_outer_udp_header(struct rte_mbuf *pkt)
+{
+	/* The outer UDP header is located after the outer L2 and L3 headers. */
+	const uint16_t udp_off = pkt->outer_l2_len + pkt->outer_l3_len;
+	struct udp_hdr *outer_udp;
+
+	outer_udp = rte_pktmbuf_mtod_offset(pkt, struct udp_hdr *, udp_off);
+	outer_udp->dgram_len = rte_cpu_to_be_16(pkt->pkt_len - udp_off);
+}
+
+/* Refresh outer IPv4/UDP headers of each segment, then the inner headers. */
+static inline void
+update_ipv4_vxlan_tcp4_header(struct rte_mbuf *pkt, uint8_t ipid_delta,
+		struct rte_mbuf **segs, uint16_t nb_segs)
+{
+	const struct ipv4_hdr *outer_ip = rte_pktmbuf_mtod_offset(pkt,
+			const struct ipv4_hdr *, pkt->outer_l2_len);
+	uint16_t next_id = rte_be_to_cpu_16(outer_ip->packet_id);
+	uint16_t seg;
+
+	/* Outer ids start at pkt's id and grow by ipid_delta per segment. */
+	for (seg = 0; seg < nb_segs; seg++, next_id += ipid_delta) {
+		update_outer_ipv4_header(segs[seg], next_id);
+		update_outer_udp_header(segs[seg]);
+	}
+	/* Update inner TCP/IPv4 headers */
+	update_inner_tcp4_header(pkt, ipid_delta, segs, nb_segs);
+}
+
void
gso_update_pkt_headers(struct rte_mbuf *pkt, uint8_t ipid_delta,
 		struct rte_mbuf **segs, uint16_t nb_segs)
 {
-	if (is_ipv4_tcp(pkt->packet_type))
+	if (is_ipv4_vxlan_ipv4_tcp(pkt->packet_type)) /* VxLAN: outer + inner */
+		update_ipv4_vxlan_tcp4_header(pkt, ipid_delta, segs, nb_segs);
+	else if (is_ipv4_tcp(pkt->packet_type)) /* plain TCP/IPv4 */
 		update_inner_tcp4_header(pkt, ipid_delta, segs, nb_segs);
 }
diff --git a/lib/librte_gso/gso_common.h b/lib/librte_gso/gso_common.h
index 3c76520..2377a1d 100644
--- a/lib/librte_gso/gso_common.h
+++ b/lib/librte_gso/gso_common.h
@@ -56,6 +56,39 @@  static inline uint8_t is_ipv4_tcp(uint32_t ptype)
 	}
 }
 
+/* Non-zero if the masked inner-L3 ptype field is one of the IPv4 variants. */
+#define IS_INNER_IPV4_HDR(ptype) (((ptype) == RTE_PTYPE_INNER_L3_IPV4) || \
+			((ptype) == RTE_PTYPE_INNER_L3_IPV4_EXT) || \
+			((ptype) == RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN))
+
+/* Supported VxLAN ptype layouts, with the outer and inner L3 bits cleared. */
+#define ETHER_UDP_VXLAN_ETHER_TCP_PKT (RTE_PTYPE_L2_ETHER | \
+		RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN | \
+		RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L4_TCP)
+#define ETHER_VLAN_UDP_VXLAN_ETHER_TCP_PKT (RTE_PTYPE_L2_ETHER_VLAN | \
+		RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN | \
+		RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L4_TCP)
+#define ETHER_UDP_VXLAN_ETHER_VLAN_TCP_PKT (RTE_PTYPE_L2_ETHER | \
+		RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN | \
+		RTE_PTYPE_INNER_L2_ETHER_VLAN | RTE_PTYPE_INNER_L4_TCP)
+#define ETHER_VLAN_UDP_VXLAN_ETHER_VLAN_TCP_PKT (RTE_PTYPE_L2_ETHER_VLAN | \
+		RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN | \
+		RTE_PTYPE_INNER_L2_ETHER_VLAN | RTE_PTYPE_INNER_L4_TCP)
+/* Return non-zero for outer-IPv4/UDP/VxLAN packets carrying inner IPv4/TCP. */
+static inline uint8_t is_ipv4_vxlan_ipv4_tcp(uint32_t ptype)
+{
+	switch (ptype & ~(RTE_PTYPE_L3_MASK | RTE_PTYPE_INNER_L3_MASK)) {
+	case ETHER_UDP_VXLAN_ETHER_TCP_PKT:
+	case ETHER_VLAN_UDP_VXLAN_ETHER_TCP_PKT:
+	case ETHER_UDP_VXLAN_ETHER_VLAN_TCP_PKT:
+	case ETHER_VLAN_UDP_VXLAN_ETHER_VLAN_TCP_PKT:
+		return (RTE_ETH_IS_IPV4_HDR(ptype) > 0) ?
+			IS_INNER_IPV4_HDR(ptype & RTE_PTYPE_INNER_L3_MASK) : 0;
+	default:
+		return 0;
+	}
+}
+
 /**
  * Internal function which updates relevant packet headers, following
  * segmentation. This is required to update, for example, the IPv4
diff --git a/lib/librte_gso/gso_tunnel_tcp4.c b/lib/librte_gso/gso_tunnel_tcp4.c
new file mode 100644
index 0000000..8ca52d1
--- /dev/null
+++ b/lib/librte_gso/gso_tunnel_tcp4.c
@@ -0,0 +1,85 @@ 
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2017 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rte_ether.h>
+#include <rte_ip.h>
+
+#include "gso_common.h"
+#include "gso_tunnel_tcp4.h"
+
+/*
+ * Segment a tunneled packet with inner TCP/IPv4 headers. The packet is handed
+ * back as-is (pkts_out[0] = pkt, return 1) if its inner DF bit is clear, it
+ * has no TCP payload, or gso_size leaves no room for payload.
+ */
+int
+gso_tunnel_tcp4_segment(struct rte_mbuf *pkt,
+		uint16_t gso_size,
+		uint8_t ipid_delta,
+		struct rte_mempool *direct_pool,
+		struct rte_mempool *indirect_pool,
+		struct rte_mbuf **pkts_out,
+		uint16_t nb_pkts_out)
+{
+	struct ipv4_hdr *inner_ipv4_hdr;
+	uint16_t pyld_unit_size, hdr_offset, tcp_dl;
+	int ret = 1;
+
+	hdr_offset = pkt->outer_l2_len + pkt->outer_l3_len + pkt->l2_len;
+	inner_ipv4_hdr = (struct ipv4_hdr *)(rte_pktmbuf_mtod(pkt, char *) +
+			hdr_offset);
+	/* Don't process the packet whose inner IPv4 DF bit isn't set. */
+	if (unlikely((inner_ipv4_hdr->fragment_offset & rte_cpu_to_be_16(
+						IPV4_HDR_DF_MASK)) == 0)) {
+		pkts_out[0] = pkt;
+		return ret;
+	}
+
+	tcp_dl = rte_be_to_cpu_16(inner_ipv4_hdr->total_length) -
+		pkt->l3_len - pkt->l4_len;
+	hdr_offset += pkt->l3_len + pkt->l4_len;
+	/* Skip empty packets, and a gso_size that would wrap the unit size. */
+	if (unlikely(tcp_dl == 0 || gso_size <= hdr_offset + ETHER_CRC_LEN)) {
+		pkts_out[0] = pkt;
+		return ret;
+	}
+	pyld_unit_size = gso_size - hdr_offset - ETHER_CRC_LEN;
+
+	/* Segment the payload */
+	ret = gso_do_segment(pkt, hdr_offset, pyld_unit_size, direct_pool,
+			indirect_pool, pkts_out, nb_pkts_out);
+	if (ret > 1)
+		gso_update_pkt_headers(pkt, ipid_delta, pkts_out, ret);
+
+	return ret;
+}
diff --git a/lib/librte_gso/gso_tunnel_tcp4.h b/lib/librte_gso/gso_tunnel_tcp4.h
new file mode 100644
index 0000000..0280b38
--- /dev/null
+++ b/lib/librte_gso/gso_tunnel_tcp4.h
@@ -0,0 +1,76 @@ 
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2017 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _GSO_TUNNEL_TCP4_H_
+#define _GSO_TUNNEL_TCP4_H_
+
+#include <stdint.h>
+#include <rte_mbuf.h>
+
+/**
+ * Segment a tunneled packet with inner TCP/IPv4 headers. This function
+ * assumes the input packet has correct checksums and doesn't update
+ * checksums for GSO segments. Furthermore, it doesn't process IP-fragmented
+ * packets.
+ *
+ * @param pkt
+ *  The packet mbuf to segment.
+ * @param gso_size
+ *  The max length of a GSO segment, measured in bytes.
+ * @param ipid_delta
+ *  The amount by which the IP id grows from one output segment to the next.
+ * @param direct_pool
+ *  MBUF pool used for allocating direct buffers for output segments.
+ * @param indirect_pool
+ *  MBUF pool used for allocating indirect buffers for output segments.
+ * @param pkts_out
+ *  Pointer array used to store the MBUF addresses of output GSO
+ *  segments, when gso_tunnel_tcp4_segment() succeeds. If the memory space
+ *  in pkts_out is insufficient, gso_tunnel_tcp4_segment() fails and
+ *  returns -EINVAL.
+ * @param nb_pkts_out
+ *  The max number of items that 'pkts_out' can keep.
+ *
+ * @return
+ *   - The number of GSO segments filled in pkts_out on success.
+ *   - Return -ENOMEM if run out of memory in MBUF pools.
+ *   - Return -EINVAL for invalid parameters.
+ */
+int gso_tunnel_tcp4_segment(struct rte_mbuf *pkt,
+		uint16_t gso_size,
+		uint8_t ipid_delta,
+		struct rte_mempool *direct_pool,
+		struct rte_mempool *indirect_pool,
+		struct rte_mbuf **pkts_out,
+		uint16_t nb_pkts_out);
+#endif
diff --git a/lib/librte_gso/rte_gso.c b/lib/librte_gso/rte_gso.c
index 95f6ea6..226c75a 100644
--- a/lib/librte_gso/rte_gso.c
+++ b/lib/librte_gso/rte_gso.c
@@ -38,6 +38,7 @@ 
 #include "rte_gso.h"
 #include "gso_common.h"
 #include "gso_tcp4.h"
+#include "gso_tunnel_tcp4.h"
 
 int
 rte_gso_segment(struct rte_mbuf *pkt,
@@ -66,7 +67,11 @@  rte_gso_segment(struct rte_mbuf *pkt,
 	gso_size = gso_ctx.gso_size;
 	ipid_delta = gso_ctx.ipid_flag == RTE_GSO_IPID_INCREASE;
 
-	if (is_ipv4_tcp(pkt->packet_type)) {
+	if (is_ipv4_vxlan_ipv4_tcp(pkt->packet_type)) {
+		ret = gso_tunnel_tcp4_segment(pkt, gso_size, ipid_delta,
+				direct_pool, indirect_pool,
+				pkts_out, nb_pkts_out);
+	} else if (is_ipv4_tcp(pkt->packet_type)) {
 		ret = gso_tcp4_segment(pkt, gso_size, ipid_delta,
 				direct_pool, indirect_pool,
 				pkts_out, nb_pkts_out);