[dpdk-dev] [PATCH v6 3/6] gso: add VxLAN GSO support

Ananyev, Konstantin konstantin.ananyev at intel.com
Wed Oct 4 16:12:20 CEST 2017



> -----Original Message-----
> From: Kavanagh, Mark B
> Sent: Monday, October 2, 2017 5:46 PM
> To: dev at dpdk.org
> Cc: Hu, Jiayu <jiayu.hu at intel.com>; Tan, Jianfeng <jianfeng.tan at intel.com>; Ananyev, Konstantin <konstantin.ananyev at intel.com>; Yigit,
> Ferruh <ferruh.yigit at intel.com>; thomas at monjalon.net; Kavanagh, Mark B <mark.b.kavanagh at intel.com>
> Subject: [PATCH v6 3/6] gso: add VxLAN GSO support
> 
> This patch adds a framework that allows GSO on tunneled packets.
> Furthermore, it leverages that framework to provide GSO support for
> VxLAN-encapsulated packets.
> 
> Supported VxLAN packets must have an outer IPv4 header (prepended by an
> optional VLAN tag), and contain an inner TCP/IPv4 packet (with an optional
> inner VLAN tag).
> 
> VxLAN GSO doesn't check if input packets have correct checksums and
> doesn't update checksums for output packets. Additionally, it doesn't
> process IP fragmented packets.
> 
> As with TCP/IPv4 GSO, VxLAN GSO uses a two-segment MBUF to organize each
> output packet, which mandates support for multi-segment mbufs in the TX
> functions of the NIC driver. Also, if a packet is GSOed, VxLAN GSO
> reduces its MBUF refcnt by 1. As a result, when all of its GSO'd segments
> are freed, the packet is freed automatically.
> 
> Signed-off-by: Mark Kavanagh <mark.b.kavanagh at intel.com>
> Signed-off-by: Jiayu Hu <jiayu.hu at intel.com>
> ---
>  doc/guides/rel_notes/release_17_11.rst |   3 +
>  lib/librte_gso/Makefile                |   1 +
>  lib/librte_gso/gso_common.h            |  25 +++++++
>  lib/librte_gso/gso_tunnel_tcp4.c       | 123 +++++++++++++++++++++++++++++++++
>  lib/librte_gso/gso_tunnel_tcp4.h       |  75 ++++++++++++++++++++
>  lib/librte_gso/rte_gso.c               |  13 +++-
>  6 files changed, 237 insertions(+), 3 deletions(-)
>  create mode 100644 lib/librte_gso/gso_tunnel_tcp4.c
>  create mode 100644 lib/librte_gso/gso_tunnel_tcp4.h
> 
> diff --git a/doc/guides/rel_notes/release_17_11.rst b/doc/guides/rel_notes/release_17_11.rst
> index c414f73..25b8a78 100644
> --- a/doc/guides/rel_notes/release_17_11.rst
> +++ b/doc/guides/rel_notes/release_17_11.rst
> @@ -48,6 +48,9 @@ New Features
>    ones (e.g. MTU is 1500B). Supported packet types are:
> 
>    * TCP/IPv4 packets, which may include a single VLAN tag.
> +  * VxLAN packets, which must have an outer IPv4 header (prepended by
> +    an optional VLAN tag), and contain an inner TCP/IPv4 packet (with
> +    an optional VLAN tag).
> 
>    The GSO library doesn't check if the input packets have correct
>    checksums, and doesn't update checksums for output packets.
> diff --git a/lib/librte_gso/Makefile b/lib/librte_gso/Makefile
> index 2be64d1..e6d41df 100644
> --- a/lib/librte_gso/Makefile
> +++ b/lib/librte_gso/Makefile
> @@ -44,6 +44,7 @@ LIBABIVER := 1
>  SRCS-$(CONFIG_RTE_LIBRTE_GSO) += rte_gso.c
>  SRCS-$(CONFIG_RTE_LIBRTE_GSO) += gso_common.c
>  SRCS-$(CONFIG_RTE_LIBRTE_GSO) += gso_tcp4.c
> +SRCS-$(CONFIG_RTE_LIBRTE_GSO) += gso_tunnel_tcp4.c
> 
>  # install this header file
>  SYMLINK-$(CONFIG_RTE_LIBRTE_GSO)-include += rte_gso.h
> diff --git a/lib/librte_gso/gso_common.h b/lib/librte_gso/gso_common.h
> index 8d9b94e..c051295 100644
> --- a/lib/librte_gso/gso_common.h
> +++ b/lib/librte_gso/gso_common.h
> @@ -39,6 +39,7 @@
>  #include <rte_mbuf.h>
>  #include <rte_ip.h>
>  #include <rte_tcp.h>
> +#include <rte_udp.h>
> 
>  #define IS_FRAGMENTED(frag_off) (((frag_off) & IPV4_HDR_OFFSET_MASK) != 0 \
>  		|| ((frag_off) & IPV4_HDR_MF_FLAG) == IPV4_HDR_MF_FLAG)
> @@ -49,6 +50,30 @@
>  #define IS_IPV4_TCP(flag) (((flag) & (PKT_TX_TCP_SEG | PKT_TX_IPV4)) == \
>  		(PKT_TX_TCP_SEG | PKT_TX_IPV4))
> 
> +#define IS_IPV4_VXLAN_TCP4(flag) (((flag) & (PKT_TX_TCP_SEG | PKT_TX_IPV4 | \
> +				PKT_TX_OUTER_IPV4 | PKT_TX_TUNNEL_VXLAN)) == \
> +		(PKT_TX_TCP_SEG | PKT_TX_IPV4 | PKT_TX_OUTER_IPV4 | \
> +		 PKT_TX_TUNNEL_VXLAN))
> +
> +/**
> + * Internal function which updates the UDP header of a packet, following
> + * segmentation. This is required to update the header's datagram length field.
> + *
> + * @param pkt
> + *  The packet containing the UDP header.
> + * @param udp_offset
> + *  The offset of the UDP header from the start of the packet.
> + */
> +static inline void
> +update_udp_header(struct rte_mbuf *pkt, uint16_t udp_offset)
> +{
> +	struct udp_hdr *udp_hdr;
> +
> +	udp_hdr = (struct udp_hdr *)(rte_pktmbuf_mtod(pkt, char *) +
> +			udp_offset);
> +	udp_hdr->dgram_len = rte_cpu_to_be_16(pkt->pkt_len - udp_offset);
> +}
> +
>  /**
>   * Internal function which updates the TCP header of a packet, following
>   * segmentation. This is required to update the header's 'sent' sequence
> diff --git a/lib/librte_gso/gso_tunnel_tcp4.c b/lib/librte_gso/gso_tunnel_tcp4.c
> new file mode 100644
> index 0000000..34bbbd7
> --- /dev/null
> +++ b/lib/librte_gso/gso_tunnel_tcp4.c
> @@ -0,0 +1,123 @@
> +/*-
> + *   BSD LICENSE
> + *
> + *   Copyright(c) 2017 Intel Corporation. All rights reserved.
> + *   All rights reserved.
> + *
> + *   Redistribution and use in source and binary forms, with or without
> + *   modification, are permitted provided that the following conditions
> + *   are met:
> + *
> + *     * Redistributions of source code must retain the above copyright
> + *       notice, this list of conditions and the following disclaimer.
> + *     * Redistributions in binary form must reproduce the above copyright
> + *       notice, this list of conditions and the following disclaimer in
> + *       the documentation and/or other materials provided with the
> + *       distribution.
> + *     * Neither the name of Intel Corporation nor the names of its
> + *       contributors may be used to endorse or promote products derived
> + *       from this software without specific prior written permission.
> + *
> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +#include "gso_common.h"
> +#include "gso_tunnel_tcp4.h"
> +
> +static void
> +update_tunnel_ipv4_tcp_headers(struct rte_mbuf *pkt, uint8_t ipid_delta,
> +		struct rte_mbuf **segs, uint16_t nb_segs)
> +{
> +	struct ipv4_hdr *ipv4_hdr;
> +	struct tcp_hdr *tcp_hdr;
> +	uint32_t sent_seq;
> +	uint16_t outer_id, inner_id, tail_idx, i;
> +	uint16_t outer_ipv4_offset, inner_ipv4_offset, udp_offset, tcp_offset;
> +
> +	outer_ipv4_offset = pkt->outer_l2_len;
> +	udp_offset = outer_ipv4_offset + pkt->outer_l3_len;
> +	inner_ipv4_offset = udp_offset + pkt->l2_len;
> +	tcp_offset = inner_ipv4_offset + pkt->l3_len;
> +
> +	/* Outer IPv4 header. */
> +	ipv4_hdr = (struct ipv4_hdr *)(rte_pktmbuf_mtod(pkt, char *) +
> +			outer_ipv4_offset);
> +	outer_id = rte_be_to_cpu_16(ipv4_hdr->packet_id);
> +
> +	/* Inner IPv4 header. */
> +	ipv4_hdr = (struct ipv4_hdr *)(rte_pktmbuf_mtod(pkt, char *) +
> +			inner_ipv4_offset);
> +	inner_id = rte_be_to_cpu_16(ipv4_hdr->packet_id);
> +
> +	tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr + pkt->l3_len);
> +	sent_seq = rte_be_to_cpu_32(tcp_hdr->sent_seq);
> +	tail_idx = nb_segs - 1;
> +
> +	for (i = 0; i < nb_segs; i++) {
> +		update_ipv4_header(segs[i], outer_ipv4_offset, outer_id);
> +		update_udp_header(segs[i], udp_offset);
> +		update_ipv4_header(segs[i], inner_ipv4_offset, inner_id);
> +		update_tcp_header(segs[i], tcp_offset, sent_seq, i < tail_idx);
> +		outer_id++;
> +		inner_id += ipid_delta;
> +		sent_seq += (segs[i]->pkt_len - segs[i]->data_len);
> +	}
> +}
> +
> +int
> +gso_tunnel_tcp4_segment(struct rte_mbuf *pkt,
> +		uint16_t gso_size,
> +		uint8_t ipid_delta,
> +		struct rte_mempool *direct_pool,
> +		struct rte_mempool *indirect_pool,
> +		struct rte_mbuf **pkts_out,
> +		uint16_t nb_pkts_out)
> +{
> +	struct ipv4_hdr *inner_ipv4_hdr;
> +	uint16_t pyld_unit_size, hdr_offset;
> +	uint16_t tcp_dl, frag_off;
> +	int ret = 1;
> +
> +	hdr_offset = pkt->outer_l2_len + pkt->outer_l3_len + pkt->l2_len;
> +	inner_ipv4_hdr = (struct ipv4_hdr *)(rte_pktmbuf_mtod(pkt, char *) +
> +			hdr_offset);
> +	/*
> +	 * Don't process the packet whose MF bit or offset in the inner
> +	 * IPv4 header are non-zero.
> +	 */
> +	frag_off = rte_be_to_cpu_16(inner_ipv4_hdr->fragment_offset);
> +	if (unlikely(IS_FRAGMENTED(frag_off))) {
> +		pkts_out[0] = pkt;
> +		return 1;
> +	}
> +
> +	/* Don't process the packet without data */
> +	tcp_dl = pkt->pkt_len - pkt->l2_len - pkt->l3_len - pkt->l4_len;
> +	if (unlikely(tcp_dl == 0)) {

You probably need to take into account outer_len* too..
Probably better to move that check after final hdr_offset calculations:

...
hdr_offset += pkt->l3_len + pkt->l4_len;
if (hdr_offset >= pkt->pkt_len) {..;' return 1;}
...

> +		pkts_out[0] = pkt;
> +		return 1;
> +	}
> +
> +	hdr_offset += pkt->l3_len + pkt->l4_len;
> +	pyld_unit_size = gso_size - hdr_offset;
> +
> +	/* Segment the payload */
> +	ret = gso_do_segment(pkt, hdr_offset, pyld_unit_size, direct_pool,
> +			indirect_pool, pkts_out, nb_pkts_out);
> +	if (ret <= 1)
> +		return ret;
> +
> +	update_tunnel_ipv4_tcp_headers(pkt, ipid_delta, pkts_out, ret);
> +
> +	return ret;
> +}
> diff --git a/lib/librte_gso/gso_tunnel_tcp4.h b/lib/librte_gso/gso_tunnel_tcp4.h
> new file mode 100644
> index 0000000..3c67f0c
> --- /dev/null
> +++ b/lib/librte_gso/gso_tunnel_tcp4.h
> @@ -0,0 +1,75 @@
> +/*-
> + *   BSD LICENSE
> + *
> + *   Copyright(c) 2017 Intel Corporation. All rights reserved.
> + *   All rights reserved.
> + *
> + *   Redistribution and use in source and binary forms, with or without
> + *   modification, are permitted provided that the following conditions
> + *   are met:
> + *
> + *     * Redistributions of source code must retain the above copyright
> + *       notice, this list of conditions and the following disclaimer.
> + *     * Redistributions in binary form must reproduce the above copyright
> + *       notice, this list of conditions and the following disclaimer in
> + *       the documentation and/or other materials provided with the
> + *       distribution.
> + *     * Neither the name of Intel Corporation nor the names of its
> + *       contributors may be used to endorse or promote products derived
> + *       from this software without specific prior written permission.
> + *
> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +#ifndef _GSO_TUNNEL_TCP4_H_
> +#define _GSO_TUNNEL_TCP4_H_
> +
> +#include <stdint.h>
> +#include <rte_mbuf.h>
> +
> +/**
> + * Segment a tunneling packet with inner TCP/IPv4 headers. This function
> + * doesn't check if the input packet has correct checksums, and doesn't
> + * update checksums for output GSO segments. Furthermore, it doesn't
> + * process IP fragment packets.
> + *
> + * @param pkt
> + *  The packet mbuf to segment.
> + * @param gso_size
> + *  The max length of a GSO segment, measured in bytes.
> + * @param ipid_delta
> + *  The increasing unit of IP ids.
> + * @param direct_pool
> + *  MBUF pool used for allocating direct buffers for output segments.
> + * @param indirect_pool
> + *  MBUF pool used for allocating indirect buffers for output segments.
> + * @param pkts_out
> + *  Pointer array used to store the MBUF addresses of output GSO
> + *  segments, when it succeeds. If the memory space in pkts_out is
> + *  insufficient, it fails and returns -EINVAL.
> + * @param nb_pkts_out
> + *  The max number of items that 'pkts_out' can keep.
> + *
> + * @return
> + *   - The number of GSO segments filled in pkts_out on success.
> + *   - Return -ENOMEM if run out of memory in MBUF pools.
> + *   - Return -EINVAL for invalid parameters.
> + */
> +int gso_tunnel_tcp4_segment(struct rte_mbuf *pkt,
> +		uint16_t gso_size,
> +		uint8_t ipid_delta,
> +		struct rte_mempool *direct_pool,
> +		struct rte_mempool *indirect_pool,
> +		struct rte_mbuf **pkts_out,
> +		uint16_t nb_pkts_out);
> +#endif
> diff --git a/lib/librte_gso/rte_gso.c b/lib/librte_gso/rte_gso.c
> index a4fce50..6095689 100644
> --- a/lib/librte_gso/rte_gso.c
> +++ b/lib/librte_gso/rte_gso.c
> @@ -39,6 +39,7 @@
>  #include "rte_gso.h"
>  #include "gso_common.h"
>  #include "gso_tcp4.h"
> +#include "gso_tunnel_tcp4.h"
> 
>  int
>  rte_gso_segment(struct rte_mbuf *pkt,
> @@ -58,8 +59,9 @@
>  		return -EINVAL;
> 
>  	if ((gso_ctx->gso_size >= pkt->pkt_len) || (gso_ctx->gso_types &
> -				DEV_TX_OFFLOAD_TCP_TSO) !=
> -			gso_ctx->gso_types) {
> +				(DEV_TX_OFFLOAD_TCP_TSO |
> +				 DEV_TX_OFFLOAD_VXLAN_TNL_TSO)) !=
> +				gso_ctx->gso_types) {
>  		pkt->ol_flags &= (~PKT_TX_TCP_SEG);
>  		pkts_out[0] = pkt;
>  		return 1;
> @@ -71,7 +73,12 @@
>  	ipid_delta = (gso_ctx->ipid_flag != RTE_GSO_IPID_FIXED);
>  	ol_flags = pkt->ol_flags;
> 
> -	if (IS_IPV4_TCP(pkt->ol_flags)) {
> +	if (IS_IPV4_VXLAN_TCP4(pkt->ol_flags)) {
> +		pkt->ol_flags &= (~PKT_TX_TCP_SEG);
> +		ret = gso_tunnel_tcp4_segment(pkt, gso_size, ipid_delta,
> +				direct_pool, indirect_pool,
> +				pkts_out, nb_pkts_out);
> +	} else if (IS_IPV4_TCP(pkt->ol_flags)) {

Hmm it doesn't look quite right.
Imagine user doesn't want libgso to segment plain TCP packets with that ctx, just VXLAN+TCP.

I think you need to merge that if and one above to something like that:

If (IS_IPV4_VXLAN_TCP4(pkt->ol_flags))
  &&  (gso_ctx->gso_types & (DEV_TX_OFFLOAD_VXLAN_TNL_TSO | DEV_TX_OFFLOAD_TCP_TSO)) == 
   (DEV_TX_OFFLOAD_VXLAN_TNL_TSO | DEV_TX_OFFLOAD_TCP_TSO)) {
   ...
} else if (IS_IPV4_TCP(pkt->ol_flags) && (gso_ctx->gso_types & DEV_TX_OFFLOAD_TCP_TSO)) {
   ...
} else {
     /* unsupported packet, skip */
}

Konstantin

>  		pkt->ol_flags &= (~PKT_TX_TCP_SEG);
>  		ret = gso_tcp4_segment(pkt, gso_size, ipid_delta,
>  				direct_pool, indirect_pool,
> --
> 1.9.3



More information about the dev mailing list