[dpdk-dev] [PATCH 2/5] gso/lib: add TCP/IPv4 GSO support

Ananyev, Konstantin konstantin.ananyev at intel.com
Wed Aug 30 03:38:33 CEST 2017



> -----Original Message-----
> From: Hu, Jiayu
> Sent: Thursday, August 24, 2017 3:16 PM
> To: dev at dpdk.org
> Cc: Kavanagh, Mark B <mark.b.kavanagh at intel.com>; Ananyev, Konstantin <konstantin.ananyev at intel.com>; Tan, Jianfeng
> <jianfeng.tan at intel.com>; Hu, Jiayu <jiayu.hu at intel.com>
> Subject: [PATCH 2/5] gso/lib: add TCP/IPv4 GSO support
> 
> This patch adds GSO support for TCP/IPv4 packets. Supported packets
> may include a single VLAN tag. TCP/IPv4 GSO assumes that all input
> packets have correct checksums, and doesn't update checksums for output
> packets (the responsibility for this lies with the application).
> Additionally, TCP/IPv4 GSO doesn't process IP fragmented packets.
> 
> TCP/IPv4 GSO uses two chained MBUFs, one direct MBUF and one indrect
> MBUF, to organize an output packet. Note that we refer to these two
> chained MBUFs as a two-segment MBUF. The direct MBUF stores the packet
> header, while the indirect mbuf simply points to a location within the
> original packet's payload. Consequently, use of the GSO library requires
> multi-segment MBUF support in the TX functions of the NIC driver.
> 
> If a packet is GSOed, TCP/IPv4 GSO reduces its MBUF refcnt by 1. As a
> result, when all of its GSOed segments are freed, the packet is freed
> automatically.
> 
> Signed-off-by: Jiayu Hu <jiayu.hu at intel.com>
> Signed-off-by: Mark Kavanagh <mark.b.kavanagh at intel.com>
> ---
>  lib/librte_eal/common/include/rte_log.h |   1 +
>  lib/librte_gso/Makefile                 |   2 +
>  lib/librte_gso/gso_common.c             | 270 ++++++++++++++++++++++++++++++++
>  lib/librte_gso/gso_common.h             | 120 ++++++++++++++
>  lib/librte_gso/gso_tcp.c                |  82 ++++++++++
>  lib/librte_gso/gso_tcp.h                |  73 +++++++++
>  lib/librte_gso/rte_gso.c                |  44 +++++-
>  lib/librte_gso/rte_gso.h                |   3 +
>  8 files changed, 593 insertions(+), 2 deletions(-)
>  create mode 100644 lib/librte_gso/gso_common.c
>  create mode 100644 lib/librte_gso/gso_common.h
>  create mode 100644 lib/librte_gso/gso_tcp.c
>  create mode 100644 lib/librte_gso/gso_tcp.h
> 
> diff --git a/lib/librte_eal/common/include/rte_log.h b/lib/librte_eal/common/include/rte_log.h
> index ec8dba7..2fa1199 100644
> --- a/lib/librte_eal/common/include/rte_log.h
> +++ b/lib/librte_eal/common/include/rte_log.h
> @@ -87,6 +87,7 @@ extern struct rte_logs rte_logs;
>  #define RTE_LOGTYPE_CRYPTODEV 17 /**< Log related to cryptodev. */
>  #define RTE_LOGTYPE_EFD       18 /**< Log related to EFD. */
>  #define RTE_LOGTYPE_EVENTDEV  19 /**< Log related to eventdev. */
> +#define RTE_LOGTYPE_GSO       20 /**< Log related to GSO. */
> 
>  /* these log types can be used in an application */
>  #define RTE_LOGTYPE_USER1     24 /**< User-defined log type 1. */
> diff --git a/lib/librte_gso/Makefile b/lib/librte_gso/Makefile
> index aeaacbc..0f8e38f 100644
> --- a/lib/librte_gso/Makefile
> +++ b/lib/librte_gso/Makefile
> @@ -42,6 +42,8 @@ LIBABIVER := 1
> 
>  #source files
>  SRCS-$(CONFIG_RTE_LIBRTE_GSO) += rte_gso.c
> +SRCS-$(CONFIG_RTE_LIBRTE_GSO) += gso_common.c
> +SRCS-$(CONFIG_RTE_LIBRTE_GSO) += gso_tcp.c
> 
>  # install this header file
>  SYMLINK-$(CONFIG_RTE_LIBRTE_GSO)-include += rte_gso.h
> diff --git a/lib/librte_gso/gso_common.c b/lib/librte_gso/gso_common.c
> new file mode 100644
> index 0000000..2b54fbd
> --- /dev/null
> +++ b/lib/librte_gso/gso_common.c
> @@ -0,0 +1,270 @@
> +/*-
> + *   BSD LICENSE
> + *
> + *   Copyright(c) 2017 Intel Corporation. All rights reserved.
> + *   All rights reserved.
> + *
> + *   Redistribution and use in source and binary forms, with or without
> + *   modification, are permitted provided that the following conditions
> + *   are met:
> + *
> + *     * Redistributions of source code must retain the above copyright
> + *       notice, this list of conditions and the following disclaimer.
> + *     * Redistributions in binary form must reproduce the above copyright
> + *       notice, this list of conditions and the following disclaimer in
> + *       the documentation and/or other materials provided with the
> + *       distribution.
> + *     * Neither the name of Intel Corporation nor the names of its
> + *       contributors may be used to endorse or promote products derived
> + *       from this software without specific prior written permission.
> + *
> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +#include <stdbool.h>
> +#include <string.h>
> +
> +#include <rte_malloc.h>
> +
> +#include <rte_ether.h>
> +#include <rte_ip.h>
> +#include <rte_tcp.h>
> +
> +#include "gso_common.h"
> +
> +static inline void
> +hdr_segment_init(struct rte_mbuf *hdr_segment, struct rte_mbuf *pkt,
> +		uint16_t pkt_hdr_offset)
> +{
> +	/* copy mbuf metadata */
> +	hdr_segment->nb_segs = 1;
> +	hdr_segment->port = pkt->port;
> +	hdr_segment->ol_flags = pkt->ol_flags;
> +	hdr_segment->packet_type = pkt->packet_type;
> +	hdr_segment->pkt_len = pkt_hdr_offset;
> +	hdr_segment->data_len = pkt_hdr_offset;
> +	hdr_segment->tx_offload = pkt->tx_offload;
> +	/* copy packet header */
> +	rte_memcpy(rte_pktmbuf_mtod(hdr_segment, char *),
> +			rte_pktmbuf_mtod(pkt, char *),
> +			pkt_hdr_offset);
> +}
> +
> +static inline void
> +free_gso_segment(struct rte_mbuf **pkts, uint16_t nb_pkts)
> +{
> +	uint16_t i;
> +
> +	for (i = 0; i < nb_pkts; i++) {
> +		rte_pktmbuf_detach(pkts[i]->next);

I don't think you need to call detach() here explicitly.
Just rte_pktmbuf_free(pkts[i]) should do I think.

> +		rte_pktmbuf_free(pkts[i]);
> +		pkts[i] = NULL;
> +	}
> +}
> +
> +int
> +gso_do_segment(struct rte_mbuf *pkt,
> +		uint16_t pkt_hdr_offset,
> +		uint16_t pyld_unit_size,
> +		struct rte_mempool *direct_pool,
> +		struct rte_mempool *indirect_pool,
> +		struct rte_mbuf **pkts_out,
> +		uint16_t nb_pkts_out)
> +{
> +	struct rte_mbuf *pkt_in;
> +	struct rte_mbuf *hdr_segment, *pyld_segment;
> +	uint32_t pkt_in_pyld_off;
> +	uint16_t pkt_in_segment_len, pkt_out_segment_len;
> +	uint16_t nb_segs;
> +	bool pkt_in_segment_processed;
> +
> +	pkt_in_pyld_off = pkt->data_off + pkt_hdr_offset;
> +	pkt_in = pkt;
> +	nb_segs = 0;
> +
> +	while (pkt_in) {
> +		pkt_in_segment_processed = false;
> +		pkt_in_segment_len = pkt_in->data_off + pkt_in->data_len;
> +
> +		while (!pkt_in_segment_processed) {
> +			if (unlikely(nb_segs >= nb_pkts_out)) {
> +				free_gso_segment(pkts_out, nb_segs);
> +				return -EINVAL;
> +			}
> +
> +			/* allocate direct mbuf */
> +			hdr_segment = rte_pktmbuf_alloc(direct_pool);
> +			if (unlikely(hdr_segment == NULL)) {
> +				free_gso_segment(pkts_out, nb_segs);
> +				return -ENOMEM;
> +			}
> +
> +			/* allocate indirect mbuf */
> +			pyld_segment = rte_pktmbuf_alloc(indirect_pool);
> +			if (unlikely(pyld_segment == NULL)) {
> +				rte_pktmbuf_free(hdr_segment);
> +				free_gso_segment(pkts_out, nb_segs);
> +				return -ENOMEM;
> +			}

So, if I understand correctly each new packet would always contain just one data segment?
Why several data segments couldn't be chained together (if sum of their data_len <= mss)?
In a same way as done here:
http://dpdk.org/browse/dpdk/tree/lib/librte_ip_frag/rte_ipv4_fragmentation.c#n93
or here:
https://gerrit.fd.io/r/gitweb?p=tldk.git;a=blob;f=lib/libtle_l4p/tcp_tx_seg.h;h=a8d2425597a7ad6f598aa4bb7fcd7f1da74305f0;hb=HEAD#l23
?

> +
> +			/* copy packet header */
> +			hdr_segment_init(hdr_segment, pkt, pkt_hdr_offset);
> +
> +			/* attach payload mbuf to current packet segment */
> +			rte_pktmbuf_attach(pyld_segment, pkt_in);
> +
> +			hdr_segment->next = pyld_segment;
> +			pkts_out[nb_segs++] = hdr_segment;
> +
> +			/* calculate payload length */
> +			pkt_out_segment_len = pyld_unit_size;
> +			if (pkt_in_pyld_off + pkt_out_segment_len >
> +					pkt_in_segment_len) {
> +				pkt_out_segment_len = pkt_in_segment_len -
> +					pkt_in_pyld_off;
> +			}
> +
> +			/* update payload segment */
> +			pyld_segment->data_off = pkt_in_pyld_off;
> +			pyld_segment->data_len = pkt_out_segment_len;
> +
> +			/* update header segment */
> +			hdr_segment->pkt_len += pyld_segment->data_len;
> +			hdr_segment->nb_segs++;
> +
> +			/* update pkt_in_pyld_off */
> +			pkt_in_pyld_off += pkt_out_segment_len;
> +			if (pkt_in_pyld_off == pkt_in_segment_len)
> +				pkt_in_segment_processed = true;
> +		}
> +
> +		/* 'pkt_in' may contain numerous segments */
> +		pkt_in = pkt_in->next;
> +		if (pkt_in != NULL)
> +			pkt_in_pyld_off = pkt_in->data_off;
> +	}
> +	return nb_segs;
> +}
> +
> +static inline void
> +parse_ipv4(struct ipv4_hdr *ipv4_hdr, struct rte_mbuf *pkt)
> +{
> +	struct tcp_hdr *tcp_hdr;
> +
> +	switch (ipv4_hdr->next_proto_id) {
> +	case IPPROTO_TCP:
> +		pkt->packet_type |= RTE_PTYPE_L4_TCP;
> +		pkt->l3_len = IPv4_HDR_LEN(ipv4_hdr);
> +		tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr + pkt->l3_len);
> +		pkt->l4_len = TCP_HDR_LEN(tcp_hdr);
> +		break;
> +	}
> +}
> +
> +static inline void
> +parse_ethernet(struct ether_hdr *eth_hdr, struct rte_mbuf *pkt)
> +{
> +	struct ipv4_hdr *ipv4_hdr;
> +	struct vlan_hdr *vlan_hdr;
> +	uint16_t ethertype;
> +
> +	ethertype = rte_be_to_cpu_16(eth_hdr->ether_type);
> +	if (ethertype == ETHER_TYPE_VLAN) {
> +		vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);
> +		pkt->l2_len = sizeof(struct vlan_hdr);
> +		pkt->packet_type |= RTE_PTYPE_L2_ETHER_VLAN;
> +		ethertype = rte_be_to_cpu_16(vlan_hdr->eth_proto);
> +	}
> +
> +	switch (ethertype) {
> +	case ETHER_TYPE_IPv4:
> +		if (IS_VLAN_PKT(pkt)) {
> +			pkt->packet_type |= RTE_PTYPE_L3_IPV4;
> +		} else {
> +			pkt->packet_type |= RTE_PTYPE_L2_ETHER;
> +			pkt->packet_type |= RTE_PTYPE_L3_IPV4;
> +		}
> +		pkt->l2_len += sizeof(struct ether_hdr);
> +		ipv4_hdr = (struct ipv4_hdr *) ((char *)eth_hdr +
> +				pkt->l2_len);
> +		parse_ipv4(ipv4_hdr, pkt);
> +		break;
> +	}
> +}
> +
> +void
> +gso_parse_packet(struct rte_mbuf *pkt)

There is a function rte_net_get_ptype() that supposed to provide similar functionality.
So we probably don't need to create a new SW parse function here, instead would be better
to reuse (and update if needed) an existing one.
Again user already might have l2/l3/l4.../_len and packet_type setuped.
So better to keep SW packet parsing out of scope of that library. 

> +{
> +	struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *);
> +
> +	pkt->packet_type = pkt->tx_offload = 0;
> +	parse_ethernet(eth_hdr, pkt);
> +}
> +
> +static inline void
> +update_ipv4_header(char *base, uint16_t offset, uint16_t length, uint16_t id)
> +{
> +	struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)(base + offset);
> +
> +	ipv4_hdr->total_length = rte_cpu_to_be_16(length - offset);
> +	ipv4_hdr->packet_id = rte_cpu_to_be_16(id);
> +}
> +
> +static inline void
> +update_tcp_header(char *base, uint16_t offset, uint32_t sent_seq,
> +	uint8_t non_tail)
> +{
> +	struct tcp_hdr *tcp_hdr = (struct tcp_hdr *)(base + offset);
> +
> +	tcp_hdr->sent_seq = rte_cpu_to_be_32(sent_seq);
> +	/* clean FIN and PSH for non-tail segments */
> +	if (non_tail)
> +		tcp_hdr->tcp_flags &= (~(TCP_HDR_PSH_MASK | TCP_HDR_FIN_MASK));
> +}
> +
> +void
> +gso_update_pkt_headers(struct rte_mbuf *pkt, uint16_t nb_segments,
> +		struct rte_mbuf **out_segments)
> +{
> +	struct ipv4_hdr *ipv4_hdr;
> +	struct tcp_hdr *tcp_hdr;
> +	struct rte_mbuf *seg;
> +	uint32_t sent_seq;
> +	uint16_t offset, i;
> +	uint16_t tail_seg_idx = nb_segments - 1, id;
> +
> +	switch (pkt->packet_type) {
> +	case ETHER_VLAN_IPv4_TCP_PKT:
> +	case ETHER_IPv4_TCP_PKT:

Might be worth to put code below in a separate function:
update_inner_tcp_hdr(..) or so.
Then you can reuse it for tunneled cases too.

> +		ipv4_hdr = (struct ipv4_hdr *)(rte_pktmbuf_mtod(pkt, char *) 
> +				pkt->l2_len);
> +		tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr + pkt->l3_len);
> +		id = rte_be_to_cpu_16(ipv4_hdr->packet_id);
> +		sent_seq = rte_be_to_cpu_32(tcp_hdr->sent_seq);
> +
> +		for (i = 0; i < nb_segments; i++) {
> +			seg = out_segments[i];
> +
> +			offset = seg->l2_len;
> +			update_ipv4_header(rte_pktmbuf_mtod(seg, char *),
> +					offset, seg->pkt_len, id);
> +			id++;

Who would be responsible to make sure that we wouldn't have consecutive packets with the IPV4 id?
Would be the upper layer that forms the packet or gso library or ...?

> +
> +			offset += seg->l3_len;
> +			update_tcp_header(rte_pktmbuf_mtod(seg, char *),
> +					offset, sent_seq, i < tail_seg_idx);
> +			sent_seq += seg->next->data_len;
> +		}
> +		break;
> +	}
> +}
> diff --git a/lib/librte_gso/gso_common.h b/lib/librte_gso/gso_common.h
> new file mode 100644
> index 0000000..d750041
> --- /dev/null
> +++ b/lib/librte_gso/gso_common.h
> @@ -0,0 +1,120 @@
> +/*-
> + *   BSD LICENSE
> + *
> + *   Copyright(c) 2017 Intel Corporation. All rights reserved.
> + *   All rights reserved.
> + *
> + *   Redistribution and use in source and binary forms, with or without
> + *   modification, are permitted provided that the following conditions
> + *   are met:
> + *
> + *     * Redistributions of source code must retain the above copyright
> + *       notice, this list of conditions and the following disclaimer.
> + *     * Redistributions in binary form must reproduce the above copyright
> + *       notice, this list of conditions and the following disclaimer in
> + *       the documentation and/or other materials provided with the
> + *       distribution.
> + *     * Neither the name of Intel Corporation nor the names of its
> + *       contributors may be used to endorse or promote products derived
> + *       from this software without specific prior written permission.
> + *
> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +#ifndef _GSO_COMMON_H_
> +#define _GSO_COMMON_H_
> +
> +#include <stdint.h>
> +#include <rte_mbuf.h>
> +
> +#define IPV4_HDR_DF_SHIFT 14
> +#define IPV4_HDR_DF_MASK (1 << IPV4_HDR_DF_SHIFT)
> +#define IPv4_HDR_LEN(iph) ((iph->version_ihl & 0x0f) * 4)
> +
> +#define TCP_HDR_PSH_MASK ((uint8_t)0x08)
> +#define TCP_HDR_FIN_MASK ((uint8_t)0x01)
> +#define TCP_HDR_LEN(tcph) ((tcph->data_off & 0xf0) >> 2)
> +
> +#define ETHER_IPv4_PKT (RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4)
> +/* Supported packet types */
> +/* TCP/IPv4 packet. */
> +#define ETHER_IPv4_TCP_PKT (ETHER_IPv4_PKT | RTE_PTYPE_L4_TCP)
> +
> +/* TCP/IPv4 packet with VLAN tag. */
> +#define ETHER_VLAN_IPv4_TCP_PKT (RTE_PTYPE_L2_ETHER_VLAN | \
> +		RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP)
> +
> +#define IS_VLAN_PKT(pkt) ((pkt->packet_type & RTE_PTYPE_L2_ETHER_VLAN) == \
> +		RTE_PTYPE_L2_ETHER_VLAN)
> +
> +/**
> + * Internal function which parses a packet, setting outer_l2/l3_len and
> + * l2/l3/l4_len and packet_type.
> + *
> + * @param pkt
> + *  Packet to parse.
> + */
> +void gso_parse_packet(struct rte_mbuf *pkt);
> +
> +/**
> + * Internal function which updates relevant packet headers, following
> + * segmentation. This is required to update, for example, the IPv4
> + * 'total_length' field, to reflect the reduced length of the now-
> + * segmented packet.
> + *
> + * @param pkt
> + *  The original packet.
> + * @param nb_segments
> + *  The number of GSO segments into which pkt was split.
> + * @param out_segements
> + *  Pointer array used for storing mbuf addresses for GSO segments.
> + */
> +void gso_update_pkt_headers(struct rte_mbuf *pkt, uint16_t nb_segments,
> +		struct rte_mbuf **out_segments);
> +
> +/**
> + * Internal function which divides the input packet into small segments.
> + * Each of the newly-created segments is organized as a two-segment mbuf,
> + * where the first segment is a standard mbuf, which stores a copy of
> + * packet header, and the second is an indirect mbuf which points to a
> + * section of data in the input packet.
> + *
> + * @param pkt
> + *  Packet to segment.
> + * @param pkt_hdr_offset
> + *  Packet header offset, measured in byte.
> + * @param pyld_unit_size
> + *  The max payload length of a GSO segment.
> + * @param direct_pool
> + *  MBUF pool used for allocating direct buffers for output segments.
> + * @param indirect_pool
> + *  MBUF pool used for allocating indirect buffers for output segments.
> + * @param pkts_out
> + *  Pointer array used to keep the mbuf addresses of output segments.
> + * @param nb_pkts_out
> + *  The max number of items that pkts_out can keep.
> + *
> + * @return
> + *  - The number of segments created in the event of success.
> + *  - If no GSO is performed, return 1.
> + *  - If available memory in mempools is insufficient, return -ENOMEM.
> + *  - -EINVAL for invalid parameters
> + */
> +int gso_do_segment(struct rte_mbuf *pkt,
> +		uint16_t pkt_hdr_offset,
> +		uint16_t pyld_unit_size,
> +		struct rte_mempool *direct_pool,
> +		struct rte_mempool *indirect_pool,
> +		struct rte_mbuf **pkts_out,
> +		uint16_t nb_pkts_out);
> +#endif
> diff --git a/lib/librte_gso/gso_tcp.c b/lib/librte_gso/gso_tcp.c
> new file mode 100644
> index 0000000..9d5fc30
> --- /dev/null
> +++ b/lib/librte_gso/gso_tcp.c
> @@ -0,0 +1,82 @@
> +/*-
> + *   BSD LICENSE
> + *
> + *   Copyright(c) 2017 Intel Corporation. All rights reserved.
> + *   All rights reserved.
> + *
> + *   Redistribution and use in source and binary forms, with or without
> + *   modification, are permitted provided that the following conditions
> + *   are met:
> + *
> + *     * Redistributions of source code must retain the above copyright
> + *       notice, this list of conditions and the following disclaimer.
> + *     * Redistributions in binary form must reproduce the above copyright
> + *       notice, this list of conditions and the following disclaimer in
> + *       the documentation and/or other materials provided with the
> + *       distribution.
> + *     * Neither the name of Intel Corporation nor the names of its
> + *       contributors may be used to endorse or promote products derived
> + *       from this software without specific prior written permission.
> + *
> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +
> +#include <rte_ether.h>
> +#include <rte_ip.h>
> +#include <rte_tcp.h>
> +
> +#include "gso_common.h"
> +#include "gso_tcp.h"
> +
> +int
> +gso_tcp4_segment(struct rte_mbuf *pkt,
> +		uint16_t gso_size,
> +		struct rte_mempool *direct_pool,
> +		struct rte_mempool *indirect_pool,
> +		struct rte_mbuf **pkts_out,
> +		uint16_t nb_pkts_out)
> +{
> +	struct ether_hdr *eth_hdr;
> +	struct ipv4_hdr *ipv4_hdr;
> +	uint16_t tcp_dl;
> +	uint16_t pyld_unit_size;
> +	uint16_t hdr_offset;
> +	int ret = 1;
> +
> +	eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *);
> +	ipv4_hdr = (struct ipv4_hdr *)((char *)eth_hdr + pkt->l2_len);
> +
> +	/* don't process fragmented packet */
> +	if ((ipv4_hdr->fragment_offset &
> +				rte_cpu_to_be_16(IPV4_HDR_DF_MASK)) == 0)
> +		return ret;
> +
> +	tcp_dl = rte_be_to_cpu_16(ipv4_hdr->total_length) -
> +		pkt->l3_len - pkt->l4_len;
> +	/* don't process packet without data */
> +	if (tcp_dl == 0)
> +		return ret;
> +
> +	hdr_offset = pkt->l2_len + pkt->l3_len + pkt->l4_len;
> +	pyld_unit_size = gso_size - hdr_offset - ETHER_CRC_LEN;
> +
> +	/* segment the payload */
> +	ret = gso_do_segment(pkt, hdr_offset, pyld_unit_size, direct_pool,
> +			indirect_pool, pkts_out, nb_pkts_out);
> +
> +	if (ret > 1)
> +		gso_update_pkt_headers(pkt, ret, pkts_out);
> +
> +	return ret;
> +}
> diff --git a/lib/librte_gso/gso_tcp.h b/lib/librte_gso/gso_tcp.h
> new file mode 100644
> index 0000000..f291ccb
> --- /dev/null
> +++ b/lib/librte_gso/gso_tcp.h
> @@ -0,0 +1,73 @@
> +/*-
> + *   BSD LICENSE
> + *
> + *   Copyright(c) 2017 Intel Corporation. All rights reserved.
> + *   All rights reserved.
> + *
> + *   Redistribution and use in source and binary forms, with or without
> + *   modification, are permitted provided that the following conditions
> + *   are met:
> + *
> + *     * Redistributions of source code must retain the above copyright
> + *       notice, this list of conditions and the following disclaimer.
> + *     * Redistributions in binary form must reproduce the above copyright
> + *       notice, this list of conditions and the following disclaimer in
> + *       the documentation and/or other materials provided with the
> + *       distribution.
> + *     * Neither the name of Intel Corporation nor the names of its
> + *       contributors may be used to endorse or promote products derived
> + *       from this software without specific prior written permission.
> + *
> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +#ifndef _GSO_TCP_H_
> +#define _GSO_TCP_H_
> +
> +#include <stdint.h>
> +#include <rte_mbuf.h>
> +
> +/**
> + * Segment an IPv4/TCP packet. This function assumes the input packet has
> + * correct checksums and doesn't update checksums for GSO segment.
> + * Furthermore, it doesn't process IP fragment packets.
> + *
> + * @param pkt
> + *  The packet mbuf to segment.
> + * @param gso_size
> + *  The max length of a GSO segment, measured in bytes.
> + * @param direct_pool
> + *  MBUF pool used for allocating direct buffers for output segments.
> + * @param indirect_pool
> + *  MBUF pool used for allocating indirect buffers for output segments.
> + * @param pkts_out
> + *  Pointer array, which is used to store mbuf addresses of GSO segments.
> + *  Caller should guarantee that 'pkts_out' is sufficiently large to store
> + *  all GSO segments.
> + * @param nb_pkts_out
> + *  The max number of items that 'pkts_out' can keep.
> + *
> + * @return
> + *   - The number of GSO segments on success.
> + *   - Return 1 if no GSO is performed.
> + *   - Return -ENOMEM if available memory in mempools is insufficient.
> + *   - Return -EINVAL for invalid parameters.
> + */
> +int gso_tcp4_segment(struct rte_mbuf *pkt,
> +		uint16_t gso_size,
> +		struct rte_mempool *direct_pool,
> +		struct rte_mempool *indirect_pool,
> +		struct rte_mbuf **pkts_out,
> +		uint16_t nb_pkts_out);
> +
> +#endif
> diff --git a/lib/librte_gso/rte_gso.c b/lib/librte_gso/rte_gso.c
> index b81afce..fac95f2 100644
> --- a/lib/librte_gso/rte_gso.c
> +++ b/lib/librte_gso/rte_gso.c
> @@ -31,17 +31,57 @@
>   *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
>   */
> 
> +#include <rte_log.h>
> +
>  #include "rte_gso.h"
> +#include "gso_common.h"
> +#include "gso_tcp.h"
> 
>  int
>  rte_gso_segment(struct rte_mbuf *pkt,
>  		struct rte_gso_ctx gso_ctx,
>  		struct rte_mbuf **pkts_out,
> -		uint16_t nb_pkts_out __rte_unused)
> +		uint16_t nb_pkts_out)
>  {
> +	struct rte_mempool *direct_pool, *indirect_pool;
> +	struct rte_mbuf *pkt_seg;
> +	uint16_t nb_segments, gso_size;
> +
>  	if (pkt == NULL || pkts_out == NULL || gso_ctx.direct_pool ==
>  			NULL || gso_ctx.indirect_pool == NULL)
>  		return -EINVAL;

Probably we don't need to check gso_ctx values for each incoming packet.
If you feel it is necessary - create  new function rte_gso_ctx_check() that
could be performed just once per ctx.

> 
> -	return 1;
> +	if ((gso_ctx.gso_types & RTE_GSO_TCP_IPV4) == 0 ||
> +			gso_ctx.gso_size >= pkt->pkt_len ||
> +			gso_ctx.gso_size == 0)


First and third condition seems redundant.

> +		return 1;


I think you forgot here:
pkts_out[0] = pkt;


> +
> +	pkt_seg = pkt;
> +	gso_size = gso_ctx.gso_size;
> +	direct_pool = gso_ctx.direct_pool;
> +	indirect_pool = gso_ctx.indirect_pool;
> +
> +	/* Parse packet headers to determine how to segment 'pkt' */
> +	gso_parse_packet(pkt);


I don't think we need to parse packet here.
 Instead assume that user already filled packet_type and l2/l3/..._len fields correctly.

> +
> +	switch (pkt->packet_type) {
> +	case ETHER_VLAN_IPv4_TCP_PKT:
> +	case ETHER_IPv4_TCP_PKT:
> +		nb_segments = gso_tcp4_segment(pkt, gso_size,
> +				direct_pool, indirect_pool,
> +				pkts_out, nb_pkts_out);
> +		break;
> +	default:
> +		RTE_LOG(WARNING, GSO, "Unsupported packet type\n");
> +		nb_segments = 1;
> +	}
> +
> +	if (nb_segments > 1) {
> +		while (pkt_seg) {
> +			rte_mbuf_refcnt_update(pkt_seg, -1);
> +			pkt_seg = pkt_seg->next;
> +		}
> +	}
> +
> +	return nb_segments;
>  }
> diff --git a/lib/librte_gso/rte_gso.h b/lib/librte_gso/rte_gso.h
> index 5a8389a..77853fa 100644
> --- a/lib/librte_gso/rte_gso.h
> +++ b/lib/librte_gso/rte_gso.h
> @@ -46,6 +46,9 @@ extern "C" {
>  #include <stdint.h>
>  #include <rte_mbuf.h>
> 
> +#define RTE_GSO_TCP_IPV4 (1ULL << 0)
> +/**< GSO flag for TCP/IPv4 packets (containing optional VLAN tag) */
> +
>  /**
>   * GSO context structure.
>   */
> --
> 2.7.4



More information about the dev mailing list