[dpdk-dev,v3,2/3] lib/gro: add TCP/IPv4 GRO support

Message ID 1493021398-115955-3-git-send-email-jiayu.hu@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation fail Compilation issues

Commit Message

Hu, Jiayu April 24, 2017, 8:09 a.m. UTC
  Introduce three new functions to support TCP/IPv4 GRO.
- rte_gro_tcp4_tbl_create: create a TCP/IPv4 hash table;
- rte_gro_tcp4_reassemble: try to reassemble an incoming TCP/IPv4 packet
    with the TCP/IPv4 packets already seen;
- rte_gro_tcp4_cksum_update: update TCP and IPv4 checksums.

rte_gro_tcp4_reassemble uses a TCP/IPv4 hash table to implement packet
reassembly. The TCP/IPv4 hash table is a cuckoo hash table, whose keys
are the rules for merging TCP/IPv4 packets and whose values point to
item-lists. An item-list contains items, each of which points to a
packet that matches the key.

rte_gro_tcp4_reassemble processes an incoming packet in four steps:
a. check whether the packet should be processed. TCP/IPv4 GRO doesn't
process packets of the following types:
- packets without data;
- packets with wrong checksums;
- fragmented packets.
b. look up the hash table to find an item-list, which stores packets that
may be merged with the incoming one;
c. if an item-list is found, check all of its packets. If one of them is a
neighbor of the incoming packet, chain the two together and update the
packet length and mbuf metadata; if none is, allocate a new item for the
incoming packet and insert it into the item-list;
d. if no item-list is found, allocate a new item-list for the incoming
packet and insert it into the hash table. The whole flow is sketched
below.
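The per-burst flow, condensed from rte_gro_reassemble_burst() in this
patch, looks roughly as follows. pkts[] and nb_pkts come from the burst
API; tcp4_tbl and the merged[] counter are only illustrative here, and
the item arrays are sized as in the real code so that each item-list can
grow up to nb_pkts items:

/* sketch only: per-packet item-lists backed by a shared item array */
struct gro_tcp_item tcp_items[nb_pkts * nb_pkts];
struct gro_item_list lists[nb_pkts];
uint16_t merged[nb_pkts];
int32_t ret;
uint16_t i;

memset(merged, 0, sizeof(merged));
for (i = 0; i < nb_pkts; i++) {
	/* the first item of each list carries the packet to process */
	lists[i].items = &tcp_items[i * nb_pkts];
	lists[i].nb_item = 1;
	tcp_items[i * nb_pkts].segment = pkts[i];
	tcp_items[i * nb_pkts].segment_idx = i;

	ret = rte_gro_tcp4_reassemble(tcp4_tbl, &lists[i]);
	if (ret > 0)
		merged[ret - 1]++;	/* pkts[i] was chained to pkts[ret - 1] */
	/* ret == 0: inserted into the table, may absorb later packets */
	/* ret < 0: not a GRO candidate, forward pkts[i] unchanged */
}

/* packets that absorbed others need their checksums recomputed */
for (i = 0; i < nb_pkts; i++)
	if (merged[i] > 0)
		rte_gro_tcp4_cksum_update(pkts[i]);

/* rte_gro_reassemble_burst() also compacts pkts[] and resets the table here */

A packet merged into another one (ret > 0) is carried as a segment of the
absorbing mbuf, so it must not be freed or transmitted on its own.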

Signed-off-by: Jiayu Hu <jiayu.hu@intel.com>
---
 lib/librte_gro/Makefile         |   1 +
 lib/librte_gro/rte_gro.c        |  82 +++++++++++-
 lib/librte_gro/rte_gro_common.h |   4 +-
 lib/librte_gro/rte_gro_tcp.c    | 270 ++++++++++++++++++++++++++++++++++++++++
 lib/librte_gro/rte_gro_tcp.h    |  95 ++++++++++++++
 5 files changed, 449 insertions(+), 3 deletions(-)
 create mode 100644 lib/librte_gro/rte_gro_tcp.c
 create mode 100644 lib/librte_gro/rte_gro_tcp.h
  

Patch

diff --git a/lib/librte_gro/Makefile b/lib/librte_gro/Makefile
index fb3a36c..c45f4f2 100644
--- a/lib/librte_gro/Makefile
+++ b/lib/librte_gro/Makefile
@@ -43,6 +43,7 @@  LIBABIVER := 1
 
 #source files
 SRCS-$(CONFIG_RTE_LIBRTE_GRO) += rte_gro.c
+SRCS-$(CONFIG_RTE_LIBRTE_GRO) += rte_gro_tcp.c
 
 # install this header file
 SYMLINK-$(CONFIG_RTE_LIBRTE_GRO)-include += rte_gro.h
diff --git a/lib/librte_gro/rte_gro.c b/lib/librte_gro/rte_gro.c
index 996b382..7851ac6 100644
--- a/lib/librte_gro/rte_gro.c
+++ b/lib/librte_gro/rte_gro.c
@@ -6,9 +6,12 @@ 
 
 #include "rte_gro.h"
 #include "rte_gro_common.h"
+#include "rte_gro_tcp.h"
 
-gro_reassemble_fn reassemble_functions[GRO_TYPE_MAX_NB] = {NULL};
-gro_tbl_create_fn tbl_create_functions[GRO_TYPE_MAX_NB] = {NULL};
+gro_reassemble_fn reassemble_functions[GRO_TYPE_MAX_NB] = {
+	rte_gro_tcp4_reassemble, NULL};
+gro_tbl_create_fn tbl_create_functions[GRO_TYPE_MAX_NB] = {
+	rte_gro_tcp4_tbl_create, NULL};
 
 struct rte_gro_status *gro_status;
 
@@ -105,7 +108,82 @@  rte_gro_reassemble_burst(uint8_t port __rte_unused,
 		printf("invalid parameters for GRO.\n");
 		return 0;
 	}
+	struct ether_hdr *eth_hdr;
+	struct ipv4_hdr *ipv4_hdr;
+	uint16_t l3proc_type, i;
+
+	/* record packet GRO info */
+	struct gro_info gro_infos[nb_pkts];
+	struct rte_gro_lkp_tbl *lkp_tbls = ((struct rte_gro_tbl *)
+			gro_tbl)->lkp_tbls;
+	int32_t ret;
 	uint16_t nb_after_gro = nb_pkts;
+	uint8_t dirty_tbls[GRO_SUPPORT_TYPE_NB] = {0};
+
+	/* pre-allocate tcp items for TCP GRO */
+	struct gro_tcp_item tcp_items[nb_pkts * nb_pkts];
+
+	for (i = 0; i < nb_pkts; i++) {
+		gro_infos[i].nb_merged_packets = 1;	/* initial value */
+		eth_hdr = rte_pktmbuf_mtod(pkts[i], struct ether_hdr *);
+		l3proc_type = rte_be_to_cpu_16(eth_hdr->ether_type);
+		if (l3proc_type == ETHER_TYPE_IPv4) {
+			ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1);
+			if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
+				gro_infos[i].gro_type = GRO_TCP_IPV4;
+				/* allocate an item-list for the packet */
+				gro_infos[i].item_list.items =
+					&tcp_items[i * nb_pkts];
+				gro_infos[i].item_list.nb_item = 1;
+				/**
+				 * fill the packet information into the first
+				 * item of the item-list
+				 */
+				tcp_items[i * nb_pkts].segment = pkts[i];
+				tcp_items[i * nb_pkts].segment_idx = i;
+
+				ret = rte_gro_tcp4_reassemble(
+						lkp_tbls[GRO_TCP_IPV4].hash_tbl,
+						&gro_infos[i].item_list);
+				if (ret > 0) {
+					gro_infos[i].nb_merged_packets = 0;
+					gro_infos[--ret].nb_merged_packets++;
+					nb_after_gro--;
+				}
+				dirty_tbls[GRO_TCP_IPV4] = ret >= 0 ? 1 : 0;
+			}
+		}
+	}
+	/**
+	 * if any packets have been merged, update their headers and
+	 * remove the packets that were merged into others from the
+	 * input packet array.
+	 */
+	if (nb_after_gro < nb_pkts) {
+		struct rte_mbuf *tmp[nb_pkts];
+		uint16_t j;
+
+		memset(tmp, 0,
+				sizeof(struct rte_mbuf *) * nb_pkts);
+		for (i = 0, j = 0; i < nb_pkts; i++) {
+			if (gro_infos[i].nb_merged_packets > 1) {
+				switch (gro_infos[i].gro_type) {
+				case GRO_TCP_IPV4:
+					rte_gro_tcp4_cksum_update(pkts[i]);
+					break;
+				}
+			}
+			if (gro_infos[i].nb_merged_packets != 0)
+				tmp[j++] = pkts[i];
+		}
+		rte_memcpy(pkts, tmp,
+				nb_pkts * sizeof(struct rte_mbuf *));
+	}
+
+	/* if GRO is performed, reset the hash table */
+	for (i = 0; i < GRO_SUPPORT_TYPE_NB; i++)
+		if (dirty_tbls[i])
+			rte_hash_reset(lkp_tbls[i].hash_tbl);
 
 	return nb_after_gro;
 }
diff --git a/lib/librte_gro/rte_gro_common.h b/lib/librte_gro/rte_gro_common.h
index 611d833..7b5d9ec 100644
--- a/lib/librte_gro/rte_gro_common.h
+++ b/lib/librte_gro/rte_gro_common.h
@@ -12,7 +12,9 @@ 
 /**
  * current supported GRO types number
  */
-#define GRO_SUPPORT_TYPE_NB 0
+#define GRO_SUPPORT_TYPE_NB 1
+
+#define GRO_TCP_IPV4 0	/**< TCP/IPv4 GRO */
 
 /**
  * default element number of the hashing table
diff --git a/lib/librte_gro/rte_gro_tcp.c b/lib/librte_gro/rte_gro_tcp.c
new file mode 100644
index 0000000..f17d9f5
--- /dev/null
+++ b/lib/librte_gro/rte_gro_tcp.c
@@ -0,0 +1,270 @@ 
+#include "rte_gro_tcp.h"
+
+int
+rte_gro_tcp4_tbl_create(char *name,
+		uint32_t nb_entries, uint16_t socket_id,
+		struct rte_hash **hash_tbl)
+{
+	struct rte_hash_parameters ht_param = {
+		.entries = nb_entries,
+		.name = name,
+		.key_len = sizeof(struct gro_tcp4_pre_rules),
+		.hash_func = rte_jhash,
+		.hash_func_init_val = 0,
+		.socket_id = socket_id,
+	};
+
+	*hash_tbl = rte_hash_create(&ht_param);
+	if (likely(*hash_tbl != NULL))
+		return 0;
+	return -1;
+}
+
+/* update TCP IPv4 checksum */
+void
+rte_gro_tcp4_cksum_update(struct rte_mbuf *pkt)
+{
+	uint32_t len, offset, cksum;
+	struct ether_hdr *eth_hdr;
+	struct ipv4_hdr *ipv4_hdr;
+	struct tcp_hdr *tcp_hdr;
+	uint16_t ipv4_ihl, cksum_pld;
+
+	if (pkt == NULL)
+		return;
+
+	len = pkt->pkt_len;
+	eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *);
+	ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1);
+	ipv4_ihl = IPv4_HDR_LEN(ipv4_hdr);
+	tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr + ipv4_ihl);
+
+	offset = sizeof(struct ether_hdr) + ipv4_ihl;
+	len -= offset;
+
+	/* TCP cksum without IP pseudo header */
+	ipv4_hdr->hdr_checksum = 0;
+	tcp_hdr->cksum = 0;
+	if (rte_raw_cksum_mbuf(pkt, offset, len, &cksum_pld) < 0) {
+		printf("invalid param for raw_cksum_mbuf\n");
+		return;
+	}
+	/* IP pseudo header cksum */
+	cksum = cksum_pld;
+	cksum += rte_ipv4_phdr_cksum(ipv4_hdr, 0);
+
+	/* combine TCP checksum and IP pseudo header checksum */
+	cksum = ((cksum & 0xffff0000) >> 16) + (cksum & 0xffff);
+	cksum = (~cksum) & 0xffff;
+	cksum = (cksum == 0) ? 0xffff : cksum;
+	tcp_hdr->cksum = cksum;
+
+	/* update IP header cksum */
+	ipv4_hdr->hdr_checksum = rte_ipv4_cksum(ipv4_hdr);
+}
+
+/**
+ * This function traverses the item-list to find one item that can be
+ * merged with the incoming packet. If the merge succeeds, the two
+ * packets are chained together; if not, the incoming packet is inserted
+ * into the item-list.
+ */
+static int32_t
+gro_tcp4_reassemble(struct rte_mbuf *pkt,
+		uint16_t pkt_idx,
+		uint32_t pkt_sent_seq,
+		struct gro_item_list *list)
+{
+	struct gro_tcp_item *items;
+	struct ipv4_hdr *ipv4_hdr1;
+	struct tcp_hdr *tcp_hdr1;
+	uint16_t ipv4_ihl1, tcp_hl1, tcp_dl1;
+
+	items = (struct gro_tcp_item *)list->items;
+	ipv4_hdr1 = (struct ipv4_hdr *)(rte_pktmbuf_mtod(pkt, struct
+				ether_hdr *) + 1);
+	ipv4_ihl1 = IPv4_HDR_LEN(ipv4_hdr1);
+	tcp_hdr1 = (struct tcp_hdr *)((char *)ipv4_hdr1 + ipv4_ihl1);
+	tcp_hl1 = TCP_HDR_LEN(tcp_hdr1);
+	tcp_dl1 = rte_be_to_cpu_16(ipv4_hdr1->total_length) - ipv4_ihl1
+		- tcp_hl1;
+
+	for (uint16_t i = 0; i < list->nb_item; i++) {
+		/* check if the two packets are neighbors */
+		if ((pkt_sent_seq ^ items[i].next_sent_seq) == 0) {
+			struct ipv4_hdr *ipv4_hdr2;
+			struct tcp_hdr *tcp_hdr2;
+			uint16_t ipv4_ihl2, tcp_hl2;
+			struct rte_mbuf *tail;
+
+			ipv4_hdr2 = (struct ipv4_hdr *)(rte_pktmbuf_mtod(
+						items[i].segment,
+						struct ether_hdr *)
+					+ 1);
+
+			/* check if the TCP option fields are equal */
+			if (tcp_hl1 > sizeof(struct tcp_hdr)) {
+				ipv4_ihl2 = IPv4_HDR_LEN(ipv4_hdr2);
+				tcp_hdr2 = (struct tcp_hdr *)
+					((char *)ipv4_hdr2 + ipv4_ihl2);
+				tcp_hl2 = TCP_HDR_LEN(tcp_hdr2);
+				if ((tcp_hl1 != tcp_hl2) ||
+						(memcmp(tcp_hdr1 + 1,
+								tcp_hdr2 + 1,
+								tcp_hl2 - sizeof
+								(struct tcp_hdr))
+						 != 0))
+					continue;
+			}
+			/* check if the packet length will be beyond 64K */
+			if (items[i].segment->pkt_len + tcp_dl1 > UINT16_MAX)
+				goto merge_fail;
+
+			/* remove the header of the incoming packet */
+			rte_pktmbuf_adj(pkt, sizeof(struct ether_hdr) +
+					ipv4_ihl1 + tcp_hl1);
+			/* chain the two packets together */
+			tail = rte_pktmbuf_lastseg(items[i].segment);
+			tail->next = pkt;
+
+			/* update IP header for the merged packet */
+			ipv4_hdr2->total_length = rte_cpu_to_be_16(
+					rte_be_to_cpu_16(
+						ipv4_hdr2->total_length)
+					+ tcp_dl1);
+
+			/* update the next expected sequence number */
+			items[i].next_sent_seq += tcp_dl1;
+
+			/* update mbuf metadata for the merged packet */
+			items[i].segment->nb_segs++;
+			items[i].segment->pkt_len += pkt->pkt_len;
+
+			return items[i].segment_idx + 1;
+		}
+	}
+
+merge_fail:
+	/* failed to merge; insert the incoming packet into the item-list */
+	items[list->nb_item].next_sent_seq = pkt_sent_seq + tcp_dl1;
+	items[list->nb_item].segment = pkt;
+	items[list->nb_item].segment_idx = pkt_idx;
+	list->nb_item++;
+
+	return 0;
+}
+
+/**
+ * Traverse the item-list to find a packet to merge with the incoming
+ * one.
+ * @param hash_tbl
+ *  TCP IPv4 lookup table
+ * @param item_list
+ *  Pre-allocated item-list, in which the first item stores the packet
+ *  to process.
+ * @return
+ *  If the incoming packet is merged with an existing packet, return
+ *  the index + 1 of that packet; if the incoming packet is not
+ *  processed by GRO, return -1; if it is processed by GRO but cannot
+ *  be merged, return 0.
+ */
+int32_t
+rte_gro_tcp4_reassemble(struct rte_hash *hash_tbl,
+		struct gro_item_list *item_list)
+{
+	struct ether_hdr *eth_hdr;
+	struct ipv4_hdr *ipv4_hdr;
+	struct tcp_hdr *tcp_hdr;
+	uint16_t ipv4_ihl, tcp_hl, tcp_dl, tcp_cksum, ip_cksum;
+	struct gro_tcp4_pre_rules key = {0};
+	struct gro_item_list *list;
+	uint64_t ol_flags;
+	uint32_t sent_seq;
+	int32_t ret = -1;
+
+	/* get the packet to process */
+	struct gro_tcp_item *items = item_list->items;
+	struct rte_mbuf *pkt = items[0].segment;
+	uint32_t pkt_idx = items[0].segment_idx;
+
+	eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *);
+	ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1);
+	ipv4_ihl = IPv4_HDR_LEN(ipv4_hdr);
+
+	/* 1. check if the packet should be processed */
+	if (ipv4_ihl < sizeof(struct ipv4_hdr))
+		goto end;
+	if (ipv4_hdr->next_proto_id != IPPROTO_TCP)
+		goto end;
+	if ((ipv4_hdr->fragment_offset &
+				rte_cpu_to_be_16(IPV4_HDR_DF_MASK))
+			== 0)
+		goto end;
+
+	tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr + ipv4_ihl);
+	tcp_hl = TCP_HDR_LEN(tcp_hdr);
+	tcp_dl = rte_be_to_cpu_16(ipv4_hdr->total_length) - ipv4_ihl
+		- tcp_hl;
+	if (tcp_dl == 0)
+		goto end;
+
+	ol_flags = pkt->ol_flags;
+	/**
+	 * 2. if HW rx checksum offload isn't enabled, recalculate the
+	 * checksum in SW. Then, check if the checksum is correct
+	 */
+	if ((ol_flags & PKT_RX_IP_CKSUM_MASK) !=
+			PKT_RX_IP_CKSUM_UNKNOWN) {
+		if (ol_flags == PKT_RX_IP_CKSUM_BAD)
+			goto end;
+	} else {
+		ip_cksum = ipv4_hdr->hdr_checksum;
+		ipv4_hdr->hdr_checksum = 0;
+		ipv4_hdr->hdr_checksum = rte_ipv4_cksum(ipv4_hdr);
+		if (ipv4_hdr->hdr_checksum ^ ip_cksum)
+			goto end;
+	}
+
+	if ((ol_flags & PKT_RX_L4_CKSUM_MASK) !=
+			PKT_RX_L4_CKSUM_UNKNOWN) {
+		if (ol_flags == PKT_RX_L4_CKSUM_BAD)
+			goto end;
+	} else {
+		tcp_cksum = tcp_hdr->cksum;
+		tcp_hdr->cksum = 0;
+		tcp_hdr->cksum = rte_ipv4_udptcp_cksum
+			(ipv4_hdr, tcp_hdr);
+		if (tcp_hdr->cksum ^ tcp_cksum)
+			goto end;
+	}
+
+	/* 3. search for the corresponding item-list for the packet */
+	key.eth_saddr = eth_hdr->s_addr;
+	key.eth_daddr = eth_hdr->d_addr;
+	key.ip_src_addr = rte_be_to_cpu_32(ipv4_hdr->src_addr);
+	key.ip_dst_addr = rte_be_to_cpu_32(ipv4_hdr->dst_addr);
+	key.src_port = rte_be_to_cpu_16(tcp_hdr->src_port);
+	key.dst_port = rte_be_to_cpu_16(tcp_hdr->dst_port);
+	key.recv_ack = rte_be_to_cpu_32(tcp_hdr->recv_ack);
+	key.tcp_flags = tcp_hdr->tcp_flags;
+
+	sent_seq = rte_be_to_cpu_32(tcp_hdr->sent_seq);
+
+	if (rte_hash_lookup_data(hash_tbl, &key, (void **)&list) >= 0) {
+		ret = gro_tcp4_reassemble(pkt, pkt_idx, sent_seq, list);
+	} else {
+		/**
+		 * failed to find an item-list. Record the expected sequence
+		 * number of the incoming packet's neighbor in its item-list,
+		 * and insert the item-list into the hash table.
+		 */
+		items[0].next_sent_seq = sent_seq + tcp_dl;
+		if (unlikely(rte_hash_add_key_data(hash_tbl, &key, item_list)
+					!= 0))
+			printf("GRO TCP hash insert fail.\n");
+		else
+			ret = 0;
+	}
+end:
+	return ret;
+}
diff --git a/lib/librte_gro/rte_gro_tcp.h b/lib/librte_gro/rte_gro_tcp.h
new file mode 100644
index 0000000..52be9cd
--- /dev/null
+++ b/lib/librte_gro/rte_gro_tcp.h
@@ -0,0 +1,95 @@ 
+#ifndef _RTE_GRO_TCP_H_
+#define _RTE_GRO_TCP_H_
+
+#include <rte_ethdev.h>
+#include <rte_ip.h>
+#include <rte_tcp.h>
+#include <rte_hash.h>
+#include <rte_jhash.h>
+
+#include "rte_gro_common.h"
+
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+#define TCP_HDR_LEN(tcph) \
+	((tcph->data_off >> 4) * 4)
+#define IPv4_HDR_LEN(iph) \
+	((iph->version_ihl & 0x0f) * 4)
+#else
+#define TCP_DATAOFF_MASK 0x0f
+#define TCP_HDR_LEN(tcph) \
+	((tcph->data_off & TCP_DATAOFF_MASK) * 4)
+#define IPv4_HDR_LEN(iph) \
+	((iph->version_ihl >> 4) * 4)
+#endif
+
+#define IPV4_HDR_DF_SHIFT 14
+#define IPV4_HDR_DF_MASK (1 << IPV4_HDR_DF_SHIFT)
+
+
+/**
+ * Key structure of the TCP/IPv4 hash table. It describes the prerequisite
+ * rules for merging packets.
+ */
+struct gro_tcp4_pre_rules {
+	struct ether_addr eth_saddr;
+	struct ether_addr eth_daddr;
+	uint32_t ip_src_addr;
+	uint32_t ip_dst_addr;
+
+	uint32_t recv_ack;	/**< acknowledgment sequence number. */
+	uint16_t src_port;
+	uint16_t dst_port;
+	uint8_t tcp_flags;	/**< TCP flags. */
+
+	uint8_t padding[3];
+};
+
+/**
+ * TCP item structure
+ */
+struct gro_tcp_item {
+	struct rte_mbuf *segment;	/**< packet address. */
+	uint32_t next_sent_seq;	/**< sequence number of the next packet. */
+	uint16_t segment_idx;	/**< packet index. */
+};
+
+void
+rte_gro_tcp4_cksum_update(struct rte_mbuf *pkt);
+
+/**
+ * Create a new TCP/IPv4 GRO lookup table.
+ *
+ * @param name
+ *	Lookup table name
+ * @param nb_entries
+ *  Number of lookup table entries, which should be no less than
+ *  RTE_GRO_HASH_ENTRIES_MIN, no greater than RTE_GRO_HASH_ENTRIES_MAX,
+ *  and a power of two.
+ * @param socket_id
+ *  socket id
+ * @return
+ *  0 on success, -1 on failure. The new table is returned via hash_tbl.
+ */
+int
+rte_gro_tcp4_tbl_create(char *name, uint32_t nb_entries,
+		uint16_t socket_id, struct rte_hash **hash_tbl);
+/**
+ * This function reassembles an incoming TCP/IPv4 packet with the packets
+ * already recorded in the lookup table. Non-TCP/IPv4 packets are not
+ * processed.
+ *
+ * @param hash_tbl
+ *	Lookup table used to reassemble packets. It stores key-value pairs.
+ *	The key describes the prerequisite rules to merge two TCP/IPv4
+ *	packets; the value is a pointer to an item-list, which contains
+ *	packets that match the same prerequisite rules. Note that
+ *	applications need to guarantee the hash_tbl is clean when they
+ *	first call this function.
+ * @param item_list
+ *	Pre-allocated item-list, in which the first item stores the packet
+ *	to process.
+ * @return
+ *	The index + 1 of the packet that the incoming packet is merged
+ *	with; 0 if the packet is inserted into the lookup table but not
+ *	merged with any packet; -1 if the packet is not processed by GRO.
+ */
+int32_t
+rte_gro_tcp4_reassemble(struct rte_hash *hash_tbl,
+		struct gro_item_list *item_list);
+
+#endif