[dpdk-dev] [RFC PATCH 5/6] rte_ether: implement encap and decap APIs

Jijiang Liu jijiang.liu at intel.com
Wed Dec 23 09:49:51 CET 2015


Use SIMD (AVX2) instructions to accelerate the VXLAN encapsulation operation.
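
A rough usage sketch (where these calls sit in the datapath and the
contents of tunnel_conf are illustrative assumptions of this RFC, not
a final API):

	uint16_t nb_rx;
	struct rte_mbuf *pkts[MAX_PKT_BURST];
	/* tunnel_conf is assumed to be filled in by the application */
	struct rte_eth_tunnel_conf tunnel_conf;

	nb_rx = rte_eth_rx_burst(port, queue, pkts, MAX_PKT_BURST);
	/* strip the outer Ethernet/IPv4/UDP/VXLAN headers */
	rte_vxlan_decap_burst(port, queue, pkts, nb_rx, &tunnel_conf);
	/* ... application processing on the inner frames ... */
	/* prepend the outer headers again before transmit */
	rte_vxlan_encap_burst(port, queue, pkts, nb_rx, &tunnel_conf);
	rte_eth_tx_burst(port, queue, pkts, nb_rx);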

Signed-off-by: Jijiang Liu <jijiang.liu at intel.com>
---
 lib/librte_ether/libtunnel/rte_vxlan_opt.c |  251 ++++++++++++++++++++++++++++
 1 files changed, 251 insertions(+), 0 deletions(-)
 create mode 100644 lib/librte_ether/libtunnel/rte_vxlan_opt.c

diff --git a/lib/librte_ether/libtunnel/rte_vxlan_opt.c b/lib/librte_ether/libtunnel/rte_vxlan_opt.c
new file mode 100644
index 0000000..e59ed2c
--- /dev/null
+++ b/lib/librte_ether/libtunnel/rte_vxlan_opt.c
@@ -0,0 +1,251 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <stdint.h>
+#include <netinet/in.h>
+#include <rte_mbuf.h>
+#include <rte_ether.h>
+#include <rte_ip.h>
+#include <rte_udp.h>
+#include <rte_tcp.h>
+#include <rte_byteorder.h>
+#include <rte_prefetch.h>
+#include <rte_ethdev.h>
+#include <rte_hash_crc.h>
+#include <rte_memcpy.h>
+
+#include <immintrin.h>
+#include <tmmintrin.h>
+#include <mmintrin.h>
+
+#include "vxlan_opt.h"
+
+#ifndef __INTEL_COMPILER
+#pragma GCC diagnostic ignored "-Wcast-qual"
+#endif
+
+#pragma GCC diagnostic ignored "-Wstrict-aliasing"
+
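+/*
+ * RFC 7348 recommends deriving the outer UDP source port from a hash
+ * of the inner frame, constrained to the dynamic/private port range;
+ * the hash-to-port scaling in the encap path maps a 32-bit hash into
+ * [PORT_MIN, PORT_MAX].
+ */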
+#define PORT_MIN    49152
+#define PORT_MAX    65535
+#define PORT_RANGE ((PORT_MAX - PORT_MIN) + 1)
+
+#define DUMMY_FOR_TEST
+#define RTE_DEFAULT_VXLAN_PORT 4789
+
+#define LOOP           4
+#define MAC_LEN        6
+#define PREFIX         (ETHER_HDR_LEN + 4) /* outer L2 + first 4B of IP */
+#define UDP_PRE_SZ     (sizeof(struct udp_hdr) + sizeof(struct vxlan_hdr))
+#define IP_PRE_SZ      (UDP_PRE_SZ + sizeof(struct ipv4_hdr))
+#define VXLAN_PKT_HDR_SIZE       (IP_PRE_SZ + ETHER_HDR_LEN)
+
+#define VXLAN_SIZE     sizeof(struct vxlan_hdr)
+#define INNER_PRE_SZ   (14 + 20 + 8 + 8)
+#define DECAP_OFFSET   (16 + 8 + 8)
+#define DETECT_OFFSET  12
+
+/* a constant-expression htons() usable in switch-case labels */
+#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
+#define _htons(x) (x)
+#else
+#define _htons(x) \
+	((uint16_t)((((x) & 0x00ffU) << 8) | (((x) & 0xff00U) >> 8)))
+#endif
+
+struct eth_pkt_info {
+	uint8_t l2_len;
+	uint16_t ethertype;
+	uint16_t l3_len;
+	uint16_t l4_proto;
+	uint16_t l4_len;
+};
+
+/*
+ * 16-byte TX meta data; kept 16-byte aligned so the encap path can
+ * load it with a single _mm_loadu_si128().
+ */
+struct vxlan_tx_meta {
+	uint32_t sip;
+	uint32_t dip;
+	uint32_t vni;
+	uint16_t sport;
+} __attribute__((__aligned__(16)));
+
+
+/* Parse an IPv4 header to fill l3_len, l4_len, and l4_proto */
+static void
+parse_ipv4(struct ipv4_hdr *ipv4_hdr, struct eth_pkt_info *info)
+{
+	struct tcp_hdr *tcp_hdr;
+
+	info->l3_len = (ipv4_hdr->version_ihl & 0x0f) * 4;
+	info->l4_proto = ipv4_hdr->next_proto_id;
+
+	/* only fill l4_len for TCP; it is useful for TSO */
+	if (info->l4_proto == IPPROTO_TCP) {
+		tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr + info->l3_len);
+		info->l4_len = (tcp_hdr->data_off & 0xf0) >> 2;
+	} else
+		info->l4_len = 0;
+}
+
+/* Parse an IPv6 header to fill l3_len, l4_len, and l4_proto */
+static void
+parse_ipv6(struct ipv6_hdr *ipv6_hdr, struct eth_pkt_info *info)
+{
+	struct tcp_hdr *tcp_hdr;
+
+	info->l3_len = sizeof(struct ipv6_hdr);
+	info->l4_proto = ipv6_hdr->proto;
+
+	/* only fill l4_len for TCP; it is useful for TSO */
+	if (info->l4_proto == IPPROTO_TCP) {
+		tcp_hdr = (struct tcp_hdr *)((char *)ipv6_hdr + info->l3_len);
+		info->l4_len = (tcp_hdr->data_off & 0xf0) >> 2;
+	} else
+		info->l4_len = 0;
+}
+
+/*
+ * Parse an Ethernet header to fill the ethertype, l2_len, l3_len and
+ * l4_proto fields. This function recognizes IPv4/IPv6 with one optional
+ * VLAN header. The l4_len field is only set for TCP (useful for TSO).
+ */
+static void
+parse_ethernet(struct ether_hdr *eth_hdr, struct eth_pkt_info *info)
+{
+	struct ipv4_hdr *ipv4_hdr;
+	struct ipv6_hdr *ipv6_hdr;
+
+	info->l2_len = sizeof(struct ether_hdr);
+	info->ethertype = eth_hdr->ether_type;
+
+	if (info->ethertype == _htons(ETHER_TYPE_VLAN)) {
+		struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);
+
+		info->l2_len  += sizeof(struct vlan_hdr);
+		info->ethertype = vlan_hdr->eth_proto;
+	}
+
+	switch (info->ethertype) {
+	case _htons(ETHER_TYPE_IPv4):
+		ipv4_hdr = (struct ipv4_hdr *) ((char *)eth_hdr + info->l2_len);
+		parse_ipv4(ipv4_hdr, info);
+		break;
+	case _htons(ETHER_TYPE_IPv6):
+		ipv6_hdr = (struct ipv6_hdr *) ((char *)eth_hdr + info->l2_len);
+		parse_ipv6(ipv6_hdr, info);
+		break;
+	default:
+		info->l4_len = 0;
+		info->l3_len = 0;
+		info->l4_proto = 0;
+		break;
+	}
+}
+
+void
+rte_vxlan_decap_burst(__rte_unused uint8_t port, __rte_unused uint16_t queue,
+		      struct rte_mbuf *pkts[], uint16_t nb_pkts,
+		      void *user_param)
+{
+	struct ether_hdr *eth_hdr;
+	struct eth_pkt_info info;
+	uint16_t outer_hdr_len;
+	uint16_t nb_rx = 0;
+
+	if (user_param == NULL || nb_pkts == 0)
+		return;
+
+	/*
+	 * Assume all packets on this queue match the same rule, so only
+	 * the first packet needs to be analysed.
+	 */
+	eth_hdr = rte_pktmbuf_mtod(pkts[0], struct ether_hdr *);
+	parse_ethernet(eth_hdr, &info);
+
+	/* outer L2 + outer L3 + outer UDP + VXLAN header */
+	outer_hdr_len = info.l2_len + info.l3_len + UDP_PRE_SZ;
+
+	while (nb_rx < nb_pkts)
+		rte_pktmbuf_adj(pkts[nb_rx++], outer_hdr_len);
+}
+
+/* Encapsulation using SIMD instructions and a flow rule to accelerate this operation */
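+/*
+ * Prepended outer header layout, VXLAN_PKT_HDR_SIZE = 14 + 20 + 8 + 8
+ * = 50 bytes: Ethernet (14B) + IPv4 (20B) + UDP (8B) + VXLAN (8B).
+ * The IPv4/UDP/VXLAN part is built from the 16B vxlan_tx_meta template
+ * with one 256-bit shuffle/or/store sequence per packet.
+ */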
+
+void
+rte_vxlan_encap_burst(__rte_unused uint8_t port, __rte_unused uint16_t queue,
+		      struct rte_mbuf *pkts[], uint16_t nb_pkts,
+		      struct rte_eth_tunnel_conf *encap_param)
+{
+	char *pkt;
+	uint16_t len;
+	uint32_t hash;
+	uint16_t nb_rx = 0;
+	struct vxlan_tx_meta meta;
+	struct ether_hdr *phdr;
+	__m256i temp, cur;
+	__m256i shuf_msk = _mm256_set_epi8(
+		0xFF, 0, 1, 2,           /* high octet 0~2, 24 bits vni */
+		0xFF, 0xFF, 0xFF, 0xFF,  /* skip vx_flags */
+		0xFF, 0xFF, 0xFF, 0xFF,  /* skip udp len, cksum */
+		0xFF, 0xFF,              /* skip udp dst port */
+		8, 9,                    /* high octet 8~9, 16 bits udp src port */
+		8, 9, 10, 11,            /* low octet 8~11, 32 bits dst ip */
+		0, 1, 2, 3,              /* low octet 0~3, 32 bits src ip */
+		0xFF, 0xFF, 0xFF, 0xFF,  /* skip ttl, proto_id, hdr_csum */
+		0xFF, 0xFF, 0xFF, 0xFF   /* skip packet_id, fragment_offset */
+	);
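+	/*
+	 * Note: _mm256_shuffle_epi8() shuffles within each 128-bit lane,
+	 * so the mask addresses the low lane (outer IPv4 fields) and the
+	 * high lane (UDP source port and VXLAN VNI) independently.
+	 */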
+
+	/* derive the outer UDP source port from a hash of the inner
+	 * Ethernet header of the first packet in the burst */
+	phdr = rte_pktmbuf_mtod(pkts[0], struct ether_hdr *);
+	hash = rte_hash_crc(phdr, 2 * ETHER_ADDR_LEN, phdr->ether_type);
+
+	meta.sip = encap_param->tunnel_flow[0].dst_ip;
+	meta.dip = encap_param->tunnel_flow[0].src_ip;
+	meta.vni = encap_param->tunnel_id;
+	/* scale the 32-bit hash into [PORT_MIN, PORT_MAX] */
+	meta.sport = rte_cpu_to_be_16((((uint64_t)hash * PORT_RANGE) >> 32) +
+				      PORT_MIN);
+
+	while (nb_rx < nb_pkts) {
+		len = rte_pktmbuf_pkt_len(pkts[nb_rx]);
+		pkt = rte_pktmbuf_prepend(pkts[nb_rx], VXLAN_PKT_HDR_SIZE);
+
+		/* load the 16B meta data and expand it into a 32B register */
+		cur = _mm256_cvtepu32_epi64(_mm_loadu_si128((__m128i *)&meta));
+		temp = _mm256_set_epi16(0, 0, 0,
+			0x0008,                  /* VXLAN flags, "I" bit set */
+			0, rte_cpu_to_be_16(len + UDP_PRE_SZ),
+			rte_cpu_to_be_16(RTE_DEFAULT_VXLAN_PORT), 0,
+			0, 0, 0, 0,
+			0, 0x11FF, 0, 0);        /* TTL 0xFF, proto UDP */
+
+		if (nb_rx + 1 < nb_pkts)
+			rte_prefetch1(rte_pktmbuf_mtod(pkts[nb_rx + 1],
+						       void *));
+		cur = _mm256_shuffle_epi8(cur, shuf_msk);
+		cur = _mm256_or_si256(cur, temp);
+
+		/* write 4 bytes: IP version/IHL/TOS plus total length */
+		*(uint32_t *)(pkt + ETHER_HDR_LEN) =
+			rte_cpu_to_be_32(0x4500 << 16 | (len + IP_PRE_SZ));
+
+		/* write 32 bytes: rest of IP (16B), UDP (8B), VXLAN (8B);
+		 * the IP header checksum is left as 0, assumed offloaded */
+		_mm256_storeu_si256((__m256i *)(pkt + PREFIX), cur);
+
+		/* write the outer L2 header */
+		rte_memcpy(pkt, encap_param->peer_mac, MAC_LEN);
+		rte_memcpy(pkt + MAC_LEN, encap_param->dst_mac, MAC_LEN);
+		*(uint16_t *)(pkt + MAC_LEN * 2) =
+			rte_cpu_to_be_16(ETHER_TYPE_IPv4);
+		nb_rx++;
+	}
+}
-- 
1.7.7.6


