[dpdk-dev] [RFC PATCH 5/6] rte_ether: implement encap and decap APIs
Jijiang Liu
jijiang.liu at intel.com
Wed Dec 23 09:49:51 CET 2015
Using SIMD instructions to accelerate the encapsulation operation.
Signed-off-by: Jijiang Liu <jijiang.liu at intel.com>
---
lib/librte_ether/libtunnel/rte_vxlan_opt.c | 251 ++++++++++++++++++++++++++++
1 files changed, 251 insertions(+), 0 deletions(-)
create mode 100644 lib/librte_ether/libtunnel/rte_vxlan_opt.c
diff --git a/lib/librte_ether/libtunnel/rte_vxlan_opt.c b/lib/librte_ether/libtunnel/rte_vxlan_opt.c
new file mode 100644
index 0000000..e59ed2c
--- /dev/null
+++ b/lib/librte_ether/libtunnel/rte_vxlan_opt.c
@@ -0,0 +1,251 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <stdint.h>
+#include <rte_mbuf.h>
+#include <rte_ether.h>
+#include <rte_ip.h>
+#include <rte_udp.h>
+#include <rte_tcp.h>
+#include <rte_byteorder.h>
+#include <rte_prefetch.h>
+#include <rte_ethdev.h>
+#include <rte_hash_crc.h>
+#include <rte_memcpy.h>
+
+#include <immintrin.h>
+#include <tmmintrin.h>
+#include <mmintrin.h>
+
+#include "vxlan_opt.h"
+
+#ifndef __INTEL_COMPILER
+#pragma GCC diagnostic ignored "-Wcast-qual"
+#endif
+
+#pragma GCC diagnostic ignored "-Wstrict-aliasing"
+
+/* UDP source ports are drawn from the IANA ephemeral range */
+#define PORT_MIN 49152
+#define PORT_MAX 65535
+#define PORT_RANGE ((PORT_MAX - PORT_MIN) + 1)
+
+#define DUMMY_FOR_TEST
+#define RTE_DEFAULT_VXLAN_PORT 4789
+
+#define LOOP 4
+#define MAC_LEN 6
+/* parenthesized: the original "ETHER_HDR_LEN + 4" breaks if the macro is
+ * ever used in a non-additive expression (e.g. PREFIX * n) */
+#define PREFIX (ETHER_HDR_LEN + 4)
+#define UDP_PRE_SZ (sizeof(struct udp_hdr) + sizeof(struct vxlan_hdr))
+#define IP_PRE_SZ (UDP_PRE_SZ + sizeof(struct ipv4_hdr))
+/* full outer encapsulation: Ethernet + IPv4 + UDP + VXLAN */
+#define VXLAN_PKT_HDR_SIZE (IP_PRE_SZ + ETHER_HDR_LEN)
+
+#define VXLAN_SIZE sizeof(struct vxlan_hdr)
+#define INNER_PRE_SZ (14 + 20 + 8 + 8)
+#define DECAP_OFFSET (16 + 8 + 8)
+#define DETECT_OFFSET 12
+
+/* Per-packet header summary filled in by the parse_* helpers below */
+struct eth_pkt_info {
+ uint8_t l2_len;	/* Ethernet header length, incl. one optional VLAN tag */
+ uint16_t ethertype;	/* outermost non-VLAN ethertype, network byte order */
+ uint16_t l3_len;	/* IPv4: IHL*4; IPv6: sizeof(struct ipv6_hdr) */
+ uint16_t l4_proto;	/* IPv4 next_proto_id / IPv6 proto field */
+ uint16_t l4_len;	/* TCP header length in bytes; 0 for non-TCP */
+};
+
+/* 16Bytes tx meta data.
+ * 16B-aligned so the encap fast path can fetch it with a single SSE load.
+ * NOTE(review): field values presumably come from the tunnel flow rule in
+ * network byte order — confirm against the caller that fills them. */
+struct vxlan_tx_meta {
+ uint32_t sip;		/* outer source IPv4 address */
+ uint32_t dip;		/* outer destination IPv4 address */
+ uint32_t vni;		/* VXLAN network identifier */
+ uint16_t sport;	/* outer UDP source port */
+} __attribute__((__aligned__(16)));
+
+
+/* Parse an IPv4 header to fill l3_len, l4_len, and l4_proto */
+static void
+parse_ipv4(struct ipv4_hdr *ipv4_hdr, struct eth_pkt_info *info)
+{
+	/* IHL counts 32-bit words, so multiply by 4 for bytes */
+	info->l3_len = (ipv4_hdr->version_ihl & 0x0f) << 2;
+	info->l4_proto = ipv4_hdr->next_proto_id;
+	info->l4_len = 0;
+
+	/* only fill l4_len for TCP, it's useful for TSO */
+	if (info->l4_proto == IPPROTO_TCP) {
+		const struct tcp_hdr *th =
+			(const struct tcp_hdr *)((char *)ipv4_hdr + info->l3_len);
+
+		/* data offset lives in the top nibble, in 32-bit words */
+		info->l4_len = (th->data_off & 0xf0) >> 2;
+	}
+}
+
+/* Parse an IPv6 header to fill l3_len, l4_len, and l4_proto */
+static void
+parse_ipv6(struct ipv6_hdr *ipv6_hdr, struct eth_pkt_info *info)
+{
+	/* fixed-size base header; extension headers are not walked here */
+	info->l3_len = sizeof(struct ipv6_hdr);
+	info->l4_proto = ipv6_hdr->proto;
+	info->l4_len = 0;
+
+	/* only fill l4_len for TCP, it's useful for TSO */
+	if (info->l4_proto == IPPROTO_TCP) {
+		const struct tcp_hdr *th =
+			(const struct tcp_hdr *)((char *)ipv6_hdr + info->l3_len);
+
+		/* data offset lives in the top nibble, in 32-bit words */
+		info->l4_len = (th->data_off & 0xf0) >> 2;
+	}
+}
+
+/*
+ * Parse an ethernet header to fill the ethertype, l2_len, l3_len and
+ * ipproto. This function is able to recognize IPv4/IPv6 with one optional vlan
+ * header. The l4_len argument is only set in case of TCP (useful for TSO).
+ */
+static void
+parse_ethernet(struct ether_hdr *eth_hdr, struct eth_pkt_info *info)
+{
+	struct ipv4_hdr *ipv4_hdr;
+	struct ipv6_hdr *ipv6_hdr;
+
+	info->l2_len = sizeof(struct ether_hdr);
+	info->ethertype = eth_hdr->ether_type;
+
+	/* skip a single optional VLAN tag */
+	if (info->ethertype == _htons(ETHER_TYPE_VLAN)) {
+		struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);
+
+		info->l2_len += sizeof(struct vlan_hdr);
+		info->ethertype = vlan_hdr->eth_proto;
+	}
+
+	switch (info->ethertype) {
+	case _htons(ETHER_TYPE_IPv4):
+		ipv4_hdr = (struct ipv4_hdr *) ((char *)eth_hdr + info->l2_len);
+		/* fix: the local helper is parse_ipv4(); rte_parse_ipv4()
+		 * does not exist anywhere in this patch */
+		parse_ipv4(ipv4_hdr, info);
+		break;
+	case _htons(ETHER_TYPE_IPv6):
+		ipv6_hdr = (struct ipv6_hdr *) ((char *)eth_hdr + info->l2_len);
+		parse_ipv6(ipv6_hdr, info);
+		break;
+	default:
+		info->l4_len = 0;
+		info->l3_len = 0;
+		info->l4_proto = 0;
+		break;
+	}
+}
+
+/*
+ * Strip the outer Ethernet/IP/UDP/VXLAN headers from a burst of packets.
+ * All packets on the queue are assumed to match the same rule, so only the
+ * first packet is parsed and its outer header length reused for the rest.
+ */
+extern void
+rte_vxlan_decap_burst(uint8_t port, uint16_t queue,
+		struct rte_mbuf *pkts[], uint16_t nb_pkts,
+		void *user_param)
+{
+	struct eth_pkt_info info;
+	struct ether_hdr *eth_hdr;
+	uint16_t outer_hdr_len;
+	uint16_t nb_rx = 0;
+
+	/* guard BEFORE touching any packet (the original dereferenced an
+	 * uninitialized pointer, and did so before this check) */
+	if (user_param == NULL || nb_pkts == 0)
+		return;
+
+	eth_hdr = rte_pktmbuf_mtod(pkts[0], struct ether_hdr *);
+	parse_ethernet(eth_hdr, &info);
+
+	/* The outer L4 is UDP, which parse_ethernet() does not size
+	 * (info.l4_len is TCP-only and 0 here), so add the UDP header
+	 * explicitly instead of the original's l4_len term. */
+	outer_hdr_len = info.l2_len + info.l3_len +
+		sizeof(struct udp_hdr) + sizeof(struct vxlan_hdr);
+
+	while (nb_rx < nb_pkts)
+		rte_pktmbuf_adj(pkts[nb_rx++], outer_hdr_len);
+}
+
+/* Encapsulation using SIMD and a flow rule to accelerate this operation */
+
+extern void
+rte_vxlan_encap_burst(uint8_t port, uint16_t queue,
+		struct rte_mbuf *pkts[], uint16_t nb_pkts,
+		rte_eth_tunnel_conf *encap_param)
+{
+	char *pkt;
+	uint16_t len;
+	uint32_t hash;
+	uint16_t nb_rx = 0;
+	struct ether_hdr *phdr;		/* was used but never declared */
+	struct vxlan_tx_meta meta;	/* was used but never declared */
+	__m256i temp, cur;
+	__m256i shuf_msk = _mm256_set_epi8(
+		0xFF, 0, 1, 2,		/* high octet 0~2, 24 bits vni */
+		0xFF, 0xFF, 0xFF, 0xFF,	/* skip vx_flags */
+		0xFF, 0xFF, 0xFF, 0xFF,	/* skip udp len, cksum */
+		0xFF, 0xFF,		/* skip udp dst port */
+		8, 9,			/* high octet 8~9, 16 bits udp src port */
+		8, 9, 10, 11,		/* low octet 8~11, 32 bits dst ip */
+		0, 1, 2, 3,		/* low octet 0~3, 32 bits src ip */
+		0xFF, 0xFF, 0xFF, 0xFF,	/* skip ttl, proto_id, hdr_csum */
+		0xFF, 0xFF, 0xFF, 0xFF	/* skip packet_id, fragment_offset */
+		);
+
+	if (encap_param == NULL || nb_pkts == 0)
+		return;
+
+	/* Hash the inner MAC addresses of the first packet to pick the outer
+	 * UDP source port; the burst is assumed to belong to one flow. */
+	phdr = rte_pktmbuf_mtod(pkts[0], struct ether_hdr *);
+	hash = rte_hash_crc(phdr, 2 * ETHER_ADDR_LEN, phdr->ether_type);
+
+	/* NOTE(review): src/dst look intentionally swapped relative to the
+	 * flow rule (rule is from the peer's viewpoint) — confirm. The
+	 * struct fields are sip/dip; the original wrote nonexistent
+	 * src_ip/dst_ip members. */
+	meta.sip = encap_param->tunnel_flow[0].dst_ip;
+	meta.dip = encap_param->tunnel_flow[0].src_ip;
+	meta.vni = encap_param->tunnel_id;
+	/* Scale the 32-bit hash into [PORT_MIN, PORT_MAX]. The original
+	 * ">> 32 + PORT_MIN" parsed as ">> (32 + PORT_MIN)": an undefined
+	 * over-wide shift. */
+	meta.sport = rte_cpu_to_be_16(
+		(uint16_t)((((uint64_t)hash * PORT_RANGE) >> 32) + PORT_MIN));
+
+	while (nb_rx < nb_pkts) {
+		len = rte_pktmbuf_pkt_len(pkts[nb_rx]);
+		pkt = rte_pktmbuf_prepend(pkts[nb_rx], VXLAN_PKT_HDR_SIZE);
+		if (pkt == NULL) {
+			/* not enough headroom: skip rather than crash */
+			nb_rx++;
+			continue;
+		}
+
+		/* load 16B meta into 32B register (original passed the struct
+		 * by value to a pointer cast; it needs &meta) */
+		cur = _mm256_cvtepu32_epi64(_mm_loadu_si128((__m128i *)&meta));
+		temp = _mm256_set_epi16(0, 0, 0, 0,
+			0, rte_cpu_to_be_16(len + UDP_PRE_SZ),
+			rte_cpu_to_be_16(RTE_DEFAULT_VXLAN_PORT), 0,
+			0, 0, 0, 0,
+			0, 0x11FF, 0, 0); /* presumably TTL/proto(UDP=0x11) — confirm layout */
+
+		rte_prefetch1(pkts);
+		cur = _mm256_shuffle_epi8(cur, shuf_msk);
+		/* merge per-packet constants BEFORE the store; the original
+		 * stored first, so temp never reached the packet */
+		cur = _mm256_or_si256(cur, temp);
+
+		/* write 4 Bytes: IP version/IHL/ToS + total length.
+		 * "pkt" is the packet data pointer; the original wrongly
+		 * indexed it as an array. */
+		*(uint32_t *)(pkt + ETHER_HDR_LEN) =
+			rte_cpu_to_be_32(0x4500 << 16 | (len + IP_PRE_SZ));
+
+		/* write 32Bytes: rest of IP (16B) + UDP (8B) + VXLAN (8B) */
+		_mm256_storeu_si256((__m256i *)(pkt + PREFIX), cur);
+
+		/* write L2 header into the new headroom */
+		rte_memcpy(pkt, encap_param->peer_mac, MAC_LEN);
+		rte_memcpy(pkt + MAC_LEN, encap_param->dst_mac, MAC_LEN);
+		/* ethertype is 16 bits; the original's 32-bit store would
+		 * clobber the first payload bytes */
+		*(uint16_t *)(pkt + MAC_LEN * 2) = rte_cpu_to_be_16(ETHER_TYPE_IPv4);
+		nb_rx++;
+	}
+}
--
1.7.7.6
More information about the dev
mailing list