[dpdk-dev] [PATCH] net/tap: add Rx/Tx checksum offload support

Pascal Mazon pascal.mazon at 6wind.com
Mon May 22 13:20:43 CEST 2017


This patch adds basic checksum offloading support, a feature widely
expected in a PMD.

On Rx, verify IPv4 and UDP/TCP checksums and set ol_flags accordingly.
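
The Rx offload is only active when the application enables it with
rxmode.hw_ip_checksum = 1 at rte_eth_dev_configure() time. As a purely
illustrative sketch (not part of this patch; assumes <rte_mbuf.h>), an
application could then count flagged packets per burst like this:

/* Hypothetical helper: count packets in a received burst whose IPv4 or
 * L4 checksum the PMD reported as bad.
 */
static uint16_t
count_bad_csum(struct rte_mbuf **pkts, uint16_t nb_rx)
{
	uint16_t i, bad = 0;

	for (i = 0; i < nb_rx; i++)
		if (pkts[i]->ol_flags &
		    (PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD))
			bad++;
	return bad;
}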

On Tx, compute IPv4 and UDP/TCP checksums when requested through
ol_flags.
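
As a purely illustrative sketch of the application side (not part of
this patch; assumes <rte_mbuf.h>, <rte_ether.h> and <rte_ip.h>, and a
single-segment Ethernet/IPv4/UDP frame without VLAN or IP options),
the offload is requested only through ol_flags and the l2_len/l3_len
fields; the PMD then fills both checksum fields in its own copy of the
frame before writing it to the tap fd:

/* Hypothetical helper: ask the PMD to compute the IPv4 and UDP
 * checksums of this mbuf on Tx.
 */
static void
request_tx_csum(struct rte_mbuf *m)
{
	m->l2_len = sizeof(struct ether_hdr);	/* assumes no VLAN */
	m->l3_len = sizeof(struct ipv4_hdr);	/* assumes no IP options */
	m->ol_flags |= PKT_TX_IPV4 | PKT_TX_IP_CKSUM | PKT_TX_UDP_CKSUM;
	/* The checksum fields themselves are overwritten by the PMD. */
}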

Signed-off-by: Pascal Mazon <pascal.mazon at 6wind.com>
---
 doc/guides/nics/features/tap.ini |   2 +
 drivers/net/tap/rte_eth_tap.c    | 130 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 132 insertions(+)

diff --git a/doc/guides/nics/features/tap.ini b/doc/guides/nics/features/tap.ini
index 3efae758ccde..7e289e99ef23 100644
--- a/doc/guides/nics/features/tap.ini
+++ b/doc/guides/nics/features/tap.ini
@@ -11,6 +11,8 @@ Promiscuous mode     = Y
 Allmulticast mode    = Y
 Basic stats          = Y
 Flow API             = Y
+L3 checksum offload  = Y
+L4 checksum offload  = Y
 MTU update           = Y
 Multicast MAC filter = Y
 Speed capabilities   = Y
diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
index e6261e7a46ab..56208df234d2 100644
--- a/drivers/net/tap/rte_eth_tap.c
+++ b/drivers/net/tap/rte_eth_tap.c
@@ -33,6 +33,7 @@
 
 #include <rte_atomic.h>
 #include <rte_branch_prediction.h>
+#include <rte_byteorder.h>
 #include <rte_common.h>
 #include <rte_mbuf.h>
 #include <rte_ethdev.h>
@@ -42,6 +43,7 @@
 #include <rte_kvargs.h>
 #include <rte_net.h>
 #include <rte_debug.h>
+#include <rte_ip.h>
 
 #include <sys/types.h>
 #include <sys/stat.h>
@@ -229,6 +231,60 @@ tun_alloc(struct pmd_internals *pmd)
 	return -1;
 }
 
+static void
+tap_verify_csum(struct rte_mbuf *mbuf)
+{
+	uint32_t l2 = mbuf->packet_type & RTE_PTYPE_L2_MASK;
+	uint32_t l3 = mbuf->packet_type & RTE_PTYPE_L3_MASK;
+	uint32_t l4 = mbuf->packet_type & RTE_PTYPE_L4_MASK;
+	unsigned int l2_len = sizeof(struct ether_hdr);
+	unsigned int l3_len;
+	uint16_t cksum = 0;
+	void *l3_hdr;
+	void *l4_hdr;
+
+	if (l2 == RTE_PTYPE_L2_ETHER_VLAN)
+		l2_len += 4;
+	else if (l2 == RTE_PTYPE_L2_ETHER_QINQ)
+		l2_len += 8;
+	/* Don't verify checksum for packets with discontinuous L2 header */
+	if (unlikely(l2_len + sizeof(struct ipv4_hdr) >
+		     rte_pktmbuf_data_len(mbuf)))
+		return;
+	l3_hdr = rte_pktmbuf_mtod_offset(mbuf, void *, l2_len);
+	if (l3 == RTE_PTYPE_L3_IPV4 || l3 == RTE_PTYPE_L3_IPV4_EXT) {
+		struct ipv4_hdr *iph = l3_hdr;
+
+		/* ihl contains the number of 4-byte words in the header */
+		l3_len = 4 * (iph->version_ihl & 0xf);
+		if (unlikely(l2_len + l3_len > rte_pktmbuf_data_len(mbuf)))
+			return;
+
+		cksum = ~rte_raw_cksum(iph, l3_len);
+		mbuf->ol_flags |= cksum ?
+			PKT_RX_IP_CKSUM_BAD :
+			PKT_RX_IP_CKSUM_GOOD;
+	} else if (l3 == RTE_PTYPE_L3_IPV6) {
+		l3_len = sizeof(struct ipv6_hdr);
+	} else {
+		/* IPv6 extensions are not supported */
+		return;
+	}
+	if (l4 == RTE_PTYPE_L4_UDP || l4 == RTE_PTYPE_L4_TCP) {
+		l4_hdr = rte_pktmbuf_mtod_offset(mbuf, void *, l2_len + l3_len);
+		/* Don't verify checksum for multi-segment packets. */
+		if (mbuf->nb_segs > 1)
+			return;
+		if (l3 == RTE_PTYPE_L3_IPV4)
+			cksum = ~rte_ipv4_udptcp_cksum(l3_hdr, l4_hdr);
+		else if (l3 == RTE_PTYPE_L3_IPV6)
+			cksum = ~rte_ipv6_udptcp_cksum(l3_hdr, l4_hdr);
+		mbuf->ol_flags |= cksum ?
+			PKT_RX_L4_CKSUM_BAD :
+			PKT_RX_L4_CKSUM_GOOD;
+	}
+}
+
 /* Callback to handle the rx burst of packets to the correct interface and
  * file descriptor(s) in a multi-queue setup.
  */
@@ -309,6 +365,8 @@ pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		seg->next = NULL;
 		mbuf->packet_type = rte_net_get_ptype(mbuf, NULL,
 						      RTE_PTYPE_ALL_MASK);
+		if (rxq->rxmode->hw_ip_checksum)
+			tap_verify_csum(mbuf);
 
 		/* account for the receive frame */
 		bufs[num_rx++] = mbuf;
@@ -321,6 +379,57 @@ pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	return num_rx;
 }
 
+static void
+tap_tx_offload(char *packet, uint64_t ol_flags, unsigned int l2_len,
+	       unsigned int l3_len)
+{
+	void *l3_hdr = packet + l2_len;
+
+	if (ol_flags & (PKT_TX_IP_CKSUM | PKT_TX_IPV4)) {
+		struct ipv4_hdr *iph = l3_hdr;
+		uint16_t cksum;
+
+		iph->hdr_checksum = 0;
+		cksum = rte_raw_cksum(iph, l3_len);
+		iph->hdr_checksum = (cksum == 0xffff) ? cksum : ~cksum;
+	}
+	if (ol_flags & PKT_TX_L4_MASK) {
+		uint16_t l4_len;
+		uint32_t cksum;
+		uint16_t *l4_cksum;
+		void *l4_hdr;
+
+		l4_hdr = packet + l2_len + l3_len;
+		if ((ol_flags & PKT_TX_L4_MASK) == PKT_TX_UDP_CKSUM)
+			l4_cksum = &((struct udp_hdr *)l4_hdr)->dgram_cksum;
+		else if ((ol_flags & PKT_TX_L4_MASK) == PKT_TX_TCP_CKSUM)
+			l4_cksum = &((struct tcp_hdr *)l4_hdr)->cksum;
+		else
+			return;
+		*l4_cksum = 0;
+		if (ol_flags & PKT_TX_IPV4) {
+			struct ipv4_hdr *iph = l3_hdr;
+
+			l4_len = rte_be_to_cpu_16(iph->total_length) - l3_len;
+			cksum = rte_ipv4_phdr_cksum(l3_hdr, 0);
+		} else {
+			struct ipv6_hdr *ip6h = l3_hdr;
+
+			/* payload_len does not include ext headers */
+			l4_len = rte_be_to_cpu_16(ip6h->payload_len) -
+				l3_len + sizeof(struct ipv6_hdr);
+			cksum = rte_ipv6_phdr_cksum(l3_hdr, 0);
+		}
+		cksum += rte_raw_cksum(l4_hdr, l4_len);
+		cksum = ((cksum & 0xffff0000) >> 16) + (cksum & 0xffff);
+		cksum = (~cksum) & 0xffff;
+		if (cksum == 0)
+			cksum = 0xffff;
+		*l4_cksum = cksum;
+	}
+	return;
+}
+
 /* Callback to handle sending packets from the tap interface
  */
 static uint16_t
@@ -341,6 +450,7 @@ pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		struct iovec iovecs[mbuf->nb_segs + 1];
 		struct tun_pi pi = { .flags = 0 };
 		struct rte_mbuf *seg = mbuf;
+		char m_copy[mbuf->data_len];
 		int n;
 		int j;
 
@@ -356,6 +466,19 @@ pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 				rte_pktmbuf_mtod(seg, void *);
 			seg = seg->next;
 		}
+		if (mbuf->ol_flags & (PKT_TX_IP_CKSUM | PKT_TX_IPV4) ||
+		    (mbuf->ol_flags & PKT_TX_L4_MASK) == PKT_TX_UDP_CKSUM ||
+		    (mbuf->ol_flags & PKT_TX_L4_MASK) == PKT_TX_TCP_CKSUM) {
+			/* Support only packets with all data in the same seg */
+			if (mbuf->nb_segs > 1)
+				break;
+			/* To change checksums, work on a copy of data. */
+			rte_memcpy(m_copy, rte_pktmbuf_mtod(mbuf, void *),
+				   rte_pktmbuf_data_len(mbuf));
+			tap_tx_offload(m_copy, mbuf->ol_flags,
+				       mbuf->l2_len, mbuf->l3_len);
+			iovecs[1].iov_base = m_copy;
+		}
 		/* copy the tx frame data */
 		n = writev(txq->fd, iovecs, mbuf->nb_segs + 1);
 		if (n <= 0)
@@ -533,6 +656,13 @@ tap_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 	dev_info->min_rx_bufsize = 0;
 	dev_info->pci_dev = NULL;
 	dev_info->speed_capa = tap_dev_speed_capa();
+	dev_info->rx_offload_capa = (DEV_RX_OFFLOAD_IPV4_CKSUM |
+				     DEV_RX_OFFLOAD_UDP_CKSUM |
+				     DEV_RX_OFFLOAD_TCP_CKSUM);
+	dev_info->tx_offload_capa =
+		(DEV_TX_OFFLOAD_IPV4_CKSUM |
+		 DEV_TX_OFFLOAD_UDP_CKSUM |
+		 DEV_TX_OFFLOAD_TCP_CKSUM);
 }
 
 static void
-- 
2.12.0.306.g4a9b9b3


