[dpdk-dev] [PATCH v3 1/6] ethdev: add Tx preparation

Tomasz Kulasek tomaszx.kulasek at intel.com
Wed Sep 28 13:10:47 CEST 2016


Added API for `rte_eth_tx_prep`

uint16_t rte_eth_tx_prep(uint8_t port_id, uint16_t queue_id,
	struct rte_mbuf **tx_pkts, uint16_t nb_pkts)

Added fields to the `struct rte_eth_desc_lim`:

	uint16_t nb_seg_max;
		/**< Max number of segments per whole packet. */

	uint16_t nb_mtu_seg_max;
		/**< Max number of segments per one MTU */

Created `rte_pkt.h` header with commonly used functions:

int rte_validate_tx_offload(struct rte_mbuf *m)
	to validate general requirements for tx offload in a packet, such as
	flag completeness. In the current implementation this function is called
	optionally, when RTE_LIBRTE_ETHDEV_DEBUG is enabled.

int rte_phdr_cksum_fix(struct rte_mbuf *m)
	to fix pseudo header checksum for TSO and non-TSO tcp/udp packets
	before hardware tx checksum offload.
	 - for non-TSO tcp/udp packets full pseudo-header checksum is
	   counted and set.
	 - for TSO the IP payload length is not included.

Signed-off-by: Tomasz Kulasek <tomaszx.kulasek at intel.com>
---
 config/common_base            |    1 +
 lib/librte_ether/rte_ethdev.h |   85 ++++++++++++++++++++++++++
 lib/librte_mbuf/rte_mbuf.h    |    8 +++
 lib/librte_net/Makefile       |    2 +-
 lib/librte_net/rte_pkt.h      |  133 +++++++++++++++++++++++++++++++++++++++++
 5 files changed, 228 insertions(+), 1 deletion(-)
 create mode 100644 lib/librte_net/rte_pkt.h

diff --git a/config/common_base b/config/common_base
index 7830535..7ada9e0 100644
--- a/config/common_base
+++ b/config/common_base
@@ -120,6 +120,7 @@ CONFIG_RTE_MAX_QUEUES_PER_PORT=1024
 CONFIG_RTE_LIBRTE_IEEE1588=n
 CONFIG_RTE_ETHDEV_QUEUE_STAT_CNTRS=16
 CONFIG_RTE_ETHDEV_RXTX_CALLBACKS=y
+CONFIG_RTE_ETHDEV_TX_PREP=y
 
 #
 # Support NIC bypass logic
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 96575e8..6594544 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -182,6 +182,7 @@ extern "C" {
 #include <rte_pci.h>
 #include <rte_dev.h>
 #include <rte_devargs.h>
+#include <rte_errno.h>
 #include "rte_ether.h"
 #include "rte_eth_ctrl.h"
 #include "rte_dev_info.h"
@@ -699,6 +700,8 @@ struct rte_eth_desc_lim {
 	uint16_t nb_max;   /**< Max allowed number of descriptors. */
 	uint16_t nb_min;   /**< Min allowed number of descriptors. */
 	uint16_t nb_align; /**< Number of descriptors should be aligned to. */
+	uint16_t nb_seg_max;     /**< Max number of segments per whole packet. */
+	uint16_t nb_mtu_seg_max; /**< Max number of segments per one MTU */
 };
 
 /**
@@ -1184,6 +1187,11 @@ typedef uint16_t (*eth_tx_burst_t)(void *txq,
 				   uint16_t nb_pkts);
 /**< @internal Send output packets on a transmit queue of an Ethernet device. */
 
+typedef uint16_t (*eth_tx_prep_t)(void *txq,
+				   struct rte_mbuf **tx_pkts,
+				   uint16_t nb_pkts);
+/**< @internal PMD hook that prepares a burst of output packets on a transmit queue. */
+
 typedef int (*flow_ctrl_get_t)(struct rte_eth_dev *dev,
 			       struct rte_eth_fc_conf *fc_conf);
 /**< @internal Get current flow control parameter on an Ethernet device */
@@ -1629,6 +1637,7 @@ enum rte_eth_dev_type {
 struct rte_eth_dev {
 	eth_rx_burst_t rx_pkt_burst; /**< Pointer to PMD receive function. */
 	eth_tx_burst_t tx_pkt_burst; /**< Pointer to PMD transmit function. */
+	eth_tx_prep_t tx_pkt_prep; /**< Pointer to PMD transmit prepare function. */
 	struct rte_eth_dev_data *data;  /**< Pointer to device data */
 	const struct eth_driver *driver;/**< Driver for this device */
 	const struct eth_dev_ops *dev_ops; /**< Functions exported by PMD */
@@ -2837,6 +2846,82 @@ rte_eth_tx_burst(uint8_t port_id, uint16_t queue_id,
 	return (*dev->tx_pkt_burst)(dev->data->tx_queues[queue_id], tx_pkts, nb_pkts);
 }
 
+/**
+ * Process a burst of output packets on a transmit queue of an Ethernet device.
+ *
+ * The rte_eth_tx_prep() function is invoked to prepare output packets to be
+ * transmitted on the output queue *queue_id* of the Ethernet device designated
+ * by its *port_id*.
+ * The *nb_pkts* parameter is the number of packets to be prepared which are
+ * supplied in the *tx_pkts* array of *rte_mbuf* structures, each of them
+ * allocated from a pool created with rte_pktmbuf_pool_create().
+ * For each packet to send, the rte_eth_tx_prep() function performs
+ * the following operations:
+ *
+ * - Check if packet meets the device's requirements for tx offloads.
+ *
+ * - Check limitations about number of segments.
+ *
+ * - Check additional requirements when debug is enabled.
+ *
+ * - Update and/or reset required checksums when tx offload is set for packet.
+ *
+ * The rte_eth_tx_prep() function returns the number of packets ready to be
+ * sent. A return value equal to *nb_pkts* means that all packets are valid and
+ * ready to be sent.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param queue_id
+ *   The index of the transmit queue through which output packets must be
+ *   sent.
+ *   The value must be in the range [0, nb_tx_queue - 1] previously supplied
+ *   to rte_eth_dev_configure().
+ * @param tx_pkts
+ *   The address of an array of *nb_pkts* pointers to *rte_mbuf* structures
+ *   which contain the output packets.
+ * @param nb_pkts
+ *   The maximum number of packets to process.
+ * @return
+ *   The number of packets correct and ready to be sent. The return value can be
+ *   less than the value of the *nb_pkts* parameter when some packet doesn't
+ *   meet the device's requirements; rte_errno is then set appropriately.
+ */
+
+#ifdef RTE_ETHDEV_TX_PREP
+
+static inline uint16_t
+rte_eth_tx_prep(uint8_t port_id, uint16_t queue_id, struct rte_mbuf **tx_pkts,
+		uint16_t nb_pkts)
+{
+	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+
+	if (!dev->tx_pkt_prep) /* no PMD callback: report the whole burst ready */
+		return nb_pkts;
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+	if (queue_id >= dev->data->nb_tx_queues) {
+		RTE_PMD_DEBUG_TRACE("Invalid TX queue_id=%d\n", queue_id);
+		rte_errno = EINVAL; /* rte_errno takes positive errno codes */
+		return 0;
+	}
+#endif
+
+	return (*dev->tx_pkt_prep)(dev->data->tx_queues[queue_id],
+			tx_pkts, nb_pkts);
+}
+
+#else
+
+static inline uint16_t
+rte_eth_tx_prep(uint8_t port_id __rte_unused, uint16_t queue_id __rte_unused,
+		struct rte_mbuf **tx_pkts __rte_unused, uint16_t nb_pkts)
+{
+	return nb_pkts; /* RTE_ETHDEV_TX_PREP not defined: accept whole burst */
+}
+
+#endif
+
 typedef void (*buffer_tx_error_fn)(struct rte_mbuf **unsent, uint16_t count,
 		void *userdata);
 
diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index 23b7bf8..8b73261 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -211,6 +211,14 @@ extern "C" {
  */
 #define PKT_TX_OUTER_IPV4   (1ULL << 59)
 
+#define PKT_TX_OFFLOAD_MASK (    \
+		PKT_TX_IP_CKSUM |        \
+		PKT_TX_L4_MASK |         \
+		PKT_TX_OUTER_IP_CKSUM |  \
+		PKT_TX_TCP_SEG |         \
+		PKT_TX_QINQ_PKT |        \
+		PKT_TX_VLAN_PKT) /**< Bitwise OR of the Tx offload flags listed. */
+
 /**
  * Packet outer header is IPv6. This flag must be set when using any
  * outer offload feature (L4 checksum) to tell the NIC that the outer
diff --git a/lib/librte_net/Makefile b/lib/librte_net/Makefile
index ad2e482..b5abe84 100644
--- a/lib/librte_net/Makefile
+++ b/lib/librte_net/Makefile
@@ -34,7 +34,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
 CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3
 
 # install includes
-SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include := rte_ip.h rte_tcp.h rte_udp.h rte_sctp.h rte_icmp.h rte_arp.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include := rte_ip.h rte_tcp.h rte_udp.h rte_sctp.h rte_icmp.h rte_arp.h rte_pkt.h
 
 
 include $(RTE_SDK)/mk/rte.install.mk
diff --git a/lib/librte_net/rte_pkt.h b/lib/librte_net/rte_pkt.h
new file mode 100644
index 0000000..72903ac
--- /dev/null
+++ b/lib/librte_net/rte_pkt.h
@@ -0,0 +1,133 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_PKT_H_
+#define _RTE_PKT_H_
+
+#include <rte_ip.h>
+#include <rte_udp.h>
+#include <rte_tcp.h>
+#include <rte_sctp.h>
+
+/**
+ * Check the Tx offload flags of *m* for consistency: 0 if OK, else -EINVAL.
+ */
+static inline int
+rte_validate_tx_offload(struct rte_mbuf *m)
+{
+	uint64_t ol_flags = m->ol_flags;
+
+	/* No flag from PKT_TX_OFFLOAD_MASK is set: nothing to validate. */
+	if (!(ol_flags & PKT_TX_OFFLOAD_MASK))
+		return 0;
+
+	/* PKT_TX_IP_CKSUM together with PKT_TX_IPV6 is rejected. */
+	if ((ol_flags & PKT_TX_IP_CKSUM) && (ol_flags & PKT_TX_IPV6))
+		return -EINVAL;
+
+	if (ol_flags & (PKT_TX_L4_MASK | PKT_TX_TCP_SEG))
+		/* L4 offload or TSO flag set without PKT_TX_IPV4/PKT_TX_IPV6 */
+		if (!(ol_flags & (PKT_TX_IPV4 | PKT_TX_IPV6)))
+			return -EINVAL;
+
+	if (ol_flags & PKT_TX_TCP_SEG)
+		/* TSO with zero tso_segsz, or IPv4 TSO without PKT_TX_IP_CKSUM */
+		if ((m->tso_segsz == 0) ||
+				((ol_flags & PKT_TX_IPV4) && !(ol_flags & PKT_TX_IP_CKSUM)))
+			return -EINVAL;
+
+	/* PKT_TX_OUTER_IP_CKSUM is only accepted with PKT_TX_OUTER_IPV4. */
+	if ((ol_flags & PKT_TX_OUTER_IP_CKSUM) && !(ol_flags & PKT_TX_OUTER_IPV4))
+		return -EINVAL;
+
+	return 0;
+}
+
+/**
+ * Prepare checksum fields of *m* for hardware Tx checksum/TSO offload: clear
+ * the IPv4 header checksum if PKT_TX_IP_CKSUM is set and store the
+ * pseudo-header checksum in the UDP or TCP header. For TSO the IP payload
+ * length is not included (presumably by rte_ipv[46]_phdr_cksum()). Returns 0.
+ */
+static inline int
+rte_phdr_cksum_fix(struct rte_mbuf *m)
+{
+	struct ipv4_hdr *ipv4_hdr;
+	struct ipv6_hdr *ipv6_hdr;
+	struct tcp_hdr *tcp_hdr;
+	struct udp_hdr *udp_hdr;
+	uint64_t inner_l3_offset = m->l2_len; /* offset of the L3 header */
+
+	if (m->ol_flags & PKT_TX_OUTER_IP_CKSUM) /* also skip outer L2/L3 */
+		inner_l3_offset += m->outer_l2_len + m->outer_l3_len;
+
+	if (m->ol_flags & PKT_TX_IPV4) {
+		ipv4_hdr = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *,
+				inner_l3_offset);
+
+		if (m->ol_flags & PKT_TX_IP_CKSUM)
+			ipv4_hdr->hdr_checksum = 0;
+
+		if ((m->ol_flags & PKT_TX_UDP_CKSUM) == PKT_TX_UDP_CKSUM) {
+			/* UDP checksum offload: store IPv4 pseudo-header sum */
+			udp_hdr = rte_pktmbuf_mtod_offset(m, struct udp_hdr *,
+					inner_l3_offset + m->l3_len);
+			udp_hdr->dgram_cksum = rte_ipv4_phdr_cksum(ipv4_hdr, m->ol_flags);
+		} else if ((m->ol_flags & PKT_TX_TCP_CKSUM) ||
+				(m->ol_flags & PKT_TX_TCP_SEG)) {
+			/* TCP checksum offload or TSO: store pseudo-header sum */
+			tcp_hdr = rte_pktmbuf_mtod_offset(m, struct tcp_hdr *,
+					inner_l3_offset + m->l3_len);
+			tcp_hdr->cksum = rte_ipv4_phdr_cksum(ipv4_hdr, m->ol_flags);
+		}
+	} else if (m->ol_flags & PKT_TX_IPV6) {
+		ipv6_hdr = rte_pktmbuf_mtod_offset(m, struct ipv6_hdr *,
+				inner_l3_offset);
+
+		if ((m->ol_flags & PKT_TX_UDP_CKSUM) == PKT_TX_UDP_CKSUM) {
+			/* UDP checksum offload: store IPv6 pseudo-header sum */
+			udp_hdr = rte_pktmbuf_mtod_offset(m, struct udp_hdr *,
+					inner_l3_offset + m->l3_len);
+			udp_hdr->dgram_cksum = rte_ipv6_phdr_cksum(ipv6_hdr, m->ol_flags);
+		} else if ((m->ol_flags & PKT_TX_TCP_CKSUM) ||
+				(m->ol_flags & PKT_TX_TCP_SEG)) {
+			/* TCP checksum offload or TSO: store pseudo-header sum */
+			tcp_hdr = rte_pktmbuf_mtod_offset(m, struct tcp_hdr *,
+					inner_l3_offset + m->l3_len);
+			tcp_hdr->cksum = rte_ipv6_phdr_cksum(ipv6_hdr, m->ol_flags);
+		}
+	}
+	return 0;
+}
+
+#endif /* _RTE_PKT_H_ */
-- 
1.7.9.5



More information about the dev mailing list