[dpdk-dev] [PATCH v3 1/6] ethdev: add Tx preparation
Tomasz Kulasek
tomaszx.kulasek at intel.com
Wed Sep 28 13:10:47 CEST 2016
Added API for `rte_eth_tx_prep`
uint16_t rte_eth_tx_prep(uint8_t port_id, uint16_t queue_id,
struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
Added fields to the `struct rte_eth_desc_lim`:
uint16_t nb_seg_max;
/**< Max number of segments per whole packet. */
uint16_t nb_mtu_seg_max;
/**< Max number of segments per one MTU */
Created `rte_pkt.h` header with common used functions:
int rte_validate_tx_offload(struct rte_mbuf *m)
to validate general requirements for tx offload in packet such a
flag completness. In current implementation this function is called
optionaly when RTE_LIBRTE_ETHDEV_DEBUG is enabled.
int rte_phdr_cksum_fix(struct rte_mbuf *m)
to fix pseudo header checksum for TSO and non-TSO tcp/udp packets
before hardware tx checksum offload.
- for non-TSO tcp/udp packets full pseudo-header checksum is
counted and set.
- for TSO the IP payload length is not included.
Signed-off-by: Tomasz Kulasek <tomaszx.kulasek at intel.com>
---
config/common_base | 1 +
lib/librte_ether/rte_ethdev.h | 85 ++++++++++++++++++++++++++
lib/librte_mbuf/rte_mbuf.h | 8 +++
lib/librte_net/Makefile | 2 +-
lib/librte_net/rte_pkt.h | 133 +++++++++++++++++++++++++++++++++++++++++
5 files changed, 228 insertions(+), 1 deletion(-)
create mode 100644 lib/librte_net/rte_pkt.h
diff --git a/config/common_base b/config/common_base
index 7830535..7ada9e0 100644
--- a/config/common_base
+++ b/config/common_base
@@ -120,6 +120,7 @@ CONFIG_RTE_MAX_QUEUES_PER_PORT=1024
CONFIG_RTE_LIBRTE_IEEE1588=n
CONFIG_RTE_ETHDEV_QUEUE_STAT_CNTRS=16
CONFIG_RTE_ETHDEV_RXTX_CALLBACKS=y
+CONFIG_RTE_ETHDEV_TX_PREP=y
#
# Support NIC bypass logic
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 96575e8..6594544 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -182,6 +182,7 @@ extern "C" {
#include <rte_pci.h>
#include <rte_dev.h>
#include <rte_devargs.h>
+#include <rte_errno.h>
#include "rte_ether.h"
#include "rte_eth_ctrl.h"
#include "rte_dev_info.h"
@@ -699,6 +700,8 @@ struct rte_eth_desc_lim {
uint16_t nb_max; /**< Max allowed number of descriptors. */
uint16_t nb_min; /**< Min allowed number of descriptors. */
uint16_t nb_align; /**< Number of descriptors should be aligned to. */
+ uint16_t nb_seg_max; /**< Max number of segments per whole packet. */
+ uint16_t nb_mtu_seg_max; /**< Max number of segments per one MTU */
};
/**
@@ -1184,6 +1187,11 @@ typedef uint16_t (*eth_tx_burst_t)(void *txq,
uint16_t nb_pkts);
/**< @internal Send output packets on a transmit queue of an Ethernet device. */
+typedef uint16_t (*eth_tx_prep_t)(void *txq,
+ struct rte_mbuf **tx_pkts,
+ uint16_t nb_pkts);
+/**< @internal Prepare output packets on a transmit queue of an Ethernet device. */
+
typedef int (*flow_ctrl_get_t)(struct rte_eth_dev *dev,
struct rte_eth_fc_conf *fc_conf);
/**< @internal Get current flow control parameter on an Ethernet device */
@@ -1629,6 +1637,7 @@ enum rte_eth_dev_type {
struct rte_eth_dev {
eth_rx_burst_t rx_pkt_burst; /**< Pointer to PMD receive function. */
eth_tx_burst_t tx_pkt_burst; /**< Pointer to PMD transmit function. */
+ eth_tx_prep_t tx_pkt_prep; /**< Pointer to PMD transmit prepare function. */
struct rte_eth_dev_data *data; /**< Pointer to device data */
const struct eth_driver *driver;/**< Driver for this device */
const struct eth_dev_ops *dev_ops; /**< Functions exported by PMD */
@@ -2837,6 +2846,82 @@ rte_eth_tx_burst(uint8_t port_id, uint16_t queue_id,
return (*dev->tx_pkt_burst)(dev->data->tx_queues[queue_id], tx_pkts, nb_pkts);
}
+/**
+ * Process a burst of output packets on a transmit queue of an Ethernet device.
+ *
+ * The rte_eth_tx_prep() function is invoked to prepare output packets to be
+ * transmitted on the output queue *queue_id* of the Ethernet device designated
+ * by its *port_id*.
+ * The *nb_pkts* parameter is the number of packets to be prepared which are
+ * supplied in the *tx_pkts* array of *rte_mbuf* structures, each of them
+ * allocated from a pool created with rte_pktmbuf_pool_create().
+ * For each packet to send, the rte_eth_tx_prep() function performs
+ * the following operations:
+ *
+ * - Check if packet meets devices requirements for tx offloads.
+ *
+ * - Check limitations about number of segments.
+ *
+ * - Check additional requirements when debug is enabled.
+ *
+ * - Update and/or reset required checksums when tx offload is set for packet.
+ *
+ * The rte_eth_tx_prep() function returns the number of packets ready to be
+ * sent. A return value equal to *nb_pkts* means that all packets are valid and
+ * ready to be sent.
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param queue_id
+ * The index of the transmit queue through which output packets must be
+ * sent.
+ * The value must be in the range [0, nb_tx_queue - 1] previously supplied
+ * to rte_eth_dev_configure().
+ * @param tx_pkts
+ * The address of an array of *nb_pkts* pointers to *rte_mbuf* structures
+ * which contain the output packets.
+ * @param nb_pkts
+ * The maximum number of packets to process.
+ * @return
+ * The number of packets correct and ready to be sent. The return value can be
+ * less than the value of the *tx_pkts* parameter when some packet doesn't
+ * meet devices requirements with rte_errno set appropriately.
+ */
+
+#ifdef RTE_ETHDEV_TX_PREP
+
+static inline uint16_t
+rte_eth_tx_prep(uint8_t port_id, uint16_t queue_id, struct rte_mbuf **tx_pkts,
+ uint16_t nb_pkts)
+{
+ struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+
+ if (!dev->tx_pkt_prep)
+ return nb_pkts;
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+ if (queue_id >= dev->data->nb_tx_queues) {
+ RTE_PMD_DEBUG_TRACE("Invalid TX queue_id=%d\n", queue_id);
+ rte_errno = -EINVAL;
+ return 0;
+ }
+#endif
+
+ return (*dev->tx_pkt_prep)(dev->data->tx_queues[queue_id],
+ tx_pkts, nb_pkts);
+}
+
+#else
+
+static inline uint16_t
+rte_eth_tx_prep(uint8_t port_id __rte_unused, uint16_t queue_id __rte_unused,
+ struct rte_mbuf **tx_pkts __rte_unused, uint16_t nb_pkts)
+{
+ return nb_pkts;
+}
+
+#endif
+
typedef void (*buffer_tx_error_fn)(struct rte_mbuf **unsent, uint16_t count,
void *userdata);
diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index 23b7bf8..8b73261 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -211,6 +211,14 @@ extern "C" {
*/
#define PKT_TX_OUTER_IPV4 (1ULL << 59)
+#define PKT_TX_OFFLOAD_MASK ( \
+ PKT_TX_IP_CKSUM | \
+ PKT_TX_L4_MASK | \
+ PKT_TX_OUTER_IP_CKSUM | \
+ PKT_TX_TCP_SEG | \
+ PKT_TX_QINQ_PKT | \
+ PKT_TX_VLAN_PKT)
+
/**
* Packet outer header is IPv6. This flag must be set when using any
* outer offload feature (L4 checksum) to tell the NIC that the outer
diff --git a/lib/librte_net/Makefile b/lib/librte_net/Makefile
index ad2e482..b5abe84 100644
--- a/lib/librte_net/Makefile
+++ b/lib/librte_net/Makefile
@@ -34,7 +34,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3
# install includes
-SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include := rte_ip.h rte_tcp.h rte_udp.h rte_sctp.h rte_icmp.h rte_arp.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include := rte_ip.h rte_tcp.h rte_udp.h rte_sctp.h rte_icmp.h rte_arp.h rte_pkt.h
include $(RTE_SDK)/mk/rte.install.mk
diff --git a/lib/librte_net/rte_pkt.h b/lib/librte_net/rte_pkt.h
new file mode 100644
index 0000000..72903ac
--- /dev/null
+++ b/lib/librte_net/rte_pkt.h
@@ -0,0 +1,133 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2016 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_PKT_H_
+#define _RTE_PKT_H_
+
+#include <rte_ip.h>
+#include <rte_udp.h>
+#include <rte_tcp.h>
+#include <rte_sctp.h>
+
+/**
+ * Validate general requirements for tx offload in packet.
+ */
+static inline int
+rte_validate_tx_offload(struct rte_mbuf *m)
+{
+ uint64_t ol_flags = m->ol_flags;
+
+ /* Does packet set any of available offloads? */
+ if (!(ol_flags & PKT_TX_OFFLOAD_MASK))
+ return 0;
+
+ /* IP checksum can be counted only for IPv4 packet */
+ if ((ol_flags & PKT_TX_IP_CKSUM) && (ol_flags & PKT_TX_IPV6))
+ return -EINVAL;
+
+ if (ol_flags & (PKT_TX_L4_MASK | PKT_TX_TCP_SEG))
+ /* IP type not set */
+ if (!(ol_flags & (PKT_TX_IPV4 | PKT_TX_IPV6)))
+ return -EINVAL;
+
+ if (ol_flags & PKT_TX_TCP_SEG)
+ /* PKT_TX_IP_CKSUM offload not set for IPv4 TSO packet */
+ if ((m->tso_segsz == 0) ||
+ ((ol_flags & PKT_TX_IPV4) && !(ol_flags & PKT_TX_IP_CKSUM)))
+ return -EINVAL;
+
+ /* PKT_TX_OUTER_IP_CKSUM set for non outer IPv4 packet. */
+ if ((ol_flags & PKT_TX_OUTER_IP_CKSUM) && !(ol_flags & PKT_TX_OUTER_IPV4))
+ return -EINVAL;
+
+ return 0;
+}
+
+/**
+ * Fix pseudo header checksum for TSO and non-TSO tcp/udp packets before
+ * hardware tx checksum.
+ * For non-TSO tcp/udp packets full pseudo-header checksum is counted and set.
+ * For TSO the IP payload length is not included.
+ */
+static inline int
+rte_phdr_cksum_fix(struct rte_mbuf *m)
+{
+ struct ipv4_hdr *ipv4_hdr;
+ struct ipv6_hdr *ipv6_hdr;
+ struct tcp_hdr *tcp_hdr;
+ struct udp_hdr *udp_hdr;
+ uint64_t inner_l3_offset = m->l2_len;
+
+ if (m->ol_flags & PKT_TX_OUTER_IP_CKSUM)
+ inner_l3_offset += m->outer_l2_len + m->outer_l3_len;
+
+ if (m->ol_flags & PKT_TX_IPV4) {
+ ipv4_hdr = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *,
+ inner_l3_offset);
+
+ if (m->ol_flags & PKT_TX_IP_CKSUM)
+ ipv4_hdr->hdr_checksum = 0;
+
+ if ((m->ol_flags & PKT_TX_UDP_CKSUM) == PKT_TX_UDP_CKSUM) {
+ /* non-TSO udp */
+ udp_hdr = rte_pktmbuf_mtod_offset(m, struct udp_hdr *,
+ inner_l3_offset + m->l3_len);
+ udp_hdr->dgram_cksum = rte_ipv4_phdr_cksum(ipv4_hdr, m->ol_flags);
+ } else if ((m->ol_flags & PKT_TX_TCP_CKSUM) ||
+ (m->ol_flags & PKT_TX_TCP_SEG)) {
+ /* non-TSO tcp or TSO */
+ tcp_hdr = rte_pktmbuf_mtod_offset(m, struct tcp_hdr *,
+ inner_l3_offset + m->l3_len);
+ tcp_hdr->cksum = rte_ipv4_phdr_cksum(ipv4_hdr, m->ol_flags);
+ }
+ } else if (m->ol_flags & PKT_TX_IPV6) {
+ ipv6_hdr = rte_pktmbuf_mtod_offset(m, struct ipv6_hdr *,
+ inner_l3_offset);
+
+ if ((m->ol_flags & PKT_TX_UDP_CKSUM) == PKT_TX_UDP_CKSUM) {
+ /* non-TSO udp */
+ udp_hdr = rte_pktmbuf_mtod_offset(m, struct udp_hdr *,
+ inner_l3_offset + m->l3_len);
+ udp_hdr->dgram_cksum = rte_ipv6_phdr_cksum(ipv6_hdr, m->ol_flags);
+ } else if ((m->ol_flags & PKT_TX_TCP_CKSUM) ||
+ (m->ol_flags & PKT_TX_TCP_SEG)) {
+ /* non-TSO tcp or TSO */
+ tcp_hdr = rte_pktmbuf_mtod_offset(m, struct tcp_hdr *,
+ inner_l3_offset + m->l3_len);
+ tcp_hdr->cksum = rte_ipv6_phdr_cksum(ipv6_hdr, m->ol_flags);
+ }
+ }
+ return 0;
+}
+
+#endif /* _RTE_PKT_H_ */
--
1.7.9.5
More information about the dev
mailing list