[dpdk-dev] [PATCH RFC 11/11] ixgbe/mbuf: add TSO support

Ananyev, Konstantin konstantin.ananyev at intel.com
Thu May 15 17:09:28 CEST 2014


Hi Oliver,

By design, the PMD is not supposed to touch (or even look into) the actual packet data.
That is one of the reasons why we put the l2/l3/l4_len fields into the mbuf itself.
Also, it seems a bit strange to calculate the pseudo-header checksum in the upper layer and then
recalculate it again inside the PMD.
So I wonder: is it possible to move the fix_tcp_phdr_cksum() logic into the upper layer
(testpmd's pkt_burst_checksum_forward())?
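
For instance, a minimal sketch of building such a length-less pseudo-header
checksum in the application (the struct layout and the raw_cksum() helper
below are assumptions, not code from this thread):

  /* sketch: IPv4 pseudo-header checksum with the length left out,
   * computed in the application instead of being fixed up in the PMD;
   * raw_cksum() is an assumed 16-bit one's-complement sum helper
   * (the result is deliberately not inverted, as HW offload expects) */
  struct ipv4_psd_hdr {
          uint32_t src_addr;
          uint32_t dst_addr;
          uint8_t  zero;
          uint8_t  proto;
          uint16_t len;      /* kept at 0 for TSO */
  } psd;

  psd.src_addr = ipv4_hdr->src_addr;
  psd.dst_addr = ipv4_hdr->dst_addr;
  psd.zero = 0;
  psd.proto = ipv4_hdr->next_proto_id;
  psd.len = 0;
  tcp_hdr->cksum = raw_cksum(&psd, sizeof(psd));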

Thanks
Konstantin

-----Original Message-----
From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Olivier Matz
Sent: Friday, May 09, 2014 3:51 PM
To: dev at dpdk.org
Subject: [dpdk-dev] [PATCH RFC 11/11] ixgbe/mbuf: add TSO support

Implement TSO (TCP segmentation offload) in the ixgbe driver. To delegate
the TCP segmentation to the hardware, the user has to:

- set the PKT_TX_TCP_SEG flag in mbuf->ol_flags (this flag implies
  PKT_TX_IP_CKSUM and PKT_TX_TCP_CKSUM)
- fill the mbuf->hw_offload information: l2_len, l3_len, l4_len, mss
- calculate the pseudo header checksum and set it in the TCP header,
  as required when doing hardware TCP checksum offload
- set the IP checksum to 0
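
For illustration, a minimal sketch of these steps on a single-segment
TCP/IPv4 packet (the header pointers, the example MSS and the
get_psd_sum() helper are assumptions, not part of the patch):

  struct ether_hdr *eth = rte_pktmbuf_mtod(m, struct ether_hdr *);
  struct ipv4_hdr *ip = (struct ipv4_hdr *)(eth + 1);
  struct tcp_hdr *tcp = (struct tcp_hdr *)((char *)ip + sizeof(*ip));

  m->ol_flags |= PKT_TX_TCP_SEG;       /* implies IP and TCP cksum */
  m->hw_offload.l2_len = sizeof(struct ether_hdr);
  m->hw_offload.l3_len = sizeof(struct ipv4_hdr); /* no IP options */
  m->hw_offload.l4_len = (tcp->data_off & 0xf0) >> 2;
  m->hw_offload.mss = 1460;            /* example MSS */
  ip->hdr_checksum = 0;                /* IP cksum set to 0 */
  tcp->cksum = get_psd_sum(ip);        /* assumed helper: pseudo-header
                                        * cksum as for hw TCP cksum
                                        * offload; the PMD fixes it up
                                        * for TSO */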

This approach seems generic enough to be used for other hw/drivers
in the future.

In the patch, the tx_desc_cksum_flags_to_olinfo() and
tx_desc_ol_flags_to_cmdtype() functions have been reworked to make them
clearer. This does not impact performance, as gcc (version 4.8 in my
case) is smart enough to convert the tests into code that does not
contain any branch instructions.

validation
==========

platform:

  Tester (linux)   <---->   DUT (DPDK)

Run testpmd on DUT:

  cd dpdk.org/
  make install T=x86_64-default-linuxapp-gcc
  cd x86_64-default-linuxapp-gcc/
  modprobe uio
  insmod kmod/igb_uio.ko
  python ../tools/igb_uio_bind.py -b igb_uio 0000:02:00.0
  echo 0 > /proc/sys/kernel/randomize_va_space
  echo 1000 > /sys/devices/system/node/node0/hugepages/hugepages-2048kB/nr_hugepages
  echo 1000 > /sys/devices/system/node/node1/hugepages/hugepages-2048kB/nr_hugepages
  mount -t hugetlbfs none /mnt/huge
  ./app/testpmd -c 0x55 -n 4 -m 800 -- -i --port-topology=chained

Disable all offload features on the Tester, and start the capture:

  ethtool -K ixgbe0 rx off tx off tso off gso off gro off lro off
  ip l set ixgbe0 up
  tcpdump -n -e -i ixgbe0 -s 0 -w /tmp/cap

We use the following scapy script for testing:

  def test():
    ############### IPv4
    # checksum TCP
    p=Ether()/IP(src=RandIP(), dst=RandIP())/TCP(flags=0x10)/Raw(RandString(50))
    sendp(p, iface="ixgbe0", count=5)
    # checksum UDP
    p=Ether()/IP(src=RandIP(), dst=RandIP())/UDP()/Raw(RandString(50))
    sendp(p, iface="ixgbe0", count=5)
    # bad IP checksum
    p=Ether()/IP(src=RandIP(), dst=RandIP(), chksum=0x1234)/TCP(flags=0x10)/Raw(RandString(50))
    sendp(p, iface="ixgbe0", count=5)
    # bad TCP checksum
    p=Ether()/IP(src=RandIP(), dst=RandIP())/TCP(flags=0x10, chksum=0x1234)/Raw(RandString(50))
    sendp(p, iface="ixgbe0", count=5)
    # large packet
    p=Ether()/IP(src=RandIP(), dst=RandIP())/TCP(flags=0x10)/Raw(RandString(1400))
    sendp(p, iface="ixgbe0", count=5)
    ############### IPv6
    # checksum TCP
    p=Ether()/IPv6(src=RandIP6(), dst=RandIP6())/TCP(flags=0x10)/Raw(RandString(50))
    sendp(p, iface="ixgbe0", count=5)
    # checksum UDP
    p=Ether()/IPv6(src=RandIP6(), dst=RandIP6())/UDP()/Raw(RandString(50))
    sendp(p, iface="ixgbe0", count=5)
    # bad TCP checksum
    p=Ether()/IPv6(src=RandIP6(), dst=RandIP6())/TCP(flags=0x10, chksum=0x1234)/Raw(RandString(50))
    sendp(p, iface="ixgbe0", count=5)
    # large packet
    p=Ether()/IPv6(src=RandIP6(), dst=RandIP6())/TCP(flags=0x10)/Raw(RandString(1400))
    sendp(p, iface="ixgbe0", count=5)

Without hw cksum
----------------

On DUT:

  # disable hw cksum (use sw) in csumonly test, disable tso
  stop
  set fwd csum
  tx_checksum set 0x0 0
  tso set 0 0
  start

On tester:

  >>> test()

Then check the capture file.
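
One way to do that (illustrative; not part of the original mail) is to
let tcpdump re-verify the checksums while reading the file back:

  tcpdump -r /tmp/cap -nn -vv | grep -i cksum

Checksums that were not properly (re)computed will be flagged as
incorrect.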

With hw cksum
-------------

On DUT:

  # enable hw cksum in csumonly test, disable tso
  stop
  set fwd csum
  tx_checksum set 0xf 0
  tso set 0 0
  start

On tester:

  >>> test()

Then check the capture file.

With TSO
--------

On DUT:

  set fwd csum
  tx_checksum set 0xf 0
  tso set 800 0
  start

On tester:

  >>> test()

Then check the capture file.
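
Besides the checksums, verify that the 1400-byte payloads were split by
the hardware into TCP segments no larger than the 800-byte MSS
configured above, for example (one possible check):

  tcpdump -r /tmp/cap -nn tcp | grep -o 'length [0-9]*' | sort -u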

Signed-off-by: Olivier Matz <olivier.matz at 6wind.com>
---
 app/test-pmd/cmdline.c            |  45 +++++++++++
 app/test-pmd/config.c             |   8 ++
 app/test-pmd/csumonly.c           |  16 ++++
 app/test-pmd/testpmd.h            |   2 +
 lib/librte_mbuf/rte_mbuf.h        |   7 +-
 lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 165 ++++++++++++++++++++++++++++----------
 6 files changed, 200 insertions(+), 43 deletions(-)

diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c
index a95b279..c628773 100644
--- a/app/test-pmd/cmdline.c
+++ b/app/test-pmd/cmdline.c
@@ -2305,6 +2305,50 @@ cmdline_parse_inst_t cmd_tx_cksum_set = {
 	},
 };
 
+/* *** ENABLE HARDWARE SEGMENTATION IN TX PACKETS *** */
+struct cmd_tso_set_result {
+	cmdline_fixed_string_t tso;
+	cmdline_fixed_string_t set;
+	uint16_t mss;
+	uint8_t port_id;
+};
+
+static void
+cmd_tso_set_parsed(void *parsed_result,
+		       __attribute__((unused)) struct cmdline *cl,
+		       __attribute__((unused)) void *data)
+{
+	struct cmd_tso_set_result *res = parsed_result;
+	tso_set(res->port_id, res->mss);
+}
+
+cmdline_parse_token_string_t cmd_tso_set_tso =
+	TOKEN_STRING_INITIALIZER(struct cmd_tso_set_result,
+				tso, "tso");
+cmdline_parse_token_string_t cmd_tso_set_set =
+	TOKEN_STRING_INITIALIZER(struct cmd_tso_set_result,
+				set, "set");
+cmdline_parse_token_num_t cmd_tso_set_mss =
+	TOKEN_NUM_INITIALIZER(struct cmd_tso_set_result,
+				mss, UINT16);
+cmdline_parse_token_num_t cmd_tso_set_portid =
+	TOKEN_NUM_INITIALIZER(struct cmd_tso_set_result,
+				port_id, UINT8);
+
+cmdline_parse_inst_t cmd_tso_set = {
+	.f = cmd_tso_set_parsed,
+	.data = NULL,
+	.help_str = "Enable hardware segmentation (set MSS to 0 to disable): "
+	"tso set <MSS> <PORT>",
+	.tokens = {
+		(void *)&cmd_tso_set_tso,
+		(void *)&cmd_tso_set_set,
+		(void *)&cmd_tso_set_mss,
+		(void *)&cmd_tso_set_portid,
+		NULL,
+	},
+};
+
 /* *** ENABLE/DISABLE FLUSH ON RX STREAMS *** */
 struct cmd_set_flush_rx {
 	cmdline_fixed_string_t set;
@@ -5151,6 +5195,7 @@ cmdline_parse_ctx_t main_ctx[] = {
 	(cmdline_parse_inst_t *)&cmd_tx_vlan_set,
 	(cmdline_parse_inst_t *)&cmd_tx_vlan_reset,
 	(cmdline_parse_inst_t *)&cmd_tx_cksum_set,
+	(cmdline_parse_inst_t *)&cmd_tso_set,
 	(cmdline_parse_inst_t *)&cmd_link_flow_control_set,
 	(cmdline_parse_inst_t *)&cmd_priority_flow_control_set,
 	(cmdline_parse_inst_t *)&cmd_config_dcb,
diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c
index cd82f60..a6d749d 100644
--- a/app/test-pmd/config.c
+++ b/app/test-pmd/config.c
@@ -1455,6 +1455,14 @@ tx_cksum_set(portid_t port_id, uint32_t ol_flags)
 }
 
 void
+tso_set(portid_t port_id, uint16_t mss)
+{
+	if (port_id_is_invalid(port_id))
+		return;
+	ports[port_id].tx_mss = mss;
+}
+
+void
 fdir_add_signature_filter(portid_t port_id, uint8_t queue_id,
 			  struct rte_fdir_filter *fdir_filter)
 {
diff --git a/app/test-pmd/csumonly.c b/app/test-pmd/csumonly.c
index e93d75f..9983618 100644
--- a/app/test-pmd/csumonly.c
+++ b/app/test-pmd/csumonly.c
@@ -220,10 +220,12 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
 	uint32_t ol_flags;
 	uint32_t pkt_ol_flags;
 	uint32_t tx_ol_flags;
+	uint16_t tx_mss;
 	uint16_t l4_proto;
 	uint16_t eth_type;
 	uint8_t  l2_len;
 	uint8_t  l3_len;
+	uint8_t  l4_len;
 
 	uint32_t rx_bad_ip_csum;
 	uint32_t rx_bad_l4_csum;
@@ -255,6 +257,7 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
 
 	txp = &ports[fs->tx_port];
 	tx_ol_flags = txp->tx_ol_flags;
+	tx_mss = txp->tx_mss;
 
 	for (i = 0; i < nb_rx; i++) {
 
@@ -272,6 +275,7 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
 				((uintptr_t)&eth_hdr->ether_type +
 				sizeof(struct vlan_hdr)));
 		}
+		l4_len  = 0;
 
 		/* Update the L3/L4 checksum error packet count  */
 		rx_bad_ip_csum += ((pkt_ol_flags & PKT_RX_IP_CKSUM_BAD) != 0);
@@ -347,6 +351,11 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
 					tcp_hdr->cksum = get_ipv4_udptcp_checksum(ipv4_hdr,
 							(uint16_t*)tcp_hdr);
 				}
+
+				if (tx_mss != 0) {
+					ol_flags |= PKT_TX_TCP_SEG;
+					l4_len = (tcp_hdr->data_off & 0xf0) >> 2;
+				}
 			}
 			else if (l4_proto == IPPROTO_SCTP) {
 				sctp_hdr = (struct sctp_hdr*) (rte_pktmbuf_mtod(mb,
@@ -404,6 +413,11 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
 					tcp_hdr->cksum = get_ipv6_udptcp_checksum(ipv6_hdr,
 							(uint16_t*)tcp_hdr);
 				}
+
+				if (tx_mss != 0) {
+					ol_flags |= PKT_TX_TCP_SEG;
+					l4_len = (tcp_hdr->data_off & 0xf0) >> 2;
+				}
 			}
 			else if (l4_proto == IPPROTO_SCTP) {
 				sctp_hdr = (struct sctp_hdr*) (rte_pktmbuf_mtod(mb,
@@ -434,6 +448,8 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
 		/* Combine the packet header write. VLAN is not consider here */
 		mb->hw_offload.l2_len = l2_len;
 		mb->hw_offload.l3_len = l3_len;
+		mb->hw_offload.l4_len = l4_len;
+		mb->hw_offload.mss = tx_mss;
 		mb->ol_flags = ol_flags;
 	}
 	nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_rx);
diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
index 77dcc30..6f567e7 100644
--- a/app/test-pmd/testpmd.h
+++ b/app/test-pmd/testpmd.h
@@ -134,6 +134,7 @@ struct rte_port {
 	struct fwd_stream       *tx_stream; /**< Port TX stream, if unique */
 	unsigned int            socket_id;  /**< For NUMA support */
 	uint32_t                tx_ol_flags;/**< Offload Flags of TX packets. */
+	uint16_t                tx_mss;     /**< MSS for segmentation offload. */
 	uint16_t                tx_vlan_id; /**< Tag Id. in TX VLAN packets. */
 	void                    *fwd_ctx;   /**< Forwarding mode context */
 	uint64_t                rx_bad_ip_csum; /**< rx pkts with bad ip checksum  */
@@ -480,6 +481,7 @@ void tx_vlan_reset(portid_t port_id);
 void set_qmap(portid_t port_id, uint8_t is_rx, uint16_t queue_id, uint8_t map_value);
 
 void tx_cksum_set(portid_t port_id, uint32_t ol_flags);
+void tso_set(portid_t port_id, uint16_t mss);
 
 void set_verbose_level(uint16_t vb_level);
 void set_tx_pkt_segments(unsigned *seg_lengths, unsigned nb_segs);
diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index d71c86c..75298bd 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -96,6 +96,7 @@ extern "C" {
 #define PKT_TX_SCTP_CKSUM    0x00080000 /**< SCTP cksum of TX pkt. computed by NIC. */
 #define PKT_TX_UDP_CKSUM     0x000C0000 /**< UDP cksum of TX pkt. computed by NIC. */
 #define PKT_TX_IEEE1588_TMST 0x00100000 /**< TX IEEE1588 packet to timestamp. */
+#define PKT_TX_TCP_SEG       0x00200000 /**< TCP segmentation offload. */
 
 /**
  * Get the name of a RX offload flag
@@ -140,6 +141,7 @@ static inline const char *rte_get_tx_ol_flag_name(uint32_t mask)
 	case PKT_TX_SCTP_CKSUM: return "PKT_TX_SCTP_CKSUM";
 	case PKT_TX_UDP_CKSUM: return "PKT_TX_UDP_CKSUM";
 	case PKT_TX_IEEE1588_TMST: return "PKT_TX_IEEE1588_TMST";
+	case PKT_TX_TCP_SEG: return "PKT_TX_TCP_SEG";
 	default: return NULL;
 	}
 }
@@ -153,11 +155,12 @@ union rte_hw_offload {
 #define HW_OFFLOAD_L4_LEN_MASK 0xff
 		uint32_t l2_len:7; /**< L2 (MAC) Header Length. */
 		uint32_t l3_len:9; /**< L3 (IP) Header Length. */
-		uint32_t reserved:16;
+		uint32_t l4_len:8; /**< L4 (TCP/UDP) Header Length. */
+		uint32_t reserved:8;
 
 		uint16_t vlan_tci;
 		/**< VLAN Tag Control Identifier (CPU order). */
-		uint16_t reserved2;
+		uint16_t mss; /**< Maximum segment size. */
 	};
 };
 
diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
index d52482e..75ff16e 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
@@ -347,13 +347,59 @@ ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
 	return nb_tx;
 }
 
+/* When doing TSO, the IP length must not be included in the pseudo
+ * header checksum of the packet given to the hardware */
+static inline void
+ixgbe_fix_tcp_phdr_cksum(struct rte_mbuf *m)
+{
+	char *data;
+	uint16_t *cksum_ptr;
+	uint16_t prev_cksum;
+	uint16_t new_cksum;
+	uint16_t ip_len, ip_paylen;
+	uint32_t tmp;
+	uint8_t ip_version;
+
+	/* get phdr cksum at offset 16 of TCP header */
+	data = rte_pktmbuf_mtod(m, char *);
+	cksum_ptr = (uint16_t *)(data + m->hw_offload.l2_len +
+		m->hw_offload.l3_len + 16);
+	prev_cksum = *cksum_ptr;
+
+	/* get ip_version */
+	ip_version = (*(uint8_t *)(data + m->hw_offload.l2_len)) >> 4;
+
+	/* get ip_len at offset 2 of IP header or offset 4 of IPv6 header */
+	if (ip_version == 4) {
+		/* override ip cksum to 0 */
+		data[m->hw_offload.l2_len + 10] = 0;
+		data[m->hw_offload.l2_len + 11] = 0;
+
+		ip_len = *(uint16_t *)(data + m->hw_offload.l2_len + 2);
+		ip_paylen = rte_cpu_to_be_16(rte_be_to_cpu_16(ip_len) -
+			m->hw_offload.l3_len);
+	} else {
+		ip_paylen = *(uint16_t *)(data + m->hw_offload.l2_len + 4);
+	}
+
+	/* calculate the new phdr checksum that doesn't include ip_paylen */
+	tmp = prev_cksum ^ 0xffff;
+	if (tmp < ip_paylen)
+		tmp += 0xffff;
+	tmp -= ip_paylen;
+	new_cksum = tmp;
+
+	/* replace it in the packet */
+	*cksum_ptr = new_cksum;
+}
+
 static inline void
 ixgbe_set_xmit_ctx(struct igb_tx_queue* txq,
 		volatile struct ixgbe_adv_tx_context_desc *ctx_txd,
 		uint32_t ol_flags, union rte_hw_offload hw_offload)
 {
 	uint32_t type_tucmd_mlhl;
-	uint32_t mss_l4len_idx;
+	uint32_t mss_l4len_idx = 0;
 	uint32_t ctx_idx;
 	uint32_t vlan_macip_lens;
 	union rte_hw_offload offload_mask;
@@ -362,44 +408,61 @@ ixgbe_set_xmit_ctx(struct igb_tx_queue* txq,
 	offload_mask.u64 = 0;
 	type_tucmd_mlhl = 0;
 
+	/* Specify which HW CTX to upload. */
+	mss_l4len_idx |= (ctx_idx << IXGBE_ADVTXD_IDX_SHIFT);
+
 	if (ol_flags & PKT_TX_VLAN_PKT) {
 		offload_mask.vlan_tci = 0xffff;
 	}
 
-	if (ol_flags & PKT_TX_IP_CKSUM) {
-		type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4;
+	/* check if TCP segmentation required for this packet */
+	if (ol_flags & PKT_TX_TCP_SEG) {
+		/* implies IP cksum and TCP cksum */
+		type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4 |
+			IXGBE_ADVTXD_TUCMD_L4T_TCP |
+			IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
+
 		offload_mask.l2_len = HW_OFFLOAD_L2_LEN_MASK;
 		offload_mask.l3_len = HW_OFFLOAD_L3_LEN_MASK;
-	}
+		offload_mask.l4_len = HW_OFFLOAD_L4_LEN_MASK;
+		offload_mask.mss = 0xffff;
+		mss_l4len_idx |= hw_offload.mss << IXGBE_ADVTXD_MSS_SHIFT;
+		mss_l4len_idx |= hw_offload.l4_len << IXGBE_ADVTXD_L4LEN_SHIFT;
+	} else { /* no TSO, check if hardware checksum is needed */
+		if (ol_flags & PKT_TX_IP_CKSUM) {
+			type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4;
+			offload_mask.l2_len = HW_OFFLOAD_L2_LEN_MASK;
+			offload_mask.l3_len = HW_OFFLOAD_L3_LEN_MASK;
+		}
 
-	/* Specify which HW CTX to upload. */
-	mss_l4len_idx = (ctx_idx << IXGBE_ADVTXD_IDX_SHIFT);
-	switch (ol_flags & PKT_TX_L4_MASK) {
-	case PKT_TX_UDP_CKSUM:
-		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP |
+		switch (ol_flags & PKT_TX_L4_MASK) {
+		case PKT_TX_UDP_CKSUM:
+			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP |
 				IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
-		mss_l4len_idx |= sizeof(struct udp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
-		offload_mask.l2_len = HW_OFFLOAD_L2_LEN_MASK;
-		offload_mask.l3_len = HW_OFFLOAD_L3_LEN_MASK;
-		break;
-	case PKT_TX_TCP_CKSUM:
-		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP |
+			mss_l4len_idx |= sizeof(struct udp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
+			offload_mask.l2_len = HW_OFFLOAD_L2_LEN_MASK;
+			offload_mask.l3_len = HW_OFFLOAD_L3_LEN_MASK;
+			break;
+		case PKT_TX_TCP_CKSUM:
+			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP |
 				IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
-		mss_l4len_idx |= sizeof(struct tcp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
-		offload_mask.l2_len = HW_OFFLOAD_L2_LEN_MASK;
-		offload_mask.l3_len = HW_OFFLOAD_L3_LEN_MASK;
-		break;
-	case PKT_TX_SCTP_CKSUM:
-		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP |
+			mss_l4len_idx |= sizeof(struct tcp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
+			offload_mask.l2_len = HW_OFFLOAD_L2_LEN_MASK;
+			offload_mask.l3_len = HW_OFFLOAD_L3_LEN_MASK;
+			offload_mask.l4_len = HW_OFFLOAD_L4_LEN_MASK;
+			break;
+		case PKT_TX_SCTP_CKSUM:
+			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP |
 				IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
-		mss_l4len_idx |= sizeof(struct sctp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
-		offload_mask.l2_len = HW_OFFLOAD_L2_LEN_MASK;
-		offload_mask.l3_len = HW_OFFLOAD_L3_LEN_MASK;
-		break;
-	default:
-		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_RSV |
+			mss_l4len_idx |= sizeof(struct sctp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
+			offload_mask.l2_len = HW_OFFLOAD_L2_LEN_MASK;
+			offload_mask.l3_len = HW_OFFLOAD_L3_LEN_MASK;
+			break;
+		default:
+			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_RSV |
 				IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
-		break;
+			break;
+		}
 	}
 
 	txq->ctx_cache[ctx_idx].flags = ol_flags;
@@ -446,20 +509,25 @@ what_advctx_update(struct igb_tx_queue *txq, uint32_t flags,
 static inline uint32_t
 tx_desc_cksum_flags_to_olinfo(uint32_t ol_flags)
 {
-	static const uint32_t l4_olinfo[2] = {0, IXGBE_ADVTXD_POPTS_TXSM};
-	static const uint32_t l3_olinfo[2] = {0, IXGBE_ADVTXD_POPTS_IXSM};
-	uint32_t tmp;
-
-	tmp  = l4_olinfo[(ol_flags & PKT_TX_L4_MASK)  != PKT_TX_L4_NO_CKSUM];
-	tmp |= l3_olinfo[(ol_flags & PKT_TX_IP_CKSUM) != 0];
+	uint32_t tmp = 0;
+	if ((ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM)
+		tmp |= IXGBE_ADVTXD_POPTS_TXSM;
+	if (ol_flags & PKT_TX_IP_CKSUM)
+		tmp |= IXGBE_ADVTXD_POPTS_IXSM;
+	if (ol_flags & PKT_TX_TCP_SEG)
+		tmp |= IXGBE_ADVTXD_POPTS_TXSM | IXGBE_ADVTXD_POPTS_IXSM;
 	return tmp;
 }
 
 static inline uint32_t
-tx_desc_vlan_flags_to_cmdtype(uint32_t ol_flags)
+tx_desc_ol_flags_to_cmdtype(uint32_t ol_flags)
 {
-	static const uint32_t vlan_cmd[2] = {0, IXGBE_ADVTXD_DCMD_VLE};
-	return vlan_cmd[(ol_flags & PKT_TX_VLAN_PKT) != 0];
+	uint32_t cmdtype = 0;
+	if (ol_flags & PKT_TX_VLAN_PKT)
+		cmdtype |= IXGBE_ADVTXD_DCMD_VLE;
+	if (ol_flags & PKT_TX_TCP_SEG)
+		cmdtype |= IXGBE_ADVTXD_DCMD_TSE;
+	return cmdtype;
 }
 
 /* Default RS bit threshold values */
@@ -583,7 +651,8 @@ ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 
 		/* If hardware offload required */
 		tx_ol_req = ol_flags &
-			(PKT_TX_VLAN_PKT | PKT_TX_IP_CKSUM | PKT_TX_L4_MASK);
+			(PKT_TX_VLAN_PKT | PKT_TX_IP_CKSUM | PKT_TX_L4_MASK |
+			PKT_TX_TCP_SEG);
 		if (tx_ol_req) {
 			/* If new context need be built or reuse the exist ctx. */
 			ctx = what_advctx_update(txq, tx_ol_req,
@@ -702,7 +771,20 @@ ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 		 */
 		cmd_type_len = IXGBE_ADVTXD_DTYP_DATA |
 			IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT;
+
+		if (ol_flags & PKT_TX_TCP_SEG) {
+			/* paylen in the descriptor is not the packet
+			 * len but the tcp payload len if TSO is on */
+			pkt_len -= (hw_offload.l2_len + hw_offload.l3_len +
+				hw_offload.l4_len);
+
+			/* the pseudo header checksum must be modified:
+			 * it should not include the ip_len */
+			ixgbe_fix_tcp_phdr_cksum(tx_pkt);
+		}
+
 		olinfo_status = (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
+
 #ifdef RTE_LIBRTE_IEEE1588
 		if (ol_flags & PKT_TX_IEEE1588_TMST)
 			cmd_type_len |= IXGBE_ADVTXD_MAC_1588;
@@ -741,7 +823,7 @@ ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 			 * This path will go through
 			 * whatever new/reuse the context descriptor
 			 */
-			cmd_type_len  |= tx_desc_vlan_flags_to_cmdtype(ol_flags);
+			cmd_type_len  |= tx_desc_ol_flags_to_cmdtype(ol_flags);
 			olinfo_status |= tx_desc_cksum_flags_to_olinfo(ol_flags);
 			olinfo_status |= ctx << IXGBE_ADVTXD_IDX_SHIFT;
 		}
@@ -3420,9 +3502,10 @@ ixgbe_dev_tx_init(struct rte_eth_dev *dev)
 	PMD_INIT_FUNC_TRACE();
 	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 
-	/* Enable TX CRC (checksum offload requirement) */
+	/* Enable TX CRC (checksum offload requirement) and hw padding
+	 * (TSO requirement) */
 	hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
-	hlreg0 |= IXGBE_HLREG0_TXCRCEN;
+	hlreg0 |= (IXGBE_HLREG0_TXCRCEN | IXGBE_HLREG0_TXPADEN);
 	IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
 
 	/* Setup the Base and Length of the Tx Descriptor Rings */
-- 
1.9.2


