[dpdk-dev] [PATCH v12 6/6] testpmd: use Tx preparation in csum engine

Tomasz Kulasek tomaszx.kulasek at intel.com
Wed Nov 23 18:36:25 CET 2016


Added a "csum txprep (on|off)" command, which allows switching to a
Tx path that uses the Tx preparation API.

By default unchanged implementation is used.

When the Tx preparation path is used, the pseudo-header checksum
calculation for UDP/TCP/TSO packets is moved from the application to
the Tx preparation API, which performs packet preparation and
verification.

Adding this additional step to the csum engine costs about a 3-4%
performance drop on my setup with the ixgbe driver. The drop is caused
mostly by the need to re-access and modify the packet data.

Signed-off-by: Tomasz Kulasek <tomaszx.kulasek at intel.com>
Acked-by: Konstantin Ananyev <konstantin.ananyev at intel.com>
---
 app/test-pmd/cmdline.c  |   49 +++++++++++++++++++++++++++++++++++++++++++++++
 app/test-pmd/csumonly.c |   33 ++++++++++++++++++++++++-------
 app/test-pmd/testpmd.c  |    5 +++++
 app/test-pmd/testpmd.h  |    2 ++
 4 files changed, 82 insertions(+), 7 deletions(-)

diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c
index 63b55dc..373fc59 100644
--- a/app/test-pmd/cmdline.c
+++ b/app/test-pmd/cmdline.c
@@ -366,6 +366,10 @@ static void cmd_help_long_parsed(void *parsed_result,
 			"csum show (port_id)\n"
 			"    Display tx checksum offload configuration\n\n"
 
+			"csum txprep (on|off)"
+			"    Enable tx preparation path in csum forward engine"
+			"\n\n"
+
 			"tso set (segsize) (portid)\n"
 			"    Enable TCP Segmentation Offload in csum forward"
 			" engine.\n"
@@ -3523,6 +3527,50 @@ struct cmd_csum_tunnel_result {
 	},
 };
 
+/* Enable/disable tx preparation path */
+struct cmd_csum_txprep_result {
+	cmdline_fixed_string_t csum;
+	cmdline_fixed_string_t parse;
+	cmdline_fixed_string_t onoff;
+};
+
+static void
+cmd_csum_txprep_parsed(void *parsed_result,
+		       __attribute__((unused)) struct cmdline *cl,
+		       __attribute__((unused)) void *data)
+{
+	struct cmd_csum_txprep_result *res = parsed_result;
+
+	if (!strcmp(res->onoff, "on"))
+		tx_prepare = 1;
+	else
+		tx_prepare = 0;
+
+}
+
+cmdline_parse_token_string_t cmd_csum_txprep_csum =
+	TOKEN_STRING_INITIALIZER(struct cmd_csum_txprep_result,
+				csum, "csum");
+cmdline_parse_token_string_t cmd_csum_txprep_parse =
+	TOKEN_STRING_INITIALIZER(struct cmd_csum_txprep_result,
+				parse, "txprep");
+cmdline_parse_token_string_t cmd_csum_txprep_onoff =
+	TOKEN_STRING_INITIALIZER(struct cmd_csum_txprep_result,
+				onoff, "on#off");
+
+cmdline_parse_inst_t cmd_csum_txprep = {
+	.f = cmd_csum_txprep_parsed,
+	.data = NULL,
+	.help_str = "enable/disable tx preparation path for csum engine: "
+	"csum txprep on|off",
+	.tokens = {
+		(void *)&cmd_csum_txprep_csum,
+		(void *)&cmd_csum_txprep_parse,
+		(void *)&cmd_csum_txprep_onoff,
+		NULL,
+	},
+};
+
 /* *** ENABLE HARDWARE SEGMENTATION IN TX NON-TUNNELED PACKETS *** */
 struct cmd_tso_set_result {
 	cmdline_fixed_string_t tso;
@@ -11470,6 +11518,7 @@ struct cmd_set_vf_mac_addr_result {
 	(cmdline_parse_inst_t *)&cmd_csum_set,
 	(cmdline_parse_inst_t *)&cmd_csum_show,
 	(cmdline_parse_inst_t *)&cmd_csum_tunnel,
+	(cmdline_parse_inst_t *)&cmd_csum_txprep,
 	(cmdline_parse_inst_t *)&cmd_tso_set,
 	(cmdline_parse_inst_t *)&cmd_tso_show,
 	(cmdline_parse_inst_t *)&cmd_tunnel_tso_set,
diff --git a/app/test-pmd/csumonly.c b/app/test-pmd/csumonly.c
index 57e6ae2..3afa9ab 100644
--- a/app/test-pmd/csumonly.c
+++ b/app/test-pmd/csumonly.c
@@ -372,8 +372,10 @@ struct simple_gre_hdr {
 			udp_hdr->dgram_cksum = 0;
 			if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_UDP_CKSUM) {
 				ol_flags |= PKT_TX_UDP_CKSUM;
-				udp_hdr->dgram_cksum = get_psd_sum(l3_hdr,
-					info->ethertype, ol_flags);
+				if (!tx_prepare)
+					udp_hdr->dgram_cksum = get_psd_sum(
+							l3_hdr, info->ethertype,
+							ol_flags);
 			} else {
 				udp_hdr->dgram_cksum =
 					get_udptcp_checksum(l3_hdr, udp_hdr,
@@ -385,12 +387,15 @@ struct simple_gre_hdr {
 		tcp_hdr->cksum = 0;
 		if (tso_segsz) {
 			ol_flags |= PKT_TX_TCP_SEG;
-			tcp_hdr->cksum = get_psd_sum(l3_hdr, info->ethertype,
-				ol_flags);
+			if (!tx_prepare)
+				tcp_hdr->cksum = get_psd_sum(l3_hdr,
+						info->ethertype, ol_flags);
+
 		} else if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_TCP_CKSUM) {
 			ol_flags |= PKT_TX_TCP_CKSUM;
-			tcp_hdr->cksum = get_psd_sum(l3_hdr, info->ethertype,
-				ol_flags);
+			if (!tx_prepare)
+				tcp_hdr->cksum = get_psd_sum(l3_hdr,
+						info->ethertype, ol_flags);
 		} else {
 			tcp_hdr->cksum =
 				get_udptcp_checksum(l3_hdr, tcp_hdr,
@@ -648,6 +653,7 @@ struct simple_gre_hdr {
 	void *l3_hdr = NULL, *outer_l3_hdr = NULL; /* can be IPv4 or IPv6 */
 	uint16_t nb_rx;
 	uint16_t nb_tx;
+	uint16_t nb_prep;
 	uint16_t i;
 	uint64_t rx_ol_flags, tx_ol_flags;
 	uint16_t testpmd_ol_flags;
@@ -857,7 +863,20 @@ struct simple_gre_hdr {
 			printf("\n");
 		}
 	}
-	nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_rx);
+
+	if (tx_prepare) {
+		nb_prep = rte_eth_tx_prepare(fs->tx_port, fs->tx_queue,
+				pkts_burst, nb_rx);
+		if (nb_prep != nb_rx)
+			printf("Preparing packet burst to transmit failed: %s\n",
+					rte_strerror(rte_errno));
+
+		nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst,
+				nb_prep);
+	} else
+		nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst,
+				nb_rx);
+
 	/*
 	 * Retry if necessary
 	 */
diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
index a0332c2..c18bc28 100644
--- a/app/test-pmd/testpmd.c
+++ b/app/test-pmd/testpmd.c
@@ -180,6 +180,11 @@ struct fwd_engine * fwd_engines[] = {
 enum tx_pkt_split tx_pkt_split = TX_PKT_SPLIT_OFF;
 /**< Split policy for packets to TX. */
 
+/*
+ * Enable Tx preparation path in the "csum" engine.
+ */
+uint8_t tx_prepare = 0;
+
 uint16_t nb_pkt_per_burst = DEF_PKT_BURST; /**< Number of packets per burst. */
 uint16_t mb_mempool_cache = DEF_MBUF_CACHE; /**< Size of mbuf mempool cache. */
 
diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
index 9c1e703..488a6e1 100644
--- a/app/test-pmd/testpmd.h
+++ b/app/test-pmd/testpmd.h
@@ -383,6 +383,8 @@ enum tx_pkt_split {
 
 extern enum tx_pkt_split tx_pkt_split;
 
+extern uint8_t tx_prepare;
+
 extern uint16_t nb_pkt_per_burst;
 extern uint16_t mb_mempool_cache;
 extern int8_t rx_pthresh;
-- 
1.7.9.5



More information about the dev mailing list