[dpdk-dev] [PATCH v12 6/6] testpmd: use Tx preparation in csum engine
Tomasz Kulasek
tomaszx.kulasek at intel.com
Wed Nov 23 18:36:25 CET 2016
Added the "csum txprep (on|off)" command, which allows switching to the
Tx path that uses the Tx preparation API.
By default the unchanged implementation is used.
When the Tx preparation path is used, the pseudo-header checksum
calculation for UDP/TCP/TSO packets is removed from the application, and
the Tx preparation API is used instead for packet preparation and
verification.
Adding this additional step to the csum engine causes a performance drop
of about 3-4% on my setup with the ixgbe driver. It is caused mostly by
the need to re-access and modify the packet data.
Signed-off-by: Tomasz Kulasek <tomaszx.kulasek at intel.com>
Acked-by: Konstantin Ananyev <konstantin.ananyev at intel.com>
---
app/test-pmd/cmdline.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++
app/test-pmd/csumonly.c | 33 ++++++++++++++++++++++++-------
app/test-pmd/testpmd.c | 5 +++++
app/test-pmd/testpmd.h | 2 ++
4 files changed, 82 insertions(+), 7 deletions(-)
diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c
index 63b55dc..373fc59 100644
--- a/app/test-pmd/cmdline.c
+++ b/app/test-pmd/cmdline.c
@@ -366,6 +366,10 @@ static void cmd_help_long_parsed(void *parsed_result,
"csum show (port_id)\n"
" Display tx checksum offload configuration\n\n"
+ "csum txprep (on|off)"
+ " Enable tx preparation path in csum forward engine"
+ "\n\n"
+
"tso set (segsize) (portid)\n"
" Enable TCP Segmentation Offload in csum forward"
" engine.\n"
@@ -3523,6 +3527,50 @@ struct cmd_csum_tunnel_result {
},
};
+/* Enable/disable tx preparation path */
+struct cmd_csum_txprep_result {
+ cmdline_fixed_string_t csum;
+ cmdline_fixed_string_t parse;
+ cmdline_fixed_string_t onoff;
+};
+
+static void
+cmd_csum_txprep_parsed(void *parsed_result,
+ __attribute__((unused)) struct cmdline *cl,
+ __attribute__((unused)) void *data)
+{
+ struct cmd_csum_txprep_result *res = parsed_result;
+
+ if (!strcmp(res->onoff, "on"))
+ tx_prepare = 1;
+ else
+ tx_prepare = 0;
+
+}
+
+cmdline_parse_token_string_t cmd_csum_txprep_csum =
+ TOKEN_STRING_INITIALIZER(struct cmd_csum_txprep_result,
+ csum, "csum");
+cmdline_parse_token_string_t cmd_csum_txprep_parse =
+ TOKEN_STRING_INITIALIZER(struct cmd_csum_txprep_result,
+ parse, "txprep");
+cmdline_parse_token_string_t cmd_csum_txprep_onoff =
+ TOKEN_STRING_INITIALIZER(struct cmd_csum_txprep_result,
+ onoff, "on#off");
+
+cmdline_parse_inst_t cmd_csum_txprep = {
+ .f = cmd_csum_txprep_parsed,
+ .data = NULL,
+ .help_str = "enable/disable tx preparation path for csum engine: "
+ "csum txprep on|off",
+ .tokens = {
+ (void *)&cmd_csum_txprep_csum,
+ (void *)&cmd_csum_txprep_parse,
+ (void *)&cmd_csum_txprep_onoff,
+ NULL,
+ },
+};
+
/* *** ENABLE HARDWARE SEGMENTATION IN TX NON-TUNNELED PACKETS *** */
struct cmd_tso_set_result {
cmdline_fixed_string_t tso;
@@ -11470,6 +11518,7 @@ struct cmd_set_vf_mac_addr_result {
(cmdline_parse_inst_t *)&cmd_csum_set,
(cmdline_parse_inst_t *)&cmd_csum_show,
(cmdline_parse_inst_t *)&cmd_csum_tunnel,
+ (cmdline_parse_inst_t *)&cmd_csum_txprep,
(cmdline_parse_inst_t *)&cmd_tso_set,
(cmdline_parse_inst_t *)&cmd_tso_show,
(cmdline_parse_inst_t *)&cmd_tunnel_tso_set,
diff --git a/app/test-pmd/csumonly.c b/app/test-pmd/csumonly.c
index 57e6ae2..3afa9ab 100644
--- a/app/test-pmd/csumonly.c
+++ b/app/test-pmd/csumonly.c
@@ -372,8 +372,10 @@ struct simple_gre_hdr {
udp_hdr->dgram_cksum = 0;
if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_UDP_CKSUM) {
ol_flags |= PKT_TX_UDP_CKSUM;
- udp_hdr->dgram_cksum = get_psd_sum(l3_hdr,
- info->ethertype, ol_flags);
+ if (!tx_prepare)
+ udp_hdr->dgram_cksum = get_psd_sum(
+ l3_hdr, info->ethertype,
+ ol_flags);
} else {
udp_hdr->dgram_cksum =
get_udptcp_checksum(l3_hdr, udp_hdr,
@@ -385,12 +387,15 @@ struct simple_gre_hdr {
tcp_hdr->cksum = 0;
if (tso_segsz) {
ol_flags |= PKT_TX_TCP_SEG;
- tcp_hdr->cksum = get_psd_sum(l3_hdr, info->ethertype,
- ol_flags);
+ if (!tx_prepare)
+ tcp_hdr->cksum = get_psd_sum(l3_hdr,
+ info->ethertype, ol_flags);
+
} else if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_TCP_CKSUM) {
ol_flags |= PKT_TX_TCP_CKSUM;
- tcp_hdr->cksum = get_psd_sum(l3_hdr, info->ethertype,
- ol_flags);
+ if (!tx_prepare)
+ tcp_hdr->cksum = get_psd_sum(l3_hdr,
+ info->ethertype, ol_flags);
} else {
tcp_hdr->cksum =
get_udptcp_checksum(l3_hdr, tcp_hdr,
@@ -648,6 +653,7 @@ struct simple_gre_hdr {
void *l3_hdr = NULL, *outer_l3_hdr = NULL; /* can be IPv4 or IPv6 */
uint16_t nb_rx;
uint16_t nb_tx;
+ uint16_t nb_prep;
uint16_t i;
uint64_t rx_ol_flags, tx_ol_flags;
uint16_t testpmd_ol_flags;
@@ -857,7 +863,20 @@ struct simple_gre_hdr {
printf("\n");
}
}
- nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_rx);
+
+ if (tx_prepare) {
+ nb_prep = rte_eth_tx_prepare(fs->tx_port, fs->tx_queue,
+ pkts_burst, nb_rx);
+ if (nb_prep != nb_rx)
+ printf("Preparing packet burst to transmit failed: %s\n",
+ rte_strerror(rte_errno));
+
+ nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst,
+ nb_prep);
+ } else
+ nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst,
+ nb_rx);
+
/*
* Retry if necessary
*/
diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
index a0332c2..c18bc28 100644
--- a/app/test-pmd/testpmd.c
+++ b/app/test-pmd/testpmd.c
@@ -180,6 +180,11 @@ struct fwd_engine * fwd_engines[] = {
enum tx_pkt_split tx_pkt_split = TX_PKT_SPLIT_OFF;
/**< Split policy for packets to TX. */
+/*
+ * Enable Tx preparation path in the "csum" engine.
+ */
+uint8_t tx_prepare = 0;
+
uint16_t nb_pkt_per_burst = DEF_PKT_BURST; /**< Number of packets per burst. */
uint16_t mb_mempool_cache = DEF_MBUF_CACHE; /**< Size of mbuf mempool cache. */
diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
index 9c1e703..488a6e1 100644
--- a/app/test-pmd/testpmd.h
+++ b/app/test-pmd/testpmd.h
@@ -383,6 +383,8 @@ enum tx_pkt_split {
extern enum tx_pkt_split tx_pkt_split;
+extern uint8_t tx_prepare;
+
extern uint16_t nb_pkt_per_burst;
extern uint16_t mb_mempool_cache;
extern int8_t rx_pthresh;
--
1.7.9.5
More information about the dev
mailing list