patch 'net/hns3: optimize Tx performance by mbuf fast free' has been queued to stable release 20.11.4

Xueming Li xuemingl at nvidia.com
Sun Nov 28 15:53:53 CET 2021


Hi,

FYI, your patch has been queued to stable release 20.11.4

Note it hasn't been pushed to http://dpdk.org/browse/dpdk-stable yet.
It will be pushed if I get no objections before 11/30/21. So please
shout if anyone has objections.

Also note that after the patch there's a diff of the upstream commit vs the
patch applied to the branch. This will indicate if there was any rebasing
needed to apply to the stable branch. If there were code changes for rebasing
(ie: not only metadata diffs), please double check that the rebase was
correctly done.

Queued patches are on a temporary branch at:
https://github.com/steevenlee/dpdk

This queued commit can be viewed at:
https://github.com/steevenlee/dpdk/commit/6a4f262ed3ffb82a92332825230bab668f18ca7b

Thanks.

Xueming Li <xuemingl at nvidia.com>

---
>From 6a4f262ed3ffb82a92332825230bab668f18ca7b Mon Sep 17 00:00:00 2001
From: Chengwen Feng <fengchengwen at huawei.com>
Date: Tue, 16 Nov 2021 09:22:11 +0800
Subject: [PATCH] net/hns3: optimize Tx performance by mbuf fast free
Cc: Xueming Li <xuemingl at nvidia.com>

[ upstream commit 3cc817c195cc8e3bf54b5cadd89c5c7776d3727e ]

Currently the vector and simple xmit algorithms don't support multi_segs,
so if the Tx offloads include MBUF_FAST_FREE, the driver could invoke
rte_mempool_put_bulk() to free Tx mbufs in this situation.

In the testpmd single core MAC forwarding scenario, the performance is
improved by 8% at 64B on Kunpeng920 platform.

Signed-off-by: Chengwen Feng <fengchengwen at huawei.com>
Signed-off-by: Min Hu (Connor) <humin29 at huawei.com>
---
 doc/guides/nics/features/hns3.ini |  1 +
 drivers/net/hns3/hns3_rxtx.c      | 11 +++++++++++
 drivers/net/hns3/hns3_rxtx.h      |  2 ++
 drivers/net/hns3/hns3_rxtx_vec.h  |  9 +++++++++
 4 files changed, 23 insertions(+)

diff --git a/doc/guides/nics/features/hns3.ini b/doc/guides/nics/features/hns3.ini
index a1dc7eb9eb..00d9b1aef2 100644
--- a/doc/guides/nics/features/hns3.ini
+++ b/doc/guides/nics/features/hns3.ini
@@ -10,6 +10,7 @@ Queue start/stop     = Y
 Runtime Rx queue setup = Y
 Runtime Tx queue setup = Y
 Burst mode info      = Y
+Fast mbuf free       = Y
 MTU update           = Y
 Jumbo frame          = Y
 Scattered Rx         = Y
diff --git a/drivers/net/hns3/hns3_rxtx.c b/drivers/net/hns3/hns3_rxtx.c
index 4ae7c1f00a..235197ce55 100644
--- a/drivers/net/hns3/hns3_rxtx.c
+++ b/drivers/net/hns3/hns3_rxtx.c
@@ -2722,6 +2722,8 @@ hns3_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t nb_desc,
 	txq->udp_cksum_mode = hw->udp_cksum_mode;
 	txq->over_length_pkt_cnt = 0;
 	txq->exceed_limit_bd_pkt_cnt = 0;
+	txq->mbuf_fast_free_en = !!(dev->data->dev_conf.txmode.offloads &
+					DEV_TX_OFFLOAD_MBUF_FAST_FREE);
 	txq->exceed_limit_bd_reassem_fail = 0;
 	txq->unsupported_tunnel_pkt_cnt = 0;
 	txq->queue_full_cnt = 0;
@@ -3592,6 +3594,14 @@ hns3_tx_free_buffer_simple(struct hns3_tx_queue *txq)
 
 		tx_entry = &txq->sw_ring[txq->next_to_clean];
 
+		if (txq->mbuf_fast_free_en) {
+			rte_mempool_put_bulk(tx_entry->mbuf->pool,
+					(void **)tx_entry, txq->tx_rs_thresh);
+			for (i = 0; i < txq->tx_rs_thresh; i++)
+				tx_entry[i].mbuf = NULL;
+			goto update_field;
+		}
+
 		for (i = 0; i < txq->tx_rs_thresh; i++)
 			rte_prefetch0((tx_entry + i)->mbuf);
 		for (i = 0; i < txq->tx_rs_thresh; i++, tx_entry++) {
@@ -3599,6 +3609,7 @@ hns3_tx_free_buffer_simple(struct hns3_tx_queue *txq)
 			tx_entry->mbuf = NULL;
 		}
 
+update_field:
 		txq->next_to_clean = (tx_next_clean + 1) % txq->nb_tx_desc;
 		txq->tx_bd_ready += txq->tx_rs_thresh;
 	}
diff --git a/drivers/net/hns3/hns3_rxtx.h b/drivers/net/hns3/hns3_rxtx.h
index dc9c89b5af..f425c9e8a3 100644
--- a/drivers/net/hns3/hns3_rxtx.h
+++ b/drivers/net/hns3/hns3_rxtx.h
@@ -423,6 +423,8 @@ struct hns3_tx_queue {
 	 */
 	bool pvid_sw_shift_en;
 	bool enabled;           /* indicate if Tx queue has been enabled */
+	/* check whether the mbuf fast free offload is enabled */
+	uint16_t mbuf_fast_free_en:1;
 
 	/*
 	 * The following items are used for the abnormal errors statistics in
diff --git a/drivers/net/hns3/hns3_rxtx_vec.h b/drivers/net/hns3/hns3_rxtx_vec.h
index 67c75e44ef..4985a7cae8 100644
--- a/drivers/net/hns3/hns3_rxtx_vec.h
+++ b/drivers/net/hns3/hns3_rxtx_vec.h
@@ -18,6 +18,14 @@ hns3_tx_bulk_free_buffers(struct hns3_tx_queue *txq)
 	int i;
 
 	tx_entry = &txq->sw_ring[txq->next_to_clean];
+	if (txq->mbuf_fast_free_en) {
+		rte_mempool_put_bulk(tx_entry->mbuf->pool, (void **)tx_entry,
+				     txq->tx_rs_thresh);
+		for (i = 0; i < txq->tx_rs_thresh; i++)
+			tx_entry[i].mbuf = NULL;
+		goto update_field;
+	}
+
 	for (i = 0; i < txq->tx_rs_thresh; i++, tx_entry++) {
 		m = rte_pktmbuf_prefree_seg(tx_entry->mbuf);
 		tx_entry->mbuf = NULL;
@@ -36,6 +44,7 @@ hns3_tx_bulk_free_buffers(struct hns3_tx_queue *txq)
 	if (nb_free)
 		rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
 
+update_field:
 	/* Update numbers of available descriptor due to buffer freed */
 	txq->tx_bd_ready += txq->tx_rs_thresh;
 	txq->next_to_clean += txq->tx_rs_thresh;
-- 
2.34.0

---
  Diff of the applied patch vs upstream commit (please double-check if non-empty):
---
--- -	2021-11-28 22:41:05.866400503 +0800
+++ 0049-net-hns3-optimize-Tx-performance-by-mbuf-fast-free.patch	2021-11-28 22:41:03.370206742 +0800
@@ -1 +1 @@
-From 3cc817c195cc8e3bf54b5cadd89c5c7776d3727e Mon Sep 17 00:00:00 2001
+From 6a4f262ed3ffb82a92332825230bab668f18ca7b Mon Sep 17 00:00:00 2001
@@ -4,0 +5,3 @@
+Cc: Xueming Li <xuemingl at nvidia.com>
+
+[ upstream commit 3cc817c195cc8e3bf54b5cadd89c5c7776d3727e ]
@@ -13,2 +15,0 @@
-Cc: stable at dpdk.org
-
@@ -25 +26 @@
-index c3464c8396..405b94f05c 100644
+index a1dc7eb9eb..00d9b1aef2 100644
@@ -28 +29 @@
-@@ -12,6 +12,7 @@ Queue start/stop     = Y
+@@ -10,6 +10,7 @@ Queue start/stop     = Y
@@ -33 +33,0 @@
- Free Tx mbuf on demand = Y
@@ -34,0 +35 @@
+ Jumbo frame          = Y
@@ -37 +38 @@
-index d26e262335..f0a57611ec 100644
+index 4ae7c1f00a..235197ce55 100644
@@ -40,3 +41 @@
-@@ -3059,6 +3059,8 @@ hns3_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t nb_desc,
- 	txq->min_tx_pkt_len = hw->min_tx_pkt_len;
- 	txq->tso_mode = hw->tso_mode;
+@@ -2722,6 +2722,8 @@ hns3_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t nb_desc,
@@ -43,0 +43,2 @@
+ 	txq->over_length_pkt_cnt = 0;
+ 	txq->exceed_limit_bd_pkt_cnt = 0;
@@ -45,5 +46,5 @@
-+				    RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE);
- 	memset(&txq->basic_stats, 0, sizeof(struct hns3_tx_basic_stats));
- 	memset(&txq->dfx_stats, 0, sizeof(struct hns3_tx_dfx_stats));
- 
-@@ -3991,6 +3993,14 @@ hns3_tx_free_buffer_simple(struct hns3_tx_queue *txq)
++					DEV_TX_OFFLOAD_MBUF_FAST_FREE);
+ 	txq->exceed_limit_bd_reassem_fail = 0;
+ 	txq->unsupported_tunnel_pkt_cnt = 0;
+ 	txq->queue_full_cnt = 0;
+@@ -3592,6 +3594,14 @@ hns3_tx_free_buffer_simple(struct hns3_tx_queue *txq)
@@ -64 +65 @@
-@@ -3998,6 +4008,7 @@ hns3_tx_free_buffer_simple(struct hns3_tx_queue *txq)
+@@ -3599,6 +3609,7 @@ hns3_tx_free_buffer_simple(struct hns3_tx_queue *txq)
@@ -73 +74 @@
-index 63bafc68b6..df731856ef 100644
+index dc9c89b5af..f425c9e8a3 100644
@@ -76,2 +77 @@
-@@ -495,6 +495,8 @@ struct hns3_tx_queue {
- 	 * this point.
+@@ -423,6 +423,8 @@ struct hns3_tx_queue {
@@ -79 +79,2 @@
- 	uint16_t pvid_sw_shift_en:1;
+ 	bool pvid_sw_shift_en;
+ 	bool enabled;           /* indicate if Tx queue has been enabled */
@@ -84 +85 @@
- 	 * For better performance in tx datapath, releasing mbuf in batches is
+ 	 * The following items are used for the abnormal errors statistics in


More information about the stable mailing list