patch 'net/hns3: fix order in NEON Rx' has been queued to stable release 22.11.4

Xueming Li xuemingl at nvidia.com
Sun Oct 22 16:20:47 CEST 2023


Hi,

FYI, your patch has been queued to stable release 22.11.4

Note it hasn't been pushed to http://dpdk.org/browse/dpdk-stable yet.
It will be pushed if I receive no objections before 11/15/23, so please
shout if you have any objections.

Also note that after the patch there's a diff of the upstream commit vs the
patch applied to the branch. This indicates whether any rebasing was needed
to apply the patch to the stable branch. If there were code changes for
rebasing (i.e. not only metadata diffs), please double-check that the rebase
was done correctly.

Queued patches are on a temporary branch at:
https://git.dpdk.org/dpdk-stable/log/?h=22.11-staging

This queued commit can be viewed at:
https://git.dpdk.org/dpdk-stable/commit/?h=22.11-staging&id=4e986000b127ffe1e8f127893201889e0be869af

Thanks.

Xueming Li <xuemingl at nvidia.com>

---
From 4e986000b127ffe1e8f127893201889e0be869af Mon Sep 17 00:00:00 2001
From: Huisong Li <lihuisong at huawei.com>
Date: Tue, 11 Jul 2023 18:24:45 +0800
Subject: [PATCH] net/hns3: fix order in NEON Rx
Cc: Xueming Li <xuemingl at nvidia.com>

[ upstream commit 7dd439ed998c36c8d0204c436cc656af08cfa5fc ]

This patch reorders the operations in the NEON Rx path for better
maintainability and easier understanding.

Fixes: a3d4f4d291d7 ("net/hns3: support NEON Rx")

Signed-off-by: Huisong Li <lihuisong at huawei.com>
Signed-off-by: Dongdong Liu <liudongdong3 at huawei.com>
---
 drivers/net/hns3/hns3_rxtx_vec_neon.h | 78 +++++++++++----------------
 1 file changed, 31 insertions(+), 47 deletions(-)

diff --git a/drivers/net/hns3/hns3_rxtx_vec_neon.h b/drivers/net/hns3/hns3_rxtx_vec_neon.h
index a20a6b6acb..1048b9db87 100644
--- a/drivers/net/hns3/hns3_rxtx_vec_neon.h
+++ b/drivers/net/hns3/hns3_rxtx_vec_neon.h
@@ -180,19 +180,12 @@ hns3_recv_burst_vec(struct hns3_rx_queue *__restrict rxq,
 		bd_vld = vset_lane_u16(rxdp[2].rx.bdtype_vld_udp0, bd_vld, 2);
 		bd_vld = vset_lane_u16(rxdp[3].rx.bdtype_vld_udp0, bd_vld, 3);

-		/* load 2 mbuf pointer */
-		mbp1 = vld1q_u64((uint64_t *)&sw_ring[pos]);
-
 		bd_vld = vshl_n_u16(bd_vld,
 				    HNS3_UINT16_BIT - 1 - HNS3_RXD_VLD_B);
 		bd_vld = vreinterpret_u16_s16(
 				vshr_n_s16(vreinterpret_s16_u16(bd_vld),
 					   HNS3_UINT16_BIT - 1));
 		stat = ~vget_lane_u64(vreinterpret_u64_u16(bd_vld), 0);
-
-		/* load 2 mbuf pointer again */
-		mbp2 = vld1q_u64((uint64_t *)&sw_ring[pos + 2]);
-
 		if (likely(stat == 0))
 			bd_valid_num = HNS3_DEFAULT_DESCS_PER_LOOP;
 		else
@@ -200,20 +193,20 @@ hns3_recv_burst_vec(struct hns3_rx_queue *__restrict rxq,
 		if (bd_valid_num == 0)
 			break;

-		/* use offset to control below data load oper ordering */
-		offset = rxq->offset_table[bd_valid_num];
+		/* load 4 mbuf pointer */
+		mbp1 = vld1q_u64((uint64_t *)&sw_ring[pos]);
+		mbp2 = vld1q_u64((uint64_t *)&sw_ring[pos + 2]);

-		/* store 2 mbuf pointer into rx_pkts */
+		/* store 4 mbuf pointer into rx_pkts */
 		vst1q_u64((uint64_t *)&rx_pkts[pos], mbp1);
+		vst1q_u64((uint64_t *)&rx_pkts[pos + 2], mbp2);

-		/* read first two descs */
+		/* use offset to control below data load oper ordering */
+		offset = rxq->offset_table[bd_valid_num];
+
+		/* read 4 descs */
 		descs[0] = vld2q_u64((uint64_t *)(rxdp + offset));
 		descs[1] = vld2q_u64((uint64_t *)(rxdp + offset + 1));
-
-		/* store 2 mbuf pointer into rx_pkts again */
-		vst1q_u64((uint64_t *)&rx_pkts[pos + 2], mbp2);
-
-		/* read remains two descs */
 		descs[2] = vld2q_u64((uint64_t *)(rxdp + offset + 2));
 		descs[3] = vld2q_u64((uint64_t *)(rxdp + offset + 3));

@@ -221,56 +214,47 @@ hns3_recv_burst_vec(struct hns3_rx_queue *__restrict rxq,
 		pkt_mbuf1.val[1] = vreinterpretq_u8_u64(descs[0].val[1]);
 		pkt_mbuf2.val[0] = vreinterpretq_u8_u64(descs[1].val[0]);
 		pkt_mbuf2.val[1] = vreinterpretq_u8_u64(descs[1].val[1]);
+		pkt_mbuf3.val[0] = vreinterpretq_u8_u64(descs[2].val[0]);
+		pkt_mbuf3.val[1] = vreinterpretq_u8_u64(descs[2].val[1]);
+		pkt_mbuf4.val[0] = vreinterpretq_u8_u64(descs[3].val[0]);
+		pkt_mbuf4.val[1] = vreinterpretq_u8_u64(descs[3].val[1]);

-		/* pkt 1,2 convert format from desc to pktmbuf */
+		/* 4 packets convert format from desc to pktmbuf */
 		pkt_mb1 = vqtbl2q_u8(pkt_mbuf1, shuf_desc_fields_msk);
 		pkt_mb2 = vqtbl2q_u8(pkt_mbuf2, shuf_desc_fields_msk);
+		pkt_mb3 = vqtbl2q_u8(pkt_mbuf3, shuf_desc_fields_msk);
+		pkt_mb4 = vqtbl2q_u8(pkt_mbuf4, shuf_desc_fields_msk);

-		/* store the first 8 bytes of pkt 1,2 mbuf's rearm_data */
-		*(uint64_t *)&sw_ring[pos + 0].mbuf->rearm_data =
-			rxq->mbuf_initializer;
-		*(uint64_t *)&sw_ring[pos + 1].mbuf->rearm_data =
-			rxq->mbuf_initializer;
-
-		/* pkt 1,2 remove crc */
+		/* 4 packets remove crc */
 		tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb1), crc_adjust);
 		pkt_mb1 = vreinterpretq_u8_u16(tmp);
 		tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb2), crc_adjust);
 		pkt_mb2 = vreinterpretq_u8_u16(tmp);
+		tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb3), crc_adjust);
+		pkt_mb3 = vreinterpretq_u8_u16(tmp);
+		tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb4), crc_adjust);
+		pkt_mb4 = vreinterpretq_u8_u16(tmp);

-		pkt_mbuf3.val[0] = vreinterpretq_u8_u64(descs[2].val[0]);
-		pkt_mbuf3.val[1] = vreinterpretq_u8_u64(descs[2].val[1]);
-		pkt_mbuf4.val[0] = vreinterpretq_u8_u64(descs[3].val[0]);
-		pkt_mbuf4.val[1] = vreinterpretq_u8_u64(descs[3].val[1]);
-
-		/* pkt 3,4 convert format from desc to pktmbuf */
-		pkt_mb3 = vqtbl2q_u8(pkt_mbuf3, shuf_desc_fields_msk);
-		pkt_mb4 = vqtbl2q_u8(pkt_mbuf4, shuf_desc_fields_msk);
-
-		/* pkt 1,2 save to rx_pkts mbuf */
+		/* save packet info to rx_pkts mbuf */
 		vst1q_u8((void *)&sw_ring[pos + 0].mbuf->rx_descriptor_fields1,
 			 pkt_mb1);
 		vst1q_u8((void *)&sw_ring[pos + 1].mbuf->rx_descriptor_fields1,
 			 pkt_mb2);
+		vst1q_u8((void *)&sw_ring[pos + 2].mbuf->rx_descriptor_fields1,
+			 pkt_mb3);
+		vst1q_u8((void *)&sw_ring[pos + 3].mbuf->rx_descriptor_fields1,
+			 pkt_mb4);

-		/* pkt 3,4 remove crc */
-		tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb3), crc_adjust);
-		pkt_mb3 = vreinterpretq_u8_u16(tmp);
-		tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb4), crc_adjust);
-		pkt_mb4 = vreinterpretq_u8_u16(tmp);
-
-		/* store the first 8 bytes of pkt 3,4 mbuf's rearm_data */
+		/* store the first 8 bytes of packets mbuf's rearm_data */
+		*(uint64_t *)&sw_ring[pos + 0].mbuf->rearm_data =
+			rxq->mbuf_initializer;
+		*(uint64_t *)&sw_ring[pos + 1].mbuf->rearm_data =
+			rxq->mbuf_initializer;
 		*(uint64_t *)&sw_ring[pos + 2].mbuf->rearm_data =
 			rxq->mbuf_initializer;
 		*(uint64_t *)&sw_ring[pos + 3].mbuf->rearm_data =
 			rxq->mbuf_initializer;

-		/* pkt 3,4 save to rx_pkts mbuf */
-		vst1q_u8((void *)&sw_ring[pos + 2].mbuf->rx_descriptor_fields1,
-			 pkt_mb3);
-		vst1q_u8((void *)&sw_ring[pos + 3].mbuf->rx_descriptor_fields1,
-			 pkt_mb4);
-
 		rte_prefetch_non_temporal(rxdp + HNS3_DEFAULT_DESCS_PER_LOOP);

 		parse_retcode = hns3_desc_parse_field(rxq, &sw_ring[pos],
--
2.25.1
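
For readers less familiar with the NEON path, below is a minimal, non-NEON C
sketch of the loop structure after the reordering. This is illustrative only,
not driver code: the types and names (struct rx_desc, struct mbuf,
rx_burst_iteration, DESCS_PER_LOOP) are hypothetical stand-ins for the
intrinsic sequences in hns3_recv_burst_vec(). The point is that each kind of
operation is now performed for all four descriptors before moving on to the
next kind, instead of interleaving "first two / last two" as before.

/* Illustrative sketch only -- not the hns3 driver code. */
#include <stddef.h>
#include <stdint.h>

#define DESCS_PER_LOOP 4	/* mirrors HNS3_DEFAULT_DESCS_PER_LOOP */

struct rx_desc { uint64_t qword[4]; };	/* placeholder descriptor */
struct mbuf { uint64_t rearm_data; uint64_t rx_fields; };	/* placeholder mbuf */

static void
rx_burst_iteration(struct mbuf **sw_ring, struct mbuf **rx_pkts,
		   const struct rx_desc *rxdp, size_t pos,
		   uint64_t mbuf_initializer)
{
	struct rx_desc descs[DESCS_PER_LOOP];
	size_t i;

	/* publish all four mbuf pointers to the caller's array */
	for (i = 0; i < DESCS_PER_LOOP; i++)
		rx_pkts[pos + i] = sw_ring[pos + i];

	/* read all four descriptors */
	for (i = 0; i < DESCS_PER_LOOP; i++)
		descs[i] = rxdp[i];

	/* convert descriptor fields and store them into each mbuf */
	for (i = 0; i < DESCS_PER_LOOP; i++)
		rx_pkts[pos + i]->rx_fields = descs[i].qword[0];

	/* finally rearm all four mbufs */
	for (i = 0; i < DESCS_PER_LOOP; i++)
		rx_pkts[pos + i]->rearm_data = mbuf_initializer;
}

The actual driver does the same grouping with vld1q_u64/vst1q_u64 for the
mbuf pointers and vld2q_u64/vqtbl2q_u8 for the descriptors, as shown in the
hunks above.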

---
  Diff of the applied patch vs upstream commit (please double-check if non-empty):
---
--- -	2023-10-22 22:17:35.115691100 +0800
+++ 0018-net-hns3-fix-order-in-NEON-Rx.patch	2023-10-22 22:17:34.156723700 +0800
@@ -1 +1 @@
-From 7dd439ed998c36c8d0204c436cc656af08cfa5fc Mon Sep 17 00:00:00 2001
+From 4e986000b127ffe1e8f127893201889e0be869af Mon Sep 17 00:00:00 2001
@@ -4,0 +5,3 @@
+Cc: Xueming Li <xuemingl at nvidia.com>
+
+[ upstream commit 7dd439ed998c36c8d0204c436cc656af08cfa5fc ]
@@ -10 +12,0 @@
-Cc: stable at dpdk.org
@@ -19 +21 @@
-index 564d831a48..0dc6b9f0a2 100644
+index a20a6b6acb..1048b9db87 100644

