[dpdk-stable] patch 'net/ice: fix generic build on FreeBSD' has been queued to stable release 20.11.4

Xueming Li xuemingl at nvidia.com
Wed Nov 10 07:30:49 CET 2021


Hi,

FYI, your patch has been queued to stable release 20.11.4

Note it hasn't been pushed to http://dpdk.org/browse/dpdk-stable yet.
It will be pushed if I get no objections before 11/12/21. So please
shout if anyone has objections.

Also note that after the patch there's a diff of the upstream commit vs the
patch applied to the branch. This will indicate if there was any rebasing
needed to apply to the stable branch. If there were code changes for rebasing
(ie: not only metadata diffs), please double check that the rebase was
correctly done.

Queued patches are on a temporary branch at:
https://github.com/steevenlee/dpdk

This queued commit can be viewed at:
https://github.com/steevenlee/dpdk/commit/84a1e5c1369a24a3728ff3ff5b8f2a13a2c96e5a

Thanks.

Xueming Li <xuemingl at nvidia.com>

---
>From 84a1e5c1369a24a3728ff3ff5b8f2a13a2c96e5a Mon Sep 17 00:00:00 2001
From: Leyi Rong <leyi.rong at intel.com>
Date: Tue, 19 Oct 2021 11:02:08 +0800
Subject: [PATCH] net/ice: fix generic build on FreeBSD
Cc: Xueming Li <xuemingl at nvidia.com>

[ upstream commit 0d989ff9ca515f80fbab2aad18cede501b759ff9 ]

The common header file for vectorization is included in multiple files,
and so must use macros for the current compilation unit, rather than the
compiler-capability flag set for the whole driver. With the current,
incorrect, macro, the AVX512 or AVX2 flags may be set when compiling up
SSE code, leading to compilation errors. Changing from "CC_AVX*_SUPPORT"
to the compiler-defined "__AVX*__" macros fixes this issue. In addition,
splitting AVX-specific code into the new ice_rxtx_common_avx.h header
file to avoid such bugs.

Bugzilla ID: 788
Fixes: a4e480de268e ("net/ice: optimize Tx by using AVX512")
Fixes: 20daa1c978b7 ("net/ice: fix crash in AVX512")

Signed-off-by: Leyi Rong <leyi.rong at intel.com>
Signed-off-by: Bruce Richardson <bruce.richardson at intel.com>
Reviewed-by: Ferruh Yigit <ferruh.yigit at intel.com>
---
 drivers/net/ice/ice_rxtx_common_avx.h | 213 ++++++++++++++++++++++++++
 drivers/net/ice/ice_rxtx_vec_avx2.c   |   1 +
 drivers/net/ice/ice_rxtx_vec_avx512.c |   1 +
 drivers/net/ice/ice_rxtx_vec_common.h | 201 +-----------------------
 4 files changed, 216 insertions(+), 200 deletions(-)
 create mode 100644 drivers/net/ice/ice_rxtx_common_avx.h

diff --git a/drivers/net/ice/ice_rxtx_common_avx.h b/drivers/net/ice/ice_rxtx_common_avx.h
new file mode 100644
index 0000000000..81e0db5dd3
--- /dev/null
+++ b/drivers/net/ice/ice_rxtx_common_avx.h
@@ -0,0 +1,213 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Intel Corporation
+ */
+
+#ifndef _ICE_RXTX_COMMON_AVX_H_
+#define _ICE_RXTX_COMMON_AVX_H_
+
+#include "ice_rxtx.h"
+
+#ifndef __INTEL_COMPILER
+#pragma GCC diagnostic ignored "-Wcast-qual"
+#endif
+
+#ifdef __AVX2__
+static __rte_always_inline void
+ice_rxq_rearm_common(struct ice_rx_queue *rxq, __rte_unused bool avx512)
+{
+	int i;
+	uint16_t rx_id;
+	volatile union ice_rx_flex_desc *rxdp;
+	struct ice_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
+
+	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+
+	/* Pull 'n' more MBUFs into the software ring */
+	if (rte_mempool_get_bulk(rxq->mp,
+				 (void *)rxep,
+				 ICE_RXQ_REARM_THRESH) < 0) {
+		if (rxq->rxrearm_nb + ICE_RXQ_REARM_THRESH >=
+		    rxq->nb_rx_desc) {
+			__m128i dma_addr0;
+
+			dma_addr0 = _mm_setzero_si128();
+			for (i = 0; i < ICE_DESCS_PER_LOOP; i++) {
+				rxep[i].mbuf = &rxq->fake_mbuf;
+				_mm_store_si128((__m128i *)&rxdp[i].read,
+						dma_addr0);
+			}
+		}
+		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
+			ICE_RXQ_REARM_THRESH;
+		return;
+	}
+
+#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
+	struct rte_mbuf *mb0, *mb1;
+	__m128i dma_addr0, dma_addr1;
+	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
+			RTE_PKTMBUF_HEADROOM);
+	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
+	for (i = 0; i < ICE_RXQ_REARM_THRESH; i += 2, rxep += 2) {
+		__m128i vaddr0, vaddr1;
+
+		mb0 = rxep[0].mbuf;
+		mb1 = rxep[1].mbuf;
+
+		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+				offsetof(struct rte_mbuf, buf_addr) + 8);
+		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
+		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
+
+		/* convert pa to dma_addr hdr/data */
+		dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
+		dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
+
+		/* add headroom to pa values */
+		dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
+		dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
+
+		/* flush desc with pa dma_addr */
+		_mm_store_si128((__m128i *)&rxdp++->read, dma_addr0);
+		_mm_store_si128((__m128i *)&rxdp++->read, dma_addr1);
+	}
+#else
+#ifdef __AVX512VL__
+	if (avx512) {
+		struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
+		struct rte_mbuf *mb4, *mb5, *mb6, *mb7;
+		__m512i dma_addr0_3, dma_addr4_7;
+		__m512i hdr_room = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);
+		/* Initialize the mbufs in vector, process 8 mbufs in one loop */
+		for (i = 0; i < ICE_RXQ_REARM_THRESH;
+				i += 8, rxep += 8, rxdp += 8) {
+			__m128i vaddr0, vaddr1, vaddr2, vaddr3;
+			__m128i vaddr4, vaddr5, vaddr6, vaddr7;
+			__m256i vaddr0_1, vaddr2_3;
+			__m256i vaddr4_5, vaddr6_7;
+			__m512i vaddr0_3, vaddr4_7;
+
+			mb0 = rxep[0].mbuf;
+			mb1 = rxep[1].mbuf;
+			mb2 = rxep[2].mbuf;
+			mb3 = rxep[3].mbuf;
+			mb4 = rxep[4].mbuf;
+			mb5 = rxep[5].mbuf;
+			mb6 = rxep[6].mbuf;
+			mb7 = rxep[7].mbuf;
+
+			/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+			RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+					offsetof(struct rte_mbuf, buf_addr) + 8);
+			vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
+			vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
+			vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
+			vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
+			vaddr4 = _mm_loadu_si128((__m128i *)&mb4->buf_addr);
+			vaddr5 = _mm_loadu_si128((__m128i *)&mb5->buf_addr);
+			vaddr6 = _mm_loadu_si128((__m128i *)&mb6->buf_addr);
+			vaddr7 = _mm_loadu_si128((__m128i *)&mb7->buf_addr);
+
+			/**
+			 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
+			 * into the high lanes. Similarly for 2 & 3, and so on.
+			 */
+			vaddr0_1 =
+				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
+							vaddr1, 1);
+			vaddr2_3 =
+				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
+							vaddr3, 1);
+			vaddr4_5 =
+				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr4),
+							vaddr5, 1);
+			vaddr6_7 =
+				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr6),
+							vaddr7, 1);
+			vaddr0_3 =
+				_mm512_inserti64x4(_mm512_castsi256_si512(vaddr0_1),
+						   vaddr2_3, 1);
+			vaddr4_7 =
+				_mm512_inserti64x4(_mm512_castsi256_si512(vaddr4_5),
+						   vaddr6_7, 1);
+
+			/* convert pa to dma_addr hdr/data */
+			dma_addr0_3 = _mm512_unpackhi_epi64(vaddr0_3, vaddr0_3);
+			dma_addr4_7 = _mm512_unpackhi_epi64(vaddr4_7, vaddr4_7);
+
+			/* add headroom to pa values */
+			dma_addr0_3 = _mm512_add_epi64(dma_addr0_3, hdr_room);
+			dma_addr4_7 = _mm512_add_epi64(dma_addr4_7, hdr_room);
+
+			/* flush desc with pa dma_addr */
+			_mm512_store_si512((__m512i *)&rxdp->read, dma_addr0_3);
+			_mm512_store_si512((__m512i *)&(rxdp + 4)->read, dma_addr4_7);
+		}
+	} else
+#endif /* __AVX512VL__ */
+	{
+		struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
+		__m256i dma_addr0_1, dma_addr2_3;
+		__m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM);
+		/* Initialize the mbufs in vector, process 4 mbufs in one loop */
+		for (i = 0; i < ICE_RXQ_REARM_THRESH;
+				i += 4, rxep += 4, rxdp += 4) {
+			__m128i vaddr0, vaddr1, vaddr2, vaddr3;
+			__m256i vaddr0_1, vaddr2_3;
+
+			mb0 = rxep[0].mbuf;
+			mb1 = rxep[1].mbuf;
+			mb2 = rxep[2].mbuf;
+			mb3 = rxep[3].mbuf;
+
+			/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+			RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+					offsetof(struct rte_mbuf, buf_addr) + 8);
+			vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
+			vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
+			vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
+			vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
+
+			/**
+			 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
+			 * into the high lanes. Similarly for 2 & 3
+			 */
+			vaddr0_1 =
+				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
+							vaddr1, 1);
+			vaddr2_3 =
+				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
+							vaddr3, 1);
+
+			/* convert pa to dma_addr hdr/data */
+			dma_addr0_1 = _mm256_unpackhi_epi64(vaddr0_1, vaddr0_1);
+			dma_addr2_3 = _mm256_unpackhi_epi64(vaddr2_3, vaddr2_3);
+
+			/* add headroom to pa values */
+			dma_addr0_1 = _mm256_add_epi64(dma_addr0_1, hdr_room);
+			dma_addr2_3 = _mm256_add_epi64(dma_addr2_3, hdr_room);
+
+			/* flush desc with pa dma_addr */
+			_mm256_store_si256((__m256i *)&rxdp->read, dma_addr0_1);
+			_mm256_store_si256((__m256i *)&(rxdp + 2)->read, dma_addr2_3);
+		}
+	}
+
+#endif
+
+	rxq->rxrearm_start += ICE_RXQ_REARM_THRESH;
+	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
+		rxq->rxrearm_start = 0;
+
+	rxq->rxrearm_nb -= ICE_RXQ_REARM_THRESH;
+
+	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
+			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
+
+	/* Update the tail pointer on the NIC */
+	ICE_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
+}
+#endif /* __AVX2__ */
+
+#endif /* _ICE_RXTX_COMMON_AVX_H_ */
diff --git a/drivers/net/ice/ice_rxtx_vec_avx2.c b/drivers/net/ice/ice_rxtx_vec_avx2.c
index ac2719fa15..581d3d348f 100644
--- a/drivers/net/ice/ice_rxtx_vec_avx2.c
+++ b/drivers/net/ice/ice_rxtx_vec_avx2.c
@@ -3,6 +3,7 @@
  */
 
 #include "ice_rxtx_vec_common.h"
+#include "ice_rxtx_common_avx.h"
 
 #include <x86intrin.h>
 
diff --git a/drivers/net/ice/ice_rxtx_vec_avx512.c b/drivers/net/ice/ice_rxtx_vec_avx512.c
index 719b7b8b38..2c0881a01e 100644
--- a/drivers/net/ice/ice_rxtx_vec_avx512.c
+++ b/drivers/net/ice/ice_rxtx_vec_avx512.c
@@ -3,6 +3,7 @@
  */
 
 #include "ice_rxtx_vec_common.h"
+#include "ice_rxtx_common_avx.h"
 
 #include <x86intrin.h>
 
diff --git a/drivers/net/ice/ice_rxtx_vec_common.h b/drivers/net/ice/ice_rxtx_vec_common.h
index 1d138aa899..bd2450ad5f 100644
--- a/drivers/net/ice/ice_rxtx_vec_common.h
+++ b/drivers/net/ice/ice_rxtx_vec_common.h
@@ -194,7 +194,7 @@ _ice_tx_queue_release_mbufs_vec(struct ice_tx_queue *txq)
 	 */
 	i = txq->tx_next_dd - txq->tx_rs_thresh + 1;
 
-#ifdef CC_AVX512_SUPPORT
+#ifdef __AVX512VL__
 	struct rte_eth_dev *dev = &rte_eth_devices[txq->vsi->adapter->pf.dev_data->port_id];
 
 	if (dev->tx_pkt_burst == ice_xmit_pkts_vec_avx512) {
@@ -322,203 +322,4 @@ ice_tx_vec_dev_check_default(struct rte_eth_dev *dev)
 	return 0;
 }
 
-#ifdef CC_AVX2_SUPPORT
-static __rte_always_inline void
-ice_rxq_rearm_common(struct ice_rx_queue *rxq, __rte_unused bool avx512)
-{
-	int i;
-	uint16_t rx_id;
-	volatile union ice_rx_flex_desc *rxdp;
-	struct ice_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
-
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
-
-	/* Pull 'n' more MBUFs into the software ring */
-	if (rte_mempool_get_bulk(rxq->mp,
-				 (void *)rxep,
-				 ICE_RXQ_REARM_THRESH) < 0) {
-		if (rxq->rxrearm_nb + ICE_RXQ_REARM_THRESH >=
-		    rxq->nb_rx_desc) {
-			__m128i dma_addr0;
-
-			dma_addr0 = _mm_setzero_si128();
-			for (i = 0; i < ICE_DESCS_PER_LOOP; i++) {
-				rxep[i].mbuf = &rxq->fake_mbuf;
-				_mm_store_si128((__m128i *)&rxdp[i].read,
-						dma_addr0);
-			}
-		}
-		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			ICE_RXQ_REARM_THRESH;
-		return;
-	}
-
-#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
-	struct rte_mbuf *mb0, *mb1;
-	__m128i dma_addr0, dma_addr1;
-	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
-			RTE_PKTMBUF_HEADROOM);
-	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < ICE_RXQ_REARM_THRESH; i += 2, rxep += 2) {
-		__m128i vaddr0, vaddr1;
-
-		mb0 = rxep[0].mbuf;
-		mb1 = rxep[1].mbuf;
-
-		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-				offsetof(struct rte_mbuf, buf_addr) + 8);
-		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-
-		/* convert pa to dma_addr hdr/data */
-		dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
-		dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
-
-		/* add headroom to pa values */
-		dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
-		dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
-
-		/* flush desc with pa dma_addr */
-		_mm_store_si128((__m128i *)&rxdp++->read, dma_addr0);
-		_mm_store_si128((__m128i *)&rxdp++->read, dma_addr1);
-	}
-#else
-#ifdef CC_AVX512_SUPPORT
-	if (avx512) {
-		struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
-		struct rte_mbuf *mb4, *mb5, *mb6, *mb7;
-		__m512i dma_addr0_3, dma_addr4_7;
-		__m512i hdr_room = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);
-		/* Initialize the mbufs in vector, process 8 mbufs in one loop */
-		for (i = 0; i < ICE_RXQ_REARM_THRESH;
-				i += 8, rxep += 8, rxdp += 8) {
-			__m128i vaddr0, vaddr1, vaddr2, vaddr3;
-			__m128i vaddr4, vaddr5, vaddr6, vaddr7;
-			__m256i vaddr0_1, vaddr2_3;
-			__m256i vaddr4_5, vaddr6_7;
-			__m512i vaddr0_3, vaddr4_7;
-
-			mb0 = rxep[0].mbuf;
-			mb1 = rxep[1].mbuf;
-			mb2 = rxep[2].mbuf;
-			mb3 = rxep[3].mbuf;
-			mb4 = rxep[4].mbuf;
-			mb5 = rxep[5].mbuf;
-			mb6 = rxep[6].mbuf;
-			mb7 = rxep[7].mbuf;
-
-			/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-			RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-					offsetof(struct rte_mbuf, buf_addr) + 8);
-			vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-			vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-			vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
-			vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
-			vaddr4 = _mm_loadu_si128((__m128i *)&mb4->buf_addr);
-			vaddr5 = _mm_loadu_si128((__m128i *)&mb5->buf_addr);
-			vaddr6 = _mm_loadu_si128((__m128i *)&mb6->buf_addr);
-			vaddr7 = _mm_loadu_si128((__m128i *)&mb7->buf_addr);
-
-			/**
-			 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
-			 * into the high lanes. Similarly for 2 & 3, and so on.
-			 */
-			vaddr0_1 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
-							vaddr1, 1);
-			vaddr2_3 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
-							vaddr3, 1);
-			vaddr4_5 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr4),
-							vaddr5, 1);
-			vaddr6_7 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr6),
-							vaddr7, 1);
-			vaddr0_3 =
-				_mm512_inserti64x4(_mm512_castsi256_si512(vaddr0_1),
-							vaddr2_3, 1);
-			vaddr4_7 =
-				_mm512_inserti64x4(_mm512_castsi256_si512(vaddr4_5),
-							vaddr6_7, 1);
-
-			/* convert pa to dma_addr hdr/data */
-			dma_addr0_3 = _mm512_unpackhi_epi64(vaddr0_3, vaddr0_3);
-			dma_addr4_7 = _mm512_unpackhi_epi64(vaddr4_7, vaddr4_7);
-
-			/* add headroom to pa values */
-			dma_addr0_3 = _mm512_add_epi64(dma_addr0_3, hdr_room);
-			dma_addr4_7 = _mm512_add_epi64(dma_addr4_7, hdr_room);
-
-			/* flush desc with pa dma_addr */
-			_mm512_store_si512((__m512i *)&rxdp->read, dma_addr0_3);
-			_mm512_store_si512((__m512i *)&(rxdp + 4)->read, dma_addr4_7);
-		}
-	} else
-#endif
-	{
-		struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
-		__m256i dma_addr0_1, dma_addr2_3;
-		__m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM);
-		/* Initialize the mbufs in vector, process 4 mbufs in one loop */
-		for (i = 0; i < ICE_RXQ_REARM_THRESH;
-				i += 4, rxep += 4, rxdp += 4) {
-			__m128i vaddr0, vaddr1, vaddr2, vaddr3;
-			__m256i vaddr0_1, vaddr2_3;
-
-			mb0 = rxep[0].mbuf;
-			mb1 = rxep[1].mbuf;
-			mb2 = rxep[2].mbuf;
-			mb3 = rxep[3].mbuf;
-
-			/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-			RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-					offsetof(struct rte_mbuf, buf_addr) + 8);
-			vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-			vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-			vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
-			vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
-
-			/**
-			 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
-			 * into the high lanes. Similarly for 2 & 3
-			 */
-			vaddr0_1 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
-							vaddr1, 1);
-			vaddr2_3 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
-							vaddr3, 1);
-
-			/* convert pa to dma_addr hdr/data */
-			dma_addr0_1 = _mm256_unpackhi_epi64(vaddr0_1, vaddr0_1);
-			dma_addr2_3 = _mm256_unpackhi_epi64(vaddr2_3, vaddr2_3);
-
-			/* add headroom to pa values */
-			dma_addr0_1 = _mm256_add_epi64(dma_addr0_1, hdr_room);
-			dma_addr2_3 = _mm256_add_epi64(dma_addr2_3, hdr_room);
-
-			/* flush desc with pa dma_addr */
-			_mm256_store_si256((__m256i *)&rxdp->read, dma_addr0_1);
-			_mm256_store_si256((__m256i *)&(rxdp + 2)->read, dma_addr2_3);
-		}
-	}
-
-#endif
-
-	rxq->rxrearm_start += ICE_RXQ_REARM_THRESH;
-	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
-		rxq->rxrearm_start = 0;
-
-	rxq->rxrearm_nb -= ICE_RXQ_REARM_THRESH;
-
-	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
-			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
-
-	/* Update the tail pointer on the NIC */
-	ICE_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
-}
-#endif
-
 #endif
-- 
2.33.0

---
  Diff of the applied patch vs upstream commit (please double-check if non-empty:
---
--- -	2021-11-10 14:17:09.271137982 +0800
+++ 0165-net-ice-fix-generic-build-on-FreeBSD.patch	2021-11-10 14:17:01.977411885 +0800
@@ -1 +1 @@
-From 0d989ff9ca515f80fbab2aad18cede501b759ff9 Mon Sep 17 00:00:00 2001
+From 84a1e5c1369a24a3728ff3ff5b8f2a13a2c96e5a Mon Sep 17 00:00:00 2001
@@ -4,0 +5,3 @@
+Cc: Xueming Li <xuemingl at nvidia.com>
+
+[ upstream commit 0d989ff9ca515f80fbab2aad18cede501b759ff9 ]
@@ -18 +20,0 @@
-Cc: stable at dpdk.org
@@ -251 +253 @@
-index 9725ac0180..490693bff2 100644
+index ac2719fa15..581d3d348f 100644
@@ -260 +262 @@
- #include <rte_vect.h>
+ #include <x86intrin.h>
@@ -263 +265 @@
-index 5bba9887d2..7efe7b50a2 100644
+index 719b7b8b38..2c0881a01e 100644
@@ -272 +274 @@
- #include <rte_vect.h>
+ #include <x86intrin.h>
@@ -275 +277 @@
-index 5b5250565e..f0f9926585 100644
+index 1d138aa899..bd2450ad5f 100644
@@ -286,3 +288,3 @@
- 	if (dev->tx_pkt_burst == ice_xmit_pkts_vec_avx512 ||
-@@ -355,205 +355,6 @@ ice_tx_vec_dev_check_default(struct rte_eth_dev *dev)
- 	return result;
+ 	if (dev->tx_pkt_burst == ice_xmit_pkts_vec_avx512) {
+@@ -322,203 +322,4 @@ ice_tx_vec_dev_check_default(struct rte_eth_dev *dev)
+ 	return 0;
@@ -490,3 +492 @@
- static inline void
- ice_txd_enable_offload(struct rte_mbuf *tx_pkt,
- 		       uint64_t *txd_hi)
+ #endif


More information about the stable mailing list