[dpdk-dev] [PATCH v4 7/7] librte_table: performance improvement on rte_prefetch offset

roy.fan.zhang at intel.com roy.fan.zhang at intel.com
Wed Oct 28 18:11:22 CET 2015


From: Fan Zhang <roy.fan.zhang at intel.com>

This patch modifies rte_prefetch offsets to improve hash/lru
table lookup performance.

Signed-off-by: Fan Zhang <roy.fan.zhang at intel.com>
---
 lib/librte_table/rte_table_hash_ext.c   | 10 ++++---
 lib/librte_table/rte_table_hash_key16.c | 51 +++++++++++++++++----------------
 lib/librte_table/rte_table_hash_key32.c | 35 +++++++++++-----------
 lib/librte_table/rte_table_hash_key8.c  | 51 +++++++++++++++++----------------
 lib/librte_table/rte_table_hash_lru.c   | 10 ++++---
 5 files changed, 85 insertions(+), 72 deletions(-)

diff --git a/lib/librte_table/rte_table_hash_ext.c b/lib/librte_table/rte_table_hash_ext.c
index 1fa15c8..854e1a5 100644
--- a/lib/librte_table/rte_table_hash_ext.c
+++ b/lib/librte_table/rte_table_hash_ext.c
@@ -648,6 +648,7 @@ static int rte_table_hash_ext_lookup_unoptimized(
 {									\
 	uint64_t pkt00_mask, pkt01_mask;				\
 	struct rte_mbuf *mbuf00, *mbuf01;				\
+	uint32_t key_offset = t->key_offset;			\
 									\
 	pkt00_index = __builtin_ctzll(pkts_mask);			\
 	pkt00_mask = 1LLU << pkt00_index;				\
@@ -659,8 +660,8 @@ static int rte_table_hash_ext_lookup_unoptimized(
 	pkts_mask &= ~pkt01_mask;					\
 	mbuf01 = pkts[pkt01_index];					\
 									\
-	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, 0));		\
-	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, 0));		\
+	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, key_offset));\
+	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, key_offset));\
 }
 
 #define lookup2_stage0_with_odd_support(t, g, pkts, pkts_mask, pkt00_index, \
@@ -668,6 +669,7 @@ static int rte_table_hash_ext_lookup_unoptimized(
 {									\
 	uint64_t pkt00_mask, pkt01_mask;				\
 	struct rte_mbuf *mbuf00, *mbuf01;				\
+	uint32_t key_offset = t->key_offset;			\
 									\
 	pkt00_index = __builtin_ctzll(pkts_mask);			\
 	pkt00_mask = 1LLU << pkt00_index;				\
@@ -681,8 +683,8 @@ static int rte_table_hash_ext_lookup_unoptimized(
 	pkts_mask &= ~pkt01_mask;					\
 	mbuf01 = pkts[pkt01_index];					\
 									\
-	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, 0));		\
-	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, 0));		\
+	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, key_offset));\
+	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, key_offset));\
 }
 
 #define lookup2_stage1(t, g, pkts, pkt10_index, pkt11_index)		\
diff --git a/lib/librte_table/rte_table_hash_key16.c b/lib/librte_table/rte_table_hash_key16.c
index 427b534..21130b9 100644
--- a/lib/librte_table/rte_table_hash_key16.c
+++ b/lib/librte_table/rte_table_hash_key16.c
@@ -595,16 +595,17 @@ rte_table_hash_entry_delete_key16_ext(
 		pos = 3;					\
 }
 
-#define lookup1_stage0(pkt0_index, mbuf0, pkts, pkts_mask)	\
+#define lookup1_stage0(pkt0_index, mbuf0, pkts, pkts_mask, f)	\
 {								\
 	uint64_t pkt_mask;					\
+	uint32_t key_offset = f->key_offset;\
 								\
 	pkt0_index = __builtin_ctzll(pkts_mask);		\
 	pkt_mask = 1LLU << pkt0_index;				\
 	pkts_mask &= ~pkt_mask;					\
 								\
 	mbuf0 = pkts[pkt0_index];				\
-	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf0, 0));	\
+	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf0, key_offset));\
 }
 
 #define lookup1_stage1(mbuf1, bucket1, f)			\
@@ -729,36 +730,38 @@ rte_table_hash_entry_delete_key16_ext(
 }
 
 #define lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01,\
-		pkts, pkts_mask)				\
+		pkts, pkts_mask, f)				\
 {								\
 	uint64_t pkt00_mask, pkt01_mask;			\
+	uint32_t key_offset = f->key_offset;		\
 								\
 	pkt00_index = __builtin_ctzll(pkts_mask);		\
 	pkt00_mask = 1LLU << pkt00_index;			\
 	pkts_mask &= ~pkt00_mask;				\
 								\
 	mbuf00 = pkts[pkt00_index];				\
-	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, 0));	\
+	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, key_offset));\
 								\
 	pkt01_index = __builtin_ctzll(pkts_mask);		\
 	pkt01_mask = 1LLU << pkt01_index;			\
 	pkts_mask &= ~pkt01_mask;				\
 								\
 	mbuf01 = pkts[pkt01_index];				\
-	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, 0));	\
+	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, key_offset));\
 }
 
 #define lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,\
-		mbuf00, mbuf01, pkts, pkts_mask)		\
+		mbuf00, mbuf01, pkts, pkts_mask, f)		\
 {								\
 	uint64_t pkt00_mask, pkt01_mask;			\
+	uint32_t key_offset = f->key_offset;		\
 								\
 	pkt00_index = __builtin_ctzll(pkts_mask);		\
 	pkt00_mask = 1LLU << pkt00_index;			\
 	pkts_mask &= ~pkt00_mask;				\
 								\
 	mbuf00 = pkts[pkt00_index];				\
-	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, 0));	\
+	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, key_offset));	\
 								\
 	pkt01_index = __builtin_ctzll(pkts_mask);		\
 	if (pkts_mask == 0)					\
@@ -767,7 +770,7 @@ rte_table_hash_entry_delete_key16_ext(
 	pkts_mask &= ~pkt01_mask;				\
 								\
 	mbuf01 = pkts[pkt01_index];				\
-	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, 0));	\
+	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, key_offset));	\
 }
 
 #define lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f)	\
@@ -922,7 +925,7 @@ rte_table_hash_lookup_key16_lru(
 			struct rte_mbuf *mbuf;
 			uint32_t pkt_index;
 
-			lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask);
+			lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask, f);
 			lookup1_stage1(mbuf, bucket, f);
 			lookup1_stage2_lru(pkt_index, mbuf, bucket,
 				pkts_mask_out, entries, f);
@@ -940,7 +943,7 @@ rte_table_hash_lookup_key16_lru(
 	 */
 	/* Pipeline stage 0 */
 	lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
-		pkts_mask);
+		pkts_mask, f);
 
 	/* Pipeline feed */
 	mbuf10 = mbuf00;
@@ -950,7 +953,7 @@ rte_table_hash_lookup_key16_lru(
 
 	/* Pipeline stage 0 */
 	lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
-		pkts_mask);
+		pkts_mask, f);
 
 	/* Pipeline stage 1 */
 	lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
@@ -974,7 +977,7 @@ rte_table_hash_lookup_key16_lru(
 
 		/* Pipeline stage 0 */
 		lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,
-			mbuf00, mbuf01, pkts, pkts_mask);
+			mbuf00, mbuf01, pkts, pkts_mask, f);
 
 		/* Pipeline stage 1 */
 		lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
@@ -1051,7 +1054,7 @@ rte_table_hash_lookup_key16_lru_dosig(
 			struct rte_mbuf *mbuf;
 			uint32_t pkt_index;
 
-			lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask);
+			lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask, f);
 			lookup1_stage1_dosig(mbuf, bucket, f);
 			lookup1_stage2_lru(pkt_index, mbuf, bucket,
 				pkts_mask_out, entries, f);
@@ -1069,7 +1072,7 @@ rte_table_hash_lookup_key16_lru_dosig(
 	 */
 	/* Pipeline stage 0 */
 	lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
-		pkts_mask);
+		pkts_mask, f);
 
 	/* Pipeline feed */
 	mbuf10 = mbuf00;
@@ -1079,7 +1082,7 @@ rte_table_hash_lookup_key16_lru_dosig(
 
 	/* Pipeline stage 0 */
 	lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
-		pkts_mask);
+		pkts_mask, f);
 
 	/* Pipeline stage 1 */
 	lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f);
@@ -1103,7 +1106,7 @@ rte_table_hash_lookup_key16_lru_dosig(
 
 		/* Pipeline stage 0 */
 		lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,
-			mbuf00, mbuf01, pkts, pkts_mask);
+			mbuf00, mbuf01, pkts, pkts_mask, f);
 
 		/* Pipeline stage 1 */
 		lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f);
@@ -1181,7 +1184,7 @@ rte_table_hash_lookup_key16_ext(
 			struct rte_mbuf *mbuf;
 			uint32_t pkt_index;
 
-			lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask);
+			lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask, f);
 			lookup1_stage1(mbuf, bucket, f);
 			lookup1_stage2_ext(pkt_index, mbuf, bucket,
 				pkts_mask_out, entries, buckets_mask,
@@ -1197,7 +1200,7 @@ rte_table_hash_lookup_key16_ext(
 	 */
 	/* Pipeline stage 0 */
 	lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
-		pkts_mask);
+		pkts_mask, f);
 
 	/* Pipeline feed */
 	mbuf10 = mbuf00;
@@ -1207,7 +1210,7 @@ rte_table_hash_lookup_key16_ext(
 
 	/* Pipeline stage 0 */
 	lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
-		pkts_mask);
+		pkts_mask, f);
 
 	/* Pipeline stage 1 */
 	lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
@@ -1231,7 +1234,7 @@ rte_table_hash_lookup_key16_ext(
 
 		/* Pipeline stage 0 */
 		lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,
-			mbuf00, mbuf01, pkts, pkts_mask);
+			mbuf00, mbuf01, pkts, pkts_mask, f);
 
 		/* Pipeline stage 1 */
 		lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
@@ -1333,7 +1336,7 @@ rte_table_hash_lookup_key16_ext_dosig(
 			struct rte_mbuf *mbuf;
 			uint32_t pkt_index;
 
-			lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask);
+			lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask, f);
 			lookup1_stage1_dosig(mbuf, bucket, f);
 			lookup1_stage2_ext(pkt_index, mbuf, bucket,
 				pkts_mask_out, entries, buckets_mask,
@@ -1349,7 +1352,7 @@ rte_table_hash_lookup_key16_ext_dosig(
 	 */
 	/* Pipeline stage 0 */
 	lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
-		pkts_mask);
+		pkts_mask, f);
 
 	/* Pipeline feed */
 	mbuf10 = mbuf00;
@@ -1359,7 +1362,7 @@ rte_table_hash_lookup_key16_ext_dosig(
 
 	/* Pipeline stage 0 */
 	lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
-		pkts_mask);
+		pkts_mask, f);
 
 	/* Pipeline stage 1 */
 	lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f);
@@ -1383,7 +1386,7 @@ rte_table_hash_lookup_key16_ext_dosig(
 
 		/* Pipeline stage 0 */
 		lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,
-			mbuf00, mbuf01, pkts, pkts_mask);
+			mbuf00, mbuf01, pkts, pkts_mask, f);
 
 		/* Pipeline stage 1 */
 		lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f);
diff --git a/lib/librte_table/rte_table_hash_key32.c b/lib/librte_table/rte_table_hash_key32.c
index 5fe4161..e33029a 100644
--- a/lib/librte_table/rte_table_hash_key32.c
+++ b/lib/librte_table/rte_table_hash_key32.c
@@ -591,16 +591,17 @@ rte_table_hash_entry_delete_key32_ext(
 		pos = 3;					\
 }
 
-#define lookup1_stage0(pkt0_index, mbuf0, pkts, pkts_mask)	\
+#define lookup1_stage0(pkt0_index, mbuf0, pkts, pkts_mask, f)	\
 {								\
 	uint64_t pkt_mask;					\
+	uint32_t key_offset = f->key_offset;	\
 								\
 	pkt0_index = __builtin_ctzll(pkts_mask);		\
 	pkt_mask = 1LLU << pkt0_index;				\
 	pkts_mask &= ~pkt_mask;					\
 								\
 	mbuf0 = pkts[pkt0_index];				\
-	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf0, 0));	\
+	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf0, key_offset));\
 }
 
 #define lookup1_stage1(mbuf1, bucket1, f)			\
@@ -698,36 +699,38 @@ rte_table_hash_entry_delete_key32_ext(
 }
 
 #define lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01,\
-	pkts, pkts_mask)					\
+	pkts, pkts_mask, f)					\
 {								\
 	uint64_t pkt00_mask, pkt01_mask;			\
+	uint32_t key_offset = f->key_offset;		\
 								\
 	pkt00_index = __builtin_ctzll(pkts_mask);		\
 	pkt00_mask = 1LLU << pkt00_index;			\
 	pkts_mask &= ~pkt00_mask;				\
 								\
 	mbuf00 = pkts[pkt00_index];				\
-	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, 0));	\
+	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, key_offset));\
 								\
 	pkt01_index = __builtin_ctzll(pkts_mask);		\
 	pkt01_mask = 1LLU << pkt01_index;			\
 	pkts_mask &= ~pkt01_mask;				\
 								\
 	mbuf01 = pkts[pkt01_index];				\
-	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, 0));	\
+	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, key_offset));\
 }
 
 #define lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,\
-	mbuf00, mbuf01, pkts, pkts_mask)			\
+	mbuf00, mbuf01, pkts, pkts_mask, f)			\
 {								\
 	uint64_t pkt00_mask, pkt01_mask;			\
+	uint32_t key_offset = f->key_offset;		\
 								\
 	pkt00_index = __builtin_ctzll(pkts_mask);		\
 	pkt00_mask = 1LLU << pkt00_index;			\
 	pkts_mask &= ~pkt00_mask;				\
 								\
 	mbuf00 = pkts[pkt00_index];				\
-	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, 0));	\
+	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, key_offset));	\
 								\
 	pkt01_index = __builtin_ctzll(pkts_mask);		\
 	if (pkts_mask == 0)					\
@@ -737,7 +740,7 @@ rte_table_hash_entry_delete_key32_ext(
 	pkts_mask &= ~pkt01_mask;				\
 								\
 	mbuf01 = pkts[pkt01_index];				\
-	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, 0));	\
+	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, key_offset));	\
 }
 
 #define lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f)	\
@@ -852,7 +855,7 @@ rte_table_hash_lookup_key32_lru(
 			struct rte_mbuf *mbuf;
 			uint32_t pkt_index;
 
-			lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask);
+			lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask, f);
 			lookup1_stage1(mbuf, bucket, f);
 			lookup1_stage2_lru(pkt_index, mbuf, bucket,
 					pkts_mask_out, entries, f);
@@ -869,7 +872,7 @@ rte_table_hash_lookup_key32_lru(
 	 */
 	/* Pipeline stage 0 */
 	lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
-		pkts_mask);
+		pkts_mask, f);
 
 	/* Pipeline feed */
 	mbuf10 = mbuf00;
@@ -879,7 +882,7 @@ rte_table_hash_lookup_key32_lru(
 
 	/* Pipeline stage 0 */
 	lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
-		pkts_mask);
+		pkts_mask, f);
 
 	/* Pipeline stage 1 */
 	lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
@@ -903,7 +906,7 @@ rte_table_hash_lookup_key32_lru(
 
 		/* Pipeline stage 0 */
 		lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,
-			mbuf00, mbuf01, pkts, pkts_mask);
+			mbuf00, mbuf01, pkts, pkts_mask, f);
 
 		/* Pipeline stage 1 */
 		lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
@@ -981,7 +984,7 @@ rte_table_hash_lookup_key32_ext(
 			struct rte_mbuf *mbuf;
 			uint32_t pkt_index;
 
-			lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask);
+			lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask, f);
 			lookup1_stage1(mbuf, bucket, f);
 			lookup1_stage2_ext(pkt_index, mbuf, bucket,
 				pkts_mask_out, entries, buckets_mask, buckets,
@@ -997,7 +1000,7 @@ rte_table_hash_lookup_key32_ext(
 	 */
 	/* Pipeline stage 0 */
 	lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
-		pkts_mask);
+		pkts_mask, f);
 
 	/* Pipeline feed */
 	mbuf10 = mbuf00;
@@ -1007,7 +1010,7 @@ rte_table_hash_lookup_key32_ext(
 
 	/* Pipeline stage 0 */
 	lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
-		pkts_mask);
+		pkts_mask, f);
 
 	/* Pipeline stage 1 */
 	lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
@@ -1031,7 +1034,7 @@ rte_table_hash_lookup_key32_ext(
 
 		/* Pipeline stage 0 */
 		lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,
-			mbuf00, mbuf01, pkts, pkts_mask);
+			mbuf00, mbuf01, pkts, pkts_mask, f);
 
 		/* Pipeline stage 1 */
 		lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
diff --git a/lib/librte_table/rte_table_hash_key8.c b/lib/librte_table/rte_table_hash_key8.c
index ccb20cf..79d7184 100644
--- a/lib/librte_table/rte_table_hash_key8.c
+++ b/lib/librte_table/rte_table_hash_key8.c
@@ -568,16 +568,17 @@ rte_table_hash_entry_delete_key8_ext(
 		pos = 3;					\
 }
 
-#define lookup1_stage0(pkt0_index, mbuf0, pkts, pkts_mask)	\
+#define lookup1_stage0(pkt0_index, mbuf0, pkts, pkts_mask, f)	\
 {								\
 	uint64_t pkt_mask;					\
+	uint32_t key_offset = f->key_offset;\
 								\
 	pkt0_index = __builtin_ctzll(pkts_mask);		\
 	pkt_mask = 1LLU << pkt0_index;				\
 	pkts_mask &= ~pkt_mask;					\
 								\
 	mbuf0 = pkts[pkt0_index];				\
-	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf0, 0));	\
+	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf0, key_offset));	\
 }
 
 #define lookup1_stage1(mbuf1, bucket1, f)			\
@@ -693,36 +694,38 @@ rte_table_hash_entry_delete_key8_ext(
 }
 
 #define lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01,\
-	pkts, pkts_mask)					\
+	pkts, pkts_mask, f)					\
 {								\
 	uint64_t pkt00_mask, pkt01_mask;			\
+	uint32_t key_offset = f->key_offset;		\
 								\
 	pkt00_index = __builtin_ctzll(pkts_mask);		\
 	pkt00_mask = 1LLU << pkt00_index;			\
 	pkts_mask &= ~pkt00_mask;				\
 								\
 	mbuf00 = pkts[pkt00_index];				\
-	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, 0));	\
+	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, key_offset));\
 								\
 	pkt01_index = __builtin_ctzll(pkts_mask);		\
 	pkt01_mask = 1LLU << pkt01_index;			\
 	pkts_mask &= ~pkt01_mask;				\
 								\
 	mbuf01 = pkts[pkt01_index];				\
-	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, 0));	\
+	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, key_offset));\
 }
 
 #define lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,\
-	mbuf00, mbuf01, pkts, pkts_mask)			\
+	mbuf00, mbuf01, pkts, pkts_mask, f)			\
 {								\
 	uint64_t pkt00_mask, pkt01_mask;			\
+	uint32_t key_offset = f->key_offset;		\
 								\
 	pkt00_index = __builtin_ctzll(pkts_mask);		\
 	pkt00_mask = 1LLU << pkt00_index;			\
 	pkts_mask &= ~pkt00_mask;				\
 								\
 	mbuf00 = pkts[pkt00_index];				\
-	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, 0));	\
+	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, key_offset));\
 								\
 	pkt01_index = __builtin_ctzll(pkts_mask);		\
 	if (pkts_mask == 0)					\
@@ -732,7 +735,7 @@ rte_table_hash_entry_delete_key8_ext(
 	pkts_mask &= ~pkt01_mask;				\
 								\
 	mbuf01 = pkts[pkt01_index];				\
-	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, 0));	\
+	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, key_offset));\
 }
 
 #define lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f)	\
@@ -882,7 +885,7 @@ rte_table_hash_lookup_key8_lru(
 			struct rte_mbuf *mbuf;
 			uint32_t pkt_index;
 
-			lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask);
+			lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask, f);
 			lookup1_stage1(mbuf, bucket, f);
 			lookup1_stage2_lru(pkt_index, mbuf, bucket,
 					pkts_mask_out, entries, f);
@@ -899,7 +902,7 @@ rte_table_hash_lookup_key8_lru(
 	 */
 	/* Pipeline stage 0 */
 	lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
-		pkts_mask);
+		pkts_mask, f);
 
 	/* Pipeline feed */
 	mbuf10 = mbuf00;
@@ -909,7 +912,7 @@ rte_table_hash_lookup_key8_lru(
 
 	/* Pipeline stage 0 */
 	lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
-		pkts_mask);
+		pkts_mask, f);
 
 	/* Pipeline stage 1 */
 	lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
@@ -933,7 +936,7 @@ rte_table_hash_lookup_key8_lru(
 
 		/* Pipeline stage 0 */
 		lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,
-			mbuf00, mbuf01, pkts, pkts_mask);
+			mbuf00, mbuf01, pkts, pkts_mask, f);
 
 		/* Pipeline stage 1 */
 		lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
@@ -1008,7 +1011,7 @@ rte_table_hash_lookup_key8_lru_dosig(
 			struct rte_mbuf *mbuf;
 			uint32_t pkt_index;
 
-			lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask);
+			lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask, f);
 			lookup1_stage1_dosig(mbuf, bucket, f);
 			lookup1_stage2_lru(pkt_index, mbuf, bucket,
 				pkts_mask_out, entries, f);
@@ -1025,7 +1028,7 @@ rte_table_hash_lookup_key8_lru_dosig(
 	 */
 	/* Pipeline stage 0 */
 	lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
-		pkts_mask);
+		pkts_mask, f);
 
 	/* Pipeline feed */
 	mbuf10 = mbuf00;
@@ -1035,7 +1038,7 @@ rte_table_hash_lookup_key8_lru_dosig(
 
 	/* Pipeline stage 0 */
 	lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
-		pkts_mask);
+		pkts_mask, f);
 
 	/* Pipeline stage 1 */
 	lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f);
@@ -1059,7 +1062,7 @@ rte_table_hash_lookup_key8_lru_dosig(
 
 		/* Pipeline stage 0 */
 		lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,
-			mbuf00, mbuf01, pkts, pkts_mask);
+			mbuf00, mbuf01, pkts, pkts_mask, f);
 
 		/* Pipeline stage 1 */
 		lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f);
@@ -1136,7 +1139,7 @@ rte_table_hash_lookup_key8_ext(
 			struct rte_mbuf *mbuf;
 			uint32_t pkt_index;
 
-			lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask);
+			lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask, f);
 			lookup1_stage1(mbuf, bucket, f);
 			lookup1_stage2_ext(pkt_index, mbuf, bucket,
 				pkts_mask_out, entries, buckets_mask, buckets,
@@ -1152,7 +1155,7 @@ rte_table_hash_lookup_key8_ext(
 	 */
 	/* Pipeline stage 0 */
 	lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
-		pkts_mask);
+		pkts_mask, f);
 
 	/* Pipeline feed */
 	mbuf10 = mbuf00;
@@ -1162,7 +1165,7 @@ rte_table_hash_lookup_key8_ext(
 
 	/* Pipeline stage 0 */
 	lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
-		pkts_mask);
+		pkts_mask, f);
 
 	/* Pipeline stage 1 */
 	lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
@@ -1186,7 +1189,7 @@ rte_table_hash_lookup_key8_ext(
 
 		/* Pipeline stage 0 */
 		lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,
-			mbuf00, mbuf01, pkts, pkts_mask);
+			mbuf00, mbuf01, pkts, pkts_mask, f);
 
 		/* Pipeline stage 1 */
 		lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
@@ -1286,7 +1289,7 @@ rte_table_hash_lookup_key8_ext_dosig(
 			struct rte_mbuf *mbuf;
 			uint32_t pkt_index;
 
-			lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask);
+			lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask, f);
 			lookup1_stage1_dosig(mbuf, bucket, f);
 			lookup1_stage2_ext(pkt_index, mbuf, bucket,
 				pkts_mask_out, entries, buckets_mask,
@@ -1302,7 +1305,7 @@ rte_table_hash_lookup_key8_ext_dosig(
 	 */
 	/* Pipeline stage 0 */
 	lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
-		pkts_mask);
+		pkts_mask, f);
 
 	/* Pipeline feed */
 	mbuf10 = mbuf00;
@@ -1312,7 +1315,7 @@ rte_table_hash_lookup_key8_ext_dosig(
 
 	/* Pipeline stage 0 */
 	lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
-		pkts_mask);
+		pkts_mask, f);
 
 	/* Pipeline stage 1 */
 	lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f);
@@ -1336,7 +1339,7 @@ rte_table_hash_lookup_key8_ext_dosig(
 
 		/* Pipeline stage 0 */
 		lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,
-			mbuf00, mbuf01, pkts, pkts_mask);
+			mbuf00, mbuf01, pkts, pkts_mask, f);
 
 		/* Pipeline stage 1 */
 		lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f);
diff --git a/lib/librte_table/rte_table_hash_lru.c b/lib/librte_table/rte_table_hash_lru.c
index 1640dc9..b9b88e9 100644
--- a/lib/librte_table/rte_table_hash_lru.c
+++ b/lib/librte_table/rte_table_hash_lru.c
@@ -576,6 +576,7 @@ static int rte_table_hash_lru_lookup_unoptimized(
 {								\
 	uint64_t pkt00_mask, pkt01_mask;			\
 	struct rte_mbuf *mbuf00, *mbuf01;			\
+	uint32_t key_offset = t->key_offset;		\
 								\
 	pkt00_index = __builtin_ctzll(pkts_mask);		\
 	pkt00_mask = 1LLU << pkt00_index;			\
@@ -587,8 +588,8 @@ static int rte_table_hash_lru_lookup_unoptimized(
 	pkts_mask &= ~pkt01_mask;				\
 	mbuf01 = pkts[pkt01_index];				\
 								\
-	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, 0));	\
-	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, 0));	\
+	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, key_offset));\
+	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, key_offset));\
 }
 
 #define lookup2_stage0_with_odd_support(t, g, pkts, pkts_mask, pkt00_index, \
@@ -596,6 +597,7 @@ static int rte_table_hash_lru_lookup_unoptimized(
 {								\
 	uint64_t pkt00_mask, pkt01_mask;			\
 	struct rte_mbuf *mbuf00, *mbuf01;			\
+	uint32_t key_offset = t->key_offset;		\
 								\
 	pkt00_index = __builtin_ctzll(pkts_mask);		\
 	pkt00_mask = 1LLU << pkt00_index;			\
@@ -610,8 +612,8 @@ static int rte_table_hash_lru_lookup_unoptimized(
 	pkts_mask &= ~pkt01_mask;				\
 	mbuf01 = pkts[pkt01_index];				\
 								\
-	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, 0));	\
-	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, 0));	\
+	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, key_offset));\
+	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, key_offset));\
 }
 
 #define lookup2_stage1(t, g, pkts, pkt10_index, pkt11_index)	\
-- 
2.1.0



More information about the dev mailing list