[dpdk-dev] [PATCH 3/8] librte_table: add 16 byte hash table operations with computed lookup

roy.fan.zhang at intel.com roy.fan.zhang at intel.com
Sat Sep 26 00:33:07 CEST 2015


From: Fan Zhang <roy.fan.zhang at intel.com>

This patch is to adding hash table operations for key signature
computed on lookup ("do-sig") for LRU hash tables and Extendible buckets.

Signed-off-by: Fan Zhang <roy.fan.zhang at intel.com>
---
 lib/librte_table/rte_table_hash.h       |   8 +
 lib/librte_table/rte_table_hash_key16.c | 358 +++++++++++++++++++++++++++++++-
 2 files changed, 363 insertions(+), 3 deletions(-)

diff --git a/lib/librte_table/rte_table_hash.h b/lib/librte_table/rte_table_hash.h
index e2c60e1..9d17516 100644
--- a/lib/librte_table/rte_table_hash.h
+++ b/lib/librte_table/rte_table_hash.h
@@ -271,6 +271,10 @@ struct rte_table_hash_key16_lru_params {
 /** LRU hash table operations for pre-computed key signature */
 extern struct rte_table_ops rte_table_hash_key16_lru_ops;
 
+/** LRU hash table operations for key signature computed on lookup
+    ("do-sig") */
+extern struct rte_table_ops rte_table_hash_key16_lru_dosig_ops;
+
 /** Extendible bucket hash table parameters */
 struct rte_table_hash_key16_ext_params {
 	/** Maximum number of entries (and keys) in the table */
@@ -301,6 +305,10 @@ struct rte_table_hash_key16_ext_params {
 /** Extendible bucket operations for pre-computed key signature */
 extern struct rte_table_ops rte_table_hash_key16_ext_ops;
 
+/** Extendible bucket hash table operations for key signature computed on
+    lookup ("do-sig") */
+extern struct rte_table_ops rte_table_hash_key16_ext_dosig_ops;
+
 /**
  * 32-byte key hash tables
  *
diff --git a/lib/librte_table/rte_table_hash_key16.c b/lib/librte_table/rte_table_hash_key16.c
index ffd3249..427b534 100644
--- a/lib/librte_table/rte_table_hash_key16.c
+++ b/lib/librte_table/rte_table_hash_key16.c
@@ -620,6 +620,27 @@ rte_table_hash_entry_delete_key16_ext(
 	rte_prefetch0((void *)(((uintptr_t) bucket1) + RTE_CACHE_LINE_SIZE));\
 }
 
+#define lookup1_stage1_dosig(mbuf1, bucket1, f)			\
+{								\
+	uint64_t *key;						\
+	uint64_t signature = 0;				\
+	uint32_t bucket_index;				\
+	uint64_t hash_key_buffer[2];		\
+								\
+	key = RTE_MBUF_METADATA_UINT64_PTR(mbuf1, f->key_offset);\
+								\
+	hash_key_buffer[0] = key[0] & f->key_mask[0];	\
+	hash_key_buffer[1] = key[1] & f->key_mask[1];	\
+	signature = f->f_hash(hash_key_buffer,			\
+			RTE_TABLE_HASH_KEY_SIZE, f->seed);		\
+								\
+	bucket_index = signature & (f->n_buckets - 1);		\
+	bucket1 = (struct rte_bucket_4_16 *)			\
+		&f->memory[bucket_index * f->bucket_size];	\
+	rte_prefetch0(bucket1);					\
+	rte_prefetch0((void *)(((uintptr_t) bucket1) + RTE_CACHE_LINE_SIZE));\
+}
+
 #define lookup1_stage2_lru(pkt2_index, mbuf2, bucket2,		\
 		pkts_mask_out, entries, f)			\
 {								\
@@ -769,6 +790,36 @@ rte_table_hash_entry_delete_key16_ext(
 	rte_prefetch0((void *)(((uintptr_t) bucket11) + RTE_CACHE_LINE_SIZE));\
 }
 
+#define lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f)	\
+{								\
+	uint64_t *key10, *key11;					\
+	uint64_t hash_offset_buffer[2];				\
+	uint64_t signature10, signature11;			\
+	uint32_t bucket10_index, bucket11_index;	\
+								\
+	key10 = RTE_MBUF_METADATA_UINT64_PTR(mbuf10, f->key_offset);\
+	hash_offset_buffer[0] = key10[0] & f->key_mask[0];	\
+	hash_offset_buffer[1] = key10[1] & f->key_mask[1];	\
+	signature10 = f->f_hash(hash_offset_buffer,			\
+			RTE_TABLE_HASH_KEY_SIZE, f->seed);\
+	bucket10_index = signature10 & (f->n_buckets - 1);	\
+	bucket10 = (struct rte_bucket_4_16 *)				\
+		&f->memory[bucket10_index * f->bucket_size];	\
+	rte_prefetch0(bucket10);				\
+	rte_prefetch0((void *)(((uintptr_t) bucket10) + RTE_CACHE_LINE_SIZE));\
+								\
+	key11 = RTE_MBUF_METADATA_UINT64_PTR(mbuf11, f->key_offset);\
+	hash_offset_buffer[0] = key11[0] & f->key_mask[0];	\
+	hash_offset_buffer[1] = key11[1] & f->key_mask[1];	\
+	signature11 = f->f_hash(hash_offset_buffer,			\
+			RTE_TABLE_HASH_KEY_SIZE, f->seed);\
+	bucket11_index = signature11 & (f->n_buckets - 1);	\
+	bucket11 = (struct rte_bucket_4_16 *)			\
+		&f->memory[bucket11_index * f->bucket_size];	\
+	rte_prefetch0(bucket11);				\
+	rte_prefetch0((void *)(((uintptr_t) bucket11) + RTE_CACHE_LINE_SIZE));\
+}
+
 #define lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,\
 		bucket20, bucket21, pkts_mask_out, entries, f)	\
 {								\
@@ -878,7 +929,8 @@ rte_table_hash_lookup_key16_lru(
 		}
 
 		*lookup_hit_mask = pkts_mask_out;
-		RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - __builtin_popcountll(pkts_mask_out));
+		RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(f,
+			n_pkts_in - __builtin_popcountll(pkts_mask_out));
 		return 0;
 	}
 
@@ -968,11 +1020,141 @@ rte_table_hash_lookup_key16_lru(
 		bucket20, bucket21, pkts_mask_out, entries, f);
 
 	*lookup_hit_mask = pkts_mask_out;
-	RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - __builtin_popcountll(pkts_mask_out));
+	RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in -
+		__builtin_popcountll(pkts_mask_out));
 	return 0;
 } /* rte_table_hash_lookup_key16_lru() */
 
 static int
+rte_table_hash_lookup_key16_lru_dosig(
+	void *table,
+	struct rte_mbuf **pkts,
+	uint64_t pkts_mask,
+	uint64_t *lookup_hit_mask,
+	void **entries)
+{
+	struct rte_table_hash *f = (struct rte_table_hash *) table;
+	struct rte_bucket_4_16 *bucket10, *bucket11, *bucket20, *bucket21;
+	struct rte_mbuf *mbuf00, *mbuf01, *mbuf10, *mbuf11, *mbuf20, *mbuf21;
+	uint32_t pkt00_index, pkt01_index, pkt10_index;
+	uint32_t pkt11_index, pkt20_index, pkt21_index;
+	uint64_t pkts_mask_out = 0;
+
+	__rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask);
+
+	RTE_TABLE_HASH_KEY16_STATS_PKTS_IN_ADD(f, n_pkts_in);
+
+	/* Cannot run the pipeline with less than 5 packets */
+	if (__builtin_popcountll(pkts_mask) < 5) {
+		for ( ; pkts_mask; ) {
+			struct rte_bucket_4_16 *bucket;
+			struct rte_mbuf *mbuf;
+			uint32_t pkt_index;
+
+			lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask);
+			lookup1_stage1_dosig(mbuf, bucket, f);
+			lookup1_stage2_lru(pkt_index, mbuf, bucket,
+				pkts_mask_out, entries, f);
+		}
+
+		*lookup_hit_mask = pkts_mask_out;
+		RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in -
+			__builtin_popcountll(pkts_mask_out));
+		return 0;
+	}
+
+	/*
+	 * Pipeline fill
+	 *
+	 */
+	/* Pipeline stage 0 */
+	lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
+		pkts_mask);
+
+	/* Pipeline feed */
+	mbuf10 = mbuf00;
+	mbuf11 = mbuf01;
+	pkt10_index = pkt00_index;
+	pkt11_index = pkt01_index;
+
+	/* Pipeline stage 0 */
+	lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
+		pkts_mask);
+
+	/* Pipeline stage 1 */
+	lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f);
+
+	/*
+	 * Pipeline run
+	 *
+	 */
+	for ( ; pkts_mask; ) {
+		/* Pipeline feed */
+		bucket20 = bucket10;
+		bucket21 = bucket11;
+		mbuf20 = mbuf10;
+		mbuf21 = mbuf11;
+		mbuf10 = mbuf00;
+		mbuf11 = mbuf01;
+		pkt20_index = pkt10_index;
+		pkt21_index = pkt11_index;
+		pkt10_index = pkt00_index;
+		pkt11_index = pkt01_index;
+
+		/* Pipeline stage 0 */
+		lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,
+			mbuf00, mbuf01, pkts, pkts_mask);
+
+		/* Pipeline stage 1 */
+		lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f);
+
+		/* Pipeline stage 2 */
+		lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,
+			bucket20, bucket21, pkts_mask_out, entries, f);
+	}
+
+	/*
+	 * Pipeline flush
+	 *
+	 */
+	/* Pipeline feed */
+	bucket20 = bucket10;
+	bucket21 = bucket11;
+	mbuf20 = mbuf10;
+	mbuf21 = mbuf11;
+	mbuf10 = mbuf00;
+	mbuf11 = mbuf01;
+	pkt20_index = pkt10_index;
+	pkt21_index = pkt11_index;
+	pkt10_index = pkt00_index;
+	pkt11_index = pkt01_index;
+
+	/* Pipeline stage 1 */
+	lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f);
+
+	/* Pipeline stage 2 */
+	lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,
+		bucket20, bucket21, pkts_mask_out, entries, f);
+
+	/* Pipeline feed */
+	bucket20 = bucket10;
+	bucket21 = bucket11;
+	mbuf20 = mbuf10;
+	mbuf21 = mbuf11;
+	pkt20_index = pkt10_index;
+	pkt21_index = pkt11_index;
+
+	/* Pipeline stage 2 */
+	lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,
+		bucket20, bucket21, pkts_mask_out, entries, f);
+
+	*lookup_hit_mask = pkts_mask_out;
+	RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in -
+		__builtin_popcountll(pkts_mask_out));
+	return 0;
+} /* rte_table_hash_lookup_key16_lru_dosig() */
+
+static int
 rte_table_hash_lookup_key16_ext(
 	void *table,
 	struct rte_mbuf **pkts,
@@ -1118,11 +1300,164 @@ grind_next_buckets:
 	}
 
 	*lookup_hit_mask = pkts_mask_out;
-	RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - __builtin_popcountll(pkts_mask_out));
+	RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in -
+		__builtin_popcountll(pkts_mask_out));
 	return 0;
 } /* rte_table_hash_lookup_key16_ext() */
 
 static int
+rte_table_hash_lookup_key16_ext_dosig(
+	void *table,
+	struct rte_mbuf **pkts,
+	uint64_t pkts_mask,
+	uint64_t *lookup_hit_mask,
+	void **entries)
+{
+	struct rte_table_hash *f = (struct rte_table_hash *) table;
+	struct rte_bucket_4_16 *bucket10, *bucket11, *bucket20, *bucket21;
+	struct rte_mbuf *mbuf00, *mbuf01, *mbuf10, *mbuf11, *mbuf20, *mbuf21;
+	uint32_t pkt00_index, pkt01_index, pkt10_index;
+	uint32_t pkt11_index, pkt20_index, pkt21_index;
+	uint64_t pkts_mask_out = 0, buckets_mask = 0;
+	struct rte_bucket_4_16 *buckets[RTE_PORT_IN_BURST_SIZE_MAX];
+	uint64_t *keys[RTE_PORT_IN_BURST_SIZE_MAX];
+
+	__rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask);
+
+	RTE_TABLE_HASH_KEY16_STATS_PKTS_IN_ADD(f, n_pkts_in);
+
+	/* Cannot run the pipeline with less than 5 packets */
+	if (__builtin_popcountll(pkts_mask) < 5) {
+		for ( ; pkts_mask; ) {
+			struct rte_bucket_4_16 *bucket;
+			struct rte_mbuf *mbuf;
+			uint32_t pkt_index;
+
+			lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask);
+			lookup1_stage1_dosig(mbuf, bucket, f);
+			lookup1_stage2_ext(pkt_index, mbuf, bucket,
+				pkts_mask_out, entries, buckets_mask,
+				buckets, keys, f);
+		}
+
+		goto grind_next_buckets;
+	}
+
+	/*
+	 * Pipeline fill
+	 *
+	 */
+	/* Pipeline stage 0 */
+	lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
+		pkts_mask);
+
+	/* Pipeline feed */
+	mbuf10 = mbuf00;
+	mbuf11 = mbuf01;
+	pkt10_index = pkt00_index;
+	pkt11_index = pkt01_index;
+
+	/* Pipeline stage 0 */
+	lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
+		pkts_mask);
+
+	/* Pipeline stage 1 */
+	lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f);
+
+	/*
+	 * Pipeline run
+	 *
+	 */
+	for ( ; pkts_mask; ) {
+		/* Pipeline feed */
+		bucket20 = bucket10;
+		bucket21 = bucket11;
+		mbuf20 = mbuf10;
+		mbuf21 = mbuf11;
+		mbuf10 = mbuf00;
+		mbuf11 = mbuf01;
+		pkt20_index = pkt10_index;
+		pkt21_index = pkt11_index;
+		pkt10_index = pkt00_index;
+		pkt11_index = pkt01_index;
+
+		/* Pipeline stage 0 */
+		lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,
+			mbuf00, mbuf01, pkts, pkts_mask);
+
+		/* Pipeline stage 1 */
+		lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f);
+
+		/* Pipeline stage 2 */
+		lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
+			bucket20, bucket21, pkts_mask_out, entries,
+			buckets_mask, buckets, keys, f);
+	}
+
+	/*
+	 * Pipeline flush
+	 *
+	 */
+	/* Pipeline feed */
+	bucket20 = bucket10;
+	bucket21 = bucket11;
+	mbuf20 = mbuf10;
+	mbuf21 = mbuf11;
+	mbuf10 = mbuf00;
+	mbuf11 = mbuf01;
+	pkt20_index = pkt10_index;
+	pkt21_index = pkt11_index;
+	pkt10_index = pkt00_index;
+	pkt11_index = pkt01_index;
+
+	/* Pipeline stage 1 */
+	lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f);
+
+	/* Pipeline stage 2 */
+	lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
+		bucket20, bucket21, pkts_mask_out, entries,
+		buckets_mask, buckets, keys, f);
+
+	/* Pipeline feed */
+	bucket20 = bucket10;
+	bucket21 = bucket11;
+	mbuf20 = mbuf10;
+	mbuf21 = mbuf11;
+	pkt20_index = pkt10_index;
+	pkt21_index = pkt11_index;
+
+	/* Pipeline stage 2 */
+	lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
+		bucket20, bucket21, pkts_mask_out, entries,
+		buckets_mask, buckets, keys, f);
+
+grind_next_buckets:
+	/* Grind next buckets */
+	for ( ; buckets_mask; ) {
+		uint64_t buckets_mask_next = 0;
+
+		for ( ; buckets_mask; ) {
+			uint64_t pkt_mask;
+			uint32_t pkt_index;
+
+			pkt_index = __builtin_ctzll(buckets_mask);
+			pkt_mask = 1LLU << pkt_index;
+			buckets_mask &= ~pkt_mask;
+
+			lookup_grinder(pkt_index, buckets, keys, pkts_mask_out,
+				entries, buckets_mask_next, f);
+		}
+
+		buckets_mask = buckets_mask_next;
+	}
+
+	*lookup_hit_mask = pkts_mask_out;
+	RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in -
+		__builtin_popcountll(pkts_mask_out));
+	return 0;
+} /* rte_table_hash_lookup_key16_ext_dosig() */
+
+static int
 rte_table_hash_key16_stats_read(void *table, struct rte_table_stats *stats, int clear)
 {
 	struct rte_table_hash *t = (struct rte_table_hash *) table;
@@ -1145,6 +1480,15 @@ struct rte_table_ops rte_table_hash_key16_lru_ops = {
 	.f_stats = rte_table_hash_key16_stats_read,
 };
 
+struct rte_table_ops rte_table_hash_key16_lru_dosig_ops = {
+	.f_create = rte_table_hash_create_key16_lru,
+	.f_free = rte_table_hash_free_key16_lru,
+	.f_add = rte_table_hash_entry_add_key16_lru,
+	.f_delete = rte_table_hash_entry_delete_key16_lru,
+	.f_lookup = rte_table_hash_lookup_key16_lru_dosig,
+	.f_stats = rte_table_hash_key16_stats_read,
+};
+
 struct rte_table_ops rte_table_hash_key16_ext_ops = {
 	.f_create = rte_table_hash_create_key16_ext,
 	.f_free = rte_table_hash_free_key16_ext,
@@ -1154,3 +1498,11 @@ struct rte_table_ops rte_table_hash_key16_ext_ops = {
 	.f_stats = rte_table_hash_key16_stats_read,
 };
 
+struct rte_table_ops rte_table_hash_key16_ext_dosig_ops = {
+	.f_create = rte_table_hash_create_key16_ext,
+	.f_free = rte_table_hash_free_key16_ext,
+	.f_add = rte_table_hash_entry_add_key16_ext,
+	.f_delete = rte_table_hash_entry_delete_key16_ext,
+	.f_lookup = rte_table_hash_lookup_key16_ext_dosig,
+	.f_stats = rte_table_hash_key16_stats_read,
+};
-- 
2.1.0



More information about the dev mailing list