[dpdk-dev] [PATCH v3] rte_hash: add scalable multi-writer insertion w/ Intel TSX

Wei Shen wei1.shen at intel.com
Fri Jun 17 00:14:14 CEST 2016


This patch introduced scalable multi-writer Cuckoo Hash insertion
based on a split Cuckoo Search and Move operation using Intel
TSX. It can do scalable hash insertion with 22 cores with little
performance loss and negligible TSX abortion rate.

* Added an extra rte_hash flag definition to switch default single writer
  Cuckoo Hash behavior to multiwriter.
    - If HTM is available, it would use hardware feature for concurrency.
    - If HTM is not available, it would fall back to spinlock.

* Created a rte_cuckoo_hash_x86.h file to hold all x86-arch related
  cuckoo_hash functions. And rte_cuckoo_hash.c uses compile time flag to
  select x86 file or other platform-specific implementations. While HTM check
  is still done at runtime (same idea with
  RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT)

* Moved rte_hash private struct definitions to rte_cuckoo_hash.h, to allow
  rte_cuckoo_hash_x86.h or future platform dependent functions to include.

* Following new functions are created for consistent names when new platform
  TM support are added.
    - rte_hash_cuckoo_move_insert_mw_tm: do insertion with bucket movement.
    - rte_hash_cuckoo_insert_mw_tm: do insertion without bucket movement.

* One extra multi-writer test case is added.

Signed-off-by: Shen Wei <wei1.shen at intel.com>
Signed-off-by: Sameh Gobriel <sameh.gobriel at intel.com>
---
 app/test/Makefile                      |   1 +
 app/test/test_hash_multiwriter.c       | 287 +++++++++++++++++++++++++++++++++
 doc/guides/rel_notes/release_16_07.rst |  12 ++
 lib/librte_hash/rte_cuckoo_hash.c      | 258 ++++++++++-------------------
 lib/librte_hash/rte_cuckoo_hash.h      | 219 +++++++++++++++++++++++++
 lib/librte_hash/rte_cuckoo_hash_x86.h  | 193 ++++++++++++++++++++++
 lib/librte_hash/rte_hash.h             |   3 +
 7 files changed, 796 insertions(+), 177 deletions(-)
 create mode 100644 app/test/test_hash_multiwriter.c
 create mode 100644 lib/librte_hash/rte_cuckoo_hash.h
 create mode 100644 lib/librte_hash/rte_cuckoo_hash_x86.h

diff --git a/app/test/Makefile b/app/test/Makefile
index 053f3a2..5476300 100644
--- a/app/test/Makefile
+++ b/app/test/Makefile
@@ -120,6 +120,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_HASH) += test_thash.c
 SRCS-$(CONFIG_RTE_LIBRTE_HASH) += test_hash_perf.c
 SRCS-$(CONFIG_RTE_LIBRTE_HASH) += test_hash_functions.c
 SRCS-$(CONFIG_RTE_LIBRTE_HASH) += test_hash_scaling.c
+SRCS-$(CONFIG_RTE_LIBRTE_HASH) += test_hash_multiwriter.c
 
 SRCS-$(CONFIG_RTE_LIBRTE_LPM) += test_lpm.c
 SRCS-$(CONFIG_RTE_LIBRTE_LPM) += test_lpm_perf.c
diff --git a/app/test/test_hash_multiwriter.c b/app/test/test_hash_multiwriter.c
new file mode 100644
index 0000000..b0f31b0
--- /dev/null
+++ b/app/test/test_hash_multiwriter.c
@@ -0,0 +1,287 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in
+ *	 the documentation and/or other materials provided with the
+ *	 distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *	 contributors may be used to endorse or promote products derived
+ *	 from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <inttypes.h>
+#include <locale.h>
+
+#include <rte_cycles.h>
+#include <rte_hash.h>
+#include <rte_hash_crc.h>
+#include <rte_launch.h>
+#include <rte_malloc.h>
+#include <rte_random.h>
+#include <rte_spinlock.h>
+
+#include "test.h"
+
+/*
+ * Check condition and return an error if true. Assumes that "handle" is the
+ * name of the hash structure pointer to be freed.
+ */
+#define RETURN_IF_ERROR(cond, str, ...) do {                            \
+	if (cond) {                                                     \
+		printf("ERROR line %d: " str "\n", __LINE__,            \
+							##__VA_ARGS__);	\
+		if (handle)                                             \
+			rte_hash_free(handle);                          \
+		return -1;                                              \
+	}                                                               \
+} while (0)
+
+#define RTE_APP_TEST_HASH_MULTIWRITER_FAILED 0
+
+struct {
+	uint32_t *keys;
+	uint32_t *found;
+	uint32_t nb_tsx_insertion;
+	struct rte_hash *h;
+} tbl_multiwriter_test_params;
+
+const uint32_t nb_entries = 16*1024*1024;
+const uint32_t nb_total_tsx_insertion = 15*1024*1024;
+uint32_t rounded_nb_total_tsx_insertion;
+
+static rte_atomic64_t gcycles;
+static rte_atomic64_t ginsertions;
+
+static int use_htm;
+
+static int
+test_hash_multiwriter_worker(__attribute__((unused)) void *arg)
+{
+	uint64_t i, offset;
+	uint32_t lcore_id = rte_lcore_id();
+	uint64_t begin, cycles;
+
+	offset = (lcore_id - rte_get_master_lcore())
+		* tbl_multiwriter_test_params.nb_tsx_insertion;
+
+	printf("Core #%d inserting %d: %'"PRId64" - %'"PRId64"\n",
+	       lcore_id, tbl_multiwriter_test_params.nb_tsx_insertion,
+	       offset, offset + tbl_multiwriter_test_params.nb_tsx_insertion);
+
+	begin = rte_rdtsc_precise();
+
+	for (i = offset;
+	     i < offset + tbl_multiwriter_test_params.nb_tsx_insertion;
+	     i++) {
+		if (rte_hash_add_key(tbl_multiwriter_test_params.h,
+				     tbl_multiwriter_test_params.keys + i) < 0)
+			break;
+	}
+
+	cycles = rte_rdtsc_precise() - begin;
+	rte_atomic64_add(&gcycles, cycles);
+	rte_atomic64_add(&ginsertions, i - offset);
+
+	for (; i < offset + tbl_multiwriter_test_params.nb_tsx_insertion; i++)
+		tbl_multiwriter_test_params.keys[i]
+			= RTE_APP_TEST_HASH_MULTIWRITER_FAILED;
+
+	return 0;
+}
+
+
+static int
+test_hash_multiwriter(void)
+{
+	unsigned int i, rounded_nb_total_tsx_insertion;
+	static unsigned calledCount = 1;
+
+	uint32_t *keys;
+	uint32_t *found;
+
+	struct rte_hash_parameters hash_params = {
+		.entries = nb_entries,
+		.key_len = sizeof(uint32_t),
+		.hash_func = rte_hash_crc,
+		.hash_func_init_val = 0,
+		.socket_id = rte_socket_id(),
+	};
+	if (use_htm)
+		hash_params.extra_flag =
+			RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT
+				| RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD;
+	else
+		hash_params.extra_flag =
+			RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD;
+
+	struct rte_hash *handle;
+	char name[RTE_HASH_NAMESIZE];
+
+	const void *next_key;
+	void *next_data;
+	uint32_t iter = 0;
+
+	uint32_t duplicated_keys = 0;
+	uint32_t lost_keys = 0;
+
+	snprintf(name, 32, "test%u", calledCount++);
+	hash_params.name = name;
+
+	handle = rte_hash_create(&hash_params);
+	RETURN_IF_ERROR(handle == NULL, "hash creation failed");
+
+	tbl_multiwriter_test_params.h = handle;
+	tbl_multiwriter_test_params.nb_tsx_insertion =
+		nb_total_tsx_insertion / rte_lcore_count();
+
+	rounded_nb_total_tsx_insertion = (nb_total_tsx_insertion /
+		tbl_multiwriter_test_params.nb_tsx_insertion)
+		* tbl_multiwriter_test_params.nb_tsx_insertion;
+
+	rte_srand(rte_rdtsc());
+
+	keys = rte_malloc(NULL, sizeof(uint32_t) * nb_entries, 0);
+
+	if (keys == NULL) {
+		printf("RTE_MALLOC failed\n");
+		goto err1;
+	}
+
+	found = rte_zmalloc(NULL, sizeof(uint32_t) * nb_entries, 0);
+	if (found == NULL) {
+		printf("RTE_ZMALLOC failed\n");
+		goto err2;
+	}
+
+	for (i = 0; i < nb_entries; i++)
+		keys[i] = i;
+
+	tbl_multiwriter_test_params.keys = keys;
+	tbl_multiwriter_test_params.found = found;
+
+	rte_atomic64_init(&gcycles);
+	rte_atomic64_clear(&gcycles);
+
+	rte_atomic64_init(&ginsertions);
+	rte_atomic64_clear(&ginsertions);
+
+	/* Fire all threads. */
+	rte_eal_mp_remote_launch(test_hash_multiwriter_worker,
+				 NULL, CALL_MASTER);
+	rte_eal_mp_wait_lcore();
+
+	while (rte_hash_iterate(handle, &next_key, &next_data, &iter) >= 0) {
+		/* Search for the key in the list of keys added .*/
+		i = *(const uint32_t *)next_key;
+		tbl_multiwriter_test_params.found[i]++;
+	}
+
+	for (i = 0; i < rounded_nb_total_tsx_insertion; i++) {
+		if (tbl_multiwriter_test_params.keys[i]
+		    != RTE_APP_TEST_HASH_MULTIWRITER_FAILED) {
+			if (tbl_multiwriter_test_params.found[i] > 1) {
+				duplicated_keys++;
+				break;
+			}
+			if (tbl_multiwriter_test_params.found[i] == 0) {
+				lost_keys++;
+				printf("key %d is lost\n", i);
+				break;
+			}
+		}
+	}
+
+	if (duplicated_keys > 0) {
+		printf("%d key duplicated\n", duplicated_keys);
+		goto err3;
+	}
+
+	if (lost_keys > 0) {
+		printf("%d key lost\n", lost_keys);
+		goto err3;
+	}
+
+	printf("No key corrupted during multiwriter insertion.\n");
+
+	unsigned long long int cycles_per_insertion =
+		rte_atomic64_read(&gcycles)/
+		rte_atomic64_read(&ginsertions);
+
+	printf(" cycles per insertion: %llu\n", cycles_per_insertion);
+
+	rte_free(tbl_multiwriter_test_params.found);
+	rte_free(tbl_multiwriter_test_params.keys);
+	rte_hash_free(handle);
+	return 0;
+
+err3:
+	rte_free(tbl_multiwriter_test_params.found);
+err2:
+	rte_free(tbl_multiwriter_test_params.keys);
+err1:
+	rte_hash_free(handle);
+	return -1;
+}
+
+static int
+test_hash_multiwriter_main(void)
+{
+	int r = -1;
+
+	if (rte_lcore_count() == 1) {
+		printf("More than one lcore is required to do multiwriter test\n");
+		return 0;
+	}
+
+
+	setlocale(LC_NUMERIC, "");
+
+
+	if (!rte_tm_supported()) {
+		printf("Hardware transactional memory (lock elision) "
+			"is NOT supported\n");
+	} else {
+		printf("Hardware transactional memory (lock elision) "
+			"is supported\n");
+
+		printf("Test multi-writer with Hardware transactional memory\n");
+
+		use_htm = 1;
+		r = test_hash_multiwriter();
+	}
+
+	printf("Test multi-writer without Hardware transactional memory\n");
+	use_htm = 0;
+	r = test_hash_multiwriter();
+
+	return r;
+}
+
+
+static struct test_command hash_scaling_cmd = {
+	.command = "hash_multiwriter_autotest",
+	.callback = test_hash_multiwriter_main,
+};
+
+REGISTER_TEST_COMMAND(hash_scaling_cmd);
diff --git a/doc/guides/rel_notes/release_16_07.rst b/doc/guides/rel_notes/release_16_07.rst
index ff8cc0d..dd96286 100644
--- a/doc/guides/rel_notes/release_16_07.rst
+++ b/doc/guides/rel_notes/release_16_07.rst
@@ -76,6 +76,18 @@ New Features
   monitoring applications, enabling the support of broader liveness
   reporting to external processes.
 
+* **Added multi-writer support for RTE Hash with Intel TSX.**
+
+  The following features/modifications have been added to rte_hash library:
+
+  * Enabled application developers to use an extra flag for rte_hash creation
+    to specify default behavior (multi-thread safe/unsafe) with rte_hash_add_key
+    function.
+  * Changed Cuckoo search algorithm to breadth first search for multi-writer
+    routine and split Cuckoo Search and Move operations in order to reduce
+    transactional code region and improve TSX performance.
+  * Added a hash multi-writer test case for test app.
+
 
 Resolved Issues
 ---------------
diff --git a/lib/librte_hash/rte_cuckoo_hash.c b/lib/librte_hash/rte_cuckoo_hash.c
index 7b7d1f8..e3cc3a7 100644
--- a/lib/librte_hash/rte_cuckoo_hash.c
+++ b/lib/librte_hash/rte_cuckoo_hash.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -59,12 +59,10 @@
 #include <rte_compat.h>
 
 #include "rte_hash.h"
-#if defined(RTE_ARCH_X86)
-#include "rte_cmp_x86.h"
-#endif
+#include "rte_cuckoo_hash.h"
 
-#if defined(RTE_ARCH_ARM64)
-#include "rte_cmp_arm64.h"
+#if defined(RTE_ARCH_X86)
+#include "rte_cuckoo_hash_x86.h"
 #endif
 
 TAILQ_HEAD(rte_hash_list, rte_tailq_entry);
@@ -74,153 +72,6 @@ static struct rte_tailq_elem rte_hash_tailq = {
 };
 EAL_REGISTER_TAILQ(rte_hash_tailq)
 
-/* Macro to enable/disable run-time checking of function parameters */
-#if defined(RTE_LIBRTE_HASH_DEBUG)
-#define RETURN_IF_TRUE(cond, retval) do { \
-	if (cond) \
-		return retval; \
-} while (0)
-#else
-#define RETURN_IF_TRUE(cond, retval)
-#endif
-
-/* Hash function used if none is specified */
-#if defined(RTE_MACHINE_CPUFLAG_SSE4_2) || defined(RTE_MACHINE_CPUFLAG_CRC32)
-#include <rte_hash_crc.h>
-#define DEFAULT_HASH_FUNC       rte_hash_crc
-#else
-#include <rte_jhash.h>
-#define DEFAULT_HASH_FUNC       rte_jhash
-#endif
-
-/** Number of items per bucket. */
-#define RTE_HASH_BUCKET_ENTRIES		4
-
-#define NULL_SIGNATURE			0
-
-#define KEY_ALIGNMENT			16
-
-#define LCORE_CACHE_SIZE		8
-
-#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
-/*
- * All different options to select a key compare function,
- * based on the key size and custom function.
- */
-enum cmp_jump_table_case {
-	KEY_CUSTOM = 0,
-	KEY_16_BYTES,
-	KEY_32_BYTES,
-	KEY_48_BYTES,
-	KEY_64_BYTES,
-	KEY_80_BYTES,
-	KEY_96_BYTES,
-	KEY_112_BYTES,
-	KEY_128_BYTES,
-	KEY_OTHER_BYTES,
-	NUM_KEY_CMP_CASES,
-};
-
-/*
- * Table storing all different key compare functions
- * (multi-process supported)
- */
-const rte_hash_cmp_eq_t cmp_jump_table[NUM_KEY_CMP_CASES] = {
-	NULL,
-	rte_hash_k16_cmp_eq,
-	rte_hash_k32_cmp_eq,
-	rte_hash_k48_cmp_eq,
-	rte_hash_k64_cmp_eq,
-	rte_hash_k80_cmp_eq,
-	rte_hash_k96_cmp_eq,
-	rte_hash_k112_cmp_eq,
-	rte_hash_k128_cmp_eq,
-	memcmp
-};
-#else
-/*
- * All different options to select a key compare function,
- * based on the key size and custom function.
- */
-enum cmp_jump_table_case {
-	KEY_CUSTOM = 0,
-	KEY_OTHER_BYTES,
-	NUM_KEY_CMP_CASES,
-};
-
-/*
- * Table storing all different key compare functions
- * (multi-process supported)
- */
-const rte_hash_cmp_eq_t cmp_jump_table[NUM_KEY_CMP_CASES] = {
-	NULL,
-	memcmp
-};
-
-#endif
-
-struct lcore_cache {
-	unsigned len; /**< Cache len */
-	void *objs[LCORE_CACHE_SIZE]; /**< Cache objects */
-} __rte_cache_aligned;
-
-/** A hash table structure. */
-struct rte_hash {
-	char name[RTE_HASH_NAMESIZE];   /**< Name of the hash. */
-	uint32_t entries;               /**< Total table entries. */
-	uint32_t num_buckets;           /**< Number of buckets in table. */
-	uint32_t key_len;               /**< Length of hash key. */
-	rte_hash_function hash_func;    /**< Function used to calculate hash. */
-	uint32_t hash_func_init_val;    /**< Init value used by hash_func. */
-	rte_hash_cmp_eq_t rte_hash_custom_cmp_eq;
-	/**< Custom function used to compare keys. */
-	enum cmp_jump_table_case cmp_jump_table_idx;
-	/**< Indicates which compare function to use. */
-	uint32_t bucket_bitmask;        /**< Bitmask for getting bucket index
-						from hash signature. */
-	uint32_t key_entry_size;         /**< Size of each key entry. */
-
-	struct rte_ring *free_slots;    /**< Ring that stores all indexes
-						of the free slots in the key table */
-	void *key_store;                /**< Table storing all keys and data */
-	struct rte_hash_bucket *buckets;	/**< Table with buckets storing all the
-							hash values and key indexes
-							to the key table*/
-	uint8_t hw_trans_mem_support;	/**< Hardware transactional
-							memory support */
-	struct lcore_cache *local_free_slots;
-	/**< Local cache per lcore, storing some indexes of the free slots */
-} __rte_cache_aligned;
-
-/* Structure storing both primary and secondary hashes */
-struct rte_hash_signatures {
-	union {
-		struct {
-			hash_sig_t current;
-			hash_sig_t alt;
-		};
-		uint64_t sig;
-	};
-};
-
-/* Structure that stores key-value pair */
-struct rte_hash_key {
-	union {
-		uintptr_t idata;
-		void *pdata;
-	};
-	/* Variable key size */
-	char key[0];
-} __attribute__((aligned(KEY_ALIGNMENT)));
-
-/** Bucket structure */
-struct rte_hash_bucket {
-	struct rte_hash_signatures signatures[RTE_HASH_BUCKET_ENTRIES];
-	/* Includes dummy key index that always contains index 0 */
-	uint32_t key_idx[RTE_HASH_BUCKET_ENTRIES + 1];
-	uint8_t flag[RTE_HASH_BUCKET_ENTRIES];
-} __rte_cache_aligned;
-
 struct rte_hash *
 rte_hash_find_existing(const char *name)
 {
@@ -372,7 +223,7 @@ rte_hash_create(const struct rte_hash_parameters *params)
 
 /*
  * If x86 architecture is used, select appropriate compare function,
- * which may use x86 instrinsics, otherwise use memcmp
+ * which may use x86 intrinsics, otherwise use memcmp
  */
 #if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
 	/* Select function to compare keys */
@@ -431,7 +282,23 @@ rte_hash_create(const struct rte_hash_parameters *params)
 	h->free_slots = r;
 	h->hw_trans_mem_support = hw_trans_mem_support;
 
-	/* populate the free slots ring. Entry zero is reserved for key misses */
+	/* Turn on multi-writer only with explicit flat from user and TM
+	 * support.
+	 */
+	if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD) {
+		if (h->hw_trans_mem_support) {
+			h->add_key = ADD_KEY_MULTIWRITER_TM;
+		} else {
+			h->add_key = ADD_KEY_MULTIWRITER;
+			h->multiwriter_lock = rte_malloc(NULL,
+							sizeof(rte_spinlock_t),
+							LCORE_CACHE_SIZE);
+			rte_spinlock_init(h->multiwriter_lock);
+		}
+	} else
+		h->add_key = ADD_KEY_SINGLEWRITER;
+
+	/* Populate free slots ring. Entry zero is reserved for key misses. */
 	for (i = 1; i < params->entries + 1; i++)
 		rte_ring_sp_enqueue(r, (void *)((uintptr_t) i));
 
@@ -482,6 +349,8 @@ rte_hash_free(struct rte_hash *h)
 	if (h->hw_trans_mem_support)
 		rte_free(h->local_free_slots);
 
+	if (h->add_key == ADD_KEY_MULTIWRITER)
+		rte_free(h->multiwriter_lock);
 	rte_ring_free(h->free_slots);
 	rte_free(h->key_store);
 	rte_free(h->buckets);
@@ -632,6 +501,9 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
 	unsigned lcore_id;
 	struct lcore_cache *cached_free_slots = NULL;
 
+	if (h->add_key == ADD_KEY_MULTIWRITER)
+		rte_spinlock_lock(h->multiwriter_lock);
+
 	prim_bucket_idx = sig & h->bucket_bitmask;
 	prim_bkt = &h->buckets[prim_bucket_idx];
 	rte_prefetch0(prim_bkt);
@@ -712,35 +584,67 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
 	rte_memcpy(new_k->key, key, h->key_len);
 	new_k->pdata = data;
 
-	/* Insert new entry is there is room in the primary bucket */
-	for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
-		/* Check if slot is available */
-		if (likely(prim_bkt->signatures[i].sig == NULL_SIGNATURE)) {
-			prim_bkt->signatures[i].current = sig;
-			prim_bkt->signatures[i].alt = alt_hash;
-			prim_bkt->key_idx[i] = new_idx;
+#if defined(RTE_ARCH_X86) /* currently only x86 support HTM */
+	if (h->add_key == ADD_KEY_MULTIWRITER_TM) {
+		ret = rte_hash_cuckoo_insert_mw_tm(prim_bkt,
+				sig, alt_hash, new_idx);
+		if (ret >= 0)
+			return new_idx - 1;
+
+		/* Primary bucket full, need to make space for new entry */
+		ret = rte_hash_cuckoo_make_space_mw_tm(h, prim_bkt, sig,
+							alt_hash, new_idx);
+
+		if (ret >= 0)
+			return new_idx - 1;
+
+		/* Also search secondary bucket to get better occupancy */
+		ret = rte_hash_cuckoo_make_space_mw_tm(h, sec_bkt, sig,
+							alt_hash, new_idx);
+
+		if (ret >= 0)
+			return new_idx - 1;
+	} else {
+#endif
+		for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
+			/* Check if slot is available */
+			if (likely(prim_bkt->signatures[i].sig == NULL_SIGNATURE)) {
+				prim_bkt->signatures[i].current = sig;
+				prim_bkt->signatures[i].alt = alt_hash;
+				prim_bkt->key_idx[i] = new_idx;
+				break;
+			}
+		}
+
+		if (i != RTE_HASH_BUCKET_ENTRIES) {
+			if (h->add_key == ADD_KEY_MULTIWRITER)
+				rte_spinlock_unlock(h->multiwriter_lock);
 			return new_idx - 1;
 		}
-	}
 
-	/* Primary bucket is full, so we need to make space for new entry */
-	ret = make_space_bucket(h, prim_bkt);
-	/*
-	 * After recursive function.
-	 * Insert the new entry in the position of the pushed entry
-	 * if successful or return error and
-	 * store the new slot back in the ring
-	 */
-	if (ret >= 0) {
-		prim_bkt->signatures[ret].current = sig;
-		prim_bkt->signatures[ret].alt = alt_hash;
-		prim_bkt->key_idx[ret] = new_idx;
-		return new_idx - 1;
+		/* Primary bucket full, need to make space for new entry
+		 * After recursive function.
+		 * Insert the new entry in the position of the pushed entry
+		 * if successful or return error and
+		 * store the new slot back in the ring
+		 */
+		ret = make_space_bucket(h, prim_bkt);
+		if (ret >= 0) {
+			prim_bkt->signatures[ret].current = sig;
+			prim_bkt->signatures[ret].alt = alt_hash;
+			prim_bkt->key_idx[ret] = new_idx;
+			if (h->add_key == ADD_KEY_MULTIWRITER)
+				rte_spinlock_unlock(h->multiwriter_lock);
+			return new_idx - 1;
+		}
+#if defined(RTE_ARCH_X86)
 	}
-
+#endif
 	/* Error in addition, store new slot back in the ring and return error */
 	enqueue_slot_back(h, cached_free_slots, (void *)((uintptr_t) new_idx));
 
+	if (h->add_key == ADD_KEY_MULTIWRITER)
+		rte_spinlock_unlock(h->multiwriter_lock);
 	return ret;
 }
 
diff --git a/lib/librte_hash/rte_cuckoo_hash.h b/lib/librte_hash/rte_cuckoo_hash.h
new file mode 100644
index 0000000..6c76700
--- /dev/null
+++ b/lib/librte_hash/rte_cuckoo_hash.h
@@ -0,0 +1,219 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* rte_cuckoo_hash.h
+ * This file hold Cuckoo Hash private data structures to allows include from
+ * platform specific files like rte_cuckoo_hash_x86.h
+ */
+
+#ifndef _RTE_CUCKOO_HASH_H_
+#define _RTE_CUCKOO_HASH_H_
+
+#if defined(RTE_ARCH_X86)
+#include "rte_cmp_x86.h"
+#endif
+
+#if defined(RTE_ARCH_ARM64)
+#include "rte_cmp_arm64.h"
+#endif
+
+/* Macro to enable/disable run-time checking of function parameters */
+#if defined(RTE_LIBRTE_HASH_DEBUG)
+#define RETURN_IF_TRUE(cond, retval) do { \
+	if (cond) \
+		return retval; \
+} while (0)
+#else
+#define RETURN_IF_TRUE(cond, retval)
+#endif
+
+/* Hash function used if none is specified */
+#if defined(RTE_MACHINE_CPUFLAG_SSE4_2) || defined(RTE_MACHINE_CPUFLAG_CRC32)
+#include <rte_hash_crc.h>
+#define DEFAULT_HASH_FUNC       rte_hash_crc
+#else
+#include <rte_jhash.h>
+#define DEFAULT_HASH_FUNC       rte_jhash
+#endif
+
+#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
+/*
+ * All different options to select a key compare function,
+ * based on the key size and custom function.
+ */
+enum cmp_jump_table_case {
+	KEY_CUSTOM = 0,
+	KEY_16_BYTES,
+	KEY_32_BYTES,
+	KEY_48_BYTES,
+	KEY_64_BYTES,
+	KEY_80_BYTES,
+	KEY_96_BYTES,
+	KEY_112_BYTES,
+	KEY_128_BYTES,
+	KEY_OTHER_BYTES,
+	NUM_KEY_CMP_CASES,
+};
+
+/*
+ * Table storing all different key compare functions
+ * (multi-process supported)
+ */
+const rte_hash_cmp_eq_t cmp_jump_table[NUM_KEY_CMP_CASES] = {
+	NULL,
+	rte_hash_k16_cmp_eq,
+	rte_hash_k32_cmp_eq,
+	rte_hash_k48_cmp_eq,
+	rte_hash_k64_cmp_eq,
+	rte_hash_k80_cmp_eq,
+	rte_hash_k96_cmp_eq,
+	rte_hash_k112_cmp_eq,
+	rte_hash_k128_cmp_eq,
+	memcmp
+};
+#else
+/*
+ * All different options to select a key compare function,
+ * based on the key size and custom function.
+ */
+enum cmp_jump_table_case {
+	KEY_CUSTOM = 0,
+	KEY_OTHER_BYTES,
+	NUM_KEY_CMP_CASES,
+};
+
+/*
+ * Table storing all different key compare functions
+ * (multi-process supported)
+ */
+const rte_hash_cmp_eq_t cmp_jump_table[NUM_KEY_CMP_CASES] = {
+	NULL,
+	memcmp
+};
+
+#endif
+
+enum add_key_case {
+	ADD_KEY_SINGLEWRITER = 0,
+	ADD_KEY_MULTIWRITER,
+	ADD_KEY_MULTIWRITER_TM,
+};
+
+/** Number of items per bucket. */
+#define RTE_HASH_BUCKET_ENTRIES		4
+
+#define NULL_SIGNATURE			0
+
+#define KEY_ALIGNMENT			16
+
+#define LCORE_CACHE_SIZE		64
+
+#define RTE_HASH_BFS_QUEUE_MAX_LEN       1000
+
+#define RTE_XABORT_CUCKOO_PATH_INVALIDED 0x4
+
+#define RTE_HASH_TSX_MAX_RETRY  10
+
+struct lcore_cache {
+	unsigned len; /**< Cache len */
+	void *objs[LCORE_CACHE_SIZE]; /**< Cache objects */
+} __rte_cache_aligned;
+
+/* Structure storing both primary and secondary hashes */
+struct rte_hash_signatures {
+	union {
+		struct {
+			hash_sig_t current;
+			hash_sig_t alt;
+		};
+		uint64_t sig;
+	};
+};
+
+/* Structure that stores key-value pair */
+struct rte_hash_key {
+	union {
+		uintptr_t idata;
+		void *pdata;
+	};
+	/* Variable key size */
+	char key[0];
+} __attribute__((aligned(KEY_ALIGNMENT)));
+
+/** Bucket structure */
+struct rte_hash_bucket {
+	struct rte_hash_signatures signatures[RTE_HASH_BUCKET_ENTRIES];
+	/* Includes dummy key index that always contains index 0 */
+	uint32_t key_idx[RTE_HASH_BUCKET_ENTRIES + 1];
+	uint8_t flag[RTE_HASH_BUCKET_ENTRIES];
+} __rte_cache_aligned;
+
+/** A hash table structure. */
+struct rte_hash {
+	char name[RTE_HASH_NAMESIZE];   /**< Name of the hash. */
+	uint32_t entries;               /**< Total table entries. */
+	uint32_t num_buckets;           /**< Number of buckets in table. */
+	uint32_t key_len;               /**< Length of hash key. */
+	rte_hash_function hash_func;    /**< Function used to calculate hash. */
+	uint32_t hash_func_init_val;    /**< Init value used by hash_func. */
+	rte_hash_cmp_eq_t rte_hash_custom_cmp_eq;
+	/**< Custom function used to compare keys. */
+	enum cmp_jump_table_case cmp_jump_table_idx;
+	/**< Indicates which compare function to use. */
+	uint32_t bucket_bitmask;        /**< Bitmask for getting bucket index
+						from hash signature. */
+	uint32_t key_entry_size;         /**< Size of each key entry. */
+
+	struct rte_ring *free_slots;    /**< Ring that stores all indexes
+						of the free slots in the key table */
+	void *key_store;                /**< Table storing all keys and data */
+	struct rte_hash_bucket *buckets;	/**< Table with buckets storing all the
+							hash values and key indexes
+							to the key table*/
+	uint8_t hw_trans_mem_support;	/**< Hardware transactional
+							memory support */
+	struct lcore_cache *local_free_slots;
+	/**< Local cache per lcore, storing some indexes of the free slots */
+	enum add_key_case add_key; /**< Multi-writer hash add behavior */
+
+	rte_spinlock_t *multiwriter_lock; /**< Multi-writer spinlock for w/o TM */
+} __rte_cache_aligned;
+
+struct queue_node {
+	struct rte_hash_bucket *bkt; /* Current bucket on the bfs search */
+
+	struct queue_node *prev;     /* Parent(bucket) in search path */
+	int prev_slot;               /* Parent(slot) in search path */
+};
+
+#endif
diff --git a/lib/librte_hash/rte_cuckoo_hash_x86.h b/lib/librte_hash/rte_cuckoo_hash_x86.h
new file mode 100644
index 0000000..fa5630b
--- /dev/null
+++ b/lib/librte_hash/rte_cuckoo_hash_x86.h
@@ -0,0 +1,193 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* rte_cuckoo_hash_x86.h
+ * This file holds all x86 specific Cuckoo Hash functions
+ */
+
+/* Only tries to insert at one bucket (@prim_bkt) without trying to push
+ * buckets around
+ */
+static inline unsigned
+rte_hash_cuckoo_insert_mw_tm(struct rte_hash_bucket *prim_bkt,
+		hash_sig_t sig, hash_sig_t alt_hash, uint32_t new_idx)
+{
+	unsigned i, status;
+	unsigned try = 0;
+
+	while (try < RTE_HASH_TSX_MAX_RETRY) {
+		status = rte_xbegin();
+		if (likely(status == RTE_XBEGIN_STARTED)) {
+			/* Insert new entry if there is room in the primary
+			* bucket.
+			*/
+			for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
+				/* Check if slot is available */
+				if (likely(prim_bkt->signatures[i].sig ==
+						NULL_SIGNATURE)) {
+					prim_bkt->signatures[i].current = sig;
+					prim_bkt->signatures[i].alt = alt_hash;
+					prim_bkt->key_idx[i] = new_idx;
+					break;
+				}
+			}
+			rte_xend();
+
+			if (i != RTE_HASH_BUCKET_ENTRIES)
+				return 0;
+
+			break; /* break off try loop if transaction commits */
+		} else {
+			/* If we abort we give up this cuckoo path. */
+			try++;
+			rte_pause();
+		}
+	}
+
+	return -1;
+}
+
+/* Shift buckets along provided cuckoo_path (@leaf and @leaf_slot) and fill
+ * the path head with new entry (sig, alt_hash, new_idx)
+ */
+static inline int
+rte_hash_cuckoo_move_insert_mw_tm(const struct rte_hash *h,
+			struct queue_node *leaf, uint32_t leaf_slot,
+			hash_sig_t sig, hash_sig_t alt_hash, uint32_t new_idx)
+{
+	unsigned try = 0;
+	unsigned status;
+	uint32_t prev_alt_bkt_idx;
+
+	struct queue_node *prev_node, *curr_node = leaf;
+	struct rte_hash_bucket *prev_bkt, *curr_bkt = leaf->bkt;
+	uint32_t prev_slot, curr_slot = leaf_slot;
+
+	while (try < RTE_HASH_TSX_MAX_RETRY) {
+		status = rte_xbegin();
+		if (likely(status == RTE_XBEGIN_STARTED)) {
+			while (likely(curr_node->prev != NULL)) {
+				prev_node = curr_node->prev;
+				prev_bkt = prev_node->bkt;
+				prev_slot = curr_node->prev_slot;
+
+				prev_alt_bkt_idx
+					= prev_bkt->signatures[prev_slot].alt
+					    & h->bucket_bitmask;
+
+				if (unlikely(&h->buckets[prev_alt_bkt_idx]
+					     != curr_bkt)) {
+					rte_xabort(RTE_XABORT_CUCKOO_PATH_INVALIDED);
+				}
+
+				/* Need to swap current/alt sig to allow later
+				 * Cuckoo insert to move elements back to its
+				 * primary bucket if available
+				 */
+				curr_bkt->signatures[curr_slot].alt =
+				    prev_bkt->signatures[prev_slot].current;
+				curr_bkt->signatures[curr_slot].current =
+				    prev_bkt->signatures[prev_slot].alt;
+				curr_bkt->key_idx[curr_slot]
+				    = prev_bkt->key_idx[prev_slot];
+
+				curr_slot = prev_slot;
+				curr_node = prev_node;
+				curr_bkt = curr_node->bkt;
+			}
+
+			curr_bkt->signatures[curr_slot].current = sig;
+			curr_bkt->signatures[curr_slot].alt = alt_hash;
+			curr_bkt->key_idx[curr_slot] = new_idx;
+
+			rte_xend();
+
+			return 0;
+		}
+
+		/* If we abort we give up this cuckoo path, since most likely it's
+		 * no longer valid as TSX detected data conflict
+		 */
+		try++;
+		rte_pause();
+	}
+
+	return -1;
+}
+
+/*
+ * Make space for new key, using bfs Cuckoo Search and Multi-Writer safe
+ * Cuckoo
+ */
+static inline int
+rte_hash_cuckoo_make_space_mw_tm(const struct rte_hash *h,
+			struct rte_hash_bucket *bkt,
+			hash_sig_t sig, hash_sig_t alt_hash,
+			uint32_t new_idx)
+{
+	unsigned i;
+	struct queue_node queue[RTE_HASH_BFS_QUEUE_MAX_LEN];
+	struct queue_node *tail, *head;
+	struct rte_hash_bucket *curr_bkt, *alt_bkt;
+
+	tail = queue;
+	head = queue + 1;
+	tail->bkt = bkt;
+	tail->prev = NULL;
+	tail->prev_slot = -1;
+
+	/* Cuckoo bfs Search */
+	while (likely(tail != head && head <
+					queue + RTE_HASH_BFS_QUEUE_MAX_LEN - 4)) {
+		curr_bkt = tail->bkt;
+		for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
+			if (curr_bkt->signatures[i].sig == NULL_SIGNATURE) {
+				if (likely(rte_hash_cuckoo_move_insert_mw_tm(h,
+						tail, i, sig,
+						alt_hash, new_idx) == 0))
+					return 0;
+			}
+
+			/* Enqueue new node and keep prev node info */
+			alt_bkt = &(h->buckets[curr_bkt->signatures[i].alt
+						    & h->bucket_bitmask]);
+			head->bkt = alt_bkt;
+			head->prev = tail;
+			head->prev_slot = i;
+			head++;
+		}
+		tail++;
+	}
+
+	return -ENOSPC;
+}
diff --git a/lib/librte_hash/rte_hash.h b/lib/librte_hash/rte_hash.h
index 724315a..c9612fb 100644
--- a/lib/librte_hash/rte_hash.h
+++ b/lib/librte_hash/rte_hash.h
@@ -60,6 +60,9 @@ extern "C" {
 /** Enable Hardware transactional memory support. */
 #define RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT	0x01
 
+/** Default behavior of insertion, single writer/multi writer */
+#define RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD 0x02
+
 /** Signature of key that is stored internally. */
 typedef uint32_t hash_sig_t;
 
-- 
2.5.5



More information about the dev mailing list