[v6,6/8] stack: add C11 atomic implementation

Message ID 20190401211429.20282-7-gage.eads@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Series Add stack library and new mempool handler

Checks

Context               Check    Description
ci/checkpatch         success  coding style OK
ci/Intel-compilation  fail     Compilation issues

Commit Message

Eads, Gage April 1, 2019, 9:14 p.m. UTC
  This commit adds an implementation of the lock-free stack push, pop, and
length functions that use __atomic builtins, for systems that benefit from
finer-grained memory ordering control.

Signed-off-by: Gage Eads <gage.eads@intel.com>
Reviewed-by: Olivier Matz <olivier.matz@6wind.com>
---
 lib/librte_stack/Makefile           |   3 +-
 lib/librte_stack/meson.build        |   3 +-
 lib/librte_stack/rte_stack_lf.h     |   4 +
 lib/librte_stack/rte_stack_lf_c11.h | 175 ++++++++++++++++++++++++++++++++++++
 4 files changed, 183 insertions(+), 2 deletions(-)
 create mode 100644 lib/librte_stack/rte_stack_lf_c11.h
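
The "finer-grained memory ordering control" above refers to passing an explicit
memory order to each __atomic builtin instead of relying on the coarser
operations used by rte_stack_lf_generic.h. A minimal sketch of the idea, using
a made-up counter (my_len) rather than the library's types:

#include <stdint.h>

static uint64_t my_len;

/* Relaxed load: atomicity only, no ordering constraint -- enough for an
 * approximate length, as in __rte_stack_lf_count() in the patch below.
 */
static inline uint64_t
my_len_peek(void)
{
	return __atomic_load_n(&my_len, __ATOMIC_RELAXED);
}

/* Release add: stores made before this call (e.g. to pushed elements) are
 * visible to any thread that later reads my_len with acquire semantics.
 */
static inline void
my_len_credit(uint64_t n)
{
	__atomic_add_fetch(&my_len, n, __ATOMIC_RELEASE);
}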
  

Comments

Honnappa Nagarahalli April 2, 2019, 11:11 a.m. UTC | #1
> Subject: [PATCH v6 6/8] stack: add C11 atomic implementation
> 
> This commit adds an implementation of the lock-free stack push, pop, and
> length functions that use __atomic builtins, for systems that benefit from
> finer-grained memory ordering control.
> 
> Signed-off-by: Gage Eads <gage.eads@intel.com>
> Reviewed-by: Olivier Matz <olivier.matz@6wind.com>
> ---
Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>

<snip>
  

Patch
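
Both the push and pop loops in rte_stack_lf_c11.h below are retries around a
single primitive: a 16-byte compare-and-swap on a {top pointer, modification
counter} head via rte_atomic128_cmp_exchange(), available on x86-64 (hence the
RTE_ARCH_X86_64 guards). A reduced sketch of that pattern, with an illustrative
head type (counted_head) standing in for the library's rte_stack_lf_head:

#include <stdint.h>
#include <rte_atomic.h>
#include <rte_common.h>

/* Illustrative layout only; the real one is struct rte_stack_lf_head. */
struct counted_head {
	void *top;
	uint64_t cnt;
} __rte_aligned(16);

static inline void
counted_head_swing(struct counted_head *head, void *new_top)
{
	struct counted_head old_h, new_h;
	int ok;

	old_h = *head;	/* a torn read here is corrected by the first CAS */
	do {
		new_h.top = new_top;
		new_h.cnt = old_h.cnt + 1;	/* counter update defeats ABA */
		/* On failure, old_h is refreshed with the current head. */
		ok = rte_atomic128_cmp_exchange((rte_int128_t *)head,
						(rte_int128_t *)&old_h,
						(rte_int128_t *)&new_h,
						1, __ATOMIC_RELEASE,
						__ATOMIC_RELAXED);
	} while (ok == 0);
}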

diff --git a/lib/librte_stack/Makefile b/lib/librte_stack/Makefile
index 311edd997..8d18ce520 100644
--- a/lib/librte_stack/Makefile
+++ b/lib/librte_stack/Makefile
@@ -23,6 +23,7 @@  SRCS-$(CONFIG_RTE_LIBRTE_STACK) := rte_stack.c \
 SYMLINK-$(CONFIG_RTE_LIBRTE_STACK)-include := rte_stack.h \
 					      rte_stack_std.h \
 					      rte_stack_lf.h \
-					      rte_stack_lf_generic.h
+					      rte_stack_lf_generic.h \
+					      rte_stack_lf_c11.h
 
 include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_stack/meson.build b/lib/librte_stack/meson.build
index 7a09a5d66..46fce0c20 100644
--- a/lib/librte_stack/meson.build
+++ b/lib/librte_stack/meson.build
@@ -8,4 +8,5 @@  sources = files('rte_stack.c', 'rte_stack_std.c', 'rte_stack_lf.c')
 headers = files('rte_stack.h',
 		'rte_stack_std.h',
 		'rte_stack_lf.h',
-		'rte_stack_lf_generic.h')
+		'rte_stack_lf_generic.h',
+		'rte_stack_lf_c11.h')
diff --git a/lib/librte_stack/rte_stack_lf.h b/lib/librte_stack/rte_stack_lf.h
index bfd680133..518889a05 100644
--- a/lib/librte_stack/rte_stack_lf.h
+++ b/lib/librte_stack/rte_stack_lf.h
@@ -5,7 +5,11 @@ 
 #ifndef _RTE_STACK_LF_H_
 #define _RTE_STACK_LF_H_
 
+#ifdef RTE_USE_C11_MEM_MODEL
+#include "rte_stack_lf_c11.h"
+#else
 #include "rte_stack_lf_generic.h"
+#endif
 
 /**
  * @internal Push several objects on the lock-free stack (MT-safe).
diff --git a/lib/librte_stack/rte_stack_lf_c11.h b/lib/librte_stack/rte_stack_lf_c11.h
new file mode 100644
index 000000000..a316e9af5
--- /dev/null
+++ b/lib/librte_stack/rte_stack_lf_c11.h
@@ -0,0 +1,175 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Intel Corporation
+ */
+
+#ifndef _RTE_STACK_LF_C11_H_
+#define _RTE_STACK_LF_C11_H_
+
+#include <rte_branch_prediction.h>
+#include <rte_prefetch.h>
+
+static __rte_always_inline unsigned int
+__rte_stack_lf_count(struct rte_stack *s)
+{
+	/* stack_lf_push() and stack_lf_pop() do not update the list's contents
+	 * and stack_lf->len atomically, which can cause the list to appear
+	 * shorter than it actually is if this function is called while other
+	 * threads are modifying the list.
+	 *
+	 * However, given the inherently approximate nature of the get_count
+	 * callback -- even if the list and its size were updated atomically,
+	 * the size could change between when get_count executes and when the
+	 * value is returned to the caller -- this is acceptable.
+	 *
+	 * The stack_lf->len updates are placed such that the list may appear to
+	 * have fewer elements than it does, but will never appear to have more
+	 * elements. If the mempool is near-empty to the point that this is a
+	 * concern, the user should consider increasing the mempool size.
+	 */
+	return (unsigned int)__atomic_load_n(&s->stack_lf.used.len.cnt,
+					     __ATOMIC_RELAXED);
+}
+
+static __rte_always_inline void
+__rte_stack_lf_push_elems(struct rte_stack_lf_list *list,
+			  struct rte_stack_lf_elem *first,
+			  struct rte_stack_lf_elem *last,
+			  unsigned int num)
+{
+#ifndef RTE_ARCH_X86_64
+	RTE_SET_USED(first);
+	RTE_SET_USED(last);
+	RTE_SET_USED(list);
+	RTE_SET_USED(num);
+#else
+	struct rte_stack_lf_head old_head;
+	int success;
+
+	old_head = list->head;
+
+	do {
+		struct rte_stack_lf_head new_head;
+
+		/* Use an acquire fence to establish a synchronized-with
+		 * relationship between the list->head load and store-release
+		 * operations (as part of the rte_atomic128_cmp_exchange()).
+		 */
+		__atomic_thread_fence(__ATOMIC_ACQUIRE);
+
+		/* Swing the top pointer to the first element in the list and
+		 * make the last element point to the old top.
+		 */
+		new_head.top = first;
+		new_head.cnt = old_head.cnt + 1;
+
+		last->next = old_head.top;
+
+		/* Use the release memmodel to ensure the writes to the LF LIFO
+		 * elements are visible before the head pointer write.
+		 */
+		success = rte_atomic128_cmp_exchange(
+				(rte_int128_t *)&list->head,
+				(rte_int128_t *)&old_head,
+				(rte_int128_t *)&new_head,
+				1, __ATOMIC_RELEASE,
+				__ATOMIC_RELAXED);
+	} while (success == 0);
+
+	/* Ensure the stack modifications are not reordered with respect
+	 * to the LIFO len update.
+	 */
+	__atomic_add_fetch(&list->len.cnt, num, __ATOMIC_RELEASE);
+#endif
+}
+
+static __rte_always_inline struct rte_stack_lf_elem *
+__rte_stack_lf_pop_elems(struct rte_stack_lf_list *list,
+			 unsigned int num,
+			 void **obj_table,
+			 struct rte_stack_lf_elem **last)
+{
+#ifndef RTE_ARCH_X86_64
+	RTE_SET_USED(obj_table);
+	RTE_SET_USED(last);
+	RTE_SET_USED(list);
+	RTE_SET_USED(num);
+
+	return NULL;
+#else
+	struct rte_stack_lf_head old_head;
+	uint64_t len;
+	int success;
+
+	/* Reserve num elements, if available */
+	len = __atomic_load_n(&list->len.cnt, __ATOMIC_ACQUIRE);
+
+	while (1) {
+		/* Does the list contain enough elements? */
+		if (unlikely(len < num))
+			return NULL;
+
+		/* len is updated on failure */
+		if (__atomic_compare_exchange_n(&list->len.cnt,
+						&len, len - num,
+						0, __ATOMIC_ACQUIRE,
+						__ATOMIC_ACQUIRE))
+			break;
+	}
+
+	/* If a torn read occurs, the CAS will fail and set old_head to the
+	 * correct/latest value.
+	 */
+	old_head = list->head;
+
+	/* Pop num elements */
+	do {
+		struct rte_stack_lf_head new_head;
+		struct rte_stack_lf_elem *tmp;
+		unsigned int i;
+
+		/* Use the acquire memmodel to ensure the reads to the LF LIFO
+		 * elements are properly ordered with respect to the head
+		 * pointer read.
+		 */
+		__atomic_thread_fence(__ATOMIC_ACQUIRE);
+
+		rte_prefetch0(old_head.top);
+
+		tmp = old_head.top;
+
+		/* Traverse the list to find the new head. A next pointer will
+		 * either point to another element or NULL; if a thread
+		 * encounters a pointer that has already been popped, the CAS
+		 * will fail.
+		 */
+		for (i = 0; i < num && tmp != NULL; i++) {
+			rte_prefetch0(tmp->next);
+			if (obj_table)
+				obj_table[i] = tmp->data;
+			if (last)
+				*last = tmp;
+			tmp = tmp->next;
+		}
+
+		/* If NULL was encountered, the list was modified while
+		 * traversing it. Retry.
+		 */
+		if (i != num)
+			continue;
+
+		new_head.top = tmp;
+		new_head.cnt = old_head.cnt + 1;
+
+		success = rte_atomic128_cmp_exchange(
+				(rte_int128_t *)&list->head,
+				(rte_int128_t *)&old_head,
+				(rte_int128_t *)&new_head,
+				1, __ATOMIC_RELEASE,
+				__ATOMIC_RELAXED);
+	} while (success == 0);
+
+	return old_head.top;
+#endif
+}
+
+#endif /* _RTE_STACK_LF_C11_H_ */
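
For reference, these internals sit behind the public rte_stack API: whether
this header or rte_stack_lf_generic.h is included is decided at build time by
RTE_USE_C11_MEM_MODEL (see rte_stack_lf.h above), and the lock-free
implementation itself is selected per stack with the RTE_STACK_F_LF flag. A
minimal usage sketch (function name and sizes are arbitrary, EAL is assumed to
be initialized):

#include <stdio.h>
#include <rte_lcore.h>
#include <rte_stack.h>

static int
stack_lf_smoke_test(void)
{
	static int vals[8];
	void *objs[8], *out[8];
	struct rte_stack *s;
	unsigned int i;

	for (i = 0; i < 8; i++)
		objs[i] = &vals[i];

	/* RTE_STACK_F_LF requests the lock-free implementation; the C11
	 * paths above are used when RTE_USE_C11_MEM_MODEL is enabled for
	 * the target.
	 */
	s = rte_stack_create("lf_test", 1024, rte_socket_id(),
			     RTE_STACK_F_LF);
	if (s == NULL)
		return -1;

	if (rte_stack_push(s, objs, 8) != 8)
		goto fail;

	/* Approximate by design; see the __rte_stack_lf_count() comment. */
	printf("stack holds ~%u objects\n", rte_stack_count(s));

	if (rte_stack_pop(s, out, 8) != 8)
		goto fail;

	rte_stack_free(s);
	return 0;

fail:
	rte_stack_free(s);
	return -1;
}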