[v2,2/2] eal: fix failure path race setting new thread affinity

Message ID 1678750267-3829-3-git-send-email-roretzla@linux.microsoft.com (mailing list archive)
State Superseded, archived
Delegated to: David Marchand
Headers
Series fix race in rte_thread_create failure path |

Checks

Context Check Description
ci/checkpatch warning coding style issues
ci/loongarch-compilation success Compilation OK
ci/loongarch-unit-testing success Unit Testing PASS
ci/Intel-compilation success Compilation OK
ci/intel-Testing success Testing PASS
ci/intel-Functional success Functional PASS
ci/github-robot: build fail github build: failed
ci/iol-mellanox-Performance success Performance Testing PASS
ci/iol-broadcom-Performance success Performance Testing PASS
ci/iol-broadcom-Functional success Functional Testing PASS
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-intel-Functional success Functional Testing PASS
ci/iol-aarch64-unit-testing success Testing PASS
ci/iol-aarch64-compile-testing success Testing PASS
ci/iol-x86_64-compile-testing success Testing PASS
ci/iol-testing success Testing PASS
ci/iol-x86_64-unit-testing success Testing PASS
ci/iol-abi-testing success Testing PASS

Commit Message

Tyler Retzlaff March 13, 2023, 11:31 p.m. UTC
  In rte_thread_create(), setting the affinity after pthread_create() may fail.
Such a failure should cause the entire rte_thread_create() call to fail,
but it currently does not.

Additionally, if setting the affinity fails, a race exists: the creating
thread frees ctx and, depending on how the new thread is scheduled, the new
thread may also free ctx (double free).

Resolve both of the above issues by setting the affinity from the newly
created thread instead of after thread creation. To achieve this, modify
the existing thread wrapper so that the creating thread can wait on the
result of the set-affinity operation.

Fixes: ce6e911d20f6 ("eal: add thread lifetime API")
Cc: stable@dpdk.org

Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 lib/eal/unix/rte_thread.c | 52 ++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 42 insertions(+), 10 deletions(-)
  

Patch

diff --git a/lib/eal/unix/rte_thread.c b/lib/eal/unix/rte_thread.c
index 37ebfcf..a09fb08 100644
--- a/lib/eal/unix/rte_thread.c
+++ b/lib/eal/unix/rte_thread.c
@@ -8,6 +8,7 @@ 
 #include <stdlib.h>
 #include <string.h>
 
+#include <rte_cycles.h>
 #include <rte_errno.h>
 #include <rte_log.h>
 #include <rte_thread.h>
@@ -16,8 +17,17 @@  struct eal_tls_key {
 	pthread_key_t thread_index;
 };
 
+enum __rte_thread_wrapper_status {
+	THREAD_WRAPPER_LAUNCHING, /* Yet to call thread_func function */
+	THREAD_WRAPPER_RUNNING, /* Thread is running successfully */
+	THREAD_WRAPPER_ERROR /* Thread thread_wrapper encountered an error */
+};
+
 struct thread_routine_ctx {
 	rte_thread_func thread_func;
+	const rte_thread_attr_t *thread_attr;
+	int thread_wrapper_ret;
+	enum __rte_thread_wrapper_status thread_wrapper_status;
 	void *routine_args;
 };
 
@@ -83,11 +93,24 @@  struct thread_routine_ctx {
 static void *
 thread_func_wrapper(void *arg)
 {
-	struct thread_routine_ctx ctx = *(struct thread_routine_ctx *)arg;
+	struct thread_routine_ctx *ctx = (struct thread_routine_ctx *)arg;
+	rte_thread_func thread_func = ctx->thread_func;
+	void *thread_args = ctx->routine_args;
+
+	if (ctx->thread_attr != NULL && CPU_COUNT(&ctx->thread_attr->cpuset) > 0) {
+		ctx->thread_wrapper_ret = rte_thread_set_affinity(&ctx->thread_attr->cpuset);
+		if (ctx->thread_wrapper_ret != 0) {
+			RTE_LOG(DEBUG, EAL, "rte_thread_set_affinity failed\n");
+			__atomic_store_n(&ctx->thread_wrapper_status,
+				THREAD_WRAPPER_ERROR, __ATOMIC_RELEASE);
+		}
+	}
+	__atomic_store_n(&ctx->thread_wrapper_status,
+		THREAD_WRAPPER_RUNNING, __ATOMIC_RELEASE);
 
 	free(arg);
 
-	return (void *)(uintptr_t)ctx.thread_func(ctx.routine_args);
+	return (void *)(uintptr_t)thread_func(thread_args);
 }
 
 int
@@ -98,6 +121,7 @@  struct thread_routine_ctx {
 	int ret = 0;
 	pthread_attr_t attr;
 	pthread_attr_t *attrp = NULL;
+	enum __rte_thread_wrapper_status thread_wrapper_status;
 	struct thread_routine_ctx *ctx;
 	struct sched_param param = {
 		.sched_priority = 0,
@@ -111,7 +135,10 @@  struct thread_routine_ctx {
 		goto cleanup;
 	}
 	ctx->routine_args = args;
+	ctx->thread_attr = thread_attr;
 	ctx->thread_func = thread_func;
+	ctx->thread_wrapper_ret = 0;
+	ctx->thread_wrapper_status = THREAD_WRAPPER_LAUNCHING;
 
 	if (thread_attr != NULL) {
 		ret = pthread_attr_init(&attr);
@@ -164,16 +191,21 @@  struct thread_routine_ctx {
 		goto cleanup;
 	}
 
-	if (thread_attr != NULL && CPU_COUNT(&thread_attr->cpuset) > 0) {
-		ret = rte_thread_set_affinity_by_id(*thread_id,
-			&thread_attr->cpuset);
-		if (ret != 0) {
-			RTE_LOG(DEBUG, EAL, "rte_thread_set_affinity_by_id failed\n");
-			goto cleanup;
-		}
+	/* Wait for the thread wrapper to initialize thread successfully */
+	while ((thread_wrapper_status =
+		__atomic_load_n(&ctx->thread_wrapper_status,
+		__ATOMIC_ACQUIRE)) == THREAD_WRAPPER_LAUNCHING)
+			sched_yield();
+
+	/* Check if the thread wrapper encountered an error */
+	if (thread_wrapper_status == THREAD_WRAPPER_ERROR) {
+		/* thread wrapper is exiting */
+		pthread_join((pthread_t)thread_id->opaque_id, NULL);
+		ret = ctx->thread_wrapper_ret;
+		free(ctx);
 	}
-
 	ctx = NULL;
+
 cleanup:
 	free(ctx);
 	if (attrp != NULL)