[v5,2/2] eal: fix failure path race setting new thread affinity

Message ID 1678925224-2706-3-git-send-email-roretzla@linux.microsoft.com (mailing list archive)
State Superseded, archived
Delegated to: David Marchand
Headers
Series fix race in rte_thread_create failure path |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/loongarch-compilation success Compilation OK
ci/loongarch-unit-testing success Unit Testing PASS
ci/iol-mellanox-Performance success Performance Testing PASS
ci/iol-broadcom-Functional success Functional Testing PASS
ci/Intel-compilation success Compilation OK
ci/iol-broadcom-Performance success Performance Testing PASS
ci/iol-intel-Functional success Functional Testing PASS
ci/iol-intel-Performance fail Performance Testing issues
ci/intel-Testing success Testing PASS
ci/github-robot: build success github build: passed
ci/iol-x86_64-compile-testing success Testing PASS
ci/iol-aarch64-unit-testing success Testing PASS
ci/iol-aarch64-compile-testing success Testing PASS
ci/intel-Functional success Functional PASS
ci/iol-abi-testing success Testing PASS

Commit Message

Tyler Retzlaff March 16, 2023, 12:07 a.m. UTC
  In rte_thread_create setting affinity after pthread_create may fail.
Such a failure should result in the entire rte_thread_create failing
but doesn't.

Additionally if there is a failure to set affinity a race exists where
the creating thread will free ctx and depending on scheduling of the new
thread it may also free ctx (double free).

Resolve the above by setting the affinity from the newly created thread
and using a condition variable to signal the completion of the thread
start wrapper.

Since we are now waiting for the thread start wrapper to complete we can
allocate the thread start wrapper context on the stack. While here, clean
up the variable naming in the context to better highlight which fields of
the context require synchronization between the creating and created
threads.

Fixes: ce6e911d20f6 ("eal: add thread lifetime API")
Cc: stable@dpdk.org

Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 lib/eal/unix/rte_thread.c | 70 +++++++++++++++++++++++++++++------------------
 1 file changed, 43 insertions(+), 27 deletions(-)
  

Comments

David Marchand March 17, 2023, 10:45 a.m. UTC | #1
On Thu, Mar 16, 2023 at 1:07 AM Tyler Retzlaff
<roretzla@linux.microsoft.com> wrote:
>
> In rte_thread_create setting affinity after pthread_create may fail.
> Such a failure should result in the entire rte_thread_create failing
> but doesn't.
>
> Additionally if there is a failure to set affinity a race exists where
> the creating thread will free ctx and depending on scheduling of the new
> thread it may also free ctx (double free).
>
> Resolve the above by setting the affinity from the newly created thread
> using a condition variable to signal the completion of the thread
> start wrapper having completed.
>
> Since we are now waiting for the thread start wrapper to complete we can
> allocate the thread start wrapper context on the stack. While here clean
> up the variable naming in the context to better highlight the fields of
> the context require synchronization between the creating and created
> thread.
>
> Fixes: ce6e911d20f6 ("eal: add thread lifetime API")
> Cc: stable@dpdk.org
>
> Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
> ---
>  lib/eal/unix/rte_thread.c | 70 +++++++++++++++++++++++++++++------------------
>  1 file changed, 43 insertions(+), 27 deletions(-)
>
> diff --git a/lib/eal/unix/rte_thread.c b/lib/eal/unix/rte_thread.c
> index 37ebfcf..5992b04 100644
> --- a/lib/eal/unix/rte_thread.c
> +++ b/lib/eal/unix/rte_thread.c
> @@ -16,9 +16,14 @@ struct eal_tls_key {
>         pthread_key_t thread_index;
>  };
>
> -struct thread_routine_ctx {
> +struct thread_start_context {
>         rte_thread_func thread_func;
> -       void *routine_args;
> +       void *thread_args;
> +       const rte_thread_attr_t *thread_attr;
> +       pthread_mutex_t wrapper_mutex;
> +       pthread_cond_t wrapper_cond;
> +       int wrapper_ret;
> +       volatile int wrapper_done;

One question.

I see that wrapper_done is accessed under wrapper_mutex.
Is volatile needed?

(nit: a boolean is probably enough too)

I was thinking of squashing below diff:

diff --git a/lib/eal/unix/rte_thread.c b/lib/eal/unix/rte_thread.c
index 5992b04a45..5ab5267ca3 100644
--- a/lib/eal/unix/rte_thread.c
+++ b/lib/eal/unix/rte_thread.c
@@ -23,7 +23,7 @@ struct thread_start_context {
        pthread_mutex_t wrapper_mutex;
        pthread_cond_t wrapper_cond;
        int wrapper_ret;
-       volatile int wrapper_done;
+       bool wrapper_done;
 };

 static int
@@ -101,7 +101,7 @@ thread_start_wrapper(void *arg)

        pthread_mutex_lock(&ctx->wrapper_mutex);
        ctx->wrapper_ret = ret;
-       ctx->wrapper_done = 1;
+       ctx->wrapper_done = true;
        pthread_cond_signal(&ctx->wrapper_cond);
        pthread_mutex_unlock(&ctx->wrapper_mutex);

@@ -127,6 +127,7 @@ rte_thread_create(rte_thread_t *thread_id,
                .thread_func = thread_func,
                .thread_args = args,
                .thread_attr = thread_attr,
+               .wrapper_done = false,
                .wrapper_mutex = PTHREAD_MUTEX_INITIALIZER,
                .wrapper_cond = PTHREAD_COND_INITIALIZER,
        };
@@ -151,7 +152,6 @@ rte_thread_create(rte_thread_t *thread_id,
                        goto cleanup;
                }

-
                if (thread_attr->priority ==
                                RTE_THREAD_PRIORITY_REALTIME_CRITICAL) {
                        ret = ENOTSUP;
@@ -183,7 +183,7 @@ rte_thread_create(rte_thread_t *thread_id,
        }

        pthread_mutex_lock(&ctx.wrapper_mutex);
-       while (ctx.wrapper_done != 1)
+       while (!ctx.wrapper_done)
                pthread_cond_wait(&ctx.wrapper_cond, &ctx.wrapper_mutex);
        ret = ctx.wrapper_ret;
        pthread_mutex_unlock(&ctx.wrapper_mutex);


The rest lgtm, thanks Tyler.
  
Tyler Retzlaff March 17, 2023, 2:49 p.m. UTC | #2
On Fri, Mar 17, 2023 at 11:45:08AM +0100, David Marchand wrote:
> On Thu, Mar 16, 2023 at 1:07 AM Tyler Retzlaff
> <roretzla@linux.microsoft.com> wrote:
> >
> > In rte_thread_create setting affinity after pthread_create may fail.
> > Such a failure should result in the entire rte_thread_create failing
> > but doesn't.
> >
> > Additionally if there is a failure to set affinity a race exists where
> > the creating thread will free ctx and depending on scheduling of the new
> > thread it may also free ctx (double free).
> >
> > Resolve the above by setting the affinity from the newly created thread
> > using a condition variable to signal the completion of the thread
> > start wrapper having completed.
> >
> > Since we are now waiting for the thread start wrapper to complete we can
> > allocate the thread start wrapper context on the stack. While here clean
> > up the variable naming in the context to better highlight the fields of
> > the context require synchronization between the creating and created
> > thread.
> >
> > Fixes: ce6e911d20f6 ("eal: add thread lifetime API")
> > Cc: stable@dpdk.org
> >
> > Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
> > ---
> >  lib/eal/unix/rte_thread.c | 70 +++++++++++++++++++++++++++++------------------
> >  1 file changed, 43 insertions(+), 27 deletions(-)
> >
> > diff --git a/lib/eal/unix/rte_thread.c b/lib/eal/unix/rte_thread.c
> > index 37ebfcf..5992b04 100644
> > --- a/lib/eal/unix/rte_thread.c
> > +++ b/lib/eal/unix/rte_thread.c
> > @@ -16,9 +16,14 @@ struct eal_tls_key {
> >         pthread_key_t thread_index;
> >  };
> >
> > -struct thread_routine_ctx {
> > +struct thread_start_context {
> >         rte_thread_func thread_func;
> > -       void *routine_args;
> > +       void *thread_args;
> > +       const rte_thread_attr_t *thread_attr;
> > +       pthread_mutex_t wrapper_mutex;
> > +       pthread_cond_t wrapper_cond;
> > +       int wrapper_ret;
> > +       volatile int wrapper_done;
> 
> One question.
> 
> I see that wrapper_done is accessed under wrapper_mutex.
> Is volatile needed?

I'm not entirely certain. i'm being cautious since i can conceive of the
load in the loop being optimized as a single load by the compiler. but
again i'm not sure, i always like to learn if someone knows better.

> 
> (nit: a boolean is probably enough too)

I have no issue with it being a _Bool if you want to adjust it for that
i certainly don't object. ordinarily i would use _Bool but a lot of dpdk
code seems to prefer int so that's why i chose it. if we use the macro
bool then we should include stdbool.h directly into this translation
unit.

> 
> I was thinking of squashing below diff:

Yeah, no objection. you can decide if you want to keep the volatile or
not and add the stdbool.h include.

Thanks for reviewing, appreciate it.

> 
> diff --git a/lib/eal/unix/rte_thread.c b/lib/eal/unix/rte_thread.c
> index 5992b04a45..5ab5267ca3 100644
> --- a/lib/eal/unix/rte_thread.c
> +++ b/lib/eal/unix/rte_thread.c
> @@ -23,7 +23,7 @@ struct thread_start_context {
>         pthread_mutex_t wrapper_mutex;
>         pthread_cond_t wrapper_cond;
>         int wrapper_ret;
> -       volatile int wrapper_done;
> +       bool wrapper_done;
>  };
> 
>  static int
> @@ -101,7 +101,7 @@ thread_start_wrapper(void *arg)
> 
>         pthread_mutex_lock(&ctx->wrapper_mutex);
>         ctx->wrapper_ret = ret;
> -       ctx->wrapper_done = 1;
> +       ctx->wrapper_done = true;
>         pthread_cond_signal(&ctx->wrapper_cond);
>         pthread_mutex_unlock(&ctx->wrapper_mutex);
> 
> @@ -127,6 +127,7 @@ rte_thread_create(rte_thread_t *thread_id,
>                 .thread_func = thread_func,
>                 .thread_args = args,
>                 .thread_attr = thread_attr,
> +               .wrapper_done = false,
>                 .wrapper_mutex = PTHREAD_MUTEX_INITIALIZER,
>                 .wrapper_cond = PTHREAD_COND_INITIALIZER,
>         };
> @@ -151,7 +152,6 @@ rte_thread_create(rte_thread_t *thread_id,
>                         goto cleanup;
>                 }
> 
> -
>                 if (thread_attr->priority ==
>                                 RTE_THREAD_PRIORITY_REALTIME_CRITICAL) {
>                         ret = ENOTSUP;
> @@ -183,7 +183,7 @@ rte_thread_create(rte_thread_t *thread_id,
>         }
> 
>         pthread_mutex_lock(&ctx.wrapper_mutex);
> -       while (ctx.wrapper_done != 1)
> +       while (!ctx.wrapper_done)
>                 pthread_cond_wait(&ctx.wrapper_cond, &ctx.wrapper_mutex);
>         ret = ctx.wrapper_ret;
>         pthread_mutex_unlock(&ctx.wrapper_mutex);
> 
> 
> The rest lgtmn thanks Tyler.
> 
> 
> 
> -- 
> David Marchand
  
David Marchand March 17, 2023, 6:51 p.m. UTC | #3
On Fri, Mar 17, 2023 at 3:50 PM Tyler Retzlaff
<roretzla@linux.microsoft.com> wrote:
> > > -struct thread_routine_ctx {
> > > +struct thread_start_context {
> > >         rte_thread_func thread_func;
> > > -       void *routine_args;
> > > +       void *thread_args;
> > > +       const rte_thread_attr_t *thread_attr;
> > > +       pthread_mutex_t wrapper_mutex;
> > > +       pthread_cond_t wrapper_cond;
> > > +       int wrapper_ret;
> > > +       volatile int wrapper_done;
> >
> > One question.
> >
> > I see that wrapper_done is accessed under wrapper_mutex.
> > Is volatile needed?
>
> I'm not entirely certain. i'm being cautious since i can conceive of the
> load in the loop being optimized as a single load by the compiler. but
> again i'm not sure, i always like to learn if someone knows better.

After an interesting discussion with Dodji on C99 and side effects
(5.1.2.3/2 and 5.1.2.3/3), I am a bit more convinced that we don't
need this volatile.


>
> >
> > (nit: a boolean is probably enough too)
>
> I have no issue with it being a _Bool if you want to adjust it for that
> i certainly don't object. ordinarily i would use _Bool but a lot of dpdk
> code seems to prefer int so that's why i chose it. if we use the macro
> bool then we should include stdbool.h directly into this translation
> unit.
>
> >
> > I was thinking of squashing below diff:
>
> Yeah, no objection. you can decide if you want to keep the volatile or
> not and add the stdbool.h include.
>
> Thanks for reviewing, appreciate it.

This is a fix but this v5 had an additional change in affinity setting
(switching to rte_thread_set_affinity()).
To be on the safe side wrt backport, I'll also revert to calling
rte_thread_set_affinity_by_id as this is what was being used before.
And this removes the need for patch 1.

Sending a v6 soon, so that it goes through the CI before rc3.
  
Tyler Retzlaff March 17, 2023, 9:20 p.m. UTC | #4
On Fri, Mar 17, 2023 at 07:51:25PM +0100, David Marchand wrote:
> On Fri, Mar 17, 2023 at 3:50 PM Tyler Retzlaff
> <roretzla@linux.microsoft.com> wrote:
> > > > -struct thread_routine_ctx {
> > > > +struct thread_start_context {
> > > >         rte_thread_func thread_func;
> > > > -       void *routine_args;
> > > > +       void *thread_args;
> > > > +       const rte_thread_attr_t *thread_attr;
> > > > +       pthread_mutex_t wrapper_mutex;
> > > > +       pthread_cond_t wrapper_cond;
> > > > +       int wrapper_ret;
> > > > +       volatile int wrapper_done;
> > >
> > > One question.
> > >
> > > I see that wrapper_done is accessed under wrapper_mutex.
> > > Is volatile needed?
> >
> > I'm not entirely certain. i'm being cautious since i can conceive of the
> > load in the loop being optimized as a single load by the compiler. but
> > again i'm not sure, i always like to learn if someone knows better.
> 
> After an interesting discussion with Dodji on C99 and side effects
> (5.1.2.3/2 and 5.1.2.3/3), I am a bit more convinced that we don't
> need this volatile.

Thanks for the references, based on the reading i agree we can drop the
volatile.

> 
> 
> >
> > >
> > > (nit: a boolean is probably enough too)
> >
> > I have no issue with it being a _Bool if you want to adjust it for that
> > i certainly don't object. ordinarily i would use _Bool but a lot of dpdk
> > code seems to prefer int so that's why i chose it. if we use the macro
> > bool then we should include stdbool.h directly into this translation
> > unit.
> >
> > >
> > > I was thinking of squashing below diff:
> >
> > Yeah, no objection. you can decide if you want to keep the volatile or
> > not and add the stdbool.h include.
> >
> > Thanks for reviewing, appreciate it.
> 
> This is a fix but this v5 had an additional change in affinity setting
> (switching to rte_thread_set_affinity()).
> To be on the safe side wrt backport, I'll also revert to calling
> rte_thread_set_affinity_by_id as this is what was being used before.
> And this removes the need for patch 1.

Is it worth merging the const patch but not backporting? I'm not fussed
either way.

> 
> Sending a v6 soon, so that it goes through the CI before rc3.

Yes, great.

Thanks David!

> 
> 
> -- 
> David Marchand
  

Patch

diff --git a/lib/eal/unix/rte_thread.c b/lib/eal/unix/rte_thread.c
index 37ebfcf..5992b04 100644
--- a/lib/eal/unix/rte_thread.c
+++ b/lib/eal/unix/rte_thread.c
@@ -16,9 +16,14 @@  struct eal_tls_key {
 	pthread_key_t thread_index;
 };
 
-struct thread_routine_ctx {
+struct thread_start_context {
 	rte_thread_func thread_func;
-	void *routine_args;
+	void *thread_args;
+	const rte_thread_attr_t *thread_attr;
+	pthread_mutex_t wrapper_mutex;
+	pthread_cond_t wrapper_cond;
+	int wrapper_ret;
+	volatile int wrapper_done;
 };
 
 static int
@@ -81,13 +86,29 @@  struct thread_routine_ctx {
 }
 
 static void *
-thread_func_wrapper(void *arg)
+thread_start_wrapper(void *arg)
 {
-	struct thread_routine_ctx ctx = *(struct thread_routine_ctx *)arg;
+	struct thread_start_context *ctx = (struct thread_start_context *)arg;
+	rte_thread_func thread_func = ctx->thread_func;
+	void *thread_args = ctx->thread_args;
+	int ret = 0;
 
-	free(arg);
+	if (ctx->thread_attr != NULL && CPU_COUNT(&ctx->thread_attr->cpuset) > 0) {
+		ret = rte_thread_set_affinity(&ctx->thread_attr->cpuset);
+		if (ret != 0)
+			RTE_LOG(DEBUG, EAL, "rte_thread_set_affinity failed\n");
+	}
 
-	return (void *)(uintptr_t)ctx.thread_func(ctx.routine_args);
+	pthread_mutex_lock(&ctx->wrapper_mutex);
+	ctx->wrapper_ret = ret;
+	ctx->wrapper_done = 1;
+	pthread_cond_signal(&ctx->wrapper_cond);
+	pthread_mutex_unlock(&ctx->wrapper_mutex);
+
+	if (ret != 0)
+		return NULL;
+
+	return (void *)(uintptr_t)thread_func(thread_args);
 }
 
 int
@@ -98,20 +119,17 @@  struct thread_routine_ctx {
 	int ret = 0;
 	pthread_attr_t attr;
 	pthread_attr_t *attrp = NULL;
-	struct thread_routine_ctx *ctx;
 	struct sched_param param = {
 		.sched_priority = 0,
 	};
 	int policy = SCHED_OTHER;
-
-	ctx = calloc(1, sizeof(*ctx));
-	if (ctx == NULL) {
-		RTE_LOG(DEBUG, EAL, "Insufficient memory for thread context allocations\n");
-		ret = ENOMEM;
-		goto cleanup;
-	}
-	ctx->routine_args = args;
-	ctx->thread_func = thread_func;
+	struct thread_start_context ctx = {
+		.thread_func = thread_func,
+		.thread_args = args,
+		.thread_attr = thread_attr,
+		.wrapper_mutex = PTHREAD_MUTEX_INITIALIZER,
+		.wrapper_cond = PTHREAD_COND_INITIALIZER,
+	};
 
 	if (thread_attr != NULL) {
 		ret = pthread_attr_init(&attr);
@@ -158,24 +176,22 @@  struct thread_routine_ctx {
 	}
 
 	ret = pthread_create((pthread_t *)&thread_id->opaque_id, attrp,
-		thread_func_wrapper, ctx);
+		thread_start_wrapper, &ctx);
 	if (ret != 0) {
 		RTE_LOG(DEBUG, EAL, "pthread_create failed\n");
 		goto cleanup;
 	}
 
-	if (thread_attr != NULL && CPU_COUNT(&thread_attr->cpuset) > 0) {
-		ret = rte_thread_set_affinity_by_id(*thread_id,
-			&thread_attr->cpuset);
-		if (ret != 0) {
-			RTE_LOG(DEBUG, EAL, "rte_thread_set_affinity_by_id failed\n");
-			goto cleanup;
-		}
-	}
+	pthread_mutex_lock(&ctx.wrapper_mutex);
+	while (ctx.wrapper_done != 1)
+		pthread_cond_wait(&ctx.wrapper_cond, &ctx.wrapper_mutex);
+	ret = ctx.wrapper_ret;
+	pthread_mutex_unlock(&ctx.wrapper_mutex);
+
+	if (ret != 0)
+		pthread_join((pthread_t)thread_id->opaque_id, NULL);
 
-	ctx = NULL;
 cleanup:
-	free(ctx);
 	if (attrp != NULL)
 		pthread_attr_destroy(&attr);