[v2,2/6] eal: add thread lifetime management

Message ID 1655250438-18044-3-git-send-email-roretzla@linux.microsoft.com (mailing list archive)
State Superseded, archived
Delegated to: David Marchand
Headers
Series add thread lifetime and attributes API |

Checks

Context Check Description
ci/checkpatch warning coding style issues

Commit Message

Tyler Retzlaff June 14, 2022, 11:47 p.m. UTC
  The *rte_thread_create()* function can optionally receive an
rte_thread_attr_t object that will cause the thread to be created with
the affinity and priority described by the attributes object. If
no rte_thread_attr_t is passed (parameter is NULL), the default
affinity and priority are used.

On Windows, the function executed by a thread when the thread starts is
represeneted by a function pointer of type DWORD (*func) (void*).
On other platforms, the function pointer is a void* (*func) (void*).

Performing a cast between these two types of function pointers to
uniformize the API on all platforms may result in undefined behavior.
TO fix this issue, a wrapper that respects the signature required by
CreateThread() has been created on Windows.

Signed-off-by: Narcisa Vasile <navasile@microsoft.com>
Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 lib/eal/include/rte_thread.h    |  65 ++++++++++++
 lib/eal/unix/rte_thread.c       | 108 ++++++++++++++++++++
 lib/eal/version.map             |   3 +
 lib/eal/windows/include/sched.h |   2 +-
 lib/eal/windows/rte_thread.c    | 217 ++++++++++++++++++++++++++++++++--------
 5 files changed, 354 insertions(+), 41 deletions(-)
  

Comments

Dmitry Kozlyuk June 18, 2022, 12:59 p.m. UTC | #1
2022-06-14 16:47 (UTC-0700), Tyler Retzlaff:
> On Windows, the function executed by a thread when the thread starts is
> represeneted by a function pointer of type DWORD (*func) (void*).
> On other platforms, the function pointer is a void* (*func) (void*).
> 
> Performing a cast between these two types of function pointers to
> uniformize the API on all platforms may result in undefined behavior.
> TO fix this issue, a wrapper that respects the signature required by
> CreateThread() has been created on Windows.

The interface issue is still there:
`rte_thread_func` allows the thread routine to have a pointer-sized result.
`rte_thread_join()` allows to obtain this value as `unsigned long`,
which is pointer-sized on 32-bit platforms
and less than that on 64-bit platforms.
This can lead to issues when developers assume they can return a pointer
and this works on 32 bits, but doesn't work on 64 bits.
If you want to keep API as close to phtread as possible,
the limitation must be at least clearly documented in Doxygen
(`rte_thread_func` is undocumented BTW).
I also suggest using `uint32_t` instead of `unsigned long`
precisely to avoid "is it pointer-sized?" doubts.
(I don't see much benefit in keeping pthread-like signature.
When moving from pthread to rte_thread,
it is as trivial to change the thread routine return type.)

> +int
> +rte_thread_create(rte_thread_t *thread_id,
> +		const rte_thread_attr_t *thread_attr,
> +		rte_thread_func thread_func, void *args)
> +{
> [...]
> +		if (thread_attr->priority ==
> +				RTE_THREAD_PRIORITY_REALTIME_CRITICAL) {
> +			ret = ENOTSUP;
> +			goto cleanup;
> +		}
> +		ret = thread_map_priority_to_os_value(thread_attr->priority,
> +				&param.sched_priority, &policy);
> +		if (ret != 0)
> +			goto cleanup;

thread_map_priority_to_os_value() already checks for unsupported values,
why not let it do this particular check?

> +int
> +rte_thread_join(rte_thread_t thread_id, unsigned long *value_ptr)
> +{
> +	int ret = 0;
> +	void *res = NULL;
> +	void **pres = NULL;
> +
> +	if (value_ptr != NULL)
> +		pres = &res;
> +
> +	ret = pthread_join((pthread_t)thread_id.opaque_id, pres);
> +	if (ret != 0) {
> +		RTE_LOG(DEBUG, EAL, "pthread_join failed\n");
> +		return ret;
> +	}
> +
> +	if (value_ptr != NULL && *pres != NULL)
> +		*value_ptr = *(unsigned long *)(*pres);
> +
> +	return 0;
> +}

What makes *pres == NULL special?

> +static DWORD
> +thread_func_wrapper(void *args)
> +{
> +	struct thread_routine_ctx *pctx = args;
> +	struct thread_routine_ctx ctx;
> +
> +	ctx.thread_func = pctx->thread_func;
> +	ctx.routine_args = pctx->routine_args;

ctx = *pctx?

> +
> +	free(pctx);
> +
> +	return (DWORD)(uintptr_t)ctx.thread_func(ctx.routine_args);
> +}
> +
> +int
> +rte_thread_create(rte_thread_t *thread_id,
> +		  const rte_thread_attr_t *thread_attr,
> +		  rte_thread_func thread_func, void *args)
> +{
> +	int ret = 0;
> +	DWORD tid;
> +	HANDLE thread_handle = NULL;
> +	GROUP_AFFINITY thread_affinity;
> +	struct thread_routine_ctx *ctx = NULL;
> +
> +	ctx = calloc(1, sizeof(*ctx));
> +	if (ctx == NULL) {
> +		RTE_LOG(DEBUG, EAL, "Insufficient memory for thread context allocations\n");
> +		ret = ENOMEM;
> +		goto cleanup;
> +	}
> +	ctx->routine_args = args;
> +	ctx->thread_func = thread_func;
> +
> +	thread_handle = CreateThread(NULL, 0, thread_func_wrapper, ctx,
> +		CREATE_SUSPENDED, &tid);
> +	if (thread_handle == NULL) {
> +		ret = thread_log_last_error("CreateThread()");
> +		free(ctx);
> +		goto cleanup;

Missing `free(ctx)` from other error paths below.

> +	}
> +	thread_id->opaque_id = tid;
> +
> +	if (thread_attr != NULL) {
> +		if (CPU_COUNT(&thread_attr->cpuset) > 0) {
> +			ret = convert_cpuset_to_affinity(
> +							&thread_attr->cpuset,
> +							&thread_affinity
> +							);
> +			if (ret != 0) {
> +				RTE_LOG(DEBUG, EAL, "Unable to convert cpuset to thread affinity\n");
> +				goto cleanup;
> +			}
> +
> +			if (!SetThreadGroupAffinity(thread_handle,
> +						    &thread_affinity, NULL)) {
> +				ret = thread_log_last_error("SetThreadGroupAffinity()");
> +				goto cleanup;
> +			}
> +		}
> +		ret = rte_thread_set_priority(*thread_id,
> +				thread_attr->priority);
> +		if (ret != 0) {
> +			RTE_LOG(DEBUG, EAL, "Unable to set thread priority\n");
> +			goto cleanup;
> +		}
> +	}
> +
> +	if (ResumeThread(thread_handle) == (DWORD)-1) {
> +		ret = thread_log_last_error("ResumeThread()");
> +		goto cleanup;
> +	}
> +
> +cleanup:
> +	if (thread_handle != NULL) {
> +		CloseHandle(thread_handle);
> +		thread_handle = NULL;
> +	}
> +
> +	return ret;
> +}
  
Tyler Retzlaff June 20, 2022, 5:39 p.m. UTC | #2
On Sat, Jun 18, 2022 at 03:59:08PM +0300, Dmitry Kozlyuk wrote:
> 2022-06-14 16:47 (UTC-0700), Tyler Retzlaff:
> > On Windows, the function executed by a thread when the thread starts is
> > represeneted by a function pointer of type DWORD (*func) (void*).
> > On other platforms, the function pointer is a void* (*func) (void*).
> > 
> > Performing a cast between these two types of function pointers to
> > uniformize the API on all platforms may result in undefined behavior.
> > TO fix this issue, a wrapper that respects the signature required by
> > CreateThread() has been created on Windows.
> 
> The interface issue is still there:
> `rte_thread_func` allows the thread routine to have a pointer-sized result.
> `rte_thread_join()` allows to obtain this value as `unsigned long`,
> which is pointer-sized on 32-bit platforms
> and less than that on 64-bit platforms.
> This can lead to issues when developers assume they can return a pointer
> and this works on 32 bits, but doesn't work on 64 bits.
> If you want to keep API as close to phtread as possible,
> the limitation must be at least clearly documented in Doxygen
> (`rte_thread_func` is undocumented BTW).
> I also suggest using `uint32_t` instead of `unsigned long`
> precisely to avoid "is it pointer-sized?" doubts.
> (I don't see much benefit in keeping pthread-like signature.
> When moving from pthread to rte_thread,
> it is as trivial to change the thread routine return type.)

thanks Dmitry, i wasn't aware this feedback had been given previously.
let me digest the problem and we'll figure out how we can address it.

> 
> > +int
> > +rte_thread_create(rte_thread_t *thread_id,
> > +		const rte_thread_attr_t *thread_attr,
> > +		rte_thread_func thread_func, void *args)
> > +{
> > [...]
> > +		if (thread_attr->priority ==
> > +				RTE_THREAD_PRIORITY_REALTIME_CRITICAL) {
> > +			ret = ENOTSUP;
> > +			goto cleanup;
> > +		}
> > +		ret = thread_map_priority_to_os_value(thread_attr->priority,
> > +				&param.sched_priority, &policy);
> > +		if (ret != 0)
> > +			goto cleanup;
> 
> thread_map_priority_to_os_value() already checks for unsupported values,
> why not let it do this particular check?

hm, i hadn't noticed that was duplicated. let me try to eliminate it.

> 
> > +int
> > +rte_thread_join(rte_thread_t thread_id, unsigned long *value_ptr)
> > +{
> > +	int ret = 0;
> > +	void *res = NULL;
> > +	void **pres = NULL;
> > +
> > +	if (value_ptr != NULL)
> > +		pres = &res;
> > +
> > +	ret = pthread_join((pthread_t)thread_id.opaque_id, pres);
> > +	if (ret != 0) {
> > +		RTE_LOG(DEBUG, EAL, "pthread_join failed\n");
> > +		return ret;
> > +	}
> > +
> > +	if (value_ptr != NULL && *pres != NULL)
> > +		*value_ptr = *(unsigned long *)(*pres);
> > +
> > +	return 0;
> > +}
> 
> What makes *pres == NULL special?
> 
> > +static DWORD
> > +thread_func_wrapper(void *args)
> > +{
> > +	struct thread_routine_ctx *pctx = args;
> > +	struct thread_routine_ctx ctx;
> > +
> > +	ctx.thread_func = pctx->thread_func;
> > +	ctx.routine_args = pctx->routine_args;
> 
> ctx = *pctx?

ack

> 
> > +
> > +	free(pctx);
> > +
> > +	return (DWORD)(uintptr_t)ctx.thread_func(ctx.routine_args);
> > +}
> > +
> > +int
> > +rte_thread_create(rte_thread_t *thread_id,
> > +		  const rte_thread_attr_t *thread_attr,
> > +		  rte_thread_func thread_func, void *args)
> > +{
> > +	int ret = 0;
> > +	DWORD tid;
> > +	HANDLE thread_handle = NULL;
> > +	GROUP_AFFINITY thread_affinity;
> > +	struct thread_routine_ctx *ctx = NULL;
> > +
> > +	ctx = calloc(1, sizeof(*ctx));
> > +	if (ctx == NULL) {
> > +		RTE_LOG(DEBUG, EAL, "Insufficient memory for thread context allocations\n");
> > +		ret = ENOMEM;
> > +		goto cleanup;
> > +	}
> > +	ctx->routine_args = args;
> > +	ctx->thread_func = thread_func;
> > +
> > +	thread_handle = CreateThread(NULL, 0, thread_func_wrapper, ctx,
> > +		CREATE_SUSPENDED, &tid);
> > +	if (thread_handle == NULL) {
> > +		ret = thread_log_last_error("CreateThread()");
> > +		free(ctx);
> > +		goto cleanup;
> 
> Missing `free(ctx)` from other error paths below.

ack

> 
> > +	}
> > +	thread_id->opaque_id = tid;
> > +
> > +	if (thread_attr != NULL) {
> > +		if (CPU_COUNT(&thread_attr->cpuset) > 0) {
> > +			ret = convert_cpuset_to_affinity(
> > +							&thread_attr->cpuset,
> > +							&thread_affinity
> > +							);
> > +			if (ret != 0) {
> > +				RTE_LOG(DEBUG, EAL, "Unable to convert cpuset to thread affinity\n");
> > +				goto cleanup;
> > +			}
> > +
> > +			if (!SetThreadGroupAffinity(thread_handle,
> > +						    &thread_affinity, NULL)) {
> > +				ret = thread_log_last_error("SetThreadGroupAffinity()");
> > +				goto cleanup;
> > +			}
> > +		}
> > +		ret = rte_thread_set_priority(*thread_id,
> > +				thread_attr->priority);
> > +		if (ret != 0) {
> > +			RTE_LOG(DEBUG, EAL, "Unable to set thread priority\n");
> > +			goto cleanup;
> > +		}
> > +	}
> > +
> > +	if (ResumeThread(thread_handle) == (DWORD)-1) {
> > +		ret = thread_log_last_error("ResumeThread()");
> > +		goto cleanup;
> > +	}
> > +
> > +cleanup:
> > +	if (thread_handle != NULL) {
> > +		CloseHandle(thread_handle);
> > +		thread_handle = NULL;
> > +	}
> > +
> > +	return ret;
> > +}

thanks let me tidy those bits up, i'll submit a revision this week.
  
Tyler Retzlaff June 21, 2022, 4:24 p.m. UTC | #3
On Sat, Jun 18, 2022 at 03:59:08PM +0300, Dmitry Kozlyuk wrote:
> 2022-06-14 16:47 (UTC-0700), Tyler Retzlaff:
> 
> > +int
> > +rte_thread_create(rte_thread_t *thread_id,
> > +		const rte_thread_attr_t *thread_attr,
> > +		rte_thread_func thread_func, void *args)
> > +{
> > [...]
> > +		if (thread_attr->priority ==
> > +				RTE_THREAD_PRIORITY_REALTIME_CRITICAL) {
> > +			ret = ENOTSUP;
> > +			goto cleanup;
> > +		}
> > +		ret = thread_map_priority_to_os_value(thread_attr->priority,
> > +				&param.sched_priority, &policy);
> > +		if (ret != 0)
> > +			goto cleanup;
> 
> thread_map_priority_to_os_value() already checks for unsupported values,
> why not let it do this particular check?

okay, so i looked at this more closely.

* thread_map_priority_to_os_value() just does mapping and
  RTE_THREAD_PRIORITY_REALTIME_CRITICAL is a valid mapping so it does not
  fail. by design it does one thing and one thing only perform the value
  mapping. admittedly it does not map every valid value right now, maybe it
  should?

  for consistency in the behavior of the function i'd suggest we don't
  make it have special cases in case we later decide to expand for
  additional valid mappings.

* rte_thread_set_priority() does fail with ENOTSUP on non-windows if
  provided RTE_THREAD_PRIORITY_REALTIME_CRITICAL however it cannot be
  used at this point since the thread has not been created.

if no further follow up on this i'm going to leave it as is.

thanks
  
Tyler Retzlaff June 21, 2022, 6:51 p.m. UTC | #4
On Sat, Jun 18, 2022 at 03:59:08PM +0300, Dmitry Kozlyuk wrote:
> 2022-06-14 16:47 (UTC-0700), Tyler Retzlaff:
> > On Windows, the function executed by a thread when the thread starts is
> > represeneted by a function pointer of type DWORD (*func) (void*).
> > On other platforms, the function pointer is a void* (*func) (void*).
> > 
> > Performing a cast between these two types of function pointers to
> > uniformize the API on all platforms may result in undefined behavior.
> > TO fix this issue, a wrapper that respects the signature required by
> > CreateThread() has been created on Windows.
> 
> The interface issue is still there:
> `rte_thread_func` allows the thread routine to have a pointer-sized result.
> `rte_thread_join()` allows to obtain this value as `unsigned long`,
> which is pointer-sized on 32-bit platforms
> and less than that on 64-bit platforms.
> This can lead to issues when developers assume they can return a pointer
> and this works on 32 bits, but doesn't work on 64 bits.
> If you want to keep API as close to phtread as possible,
> the limitation must be at least clearly documented in Doxygen
> (`rte_thread_func` is undocumented BTW).
> I also suggest using `uint32_t` instead of `unsigned long`
> precisely to avoid "is it pointer-sized?" doubts.
> (I don't see much benefit in keeping pthread-like signature.
> When moving from pthread to rte_thread,
> it is as trivial to change the thread routine return type.)

i'll alter the rte api to use fixed width uint32_t for thread result. it
seems like an unnecessary feature to return a pointer and it can be
accomplished through void *arg if necessary.

> 
> > +int
> > +rte_thread_join(rte_thread_t thread_id, unsigned long *value_ptr)
> > +{
> > +	int ret = 0;
> > +	void *res = NULL;
> > +	void **pres = NULL;
> > +
> > +	if (value_ptr != NULL)
> > +		pres = &res;
> > +
> > +	ret = pthread_join((pthread_t)thread_id.opaque_id, pres);
> > +	if (ret != 0) {
> > +		RTE_LOG(DEBUG, EAL, "pthread_join failed\n");
> > +		return ret;
> > +	}
> > +
> > +	if (value_ptr != NULL && *pres != NULL)
> > +		*value_ptr = *(unsigned long *)(*pres);
> > +
> > +	return 0;
> > +}
> 
> What makes *pres == NULL special?

it's not clear what you mean, can you explain? maybe there is some
context i am missing from the original patch series?

> > +int
> > +rte_thread_create(rte_thread_t *thread_id,
> > +		  const rte_thread_attr_t *thread_attr,
> > +		  rte_thread_func thread_func, void *args)
> > +{
> > +	int ret = 0;
> > +	DWORD tid;
> > +	HANDLE thread_handle = NULL;
> > +	GROUP_AFFINITY thread_affinity;
> > +	struct thread_routine_ctx *ctx = NULL;
> > +
> > +	ctx = calloc(1, sizeof(*ctx));
> > +	if (ctx == NULL) {
> > +		RTE_LOG(DEBUG, EAL, "Insufficient memory for thread context allocations\n");
> > +		ret = ENOMEM;
> > +		goto cleanup;
> > +	}
> > +	ctx->routine_args = args;
> > +	ctx->thread_func = thread_func;
> > +
> > +	thread_handle = CreateThread(NULL, 0, thread_func_wrapper, ctx,
> > +		CREATE_SUSPENDED, &tid);
> > +	if (thread_handle == NULL) {
> > +		ret = thread_log_last_error("CreateThread()");
> > +		free(ctx);
> > +		goto cleanup;
> 
> Missing `free(ctx)` from other error paths below.

beyond this point free(ctx) will happen in thread_func_wrapper. i will
add a comment to make it clear.

thanks.
  
Dmitry Kozlyuk June 21, 2022, 7:44 p.m. UTC | #5
2022-06-21 11:51 (UTC-0700), Tyler Retzlaff:
> > > +int
> > > +rte_thread_join(rte_thread_t thread_id, unsigned long *value_ptr)
> > > +{
> > > +	int ret = 0;
> > > +	void *res = NULL;
> > > +	void **pres = NULL;
> > > +
> > > +	if (value_ptr != NULL)
> > > +		pres = &res;
> > > +
> > > +	ret = pthread_join((pthread_t)thread_id.opaque_id, pres);
> > > +	if (ret != 0) {
> > > +		RTE_LOG(DEBUG, EAL, "pthread_join failed\n");
> > > +		return ret;
> > > +	}
> > > +
> > > +	if (value_ptr != NULL && *pres != NULL)
> > > +		*value_ptr = *(unsigned long *)(*pres);
> > > +
> > > +	return 0;
> > > +}  
> > 
> > What makes *pres == NULL special?  
> 
> it's not clear what you mean, can you explain? maybe there is some
> context i am missing from the original patch series?

There's no previous context.
After ptread_join(), *pres holds the return value of the thread routine.
You only assign *value_ptr if value_ptr is not NULL (obviously correct)
and if *pres != NULL, that is, if the thread returned a non-NULL value.
But this value is opaque, why do you filter NULL?
Perhaps you meant if (pres != NULL), no dereference?

> > > +int
> > > +rte_thread_create(rte_thread_t *thread_id,
> > > +		  const rte_thread_attr_t *thread_attr,
> > > +		  rte_thread_func thread_func, void *args)
> > > +{
> > > +	int ret = 0;
> > > +	DWORD tid;
> > > +	HANDLE thread_handle = NULL;
> > > +	GROUP_AFFINITY thread_affinity;
> > > +	struct thread_routine_ctx *ctx = NULL;
> > > +
> > > +	ctx = calloc(1, sizeof(*ctx));
> > > +	if (ctx == NULL) {
> > > +		RTE_LOG(DEBUG, EAL, "Insufficient memory for thread context allocations\n");
> > > +		ret = ENOMEM;
> > > +		goto cleanup;
> > > +	}
> > > +	ctx->routine_args = args;
> > > +	ctx->thread_func = thread_func;
> > > +
> > > +	thread_handle = CreateThread(NULL, 0, thread_func_wrapper, ctx,
> > > +		CREATE_SUSPENDED, &tid);
> > > +	if (thread_handle == NULL) {
> > > +		ret = thread_log_last_error("CreateThread()");
> > > +		free(ctx);
> > > +		goto cleanup;  
> > 
> > Missing `free(ctx)` from other error paths below.  
> 
> beyond this point free(ctx) will happen in thread_func_wrapper. i will
> add a comment to make it clear.

Not if you exit before ResumeThread()
and thread_func_wrapper() will never execute to call free().
  
Tyler Retzlaff June 21, 2022, 9:28 p.m. UTC | #6
On Tue, Jun 21, 2022 at 10:44:21PM +0300, Dmitry Kozlyuk wrote:
> 2022-06-21 11:51 (UTC-0700), Tyler Retzlaff:
> > > > +int
> > > > +rte_thread_join(rte_thread_t thread_id, unsigned long *value_ptr)
> > > > +{
> > > > +	int ret = 0;
> > > > +	void *res = NULL;
> > > > +	void **pres = NULL;
> > > > +
> > > > +	if (value_ptr != NULL)
> > > > +		pres = &res;
> > > > +
> > > > +	ret = pthread_join((pthread_t)thread_id.opaque_id, pres);
> > > > +	if (ret != 0) {
> > > > +		RTE_LOG(DEBUG, EAL, "pthread_join failed\n");
> > > > +		return ret;
> > > > +	}
> > > > +
> > > > +	if (value_ptr != NULL && *pres != NULL)
> > > > +		*value_ptr = *(unsigned long *)(*pres);
> > > > +
> > > > +	return 0;
> > > > +}  
> > > 
> > > What makes *pres == NULL special?  
> > 
> > it's not clear what you mean, can you explain? maybe there is some
> > context i am missing from the original patch series?
> 
> There's no previous context.
> After ptread_join(), *pres holds the return value of the thread routine.
> You only assign *value_ptr if value_ptr is not NULL (obviously correct)
> and if *pres != NULL, that is, if the thread returned a non-NULL value.
> But this value is opaque, why do you filter NULL?

i don't think it is opaque here? unsigned long * value_ptr says we have
to store an integer. which leads to a discussion of what should get
stored at the value_ptr location if pthread_join() itself returns no
result but the caller of rte_thread_join() requests the result.

> Perhaps you meant if (pres != NULL), no dereference?

that i think is just a repeat of a test checking if the caller of
rte_thread_join is interested in the result?
i.e. value_ptr != NULL -> pres != NULL

both pres and *pres are dereferenced so it seems to track that prior to
those dereferences they have to be validated as being non-NULL.
i don't see how we could avoid dereferencing **pres to satisfy the
calling contract when the result is requested.

now if value_ptr was unsigned long ** i guess i'd understand. i could
always be reading the code wrong. but thinking about further there is
another problem with this in that we really don't know what is being
aliased in *pres when using the pthread implementation, since pthread
could be returning a pointer to something narrow or with unknown layout
where later dereferencing it as something wider or in this case
specifically as unsigned long * would have horrible consequences.

i think this ends up semi-related to your other comment about what the
result type from rte_thread_func is, we can discuss offline and post
details back to the list.

> 
> > > > +int
> > > > +rte_thread_create(rte_thread_t *thread_id,
> > > > +		  const rte_thread_attr_t *thread_attr,
> > > > +		  rte_thread_func thread_func, void *args)
> > > > +{
> > > > +	int ret = 0;
> > > > +	DWORD tid;
> > > > +	HANDLE thread_handle = NULL;
> > > > +	GROUP_AFFINITY thread_affinity;
> > > > +	struct thread_routine_ctx *ctx = NULL;
> > > > +
> > > > +	ctx = calloc(1, sizeof(*ctx));
> > > > +	if (ctx == NULL) {
> > > > +		RTE_LOG(DEBUG, EAL, "Insufficient memory for thread context allocations\n");
> > > > +		ret = ENOMEM;
> > > > +		goto cleanup;
> > > > +	}
> > > > +	ctx->routine_args = args;
> > > > +	ctx->thread_func = thread_func;
> > > > +
> > > > +	thread_handle = CreateThread(NULL, 0, thread_func_wrapper, ctx,
> > > > +		CREATE_SUSPENDED, &tid);
> > > > +	if (thread_handle == NULL) {
> > > > +		ret = thread_log_last_error("CreateThread()");
> > > > +		free(ctx);
> > > > +		goto cleanup;  
> > > 
> > > Missing `free(ctx)` from other error paths below.  
> > 
> > beyond this point free(ctx) will happen in thread_func_wrapper. i will
> > add a comment to make it clear.
> 
> Not if you exit before ResumeThread()
> and thread_func_wrapper() will never execute to call free().

yes, you are right i forgot that this thread is created suspended.
  
Dmitry Kozlyuk June 21, 2022, 10:24 p.m. UTC | #7
2022-06-21 14:28 (UTC-0700), Tyler Retzlaff:
> On Tue, Jun 21, 2022 at 10:44:21PM +0300, Dmitry Kozlyuk wrote:
> > 2022-06-21 11:51 (UTC-0700), Tyler Retzlaff:  
> > > > > +int
> > > > > +rte_thread_join(rte_thread_t thread_id, unsigned long *value_ptr)
> > > > > +{
> > > > > +	int ret = 0;
> > > > > +	void *res = NULL;
> > > > > +	void **pres = NULL;
> > > > > +
> > > > > +	if (value_ptr != NULL)
> > > > > +		pres = &res;
> > > > > +
> > > > > +	ret = pthread_join((pthread_t)thread_id.opaque_id, pres);
> > > > > +	if (ret != 0) {
> > > > > +		RTE_LOG(DEBUG, EAL, "pthread_join failed\n");
> > > > > +		return ret;
> > > > > +	}
> > > > > +
> > > > > +	if (value_ptr != NULL && *pres != NULL)
> > > > > +		*value_ptr = *(unsigned long *)(*pres);
> > > > > +
> > > > > +	return 0;
> > > > > +}    
> > > > 
> > > > What makes *pres == NULL special?    
> > > 
> > > it's not clear what you mean, can you explain? maybe there is some
> > > context i am missing from the original patch series?  
> > 
> > There's no previous context.
> > After ptread_join(), *pres holds the return value of the thread routine.
> > You only assign *value_ptr if value_ptr is not NULL (obviously correct)
> > and if *pres != NULL, that is, if the thread returned a non-NULL value.
> > But this value is opaque, why do you filter NULL?  
> 
> i don't think it is opaque here? unsigned long * value_ptr says we have
> to store an integer. which leads to a discussion of what should get
> stored at the value_ptr location if pthread_join() itself returns no
> result but the caller of rte_thread_join() requests the result.

There is no question. If `pthread_join()` fails, the function exits early
and `*value_ptr` remains unmodified. If `pthread_join()` succeeds
with a non-NULL second argument (`pres`), `*pres` aka `res` is always filled.
NULL can be placed there too if that's what the thread routine has returned.

> 
> > Perhaps you meant if (pres != NULL), no dereference?  
> 
> that i think is just a repeat of a test checking if the caller of
> rte_thread_join is interested in the result?
> i.e. value_ptr != NULL -> pres != NULL
> 
> both pres and *pres are dereferenced so it seems to track that prior to
> those dereferences they have to be validated as being non-NULL.
> i don't see how we could avoid dereferencing **pres to satisfy the
> calling contract when the result is requested.
> 
> now if value_ptr was unsigned long ** i guess i'd understand. i could
> always be reading the code wrong. but thinking about further there is
> another problem with this in that we really don't know what is being
> aliased in *pres when using the pthread implementation, since pthread
> could be returning a pointer to something narrow or with unknown layout
> where later dereferencing it as something wider or in this case
> specifically as unsigned long * would have horrible consequences.

Sorry, no, it's just all wrong.
We get a pointer from `pthread_join()` and store it in `res`,
which can also be accessed as `*pres`, because `pres == &res`.
Then we store the lower bits of `res` in an `*value_ptr`.
We don't care what `res` points to because it is never dereferenced.

> 
> i think this ends up semi-related to your other comment about what the
> result type from rte_thread_func is, we can discuss offline and post
> details back to the list.
  
Tyler Retzlaff June 22, 2022, 6:21 p.m. UTC | #8
On Wed, Jun 22, 2022 at 01:24:14AM +0300, Dmitry Kozlyuk wrote:
> 2022-06-21 14:28 (UTC-0700), Tyler Retzlaff:
> > On Tue, Jun 21, 2022 at 10:44:21PM +0300, Dmitry Kozlyuk wrote:
> > > 2022-06-21 11:51 (UTC-0700), Tyler Retzlaff:  
> > > > > > +int
> > > > > > +rte_thread_join(rte_thread_t thread_id, unsigned long *value_ptr)
> > > > > > +{
> > > > > > +	int ret = 0;
> > > > > > +	void *res = NULL;
> > > > > > +	void **pres = NULL;
> > > > > > +
> > > > > > +	if (value_ptr != NULL)
> > > > > > +		pres = &res;
> > > > > > +
> > > > > > +	ret = pthread_join((pthread_t)thread_id.opaque_id, pres);
> > > > > > +	if (ret != 0) {
> > > > > > +		RTE_LOG(DEBUG, EAL, "pthread_join failed\n");
> > > > > > +		return ret;
> > > > > > +	}
> > > > > > +
> > > > > > +	if (value_ptr != NULL && *pres != NULL)
> > > > > > +		*value_ptr = *(unsigned long *)(*pres);
> > > > > > +
> > > > > > +	return 0;
> > > > > > +}    
> > > > > 
> > > > > What makes *pres == NULL special?    
> > > > 
> > > > it's not clear what you mean, can you explain? maybe there is some
> > > > context i am missing from the original patch series?  
> > > 
> > > There's no previous context.
> > > After ptread_join(), *pres holds the return value of the thread routine.
> > > You only assign *value_ptr if value_ptr is not NULL (obviously correct)
> > > and if *pres != NULL, that is, if the thread returned a non-NULL value.
> > > But this value is opaque, why do you filter NULL?  
> > 
> > i don't think it is opaque here? unsigned long * value_ptr says we have
> > to store an integer. which leads to a discussion of what should get
> > stored at the value_ptr location if pthread_join() itself returns no
> > result but the caller of rte_thread_join() requests the result.
> 
> There is no question. If `pthread_join()` fails, the function exits early
> and `*value_ptr` remains unmodified. If `pthread_join()` succeeds
> with a non-NULL second argument (`pres`), `*pres` aka `res` is always filled.
> NULL can be placed there too if that's what the thread routine has returned.

okay, discussed offline it was just my misunderstanding of impact on the
conditional block in the presence of the NULL check.

as discussed we'll rewrite the check as.

if (value_ptr != NULL)
    *value_ptr = (uint32_t)(uintptr_t)res;

this doesn't double dereference pres as the original code was and
therefore the *pres NULL check is unnecessary.

thanks!
  

Patch

diff --git a/lib/eal/include/rte_thread.h b/lib/eal/include/rte_thread.h
index 062308d..c27e580 100644
--- a/lib/eal/include/rte_thread.h
+++ b/lib/eal/include/rte_thread.h
@@ -30,6 +30,8 @@ 
 	uintptr_t opaque_id; /**< thread identifier */
 } rte_thread_t;
 
+typedef void * (*rte_thread_func) (void *);
+
 /**
  * Thread priority values.
  */
@@ -61,6 +63,69 @@  enum rte_thread_priority {
  * @warning
  * @b EXPERIMENTAL: this API may change without prior notice.
  *
+ * Create a new thread that will invoke the 'thread_func' routine.
+ *
+ * @param thread_id
+ *    A pointer that will store the id of the newly created thread.
+ *
+ * @param thread_attr
+ *    Attributes that are used at the creation of the new thread.
+ *
+ * @param thread_func
+ *    The routine that the new thread will invoke when starting execution.
+ *
+ * @param arg
+ *    Argument to be passed to the 'thread_func' routine.
+ *
+ * @return
+ *   On success, return 0.
+ *   On failure, return a positive errno-style error number.
+ */
+__rte_experimental
+int rte_thread_create(rte_thread_t *thread_id,
+		const rte_thread_attr_t *thread_attr,
+		rte_thread_func thread_func, void *arg);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Waits for the thread identified by 'thread_id' to terminate
+ *
+ * @param thread_id
+ *    The identifier of the thread.
+ *
+ * @param value_ptr
+ *    Stores the exit status of the thread.
+ *
+ * @return
+ *   On success, return 0.
+ *   On failure, return a positive errno-style error number.
+ */
+__rte_experimental
+int rte_thread_join(rte_thread_t thread_id, unsigned long *value_ptr);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Indicate that the return value of the thread is not needed and
+ * all thread resources should be release when the thread terminates.
+ *
+ * @param thread_id
+ *    The id of the thread to be detached.
+ *
+ * @return
+ *   On success, return 0.
+ *   On failure, return a positive errno-style error number.
+ */
+__rte_experimental
+int rte_thread_detach(rte_thread_t thread_id);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
  * Get the id of the calling thread.
  *
  * @return
diff --git a/lib/eal/unix/rte_thread.c b/lib/eal/unix/rte_thread.c
index 9126595..19c7b80 100644
--- a/lib/eal/unix/rte_thread.c
+++ b/lib/eal/unix/rte_thread.c
@@ -75,6 +75,114 @@  struct eal_tls_key {
 	return 0;
 }
 
+int
+rte_thread_create(rte_thread_t *thread_id,
+		const rte_thread_attr_t *thread_attr,
+		rte_thread_func thread_func, void *args)
+{
+	int ret = 0;
+	pthread_attr_t attr;
+	pthread_attr_t *attrp = NULL;
+	struct sched_param param = {
+		.sched_priority = 0,
+	};
+	int policy = SCHED_OTHER;
+
+	if (thread_attr != NULL) {
+		ret = pthread_attr_init(&attr);
+		if (ret != 0) {
+			RTE_LOG(DEBUG, EAL, "pthread_attr_init failed\n");
+			goto cleanup;
+		}
+
+		attrp = &attr;
+
+		/*
+		 * Set the inherit scheduler parameter to explicit,
+		 * otherwise the priority attribute is ignored.
+		 */
+		ret = pthread_attr_setinheritsched(attrp,
+				PTHREAD_EXPLICIT_SCHED);
+		if (ret != 0) {
+			RTE_LOG(DEBUG, EAL, "pthread_attr_setinheritsched failed\n");
+			goto cleanup;
+		}
+
+
+		if (thread_attr->priority ==
+				RTE_THREAD_PRIORITY_REALTIME_CRITICAL) {
+			ret = ENOTSUP;
+			goto cleanup;
+		}
+		ret = thread_map_priority_to_os_value(thread_attr->priority,
+				&param.sched_priority, &policy);
+		if (ret != 0)
+			goto cleanup;
+
+		ret = pthread_attr_setschedpolicy(attrp, policy);
+		if (ret != 0) {
+			RTE_LOG(DEBUG, EAL, "pthread_attr_setschedpolicy failed\n");
+			goto cleanup;
+		}
+
+		ret = pthread_attr_setschedparam(attrp, &param);
+		if (ret != 0) {
+			RTE_LOG(DEBUG, EAL, "pthread_attr_setschedparam failed\n");
+			goto cleanup;
+		}
+	}
+
+	ret = pthread_create((pthread_t *)&thread_id->opaque_id, attrp,
+		thread_func, args);
+	if (ret != 0) {
+		RTE_LOG(DEBUG, EAL, "pthread_create failed\n");
+		goto cleanup;
+	}
+
+	if (thread_attr != NULL && CPU_COUNT(&thread_attr->cpuset) > 0) {
+		ret = rte_thread_set_affinity_by_id(*thread_id,
+			&thread_attr->cpuset);
+		if (ret != 0) {
+			RTE_LOG(DEBUG, EAL, "rte_thread_set_affinity_by_id failed\n");
+			goto cleanup;
+		}
+	}
+
+cleanup:
+	if (attrp != NULL)
+		pthread_attr_destroy(&attr);
+
+	return ret;
+}
+
+int
+rte_thread_join(rte_thread_t thread_id, unsigned long *value_ptr)
+{
+	int ret = 0;
+	void *res = NULL;
+	void **pres = NULL;
+
+	if (value_ptr != NULL)
+		pres = &res;
+
+	ret = pthread_join((pthread_t)thread_id.opaque_id, pres);
+	if (ret != 0) {
+		RTE_LOG(DEBUG, EAL, "pthread_join failed\n");
+		return ret;
+	}
+
+	if (value_ptr != NULL && *pres != NULL)
+		*value_ptr = *(unsigned long *)(*pres);
+
+	return 0;
+}
+
+int
+rte_thread_detach(rte_thread_t thread_id)
+{
+	return pthread_detach((pthread_t)thread_id.opaque_id);
+}
+
 rte_thread_t
 rte_thread_self(void)
 {
diff --git a/lib/eal/version.map b/lib/eal/version.map
index 72e0e14..22e5c85 100644
--- a/lib/eal/version.map
+++ b/lib/eal/version.map
@@ -426,8 +426,11 @@  EXPERIMENTAL {
 	rte_thread_attr_init;
 	rte_thread_attr_set_affinity;
 	rte_thread_attr_set_priority;
+	rte_thread_create;
+	rte_thread_detach;
 	rte_thread_get_affinity_by_id;
 	rte_thread_get_priority;
+	rte_thread_join;
 	rte_thread_self;
 	rte_thread_set_affinity_by_id;
 	rte_thread_set_priority;
diff --git a/lib/eal/windows/include/sched.h b/lib/eal/windows/include/sched.h
index bc31cc8..912fed1 100644
--- a/lib/eal/windows/include/sched.h
+++ b/lib/eal/windows/include/sched.h
@@ -44,7 +44,7 @@ 
 	(1LL << _WHICH_BIT(b))) != 0LL)
 
 static inline int
-count_cpu(rte_cpuset_t *s)
+count_cpu(const rte_cpuset_t *s)
 {
 	unsigned int _i;
 	int count = 0;
diff --git a/lib/eal/windows/rte_thread.c b/lib/eal/windows/rte_thread.c
index 0771525..b5f2b04 100644
--- a/lib/eal/windows/rte_thread.c
+++ b/lib/eal/windows/rte_thread.c
@@ -13,6 +13,11 @@  struct eal_tls_key {
 	DWORD thread_index;
 };
 
+struct thread_routine_ctx {
+	rte_thread_func thread_func;
+	void *routine_args;
+};
+
 /* Translates the most common error codes related to threads */
 static int
 thread_translate_win32_error(DWORD error)
@@ -114,6 +119,178 @@  struct eal_tls_key {
 	return 0;
 }
 
+static int
+convert_cpuset_to_affinity(const rte_cpuset_t *cpuset,
+		PGROUP_AFFINITY affinity)
+{
+	int ret = 0;
+	PGROUP_AFFINITY cpu_affinity = NULL;
+	unsigned int cpu_idx;
+
+	memset(affinity, 0, sizeof(GROUP_AFFINITY));
+	affinity->Group = (USHORT)-1;
+
+	/* Check that all cpus of the set belong to the same processor group and
+	 * accumulate thread affinity to be applied.
+	 */
+	for (cpu_idx = 0; cpu_idx < CPU_SETSIZE; cpu_idx++) {
+		if (!CPU_ISSET(cpu_idx, cpuset))
+			continue;
+
+		cpu_affinity = eal_get_cpu_affinity(cpu_idx);
+
+		if (affinity->Group == (USHORT)-1) {
+			affinity->Group = cpu_affinity->Group;
+		} else if (affinity->Group != cpu_affinity->Group) {
+			RTE_LOG(DEBUG, EAL, "All processors must belong to the same processor group\n");
+			ret = ENOTSUP;
+			goto cleanup;
+		}
+
+		affinity->Mask |= cpu_affinity->Mask;
+	}
+
+	if (affinity->Mask == 0) {
+		ret = EINVAL;
+		goto cleanup;
+	}
+
+cleanup:
+	return ret;
+}
+
+
+static DWORD
+thread_func_wrapper(void *args)
+{
+	struct thread_routine_ctx *pctx = args;
+	struct thread_routine_ctx ctx;
+
+	ctx.thread_func = pctx->thread_func;
+	ctx.routine_args = pctx->routine_args;
+
+	free(pctx);
+
+	return (DWORD)(uintptr_t)ctx.thread_func(ctx.routine_args);
+}
+
+int
+rte_thread_create(rte_thread_t *thread_id,
+		  const rte_thread_attr_t *thread_attr,
+		  rte_thread_func thread_func, void *args)
+{
+	int ret = 0;
+	DWORD tid;
+	HANDLE thread_handle = NULL;
+	GROUP_AFFINITY thread_affinity;
+	struct thread_routine_ctx *ctx = NULL;
+
+	ctx = calloc(1, sizeof(*ctx));
+	if (ctx == NULL) {
+		RTE_LOG(DEBUG, EAL, "Insufficient memory for thread context allocations\n");
+		ret = ENOMEM;
+		goto cleanup;
+	}
+	ctx->routine_args = args;
+	ctx->thread_func = thread_func;
+
+	thread_handle = CreateThread(NULL, 0, thread_func_wrapper, ctx,
+		CREATE_SUSPENDED, &tid);
+	if (thread_handle == NULL) {
+		ret = thread_log_last_error("CreateThread()");
+		free(ctx);
+		goto cleanup;
+	}
+	thread_id->opaque_id = tid;
+
+	if (thread_attr != NULL) {
+		if (CPU_COUNT(&thread_attr->cpuset) > 0) {
+			ret = convert_cpuset_to_affinity(
+							&thread_attr->cpuset,
+							&thread_affinity
+							);
+			if (ret != 0) {
+				RTE_LOG(DEBUG, EAL, "Unable to convert cpuset to thread affinity\n");
+				goto cleanup;
+			}
+
+			if (!SetThreadGroupAffinity(thread_handle,
+						    &thread_affinity, NULL)) {
+				ret = thread_log_last_error("SetThreadGroupAffinity()");
+				goto cleanup;
+			}
+		}
+		ret = rte_thread_set_priority(*thread_id,
+				thread_attr->priority);
+		if (ret != 0) {
+			RTE_LOG(DEBUG, EAL, "Unable to set thread priority\n");
+			goto cleanup;
+		}
+	}
+
+	if (ResumeThread(thread_handle) == (DWORD)-1) {
+		ret = thread_log_last_error("ResumeThread()");
+		goto cleanup;
+	}
+
+cleanup:
+	if (thread_handle != NULL) {
+		CloseHandle(thread_handle);
+		thread_handle = NULL;
+	}
+
+	return ret;
+}
+
+int
+rte_thread_join(rte_thread_t thread_id, unsigned long *value_ptr)
+{
+	HANDLE thread_handle;
+	DWORD result;
+	DWORD exit_code = 0;
+	BOOL err;
+	int ret = 0;
+
+	thread_handle = OpenThread(SYNCHRONIZE | THREAD_QUERY_INFORMATION,
+				   FALSE, thread_id.opaque_id);
+	if (thread_handle == NULL) {
+		ret = thread_log_last_error("OpenThread()");
+		goto cleanup;
+	}
+
+	result = WaitForSingleObject(thread_handle, INFINITE);
+	if (result != WAIT_OBJECT_0) {
+		ret = thread_log_last_error("WaitForSingleObject()");
+		goto cleanup;
+	}
+
+	if (value_ptr != NULL) {
+		err = GetExitCodeThread(thread_handle, &exit_code);
+		if (err == 0) {
+			ret = thread_log_last_error("GetExitCodeThread()");
+			goto cleanup;
+		}
+		*value_ptr = exit_code;
+	}
+
+cleanup:
+	if (thread_handle != NULL) {
+		CloseHandle(thread_handle);
+		thread_handle = NULL;
+	}
+
+	return ret;
+}
+
+int
+rte_thread_detach(rte_thread_t thread_id)
+{
+	/* No resources that need to be released. */
+	RTE_SET_USED(thread_id);
+
+	return 0;
+}
+
 rte_thread_t
 rte_thread_self(void)
 {
@@ -278,46 +455,6 @@  struct eal_tls_key {
 	return output;
 }
 
-static int
-convert_cpuset_to_affinity(const rte_cpuset_t *cpuset,
-		PGROUP_AFFINITY affinity)
-{
-	int ret = 0;
-	PGROUP_AFFINITY cpu_affinity = NULL;
-	unsigned int cpu_idx;
-
-	memset(affinity, 0, sizeof(GROUP_AFFINITY));
-	affinity->Group = (USHORT)-1;
-
-	/* Check that all cpus of the set belong to the same processor group and
-	 * accumulate thread affinity to be applied.
-	 */
-	for (cpu_idx = 0; cpu_idx < CPU_SETSIZE; cpu_idx++) {
-		if (!CPU_ISSET(cpu_idx, cpuset))
-			continue;
-
-		cpu_affinity = eal_get_cpu_affinity(cpu_idx);
-
-		if (affinity->Group == (USHORT)-1) {
-			affinity->Group = cpu_affinity->Group;
-		} else if (affinity->Group != cpu_affinity->Group) {
-			RTE_LOG(DEBUG, EAL, "All processors must belong to the same processor group\n");
-			ret = ENOTSUP;
-			goto cleanup;
-		}
-
-		affinity->Mask |= cpu_affinity->Mask;
-	}
-
-	if (affinity->Mask == 0) {
-		ret = EINVAL;
-		goto cleanup;
-	}
-
-cleanup:
-	return ret;
-}
-
 int
 rte_thread_set_affinity_by_id(rte_thread_t thread_id,
 		const rte_cpuset_t *cpuset)