[2/3] eventdev: use c11 atomics for lcore timer armed flag

Message ID 1591960798-24024-2-git-send-email-phil.yang@arm.com (mailing list archive)
State Superseded, archived
Delegated to: Jerin Jacob
Headers
Series [1/3] eventdev: fix race condition on timer list counter |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation fail Compilation issues

Commit Message

Phil Yang June 12, 2020, 11:19 a.m. UTC
  The in_use flag is a per-core variable which is not shared between
lcores in the normal case, and accesses to this variable should be
ordered on the same core. However, if a non-EAL thread picks the highest
lcore to insert timers into, there is a possibility of conflicts
on this flag between threads. Then an atomic CAS operation is needed.

Use C11 atomic CAS instead of the generic rte_atomic operations
to avoid the unnecessary barrier on aarch64.

Signed-off-by: Phil Yang <phil.yang@arm.com>
Reviewed-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 lib/librte_eventdev/rte_event_timer_adapter.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)
  

Comments

Carrillo, Erik G June 23, 2020, 9:01 p.m. UTC | #1
Hi Phil,

Comment in-line:

> -----Original Message-----
> From: Phil Yang <phil.yang@arm.com>
> Sent: Friday, June 12, 2020 6:20 AM
> To: dev@dpdk.org; Carrillo, Erik G <erik.g.carrillo@intel.com>
> Cc: drc@linux.vnet.ibm.com; honnappa.nagarahalli@arm.com;
> ruifeng.wang@arm.com; dharmik.thakkar@arm.com; nd@arm.com
> Subject: [PATCH 2/3] eventdev: use c11 atomics for lcore timer armed flag
> 
> The in_use flag is a per core variable which is not shared between lcores in
> the normal case and the access of this variable should be ordered on the
> same core. However, if non-EAL thread pick the highest lcore to insert timers
> into, there is the possibility of conflicts on this flag between threads. Then
> the atomic CAS operation is needed.
> 
> Use the c11 atomic CAS instead of the generic rte_atomic operations to avoid
> the unnecessary barrier on aarch64.
> 
> Signed-off-by: Phil Yang <phil.yang@arm.com>
> Reviewed-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> ---
>  lib/librte_eventdev/rte_event_timer_adapter.c | 15 +++++++++++----
>  1 file changed, 11 insertions(+), 4 deletions(-)
> 
> diff --git a/lib/librte_eventdev/rte_event_timer_adapter.c
> b/lib/librte_eventdev/rte_event_timer_adapter.c
> index 6a0e283..6947efb 100644
> --- a/lib/librte_eventdev/rte_event_timer_adapter.c
> +++ b/lib/librte_eventdev/rte_event_timer_adapter.c
> @@ -554,7 +554,7 @@ struct swtim {
>  	uint32_t timer_data_id;
>  	/* Track which cores have actually armed a timer */
>  	struct {
> -		rte_atomic16_t v;
> +		int16_t v;
>  	} __rte_cache_aligned in_use[RTE_MAX_LCORE];
>  	/* Track which cores' timer lists should be polled */
>  	unsigned int poll_lcores[RTE_MAX_LCORE]; @@ -606,7 +606,8 @@
> swtim_callback(struct rte_timer *tim)
>  				      "with immediate expiry value");
>  		}
> 
> -		if (unlikely(rte_atomic16_test_and_set(&sw-
> >in_use[lcore].v))) {
> +		if (unlikely(sw->in_use[lcore].v == 0)) {
> +			sw->in_use[lcore].v = 1;
>  			n_lcores = __atomic_fetch_add(&sw->n_poll_lcores,
> 1,
>  						__ATOMIC_RELAXED);
>  			__atomic_store_n(&sw->poll_lcores[n_lcores],
> lcore, @@ -834,7 +835,7 @@ swtim_init(struct rte_event_timer_adapter
> *adapter)
> 
>  	/* Initialize the variables that track in-use timer lists */
>  	for (i = 0; i < RTE_MAX_LCORE; i++)
> -		rte_atomic16_init(&sw->in_use[i].v);
> +		sw->in_use[i].v = 0;
> 
>  	/* Initialize the timer subsystem and allocate timer data instance */
>  	ret = rte_timer_subsystem_init();
> @@ -1017,6 +1018,8 @@ __swtim_arm_burst(const struct
> rte_event_timer_adapter *adapter,
>  	struct rte_timer *tim, *tims[nb_evtims];
>  	uint64_t cycles;
>  	int n_lcores;
> +	/* Timer is not armed state */

A more accurate comment would be something like "Timer list for this lcore is not in use".

With that change, it looks good to me:
Acked-by: Erik Gabriel Carrillo <erik.g.carrillo@intel.com>

> +	int16_t exp_state = 0;
> 
>  #ifdef RTE_LIBRTE_EVENTDEV_DEBUG
>  	/* Check that the service is running. */ @@ -1035,8 +1038,12 @@
> __swtim_arm_burst(const struct rte_event_timer_adapter *adapter,
>  	/* If this is the first time we're arming an event timer on this lcore,
>  	 * mark this lcore as "in use"; this will cause the service
>  	 * function to process the timer list that corresponds to this lcore.
> +	 * The atomic CAS operation can prevent the race condition on
> in_use
> +	 * flag between multiple non-EAL threads.
>  	 */
> -	if (unlikely(rte_atomic16_test_and_set(&sw->in_use[lcore_id].v))) {
> +	if (unlikely(__atomic_compare_exchange_n(&sw-
> >in_use[lcore_id].v,
> +			&exp_state, 1, 0,
> +			__ATOMIC_RELAXED, __ATOMIC_RELAXED))) {
>  		EVTIM_LOG_DBG("Adding lcore id = %u to list of lcores to
> poll",
>  			      lcore_id);
>  		n_lcores = __atomic_fetch_add(&sw->n_poll_lcores, 1,
> --
> 2.7.4
  
Stephen Hemminger June 23, 2020, 9:20 p.m. UTC | #2
On Fri, 12 Jun 2020 19:19:57 +0800
Phil Yang <phil.yang@arm.com> wrote:

>  	/* Track which cores have actually armed a timer */
>  	struct {
> -		rte_atomic16_t v;
> +		int16_t v;
>  	} __rte_cache_aligned in_use[RTE_MAX_LCORE];

Do you really need this to be cache aligned (ie one per line)?
Why have a signed value for a reference count? Shouldn't it be unsigned?
  
Carrillo, Erik G June 23, 2020, 9:31 p.m. UTC | #3
> -----Original Message-----
> From: Stephen Hemminger <stephen@networkplumber.org>
> Sent: Tuesday, June 23, 2020 4:20 PM
> To: Phil Yang <phil.yang@arm.com>
> Cc: dev@dpdk.org; Carrillo, Erik G <erik.g.carrillo@intel.com>;
> drc@linux.vnet.ibm.com; honnappa.nagarahalli@arm.com;
> ruifeng.wang@arm.com; dharmik.thakkar@arm.com; nd@arm.com
> Subject: Re: [dpdk-dev] [PATCH 2/3] eventdev: use c11 atomics for lcore
> timer armed flag
> 
> On Fri, 12 Jun 2020 19:19:57 +0800
> Phil Yang <phil.yang@arm.com> wrote:
> 
> >  	/* Track which cores have actually armed a timer */
> >  	struct {
> > -		rte_atomic16_t v;
> > +		int16_t v;
> >  	} __rte_cache_aligned in_use[RTE_MAX_LCORE];
> 
> Do you really need this to be cache aligned (ie one per line)?

I believe I did this originally to keep a cache line from bouncing when two different cores are arming timers, so it's not strictly necessary.

> Why have a signed value for a reference count? Shouldn't it be unsigned?
  
Phil Yang June 28, 2020, 4:12 p.m. UTC | #4
Hi Erik,

Sorry, I was on vacation.
Thanks for your feedback. I will update it in the next version.

Thanks,
Phil

> -----Original Message-----
> From: Carrillo, Erik G <erik.g.carrillo@intel.com>
> Sent: Wednesday, June 24, 2020 5:02 AM
> To: Phil Yang <Phil.Yang@arm.com>; dev@dpdk.org
> Cc: drc@linux.vnet.ibm.com; Honnappa Nagarahalli
> <Honnappa.Nagarahalli@arm.com>; Ruifeng Wang
> <Ruifeng.Wang@arm.com>; Dharmik Thakkar <Dharmik.Thakkar@arm.com>;
> nd <nd@arm.com>
> Subject: RE: [PATCH 2/3] eventdev: use c11 atomics for lcore timer armed
> flag
> 
> Hi Phil,
> 
> Comment in-line:
> 
> > -----Original Message-----
> > From: Phil Yang <phil.yang@arm.com>
> > Sent: Friday, June 12, 2020 6:20 AM
> > To: dev@dpdk.org; Carrillo, Erik G <erik.g.carrillo@intel.com>
> > Cc: drc@linux.vnet.ibm.com; honnappa.nagarahalli@arm.com;
> > ruifeng.wang@arm.com; dharmik.thakkar@arm.com; nd@arm.com
> > Subject: [PATCH 2/3] eventdev: use c11 atomics for lcore timer armed flag
> >
> > The in_use flag is a per core variable which is not shared between lcores in
> > the normal case and the access of this variable should be ordered on the
> > same core. However, if non-EAL thread pick the highest lcore to insert
> timers
> > into, there is the possibility of conflicts on this flag between threads. Then
> > the atomic CAS operation is needed.
> >
> > Use the c11 atomic CAS instead of the generic rte_atomic operations to
> avoid
> > the unnecessary barrier on aarch64.
> >
> > Signed-off-by: Phil Yang <phil.yang@arm.com>
> > Reviewed-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
> > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> > ---
> >  lib/librte_eventdev/rte_event_timer_adapter.c | 15 +++++++++++----
> >  1 file changed, 11 insertions(+), 4 deletions(-)
> >
> > diff --git a/lib/librte_eventdev/rte_event_timer_adapter.c
> > b/lib/librte_eventdev/rte_event_timer_adapter.c
> > index 6a0e283..6947efb 100644
> > --- a/lib/librte_eventdev/rte_event_timer_adapter.c
> > +++ b/lib/librte_eventdev/rte_event_timer_adapter.c
> > @@ -554,7 +554,7 @@ struct swtim {
> >  	uint32_t timer_data_id;
> >  	/* Track which cores have actually armed a timer */
> >  	struct {
> > -		rte_atomic16_t v;
> > +		int16_t v;
> >  	} __rte_cache_aligned in_use[RTE_MAX_LCORE];
> >  	/* Track which cores' timer lists should be polled */
> >  	unsigned int poll_lcores[RTE_MAX_LCORE]; @@ -606,7 +606,8 @@
> > swtim_callback(struct rte_timer *tim)
> >  				      "with immediate expiry value");
> >  		}
> >
> > -		if (unlikely(rte_atomic16_test_and_set(&sw-
> > >in_use[lcore].v))) {
> > +		if (unlikely(sw->in_use[lcore].v == 0)) {
> > +			sw->in_use[lcore].v = 1;
> >  			n_lcores = __atomic_fetch_add(&sw->n_poll_lcores,
> > 1,
> >  						__ATOMIC_RELAXED);
> >  			__atomic_store_n(&sw->poll_lcores[n_lcores],
> > lcore, @@ -834,7 +835,7 @@ swtim_init(struct rte_event_timer_adapter
> > *adapter)
> >
> >  	/* Initialize the variables that track in-use timer lists */
> >  	for (i = 0; i < RTE_MAX_LCORE; i++)
> > -		rte_atomic16_init(&sw->in_use[i].v);
> > +		sw->in_use[i].v = 0;
> >
> >  	/* Initialize the timer subsystem and allocate timer data instance */
> >  	ret = rte_timer_subsystem_init();
> > @@ -1017,6 +1018,8 @@ __swtim_arm_burst(const struct
> > rte_event_timer_adapter *adapter,
> >  	struct rte_timer *tim, *tims[nb_evtims];
> >  	uint64_t cycles;
> >  	int n_lcores;
> > +	/* Timer is not armed state */
> 
> A more accurate comment would be something like "Timer list for this lcore is
> not in use".
> 
> With that change, it looks good to me:
> Acked-by: Erik Gabriel Carrillo <erik.g.carrillo@intel.com>
> 
> > +	int16_t exp_state = 0;
> >
> >  #ifdef RTE_LIBRTE_EVENTDEV_DEBUG
> >  	/* Check that the service is running. */ @@ -1035,8 +1038,12 @@
> > __swtim_arm_burst(const struct rte_event_timer_adapter *adapter,
> >  	/* If this is the first time we're arming an event timer on this lcore,
> >  	 * mark this lcore as "in use"; this will cause the service
> >  	 * function to process the timer list that corresponds to this lcore.
> > +	 * The atomic CAS operation can prevent the race condition on
> > in_use
> > +	 * flag between multiple non-EAL threads.
> >  	 */
> > -	if (unlikely(rte_atomic16_test_and_set(&sw->in_use[lcore_id].v))) {
> > +	if (unlikely(__atomic_compare_exchange_n(&sw-
> > >in_use[lcore_id].v,
> > +			&exp_state, 1, 0,
> > +			__ATOMIC_RELAXED, __ATOMIC_RELAXED))) {
> >  		EVTIM_LOG_DBG("Adding lcore id = %u to list of lcores to
> > poll",
> >  			      lcore_id);
> >  		n_lcores = __atomic_fetch_add(&sw->n_poll_lcores, 1,
> > --
> > 2.7.4
  
Phil Yang June 28, 2020, 4:32 p.m. UTC | #5
> -----Original Message-----
> From: Carrillo, Erik G <erik.g.carrillo@intel.com>
> Sent: Wednesday, June 24, 2020 5:32 AM
> To: Stephen Hemminger <stephen@networkplumber.org>; Phil Yang
> <Phil.Yang@arm.com>
> Cc: dev@dpdk.org; drc@linux.vnet.ibm.com; Honnappa Nagarahalli
> <Honnappa.Nagarahalli@arm.com>; Ruifeng Wang
> <Ruifeng.Wang@arm.com>; Dharmik Thakkar <Dharmik.Thakkar@arm.com>;
> nd <nd@arm.com>
> Subject: RE: [dpdk-dev] [PATCH 2/3] eventdev: use c11 atomics for lcore
> timer armed flag
> 
> > -----Original Message-----
> > From: Stephen Hemminger <stephen@networkplumber.org>
> > Sent: Tuesday, June 23, 2020 4:20 PM
> > To: Phil Yang <phil.yang@arm.com>
> > Cc: dev@dpdk.org; Carrillo, Erik G <erik.g.carrillo@intel.com>;
> > drc@linux.vnet.ibm.com; honnappa.nagarahalli@arm.com;
> > ruifeng.wang@arm.com; dharmik.thakkar@arm.com; nd@arm.com
> > Subject: Re: [dpdk-dev] [PATCH 2/3] eventdev: use c11 atomics for lcore
> > timer armed flag
> >
> > On Fri, 12 Jun 2020 19:19:57 +0800
> > Phil Yang <phil.yang@arm.com> wrote:
> >
> > >  	/* Track which cores have actually armed a timer */
> > >  	struct {
> > > -		rte_atomic16_t v;
> > > +		int16_t v;
> > >  	} __rte_cache_aligned in_use[RTE_MAX_LCORE];
> >
> > Do you really need this to be cache aligned (ie one per line)?
> 
> I believe I did this originally to keep a cache line from bouncing when two
> different cores are arming timers, so it's not strictly necessary.

Yeah, if we remove it, these per-core variables might cause a false-sharing issue between threads,
which would hurt performance.

> 
> > Why have a signed value for a reference count? Shouldn't it be unsigned?

Yes. It should be unsigned in the new code. 
I will update it in the next version.

Thanks,
Phil
  

Patch

diff --git a/lib/librte_eventdev/rte_event_timer_adapter.c b/lib/librte_eventdev/rte_event_timer_adapter.c
index 6a0e283..6947efb 100644
--- a/lib/librte_eventdev/rte_event_timer_adapter.c
+++ b/lib/librte_eventdev/rte_event_timer_adapter.c
@@ -554,7 +554,7 @@  struct swtim {
 	uint32_t timer_data_id;
 	/* Track which cores have actually armed a timer */
 	struct {
-		rte_atomic16_t v;
+		int16_t v;
 	} __rte_cache_aligned in_use[RTE_MAX_LCORE];
 	/* Track which cores' timer lists should be polled */
 	unsigned int poll_lcores[RTE_MAX_LCORE];
@@ -606,7 +606,8 @@  swtim_callback(struct rte_timer *tim)
 				      "with immediate expiry value");
 		}
 
-		if (unlikely(rte_atomic16_test_and_set(&sw->in_use[lcore].v))) {
+		if (unlikely(sw->in_use[lcore].v == 0)) {
+			sw->in_use[lcore].v = 1;
 			n_lcores = __atomic_fetch_add(&sw->n_poll_lcores, 1,
 						__ATOMIC_RELAXED);
 			__atomic_store_n(&sw->poll_lcores[n_lcores], lcore,
@@ -834,7 +835,7 @@  swtim_init(struct rte_event_timer_adapter *adapter)
 
 	/* Initialize the variables that track in-use timer lists */
 	for (i = 0; i < RTE_MAX_LCORE; i++)
-		rte_atomic16_init(&sw->in_use[i].v);
+		sw->in_use[i].v = 0;
 
 	/* Initialize the timer subsystem and allocate timer data instance */
 	ret = rte_timer_subsystem_init();
@@ -1017,6 +1018,8 @@  __swtim_arm_burst(const struct rte_event_timer_adapter *adapter,
 	struct rte_timer *tim, *tims[nb_evtims];
 	uint64_t cycles;
 	int n_lcores;
+	/* Timer is not armed state */
+	int16_t exp_state = 0;
 
 #ifdef RTE_LIBRTE_EVENTDEV_DEBUG
 	/* Check that the service is running. */
@@ -1035,8 +1038,12 @@  __swtim_arm_burst(const struct rte_event_timer_adapter *adapter,
 	/* If this is the first time we're arming an event timer on this lcore,
 	 * mark this lcore as "in use"; this will cause the service
 	 * function to process the timer list that corresponds to this lcore.
+	 * The atomic CAS operation can prevent the race condition on in_use
+	 * flag between multiple non-EAL threads.
 	 */
-	if (unlikely(rte_atomic16_test_and_set(&sw->in_use[lcore_id].v))) {
+	if (unlikely(__atomic_compare_exchange_n(&sw->in_use[lcore_id].v,
+			&exp_state, 1, 0,
+			__ATOMIC_RELAXED, __ATOMIC_RELAXED))) {
 		EVTIM_LOG_DBG("Adding lcore id = %u to list of lcores to poll",
 			      lcore_id);
 		n_lcores = __atomic_fetch_add(&sw->n_poll_lcores, 1,