[dpdk-dev] [EXT] [PATCH v3 1/3] eal/arm64: add 128-bit atomic compare exchange

Honnappa Nagarahalli Honnappa.Nagarahalli at arm.com
Fri Jul 5 06:20:33 CEST 2019


<snip>

> > > Subject: [EXT] [PATCH v3 1/3] eal/arm64: add 128-bit atomic compare
> > > exchange
> > >
> > > Add 128-bit atomic compare exchange on aarch64.
> > >
> > > Signed-off-by: Phil Yang <phil.yang at arm.com>
> > > Tested-by: Honnappa Nagarahalli <honnappa.nagarahalli at arm.com>
> > > Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli at arm.com>
> > > ---
> > > v3:
> > > 1. Avoid duplication code with macro. (Jerin Jocob) 2. Make invalid
> > > memory order to strongest barrier. (Jerin Jocob) 3. Update
> > > doc/guides/prog_guide/env_abstraction_layer.rst. (Eads Gage) 4. Fix
> > > 32-bit x86 builds issue. (Eads Gage) 5. Correct documentation issues
> > > in UT. (Eads Gage)
> > >
> > >  .../common/include/arch/arm/rte_atomic_64.h        | 165
> > > +++++++++++++++++++++
> > >  .../common/include/arch/x86/rte_atomic_64.h        |  12 --
> > >  lib/librte_eal/common/include/generic/rte_atomic.h |  17 ++-
> > >  3 files changed, 181 insertions(+), 13 deletions(-)
> > >
> > > diff --git a/lib/librte_eal/common/include/arch/arm/rte_atomic_64.h
> > > b/lib/librte_eal/common/include/arch/arm/rte_atomic_64.h
> > > index 97060e4..2080c4d 100644
> > > --- a/lib/librte_eal/common/include/arch/arm/rte_atomic_64.h
> > > +++ b/lib/librte_eal/common/include/arch/arm/rte_atomic_64.h
> > > @@ -1,5 +1,6 @@
> > >  /* SPDX-License-Identifier: BSD-3-Clause
> > >   * Copyright(c) 2015 Cavium, Inc
> > > + * Copyright(c) 2019 Arm Limited
> > >   */
> > >
> > >  #ifndef _RTE_ATOMIC_ARM64_H_
> > > @@ -14,6 +15,9 @@ extern "C" {
> > >  #endif
> > >
> > >  #include "generic/rte_atomic.h"
> > > +#include <rte_branch_prediction.h>
> > > +#include <rte_compat.h>
> > > +#include <rte_debug.h>
> > >
> > >  #define dsb(opt) asm volatile("dsb " #opt : : : "memory")
> > >  #define dmb(opt) asm volatile("dmb " #opt : : : "memory")
> > > @@ -40,6 +44,167 @@ extern "C" {
> > >
> > >  #define rte_cio_rmb() dmb(oshld)
> > >
> > > +/*------------------------ 128 bit atomic operations
> > > +-------------------------*/
> > > +
> > > +#define RTE_HAS_ACQ(mo) ((mo) != __ATOMIC_RELAXED && (mo) != __ATOMIC_RELEASE)
> > > +#define RTE_HAS_RLS(mo) ((mo) == __ATOMIC_RELEASE || \
> > > +			 (mo) == __ATOMIC_ACQ_REL || \
> > > +			 (mo) == __ATOMIC_SEQ_CST)
> > > +
> > > +#define RTE_MO_LOAD(mo)  (RTE_HAS_ACQ((mo)) \
> > > +		? __ATOMIC_ACQUIRE : __ATOMIC_RELAXED)
> > > +#define RTE_MO_STORE(mo) (RTE_HAS_RLS((mo)) \
> > > +		? __ATOMIC_RELEASE : __ATOMIC_RELAXED)
> > > +
> > > +#ifdef __ARM_FEATURE_ATOMICS
> > > +#define __ATOMIC128_CAS_OP(cas_op_name, op_string)
> \
> > > +static inline rte_int128_t                                                  \
> > > +cas_op_name(rte_int128_t *dst, rte_int128_t old,                            \
> > > +		rte_int128_t updated)                                               \
> > > +{                                                                           \
> > > +	/* caspX instructions register pair must start from even-numbered
> > > +	 * register at operand 1.
> > > +	 * So, specify registers for local variables here.
> > > +	 */                                                                     \
> > > +	register uint64_t x0 __asm("x0") = (uint64_t)old.val[0];                \
> >
> > I understand the CASP limitation that the registers have to be an
> > even/odd pair.
> > Is there any way to remove the explicit x0 register allocation and let
> > the compiler decide the register? For some reason, with optimize(03)
> > gcc gets it right but clang does not.
> >
> > Hardcoding a specific register prevents the compiler from optimizing
> > the code, especially if it is an inline function.
> 
> It looks like the limitation was fixed recently in gcc.
> https://patches.linaro.org/patch/147991/
> 
> Not sure about old gcc and clang. ARM compiler experts may know the exact
> status
> 
We could use the following syntax; an example is in [1]:
/*
 * Example 128-bit compare-and-swap built on a single AArch64 `caspl`
 * instruction, using generic "r" register constraints instead of
 * hard-coded x0/x1 register variables.
 *
 * dst     - pointer used as the memory operand of the instruction ([%2]).
 * old     - read-write asm operand ("+r"): passed in as the compare value
 *           and returned after the instruction has run.
 * updated - input asm operand holding the value to be swapped in.
 * mo      - memory order requested by the caller.
 *           NOTE(review): mo is never read in this body; the mnemonic is
 *           hard-coded to `caspl` -- confirm whether other orderings need
 *           other instruction variants.
 *
 * Returns `old` as left by the instruction.
 */
static inline rte_int128_t
__rte_casp(rte_int128_t *dst, rte_int128_t old, rte_int128_t updated, int mo)
{
		/*
		 * %0/%H0 and %1/%H1 name the two registers of each 128-bit
		 * operand -- presumably %H selects the second register of the
		 * pair; verify against the compiler's operand-modifier docs.
		 * The "memory" clobber tells the compiler that memory may be
		 * changed by the instruction.
		 */
		__asm__ volatile("caspl %0, %H0, %1, %H1, [%2]"
				 : "+r" (old)
				 : "r" (updated), "r" (dst)
				 : "memory");
	return old;       
}

[1] https://godbolt.org/z/EUJnuG


More information about the dev mailing list