[dpdk-dev] [EXT] [PATCH v3 1/3] eal/arm64: add 128-bit atomic compare exchange
Honnappa Nagarahalli
Honnappa.Nagarahalli at arm.com
Fri Jul 5 06:20:33 CEST 2019
<snip>
> > > Subject: [EXT] [PATCH v3 1/3] eal/arm64: add 128-bit atomic compare
> > > exchange
> > >
> > > Add 128-bit atomic compare exchange on aarch64.
> > >
> > > Signed-off-by: Phil Yang <phil.yang at arm.com>
> > > Tested-by: Honnappa Nagarahalli <honnappa.nagarahalli at arm.com>
> > > Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli at arm.com>
> > > ---
> > > v3:
> > > 1. Avoid duplication code with macro. (Jerin Jocob)
> > > 2. Make invalid memory order to strongest barrier. (Jerin Jocob)
> > > 3. Update doc/guides/prog_guide/env_abstraction_layer.rst. (Eads Gage)
> > > 4. Fix 32-bit x86 builds issue. (Eads Gage)
> > > 5. Correct documentation issues in UT. (Eads Gage)
> > >
> > > .../common/include/arch/arm/rte_atomic_64.h | 165
> > > +++++++++++++++++++++
> > > .../common/include/arch/x86/rte_atomic_64.h | 12 --
> > > lib/librte_eal/common/include/generic/rte_atomic.h | 17 ++-
> > > 3 files changed, 181 insertions(+), 13 deletions(-)
> > >
> > > diff --git a/lib/librte_eal/common/include/arch/arm/rte_atomic_64.h
> > > b/lib/librte_eal/common/include/arch/arm/rte_atomic_64.h
> > > index 97060e4..2080c4d 100644
> > > --- a/lib/librte_eal/common/include/arch/arm/rte_atomic_64.h
> > > +++ b/lib/librte_eal/common/include/arch/arm/rte_atomic_64.h
> > > @@ -1,5 +1,6 @@
> > > /* SPDX-License-Identifier: BSD-3-Clause
> > > * Copyright(c) 2015 Cavium, Inc
> > > + * Copyright(c) 2019 Arm Limited
> > > */
> > >
> > > #ifndef _RTE_ATOMIC_ARM64_H_
> > > @@ -14,6 +15,9 @@ extern "C" {
> > > #endif
> > >
> > > #include "generic/rte_atomic.h"
> > > +#include <rte_branch_prediction.h>
> > > +#include <rte_compat.h>
> > > +#include <rte_debug.h>
> > >
> > > #define dsb(opt) asm volatile("dsb " #opt : : : "memory") #define
> > > dmb(opt) asm volatile("dmb " #opt : : : "memory") @@ -40,6 +44,167
> > > @@ extern "C" {
> > >
> > > #define rte_cio_rmb() dmb(oshld)
> > >
> > > +/*------------------------ 128 bit atomic operations
> > > +-------------------------*/
> > > +
> > > +#define RTE_HAS_ACQ(mo) ((mo) != __ATOMIC_RELAXED && (mo) !=
> > > +__ATOMIC_RELEASE) #define RTE_HAS_RLS(mo) ((mo) ==
> > __ATOMIC_RELEASE
> > > || \
> > > + (mo) == __ATOMIC_ACQ_REL || \
> > > + (mo) == __ATOMIC_SEQ_CST)
> > > +
> > > +#define RTE_MO_LOAD(mo) (RTE_HAS_ACQ((mo)) \
> > > + ? __ATOMIC_ACQUIRE : __ATOMIC_RELAXED) #define
> > > RTE_MO_STORE(mo)
> > > +(RTE_HAS_RLS((mo)) \
> > > + ? __ATOMIC_RELEASE : __ATOMIC_RELAXED)
> > > +
> > > +#ifdef __ARM_FEATURE_ATOMICS
> > > +#define __ATOMIC128_CAS_OP(cas_op_name, op_string)
> \
> > > +static inline rte_int128_t \
> > > +cas_op_name(rte_int128_t *dst, rte_int128_t old, \
> > > + rte_int128_t updated) \
> > > +{ \
> > > + /* caspX instructions register pair must start from even-numbered
> > > + * register at operand 1.
> > > + * So, specify registers for local variables here.
> > > + */ \
> > > + register uint64_t x0 __asm("x0") = (uint64_t)old.val[0]; \
> >
> > I understand the CASP limitation that the register pair has to be even and odd.
> > Is there any way to remove the explicit x0 register allocation and let the
> > compiler decide the register? For some reason, with optimize(O3), gcc
> > gets it right but clang does not.
> >
> > Hardcoding a specific register prevents the compiler from optimizing the
> > code, especially if it is an inline function.
>
> It looks like the limitation was fixed recently in gcc.
> https://patches.linaro.org/patch/147991/
>
> Not sure about old gcc and clang. ARM compiler experts may know the exact
> status
>
We could use the syntax as follows; an example is in [1]:
/*
 * Example 128-bit compare-and-swap helper built from a single inline-asm
 * "caspl" instruction. No operand is bound to a fixed register, so register
 * selection is left to the compiler.
 *
 * dst:     address used as the instruction's memory operand ([%2]).
 * old:     read-write operand ("+r"); whatever the instruction leaves in it
 *          is the function's return value.
 * updated: read-only input operand.
 * mo:      NOTE(review): not referenced in the body; the mnemonic is fixed
 *          to "caspl" regardless of the ordering passed in.
 *
 * NOTE(review): %H0 / %H1 presumably select the second register of each
 * 128-bit operand's register pair -- confirm against the compiler docs.
 */
static inline rte_int128_t
__rte_casp(rte_int128_t *dst, rte_int128_t old, rte_int128_t updated, int mo)
{
/* The "memory" clobber stops the compiler from caching memory across the asm. */
__asm__ volatile("caspl %0, %H0, %1, %H1, [%2]"
: "+r" (old)
: "r" (updated), "r" (dst)
: "memory");
return old;
}
[1] https://godbolt.org/z/EUJnuG
More information about the dev
mailing list