[v8,3/6] spinlock: use wfe to reduce contention on aarch64
Checks
Commit Message
When acquiring a spinlock, cores repeatedly poll the lock variable.
This polling is replaced by a WFE-based wait loop (cf. the rte_wait_until_equal API).
The micro-benchmarks and the testpmd and l3fwd traffic tests were run
on ThunderX2, Ampere eMAG80 and Arm N1SDP; everything went well and
neither a notable performance gain nor a degradation was measured.
Signed-off-by: Gavin Hu <gavin.hu@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
Reviewed-by: Phil Yang <phil.yang@arm.com>
Reviewed-by: Steve Capper <steve.capper@arm.com>
Reviewed-by: Ola Liljedahl <ola.liljedahl@arm.com>
Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
Tested-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
.../common/include/arch/arm/rte_spinlock.h | 26 ++++++++++++++++++++++
.../common/include/generic/rte_spinlock.h | 2 +-
2 files changed, 27 insertions(+), 1 deletion(-)
@@ -16,6 +16,32 @@ extern "C" {
#include <rte_common.h>
#include "generic/rte_spinlock.h"
+/* armv7a does support WFE, but an explicit wake-up signal using SEV is
+ * required (must be preceded by DSB to drain the store buffer) and
+ * this is less performant, so keep armv7a implementation unchanged.
+ */
+#ifdef RTE_ARM_USE_WFE
+static inline void
+rte_spinlock_lock(rte_spinlock_t *sl)
+{
+	unsigned int tmp;
+	/* Take the lock with LDAXR/STXR, sleeping in WFE while it is held.
+	 * SEVL lets the first WFE fall through; later ones wake on the event
+	 * raised when a store to the lock clears the monitor set by LDAXR.
+	 * No "ret": the asm must fall through. "memory" is a compiler barrier.
+	 */
+	asm volatile(
+		"sevl\n"
+		"1: wfe\n"
+		"2: ldaxr %w[tmp], %[locked]\n"
+		"cbnz %w[tmp], 1b\n"
+		"stxr %w[tmp], %w[one], %[locked]\n"
+		"cbnz %w[tmp], 2b\n"
+		: [tmp] "=&r" (tmp), [locked] "+Q" (sl->locked)
+		: [one] "r" (1) : "memory");
+}
+#endif
+
static inline int rte_tm_supported(void)
{
return 0;
@@ -57,7 +57,7 @@ rte_spinlock_init(rte_spinlock_t *sl)
static inline void
rte_spinlock_lock(rte_spinlock_t *sl);
-#ifdef RTE_FORCE_INTRINSICS
+#if defined(RTE_FORCE_INTRINSICS) && !defined(RTE_ARM_USE_WFE)
static inline void
rte_spinlock_lock(rte_spinlock_t *sl)
{