[dpdk-dev] [PATCH] EAL:fix memory barrier implementation on IBM POWER
Chao Zhu
chaozhu at linux.vnet.ibm.com
Fri Jul 15 04:30:19 CEST 2016
On weakly ordered memory architectures such as POWER, rte_smp_wmb() and
rte_smp_rmb() must issue CPU barrier instructions; a compiler barrier alone
is not sufficient. This patch fixes that. Also, to improve performance on
PPC64, use the lightweight sync (lwsync) instruction instead of the sync
instruction.
Signed-off-by: Chao Zhu <chaozhu at linux.vnet.ibm.com>
---
.../common/include/arch/ppc_64/rte_atomic.h | 12 ++++++++++--
1 files changed, 10 insertions(+), 2 deletions(-)
diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h b/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h
index feae486..924e894 100644
--- a/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h
@@ -62,7 +62,11 @@ extern "C" {
* Guarantees that the STORE operations generated before the barrier
* occur before the STORE operations generated after.
*/
+#ifdef RTE_ARCH_64
+#define rte_wmb() {asm volatile("lwsync" : : : "memory"); }
+#else
#define rte_wmb() {asm volatile("sync" : : : "memory"); }
+#endif
/**
* Read memory barrier.
@@ -70,13 +74,17 @@ extern "C" {
* Guarantees that the LOAD operations generated before the barrier
* occur before the LOAD operations generated after.
*/
+#ifdef RTE_ARCH_64
+#define rte_rmb() {asm volatile("lwsync" : : : "memory"); }
+#else
#define rte_rmb() {asm volatile("sync" : : : "memory"); }
+#endif
#define rte_smp_mb() rte_mb()
-#define rte_smp_wmb() rte_compiler_barrier()
+#define rte_smp_wmb() rte_wmb()
-#define rte_smp_rmb() rte_compiler_barrier()
+#define rte_smp_rmb() rte_rmb()
/*------------------------- 16 bit atomic operations -------------------------*/
/* To be compatible with Power7, use GCC built-in functions for 16 bit
--
1.7.1
More information about the dev
mailing list