[v8,1/4] eal: add WC store functions

Message ID 1594982985-31551-2-git-send-email-radu.nicolau@intel.com (mailing list archive)
State Superseded, archived
Headers
Series eal: add WC store functions |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK
ci/iol-broadcom-Performance success Performance Testing PASS
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-testing success Testing PASS

Commit Message

Radu Nicolau July 17, 2020, 10:49 a.m. UTC
  Add rte_write32_wc and rte_write32_wc_relaxed functions
that implement 32-bit stores using the write-combining memory protocol.
Provide generic stubs and an x86 implementation.

Signed-off-by: Radu Nicolau <radu.nicolau@intel.com>
Acked-by: Bruce Richardson <bruce.richardson@intel.com>
---
 lib/librte_eal/arm/include/rte_io_64.h  | 12 +++++++
 lib/librte_eal/include/generic/rte_io.h | 48 ++++++++++++++++++++++++++++
 lib/librte_eal/x86/include/rte_io.h     | 56 +++++++++++++++++++++++++++++++++
 3 files changed, 116 insertions(+)
  

Comments

Ruifeng Wang July 20, 2020, 6:42 a.m. UTC | #1
> -----Original Message-----
> From: Radu Nicolau <radu.nicolau@intel.com>
> Sent: Friday, July 17, 2020 6:50 PM
> To: dev@dpdk.org
> Cc: beilei.xing@intel.com; jia.guo@intel.com; bruce.richardson@intel.com;
> konstantin.ananyev@intel.com; jerinjacobk@gmail.com;
> david.marchand@redhat.com; fiona.trahe@intel.com; wei.zhao1@intel.com;
> Ruifeng Wang <Ruifeng.Wang@arm.com>; Radu Nicolau
> <radu.nicolau@intel.com>
> Subject: [PATCH v8 1/4] eal: add WC store functions
> 
> Add rte_write32_wc and rte_write32_wc_relaxed functions that implement
> 32bit stores using write combining memory protocol.
> Provided generic stubs and x86 implementation.
> 
> Signed-off-by: Radu Nicolau <radu.nicolau@intel.com>
> Acked-by: Bruce Richardson <bruce.richardson@intel.com>
> ---
>  lib/librte_eal/arm/include/rte_io_64.h  | 12 +++++++
> lib/librte_eal/include/generic/rte_io.h | 48
> ++++++++++++++++++++++++++++
>  lib/librte_eal/x86/include/rte_io.h     | 56
> +++++++++++++++++++++++++++++++++
>  3 files changed, 116 insertions(+)
> 
> diff --git a/lib/librte_eal/arm/include/rte_io_64.h
> b/lib/librte_eal/arm/include/rte_io_64.h
> index e534624..d07d9cb 100644
> --- a/lib/librte_eal/arm/include/rte_io_64.h
> +++ b/lib/librte_eal/arm/include/rte_io_64.h
> @@ -164,6 +164,18 @@ rte_write64(uint64_t value, volatile void *addr)
>  	rte_write64_relaxed(value, addr);
>  }
> 
> +static __rte_always_inline void
> +rte_write32_wc(uint32_t value, volatile void *addr) {
> +	rte_write32(value, addr);
> +}
> +
> +static __rte_always_inline void
> +rte_write32_wc_relaxed(uint32_t value, volatile void *addr) {
> +	rte_write32_relaxed(value, addr);
> +}
> +
>  #ifdef __cplusplus
>  }
>  #endif
> diff --git a/lib/librte_eal/include/generic/rte_io.h
> b/lib/librte_eal/include/generic/rte_io.h
> index da457f7..0669baa 100644
> --- a/lib/librte_eal/include/generic/rte_io.h
> +++ b/lib/librte_eal/include/generic/rte_io.h
> @@ -229,6 +229,40 @@ rte_write32(uint32_t value, volatile void *addr);
> static inline void  rte_write64(uint64_t value, volatile void *addr);
> 
> +/**
> + * Write a 32-bit value to I/O device memory address addr using write
> + * combining memory write protocol. Depending on the platform write
> +combining
> + * may not be available and/or may be treated as a hint and the
> +behavior may
> + * fallback to a regular store.

I'm trying to understand the write-combining use cases here.
Is it applicable to all MMIO writes?
How do I identify where to use rte_write32_wc(_relaxed)?

Thanks.
/Ruifeng
> + *
> + * @param value
> + *  Value to write
> + * @param addr
> + *  I/O memory address to write the value to  */ __rte_experimental
> +static inline void rte_write32_wc(uint32_t value, volatile void *addr);
> +
> +/**
> + * Write a 32-bit value to I/O device memory address addr using write
> + * combining memory write protocol. Depending on the platform write
> +combining
> + * may not be available and/or may be treated as a hint and the
> +behavior may
> + * fallback to a regular store.
> + *
> + * The relaxed version does not have additional I/O memory barrier,
> +useful in
> + * accessing the device registers of integrated controllers which
> +implicitly
> + * strongly ordered with respect to memory access.
> + *
> + * @param value
> + *  Value to write
> + * @param addr
> + *  I/O memory address to write the value to  */ __rte_experimental
> +static inline void rte_write32_wc_relaxed(uint32_t value, volatile void
> +*addr);
> +
>  #endif /* __DOXYGEN__ */
> 
>  #ifndef RTE_OVERRIDE_IO_H
> @@ -345,6 +379,20 @@ rte_write64(uint64_t value, volatile void *addr)
>  	rte_write64_relaxed(value, addr);
>  }
> 
> +#ifndef RTE_NATIVE_WRITE32_WC
> +static __rte_always_inline void
> +rte_write32_wc(uint32_t value, volatile void *addr) {
> +	rte_write32(value, addr);
> +}
> +
> +static __rte_always_inline void
> +rte_write32_wc_relaxed(uint32_t value, volatile void *addr) {
> +	rte_write32_relaxed(value, addr);
> +}
> +#endif /* RTE_NATIVE_WRITE32_WC */
> +
>  #endif /* RTE_OVERRIDE_IO_H */
> 
>  #endif /* _RTE_IO_H_ */
> diff --git a/lib/librte_eal/x86/include/rte_io.h
> b/lib/librte_eal/x86/include/rte_io.h
> index 2db71b1..c95ed67 100644
> --- a/lib/librte_eal/x86/include/rte_io.h
> +++ b/lib/librte_eal/x86/include/rte_io.h
> @@ -9,8 +9,64 @@
>  extern "C" {
>  #endif
> 
> +#include "rte_cpuflags.h"
> +
> +#define RTE_NATIVE_WRITE32_WC
>  #include "generic/rte_io.h"
> 
> +/**
> + * @internal
> + * MOVDIRI wrapper.
> + */
> +static __rte_always_inline void
> +_rte_x86_movdiri(uint32_t value, volatile void *addr) {
> +	asm volatile(
> +		/* MOVDIRI */
> +		".byte 0x40, 0x0f, 0x38, 0xf9, 0x02"
> +		:
> +		: "a" (value), "d" (addr));
> +}
> +
> +static __rte_always_inline void
> +rte_write32_wc(uint32_t value, volatile void *addr) {
> +	static int _x86_movdiri_flag = -1;
> +	if (_x86_movdiri_flag == 1) {
> +		rte_wmb();
> +		_rte_x86_movdiri(value, addr);
> +	} else if (_x86_movdiri_flag == 0) {
> +		rte_write32(value, addr);
> +	} else {
> +		_x86_movdiri_flag =
> +
> 	(rte_cpu_get_flag_enabled(RTE_CPUFLAG_MOVDIRI) > 0);
> +		if (_x86_movdiri_flag == 1) {
> +			rte_wmb();
> +			_rte_x86_movdiri(value, addr);
> +		} else {
> +			rte_write32(value, addr);
> +		}
> +	}
> +}
> +
> +static __rte_always_inline void
> +rte_write32_wc_relaxed(uint32_t value, volatile void *addr) {
> +	static int _x86_movdiri_flag = -1;
> +	if (_x86_movdiri_flag == 1) {
> +		_rte_x86_movdiri(value, addr);
> +	} else if (_x86_movdiri_flag == 0) {
> +		rte_write32_relaxed(value, addr);
> +	} else {
> +		_x86_movdiri_flag =
> +
> 	(rte_cpu_get_flag_enabled(RTE_CPUFLAG_MOVDIRI) > 0);
> +		if (_x86_movdiri_flag == 1)
> +			_rte_x86_movdiri(value, addr);
> +		else
> +			rte_write32_relaxed(value, addr);
> +	}
> +}
> +
>  #ifdef __cplusplus
>  }
>  #endif
> --
> 2.7.4
  
Radu Nicolau July 20, 2020, 8:52 a.m. UTC | #2
On 7/20/2020 7:42 AM, Ruifeng Wang wrote:
>> -----Original Message-----
>> From: Radu Nicolau <radu.nicolau@intel.com>
>> Sent: Friday, July 17, 2020 6:50 PM
>> To: dev@dpdk.org
>> Cc: beilei.xing@intel.com; jia.guo@intel.com; bruce.richardson@intel.com;
>> konstantin.ananyev@intel.com; jerinjacobk@gmail.com;
>> david.marchand@redhat.com; fiona.trahe@intel.com; wei.zhao1@intel.com;
>> Ruifeng Wang <Ruifeng.Wang@arm.com>; Radu Nicolau
>> <radu.nicolau@intel.com>
>> Subject: [PATCH v8 1/4] eal: add WC store functions
>>
>> Add rte_write32_wc and rte_write32_wc_relaxed functions that implement
>> 32bit stores using write combining memory protocol.
>> Provided generic stubs and x86 implementation.
>>
>> Signed-off-by: Radu Nicolau <radu.nicolau@intel.com>
>> Acked-by: Bruce Richardson <bruce.richardson@intel.com>
>> ---
>>   lib/librte_eal/arm/include/rte_io_64.h  | 12 +++++++
>> lib/librte_eal/include/generic/rte_io.h | 48
>> ++++++++++++++++++++++++++++
>>   lib/librte_eal/x86/include/rte_io.h     | 56
>> +++++++++++++++++++++++++++++++++
>>   3 files changed, 116 insertions(+)
>>
>> diff --git a/lib/librte_eal/arm/include/rte_io_64.h
>> b/lib/librte_eal/arm/include/rte_io_64.h
>> index e534624..d07d9cb 100644
>> --- a/lib/librte_eal/arm/include/rte_io_64.h
>> +++ b/lib/librte_eal/arm/include/rte_io_64.h
>> @@ -164,6 +164,18 @@ rte_write64(uint64_t value, volatile void *addr)
>>   	rte_write64_relaxed(value, addr);
>>   }
>>
>> +static __rte_always_inline void
>> +rte_write32_wc(uint32_t value, volatile void *addr) {
>> +	rte_write32(value, addr);
>> +}
>> +
>> +static __rte_always_inline void
>> +rte_write32_wc_relaxed(uint32_t value, volatile void *addr) {
>> +	rte_write32_relaxed(value, addr);
>> +}
>> +
>>   #ifdef __cplusplus
>>   }
>>   #endif
>> diff --git a/lib/librte_eal/include/generic/rte_io.h
>> b/lib/librte_eal/include/generic/rte_io.h
>> index da457f7..0669baa 100644
>> --- a/lib/librte_eal/include/generic/rte_io.h
>> +++ b/lib/librte_eal/include/generic/rte_io.h
>> @@ -229,6 +229,40 @@ rte_write32(uint32_t value, volatile void *addr);
>> static inline void  rte_write64(uint64_t value, volatile void *addr);
>>
>> +/**
>> + * Write a 32-bit value to I/O device memory address addr using write
>> + * combining memory write protocol. Depending on the platform write
>> +combining
>> + * may not be available and/or may be treated as a hint and the
>> +behavior may
>> + * fallback to a regular store.
> I'm trying to understand write combining use cases here.
> Is it applicable for all MMIO writes?

It depends on the architecture and the specific use case, but generally
updating the tail registers is a good use case. It has some
particularities that prevent it from being a replacement for all MMIO
writes: it is weakly ordered and it bypasses the cache hierarchy.

> How to identify where to use rte_write32_wc(_relaxed)?
The relaxed version can be used in sections of the code that already
have the proper fencing, so as to avoid a redundant memory fence, or
when there is no need for a memory fence at all.
>
> Thanks.
> /Ruifeng
>> + *
>> + * @param value
>> + *  Value to write
>> + * @param addr
>> + *  I/O memory address to write the value to  */ __rte_experimental
>> +static inline void rte_write32_wc(uint32_t value, volatile void *addr);
>> +
>> +/**
>> + * Write a 32-bit value to I/O device memory address addr using write
>> + * combining memory write protocol. Depending on the platform write
>> +combining
>> + * may not be available and/or may be treated as a hint and the
>> +behavior may
>> + * fallback to a regular store.
>> + *
>> + * The relaxed version does not have additional I/O memory barrier,
>> +useful in
>> + * accessing the device registers of integrated controllers which
>> +implicitly
>> + * strongly ordered with respect to memory access.
>> + *
>> + * @param value
>> + *  Value to write
>> + * @param addr
>> + *  I/O memory address to write the value to  */ __rte_experimental
>> +static inline void rte_write32_wc_relaxed(uint32_t value, volatile void
>> +*addr);
>> +
>>   #endif /* __DOXYGEN__ */
>>
>>   #ifndef RTE_OVERRIDE_IO_H
>> @@ -345,6 +379,20 @@ rte_write64(uint64_t value, volatile void *addr)
>>   	rte_write64_relaxed(value, addr);
>>   }
>>
>> +#ifndef RTE_NATIVE_WRITE32_WC
>> +static __rte_always_inline void
>> +rte_write32_wc(uint32_t value, volatile void *addr) {
>> +	rte_write32(value, addr);
>> +}
>> +
>> +static __rte_always_inline void
>> +rte_write32_wc_relaxed(uint32_t value, volatile void *addr) {
>> +	rte_write32_relaxed(value, addr);
>> +}
>> +#endif /* RTE_NATIVE_WRITE32_WC */
>> +
>>   #endif /* RTE_OVERRIDE_IO_H */
>>
>>   #endif /* _RTE_IO_H_ */
>> diff --git a/lib/librte_eal/x86/include/rte_io.h
>> b/lib/librte_eal/x86/include/rte_io.h
>> index 2db71b1..c95ed67 100644
>> --- a/lib/librte_eal/x86/include/rte_io.h
>> +++ b/lib/librte_eal/x86/include/rte_io.h
>> @@ -9,8 +9,64 @@
>>   extern "C" {
>>   #endif
>>
>> +#include "rte_cpuflags.h"
>> +
>> +#define RTE_NATIVE_WRITE32_WC
>>   #include "generic/rte_io.h"
>>
>> +/**
>> + * @internal
>> + * MOVDIRI wrapper.
>> + */
>> +static __rte_always_inline void
>> +_rte_x86_movdiri(uint32_t value, volatile void *addr) {
>> +	asm volatile(
>> +		/* MOVDIRI */
>> +		".byte 0x40, 0x0f, 0x38, 0xf9, 0x02"
>> +		:
>> +		: "a" (value), "d" (addr));
>> +}
>> +
>> +static __rte_always_inline void
>> +rte_write32_wc(uint32_t value, volatile void *addr) {
>> +	static int _x86_movdiri_flag = -1;
>> +	if (_x86_movdiri_flag == 1) {
>> +		rte_wmb();
>> +		_rte_x86_movdiri(value, addr);
>> +	} else if (_x86_movdiri_flag == 0) {
>> +		rte_write32(value, addr);
>> +	} else {
>> +		_x86_movdiri_flag =
>> +
>> 	(rte_cpu_get_flag_enabled(RTE_CPUFLAG_MOVDIRI) > 0);
>> +		if (_x86_movdiri_flag == 1) {
>> +			rte_wmb();
>> +			_rte_x86_movdiri(value, addr);
>> +		} else {
>> +			rte_write32(value, addr);
>> +		}
>> +	}
>> +}
>> +
>> +static __rte_always_inline void
>> +rte_write32_wc_relaxed(uint32_t value, volatile void *addr) {
>> +	static int _x86_movdiri_flag = -1;
>> +	if (_x86_movdiri_flag == 1) {
>> +		_rte_x86_movdiri(value, addr);
>> +	} else if (_x86_movdiri_flag == 0) {
>> +		rte_write32_relaxed(value, addr);
>> +	} else {
>> +		_x86_movdiri_flag =
>> +
>> 	(rte_cpu_get_flag_enabled(RTE_CPUFLAG_MOVDIRI) > 0);
>> +		if (_x86_movdiri_flag == 1)
>> +			_rte_x86_movdiri(value, addr);
>> +		else
>> +			rte_write32_relaxed(value, addr);
>> +	}
>> +}
>> +
>>   #ifdef __cplusplus
>>   }
>>   #endif
>> --
>> 2.7.4
  

Patch

diff --git a/lib/librte_eal/arm/include/rte_io_64.h b/lib/librte_eal/arm/include/rte_io_64.h
index e534624..d07d9cb 100644
--- a/lib/librte_eal/arm/include/rte_io_64.h
+++ b/lib/librte_eal/arm/include/rte_io_64.h
@@ -164,6 +164,18 @@  rte_write64(uint64_t value, volatile void *addr)
 	rte_write64_relaxed(value, addr);
 }
 
+static __rte_always_inline void
+rte_write32_wc(uint32_t value, volatile void *addr)
+{
+	rte_write32(value, addr);
+}
+
+static __rte_always_inline void
+rte_write32_wc_relaxed(uint32_t value, volatile void *addr)
+{
+	rte_write32_relaxed(value, addr);
+}
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/librte_eal/include/generic/rte_io.h b/lib/librte_eal/include/generic/rte_io.h
index da457f7..0669baa 100644
--- a/lib/librte_eal/include/generic/rte_io.h
+++ b/lib/librte_eal/include/generic/rte_io.h
@@ -229,6 +229,40 @@  rte_write32(uint32_t value, volatile void *addr);
 static inline void
 rte_write64(uint64_t value, volatile void *addr);
 
+/**
+ * Write a 32-bit value to I/O device memory address addr using write
+ * combining memory write protocol. Depending on the platform write combining
+ * may not be available and/or may be treated as a hint and the behavior may
+ * fall back to a regular store.
+ *
+ * @param value
+ *  Value to write
+ * @param addr
+ *  I/O memory address to write the value to
+ */
+__rte_experimental
+static inline void
+rte_write32_wc(uint32_t value, volatile void *addr);
+
+/**
+ * Write a 32-bit value to I/O device memory address addr using write
+ * combining memory write protocol. Depending on the platform write combining
+ * may not be available and/or may be treated as a hint and the behavior may
+ * fall back to a regular store.
+ *
+ * The relaxed version does not have an additional I/O memory barrier; it is
+ * useful when accessing the device registers of integrated controllers, which
+ * are implicitly strongly ordered with respect to memory access.
+ *
+ * @param value
+ *  Value to write
+ * @param addr
+ *  I/O memory address to write the value to
+ */
+__rte_experimental
+static inline void
+rte_write32_wc_relaxed(uint32_t value, volatile void *addr);
+
 #endif /* __DOXYGEN__ */
 
 #ifndef RTE_OVERRIDE_IO_H
@@ -345,6 +379,20 @@  rte_write64(uint64_t value, volatile void *addr)
 	rte_write64_relaxed(value, addr);
 }
 
+#ifndef RTE_NATIVE_WRITE32_WC
+static __rte_always_inline void
+rte_write32_wc(uint32_t value, volatile void *addr)
+{
+	rte_write32(value, addr);
+}
+
+static __rte_always_inline void
+rte_write32_wc_relaxed(uint32_t value, volatile void *addr)
+{
+	rte_write32_relaxed(value, addr);
+}
+#endif /* RTE_NATIVE_WRITE32_WC */
+
 #endif /* RTE_OVERRIDE_IO_H */
 
 #endif /* _RTE_IO_H_ */
diff --git a/lib/librte_eal/x86/include/rte_io.h b/lib/librte_eal/x86/include/rte_io.h
index 2db71b1..c95ed67 100644
--- a/lib/librte_eal/x86/include/rte_io.h
+++ b/lib/librte_eal/x86/include/rte_io.h
@@ -9,8 +9,64 @@ 
 extern "C" {
 #endif
 
+#include "rte_cpuflags.h"
+
+#define RTE_NATIVE_WRITE32_WC
 #include "generic/rte_io.h"
 
+/**
+ * @internal
+ * MOVDIRI wrapper.
+ */
+static __rte_always_inline void
+_rte_x86_movdiri(uint32_t value, volatile void *addr)
+{
+	asm volatile(
+		/* MOVDIRI */
+		".byte 0x40, 0x0f, 0x38, 0xf9, 0x02"
+		:
+		: "a" (value), "d" (addr));
+}
+
+static __rte_always_inline void
+rte_write32_wc(uint32_t value, volatile void *addr)
+{
+	static int _x86_movdiri_flag = -1;
+	if (_x86_movdiri_flag == 1) {
+		rte_wmb();
+		_rte_x86_movdiri(value, addr);
+	} else if (_x86_movdiri_flag == 0) {
+		rte_write32(value, addr);
+	} else {
+		_x86_movdiri_flag =
+			(rte_cpu_get_flag_enabled(RTE_CPUFLAG_MOVDIRI) > 0);
+		if (_x86_movdiri_flag == 1) {
+			rte_wmb();
+			_rte_x86_movdiri(value, addr);
+		} else {
+			rte_write32(value, addr);
+		}
+	}
+}
+
+static __rte_always_inline void
+rte_write32_wc_relaxed(uint32_t value, volatile void *addr)
+{
+	static int _x86_movdiri_flag = -1;
+	if (_x86_movdiri_flag == 1) {
+		_rte_x86_movdiri(value, addr);
+	} else if (_x86_movdiri_flag == 0) {
+		rte_write32_relaxed(value, addr);
+	} else {
+		_x86_movdiri_flag =
+			(rte_cpu_get_flag_enabled(RTE_CPUFLAG_MOVDIRI) > 0);
+		if (_x86_movdiri_flag == 1)
+			_rte_x86_movdiri(value, addr);
+		else
+			rte_write32_relaxed(value, addr);
+	}
+}
+
 #ifdef __cplusplus
 }
 #endif