[v2,1/2] eal: add WC store functions
Checks
Commit Message
Add rte_write32_wc and rte_write32_wc_relaxed functions
that implement 32bit stores using write combining memory protocol.
Provided generic stubs and x86 implementation.
Signed-off-by: Radu Nicolau <radu.nicolau@intel.com>
---
v2 rework new eal io functions
lib/librte_eal/include/generic/rte_io.h | 47 ++++++++++++++++++++++++++
lib/librte_eal/x86/include/rte_io.h | 59 +++++++++++++++++++++++++++++++++
2 files changed, 106 insertions(+)
Comments
On Fri, Jun 19, 2020 at 01:06:19PM +0100, Radu Nicolau wrote:
> Add rte_write32_wc and rte_write32_wc_relaxed functions
> that implement 32bit stores using write combining memory protocol.
> Provided generic stubs and x86 implementation.
>
> Signed-off-by: Radu Nicolau <radu.nicolau@intel.com>
> ---
> v2 rework new eal io functions
>
I think you need to add into the cover letter, as suggest on V1 by
Konstantin, a bit about using WC memory for better performance.
> lib/librte_eal/include/generic/rte_io.h | 47 ++++++++++++++++++++++++++
> lib/librte_eal/x86/include/rte_io.h | 59 +++++++++++++++++++++++++++++++++
> 2 files changed, 106 insertions(+)
>
> diff --git a/lib/librte_eal/include/generic/rte_io.h b/lib/librte_eal/include/generic/rte_io.h
> index da457f7..7391782 100644
> --- a/lib/librte_eal/include/generic/rte_io.h
> +++ b/lib/librte_eal/include/generic/rte_io.h
> @@ -229,6 +229,39 @@ rte_write32(uint32_t value, volatile void *addr);
> static inline void
> rte_write64(uint64_t value, volatile void *addr);
>
> +/**
> + * Write a 32-bit value to I/O device memory address addr using write
> + * combining memory write protocol. Depending on the platform write combining
> + * may not be available and/or may be treated as a hint and the behavior may
> + * fallback to a regular store.
> + *
> + * @param value
> + * Value to write
> + * @param addr
> + * I/O memory address to write the value to
> + */
> +static inline void
> +rte_write32_wc(uint32_t value, volatile void *addr);
> +
> +/**
> + * Write a 32-bit value to I/O device memory address addr using write
> + * combining memory write protocol. Depending on the platform write combining
> + * may not be available and/or may be treated as a hint and the behavior may
> + * fallback to a regular store.
> + *
> + * The relaxed version does not have additional I/O memory barrier, useful in
> + * accessing the device registers of integrated controllers which implicitly
> + * strongly ordered with respect to memory access.
> + *
> + * @param value
> + * Value to write
> + * @param addr
> + * I/O memory address to write the value to
> + */
> +static inline void
> +rte_write32_wc_relaxed(uint32_t value, volatile void *addr);
> +
> +
> #endif /* __DOXYGEN__ */
>
> #ifndef RTE_OVERRIDE_IO_H
> @@ -345,6 +378,20 @@ rte_write64(uint64_t value, volatile void *addr)
> rte_write64_relaxed(value, addr);
> }
>
> +#ifndef RTE_NATIVE_WRITE32_WC
> +rte_write32_wc(uint32_t value, volatile void *addr)
> +{
> + rte_write32(value, addr);
> +}
> +
> +static __rte_always_inline void
> +rte_write32_wc_relaxed(uint32_t value, volatile void *addr)
> +{
> + rte_write32_relaxed(value, addr);
> +}
> +#endif /* RTE_NATIVE_WRITE32_WC */
> +
> +
> #endif /* RTE_OVERRIDE_IO_H */
>
I like this approach, since it saves duplicating the non-overridden
functions. Nice!
> #endif /* _RTE_IO_H_ */
> diff --git a/lib/librte_eal/x86/include/rte_io.h b/lib/librte_eal/x86/include/rte_io.h
> index 2db71b1..5efbf0d 100644
> --- a/lib/librte_eal/x86/include/rte_io.h
> +++ b/lib/librte_eal/x86/include/rte_io.h
> @@ -9,8 +9,67 @@
> extern "C" {
> #endif
>
> +#include "rte_cpuflags.h"
> +
> +#define RTE_NATIVE_WRITE32_WC
> #include "generic/rte_io.h"
>
> +/**
> + * @internal
> + * MOVDIRI wrapper.
> + */
> +static __rte_always_inline void
> +_rte_x86_movdiri(uint32_t value, volatile void *addr)
> +{
> + asm volatile(
> + /* MOVDIRI */
> + ".byte 0x40, 0x0f, 0x38, 0xf9, 0x02"
> + :
> + : "a" (value), "d" (addr));
> +}
> +
> +static __rte_always_inline void
> +rte_write32_wc(uint32_t value, volatile void *addr)
> +{
> + static int _x86_movdiri_flag = -1;
> + if (_x86_movdiri_flag == 1) {
> + rte_wmb();
> + _rte_x86_movdiri(value, addr);
> + } else if (_x86_movdiri_flag == 0) {
> + rte_write32(value, addr);
> + } else {
> + _x86_movdiri_flag =
> + (rte_cpu_get_flag_enabled(RTE_CPUFLAG_MOVDIRI) > 0);
> + if (_x86_movdiri_flag == 1) {
> + rte_wmb();
> + _rte_x86_movdiri(value, addr);
> + } else {
> + rte_write32(value, addr);
> + }
> + }
> +}
> +
> +static __rte_always_inline void
> +rte_write32_wc_relaxed(uint32_t value, volatile void *addr)
> +{
> + static int _x86_movdiri_flag = -1;
> + if (_x86_movdiri_flag == 1) {
> + _rte_x86_movdiri(value, addr);
> + } else if (_x86_movdiri_flag == 0) {
> + rte_write32_relaxed(value, addr);
> + } else {
> + _x86_movdiri_flag =
> + (rte_cpu_get_flag_enabled(RTE_CPUFLAG_MOVDIRI) > 0);
> + if (_x86_movdiri_flag == 1)
> + _rte_x86_movdiri(value, addr);
> + else
> + rte_write32_relaxed(value, addr);
> + }
> +}
> +
> +
> +
> +
Rather a lot of whitespace here.
> #ifdef __cplusplus
> }
> #endif
> --
> 2.7.4
With the nits called out above fixed:
Acked-by: Bruce Richardson <bruce.richarson@intel.com>
@@ -229,6 +229,39 @@ rte_write32(uint32_t value, volatile void *addr);
static inline void
rte_write64(uint64_t value, volatile void *addr);
+/**
+ * Write a 32-bit value to I/O device memory address addr using write
+ * combining memory write protocol. Depending on the platform write combining
+ * may not be available and/or may be treated as a hint and the behavior may
+ * fallback to a regular store.
+ *
+ * @param value
+ * Value to write
+ * @param addr
+ * I/O memory address to write the value to
+ */
+static inline void
+rte_write32_wc(uint32_t value, volatile void *addr);
+
+/**
+ * Write a 32-bit value to I/O device memory address addr using write
+ * combining memory write protocol. Depending on the platform write combining
+ * may not be available and/or may be treated as a hint and the behavior may
+ * fallback to a regular store.
+ *
+ * The relaxed version does not have additional I/O memory barrier, useful in
+ * accessing the device registers of integrated controllers which implicitly
+ * strongly ordered with respect to memory access.
+ *
+ * @param value
+ * Value to write
+ * @param addr
+ * I/O memory address to write the value to
+ */
+static inline void
+rte_write32_wc_relaxed(uint32_t value, volatile void *addr);
+
+
#endif /* __DOXYGEN__ */
#ifndef RTE_OVERRIDE_IO_H
@@ -345,6 +378,20 @@ rte_write64(uint64_t value, volatile void *addr)
rte_write64_relaxed(value, addr);
}
+#ifndef RTE_NATIVE_WRITE32_WC
+rte_write32_wc(uint32_t value, volatile void *addr)
+{
+ rte_write32(value, addr);
+}
+
+static __rte_always_inline void
+rte_write32_wc_relaxed(uint32_t value, volatile void *addr)
+{
+ rte_write32_relaxed(value, addr);
+}
+#endif /* RTE_NATIVE_WRITE32_WC */
+
+
#endif /* RTE_OVERRIDE_IO_H */
#endif /* _RTE_IO_H_ */
@@ -9,8 +9,67 @@
extern "C" {
#endif
+#include "rte_cpuflags.h"
+
+#define RTE_NATIVE_WRITE32_WC
#include "generic/rte_io.h"
+/**
+ * @internal
+ * MOVDIRI wrapper.
+ */
+static __rte_always_inline void
+_rte_x86_movdiri(uint32_t value, volatile void *addr)
+{
+ asm volatile(
+ /* MOVDIRI */
+ ".byte 0x40, 0x0f, 0x38, 0xf9, 0x02"
+ :
+ : "a" (value), "d" (addr));
+}
+
+static __rte_always_inline void
+rte_write32_wc(uint32_t value, volatile void *addr)
+{
+ static int _x86_movdiri_flag = -1;
+ if (_x86_movdiri_flag == 1) {
+ rte_wmb();
+ _rte_x86_movdiri(value, addr);
+ } else if (_x86_movdiri_flag == 0) {
+ rte_write32(value, addr);
+ } else {
+ _x86_movdiri_flag =
+ (rte_cpu_get_flag_enabled(RTE_CPUFLAG_MOVDIRI) > 0);
+ if (_x86_movdiri_flag == 1) {
+ rte_wmb();
+ _rte_x86_movdiri(value, addr);
+ } else {
+ rte_write32(value, addr);
+ }
+ }
+}
+
+static __rte_always_inline void
+rte_write32_wc_relaxed(uint32_t value, volatile void *addr)
+{
+ static int _x86_movdiri_flag = -1;
+ if (_x86_movdiri_flag == 1) {
+ _rte_x86_movdiri(value, addr);
+ } else if (_x86_movdiri_flag == 0) {
+ rte_write32_relaxed(value, addr);
+ } else {
+ _x86_movdiri_flag =
+ (rte_cpu_get_flag_enabled(RTE_CPUFLAG_MOVDIRI) > 0);
+ if (_x86_movdiri_flag == 1)
+ _rte_x86_movdiri(value, addr);
+ else
+ rte_write32_relaxed(value, addr);
+ }
+}
+
+
+
+
#ifdef __cplusplus
}
#endif