[3/3] common/mlx5: disable relaxed ordering in unsuitable cpus

Message ID 1589286106-23411-4-git-send-email-shirik@mellanox.com (mailing list archive)
State Accepted, archived
Delegated to: Raslan Darawsheh
Headers
Series mlx5: relaxed ordering fixes |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/travis-robot warning Travis build: failed
ci/Intel-compilation success Compilation OK

Commit Message

Shiri Kuzin May 12, 2020, 12:21 p.m. UTC
  Relaxed ordering is a PCI optimization that enables reordering
reads/writes in order to improve performance.

Relaxed ordering was enabled for all processors, causing
a performance degradation on Haswell and Broadwell processors
that don't support this optimization.

To avoid that, we check whether the processor is Haswell
or Broadwell and, if so, disable relaxed ordering.

Signed-off-by: Shiri Kuzin <shirik@mellanox.com>
---
 drivers/common/mlx5/mlx5_common.c    | 82 ++++++++++++++++++++++++++++++++++++
 drivers/common/mlx5/mlx5_common.h    |  2 +
 drivers/common/mlx5/mlx5_common_mr.c |  6 ++-
 drivers/net/mlx5/mlx5_flow_dv.c      |  3 +-
 4 files changed, 90 insertions(+), 3 deletions(-)
  

Patch

diff --git a/drivers/common/mlx5/mlx5_common.c b/drivers/common/mlx5/mlx5_common.c
index 4261045..1c77763 100644
--- a/drivers/common/mlx5/mlx5_common.c
+++ b/drivers/common/mlx5/mlx5_common.c
@@ -22,6 +22,8 @@ 
 const struct mlx5_glue *mlx5_glue;
 #endif
 
+uint8_t haswell_broadwell_cpu;
+
 /**
  * Get PCI information by sysfs device path.
  *
@@ -292,6 +294,29 @@  enum mlx5_class
 
 #endif
 
+/* On x86_64, CPUID is used to identify Intel processors for which
+ * relaxed ordering should not be used.
+ */
+#ifdef RTE_ARCH_X86_64
+/**
+ * Execute the CPUID instruction for the requested leaf and store the
+ * resulting register values through the supplied pointers.
+ *
+ * @param level
+ *		CPUID leaf to query; it is loaded into EAX as the input operand.
+ * @param eax, ebx, ecx, edx
+ *		Output pointers receiving the EAX, EBX, ECX and EDX values.
+ */
+static inline void mlx5_cpu_id(unsigned int level,
+				unsigned int *eax, unsigned int *ebx,
+				unsigned int *ecx, unsigned int *edx)
+{
+	__asm__("cpuid\n\t"
+		: "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx)
+		: "0" (level));
+}
+#endif
+
 RTE_INIT_PRIO(mlx5_log_init, LOG)
 {
 	mlx5_common_logtype = rte_log_register("pmd.common.mlx5");
@@ -350,3 +375,60 @@  enum mlx5_class
 	mlx5_glue = NULL;
 	return;
 }
+
+/**
+ * Init routine (registered through RTE_INIT_PRIO) that sets the global
+ * haswell_broadwell_cpu flag.
+ * On x86_64 builds it queries mlx5_cpu_id() leaves 0 and 1. The flag is
+ * set to 1 when EBX of leaf 0 matches the "Genu" part of the Intel
+ * vendor string, the family is 6, the brand index is 0 and the combined
+ * model number is one of the listed Haswell or Broadwell models.
+ * In every other case, including non-x86_64 builds, the flag is set
+ * to 0. The flag is used to decide whether to enable relaxed ordering.
+ */
+RTE_INIT_PRIO(mlx5_is_haswell_broadwell_cpu, LOG)
+{
+#ifdef RTE_ARCH_X86_64
+	unsigned int broadwell_models[4] = {0x3d, 0x47, 0x4F, 0x56};
+	unsigned int haswell_models[4] = {0x3c, 0x3f, 0x45, 0x46};
+	unsigned int i, model, family, brand_id, vendor;
+	unsigned int signature_intel_ebx = 0x756e6547;
+	unsigned int extended_model;
+	unsigned int eax = 0;
+	unsigned int ebx = 0;
+	unsigned int ecx = 0;
+	unsigned int edx = 0;
+	int max_level;
+
+	mlx5_cpu_id(0, &eax, &ebx, &ecx, &edx);
+	vendor = ebx;
+	max_level = eax;
+	if (max_level < 1) { /* leaf 1 cannot be queried */
+		haswell_broadwell_cpu = 0;
+		return;
+	}
+	mlx5_cpu_id(1, &eax, &ebx, &ecx, &edx);
+	model = (eax >> 4) & 0x0f; /* EAX[7:4] */
+	family = (eax >> 8) & 0x0f; /* EAX[11:8] */
+	brand_id = ebx & 0xff; /* EBX[7:0] */
+	extended_model = (eax >> 12) & 0xf0; /* EAX[19:16] << 4 */
+	/* Check if the processor is Haswell or Broadwell */
+	if (vendor == signature_intel_ebx) {
+		if (family == 0x06)
+			model += extended_model;
+		if (brand_id == 0 && family == 0x6) {
+			for (i = 0; i < RTE_DIM(broadwell_models); i++)
+				if (model == broadwell_models[i]) {
+					haswell_broadwell_cpu = 1;
+					return;
+				}
+			for (i = 0; i < RTE_DIM(haswell_models); i++)
+				if (model == haswell_models[i]) {
+					haswell_broadwell_cpu = 1;
+					return;
+				}
+		}
+	}
+#endif
+	haswell_broadwell_cpu = 0;
+}
diff --git a/drivers/common/mlx5/mlx5_common.h b/drivers/common/mlx5/mlx5_common.h
index b37b820..8cd3ea5 100644
--- a/drivers/common/mlx5/mlx5_common.h
+++ b/drivers/common/mlx5/mlx5_common.h
@@ -213,4 +213,6 @@  enum mlx5_class {
 void mlx5_translate_port_name(const char *port_name_in,
 			      struct mlx5_switch_info *port_info_out);
 
+extern uint8_t haswell_broadwell_cpu;
+
 #endif /* RTE_PMD_MLX5_COMMON_H_ */
diff --git a/drivers/common/mlx5/mlx5_common_mr.c b/drivers/common/mlx5/mlx5_common_mr.c
index 9d4a06d..3b46446 100644
--- a/drivers/common/mlx5/mlx5_common_mr.c
+++ b/drivers/common/mlx5/mlx5_common_mr.c
@@ -770,7 +770,8 @@  struct mlx5_mr *
 	 */
 	mr->ibv_mr = mlx5_glue->reg_mr(pd, (void *)data.start, len,
 				       IBV_ACCESS_LOCAL_WRITE |
-					   IBV_ACCESS_RELAXED_ORDERING);
+				       (haswell_broadwell_cpu ? 0 :
+				       IBV_ACCESS_RELAXED_ORDERING));
 	if (mr->ibv_mr == NULL) {
 		DEBUG("Fail to create a verbs MR for address (%p)",
 		      (void *)addr);
@@ -1045,7 +1046,8 @@  struct mlx5_mr *
 		return NULL;
 	mr->ibv_mr = mlx5_glue->reg_mr(pd, (void *)addr, len,
 				       IBV_ACCESS_LOCAL_WRITE |
-					   IBV_ACCESS_RELAXED_ORDERING);
+				       (haswell_broadwell_cpu ? 0 :
+				       IBV_ACCESS_RELAXED_ORDERING));
 	if (mr->ibv_mr == NULL) {
 		DRV_LOG(WARNING,
 			"Fail to create a verbs MR for address (%p)",
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index c7702c5..8b018fb 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -4134,7 +4134,8 @@  struct field_modify_info modify_tcp[] = {
 	mkey_attr.klm_array = NULL;
 	mkey_attr.klm_num = 0;
 	if (priv->config.hca_attr.relaxed_ordering_write &&
-		priv->config.hca_attr.relaxed_ordering_read)
+		priv->config.hca_attr.relaxed_ordering_read  &&
+		!haswell_broadwell_cpu)
 		mkey_attr.relaxed_ordering = 1;
 	mem_mng->dm = mlx5_devx_cmd_mkey_create(sh->ctx, &mkey_attr);
 	if (!mem_mng->dm) {