[dpdk-stable] [PATCH 04/67] net/mlx5: fix allocation when no memory on device NUMA node

Yongseok Koh yskoh at mellanox.com
Tue Jun 5 02:10:26 CEST 2018


From: Olivier Matz <olivier.matz at 6wind.com>

[ backported from upstream commit d10b09db0a455a27f1f1fa5e4a0fa7b1350706a5 ]

When no memory is available on the same numa node than the device, the
initialization of the device fails. However, the use case where the
cores and memory are on a different socket than the device is valid,
even if not optimal.

To fix this issue, this commit introduces an infrastructure to select
the socket on which to allocate the verbs objects based on the ethdev
configuration and the object type, rather than the PCI numa node.

Fixes: 1e3a39f72d5d ("net/mlx5: allocate verbs object into shared memory")
Cc: stable at dpdk.org

Signed-off-by: Olivier Matz <olivier.matz at 6wind.com>
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro at 6wind.com>
---
 drivers/net/mlx5/mlx5.c     | 14 ++++++++++++--
 drivers/net/mlx5/mlx5.h     | 20 ++++++++++++++++++++
 drivers/net/mlx5/mlx5_rxq.c |  4 ++++
 drivers/net/mlx5/mlx5_txq.c |  4 ++++
 4 files changed, 40 insertions(+), 2 deletions(-)

diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 7f44bb169..16ba5b724 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -156,10 +156,20 @@ mlx5_alloc_verbs_buf(size_t size, void *data)
 	struct priv *priv = data;
 	void *ret;
 	size_t alignment = sysconf(_SC_PAGESIZE);
+	unsigned int socket = SOCKET_ID_ANY;
 
+	if (priv->verbs_alloc_ctx.type == MLX5_VERBS_ALLOC_TYPE_TX_QUEUE) {
+		const struct mlx5_txq_ctrl *ctrl = priv->verbs_alloc_ctx.obj;
+
+		socket = ctrl->socket;
+	} else if (priv->verbs_alloc_ctx.type ==
+		   MLX5_VERBS_ALLOC_TYPE_RX_QUEUE) {
+		const struct mlx5_rxq_ctrl *ctrl = priv->verbs_alloc_ctx.obj;
+
+		socket = ctrl->socket;
+	}
 	assert(data != NULL);
-	ret = rte_malloc_socket(__func__, size, alignment,
-				priv->dev->device->numa_node);
+	ret = rte_malloc_socket(__func__, size, alignment, socket);
 	DEBUG("Extern alloc size: %lu, align: %lu: %p", size, alignment, ret);
 	return ret;
 }
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index d49595bc9..004e83670 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -90,6 +90,24 @@ struct mlx5_xstats_ctrl {
 /* Flow list . */
 TAILQ_HEAD(mlx5_flows, rte_flow);
 
+/**
+ * Type of objet being allocated.
+ */
+enum mlx5_verbs_alloc_type {
+	MLX5_VERBS_ALLOC_TYPE_NONE,
+	MLX5_VERBS_ALLOC_TYPE_TX_QUEUE,
+	MLX5_VERBS_ALLOC_TYPE_RX_QUEUE,
+};
+
+/**
+ * Verbs allocator needs a context to know in the callback which kind of
+ * resources it is allocating.
+ */
+struct mlx5_verbs_alloc_ctx {
+	enum mlx5_verbs_alloc_type type; /* Kind of object being allocated. */
+	const void *obj; /* Pointer to the DPDK object. */
+};
+
 struct priv {
 	struct rte_eth_dev *dev; /* Ethernet device of master process. */
 	struct ibv_context *ctx; /* Verbs context. */
@@ -149,6 +167,8 @@ struct priv {
 	rte_spinlock_t lock; /* Lock for control functions. */
 	int primary_socket; /* Unix socket for primary process. */
 	struct rte_intr_handle intr_handle_socket; /* Interrupt handler. */
+	struct mlx5_verbs_alloc_ctx verbs_alloc_ctx;
+	/* Context for Verbs allocator. */
 };
 
 /**
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index a4cdd374a..457f919b4 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -573,6 +573,8 @@ mlx5_priv_rxq_ibv_new(struct priv *priv, uint16_t idx)
 
 	assert(rxq_data);
 	assert(!rxq_ctrl->ibv);
+	priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_RX_QUEUE;
+	priv->verbs_alloc_ctx.obj = rxq_ctrl;
 	tmpl = rte_calloc_socket(__func__, 1, sizeof(*tmpl), 0,
 				 rxq_ctrl->socket);
 	if (!tmpl) {
@@ -736,6 +738,7 @@ mlx5_priv_rxq_ibv_new(struct priv *priv, uint16_t idx)
 	DEBUG("%p: Verbs Rx queue %p: refcnt %d", (void *)priv,
 	      (void *)tmpl, rte_atomic32_read(&tmpl->refcnt));
 	LIST_INSERT_HEAD(&priv->rxqsibv, tmpl, next);
+	priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE;
 	return tmpl;
 error:
 	if (tmpl->wq)
@@ -746,6 +749,7 @@ mlx5_priv_rxq_ibv_new(struct priv *priv, uint16_t idx)
 		claim_zero(ibv_destroy_comp_channel(tmpl->channel));
 	if (tmpl->mr)
 		priv_mr_release(priv, tmpl->mr);
+	priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE;
 	return NULL;
 }
 
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 7ca99f5aa..12f258bbb 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -307,6 +307,8 @@ mlx5_priv_txq_ibv_new(struct priv *priv, uint16_t idx)
 	int ret = 0;
 
 	assert(txq_data);
+	priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_TX_QUEUE;
+	priv->verbs_alloc_ctx.obj = txq_ctrl;
 	if (mlx5_getenv_int("MLX5_ENABLE_CQE_COMPRESSION")) {
 		ERROR("MLX5_ENABLE_CQE_COMPRESSION must never be set");
 		goto error;
@@ -435,12 +437,14 @@ mlx5_priv_txq_ibv_new(struct priv *priv, uint16_t idx)
 	DEBUG("%p: Verbs Tx queue %p: refcnt %d", (void *)priv,
 	      (void *)txq_ibv, rte_atomic32_read(&txq_ibv->refcnt));
 	LIST_INSERT_HEAD(&priv->txqsibv, txq_ibv, next);
+	priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE;
 	return txq_ibv;
 error:
 	if (tmpl.cq)
 		claim_zero(ibv_destroy_cq(tmpl.cq));
 	if (tmpl.qp)
 		claim_zero(ibv_destroy_qp(tmpl.qp));
+	priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE;
 	return NULL;
 }
 
-- 
2.11.0



More information about the stable mailing list