[dpdk-dev,v2,2/2] net/mlx5: fix allocation when no memory on device NUMA node

Message ID 20180119162517.4451-2-olivier.matz@6wind.com (mailing list archive)
State Superseded, archived
Delegated to: Shahaf Shuler
Headers

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation fail Compilation issues

Commit Message

Olivier Matz Jan. 19, 2018, 4:25 p.m. UTC
  If there is no memory available on the same numa node than the
device, it is preferable to fallback on another socket instead
of failing.

Fixes: 1e3a39f72d5d ("net/mlx5: allocate verbs object into shared memory")
Cc: stable@dpdk.org

Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---

This new version of the patch was provided by Nelio (thanks), I
validated it on my platform. I just did minimal changes to fix the
checkpatch issues in the comments of mlx5.h (/** instead of /*).

 drivers/net/mlx5/mlx5.c     | 14 ++++++++++++--
 drivers/net/mlx5/mlx5.h     | 20 ++++++++++++++++++++
 drivers/net/mlx5/mlx5_rxq.c |  4 ++++
 drivers/net/mlx5/mlx5_txq.c |  4 ++++
 4 files changed, 40 insertions(+), 2 deletions(-)
  

Comments

Shahaf Shuler Jan. 21, 2018, 6:58 a.m. UTC | #1
Friday, January 19, 2018 6:25 PM, Olivier Matz:
on the same numa node than the device, it is
> preferable to fallback on another socket instead of failing.
> 
> Fixes: 1e3a39f72d5d ("net/mlx5: allocate verbs object into shared memory")
> Cc: stable@dpdk.org
> 
> Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
> Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
> ---
> 
> This new version of the patch was provided by Nelio (thanks), I validated it
> on my platform. I just did minimal changes to fix the checkpatch issues in the
> comments of mlx5.h (/** instead of /*).

Per my understanding the below patch is to select the socket on which to create the Verbs object based on the ethdev configuration rather than the PCI numa node.
While it introduce the infrastructure to do fallback to other socket id, it is not yet used. 
I think the commit log should be modified to better explain this patch.

> 
>  drivers/net/mlx5/mlx5.c     | 14 ++++++++++++--
>  drivers/net/mlx5/mlx5.h     | 20 ++++++++++++++++++++
>  drivers/net/mlx5/mlx5_rxq.c |  4 ++++
>  drivers/net/mlx5/mlx5_txq.c |  4 ++++
>  4 files changed, 40 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index
> 1c95f3520..7a04ccf98 100644
> --- a/drivers/net/mlx5/mlx5.c
> +++ b/drivers/net/mlx5/mlx5.c
> @@ -139,10 +139,20 @@ mlx5_alloc_verbs_buf(size_t size, void *data)
>  	struct priv *priv = data;
>  	void *ret;
>  	size_t alignment = sysconf(_SC_PAGESIZE);
> +	unsigned int socket = SOCKET_ID_ANY;
> 
> +	if (priv->verbs_alloc_ctx.type ==
> MLX5_VERSB_ALLOC_TYPE_TX_QUEUE) {
> +		const struct mlx5_txq_ctrl *ctrl = priv->verbs_alloc_ctx.obj;
> +
> +		socket = ctrl->socket;
> +	} else if (priv->verbs_alloc_ctx.type ==
> +		   MLX5_VERSB_ALLOC_TYPE_RX_QUEUE) {
> +		const struct mlx5_rxq_ctrl *ctrl = priv->verbs_alloc_ctx.obj;
> +
> +		socket = ctrl->socket;
> +	}
>  	assert(data != NULL);
> -	ret = rte_malloc_socket(__func__, size, alignment,
> -				priv->dev->device->numa_node);
> +	ret = rte_malloc_socket(__func__, size, alignment, socket);
>  	DEBUG("Extern alloc size: %lu, align: %lu: %p", size, alignment, ret);
>  	return ret;
>  }
> diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index
> e740a4e77..abcae95b8 100644
> --- a/drivers/net/mlx5/mlx5.h
> +++ b/drivers/net/mlx5/mlx5.h
> @@ -123,6 +123,24 @@ struct mlx5_dev_config {
>  	int inline_max_packet_sz; /* Max packet size for inlining. */  };
> 
> +/**
> + * Type of objet being allocated.
> + */
> +enum mlx5_verbs_alloc_type {
> +	MLX5_VERSB_ALLOC_TYPE_NONE,
> +	MLX5_VERSB_ALLOC_TYPE_TX_QUEUE,
> +	MLX5_VERSB_ALLOC_TYPE_RX_QUEUE,
> +};
> +
> +/**
> + * Verbs allocator needs a context to know in the callback which kind
> +of
> + * resources it is allocating.
> + */
> +struct mlx5_verbs_alloc_ctx {
> +	enum mlx5_verbs_alloc_type type; /* Kind of object being allocated.
> */
> +	const void *obj; /* Pointer to the DPDK object. */ };
> +
>  struct priv {
>  	struct rte_eth_dev *dev; /* Ethernet device of master process. */
>  	struct ibv_context *ctx; /* Verbs context. */ @@ -164,6 +182,8 @@
> struct priv {
>  	int primary_socket; /* Unix socket for primary process. */
>  	struct rte_intr_handle intr_handle_socket; /* Interrupt handler. */
>  	struct mlx5_dev_config config; /* Device configuration. */
> +	struct mlx5_verbs_alloc_ctx verbs_alloc_ctx;
> +	/* Context for Verbs allocator. */
>  };
> 
>  /**
> diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
> index 950472754..a43a67526 100644
> --- a/drivers/net/mlx5/mlx5_rxq.c
> +++ b/drivers/net/mlx5/mlx5_rxq.c
> @@ -655,6 +655,8 @@ mlx5_priv_rxq_ibv_new(struct priv *priv, uint16_t
> idx)
> 
>  	assert(rxq_data);
>  	assert(!rxq_ctrl->ibv);
> +	priv->verbs_alloc_ctx.type = MLX5_VERSB_ALLOC_TYPE_RX_QUEUE;
> +	priv->verbs_alloc_ctx.obj = rxq_ctrl;
>  	tmpl = rte_calloc_socket(__func__, 1, sizeof(*tmpl), 0,
>  				 rxq_ctrl->socket);
>  	if (!tmpl) {
> @@ -818,6 +820,7 @@ mlx5_priv_rxq_ibv_new(struct priv *priv, uint16_t
> idx)
>  	DEBUG("%p: Verbs Rx queue %p: refcnt %d", (void *)priv,
>  	      (void *)tmpl, rte_atomic32_read(&tmpl->refcnt));
>  	LIST_INSERT_HEAD(&priv->rxqsibv, tmpl, next);
> +	priv->verbs_alloc_ctx.type = MLX5_VERSB_ALLOC_TYPE_NONE;
>  	return tmpl;
>  error:
>  	if (tmpl->wq)
> @@ -828,6 +831,7 @@ mlx5_priv_rxq_ibv_new(struct priv *priv, uint16_t
> idx)
>  		claim_zero(ibv_destroy_comp_channel(tmpl->channel));
>  	if (tmpl->mr)
>  		priv_mr_release(priv, tmpl->mr);
> +	priv->verbs_alloc_ctx.type = MLX5_VERSB_ALLOC_TYPE_NONE;
>  	return NULL;
>  }
> 
> diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
> index 26db15a4f..b43cc9ed0 100644
> --- a/drivers/net/mlx5/mlx5_txq.c
> +++ b/drivers/net/mlx5/mlx5_txq.c
> @@ -396,6 +396,8 @@ mlx5_priv_txq_ibv_new(struct priv *priv, uint16_t
> idx)
>  	int ret = 0;
> 
>  	assert(txq_data);
> +	priv->verbs_alloc_ctx.type = MLX5_VERSB_ALLOC_TYPE_TX_QUEUE;
> +	priv->verbs_alloc_ctx.obj = txq_ctrl;
>  	if (mlx5_getenv_int("MLX5_ENABLE_CQE_COMPRESSION")) {
>  		ERROR("MLX5_ENABLE_CQE_COMPRESSION must never be
> set");
>  		goto error;
> @@ -526,12 +528,14 @@ mlx5_priv_txq_ibv_new(struct priv *priv, uint16_t
> idx)
>  	DEBUG("%p: Verbs Tx queue %p: refcnt %d", (void *)priv,
>  	      (void *)txq_ibv, rte_atomic32_read(&txq_ibv->refcnt));
>  	LIST_INSERT_HEAD(&priv->txqsibv, txq_ibv, next);
> +	priv->verbs_alloc_ctx.type = MLX5_VERSB_ALLOC_TYPE_NONE;
>  	return txq_ibv;
>  error:
>  	if (tmpl.cq)
>  		claim_zero(ibv_destroy_cq(tmpl.cq));
>  	if (tmpl.qp)
>  		claim_zero(ibv_destroy_qp(tmpl.qp));
> +	priv->verbs_alloc_ctx.type = MLX5_VERSB_ALLOC_TYPE_NONE;
>  	return NULL;
>  }
> 
> --
> 2.11.0
  
Olivier Matz Jan. 22, 2018, 8:20 a.m. UTC | #2
Hi Shahaf,

On Sun, Jan 21, 2018 at 06:58:09AM +0000, Shahaf Shuler wrote:
> Friday, January 19, 2018 6:25 PM, Olivier Matz:
> on the same numa node than the device, it is
> > preferable to fallback on another socket instead of failing.
> > 
> > Fixes: 1e3a39f72d5d ("net/mlx5: allocate verbs object into shared memory")
> > Cc: stable@dpdk.org
> > 
> > Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
> > Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
> > ---
> > 
> > This new version of the patch was provided by Nelio (thanks), I validated it
> > on my platform. I just did minimal changes to fix the checkpatch issues in the
> > comments of mlx5.h (/** instead of /*).
> 
> Per my understanding the below patch is to select the socket on which to create the Verbs object based on the ethdev configuration rather than the PCI numa node.
> While it introduce the infrastructure to do fallback to other socket id, it is not yet used. 
> I think the commit log should be modified to better explain this patch.

That's right, the commit log should be updated, it's still the commit log
of the v1, which does not match.

> > 
> >  drivers/net/mlx5/mlx5.c     | 14 ++++++++++++--
> >  drivers/net/mlx5/mlx5.h     | 20 ++++++++++++++++++++
> >  drivers/net/mlx5/mlx5_rxq.c |  4 ++++
> >  drivers/net/mlx5/mlx5_txq.c |  4 ++++
> >  4 files changed, 40 insertions(+), 2 deletions(-)
> > 
> > diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index
> > 1c95f3520..7a04ccf98 100644
> > --- a/drivers/net/mlx5/mlx5.c
> > +++ b/drivers/net/mlx5/mlx5.c
> > @@ -139,10 +139,20 @@ mlx5_alloc_verbs_buf(size_t size, void *data)
> >  	struct priv *priv = data;
> >  	void *ret;
> >  	size_t alignment = sysconf(_SC_PAGESIZE);
> > +	unsigned int socket = SOCKET_ID_ANY;
> > 
> > +	if (priv->verbs_alloc_ctx.type ==
> > MLX5_VERSB_ALLOC_TYPE_TX_QUEUE) {

It looks we should also replace VERSB by VERBS :D

I will send a v3.
  

Patch

diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 1c95f3520..7a04ccf98 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -139,10 +139,20 @@  mlx5_alloc_verbs_buf(size_t size, void *data)
 	struct priv *priv = data;
 	void *ret;
 	size_t alignment = sysconf(_SC_PAGESIZE);
+	unsigned int socket = SOCKET_ID_ANY;
 
+	if (priv->verbs_alloc_ctx.type == MLX5_VERSB_ALLOC_TYPE_TX_QUEUE) {
+		const struct mlx5_txq_ctrl *ctrl = priv->verbs_alloc_ctx.obj;
+
+		socket = ctrl->socket;
+	} else if (priv->verbs_alloc_ctx.type ==
+		   MLX5_VERSB_ALLOC_TYPE_RX_QUEUE) {
+		const struct mlx5_rxq_ctrl *ctrl = priv->verbs_alloc_ctx.obj;
+
+		socket = ctrl->socket;
+	}
 	assert(data != NULL);
-	ret = rte_malloc_socket(__func__, size, alignment,
-				priv->dev->device->numa_node);
+	ret = rte_malloc_socket(__func__, size, alignment, socket);
 	DEBUG("Extern alloc size: %lu, align: %lu: %p", size, alignment, ret);
 	return ret;
 }
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index e740a4e77..abcae95b8 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -123,6 +123,24 @@  struct mlx5_dev_config {
 	int inline_max_packet_sz; /* Max packet size for inlining. */
 };
 
+/**
+ * Type of objet being allocated.
+ */
+enum mlx5_verbs_alloc_type {
+	MLX5_VERSB_ALLOC_TYPE_NONE,
+	MLX5_VERSB_ALLOC_TYPE_TX_QUEUE,
+	MLX5_VERSB_ALLOC_TYPE_RX_QUEUE,
+};
+
+/**
+ * Verbs allocator needs a context to know in the callback which kind of
+ * resources it is allocating.
+ */
+struct mlx5_verbs_alloc_ctx {
+	enum mlx5_verbs_alloc_type type; /* Kind of object being allocated. */
+	const void *obj; /* Pointer to the DPDK object. */
+};
+
 struct priv {
 	struct rte_eth_dev *dev; /* Ethernet device of master process. */
 	struct ibv_context *ctx; /* Verbs context. */
@@ -164,6 +182,8 @@  struct priv {
 	int primary_socket; /* Unix socket for primary process. */
 	struct rte_intr_handle intr_handle_socket; /* Interrupt handler. */
 	struct mlx5_dev_config config; /* Device configuration. */
+	struct mlx5_verbs_alloc_ctx verbs_alloc_ctx;
+	/* Context for Verbs allocator. */
 };
 
 /**
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 950472754..a43a67526 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -655,6 +655,8 @@  mlx5_priv_rxq_ibv_new(struct priv *priv, uint16_t idx)
 
 	assert(rxq_data);
 	assert(!rxq_ctrl->ibv);
+	priv->verbs_alloc_ctx.type = MLX5_VERSB_ALLOC_TYPE_RX_QUEUE;
+	priv->verbs_alloc_ctx.obj = rxq_ctrl;
 	tmpl = rte_calloc_socket(__func__, 1, sizeof(*tmpl), 0,
 				 rxq_ctrl->socket);
 	if (!tmpl) {
@@ -818,6 +820,7 @@  mlx5_priv_rxq_ibv_new(struct priv *priv, uint16_t idx)
 	DEBUG("%p: Verbs Rx queue %p: refcnt %d", (void *)priv,
 	      (void *)tmpl, rte_atomic32_read(&tmpl->refcnt));
 	LIST_INSERT_HEAD(&priv->rxqsibv, tmpl, next);
+	priv->verbs_alloc_ctx.type = MLX5_VERSB_ALLOC_TYPE_NONE;
 	return tmpl;
 error:
 	if (tmpl->wq)
@@ -828,6 +831,7 @@  mlx5_priv_rxq_ibv_new(struct priv *priv, uint16_t idx)
 		claim_zero(ibv_destroy_comp_channel(tmpl->channel));
 	if (tmpl->mr)
 		priv_mr_release(priv, tmpl->mr);
+	priv->verbs_alloc_ctx.type = MLX5_VERSB_ALLOC_TYPE_NONE;
 	return NULL;
 }
 
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 26db15a4f..b43cc9ed0 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -396,6 +396,8 @@  mlx5_priv_txq_ibv_new(struct priv *priv, uint16_t idx)
 	int ret = 0;
 
 	assert(txq_data);
+	priv->verbs_alloc_ctx.type = MLX5_VERSB_ALLOC_TYPE_TX_QUEUE;
+	priv->verbs_alloc_ctx.obj = txq_ctrl;
 	if (mlx5_getenv_int("MLX5_ENABLE_CQE_COMPRESSION")) {
 		ERROR("MLX5_ENABLE_CQE_COMPRESSION must never be set");
 		goto error;
@@ -526,12 +528,14 @@  mlx5_priv_txq_ibv_new(struct priv *priv, uint16_t idx)
 	DEBUG("%p: Verbs Tx queue %p: refcnt %d", (void *)priv,
 	      (void *)txq_ibv, rte_atomic32_read(&txq_ibv->refcnt));
 	LIST_INSERT_HEAD(&priv->txqsibv, txq_ibv, next);
+	priv->verbs_alloc_ctx.type = MLX5_VERSB_ALLOC_TYPE_NONE;
 	return txq_ibv;
 error:
 	if (tmpl.cq)
 		claim_zero(ibv_destroy_cq(tmpl.cq));
 	if (tmpl.qp)
 		claim_zero(ibv_destroy_qp(tmpl.qp));
+	priv->verbs_alloc_ctx.type = MLX5_VERSB_ALLOC_TYPE_NONE;
 	return NULL;
 }