[dpdk-stable] [PATCH 3/4] crypto/mlx5: fix the queue size configuration

Raja Zidane rzidane at nvidia.com
Thu Nov 4 13:49:28 CET 2021


The DevX interface for QP creation expects the number of WQEBBs.
Wrongly, the number of descriptors was provided instead.
In addition, the QP size must be a power of 2, which was not guaranteed.
Provide the number of WQEBBs to the QP creation API.
Round up the SQ size to a power of 2.
Rename rq_size to num_of_receive_wqes and sq_size to num_of_send_wqbbs.
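
To illustrate the conversion, here is a minimal standalone sketch (the
constant and parameter names mirror the driver's wqe_set_size and
MLX5_SEND_WQE_BB, but the helper itself is only an illustration under
those assumptions, not part of the patch):

    #include <stdint.h>

    #define SEND_WQE_BB 64u /* WQE basic block (WQEBB) size in bytes. */

    /* Round v up to the next power of 2 (v >= 1). */
    static inline uint32_t
    roundup_pow2(uint32_t v)
    {
            return v <= 1 ? 1u : 1u << (32 - __builtin_clz(v - 1));
    }

    /*
     * Number of send WQEBBs to pass to the QP creation API, assuming
     * each descriptor occupies wqe_set_size bytes of SQ space and the
     * result must be a power of 2.
     */
    static uint32_t
    send_wqbbs(uint32_t nb_desc, uint32_t wqe_set_size)
    {
            return roundup_pow2(nb_desc * (wqe_set_size / SEND_WQE_BB));
    }

For example, 64 descriptors with a 256B WQE set give 64 * 4 = 256 WQEBBs,
already a power of 2; any non-power-of-2 product is rounded up.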

Fixes: 6152534e211e ("crypto/mlx5: support queue pairs operations")
Cc: stable at dpdk.org

Signed-off-by: Raja Zidane <rzidane at nvidia.com>
Acked-by: Matan Azrad <matan at nvidia.com>
---
 drivers/common/mlx5/mlx5_devx_cmds.c    |  14 +--
 drivers/common/mlx5/mlx5_devx_cmds.h    |   5 +-
 drivers/compress/mlx5/mlx5_compress.c   |   4 +-
 drivers/crypto/mlx5/mlx5_crypto.c       | 120 +++++++++++++++++++-----
 drivers/crypto/mlx5/mlx5_crypto.h       |   8 +-
 drivers/regex/mlx5/mlx5_regex_control.c |   4 +-
 drivers/vdpa/mlx5/mlx5_vdpa_event.c     |   4 +-
 7 files changed, 120 insertions(+), 39 deletions(-)

diff --git a/drivers/common/mlx5/mlx5_devx_cmds.c b/drivers/common/mlx5/mlx5_devx_cmds.c
index 802c11c0d8..05382a66b8 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.c
+++ b/drivers/common/mlx5/mlx5_devx_cmds.c
@@ -832,6 +832,7 @@ mlx5_devx_cmd_query_hca_attr(void *ctx,
 			MLX5_HCA_CAP_OPMOD_GET_CUR);
 	if (!hcattr)
 		return rc;
+	attr->max_wqe_sz_sq = MLX5_GET(cmd_hca_cap, hcattr, max_wqe_sz_sq);
 	attr->flow_counter_bulk_alloc_bitmap =
 			MLX5_GET(cmd_hca_cap, hcattr, flow_counter_bulk_alloc);
 	attr->flow_counters_dump = MLX5_GET(cmd_hca_cap, hcattr,
@@ -2099,21 +2100,22 @@ mlx5_devx_cmd_create_qp(void *ctx,
 		if (attr->log_page_size > MLX5_ADAPTER_PAGE_SHIFT)
 			MLX5_SET(qpc, qpc, log_page_size,
 				 attr->log_page_size - MLX5_ADAPTER_PAGE_SHIFT);
-		if (attr->sq_size) {
-			MLX5_ASSERT(RTE_IS_POWER_OF_2(attr->sq_size));
+		if (attr->num_of_send_wqbbs) {
+			MLX5_ASSERT(RTE_IS_POWER_OF_2(attr->num_of_send_wqbbs));
 			MLX5_SET(qpc, qpc, cqn_snd, attr->cqn);
 			MLX5_SET(qpc, qpc, log_sq_size,
-				 rte_log2_u32(attr->sq_size));
+				 rte_log2_u32(attr->num_of_send_wqbbs));
 		} else {
 			MLX5_SET(qpc, qpc, no_sq, 1);
 		}
-		if (attr->rq_size) {
-			MLX5_ASSERT(RTE_IS_POWER_OF_2(attr->rq_size));
+		if (attr->num_of_receive_wqes) {
+			MLX5_ASSERT(RTE_IS_POWER_OF_2(
+					attr->num_of_receive_wqes));
 			MLX5_SET(qpc, qpc, cqn_rcv, attr->cqn);
 			MLX5_SET(qpc, qpc, log_rq_stride, attr->log_rq_stride -
 				 MLX5_LOG_RQ_STRIDE_SHIFT);
 			MLX5_SET(qpc, qpc, log_rq_size,
-				 rte_log2_u32(attr->rq_size));
+				 rte_log2_u32(attr->num_of_receive_wqes));
 			MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
 		} else {
 			MLX5_SET(qpc, qpc, rq_type, MLX5_ZERO_LEN_RQ);
diff --git a/drivers/common/mlx5/mlx5_devx_cmds.h b/drivers/common/mlx5/mlx5_devx_cmds.h
index 2326f1e968..fdc253da00 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.h
+++ b/drivers/common/mlx5/mlx5_devx_cmds.h
@@ -247,6 +247,7 @@ struct mlx5_hca_attr {
 	uint32_t log_max_mmo_decompress:5;
 	uint32_t umr_modify_entity_size_disabled:1;
 	uint32_t umr_indirect_mkey_disabled:1;
+	uint16_t max_wqe_sz_sq;
 };
 
 /* LAG Context. */
@@ -462,9 +463,9 @@ struct mlx5_devx_qp_attr {
 	uint32_t uar_index:24;
 	uint32_t cqn:24;
 	uint32_t log_page_size:5;
-	uint32_t rq_size:17; /* Must be power of 2. */
+	uint32_t num_of_receive_wqes:17; /* Must be power of 2. */
 	uint32_t log_rq_stride:3;
-	uint32_t sq_size:17; /* Must be power of 2. */
+	uint32_t num_of_send_wqbbs:17; /* Must be power of 2. */
 	uint32_t ts_format:2;
 	uint32_t dbr_umem_valid:1;
 	uint32_t dbr_umem_id;
diff --git a/drivers/compress/mlx5/mlx5_compress.c b/drivers/compress/mlx5/mlx5_compress.c
index c4081c5f7d..6bb750781f 100644
--- a/drivers/compress/mlx5/mlx5_compress.c
+++ b/drivers/compress/mlx5/mlx5_compress.c
@@ -243,8 +243,8 @@ mlx5_compress_qp_setup(struct rte_compressdev *dev, uint16_t qp_id,
 	qp_attr.cqn = qp->cq.cq->id;
 	qp_attr.ts_format =
 		mlx5_ts_format_conv(priv->cdev->config.hca_attr.qp_ts_format);
-	qp_attr.rq_size = 0;
-	qp_attr.sq_size = RTE_BIT32(log_ops_n);
+	qp_attr.num_of_receive_wqes = 0;
+	qp_attr.num_of_send_wqbbs = RTE_BIT32(log_ops_n);
 	qp_attr.mmo = priv->mmo_decomp_qp && priv->mmo_comp_qp
 			&& priv->mmo_dma_qp;
 	ret = mlx5_devx_qp_create(priv->cdev->ctx, &qp->qp, log_ops_n, &qp_attr,
diff --git a/drivers/crypto/mlx5/mlx5_crypto.c b/drivers/crypto/mlx5/mlx5_crypto.c
index 07c2a9c68b..7931a84070 100644
--- a/drivers/crypto/mlx5/mlx5_crypto.c
+++ b/drivers/crypto/mlx5/mlx5_crypto.c
@@ -557,7 +557,7 @@ mlx5_crypto_qp_init(struct mlx5_crypto_priv *priv, struct mlx5_crypto_qp *qp)
 		ucseg->if_cf_toe_cq_res = RTE_BE32(1u << MLX5_UMRC_IF_OFFSET);
 		ucseg->mkey_mask = RTE_BE64(1u << 0); /* Mkey length bit. */
 		ucseg->ko_to_bs = rte_cpu_to_be_32
-			((RTE_ALIGN(priv->max_segs_num, 4u) <<
+			((MLX5_CRYPTO_KLM_SEGS_NUM(priv->umr_wqe_size) <<
 			 MLX5_UMRC_KO_OFFSET) | (4 << MLX5_UMRC_TO_BS_OFFSET));
 		bsf->keytag = priv->keytag;
 		/* Init RDMA WRITE WQE. */
@@ -581,7 +581,7 @@ mlx5_crypto_indirect_mkeys_prepare(struct mlx5_crypto_priv *priv,
 		.umr_en = 1,
 		.crypto_en = 1,
 		.set_remote_rw = 1,
-		.klm_num = RTE_ALIGN(priv->max_segs_num, 4),
+		.klm_num = MLX5_CRYPTO_KLM_SEGS_NUM(priv->umr_wqe_size),
 	};
 
 	for (umr = (struct mlx5_umr_wqe *)qp->qp_obj.umem_buf, i = 0;
@@ -609,6 +609,7 @@ mlx5_crypto_queue_pair_setup(struct rte_cryptodev *dev, uint16_t qp_id,
 	uint16_t log_nb_desc = rte_log2_u32(qp_conf->nb_descriptors);
 	uint32_t ret;
 	uint32_t alloc_size = sizeof(*qp);
+	uint32_t log_wqbb_n;
 	struct mlx5_devx_cq_attr cq_attr = {
 		.uar_page_id = mlx5_os_get_devx_uar_page_id(priv->uar),
 	};
@@ -631,14 +632,16 @@ mlx5_crypto_queue_pair_setup(struct rte_cryptodev *dev, uint16_t qp_id,
 		DRV_LOG(ERR, "Failed to create CQ.");
 		goto error;
 	}
+	log_wqbb_n = rte_log2_u32(RTE_BIT32(log_nb_desc) *
+				(priv->wqe_set_size / MLX5_SEND_WQE_BB));
 	attr.pd = priv->cdev->pdn;
 	attr.uar_index = mlx5_os_get_devx_uar_page_id(priv->uar);
 	attr.cqn = qp->cq_obj.cq->id;
-	attr.rq_size = 0;
-	attr.sq_size = RTE_BIT32(log_nb_desc);
+	attr.num_of_receive_wqes = 0;
+	attr.num_of_send_wqbbs = RTE_BIT32(log_wqbb_n);
 	attr.ts_format =
 		mlx5_ts_format_conv(priv->cdev->config.hca_attr.qp_ts_format);
-	ret = mlx5_devx_qp_create(priv->cdev->ctx, &qp->qp_obj, log_nb_desc,
+	ret = mlx5_devx_qp_create(priv->cdev->ctx, &qp->qp_obj, log_wqbb_n,
 				  &attr, socket_id);
 	if (ret) {
 		DRV_LOG(ERR, "Failed to create QP.");
@@ -783,10 +786,8 @@ mlx5_crypto_args_check_handler(const char *key, const char *val, void *opaque)
 		return -errno;
 	}
 	if (strcmp(key, "max_segs_num") == 0) {
-		if (!tmp || tmp > MLX5_CRYPTO_MAX_SEGS) {
-			DRV_LOG(WARNING, "Invalid max_segs_num: %d, should"
-				" be less than %d.",
-				(uint32_t)tmp, MLX5_CRYPTO_MAX_SEGS);
+		if (!tmp) {
+			DRV_LOG(ERR, "max_segs_num must be greater than 0.");
 			rte_errno = EINVAL;
 			return -rte_errno;
 		}
@@ -845,6 +846,81 @@ mlx5_crypto_parse_devargs(struct rte_devargs *devargs,
 	return 0;
 }
 
+/*
+ * Calculate UMR WQE size and RDMA Write WQE size with the
+ * following limitations:
+ *	- Each WQE size is a multiple of 64B (one WQEBB).
+ *	- The sum of the UMR WQE size and the RDMA write WQE size is a power of 2.
+ *	- The number of entries in the UMR WQE's KLM list is a multiple of 4.
+ */
+static void
+mlx5_crypto_get_wqe_sizes(uint32_t segs_num, uint32_t *umr_size,
+			uint32_t *rdmaw_size)
+{
+	uint32_t diff, wqe_set_size;
+
+	*umr_size = MLX5_CRYPTO_UMR_WQE_STATIC_SIZE +
+			RTE_ALIGN(segs_num, 4) *
+			sizeof(struct mlx5_wqe_dseg);
+	/* Make sure UMR WQE size is multiple of WQBB. */
+	*umr_size = RTE_ALIGN(*umr_size, MLX5_SEND_WQE_BB);
+	*rdmaw_size = sizeof(struct mlx5_rdma_write_wqe) +
+			sizeof(struct mlx5_wqe_dseg) *
+			(segs_num <= 2 ? 2 : 2 +
+			RTE_ALIGN(segs_num - 2, 4));
+	/* Make sure RDMA_WRITE WQE size is multiple of WQBB. */
+	*rdmaw_size = RTE_ALIGN(*rdmaw_size, MLX5_SEND_WQE_BB);
+	wqe_set_size = *rdmaw_size + *umr_size;
+	diff = rte_align32pow2(wqe_set_size) - wqe_set_size;
+	/* Make sure wqe_set size is power of 2. */
+	if (diff)
+		*umr_size += diff;
+}
+
+static uint8_t
+mlx5_crypto_max_segs_num(uint16_t max_wqe_size)
+{
+	int klms_sizes = max_wqe_size - MLX5_CRYPTO_UMR_WQE_STATIC_SIZE;
+	uint32_t max_segs_cap = RTE_ALIGN_FLOOR(klms_sizes, MLX5_SEND_WQE_BB) /
+			sizeof(struct mlx5_wqe_dseg);
+
+	MLX5_ASSERT(klms_sizes >= MLX5_SEND_WQE_BB);
+	while (max_segs_cap) {
+		uint32_t umr_wqe_size, rdmw_wqe_size;
+
+		mlx5_crypto_get_wqe_sizes(max_segs_cap, &umr_wqe_size,
+						&rdmw_wqe_size);
+		if (umr_wqe_size <= max_wqe_size &&
+				rdmw_wqe_size <= max_wqe_size)
+			break;
+		max_segs_cap -= 4;
+	}
+	return max_segs_cap;
+}
+
+static int
+mlx5_crypto_configure_wqe_size(struct mlx5_crypto_priv *priv,
+				uint16_t max_wqe_size, uint32_t max_segs_num)
+{
+	uint32_t rdmw_wqe_size, umr_wqe_size;
+
+	mlx5_crypto_get_wqe_sizes(max_segs_num, &umr_wqe_size,
+					&rdmw_wqe_size);
+	priv->wqe_set_size = rdmw_wqe_size + umr_wqe_size;
+	if (umr_wqe_size > max_wqe_size ||
+				rdmw_wqe_size > max_wqe_size) {
+		DRV_LOG(ERR, "Invalid max_segs_num: %u, should be %u or lower.",
+			max_segs_num,
+			mlx5_crypto_max_segs_num(max_wqe_size));
+		rte_errno = EINVAL;
+		return -EINVAL;
+	}
+	priv->umr_wqe_size = (uint16_t)umr_wqe_size;
+	priv->umr_wqe_stride = priv->umr_wqe_size / MLX5_SEND_WQE_BB;
+	priv->max_rdmar_ds = rdmw_wqe_size / sizeof(struct mlx5_wqe_dseg);
+	return 0;
+}
+
 static int
 mlx5_crypto_dev_probe(struct mlx5_common_device *cdev)
 {
@@ -860,7 +936,6 @@ mlx5_crypto_dev_probe(struct mlx5_common_device *cdev)
 				RTE_CRYPTODEV_PMD_DEFAULT_MAX_NB_QUEUE_PAIRS,
 	};
 	const char *ibdev_name = mlx5_os_get_ctx_device_name(cdev->ctx);
-	uint16_t rdmw_wqe_size;
 	int ret;
 
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
@@ -907,20 +982,17 @@ mlx5_crypto_dev_probe(struct mlx5_common_device *cdev)
 		return -1;
 	}
 	priv->keytag = rte_cpu_to_be_64(devarg_prms.keytag);
-	priv->max_segs_num = devarg_prms.max_segs_num;
-	priv->umr_wqe_size = sizeof(struct mlx5_wqe_umr_bsf_seg) +
-			     sizeof(struct mlx5_wqe_cseg) +
-			     sizeof(struct mlx5_wqe_umr_cseg) +
-			     sizeof(struct mlx5_wqe_mkey_cseg) +
-			     RTE_ALIGN(priv->max_segs_num, 4) *
-			     sizeof(struct mlx5_wqe_dseg);
-	rdmw_wqe_size = sizeof(struct mlx5_rdma_write_wqe) +
-			      sizeof(struct mlx5_wqe_dseg) *
-			      (priv->max_segs_num <= 2 ? 2 : 2 +
-			       RTE_ALIGN(priv->max_segs_num - 2, 4));
-	priv->wqe_set_size = priv->umr_wqe_size + rdmw_wqe_size;
-	priv->umr_wqe_stride = priv->umr_wqe_size / MLX5_SEND_WQE_BB;
-	priv->max_rdmar_ds = rdmw_wqe_size / sizeof(struct mlx5_wqe_dseg);
+	ret = mlx5_crypto_configure_wqe_size(priv,
+		cdev->config.hca_attr.max_wqe_sz_sq, devarg_prms.max_segs_num);
+	if (ret) {
+		mlx5_crypto_uar_release(priv);
+		rte_cryptodev_pmd_destroy(priv->crypto_dev);
+		return -1;
+	}
+	DRV_LOG(INFO, "Max number of segments: %u.",
+		(unsigned int)RTE_MIN(
+			MLX5_CRYPTO_KLM_SEGS_NUM(priv->umr_wqe_size),
+			(uint16_t)(priv->max_rdmar_ds - 2)));
 	pthread_mutex_lock(&priv_list_lock);
 	TAILQ_INSERT_TAIL(&mlx5_crypto_priv_list, priv, next);
 	pthread_mutex_unlock(&priv_list_lock);
diff --git a/drivers/crypto/mlx5/mlx5_crypto.h b/drivers/crypto/mlx5/mlx5_crypto.h
index 69cef81d77..33f244aaf3 100644
--- a/drivers/crypto/mlx5/mlx5_crypto.h
+++ b/drivers/crypto/mlx5/mlx5_crypto.h
@@ -16,6 +16,13 @@
 
 #define MLX5_CRYPTO_DEK_HTABLE_SZ (1 << 11)
 #define MLX5_CRYPTO_KEY_LENGTH 80
+#define MLX5_CRYPTO_UMR_WQE_STATIC_SIZE (sizeof(struct mlx5_wqe_cseg) +\
+					sizeof(struct mlx5_wqe_umr_cseg) +\
+					sizeof(struct mlx5_wqe_mkey_cseg) +\
+					sizeof(struct mlx5_wqe_umr_bsf_seg))
+#define MLX5_CRYPTO_KLM_SEGS_NUM(umr_wqe_sz) ((umr_wqe_sz -\
+					MLX5_CRYPTO_UMR_WQE_STATIC_SIZE) /\
+					MLX5_WSEG_SIZE)
 
 struct mlx5_crypto_priv {
 	TAILQ_ENTRY(mlx5_crypto_priv) next;
@@ -23,7 +30,6 @@ struct mlx5_crypto_priv {
 	struct rte_cryptodev *crypto_dev;
 	void *uar; /* User Access Region. */
 	volatile uint64_t *uar_addr;
-	uint32_t max_segs_num; /* Maximum supported data segs. */
 	struct mlx5_hlist *dek_hlist; /* Dek hash list. */
 	struct rte_cryptodev_config dev_config;
 	struct mlx5_devx_obj *login_obj;
diff --git a/drivers/regex/mlx5/mlx5_regex_control.c b/drivers/regex/mlx5/mlx5_regex_control.c
index 50c966a022..4491f1c98a 100644
--- a/drivers/regex/mlx5/mlx5_regex_control.c
+++ b/drivers/regex/mlx5/mlx5_regex_control.c
@@ -150,8 +150,8 @@ regex_ctrl_create_hw_qp(struct mlx5_regex_priv *priv, struct mlx5_regex_qp *qp,
 	qp_obj->qpn = q_ind;
 	qp_obj->ci = 0;
 	qp_obj->pi = 0;
-	attr.rq_size = 0;
-	attr.sq_size = RTE_BIT32(MLX5_REGEX_WQE_LOG_NUM(priv->has_umr,
+	attr.num_of_receive_wqes = 0;
+	attr.num_of_send_wqbbs = RTE_BIT32(MLX5_REGEX_WQE_LOG_NUM(priv->has_umr,
 			log_nb_desc));
 	attr.mmo = priv->mmo_regex_qp_cap;
 	ret = mlx5_devx_qp_create(priv->cdev->ctx, &qp_obj->qp_obj,
diff --git a/drivers/vdpa/mlx5/mlx5_vdpa_event.c b/drivers/vdpa/mlx5/mlx5_vdpa_event.c
index 042d22777f..759d7633c9 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa_event.c
+++ b/drivers/vdpa/mlx5/mlx5_vdpa_event.c
@@ -608,9 +608,9 @@ mlx5_vdpa_event_qp_create(struct mlx5_vdpa_priv *priv, uint16_t desc_n,
 	}
 	attr.uar_index = priv->uar->page_id;
 	attr.cqn = eqp->cq.cq_obj.cq->id;
-	attr.rq_size = RTE_BIT32(log_desc_n);
+	attr.num_of_receive_wqes = RTE_BIT32(log_desc_n);
 	attr.log_rq_stride = rte_log2_u32(MLX5_WSEG_SIZE);
-	attr.sq_size = 0; /* No need SQ. */
+	attr.num_of_send_wqbbs = 0; /* No need SQ. */
 	attr.ts_format =
 		mlx5_ts_format_conv(priv->cdev->config.hca_attr.qp_ts_format);
 	ret = mlx5_devx_qp_create(priv->cdev->ctx, &(eqp->sw_qp), log_desc_n,
-- 
2.17.1


