[dpdk-dev,v3,6/7] app/crypto-perf: support multiple queue pairs

Message ID 20170922075519.28342-7-pablo.de.lara.guarch@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Pablo de Lara Guarch
Headers

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK

Commit Message

De Lara Guarch, Pablo Sept. 22, 2017, 7:55 a.m. UTC
  Add parameter "qp-nb" in crypto performance app,
to create multiple queue pairs per device.

This new parameter is useful to have multiple logical
cores using a single crypto device, without needing
to initialize a crypto device per core.

Signed-off-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
---
 app/test-crypto-perf/cperf_options.h             |  2 +
 app/test-crypto-perf/cperf_options_parsing.c     | 22 ++++++++++
 app/test-crypto-perf/cperf_test_latency.c        | 14 +++---
 app/test-crypto-perf/cperf_test_pmd_cyclecount.c |  7 +--
 app/test-crypto-perf/cperf_test_throughput.c     | 14 +++---
 app/test-crypto-perf/cperf_test_verify.c         | 14 +++---
 app/test-crypto-perf/main.c                      | 56 ++++++++++++++----------
 doc/guides/tools/cryptoperf.rst                  |  4 ++
 8 files changed, 84 insertions(+), 49 deletions(-)
  

Comments

Akhil Goyal Sept. 26, 2017, 8:42 a.m. UTC | #1
Hi Pablo,
On 9/22/2017 1:25 PM, Pablo de Lara wrote:
> Add parameter "qps" in crypto performance app,
> to create multiple queue pairs per device.
> 
> This new parameter is useful to have multiple logical
> cores using a single crypto device, without needing
> to initialize a crypto device per core.
> 
> Signed-off-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
> ---
>   app/test-crypto-perf/cperf_options.h             |  2 +
>   app/test-crypto-perf/cperf_options_parsing.c     | 22 ++++++++++
>   app/test-crypto-perf/cperf_test_latency.c        | 14 +++---
>   app/test-crypto-perf/cperf_test_pmd_cyclecount.c |  7 +--
>   app/test-crypto-perf/cperf_test_throughput.c     | 14 +++---
>   app/test-crypto-perf/cperf_test_verify.c         | 14 +++---
>   app/test-crypto-perf/main.c                      | 56 ++++++++++++++----------
>   doc/guides/tools/cryptoperf.rst                  |  4 ++
>   8 files changed, 84 insertions(+), 49 deletions(-)
> 
> diff --git a/app/test-crypto-perf/cperf_options.h b/app/test-crypto-perf/cperf_options.h
> index 6d339f4..468d5e2 100644
> --- a/app/test-crypto-perf/cperf_options.h
> +++ b/app/test-crypto-perf/cperf_options.h
> @@ -15,6 +15,7 @@
>   #define CPERF_DESC_NB		("desc-nb")
>   
>   #define CPERF_DEVTYPE		("devtype")
> +#define CPERF_QP_NB		("qp-nb")
>   #define CPERF_OPTYPE		("optype")
>   #define CPERF_SESSIONLESS	("sessionless")
>   #define CPERF_OUT_OF_PLACE	("out-of-place")
> @@ -74,6 +75,7 @@ struct cperf_options {
>   	uint32_t segment_sz;
>   	uint32_t test_buffer_size;
>   	uint32_t nb_descriptors;
> +	uint32_t nb_qps;
>   
>   	uint32_t sessionless:1;
>   	uint32_t out_of_place:1;
> diff --git a/app/test-crypto-perf/cperf_options_parsing.c b/app/test-crypto-perf/cperf_options_parsing.c
> index 89f86a2..441cd61 100644
> --- a/app/test-crypto-perf/cperf_options_parsing.c
> +++ b/app/test-crypto-perf/cperf_options_parsing.c
> @@ -364,6 +364,24 @@ parse_desc_nb(struct cperf_options *opts, const char *arg)
>   }
>   
>   static int
> +parse_qp_nb(struct cperf_options *opts, const char *arg)
> +{
> +	int ret = parse_uint32_t(&opts->nb_qps, arg);
> +
> +	if (ret) {
> +		RTE_LOG(ERR, USER1, "failed to parse number of queue pairs\n");
> +		return -1;
> +	}
> +
> +	if ((opts->nb_qps == 0) || (opts->nb_qps > 256)) {
Shouldn't 256 be a macro for the max nb_qps?

Also a generic comment on this patch: why do we need an explicit
parameter for nb-qps? Can't we do it similarly to ipsec-secgw?
It takes the devices and maps the queues to cores as per the devices'
capabilities.

-Akhil
  
De Lara Guarch, Pablo Oct. 4, 2017, 10:25 a.m. UTC | #2
Hi Akhil,

> -----Original Message-----

> From: Akhil Goyal [mailto:akhil.goyal@nxp.com]

> Sent: Tuesday, September 26, 2017 9:42 AM

> To: De Lara Guarch, Pablo <pablo.de.lara.guarch@intel.com>; Doherty,

> Declan <declan.doherty@intel.com>

> Cc: dev@dpdk.org

> Subject: Re: [PATCH v3 6/7] app/crypto-perf: support multiple queue pairs

> 

> Hi Pablo,

> On 9/22/2017 1:25 PM, Pablo de Lara wrote:

> > Add parameter "qps" in crypto performance app, to create multiple

> > queue pairs per device.

> >

> > This new parameter is useful to have multiple logical cores using a

> > single crypto device, without needing to initialize a crypto device

> > per core.

> >

> > Signed-off-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>

> > ---

> >   app/test-crypto-perf/cperf_options.h             |  2 +

> >   app/test-crypto-perf/cperf_options_parsing.c     | 22 ++++++++++

> >   app/test-crypto-perf/cperf_test_latency.c        | 14 +++---

> >   app/test-crypto-perf/cperf_test_pmd_cyclecount.c |  7 +--

> >   app/test-crypto-perf/cperf_test_throughput.c     | 14 +++---

> >   app/test-crypto-perf/cperf_test_verify.c         | 14 +++---

> >   app/test-crypto-perf/main.c                      | 56 ++++++++++++++----------

> >   doc/guides/tools/cryptoperf.rst                  |  4 ++

> >   8 files changed, 84 insertions(+), 49 deletions(-)

> >

> > diff --git a/app/test-crypto-perf/cperf_options.h

> > b/app/test-crypto-perf/cperf_options.h

> > index 6d339f4..468d5e2 100644

> > --- a/app/test-crypto-perf/cperf_options.h

> > +++ b/app/test-crypto-perf/cperf_options.h

> > @@ -15,6 +15,7 @@

> >   #define CPERF_DESC_NB		("desc-nb")

> >

> >   #define CPERF_DEVTYPE		("devtype")

> > +#define CPERF_QP_NB		("qp-nb")

> >   #define CPERF_OPTYPE		("optype")

> >   #define CPERF_SESSIONLESS	("sessionless")

> >   #define CPERF_OUT_OF_PLACE	("out-of-place")

> > @@ -74,6 +75,7 @@ struct cperf_options {

> >   	uint32_t segment_sz;

> >   	uint32_t test_buffer_size;

> >   	uint32_t nb_descriptors;

> > +	uint32_t nb_qps;

> >

> >   	uint32_t sessionless:1;

> >   	uint32_t out_of_place:1;

> > diff --git a/app/test-crypto-perf/cperf_options_parsing.c

> > b/app/test-crypto-perf/cperf_options_parsing.c

> > index 89f86a2..441cd61 100644

> > --- a/app/test-crypto-perf/cperf_options_parsing.c

> > +++ b/app/test-crypto-perf/cperf_options_parsing.c

> > @@ -364,6 +364,24 @@ parse_desc_nb(struct cperf_options *opts,

> const char *arg)

> >   }

> >

> >   static int

> > +parse_qp_nb(struct cperf_options *opts, const char *arg) {

> > +	int ret = parse_uint32_t(&opts->nb_qps, arg);

> > +

> > +	if (ret) {

> > +		RTE_LOG(ERR, USER1, "failed to parse number of queue

> pairs\n");

> > +		return -1;

> > +	}

> > +

> > +	if ((opts->nb_qps == 0) || (opts->nb_qps > 256)) {

> Shouldn't this be a macro for max nb_qps.

> 

> Also a generic comment on this patch..  Why do we need an explicit

> parameter for nb-qps. Can't we do it similar to ipsec-secgw.

> It takes the devices and maps the queues with core as per the devices'

> capabilities.


I see... that looks like a good idea. I am implementing it, but will do it slightly differently.
Instead of having the number of queue pairs per device equal to the number of logical cores,
I will divide the number of cores by the number of crypto devices.
So, if 4 cores are available and 2 crypto devices are used, 2 queue pairs will be set up per device.

Thanks for your review,
Pablo

> 

> -Akhil
  

Patch

diff --git a/app/test-crypto-perf/cperf_options.h b/app/test-crypto-perf/cperf_options.h
index 6d339f4..468d5e2 100644
--- a/app/test-crypto-perf/cperf_options.h
+++ b/app/test-crypto-perf/cperf_options.h
@@ -15,6 +15,7 @@ 
 #define CPERF_DESC_NB		("desc-nb")
 
 #define CPERF_DEVTYPE		("devtype")
+#define CPERF_QP_NB		("qp-nb")
 #define CPERF_OPTYPE		("optype")
 #define CPERF_SESSIONLESS	("sessionless")
 #define CPERF_OUT_OF_PLACE	("out-of-place")
@@ -74,6 +75,7 @@  struct cperf_options {
 	uint32_t segment_sz;
 	uint32_t test_buffer_size;
 	uint32_t nb_descriptors;
+	uint32_t nb_qps;
 
 	uint32_t sessionless:1;
 	uint32_t out_of_place:1;
diff --git a/app/test-crypto-perf/cperf_options_parsing.c b/app/test-crypto-perf/cperf_options_parsing.c
index 89f86a2..441cd61 100644
--- a/app/test-crypto-perf/cperf_options_parsing.c
+++ b/app/test-crypto-perf/cperf_options_parsing.c
@@ -364,6 +364,24 @@  parse_desc_nb(struct cperf_options *opts, const char *arg)
 }
 
 static int
+parse_qp_nb(struct cperf_options *opts, const char *arg)
+{
+	int ret = parse_uint32_t(&opts->nb_qps, arg);
+
+	if (ret) {
+		RTE_LOG(ERR, USER1, "failed to parse number of queue pairs\n");
+		return -1;
+	}
+
+	if ((opts->nb_qps == 0) || (opts->nb_qps > 256)) {
+		RTE_LOG(ERR, USER1, "invalid number of queue pairs specified\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+static int
 parse_device_type(struct cperf_options *opts, const char *arg)
 {
 	if (strlen(arg) > (sizeof(opts->device_type) - 1))
@@ -680,6 +698,7 @@  static struct option lgopts[] = {
 	{ CPERF_BUFFER_SIZE, required_argument, 0, 0 },
 	{ CPERF_SEGMENT_SIZE, required_argument, 0, 0 },
 	{ CPERF_DESC_NB, required_argument, 0, 0 },
+	{ CPERF_QP_NB, required_argument, 0, 0 },
 
 	{ CPERF_DEVTYPE, required_argument, 0, 0 },
 	{ CPERF_OPTYPE, required_argument, 0, 0 },
@@ -747,6 +766,7 @@  cperf_options_default(struct cperf_options *opts)
 
 	strncpy(opts->device_type, "crypto_aesni_mb",
 			sizeof(opts->device_type));
+	opts->nb_qps = 1;
 
 	opts->op_type = CPERF_CIPHER_THEN_AUTH;
 
@@ -789,6 +809,7 @@  cperf_opts_parse_long(int opt_idx, struct cperf_options *opts)
 		{ CPERF_BUFFER_SIZE,	parse_buffer_sz },
 		{ CPERF_SEGMENT_SIZE,	parse_segment_sz },
 		{ CPERF_DESC_NB,	parse_desc_nb },
+		{ CPERF_QP_NB,		parse_qp_nb },
 		{ CPERF_DEVTYPE,	parse_device_type },
 		{ CPERF_OPTYPE,		parse_op_type },
 		{ CPERF_SESSIONLESS,	parse_sessionless },
@@ -1032,6 +1053,7 @@  cperf_options_dump(struct cperf_options *opts)
 	printf("#\n");
 	printf("# cryptodev type: %s\n", opts->device_type);
 	printf("#\n");
+	printf("# number of queue pairs per device: %u\n", opts->nb_qps);
 	printf("# crypto operation: %s\n", cperf_op_type_strs[opts->op_type]);
 	printf("# sessionless: %s\n", opts->sessionless ? "yes" : "no");
 	printf("# out of place: %s\n", opts->out_of_place ? "yes" : "no");
diff --git a/app/test-crypto-perf/cperf_test_latency.c b/app/test-crypto-perf/cperf_test_latency.c
index acd8545..99b92d3 100644
--- a/app/test-crypto-perf/cperf_test_latency.c
+++ b/app/test-crypto-perf/cperf_test_latency.c
@@ -218,8 +218,8 @@  cperf_latency_test_constructor(struct rte_mempool *sess_mp,
 	if (ctx->sess == NULL)
 		goto err;
 
-	snprintf(pool_name, sizeof(pool_name), "cperf_pool_in_cdev_%d",
-				dev_id);
+	snprintf(pool_name, sizeof(pool_name), "cperf_pool_in_cdev_%d_qp_%d",
+			dev_id, qp_id);
 
 	uint32_t max_size = options->max_buffer_size + options->digest_sz;
 	uint16_t segments_nb = (max_size % options->segment_sz) ?
@@ -252,8 +252,8 @@  cperf_latency_test_constructor(struct rte_mempool *sess_mp,
 	if (options->out_of_place == 1)	{
 
 		snprintf(pool_name, sizeof(pool_name),
-				"cperf_pool_out_cdev_%d",
-				dev_id);
+				"cperf_pool_out_cdev_%d_qp_%d",
+				dev_id, qp_id);
 
 		ctx->pkt_mbuf_pool_out = rte_pktmbuf_pool_create(
 				pool_name, options->pool_sz, 0, 0,
@@ -281,8 +281,8 @@  cperf_latency_test_constructor(struct rte_mempool *sess_mp,
 		}
 	}
 
-	snprintf(pool_name, sizeof(pool_name), "cperf_op_pool_cdev_%d",
-			dev_id);
+	snprintf(pool_name, sizeof(pool_name), "cperf_op_pool_cdev_%d_qp_%d",
+			dev_id, qp_id);
 
 	uint16_t priv_size = RTE_ALIGN_CEIL(sizeof(struct priv_op_data) +
 			test_vector->cipher_iv.length +
@@ -583,7 +583,5 @@  cperf_latency_test_destructor(void *arg)
 	if (ctx == NULL)
 		return;
 
-	rte_cryptodev_stop(ctx->dev_id);
-
 	cperf_latency_test_free(ctx, ctx->options->pool_sz);
 }
diff --git a/app/test-crypto-perf/cperf_test_pmd_cyclecount.c b/app/test-crypto-perf/cperf_test_pmd_cyclecount.c
index 962dc69..5940836 100644
--- a/app/test-crypto-perf/cperf_test_pmd_cyclecount.c
+++ b/app/test-crypto-perf/cperf_test_pmd_cyclecount.c
@@ -239,7 +239,8 @@  cperf_pmd_cyclecount_test_constructor(struct rte_mempool *sess_mp,
 	if (ctx->sess == NULL)
 		goto err;
 
-	snprintf(pool_name, sizeof(pool_name), "cperf_pool_in_cdev_%d", dev_id);
+	snprintf(pool_name, sizeof(pool_name), "cperf_pool_in_cdev_%d_qp_%d",
+			dev_id, qp_id);
 
 	uint32_t max_size = options->max_buffer_size + options->digest_sz;
 	uint16_t segments_nb = (max_size % options->segment_sz) ?
@@ -267,8 +268,8 @@  cperf_pmd_cyclecount_test_constructor(struct rte_mempool *sess_mp,
 	}
 
 	if (options->out_of_place == 1) {
-		snprintf(pool_name, sizeof(pool_name), "cperf_pool_out_cdev_%d",
-				dev_id);
+		snprintf(pool_name, sizeof(pool_name), "cperf_pool_out_cdev_%d_qp_%d",
+				dev_id, qp_id);
 
 		ctx->pkt_mbuf_pool_out = rte_pktmbuf_pool_create(pool_name,
 				options->pool_sz, 0, 0,
diff --git a/app/test-crypto-perf/cperf_test_throughput.c b/app/test-crypto-perf/cperf_test_throughput.c
index e4da0d5..9255915 100644
--- a/app/test-crypto-perf/cperf_test_throughput.c
+++ b/app/test-crypto-perf/cperf_test_throughput.c
@@ -201,8 +201,8 @@  cperf_throughput_test_constructor(struct rte_mempool *sess_mp,
 	if (ctx->sess == NULL)
 		goto err;
 
-	snprintf(pool_name, sizeof(pool_name), "cperf_pool_in_cdev_%d",
-			dev_id);
+	snprintf(pool_name, sizeof(pool_name), "cperf_pool_in_cdev_%d_qp_%d",
+			dev_id, qp_id);
 
 	uint32_t max_size = options->max_buffer_size + options->digest_sz;
 	uint16_t segments_nb = (max_size % options->segment_sz) ?
@@ -233,8 +233,8 @@  cperf_throughput_test_constructor(struct rte_mempool *sess_mp,
 
 	if (options->out_of_place == 1)	{
 
-		snprintf(pool_name, sizeof(pool_name), "cperf_pool_out_cdev_%d",
-				dev_id);
+		snprintf(pool_name, sizeof(pool_name), "cperf_pool_out_cdev_%d_qp_%d",
+				dev_id, qp_id);
 
 		ctx->pkt_mbuf_pool_out = rte_pktmbuf_pool_create(
 				pool_name, options->pool_sz, 0, 0,
@@ -262,8 +262,8 @@  cperf_throughput_test_constructor(struct rte_mempool *sess_mp,
 		}
 	}
 
-	snprintf(pool_name, sizeof(pool_name), "cperf_op_pool_cdev_%d",
-			dev_id);
+	snprintf(pool_name, sizeof(pool_name), "cperf_op_pool_cdev_%d_qp_%d",
+			dev_id, qp_id);
 
 	uint16_t priv_size = RTE_ALIGN_CEIL(test_vector->cipher_iv.length +
 		test_vector->auth_iv.length + test_vector->aead_iv.length, 16) +
@@ -530,7 +530,5 @@  cperf_throughput_test_destructor(void *arg)
 	if (ctx == NULL)
 		return;
 
-	rte_cryptodev_stop(ctx->dev_id);
-
 	cperf_throughput_test_free(ctx, ctx->options->pool_sz);
 }
diff --git a/app/test-crypto-perf/cperf_test_verify.c b/app/test-crypto-perf/cperf_test_verify.c
index 3159361..dd97354 100644
--- a/app/test-crypto-perf/cperf_test_verify.c
+++ b/app/test-crypto-perf/cperf_test_verify.c
@@ -233,8 +233,8 @@  cperf_verify_test_constructor(struct rte_mempool *sess_mp,
 	if (ctx->sess == NULL)
 		goto err;
 
-	snprintf(pool_name, sizeof(pool_name), "cperf_pool_in_cdev_%d",
-			dev_id);
+	snprintf(pool_name, sizeof(pool_name), "cperf_pool_in_cdev_%d_qp_%d",
+			dev_id, qp_id);
 
 	uint32_t max_size = options->max_buffer_size + options->digest_sz;
 	uint16_t segments_nb = (max_size % options->segment_sz) ?
@@ -265,8 +265,8 @@  cperf_verify_test_constructor(struct rte_mempool *sess_mp,
 
 	if (options->out_of_place == 1)	{
 
-		snprintf(pool_name, sizeof(pool_name), "cperf_pool_out_cdev_%d",
-				dev_id);
+		snprintf(pool_name, sizeof(pool_name), "cperf_pool_out_cdev_%d_qp_%d",
+				dev_id, qp_id);
 
 		ctx->pkt_mbuf_pool_out = rte_pktmbuf_pool_create(
 				pool_name, options->pool_sz, 0, 0,
@@ -294,8 +294,8 @@  cperf_verify_test_constructor(struct rte_mempool *sess_mp,
 		}
 	}
 
-	snprintf(pool_name, sizeof(pool_name), "cperf_op_pool_cdev_%d",
-			dev_id);
+	snprintf(pool_name, sizeof(pool_name), "cperf_op_pool_cdev_%d_qp_%d",
+			dev_id, qp_id);
 
 	uint16_t priv_size = RTE_ALIGN_CEIL(test_vector->cipher_iv.length +
 		test_vector->auth_iv.length + test_vector->aead_iv.length, 16) +
@@ -626,7 +626,5 @@  cperf_verify_test_destructor(void *arg)
 	if (ctx == NULL)
 		return;
 
-	rte_cryptodev_stop(ctx->dev_id);
-
 	cperf_verify_test_free(ctx, ctx->options->pool_sz);
 }
diff --git a/app/test-crypto-perf/main.c b/app/test-crypto-perf/main.c
index ffa7180..97dc19c 100644
--- a/app/test-crypto-perf/main.c
+++ b/app/test-crypto-perf/main.c
@@ -90,7 +90,7 @@  cperf_initialize_cryptodev(struct cperf_options *opts, uint8_t *enabled_cdevs,
 			struct rte_mempool *session_pool_socket[])
 {
 	uint8_t enabled_cdev_count = 0, nb_lcores, cdev_id;
-	unsigned int i;
+	unsigned int i, j;
 	int ret;
 
 	enabled_cdev_count = rte_cryptodev_devices_get(opts->device_type,
@@ -125,8 +125,8 @@  cperf_initialize_cryptodev(struct cperf_options *opts, uint8_t *enabled_cdevs,
 		uint8_t socket_id = rte_cryptodev_socket_id(cdev_id);
 
 		struct rte_cryptodev_config conf = {
-				.nb_queue_pairs = 1,
-				.socket_id = socket_id
+			.nb_queue_pairs = opts->nb_qps,
+			.socket_id = socket_id
 		};
 
 		struct rte_cryptodev_qp_conf qp_conf = {
@@ -165,14 +165,16 @@  cperf_initialize_cryptodev(struct cperf_options *opts, uint8_t *enabled_cdevs,
 			return -EINVAL;
 		}
 
-		ret = rte_cryptodev_queue_pair_setup(cdev_id, 0,
+		for (j = 0; j < opts->nb_qps; j++) {
+			ret = rte_cryptodev_queue_pair_setup(cdev_id, j,
 				&qp_conf, socket_id,
 				session_pool_socket[socket_id]);
 			if (ret < 0) {
 				printf("Failed to setup queue pair %u on "
-					"cryptodev %u",	0, cdev_id);
+					"cryptodev %u",	j, cdev_id);
 				return -EINVAL;
 			}
+		}
 
 		ret = rte_cryptodev_start(cdev_id);
 		if (ret < 0) {
@@ -471,23 +473,29 @@  main(int argc, char **argv)
 	if (!opts.silent)
 		show_test_vector(t_vec);
 
+	uint16_t total_nb_qps = nb_cryptodevs * opts.nb_qps;
+
 	i = 0;
+	uint8_t qp_id = 0, cdev_index = 0;
 	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
 
-		if (i == nb_cryptodevs)
+		if (i == total_nb_qps)
 			break;
 
-		cdev_id = enabled_cdevs[i];
+		cdev_id = enabled_cdevs[cdev_index];
 
 		uint8_t socket_id = rte_cryptodev_socket_id(cdev_id);
 
-		ctx[cdev_id] = cperf_testmap[opts.test].constructor(
-				session_pool_socket[socket_id], cdev_id, 0,
+		ctx[i] = cperf_testmap[opts.test].constructor(
+				session_pool_socket[socket_id], cdev_id, qp_id,
 				&opts, t_vec, &op_fns);
-		if (ctx[cdev_id] == NULL) {
+		if (ctx[i] == NULL) {
 			RTE_LOG(ERR, USER1, "Test run constructor failed\n");
 			goto err;
 		}
+		qp_id = (qp_id + 1) % opts.nb_qps;
+		if (qp_id == 0)
+			cdev_index++;
 		i++;
 	}
 
@@ -501,19 +509,17 @@  main(int argc, char **argv)
 		i = 0;
 		RTE_LCORE_FOREACH_SLAVE(lcore_id) {
 
-			if (i == nb_cryptodevs)
+			if (i == total_nb_qps)
 				break;
 
-			cdev_id = enabled_cdevs[i];
-
 			rte_eal_remote_launch(cperf_testmap[opts.test].runner,
-				ctx[cdev_id], lcore_id);
+				ctx[i], lcore_id);
 			i++;
 		}
 		i = 0;
 		RTE_LCORE_FOREACH_SLAVE(lcore_id) {
 
-			if (i == nb_cryptodevs)
+			if (i == total_nb_qps)
 				break;
 			rte_eal_wait_lcore(lcore_id);
 			i++;
@@ -532,15 +538,17 @@  main(int argc, char **argv)
 	i = 0;
 	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
 
-		if (i == nb_cryptodevs)
+		if (i == total_nb_qps)
 			break;
 
-		cdev_id = enabled_cdevs[i];
-
-		cperf_testmap[opts.test].destructor(ctx[cdev_id]);
+		cperf_testmap[opts.test].destructor(ctx[i]);
 		i++;
 	}
 
+	for (i = 0; i < nb_cryptodevs &&
+			i < RTE_CRYPTO_MAX_DEVS; i++)
+		rte_cryptodev_stop(enabled_cdevs[i]);
+
 	free_test_vector(t_vec, &opts);
 
 	printf("\n");
@@ -549,16 +557,20 @@  main(int argc, char **argv)
 err:
 	i = 0;
 	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
-		if (i == nb_cryptodevs)
+		if (i == total_nb_qps)
 			break;
 
 		cdev_id = enabled_cdevs[i];
 
-		if (ctx[cdev_id] && cperf_testmap[opts.test].destructor)
-			cperf_testmap[opts.test].destructor(ctx[cdev_id]);
+		if (ctx[i] && cperf_testmap[opts.test].destructor)
+			cperf_testmap[opts.test].destructor(ctx[i]);
 		i++;
 	}
 
+	for (i = 0; i < nb_cryptodevs &&
+			i < RTE_CRYPTO_MAX_DEVS; i++)
+		rte_cryptodev_stop(enabled_cdevs[i]);
+
 	free_test_vector(t_vec, &opts);
 
 	printf("\n");
diff --git a/doc/guides/tools/cryptoperf.rst b/doc/guides/tools/cryptoperf.rst
index d587c20..b114b15 100644
--- a/doc/guides/tools/cryptoperf.rst
+++ b/doc/guides/tools/cryptoperf.rst
@@ -194,6 +194,10 @@  The following are the appication command-line options:
            crypto_armv8
            crypto_scheduler
 
+* ``--qp-nb <n>``
+
+       Set the number of queue pairs per device (1 by default).
+
 * ``--optype <name>``
 
         Set operation type, where ``name`` is one of the following::