[dpdk-dev] [PATCH] app/crypto-perf: add minimise-offload-cost flag
Fiona Trahe
fiona.trahe at intel.com
Tue May 9 18:14:03 CEST 2017
The throughput test enqueues and dequeues bursts of operations
to the device. For software devices the full burst size will
usually be successfully en/dequeued; on hardware devices,
however, the CPU can call the API more frequently than necessary,
as it has nothing else to do.
Minimum offload cost is achieved when the specified
burst_size is en/dequeued. So rather than
wasting CPU cycles continually retrying, with a
fraction of the burst being en/dequeued each time,
fewer CPU cycles are used by backing off until a full
burst can be enqueued.
This patch adds a --minimise-offload-cost flag.
When set, the test backs off until full bursts are
en/dequeued and counts the cycles while waiting.
These cycles represent cycles saved by
offloading, which in a real application are
available for other work. Hence these cycles are
deducted from the total cycle-count to show the
minimum offload-cost which can be achieved.
Signed-off-by: Fiona Trahe <fiona.trahe at intel.com>
---
app/test-crypto-perf/cperf_options.h | 2 +
app/test-crypto-perf/cperf_options_parsing.c | 12 +++++
app/test-crypto-perf/cperf_test_throughput.c | 76 ++++++++++++++++++++++------
3 files changed, 75 insertions(+), 15 deletions(-)
diff --git a/app/test-crypto-perf/cperf_options.h b/app/test-crypto-perf/cperf_options.h
index b928c58..48ca1de 100644
--- a/app/test-crypto-perf/cperf_options.h
+++ b/app/test-crypto-perf/cperf_options.h
@@ -31,6 +31,7 @@
#define CPERF_AUTH_DIGEST_SZ ("auth-digest-sz")
#define CPERF_AUTH_AAD_SZ ("auth-aad-sz")
#define CPERF_CSV ("csv-friendly")
+#define CPERF_MINIMISE_OFFLOAD_COST ("minimise-offload-cost")
#define MAX_LIST 32
@@ -65,6 +66,7 @@ struct cperf_options {
uint32_t out_of_place:1;
uint32_t silent:1;
uint32_t csv:1;
+ uint32_t minimise_offload_cost:1;
enum rte_crypto_cipher_algorithm cipher_algo;
enum rte_crypto_cipher_operation cipher_op;
diff --git a/app/test-crypto-perf/cperf_options_parsing.c b/app/test-crypto-perf/cperf_options_parsing.c
index d172671..c8d31ef 100644
--- a/app/test-crypto-perf/cperf_options_parsing.c
+++ b/app/test-crypto-perf/cperf_options_parsing.c
@@ -439,6 +439,15 @@ parse_silent(struct cperf_options *opts,
}
static int
+parse_minimise_offload_cost(struct cperf_options *opts,
+ const char *arg __rte_unused)
+{
+ opts->minimise_offload_cost = 1;
+
+ return 0;
+}
+
+static int
parse_cipher_algo(struct cperf_options *opts, const char *arg)
{
@@ -603,6 +612,7 @@ static struct option lgopts[] = {
{ CPERF_AUTH_DIGEST_SZ, required_argument, 0, 0 },
{ CPERF_AUTH_AAD_SZ, required_argument, 0, 0 },
{ CPERF_CSV, no_argument, 0, 0},
+ { CPERF_MINIMISE_OFFLOAD_COST, no_argument, 0, 0 },
{ NULL, 0, 0, 0 }
};
@@ -640,6 +650,7 @@ cperf_options_default(struct cperf_options *opts)
opts->sessionless = 0;
opts->out_of_place = 0;
opts->csv = 0;
+ opts->minimise_offload_cost = 0;
opts->cipher_algo = RTE_CRYPTO_CIPHER_AES_CBC;
opts->cipher_op = RTE_CRYPTO_CIPHER_OP_ENCRYPT;
@@ -681,6 +692,7 @@ cperf_opts_parse_long(int opt_idx, struct cperf_options *opts)
{ CPERF_AUTH_DIGEST_SZ, parse_auth_digest_sz },
{ CPERF_AUTH_AAD_SZ, parse_auth_aad_sz },
{ CPERF_CSV, parse_csv_friendly},
+ { CPERF_MINIMISE_OFFLOAD_COST, parse_minimise_offload_cost },
};
unsigned int i;
diff --git a/app/test-crypto-perf/cperf_test_throughput.c b/app/test-crypto-perf/cperf_test_throughput.c
index 61b27ea..17044ee 100644
--- a/app/test-crypto-perf/cperf_test_throughput.c
+++ b/app/test-crypto-perf/cperf_test_throughput.c
@@ -320,6 +320,9 @@ cperf_throughput_test_runner(void *test_ctx)
uint64_t ops_deqd = 0, ops_deqd_total = 0, ops_deqd_failed = 0;
uint64_t m_idx = 0, tsc_start, tsc_end, tsc_duration;
+ uint64_t wait_start = 0, wait_end = 0;
+ uint64_t wait_duration = 0;
+ uint32_t wait_cycles = 0;
uint16_t ops_unused = 0;
@@ -375,8 +378,13 @@ cperf_throughput_test_runner(void *test_ctx)
/* Enqueue burst of ops on crypto device */
ops_enqd = rte_cryptodev_enqueue_burst(ctx->dev_id, ctx->qp_id,
ops, burst_size);
- if (ops_enqd < burst_size)
+ if (ops_enqd < burst_size) {
ops_enqd_failed++;
+ wait_cycles += 10;
+ } else {
+ if (wait_cycles)
+ wait_cycles--;
+ }
/**
* Calculate number of ops not enqueued (mainly for hw
@@ -385,6 +393,26 @@ cperf_throughput_test_runner(void *test_ctx)
ops_unused = burst_size - ops_enqd;
ops_enqd_total += ops_enqd;
+ /**
+ * Minimum offload cost will be achieved when the
+ * specified burst_size is enqueued. Rather than
+ * wasting CPU cycles continually retrying, with a
+ * fraction of the burst being enqueued each time,
+ * back off until a full burst can be enqueued.
+ * The cycles counted here represent cycles saved by
+ * offloading, which in a real application are
+ * available for other work. Hence these cycles are
+ * deducted from the total cycle-count to show the
+ * offload cost.
+ */
+ if (ctx->options->minimise_offload_cost &&
+ wait_cycles > 30) {
+ wait_start = wait_end = rte_rdtsc_precise();
+ while ((wait_end - wait_start) < wait_cycles)
+ wait_end = rte_rdtsc_precise();
+ wait_duration += (wait_end - wait_start);
+ }
+
/* Dequeue processed burst of ops from crypto device */
ops_deqd = rte_cryptodev_dequeue_burst(ctx->dev_id, ctx->qp_id,
@@ -400,13 +428,19 @@ cperf_throughput_test_runner(void *test_ctx)
rte_crypto_op_free(ops_processed[i]);
ops_deqd_total += ops_deqd;
- } else {
+ }
+
+ if (ops_deqd != test_burst_size) {
/**
- * Count dequeue polls which didn't return any
- * processed operations. This statistic is mainly
+ * Count dequeue polls which don't return a
+ * full burst. This statistic is mainly
* relevant to hw accelerators.
*/
ops_deqd_failed++;
+ wait_cycles += 10;
+ } else {
+ if (wait_cycles)
+ wait_cycles--;
}
m_idx += ops_needed;
@@ -415,8 +449,7 @@ cperf_throughput_test_runner(void *test_ctx)
}
/* Dequeue any operations still in the crypto device */
-
- while (ops_deqd_total < ctx->options->total_ops) {
+ while (ops_deqd_total < ops_enqd_total) {
/* Sending 0 length burst to flush sw crypto device */
rte_cryptodev_enqueue_burst(ctx->dev_id, ctx->qp_id, NULL, 0);
@@ -447,18 +480,22 @@ cperf_throughput_test_runner(void *test_ctx)
/* Calculate average cycles per packet */
double cycles_per_packet = ((double)tsc_duration /
ctx->options->total_ops);
+ double available_cycles_per_packet = ((double)wait_duration /
+ ctx->options->total_ops);
if (!ctx->options->csv) {
if (!only_once)
- printf("%12s%12s%12s%12s%12s%12s%12s%12s%12s%12s\n\n",
- "lcore id", "Buf Size", "Burst Size",
+ printf(
+ "%8s%10s%6s%12s%12s%12s%12s%8s%8s%18s%10s%12s\n\n",
+ "lcore id", "Buf Size", "Burst",
"Enqueued", "Dequeued", "Failed Enq",
"Failed Deq", "MOps", "Gbps",
- "Cycles/Buf");
+ "Cycles/Buf(Total)", "(Offload)",
+ "(Available)");
only_once = 1;
- printf("%12u%12u%12u%12"PRIu64"%12"PRIu64"%12"PRIu64
- "%12"PRIu64"%12.4f%12.4f%12.2f\n",
+ printf("%8u%10u%6u%12"PRIu64"%12"PRIu64"%12"PRIu64
+ "%12"PRIu64"%8.4f%8.4f%12.0f%12.0f%12.0f\n",
ctx->lcore_id,
ctx->options->test_buffer_size,
test_burst_size,
@@ -468,17 +505,22 @@ cperf_throughput_test_runner(void *test_ctx)
ops_deqd_failed,
ops_per_second/1000000,
throughput_gbps,
- cycles_per_packet);
+ cycles_per_packet,
+ cycles_per_packet
+ - available_cycles_per_packet,
+ available_cycles_per_packet);
+
} else {
if (!only_once)
printf("# lcore id, Buffer Size(B),"
"Burst Size,Enqueued,Dequeued,Failed Enq,"
"Failed Deq,Ops(Millions),Throughput(Gbps),"
- "Cycles/Buf\n\n");
+ "Cycles/Buf(Total),(Offload),"
+ "(Available)\n\n");
only_once = 1;
printf("%10u;%10u;%u;%"PRIu64";%"PRIu64";%"PRIu64";%"PRIu64";"
- "%.f3;%.f3;%.f3\n",
+ "%.f3;%.f3;%.f3;%.f3;%.f3\n",
ctx->lcore_id,
ctx->options->test_buffer_size,
test_burst_size,
@@ -488,7 +530,11 @@ cperf_throughput_test_runner(void *test_ctx)
ops_deqd_failed,
ops_per_second/1000000,
throughput_gbps,
- cycles_per_packet);
+ cycles_per_packet,
+ cycles_per_packet
+ - available_cycles_per_packet,
+ available_cycles_per_packet);
+
}
/* Get next size from range or list */
--
2.5.0
More information about the dev
mailing list