[v1,09/12] app/compress: use compiler atomic builtins for display sync
Checks
Commit Message
Convert rte_atomic16_test_and_set usage to a compiler atomic
CAS operation for display sync.
Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
app/test-compress-perf/comp_perf_test_common.h | 2 +-
app/test-compress-perf/comp_perf_test_cyclecount.c | 10 +++++++---
app/test-compress-perf/comp_perf_test_throughput.c | 11 ++++++++---
app/test-compress-perf/comp_perf_test_verify.c | 6 ++++--
4 files changed, 20 insertions(+), 9 deletions(-)
Comments
<snip>
>
> Convert rte_atomic_test_and_set usage to compiler atomic CAS operation for
> display sync.
>
> Signed-off-by: Joyce Kong <joyce.kong@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> ---
> app/test-compress-perf/comp_perf_test_common.h | 2 +-
> app/test-compress-perf/comp_perf_test_cyclecount.c | 10 +++++++---
> app/test-compress-perf/comp_perf_test_throughput.c | 11 ++++++++---
> app/test-compress-perf/comp_perf_test_verify.c | 6 ++++--
> 4 files changed, 20 insertions(+), 9 deletions(-)
>
> diff --git a/app/test-compress-perf/comp_perf_test_common.h b/app/test-
> compress-perf/comp_perf_test_common.h
> index 72705c6a2b..d039e5a29a 100644
> --- a/app/test-compress-perf/comp_perf_test_common.h
> +++ b/app/test-compress-perf/comp_perf_test_common.h
> @@ -14,7 +14,7 @@ struct cperf_mem_resources {
> uint16_t qp_id;
> uint8_t lcore_id;
>
> - rte_atomic16_t print_info_once;
> + uint16_t print_info_once;
>
> uint32_t total_bufs;
> uint8_t *compressed_data;
> diff --git a/app/test-compress-perf/comp_perf_test_cyclecount.c b/app/test-
> compress-perf/comp_perf_test_cyclecount.c
> index 55559a7d5a..e002e53bdf 100644
> --- a/app/test-compress-perf/comp_perf_test_cyclecount.c
> +++ b/app/test-compress-perf/comp_perf_test_cyclecount.c
> @@ -468,7 +468,7 @@ cperf_cyclecount_test_runner(void *test_ctx)
> struct cperf_cyclecount_ctx *ctx = test_ctx;
> struct comp_test_data *test_data = ctx->ver.options;
> uint32_t lcore = rte_lcore_id();
> - static rte_atomic16_t display_once = RTE_ATOMIC16_INIT(0);
> + static uint16_t display_once;
> static rte_spinlock_t print_spinlock;
> int i;
>
> @@ -488,10 +488,12 @@ cperf_cyclecount_test_runner(void *test_ctx)
>
> ctx->ver.mem.lcore_id = lcore;
>
> + uint16_t exp = 0;
> /*
> * printing information about current compression thread
> */
> - if (rte_atomic16_test_and_set(&ctx->ver.mem.print_info_once))
> + if (__atomic_compare_exchange_n(&ctx->ver.mem.print_info_once,
> &exp,
> + 1, 0, __ATOMIC_RELAXED,
> __ATOMIC_RELAXED))
> printf(" lcore: %u,"
> " driver name: %s,"
> " device name: %s,"
> @@ -547,8 +549,10 @@ cperf_cyclecount_test_runner(void *test_ctx)
> duration_setup_per_op = ctx->duration_op /
> (ctx->ver.mem.total_bufs * test_data->num_iter);
>
> + exp = 0;
> /* R E P O R T processing */
> - if (rte_atomic16_test_and_set(&display_once)) {
> + if (__atomic_compare_exchange_n(&display_once, &exp, 1, 0,
> + __ATOMIC_RELAXED, __ATOMIC_RELAXED)) {
I think we can reorganize the current code to avoid using atomic operations on 'display_once' by checking and setting it while holding 'print_spinlock'. How about the following code?
rte_spinlock_lock(&print_spinlock);
if (display_once != 1) {
display_once = 1;
printf("\nLegend for the table\n".....
<all other printfs>
}
printf....
printf....
printf....
rte_spinlock_unlock(&print_spinlock);
Rest of the changes look fine.
>
> rte_spinlock_lock(&print_spinlock);
>
> diff --git a/app/test-compress-perf/comp_perf_test_throughput.c b/app/test-
> compress-perf/comp_perf_test_throughput.c
> index 13922b658c..f587ad2ec3 100644
> --- a/app/test-compress-perf/comp_perf_test_throughput.c
> +++ b/app/test-compress-perf/comp_perf_test_throughput.c
> @@ -329,15 +329,18 @@ cperf_throughput_test_runner(void *test_ctx)
> struct cperf_benchmark_ctx *ctx = test_ctx;
> struct comp_test_data *test_data = ctx->ver.options;
> uint32_t lcore = rte_lcore_id();
> - static rte_atomic16_t display_once = RTE_ATOMIC16_INIT(0);
> + static uint16_t display_once;
> int i, ret = EXIT_SUCCESS;
>
> ctx->ver.mem.lcore_id = lcore;
>
> + uint16_t exp = 0;
> /*
> * printing information about current compression thread
> */
> - if (rte_atomic16_test_and_set(&ctx->ver.mem.print_info_once))
> + if (__atomic_compare_exchange_n(&ctx->ver.mem.print_info_once,
> &exp,
> + 1, 0, __ATOMIC_RELAXED,
> __ATOMIC_RELAXED))
> +
> printf(" lcore: %u,"
> " driver name: %s,"
> " device name: %s,"
> @@ -391,7 +394,9 @@ cperf_throughput_test_runner(void *test_ctx)
> ctx->decomp_gbps = rte_get_tsc_hz() / ctx->decomp_tsc_byte * 8 /
> 1000000000;
>
> - if (rte_atomic16_test_and_set(&display_once)) {
> + exp = 0;
> + if (__atomic_compare_exchange_n(&display_once, &exp, 1, 0,
> + __ATOMIC_RELAXED, __ATOMIC_RELAXED)) {
> printf("\n%12s%6s%12s%17s%15s%16s\n",
> "lcore id", "Level", "Comp size", "Comp ratio [%]",
> "Comp [Gbps]", "Decomp [Gbps]");
> diff --git a/app/test-compress-perf/comp_perf_test_verify.c b/app/test-
> compress-perf/comp_perf_test_verify.c
> index 5e13257b79..6a2497985b 100644
> --- a/app/test-compress-perf/comp_perf_test_verify.c
> +++ b/app/test-compress-perf/comp_perf_test_verify.c
> @@ -388,7 +388,7 @@ cperf_verify_test_runner(void *test_ctx)
> struct cperf_verify_ctx *ctx = test_ctx;
> struct comp_test_data *test_data = ctx->options;
> int ret = EXIT_SUCCESS;
> - static rte_atomic16_t display_once = RTE_ATOMIC16_INIT(0);
> + static uint16_t display_once;
> uint32_t lcore = rte_lcore_id();
>
> ctx->mem.lcore_id = lcore;
> @@ -428,7 +428,9 @@ cperf_verify_test_runner(void *test_ctx)
> test_data->input_data_sz * 100;
>
> if (!ctx->silent) {
> - if (rte_atomic16_test_and_set(&display_once)) {
> + uint16_t exp = 0;
> + if (__atomic_compare_exchange_n(&display_once, &exp, 1,
> 0,
> + __ATOMIC_RELAXED, __ATOMIC_RELAXED)) {
> printf("%12s%6s%12s%17s\n",
> "lcore id", "Level", "Comp size", "Comp ratio [%]");
> }
> --
> 2.17.1
> <snip>
>
> >
> > + exp = 0;
> > /* R E P O R T processing */
> > - if (rte_atomic16_test_and_set(&display_once)) {
> > + if (__atomic_compare_exchange_n(&display_once, &exp, 1, 0,
> > + __ATOMIC_RELAXED, __ATOMIC_RELAXED)) {
> I think we can re-organize the current code which will avoid using atomic
> operations on 'display_once'. How about the following code?
>
> rte_spinlock_lock(&print_spinlock);
> if (display_once != 1) {
> display_once = 1;
> printf("\nLegend for the table\n".....
> <all other printfs>
> }
> printf....
> printf....
> printf....
> rte_spinlock_unlock(&print_spinlock);
>
> Rest of the changes look fine.
>
Will send out the next version with the changes.
@@ -14,7 +14,7 @@ struct cperf_mem_resources {
uint16_t qp_id;
uint8_t lcore_id;
- rte_atomic16_t print_info_once;
+ uint16_t print_info_once;
uint32_t total_bufs;
uint8_t *compressed_data;
@@ -468,7 +468,7 @@ cperf_cyclecount_test_runner(void *test_ctx)
struct cperf_cyclecount_ctx *ctx = test_ctx;
struct comp_test_data *test_data = ctx->ver.options;
uint32_t lcore = rte_lcore_id();
- static rte_atomic16_t display_once = RTE_ATOMIC16_INIT(0);
+ static uint16_t display_once;
static rte_spinlock_t print_spinlock;
int i;
@@ -488,10 +488,12 @@ cperf_cyclecount_test_runner(void *test_ctx)
ctx->ver.mem.lcore_id = lcore;
+ uint16_t exp = 0;
/*
* printing information about current compression thread
*/
- if (rte_atomic16_test_and_set(&ctx->ver.mem.print_info_once))
+ if (__atomic_compare_exchange_n(&ctx->ver.mem.print_info_once, &exp,
+ 1, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED))
printf(" lcore: %u,"
" driver name: %s,"
" device name: %s,"
@@ -547,8 +549,10 @@ cperf_cyclecount_test_runner(void *test_ctx)
duration_setup_per_op = ctx->duration_op /
(ctx->ver.mem.total_bufs * test_data->num_iter);
+ exp = 0;
/* R E P O R T processing */
- if (rte_atomic16_test_and_set(&display_once)) {
+ if (__atomic_compare_exchange_n(&display_once, &exp, 1, 0,
+ __ATOMIC_RELAXED, __ATOMIC_RELAXED)) {
rte_spinlock_lock(&print_spinlock);
@@ -329,15 +329,18 @@ cperf_throughput_test_runner(void *test_ctx)
struct cperf_benchmark_ctx *ctx = test_ctx;
struct comp_test_data *test_data = ctx->ver.options;
uint32_t lcore = rte_lcore_id();
- static rte_atomic16_t display_once = RTE_ATOMIC16_INIT(0);
+ static uint16_t display_once;
int i, ret = EXIT_SUCCESS;
ctx->ver.mem.lcore_id = lcore;
+ uint16_t exp = 0;
/*
* printing information about current compression thread
*/
- if (rte_atomic16_test_and_set(&ctx->ver.mem.print_info_once))
+ if (__atomic_compare_exchange_n(&ctx->ver.mem.print_info_once, &exp,
+ 1, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED))
+
printf(" lcore: %u,"
" driver name: %s,"
" device name: %s,"
@@ -391,7 +394,9 @@ cperf_throughput_test_runner(void *test_ctx)
ctx->decomp_gbps = rte_get_tsc_hz() / ctx->decomp_tsc_byte * 8 /
1000000000;
- if (rte_atomic16_test_and_set(&display_once)) {
+ exp = 0;
+ if (__atomic_compare_exchange_n(&display_once, &exp, 1, 0,
+ __ATOMIC_RELAXED, __ATOMIC_RELAXED)) {
printf("\n%12s%6s%12s%17s%15s%16s\n",
"lcore id", "Level", "Comp size", "Comp ratio [%]",
"Comp [Gbps]", "Decomp [Gbps]");
@@ -388,7 +388,7 @@ cperf_verify_test_runner(void *test_ctx)
struct cperf_verify_ctx *ctx = test_ctx;
struct comp_test_data *test_data = ctx->options;
int ret = EXIT_SUCCESS;
- static rte_atomic16_t display_once = RTE_ATOMIC16_INIT(0);
+ static uint16_t display_once;
uint32_t lcore = rte_lcore_id();
ctx->mem.lcore_id = lcore;
@@ -428,7 +428,9 @@ cperf_verify_test_runner(void *test_ctx)
test_data->input_data_sz * 100;
if (!ctx->silent) {
- if (rte_atomic16_test_and_set(&display_once)) {
+ uint16_t exp = 0;
+ if (__atomic_compare_exchange_n(&display_once, &exp, 1, 0,
+ __ATOMIC_RELAXED, __ATOMIC_RELAXED)) {
printf("%12s%6s%12s%17s\n",
"lcore id", "Level", "Comp size", "Comp ratio [%]");
}