[v1,09/12] app/compress: use compiler atomic builtins for display sync

Message ID 20210802101847.3462-10-joyce.kong@arm.com (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Headers
Series use compiler atomic builtins for app |

Checks

Context Check Description
ci/checkpatch warning coding style issues

Commit Message

Joyce Kong Aug. 2, 2021, 10:18 a.m. UTC
  Convert rte_atomic16_test_and_set usage to the compiler atomic
compare-and-swap (CAS) builtin for display sync.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 app/test-compress-perf/comp_perf_test_common.h     |  2 +-
 app/test-compress-perf/comp_perf_test_cyclecount.c | 10 +++++++---
 app/test-compress-perf/comp_perf_test_throughput.c | 11 ++++++++---
 app/test-compress-perf/comp_perf_test_verify.c     |  6 ++++--
 4 files changed, 20 insertions(+), 9 deletions(-)
  

Comments

Honnappa Nagarahalli Nov. 9, 2021, 10:59 p.m. UTC | #1
<snip>

> 
> Convert rte_atomic_test_and_set usage to compiler atomic CAS operation for
> display sync.
> 
> Signed-off-by: Joyce Kong <joyce.kong@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> ---
>  app/test-compress-perf/comp_perf_test_common.h     |  2 +-
>  app/test-compress-perf/comp_perf_test_cyclecount.c | 10 +++++++---
>  app/test-compress-perf/comp_perf_test_throughput.c | 11 ++++++++---
>  app/test-compress-perf/comp_perf_test_verify.c     |  6 ++++--
>  4 files changed, 20 insertions(+), 9 deletions(-)
> 
> diff --git a/app/test-compress-perf/comp_perf_test_common.h b/app/test-compress-perf/comp_perf_test_common.h
> index 72705c6a2b..d039e5a29a 100644
> --- a/app/test-compress-perf/comp_perf_test_common.h
> +++ b/app/test-compress-perf/comp_perf_test_common.h
> @@ -14,7 +14,7 @@ struct cperf_mem_resources {
>  	uint16_t qp_id;
>  	uint8_t lcore_id;
> 
> -	rte_atomic16_t print_info_once;
> +	uint16_t print_info_once;
> 
>  	uint32_t total_bufs;
>  	uint8_t *compressed_data;
> diff --git a/app/test-compress-perf/comp_perf_test_cyclecount.c b/app/test-compress-perf/comp_perf_test_cyclecount.c
> index 55559a7d5a..e002e53bdf 100644
> --- a/app/test-compress-perf/comp_perf_test_cyclecount.c
> +++ b/app/test-compress-perf/comp_perf_test_cyclecount.c
> @@ -468,7 +468,7 @@ cperf_cyclecount_test_runner(void *test_ctx)
>  	struct cperf_cyclecount_ctx *ctx = test_ctx;
>  	struct comp_test_data *test_data = ctx->ver.options;
>  	uint32_t lcore = rte_lcore_id();
> -	static rte_atomic16_t display_once = RTE_ATOMIC16_INIT(0);
> +	static uint16_t display_once;
>  	static rte_spinlock_t print_spinlock;
>  	int i;
> 
> @@ -488,10 +488,12 @@ cperf_cyclecount_test_runner(void *test_ctx)
> 
>  	ctx->ver.mem.lcore_id = lcore;
> 
> +	uint16_t exp = 0;
>  	/*
>  	 * printing information about current compression thread
>  	 */
> -	if (rte_atomic16_test_and_set(&ctx->ver.mem.print_info_once))
> +	if (__atomic_compare_exchange_n(&ctx->ver.mem.print_info_once, &exp,
> +				1, 0, __ATOMIC_RELAXED,  __ATOMIC_RELAXED))
>  		printf("    lcore: %u,"
>  				" driver name: %s,"
>  				" device name: %s,"
> @@ -547,8 +549,10 @@ cperf_cyclecount_test_runner(void *test_ctx)
>  	duration_setup_per_op = ctx->duration_op /
>  			(ctx->ver.mem.total_bufs * test_data->num_iter);
> 
> +	exp = 0;
>  	/* R E P O R T processing */
> -	if (rte_atomic16_test_and_set(&display_once)) {
> +	if (__atomic_compare_exchange_n(&display_once, &exp, 1, 0,
> +			__ATOMIC_RELAXED, __ATOMIC_RELAXED)) {
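I think we can reorganize the current code to avoid using atomic operations on 'display_once' altogether: if the check and the update are both done while holding 'print_spinlock', a plain variable is sufficient. How about the following code?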

rte_spinlock_lock(&print_spinlock);
if (display_once != 1) {
	display_once = 1;
	printf("\nLegend for the table\n".....
	<all other printfs>
}
printf....
printf....
printf....
rte_spinlock_unlock(&print_spinlock);

Rest of the changes look fine.

> 
>  		rte_spinlock_lock(&print_spinlock);
> 
> diff --git a/app/test-compress-perf/comp_perf_test_throughput.c b/app/test-compress-perf/comp_perf_test_throughput.c
> index 13922b658c..f587ad2ec3 100644
> --- a/app/test-compress-perf/comp_perf_test_throughput.c
> +++ b/app/test-compress-perf/comp_perf_test_throughput.c
> @@ -329,15 +329,18 @@ cperf_throughput_test_runner(void *test_ctx)
>  	struct cperf_benchmark_ctx *ctx = test_ctx;
>  	struct comp_test_data *test_data = ctx->ver.options;
>  	uint32_t lcore = rte_lcore_id();
> -	static rte_atomic16_t display_once = RTE_ATOMIC16_INIT(0);
> +	static uint16_t display_once;
>  	int i, ret = EXIT_SUCCESS;
> 
>  	ctx->ver.mem.lcore_id = lcore;
> 
> +	uint16_t exp = 0;
>  	/*
>  	 * printing information about current compression thread
>  	 */
> -	if (rte_atomic16_test_and_set(&ctx->ver.mem.print_info_once))
> +	if (__atomic_compare_exchange_n(&ctx->ver.mem.print_info_once, &exp,
> +				1, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED))
> +
>  		printf("    lcore: %u,"
>  				" driver name: %s,"
>  				" device name: %s,"
> @@ -391,7 +394,9 @@ cperf_throughput_test_runner(void *test_ctx)
>  	ctx->decomp_gbps = rte_get_tsc_hz() / ctx->decomp_tsc_byte * 8 /
>  			1000000000;
> 
> -	if (rte_atomic16_test_and_set(&display_once)) {
> +	exp = 0;
> +	if (__atomic_compare_exchange_n(&display_once, &exp, 1, 0,
> +				__ATOMIC_RELAXED, __ATOMIC_RELAXED)) {
>  		printf("\n%12s%6s%12s%17s%15s%16s\n",
>  			"lcore id", "Level", "Comp size", "Comp ratio [%]",
>  			"Comp [Gbps]", "Decomp [Gbps]");
> diff --git a/app/test-compress-perf/comp_perf_test_verify.c b/app/test-compress-perf/comp_perf_test_verify.c
> index 5e13257b79..6a2497985b 100644
> --- a/app/test-compress-perf/comp_perf_test_verify.c
> +++ b/app/test-compress-perf/comp_perf_test_verify.c
> @@ -388,7 +388,7 @@ cperf_verify_test_runner(void *test_ctx)
>  	struct cperf_verify_ctx *ctx = test_ctx;
>  	struct comp_test_data *test_data = ctx->options;
>  	int ret = EXIT_SUCCESS;
> -	static rte_atomic16_t display_once = RTE_ATOMIC16_INIT(0);
> +	static uint16_t display_once;
>  	uint32_t lcore = rte_lcore_id();
> 
>  	ctx->mem.lcore_id = lcore;
> @@ -428,7 +428,9 @@ cperf_verify_test_runner(void *test_ctx)
>  			test_data->input_data_sz * 100;
> 
>  	if (!ctx->silent) {
> -		if (rte_atomic16_test_and_set(&display_once)) {
> +		uint16_t exp = 0;
> +		if (__atomic_compare_exchange_n(&display_once, &exp, 1, 0,
> +				__ATOMIC_RELAXED, __ATOMIC_RELAXED)) {
>  			printf("%12s%6s%12s%17s\n",
>  			    "lcore id", "Level", "Comp size", "Comp ratio [%]");
>  		}
> --
> 2.17.1
  
Joyce Kong Nov. 11, 2021, 8:13 a.m. UTC | #2
> <snip>
> 
> >
> > +	exp = 0;
> >  	/* R E P O R T processing */
> > -	if (rte_atomic16_test_and_set(&display_once)) {
> > +	if (__atomic_compare_exchange_n(&display_once, &exp, 1, 0,
> > +			__ATOMIC_RELAXED, __ATOMIC_RELAXED)) {
> I think we can re-organize the current code which will avoid using atomic
> operations on 'display_once'. How about the following code?
> 
> rte_spinlock_lock(&print_spinlock);
> if (display_once != 1) {
> 	display_once = 1;
> 	printf("\nLegend for the table\n".....
> 	<all other printfs>
> }
> printf....
> printf....
> printf....
> rte_spinlock_unlock(&print_spinlock);
> 
> Rest of the changes look fine.
> 

Will send out the next version with the changes.
  

Patch

diff --git a/app/test-compress-perf/comp_perf_test_common.h b/app/test-compress-perf/comp_perf_test_common.h
index 72705c6a2b..d039e5a29a 100644
--- a/app/test-compress-perf/comp_perf_test_common.h
+++ b/app/test-compress-perf/comp_perf_test_common.h
@@ -14,7 +14,7 @@  struct cperf_mem_resources {
 	uint16_t qp_id;
 	uint8_t lcore_id;
 
-	rte_atomic16_t print_info_once;
+	uint16_t print_info_once;
 
 	uint32_t total_bufs;
 	uint8_t *compressed_data;
diff --git a/app/test-compress-perf/comp_perf_test_cyclecount.c b/app/test-compress-perf/comp_perf_test_cyclecount.c
index 55559a7d5a..e002e53bdf 100644
--- a/app/test-compress-perf/comp_perf_test_cyclecount.c
+++ b/app/test-compress-perf/comp_perf_test_cyclecount.c
@@ -468,7 +468,7 @@  cperf_cyclecount_test_runner(void *test_ctx)
 	struct cperf_cyclecount_ctx *ctx = test_ctx;
 	struct comp_test_data *test_data = ctx->ver.options;
 	uint32_t lcore = rte_lcore_id();
-	static rte_atomic16_t display_once = RTE_ATOMIC16_INIT(0);
+	static uint16_t display_once;
 	static rte_spinlock_t print_spinlock;
 	int i;
 
@@ -488,10 +488,12 @@  cperf_cyclecount_test_runner(void *test_ctx)
 
 	ctx->ver.mem.lcore_id = lcore;
 
+	uint16_t exp = 0;
 	/*
 	 * printing information about current compression thread
 	 */
-	if (rte_atomic16_test_and_set(&ctx->ver.mem.print_info_once))
+	if (__atomic_compare_exchange_n(&ctx->ver.mem.print_info_once, &exp,
+				1, 0, __ATOMIC_RELAXED,  __ATOMIC_RELAXED))
 		printf("    lcore: %u,"
 				" driver name: %s,"
 				" device name: %s,"
@@ -547,8 +549,10 @@  cperf_cyclecount_test_runner(void *test_ctx)
 	duration_setup_per_op = ctx->duration_op /
 			(ctx->ver.mem.total_bufs * test_data->num_iter);
 
+	exp = 0;
 	/* R E P O R T processing */
-	if (rte_atomic16_test_and_set(&display_once)) {
+	if (__atomic_compare_exchange_n(&display_once, &exp, 1, 0,
+			__ATOMIC_RELAXED, __ATOMIC_RELAXED)) {
 
 		rte_spinlock_lock(&print_spinlock);
 
diff --git a/app/test-compress-perf/comp_perf_test_throughput.c b/app/test-compress-perf/comp_perf_test_throughput.c
index 13922b658c..f587ad2ec3 100644
--- a/app/test-compress-perf/comp_perf_test_throughput.c
+++ b/app/test-compress-perf/comp_perf_test_throughput.c
@@ -329,15 +329,18 @@  cperf_throughput_test_runner(void *test_ctx)
 	struct cperf_benchmark_ctx *ctx = test_ctx;
 	struct comp_test_data *test_data = ctx->ver.options;
 	uint32_t lcore = rte_lcore_id();
-	static rte_atomic16_t display_once = RTE_ATOMIC16_INIT(0);
+	static uint16_t display_once;
 	int i, ret = EXIT_SUCCESS;
 
 	ctx->ver.mem.lcore_id = lcore;
 
+	uint16_t exp = 0;
 	/*
 	 * printing information about current compression thread
 	 */
-	if (rte_atomic16_test_and_set(&ctx->ver.mem.print_info_once))
+	if (__atomic_compare_exchange_n(&ctx->ver.mem.print_info_once, &exp,
+				1, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED))
+
 		printf("    lcore: %u,"
 				" driver name: %s,"
 				" device name: %s,"
@@ -391,7 +394,9 @@  cperf_throughput_test_runner(void *test_ctx)
 	ctx->decomp_gbps = rte_get_tsc_hz() / ctx->decomp_tsc_byte * 8 /
 			1000000000;
 
-	if (rte_atomic16_test_and_set(&display_once)) {
+	exp = 0;
+	if (__atomic_compare_exchange_n(&display_once, &exp, 1, 0,
+				__ATOMIC_RELAXED, __ATOMIC_RELAXED)) {
 		printf("\n%12s%6s%12s%17s%15s%16s\n",
 			"lcore id", "Level", "Comp size", "Comp ratio [%]",
 			"Comp [Gbps]", "Decomp [Gbps]");
diff --git a/app/test-compress-perf/comp_perf_test_verify.c b/app/test-compress-perf/comp_perf_test_verify.c
index 5e13257b79..6a2497985b 100644
--- a/app/test-compress-perf/comp_perf_test_verify.c
+++ b/app/test-compress-perf/comp_perf_test_verify.c
@@ -388,7 +388,7 @@  cperf_verify_test_runner(void *test_ctx)
 	struct cperf_verify_ctx *ctx = test_ctx;
 	struct comp_test_data *test_data = ctx->options;
 	int ret = EXIT_SUCCESS;
-	static rte_atomic16_t display_once = RTE_ATOMIC16_INIT(0);
+	static uint16_t display_once;
 	uint32_t lcore = rte_lcore_id();
 
 	ctx->mem.lcore_id = lcore;
@@ -428,7 +428,9 @@  cperf_verify_test_runner(void *test_ctx)
 			test_data->input_data_sz * 100;
 
 	if (!ctx->silent) {
-		if (rte_atomic16_test_and_set(&display_once)) {
+		uint16_t exp = 0;
+		if (__atomic_compare_exchange_n(&display_once, &exp, 1, 0,
+				__ATOMIC_RELAXED, __ATOMIC_RELAXED)) {
 			printf("%12s%6s%12s%17s\n",
 			    "lcore id", "Level", "Comp size", "Comp ratio [%]");
 		}