[v2] test/mcslock: move performance test to perf tests

Message ID 1595906669-18538-1-git-send-email-phil.yang@arm.com (mailing list archive)
State Rejected, archived
Delegated to: David Marchand
Headers
Series [v2] test/mcslock: move performance test to perf tests |

Checks

Context Check Description
ci/checkpatch warning coding style issues
ci/Intel-compilation success Compilation OK
ci/iol-broadcom-Performance success Performance Testing PASS
ci/iol-intel-Performance success Performance Testing PASS
ci/travis-robot success Travis build: passed
ci/iol-testing success Testing PASS
ci/iol-mellanox-Performance success Performance Testing PASS

Commit Message

Phil Yang July 28, 2020, 3:24 a.m. UTC
  Move the MCS lock performance test into perf tests.

Signed-off-by: Phil Yang <phil.yang@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
---
v2:
1. Rebase code.
2. Split from the original patchset.

 MAINTAINERS                  |   1 +
 app/test/Makefile            |   1 +
 app/test/autotest_data.py    |   6 +++
 app/test/meson.build         |   4 +-
 app/test/test_mcslock.c      |  88 -------------------------------
 app/test/test_mcslock_perf.c | 121 +++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 132 insertions(+), 89 deletions(-)
 create mode 100644 app/test/test_mcslock_perf.c
  

Comments

Juraj Linkeš Sept. 17, 2020, 9:16 a.m. UTC | #1
> -----Original Message-----
> From: dev <dev-bounces@dpdk.org> On Behalf Of Phil Yang
> Sent: Tuesday, July 28, 2020 5:24 AM
> To: david.marchand@redhat.com; dev@dpdk.org
> Cc: aconole@redhat.com; Honnappa.Nagarahalli@arm.com;
> Ruifeng.Wang@arm.com; nd@arm.com; Thomas Monjalon
> <thomas@monjalon.net>
> Subject: [dpdk-dev] [PATCH v2] test/mcslock: move performance test to perf
> tests
> 
> Move the MCS lock performance test into perf tests.
> 
> Signed-off-by: Phil Yang <phil.yang@arm.com>
> Reviewed-by: Gavin Hu <gavin.hu@arm.com>
> ---
> v2:
> 1. Rebase code.
> 2. Split from the original patchset.
> 
>  MAINTAINERS                  |   1 +
>  app/test/Makefile            |   1 +
>  app/test/autotest_data.py    |   6 +++
>  app/test/meson.build         |   4 +-
>  app/test/test_mcslock.c      |  88 -------------------------------
>  app/test/test_mcslock_perf.c | 121
> +++++++++++++++++++++++++++++++++++++++++++
>  6 files changed, 132 insertions(+), 89 deletions(-)  create mode 100644
> app/test/test_mcslock_perf.c
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 171c4c7..eae6034 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -265,6 +265,7 @@ MCSlock - EXPERIMENTAL
>  M: Phil Yang <phil.yang@arm.com>
>  F: lib/librte_eal/include/generic/rte_mcslock.h
>  F: app/test/test_mcslock.c
> +F: app/test/test_mcslock_perf.c
> 
>  Ticketlock
>  M: Joyce Kong <joyce.kong@arm.com>
> diff --git a/app/test/Makefile b/app/test/Makefile index f406527..e4b3327
> 100644
> --- a/app/test/Makefile
> +++ b/app/test/Makefile
> @@ -65,6 +65,7 @@ SRCS-y += test_barrier.c  SRCS-y += test_malloc.c  SRCS-y
> += test_cycles.c  SRCS-y += test_mcslock.c
> +SRCS-y += test_mcslock_perf.c
>  SRCS-y += test_spinlock.c
>  SRCS-y += test_ticketlock.c
>  SRCS-y += test_memory.c
> diff --git a/app/test/autotest_data.py b/app/test/autotest_data.py index
> 4b7da45..c813b93 100644
> --- a/app/test/autotest_data.py
> +++ b/app/test/autotest_data.py
> @@ -796,6 +796,12 @@
>          "Func":    default_autotest,
>          "Report":  None,
>      },
> +    {
> +        "Name":    "MCS Lock performance autotest",
> +        "Command": "mcslock_perf_autotest",
> +        "Func":    default_autotest,
> +        "Report":  None,
> +    },
>      #
>      # Please always make sure that ring_perf is the last test!
>      #
> diff --git a/app/test/meson.build b/app/test/meson.build index
> 786a213..59df2fd 100644
> --- a/app/test/meson.build
> +++ b/app/test/meson.build
> @@ -86,6 +86,7 @@ test_sources = files('commands.c',
>  	'test_meter.c',
>  	'test_metrics.c',
>  	'test_mcslock.c',
> +	'test_mcslock_perf.c',
>  	'test_mp_secondary.c',
>  	'test_per_lcore.c',
>  	'test_pmd_perf.c',
> @@ -297,7 +298,8 @@ perf_test_names = [
>          'hash_readwrite_perf_autotest',
>          'hash_readwrite_lf_perf_autotest',
>          'trace_perf_autotest',
> -	'ipsec_perf_autotest',
> +        'ipsec_perf_autotest',
> +        'mcslock_perf_autotest',
>  ]
> 
>  driver_test_names = [
> diff --git a/app/test/test_mcslock.c b/app/test/test_mcslock.c index
> ddccaaf..a1dc610 100644
> --- a/app/test/test_mcslock.c
> +++ b/app/test/test_mcslock.c
> @@ -32,23 +32,16 @@
>   *
>   *   - The function takes the global lock, display something, then releases
>   *     the global lock on each core.
> - *
> - * - A load test is carried out, with all cores attempting to lock a single
> - *   lock multiple times.
>   */
> 
>  RTE_DEFINE_PER_LCORE(rte_mcslock_t, _ml_me);
> RTE_DEFINE_PER_LCORE(rte_mcslock_t, _ml_try_me); -
> RTE_DEFINE_PER_LCORE(rte_mcslock_t, _ml_perf_me);
> 
>  rte_mcslock_t *p_ml;
>  rte_mcslock_t *p_ml_try;
> -rte_mcslock_t *p_ml_perf;
> 
>  static unsigned int count;
> 
> -static rte_atomic32_t synchro;
> -
>  static int
>  test_mcslock_per_core(__rte_unused void *arg)  { @@ -63,85 +56,8 @@
> test_mcslock_per_core(__rte_unused void *arg)
>  	return 0;
>  }
> 
> -static uint64_t time_count[RTE_MAX_LCORE] = {0};
> -
>  #define MAX_LOOP 1000000
> 
> -static int
> -load_loop_fn(void *func_param)
> -{
> -	uint64_t time_diff = 0, begin;
> -	uint64_t hz = rte_get_timer_hz();
> -	volatile uint64_t lcount = 0;
> -	const int use_lock = *(int *)func_param;
> -	const unsigned int lcore = rte_lcore_id();
> -
> -	/**< Per core me node. */
> -	rte_mcslock_t ml_perf_me = RTE_PER_LCORE(_ml_perf_me);
> -
> -	/* wait synchro */
> -	while (rte_atomic32_read(&synchro) == 0)
> -		;
> -
> -	begin = rte_get_timer_cycles();
> -	while (lcount < MAX_LOOP) {
> -		if (use_lock)
> -			rte_mcslock_lock(&p_ml_perf, &ml_perf_me);
> -
> -		lcount++;
> -		if (use_lock)
> -			rte_mcslock_unlock(&p_ml_perf, &ml_perf_me);
> -	}
> -	time_diff = rte_get_timer_cycles() - begin;
> -	time_count[lcore] = time_diff * 1000000 / hz;
> -	return 0;
> -}
> -
> -static int
> -test_mcslock_perf(void)
> -{
> -	unsigned int i;
> -	uint64_t total = 0;
> -	int lock = 0;
> -	const unsigned int lcore = rte_lcore_id();
> -
> -	printf("\nTest with no lock on single core...\n");
> -	rte_atomic32_set(&synchro, 1);
> -	load_loop_fn(&lock);
> -	printf("Core [%u] Cost Time = %"PRIu64" us\n",
> -			lcore, time_count[lcore]);
> -	memset(time_count, 0, sizeof(time_count));
> -
> -	printf("\nTest with lock on single core...\n");
> -	lock = 1;
> -	rte_atomic32_set(&synchro, 1);
> -	load_loop_fn(&lock);
> -	printf("Core [%u] Cost Time = %"PRIu64" us\n",
> -			lcore, time_count[lcore]);
> -	memset(time_count, 0, sizeof(time_count));
> -
> -	printf("\nTest with lock on %u cores...\n", (rte_lcore_count()));
> -
> -	rte_atomic32_set(&synchro, 0);
> -	rte_eal_mp_remote_launch(load_loop_fn, &lock, SKIP_MASTER);
> -
> -	/* start synchro and launch test on master */
> -	rte_atomic32_set(&synchro, 1);
> -	load_loop_fn(&lock);
> -
> -	rte_eal_mp_wait_lcore();
> -
> -	RTE_LCORE_FOREACH(i) {
> -		printf("Core [%u] Cost Time = %"PRIu64" us\n",
> -				i, time_count[i]);
> -		total += time_count[i];
> -	}
> -
> -	printf("Total Cost Time = %"PRIu64" us\n", total);
> -
> -	return 0;
> -}
> -
>  /*
>   * Use rte_mcslock_trylock() to trylock a mcs lock object,
>   * If it could not lock the object successfully, it would @@ -240,10 +156,6 @@
> test_mcslock(void)
>  		ret = -1;
>  	rte_mcslock_unlock(&p_ml, &ml_me);
> 
> -	/* mcs lock perf test */
> -	if (test_mcslock_perf() < 0)
> -		return -1;
> -
>  	return ret;
>  }
> 
> diff --git a/app/test/test_mcslock_perf.c b/app/test/test_mcslock_perf.c new
> file mode 100644 index 0000000..6948344
> --- /dev/null
> +++ b/app/test/test_mcslock_perf.c
> @@ -0,0 +1,121 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2019 Arm Limited
> + */
> +
> +#include <stdio.h>
> +#include <stdint.h>
> +#include <inttypes.h>
> +#include <string.h>
> +#include <unistd.h>
> +#include <sys/queue.h>
> +
> +#include <rte_common.h>
> +#include <rte_memory.h>
> +#include <rte_per_lcore.h>
> +#include <rte_launch.h>
> +#include <rte_eal.h>
> +#include <rte_lcore.h>
> +#include <rte_cycles.h>
> +#include <rte_mcslock.h>
> +#include <rte_atomic.h>
> +
> +#include "test.h"
> +
> +/*
> + * RTE MCS lock perf test
> + * ======================
> + *
> + * These tests are derived from spin lock perf test cases.
> + *
> + * - A load test is carried out, with all cores attempting to lock a single
> + *   lock multiple times.
> + */
> +
> +RTE_DEFINE_PER_LCORE(rte_mcslock_t, _ml_perf_me); rte_mcslock_t
> +*p_ml_perf;
> +
> +static rte_atomic32_t synchro;
> +static uint64_t time_count[RTE_MAX_LCORE] = {0};
> +
> +#define MAX_LOOP 1000000
> +
> +static int
> +load_loop_fn(void *func_param)
> +{
> +	uint64_t time_diff = 0, begin;
> +	uint64_t hz = rte_get_timer_hz();
> +	volatile uint64_t lcount = 0;
> +	const int use_lock = *(int *)func_param;
> +	const unsigned int lcore = rte_lcore_id();
> +
> +	/**< Per core me node. */
> +	rte_mcslock_t ml_perf_me = RTE_PER_LCORE(_ml_perf_me);
> +
> +	/* wait synchro */
> +	while (rte_atomic32_read(&synchro) == 0)
> +		;
> +
> +	begin = rte_get_timer_cycles();
> +	while (lcount < MAX_LOOP) {
> +		if (use_lock)
> +			rte_mcslock_lock(&p_ml_perf, &ml_perf_me);
> +
> +		lcount++;
> +		if (use_lock)
> +			rte_mcslock_unlock(&p_ml_perf, &ml_perf_me);
> +	}
> +	time_diff = rte_get_timer_cycles() - begin;
> +	time_count[lcore] = time_diff * 1000000 / hz;
> +	return 0;
> +}
> +
> +/*
> + * Test rte_eal_get_lcore_state() in addition to mcs locks
> + * as we have "waiting" then "running" lcores.
> + */
> +static int
> +test_mcslock_perf(void)
> +{
> +	unsigned int i;
> +	uint64_t total = 0;
> +	int lock = 0;
> +	const unsigned int lcore = rte_lcore_id();
> +
> +	printf("\nTest with no lock on single core...\n");
> +	rte_atomic32_set(&synchro, 1);
> +	load_loop_fn(&lock);
> +	printf("Core [%u] Cost Time = %"PRIu64" us\n",
> +			lcore, time_count[lcore]);
> +	memset(time_count, 0, sizeof(time_count));
> +
> +	printf("\nTest with lock on single core...\n");
> +	lock = 1;
> +	rte_atomic32_set(&synchro, 1);
> +	load_loop_fn(&lock);
> +	printf("Core [%u] Cost Time = %"PRIu64" us\n",
> +			lcore, time_count[lcore]);
> +	memset(time_count, 0, sizeof(time_count));
> +
> +	printf("\nTest with lock on %u cores...\n", (rte_lcore_count()));
> +
> +	rte_atomic32_set(&synchro, 0);
> +	rte_eal_mp_remote_launch(load_loop_fn, &lock, SKIP_MASTER);
> +
> +	/* start synchro and launch test on master */
> +	rte_atomic32_set(&synchro, 1);
> +	load_loop_fn(&lock);
> +
> +	rte_eal_mp_wait_lcore();
> +
> +	RTE_LCORE_FOREACH(i) {
> +		printf("Core [%u] Cost Time = %"PRIu64" us\n",
> +				i, time_count[i]);
> +		total += time_count[i];
> +	}
> +
> +	printf("Total Cost Time = %"PRIu64" us\n", total);
> +
> +	return 0;
> +}
> +
> +REGISTER_TEST_COMMAND(mcslock_perf_autotest, test_mcslock_perf);
> --
> 2.7.4
> 

Reviewed-by: Juraj Linkeš <juraj.linkes@pantheon.tech>
  

Patch

diff --git a/MAINTAINERS b/MAINTAINERS
index 171c4c7..eae6034 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -265,6 +265,7 @@  MCSlock - EXPERIMENTAL
 M: Phil Yang <phil.yang@arm.com>
 F: lib/librte_eal/include/generic/rte_mcslock.h
 F: app/test/test_mcslock.c
+F: app/test/test_mcslock_perf.c
 
 Ticketlock
 M: Joyce Kong <joyce.kong@arm.com>
diff --git a/app/test/Makefile b/app/test/Makefile
index f406527..e4b3327 100644
--- a/app/test/Makefile
+++ b/app/test/Makefile
@@ -65,6 +65,7 @@  SRCS-y += test_barrier.c
 SRCS-y += test_malloc.c
 SRCS-y += test_cycles.c
 SRCS-y += test_mcslock.c
+SRCS-y += test_mcslock_perf.c
 SRCS-y += test_spinlock.c
 SRCS-y += test_ticketlock.c
 SRCS-y += test_memory.c
diff --git a/app/test/autotest_data.py b/app/test/autotest_data.py
index 4b7da45..c813b93 100644
--- a/app/test/autotest_data.py
+++ b/app/test/autotest_data.py
@@ -796,6 +796,12 @@ 
         "Func":    default_autotest,
         "Report":  None,
     },
+    {
+        "Name":    "MCS Lock performance autotest",
+        "Command": "mcslock_perf_autotest",
+        "Func":    default_autotest,
+        "Report":  None,
+    },
     #
     # Please always make sure that ring_perf is the last test!
     #
diff --git a/app/test/meson.build b/app/test/meson.build
index 786a213..59df2fd 100644
--- a/app/test/meson.build
+++ b/app/test/meson.build
@@ -86,6 +86,7 @@  test_sources = files('commands.c',
 	'test_meter.c',
 	'test_metrics.c',
 	'test_mcslock.c',
+	'test_mcslock_perf.c',
 	'test_mp_secondary.c',
 	'test_per_lcore.c',
 	'test_pmd_perf.c',
@@ -297,7 +298,8 @@  perf_test_names = [
         'hash_readwrite_perf_autotest',
         'hash_readwrite_lf_perf_autotest',
         'trace_perf_autotest',
-	'ipsec_perf_autotest',
+        'ipsec_perf_autotest',
+        'mcslock_perf_autotest',
 ]
 
 driver_test_names = [
diff --git a/app/test/test_mcslock.c b/app/test/test_mcslock.c
index ddccaaf..a1dc610 100644
--- a/app/test/test_mcslock.c
+++ b/app/test/test_mcslock.c
@@ -32,23 +32,16 @@ 
  *
  *   - The function takes the global lock, display something, then releases
  *     the global lock on each core.
- *
- * - A load test is carried out, with all cores attempting to lock a single
- *   lock multiple times.
  */
 
 RTE_DEFINE_PER_LCORE(rte_mcslock_t, _ml_me);
 RTE_DEFINE_PER_LCORE(rte_mcslock_t, _ml_try_me);
-RTE_DEFINE_PER_LCORE(rte_mcslock_t, _ml_perf_me);
 
 rte_mcslock_t *p_ml;
 rte_mcslock_t *p_ml_try;
-rte_mcslock_t *p_ml_perf;
 
 static unsigned int count;
 
-static rte_atomic32_t synchro;
-
 static int
 test_mcslock_per_core(__rte_unused void *arg)
 {
@@ -63,85 +56,8 @@  test_mcslock_per_core(__rte_unused void *arg)
 	return 0;
 }
 
-static uint64_t time_count[RTE_MAX_LCORE] = {0};
-
 #define MAX_LOOP 1000000
 
-static int
-load_loop_fn(void *func_param)
-{
-	uint64_t time_diff = 0, begin;
-	uint64_t hz = rte_get_timer_hz();
-	volatile uint64_t lcount = 0;
-	const int use_lock = *(int *)func_param;
-	const unsigned int lcore = rte_lcore_id();
-
-	/**< Per core me node. */
-	rte_mcslock_t ml_perf_me = RTE_PER_LCORE(_ml_perf_me);
-
-	/* wait synchro */
-	while (rte_atomic32_read(&synchro) == 0)
-		;
-
-	begin = rte_get_timer_cycles();
-	while (lcount < MAX_LOOP) {
-		if (use_lock)
-			rte_mcslock_lock(&p_ml_perf, &ml_perf_me);
-
-		lcount++;
-		if (use_lock)
-			rte_mcslock_unlock(&p_ml_perf, &ml_perf_me);
-	}
-	time_diff = rte_get_timer_cycles() - begin;
-	time_count[lcore] = time_diff * 1000000 / hz;
-	return 0;
-}
-
-static int
-test_mcslock_perf(void)
-{
-	unsigned int i;
-	uint64_t total = 0;
-	int lock = 0;
-	const unsigned int lcore = rte_lcore_id();
-
-	printf("\nTest with no lock on single core...\n");
-	rte_atomic32_set(&synchro, 1);
-	load_loop_fn(&lock);
-	printf("Core [%u] Cost Time = %"PRIu64" us\n",
-			lcore, time_count[lcore]);
-	memset(time_count, 0, sizeof(time_count));
-
-	printf("\nTest with lock on single core...\n");
-	lock = 1;
-	rte_atomic32_set(&synchro, 1);
-	load_loop_fn(&lock);
-	printf("Core [%u] Cost Time = %"PRIu64" us\n",
-			lcore, time_count[lcore]);
-	memset(time_count, 0, sizeof(time_count));
-
-	printf("\nTest with lock on %u cores...\n", (rte_lcore_count()));
-
-	rte_atomic32_set(&synchro, 0);
-	rte_eal_mp_remote_launch(load_loop_fn, &lock, SKIP_MASTER);
-
-	/* start synchro and launch test on master */
-	rte_atomic32_set(&synchro, 1);
-	load_loop_fn(&lock);
-
-	rte_eal_mp_wait_lcore();
-
-	RTE_LCORE_FOREACH(i) {
-		printf("Core [%u] Cost Time = %"PRIu64" us\n",
-				i, time_count[i]);
-		total += time_count[i];
-	}
-
-	printf("Total Cost Time = %"PRIu64" us\n", total);
-
-	return 0;
-}
-
 /*
  * Use rte_mcslock_trylock() to trylock a mcs lock object,
  * If it could not lock the object successfully, it would
@@ -240,10 +156,6 @@  test_mcslock(void)
 		ret = -1;
 	rte_mcslock_unlock(&p_ml, &ml_me);
 
-	/* mcs lock perf test */
-	if (test_mcslock_perf() < 0)
-		return -1;
-
 	return ret;
 }
 
diff --git a/app/test/test_mcslock_perf.c b/app/test/test_mcslock_perf.c
new file mode 100644
index 0000000..6948344
--- /dev/null
+++ b/app/test/test_mcslock_perf.c
@@ -0,0 +1,121 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Arm Limited
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/queue.h>
+
+#include <rte_common.h>
+#include <rte_memory.h>
+#include <rte_per_lcore.h>
+#include <rte_launch.h>
+#include <rte_eal.h>
+#include <rte_lcore.h>
+#include <rte_cycles.h>
+#include <rte_mcslock.h>
+#include <rte_atomic.h>
+
+#include "test.h"
+
+/*
+ * RTE MCS lock perf test
+ * ======================
+ *
+ * These tests are derived from spin lock perf test cases.
+ *
+ * - A load test is carried out, with all cores attempting to lock a single
+ *   lock multiple times.
+ */
+
+RTE_DEFINE_PER_LCORE(rte_mcslock_t, _ml_perf_me);
+rte_mcslock_t *p_ml_perf;
+
+static rte_atomic32_t synchro;
+static uint64_t time_count[RTE_MAX_LCORE] = {0};
+
+#define MAX_LOOP 1000000
+
+static int
+load_loop_fn(void *func_param)
+{
+	uint64_t time_diff = 0, begin;
+	uint64_t hz = rte_get_timer_hz();
+	volatile uint64_t lcount = 0;
+	const int use_lock = *(int *)func_param;
+	const unsigned int lcore = rte_lcore_id();
+
+	/**< Per core me node. */
+	rte_mcslock_t ml_perf_me = RTE_PER_LCORE(_ml_perf_me);
+
+	/* wait synchro */
+	while (rte_atomic32_read(&synchro) == 0)
+		;
+
+	begin = rte_get_timer_cycles();
+	while (lcount < MAX_LOOP) {
+		if (use_lock)
+			rte_mcslock_lock(&p_ml_perf, &ml_perf_me);
+
+		lcount++;
+		if (use_lock)
+			rte_mcslock_unlock(&p_ml_perf, &ml_perf_me);
+	}
+	time_diff = rte_get_timer_cycles() - begin;
+	time_count[lcore] = time_diff * 1000000 / hz;
+	return 0;
+}
+
+/*
+ * Test rte_eal_get_lcore_state() in addition to mcs locks
+ * as we have "waiting" then "running" lcores.
+ */
+static int
+test_mcslock_perf(void)
+{
+	unsigned int i;
+	uint64_t total = 0;
+	int lock = 0;
+	const unsigned int lcore = rte_lcore_id();
+
+	printf("\nTest with no lock on single core...\n");
+	rte_atomic32_set(&synchro, 1);
+	load_loop_fn(&lock);
+	printf("Core [%u] Cost Time = %"PRIu64" us\n",
+			lcore, time_count[lcore]);
+	memset(time_count, 0, sizeof(time_count));
+
+	printf("\nTest with lock on single core...\n");
+	lock = 1;
+	rte_atomic32_set(&synchro, 1);
+	load_loop_fn(&lock);
+	printf("Core [%u] Cost Time = %"PRIu64" us\n",
+			lcore, time_count[lcore]);
+	memset(time_count, 0, sizeof(time_count));
+
+	printf("\nTest with lock on %u cores...\n", (rte_lcore_count()));
+
+	rte_atomic32_set(&synchro, 0);
+	rte_eal_mp_remote_launch(load_loop_fn, &lock, SKIP_MASTER);
+
+	/* start synchro and launch test on master */
+	rte_atomic32_set(&synchro, 1);
+	load_loop_fn(&lock);
+
+	rte_eal_mp_wait_lcore();
+
+	RTE_LCORE_FOREACH(i) {
+		printf("Core [%u] Cost Time = %"PRIu64" us\n",
+				i, time_count[i]);
+		total += time_count[i];
+	}
+
+	printf("Total Cost Time = %"PRIu64" us\n", total);
+
+	return 0;
+}
+
+REGISTER_TEST_COMMAND(mcslock_perf_autotest, test_mcslock_perf);