[v2,4/4] test/lpm: avoid code duplication in rcu qsbr perf
Checks
Commit Message
Avoid code duplication by combining single and multi threaded tests
Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
app/test/test_lpm_perf.c | 362 ++++++++++-----------------------------
1 file changed, 91 insertions(+), 271 deletions(-)
Comments
<snip>
>
> Avoid code duplication by combining single and multi threaded tests
>
> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> ---
> app/test/test_lpm_perf.c | 362 ++++++++++-----------------------------
> 1 file changed, 91 insertions(+), 271 deletions(-)
>
> diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c index
> 224c92fa3d65..229c835c23f7 100644
> --- a/app/test/test_lpm_perf.c
> +++ b/app/test/test_lpm_perf.c
> @@ -67,6 +67,12 @@ enum {
> IP_CLASS_C
> };
>
> +enum {
> + SINGLE_WRITER = 1,
> + MULTI_WRITER_1,
> + MULTI_WRITER_2
> +};
Do we need this? Can we use the number of cores instead?
> +
> /* struct route_rule_count defines the total number of rules in following
> a/b/c
> * each item in a[]/b[]/c[] is the number of common IP address class A/B/C,
> not
> * including the ones for private local network.
> @@ -430,11 +436,16 @@ test_lpm_rcu_qsbr_writer(void *arg) {
> unsigned int i, j, si, ei;
> uint64_t begin, total_cycles;
> - uint8_t core_id = (uint8_t)((uintptr_t)arg);
> + uint8_t writer_id = (uint8_t)((uintptr_t)arg);
> uint32_t next_hop_add = 0xAA;
>
> - /* 2 writer threads are used */
> - if (core_id % 2 == 0) {
> + /* Single writer (writer_id = 1) */
> + if (writer_id == SINGLE_WRITER) {
> + si = 0;
> + ei = NUM_LDEPTH_ROUTE_ENTRIES;
> + }
> + /* 2 Writers (writer_id = 2/3)*/
> + else if (writer_id == MULTI_WRITER_1) {
> si = 0;
> ei = NUM_LDEPTH_ROUTE_ENTRIES / 2;
> } else {
> @@ -447,29 +458,35 @@ test_lpm_rcu_qsbr_writer(void *arg)
> for (i = 0; i < RCU_ITERATIONS; i++) {
> /* Add all the entries */
> for (j = si; j < ei; j++) {
> - pthread_mutex_lock(&lpm_mutex);
> + if (writer_id != SINGLE_WRITER)
> + pthread_mutex_lock(&lpm_mutex);
> if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
> large_ldepth_route_table[j].depth,
> next_hop_add) != 0) {
> printf("Failed to add iteration %d, route#
> %d\n",
> i, j);
> - pthread_mutex_unlock(&lpm_mutex);
> + if (writer_id != SINGLE_WRITER)
> +
> pthread_mutex_unlock(&lpm_mutex);
> return -1;
> }
> - pthread_mutex_unlock(&lpm_mutex);
> + if (writer_id != SINGLE_WRITER)
> + pthread_mutex_unlock(&lpm_mutex);
> }
>
> /* Delete all the entries */
> for (j = si; j < ei; j++) {
> - pthread_mutex_lock(&lpm_mutex);
> + if (writer_id != SINGLE_WRITER)
> + pthread_mutex_lock(&lpm_mutex);
> if (rte_lpm_delete(lpm,
> large_ldepth_route_table[j].ip,
> large_ldepth_route_table[j].depth) != 0) {
> printf("Failed to delete iteration %d, route#
> %d\n",
> i, j);
> - pthread_mutex_unlock(&lpm_mutex);
> + if (writer_id != SINGLE_WRITER)
> +
> pthread_mutex_unlock(&lpm_mutex);
> return -1;
> }
> - pthread_mutex_unlock(&lpm_mutex);
> + if (writer_id != SINGLE_WRITER)
> + pthread_mutex_unlock(&lpm_mutex);
> }
> }
>
> @@ -482,16 +499,17 @@ test_lpm_rcu_qsbr_writer(void *arg)
>
> /*
> * Functional test:
> - * 2 writers, rest are readers
> + * 1/2 writers, rest are readers
> */
> static int
> -test_lpm_rcu_perf_multi_writer(void)
> +test_lpm_rcu_perf_multi_writer(uint8_t use_rcu)
> {
> struct rte_lpm_config config;
> size_t sz;
> - unsigned int i;
> + unsigned int i, j;
> uint16_t core_id;
> struct rte_lpm_rcu_config rcu_cfg = {0};
> + int (*reader_f)(void *arg) = NULL;
>
> if (rte_lcore_count() < 3) {
> printf("Not enough cores for lpm_rcu_perf_autotest,
> expecting at least 3\n"); @@ -504,273 +522,76 @@
> test_lpm_rcu_perf_multi_writer(void)
> num_cores++;
> }
>
> - printf("\nPerf test: 2 writers, %d readers, RCU integration
> enabled\n",
> - num_cores - 2);
> -
> - /* Create LPM table */
> - config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
> - config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
> - config.flags = 0;
> - lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> - TEST_LPM_ASSERT(lpm != NULL);
> -
> - /* Init RCU variable */
> - sz = rte_rcu_qsbr_get_memsize(num_cores);
> - rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
> - RTE_CACHE_LINE_SIZE);
> - rte_rcu_qsbr_init(rv, num_cores);
> -
> - rcu_cfg.v = rv;
> - /* Assign the RCU variable to LPM */
> - if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
> - printf("RCU variable assignment failed\n");
> - goto error;
> - }
> -
> - writer_done = 0;
> - __atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
> -
> - __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> -
> - /* Launch reader threads */
> - for (i = 2; i < num_cores; i++)
> - rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
> - enabled_core_ids[i]);
> -
> - /* Launch writer threads */
> - for (i = 0; i < 2; i++)
> - rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
> - (void *)(uintptr_t)i,
> - enabled_core_ids[i]);
> -
> - /* Wait for writer threads */
> - for (i = 0; i < 2; i++)
> - if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
> - goto error;
> -
> - printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> - printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
> - printf("Average LPM Add/Del: %"PRIu64" cycles\n",
> - __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
> - / TOTAL_WRITES);
> -
> - writer_done = 1;
> - /* Wait until all readers have exited */
> - for (i = 2; i < num_cores; i++)
> - rte_eal_wait_lcore(enabled_core_ids[i]);
> -
> - rte_lpm_free(lpm);
> - rte_free(rv);
> - lpm = NULL;
> - rv = NULL;
> -
> - /* Test without RCU integration */
> - printf("\nPerf test: 2 writers, %d readers, RCU integration
> disabled\n",
> - num_cores - 2);
> -
> - /* Create LPM table */
> - config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
> - config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
> - config.flags = 0;
> - lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> - TEST_LPM_ASSERT(lpm != NULL);
> -
> - writer_done = 0;
> - __atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
> - __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> -
> - /* Launch reader threads */
> - for (i = 2; i < num_cores; i++)
> - rte_eal_remote_launch(test_lpm_reader, NULL,
> - enabled_core_ids[i]);
> -
> - /* Launch writer threads */
> - for (i = 0; i < 2; i++)
> - rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
> - (void *)(uintptr_t)i,
> - enabled_core_ids[i]);
> -
> - /* Wait for writer threads */
> - for (i = 0; i < 2; i++)
> - if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
> - goto error;
> -
> - printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> - printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
> - printf("Average LPM Add/Del: %"PRIu64" cycles\n",
> - __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
> - / TOTAL_WRITES);
> -
> - writer_done = 1;
> - /* Wait until all readers have exited */
> - for (i = 2; i < num_cores; i++)
> - rte_eal_wait_lcore(enabled_core_ids[i]);
> -
> - rte_lpm_free(lpm);
> -
> - return 0;
> -
> -error:
> - writer_done = 1;
> - /* Wait until all readers have exited */
> - rte_eal_mp_wait_lcore();
> -
> - rte_lpm_free(lpm);
> - rte_free(rv);
> -
> - return -1;
> -}
> -
> -/*
> - * Functional test:
> - * Single writer, rest are readers
> - */
> -static int
> -test_lpm_rcu_perf(void)
> -{
> - struct rte_lpm_config config;
> - uint64_t begin, total_cycles;
> - size_t sz;
> - unsigned int i, j;
> - uint16_t core_id;
> - uint32_t next_hop_add = 0xAA;
> - struct rte_lpm_rcu_config rcu_cfg = {0};
> -
> - if (rte_lcore_count() < 2) {
> - printf("Not enough cores for lpm_rcu_perf_autotest,
> expecting at least 2\n");
> - return TEST_SKIPPED;
> - }
> -
> - num_cores = 0;
> - RTE_LCORE_FOREACH_WORKER(core_id) {
> - enabled_core_ids[num_cores] = core_id;
> - num_cores++;
> - }
> -
> - printf("\nPerf test: 1 writer, %d readers, RCU integration enabled\n",
> - num_cores);
> -
> - /* Create LPM table */
> - config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
> - config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
> - config.flags = 0;
> - lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> - TEST_LPM_ASSERT(lpm != NULL);
> -
> - /* Init RCU variable */
> - sz = rte_rcu_qsbr_get_memsize(num_cores);
> - rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
> - RTE_CACHE_LINE_SIZE);
> - rte_rcu_qsbr_init(rv, num_cores);
> -
> - rcu_cfg.v = rv;
> - /* Assign the RCU variable to LPM */
> - if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
> - printf("RCU variable assignment failed\n");
> - goto error;
> - }
> -
> - writer_done = 0;
> - __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> -
> - /* Launch reader threads */
> - for (i = 0; i < num_cores; i++)
> - rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
> - enabled_core_ids[i]);
> -
> - /* Measure add/delete. */
> - begin = rte_rdtsc_precise();
> - for (i = 0; i < RCU_ITERATIONS; i++) {
> - /* Add all the entries */
> - for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
> - if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
> - large_ldepth_route_table[j].depth,
> - next_hop_add) != 0) {
> - printf("Failed to add iteration %d, route#
> %d\n",
> - i, j);
> + for (j = 1; j < 3; j++) {
> + if (use_rcu)
> + printf("\nPerf test: %d writer(s), %d reader(s),"
> + " RCU integration enabled\n", j, num_cores - j);
> + else
> + printf("\nPerf test: %d writer(s), %d reader(s),"
> + " RCU integration disabled\n", j, num_cores - j);
> +
> + /* Create LPM table */
> + config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
> + config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
> + config.flags = 0;
> + lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> + TEST_LPM_ASSERT(lpm != NULL);
> +
> + /* Init RCU variable */
> + if (use_rcu) {
> + sz = rte_rcu_qsbr_get_memsize(num_cores);
> + rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
> +
> RTE_CACHE_LINE_SIZE);
> + rte_rcu_qsbr_init(rv, num_cores);
> +
> + rcu_cfg.v = rv;
> + /* Assign the RCU variable to LPM */
> + if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
> + printf("RCU variable assignment failed\n");
> goto error;
> }
>
> - /* Delete all the entries */
> - for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
> - if (rte_lpm_delete(lpm,
> large_ldepth_route_table[j].ip,
> - large_ldepth_route_table[j].depth) != 0) {
> - printf("Failed to delete iteration %d, route#
> %d\n",
> - i, j);
> - goto error;
> - }
> - }
> - total_cycles = rte_rdtsc_precise() - begin;
> + reader_f = test_lpm_rcu_qsbr_reader;
> + } else
> + reader_f = test_lpm_reader;
>
> - printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> - printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
> - printf("Average LPM Add/Del: %g cycles\n",
> - (double)total_cycles / TOTAL_WRITES);
> + writer_done = 0;
> + __atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
>
> - writer_done = 1;
> - /* Wait until all readers have exited */
> - for (i = 0; i < num_cores; i++)
> - if (rte_eal_wait_lcore(enabled_core_ids[i]);
> -
> - rte_lpm_free(lpm);
> - rte_free(rv);
> - lpm = NULL;
> - rv = NULL;
> -
> - /* Test without RCU integration */
> - printf("\nPerf test: 1 writer, %d readers, RCU integration
> disabled\n",
> - num_cores);
> -
> - /* Create LPM table */
> - config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
> - config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
> - config.flags = 0;
> - lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> - TEST_LPM_ASSERT(lpm != NULL);
> + __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>
> - writer_done = 0;
> - __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> + /* Launch reader threads */
> + for (i = j; i < num_cores; i++)
> + rte_eal_remote_launch(reader_f, NULL,
> + enabled_core_ids[i]);
>
> - /* Launch reader threads */
> - for (i = 0; i < num_cores; i++)
> - rte_eal_remote_launch(test_lpm_reader, NULL,
> - enabled_core_ids[i]);
> + /* Launch writer threads */
> + for (i = 0; i < j; i++)
> + rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
> + (void *)(uintptr_t)(i + j),
This can be just 'j'?
> + enabled_core_ids[i]);
>
> - /* Measure add/delete. */
> - begin = rte_rdtsc_precise();
> - for (i = 0; i < RCU_ITERATIONS; i++) {
> - /* Add all the entries */
> - for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
> - if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
> - large_ldepth_route_table[j].depth,
> - next_hop_add) != 0) {
> - printf("Failed to add iteration %d, route#
> %d\n",
> - i, j);
> + /* Wait for writer threads */
> + for (i = 0; i < j; i++)
> + if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
> goto error;
> - }
>
> - /* Delete all the entries */
> - for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
> - if (rte_lpm_delete(lpm,
> large_ldepth_route_table[j].ip,
> - large_ldepth_route_table[j].depth) != 0) {
> - printf("Failed to delete iteration %d, route#
> %d\n",
> - i, j);
> - goto error;
> - }
> + printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> + printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
> + printf("Average LPM Add/Del: %"PRIu64" cycles\n",
> + __atomic_load_n(&gwrite_cycles,
> __ATOMIC_RELAXED)
> + / TOTAL_WRITES);
> +
> + writer_done = 1;
> + /* Wait until all readers have exited */
> + for (i = j; i < num_cores; i++)
> + rte_eal_wait_lcore(enabled_core_ids[i]);
> +
> + rte_lpm_free(lpm);
> + rte_free(rv);
> + lpm = NULL;
> + rv = NULL;
> }
> - total_cycles = rte_rdtsc_precise() - begin;
> -
> - printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> - printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
> - printf("Average LPM Add/Del: %g cycles\n",
> - (double)total_cycles / TOTAL_WRITES);
> -
> - writer_done = 1;
> - /* Wait until all readers have exited */
> - for (i = 0; i < num_cores; i++)
> - rte_eal_wait_lcore(enabled_core_ids[i]);
> -
> - rte_lpm_free(lpm);
>
> return 0;
>
> @@ -946,9 +767,8 @@ test_lpm_perf(void)
> rte_lpm_delete_all(lpm);
> rte_lpm_free(lpm);
>
> - test_lpm_rcu_perf();
> -
> - test_lpm_rcu_perf_multi_writer();
> + test_lpm_rcu_perf_multi_writer(0);
> + test_lpm_rcu_perf_multi_writer(1);
>
> return 0;
> }
> --
> 2.17.1
> On Nov 2, 2020, at 10:21 PM, Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com> wrote:
>
> <snip>
>>
>> Avoid code duplication by combining single and multi threaded tests
>>
>> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
>> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
>> ---
>> app/test/test_lpm_perf.c | 362 ++++++++++-----------------------------
>> 1 file changed, 91 insertions(+), 271 deletions(-)
>>
>> diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c index
>> 224c92fa3d65..229c835c23f7 100644
>> --- a/app/test/test_lpm_perf.c
>> +++ b/app/test/test_lpm_perf.c
>> @@ -67,6 +67,12 @@ enum {
>> IP_CLASS_C
>> };
>>
>> +enum {
>> + SINGLE_WRITER = 1,
>> + MULTI_WRITER_1,
>> + MULTI_WRITER_2
>> +};
> Do we need this? Can we use the number of cores instead?
>
There are 3 combinations of writes (adds/deletes):
1. Write all the entries - in case of a single writer
2. Write half of the entries - in case of multiple writers
3. Write remaining half of the entries - in case of multiple writers
So, I think this is required.
>> +
>> /* struct route_rule_count defines the total number of rules in following
>> a/b/c
>> * each item in a[]/b[]/c[] is the number of common IP address class A/B/C,
>> not
>> * including the ones for private local network.
>> @@ -430,11 +436,16 @@ test_lpm_rcu_qsbr_writer(void *arg) {
>> unsigned int i, j, si, ei;
>> uint64_t begin, total_cycles;
>> - uint8_t core_id = (uint8_t)((uintptr_t)arg);
>> + uint8_t writer_id = (uint8_t)((uintptr_t)arg);
>> uint32_t next_hop_add = 0xAA;
>>
>> - /* 2 writer threads are used */
>> - if (core_id % 2 == 0) {
>> + /* Single writer (writer_id = 1) */
>> + if (writer_id == SINGLE_WRITER) {
>> + si = 0;
>> + ei = NUM_LDEPTH_ROUTE_ENTRIES;
>> + }
>> + /* 2 Writers (writer_id = 2/3)*/
>> + else if (writer_id == MULTI_WRITER_1) {
>> si = 0;
>> ei = NUM_LDEPTH_ROUTE_ENTRIES / 2;
>> } else {
>> @@ -447,29 +458,35 @@ test_lpm_rcu_qsbr_writer(void *arg)
>> for (i = 0; i < RCU_ITERATIONS; i++) {
>> /* Add all the entries */
>> for (j = si; j < ei; j++) {
>> - pthread_mutex_lock(&lpm_mutex);
>> + if (writer_id != SINGLE_WRITER)
>> + pthread_mutex_lock(&lpm_mutex);
>> if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
>> large_ldepth_route_table[j].depth,
>> next_hop_add) != 0) {
>> printf("Failed to add iteration %d, route#
>> %d\n",
>> i, j);
>> - pthread_mutex_unlock(&lpm_mutex);
>> + if (writer_id != SINGLE_WRITER)
>> +
>> pthread_mutex_unlock(&lpm_mutex);
>> return -1;
>> }
>> - pthread_mutex_unlock(&lpm_mutex);
>> + if (writer_id != SINGLE_WRITER)
>> + pthread_mutex_unlock(&lpm_mutex);
>> }
>>
>> /* Delete all the entries */
>> for (j = si; j < ei; j++) {
>> - pthread_mutex_lock(&lpm_mutex);
>> + if (writer_id != SINGLE_WRITER)
>> + pthread_mutex_lock(&lpm_mutex);
>> if (rte_lpm_delete(lpm,
>> large_ldepth_route_table[j].ip,
>> large_ldepth_route_table[j].depth) != 0) {
>> printf("Failed to delete iteration %d, route#
>> %d\n",
>> i, j);
>> - pthread_mutex_unlock(&lpm_mutex);
>> + if (writer_id != SINGLE_WRITER)
>> +
>> pthread_mutex_unlock(&lpm_mutex);
>> return -1;
>> }
>> - pthread_mutex_unlock(&lpm_mutex);
>> + if (writer_id != SINGLE_WRITER)
>> + pthread_mutex_unlock(&lpm_mutex);
>> }
>> }
>>
>> @@ -482,16 +499,17 @@ test_lpm_rcu_qsbr_writer(void *arg)
>>
>> /*
>> * Functional test:
>> - * 2 writers, rest are readers
>> + * 1/2 writers, rest are readers
>> */
>> static int
>> -test_lpm_rcu_perf_multi_writer(void)
>> +test_lpm_rcu_perf_multi_writer(uint8_t use_rcu)
>> {
>> struct rte_lpm_config config;
>> size_t sz;
>> - unsigned int i;
>> + unsigned int i, j;
>> uint16_t core_id;
>> struct rte_lpm_rcu_config rcu_cfg = {0};
>> + int (*reader_f)(void *arg) = NULL;
>>
>> if (rte_lcore_count() < 3) {
>> printf("Not enough cores for lpm_rcu_perf_autotest,
>> expecting at least 3\n"); @@ -504,273 +522,76 @@
>> test_lpm_rcu_perf_multi_writer(void)
>> num_cores++;
>> }
>>
>> - printf("\nPerf test: 2 writers, %d readers, RCU integration
>> enabled\n",
>> - num_cores - 2);
>> -
>> - /* Create LPM table */
>> - config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
>> - config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
>> - config.flags = 0;
>> - lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>> - TEST_LPM_ASSERT(lpm != NULL);
>> -
>> - /* Init RCU variable */
>> - sz = rte_rcu_qsbr_get_memsize(num_cores);
>> - rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
>> - RTE_CACHE_LINE_SIZE);
>> - rte_rcu_qsbr_init(rv, num_cores);
>> -
>> - rcu_cfg.v = rv;
>> - /* Assign the RCU variable to LPM */
>> - if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
>> - printf("RCU variable assignment failed\n");
>> - goto error;
>> - }
>> -
>> - writer_done = 0;
>> - __atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
>> -
>> - __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>> -
>> - /* Launch reader threads */
>> - for (i = 2; i < num_cores; i++)
>> - rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
>> - enabled_core_ids[i]);
>> -
>> - /* Launch writer threads */
>> - for (i = 0; i < 2; i++)
>> - rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
>> - (void *)(uintptr_t)i,
>> - enabled_core_ids[i]);
>> -
>> - /* Wait for writer threads */
>> - for (i = 0; i < 2; i++)
>> - if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
>> - goto error;
>> -
>> - printf("Total LPM Adds: %d\n", TOTAL_WRITES);
>> - printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>> - printf("Average LPM Add/Del: %"PRIu64" cycles\n",
>> - __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
>> - / TOTAL_WRITES);
>> -
>> - writer_done = 1;
>> - /* Wait until all readers have exited */
>> - for (i = 2; i < num_cores; i++)
>> - rte_eal_wait_lcore(enabled_core_ids[i]);
>> -
>> - rte_lpm_free(lpm);
>> - rte_free(rv);
>> - lpm = NULL;
>> - rv = NULL;
>> -
>> - /* Test without RCU integration */
>> - printf("\nPerf test: 2 writers, %d readers, RCU integration
>> disabled\n",
>> - num_cores - 2);
>> -
>> - /* Create LPM table */
>> - config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
>> - config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
>> - config.flags = 0;
>> - lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>> - TEST_LPM_ASSERT(lpm != NULL);
>> -
>> - writer_done = 0;
>> - __atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
>> - __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>> -
>> - /* Launch reader threads */
>> - for (i = 2; i < num_cores; i++)
>> - rte_eal_remote_launch(test_lpm_reader, NULL,
>> - enabled_core_ids[i]);
>> -
>> - /* Launch writer threads */
>> - for (i = 0; i < 2; i++)
>> - rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
>> - (void *)(uintptr_t)i,
>> - enabled_core_ids[i]);
>> -
>> - /* Wait for writer threads */
>> - for (i = 0; i < 2; i++)
>> - if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
>> - goto error;
>> -
>> - printf("Total LPM Adds: %d\n", TOTAL_WRITES);
>> - printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>> - printf("Average LPM Add/Del: %"PRIu64" cycles\n",
>> - __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
>> - / TOTAL_WRITES);
>> -
>> - writer_done = 1;
>> - /* Wait until all readers have exited */
>> - for (i = 2; i < num_cores; i++)
>> - rte_eal_wait_lcore(enabled_core_ids[i]);
>> -
>> - rte_lpm_free(lpm);
>> -
>> - return 0;
>> -
>> -error:
>> - writer_done = 1;
>> - /* Wait until all readers have exited */
>> - rte_eal_mp_wait_lcore();
>> -
>> - rte_lpm_free(lpm);
>> - rte_free(rv);
>> -
>> - return -1;
>> -}
>> -
>> -/*
>> - * Functional test:
>> - * Single writer, rest are readers
>> - */
>> -static int
>> -test_lpm_rcu_perf(void)
>> -{
>> - struct rte_lpm_config config;
>> - uint64_t begin, total_cycles;
>> - size_t sz;
>> - unsigned int i, j;
>> - uint16_t core_id;
>> - uint32_t next_hop_add = 0xAA;
>> - struct rte_lpm_rcu_config rcu_cfg = {0};
>> -
>> - if (rte_lcore_count() < 2) {
>> - printf("Not enough cores for lpm_rcu_perf_autotest,
>> expecting at least 2\n");
>> - return TEST_SKIPPED;
>> - }
>> -
>> - num_cores = 0;
>> - RTE_LCORE_FOREACH_WORKER(core_id) {
>> - enabled_core_ids[num_cores] = core_id;
>> - num_cores++;
>> - }
>> -
>> - printf("\nPerf test: 1 writer, %d readers, RCU integration enabled\n",
>> - num_cores);
>> -
>> - /* Create LPM table */
>> - config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
>> - config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
>> - config.flags = 0;
>> - lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>> - TEST_LPM_ASSERT(lpm != NULL);
>> -
>> - /* Init RCU variable */
>> - sz = rte_rcu_qsbr_get_memsize(num_cores);
>> - rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
>> - RTE_CACHE_LINE_SIZE);
>> - rte_rcu_qsbr_init(rv, num_cores);
>> -
>> - rcu_cfg.v = rv;
>> - /* Assign the RCU variable to LPM */
>> - if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
>> - printf("RCU variable assignment failed\n");
>> - goto error;
>> - }
>> -
>> - writer_done = 0;
>> - __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>> -
>> - /* Launch reader threads */
>> - for (i = 0; i < num_cores; i++)
>> - rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
>> - enabled_core_ids[i]);
>> -
>> - /* Measure add/delete. */
>> - begin = rte_rdtsc_precise();
>> - for (i = 0; i < RCU_ITERATIONS; i++) {
>> - /* Add all the entries */
>> - for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
>> - if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
>> - large_ldepth_route_table[j].depth,
>> - next_hop_add) != 0) {
>> - printf("Failed to add iteration %d, route#
>> %d\n",
>> - i, j);
>> + for (j = 1; j < 3; j++) {
>> + if (use_rcu)
>> + printf("\nPerf test: %d writer(s), %d reader(s),"
>> + " RCU integration enabled\n", j, num_cores - j);
>> + else
>> + printf("\nPerf test: %d writer(s), %d reader(s),"
>> + " RCU integration disabled\n", j, num_cores - j);
>> +
>> + /* Create LPM table */
>> + config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
>> + config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
>> + config.flags = 0;
>> + lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>> + TEST_LPM_ASSERT(lpm != NULL);
>> +
>> + /* Init RCU variable */
>> + if (use_rcu) {
>> + sz = rte_rcu_qsbr_get_memsize(num_cores);
>> + rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
>> +
>> RTE_CACHE_LINE_SIZE);
>> + rte_rcu_qsbr_init(rv, num_cores);
>> +
>> + rcu_cfg.v = rv;
>> + /* Assign the RCU variable to LPM */
>> + if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
>> + printf("RCU variable assignment failed\n");
>> goto error;
>> }
>>
>> - /* Delete all the entries */
>> - for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
>> - if (rte_lpm_delete(lpm,
>> large_ldepth_route_table[j].ip,
>> - large_ldepth_route_table[j].depth) != 0) {
>> - printf("Failed to delete iteration %d, route#
>> %d\n",
>> - i, j);
>> - goto error;
>> - }
>> - }
>> - total_cycles = rte_rdtsc_precise() - begin;
>> + reader_f = test_lpm_rcu_qsbr_reader;
>> + } else
>> + reader_f = test_lpm_reader;
>>
>> - printf("Total LPM Adds: %d\n", TOTAL_WRITES);
>> - printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>> - printf("Average LPM Add/Del: %g cycles\n",
>> - (double)total_cycles / TOTAL_WRITES);
>> + writer_done = 0;
>> + __atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
>>
>> - writer_done = 1;
>> - /* Wait until all readers have exited */
>> - for (i = 0; i < num_cores; i++)
>> - if (rte_eal_wait_lcore(enabled_core_ids[i]);
>> -
>> - rte_lpm_free(lpm);
>> - rte_free(rv);
>> - lpm = NULL;
>> - rv = NULL;
>> -
>> - /* Test without RCU integration */
>> - printf("\nPerf test: 1 writer, %d readers, RCU integration
>> disabled\n",
>> - num_cores);
>> -
>> - /* Create LPM table */
>> - config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
>> - config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
>> - config.flags = 0;
>> - lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>> - TEST_LPM_ASSERT(lpm != NULL);
>> + __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>>
>> - writer_done = 0;
>> - __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>> + /* Launch reader threads */
>> + for (i = j; i < num_cores; i++)
>> + rte_eal_remote_launch(reader_f, NULL,
>> + enabled_core_ids[i]);
>>
>> - /* Launch reader threads */
>> - for (i = 0; i < num_cores; i++)
>> - rte_eal_remote_launch(test_lpm_reader, NULL,
>> - enabled_core_ids[i]);
>> + /* Launch writer threads */
>> + for (i = 0; i < j; i++)
>> + rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
>> + (void *)(uintptr_t)(i + j),
> This can be just 'j'?
>
>> + enabled_core_ids[i]);
>>
>> - /* Measure add/delete. */
>> - begin = rte_rdtsc_precise();
>> - for (i = 0; i < RCU_ITERATIONS; i++) {
>> - /* Add all the entries */
>> - for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
>> - if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
>> - large_ldepth_route_table[j].depth,
>> - next_hop_add) != 0) {
>> - printf("Failed to add iteration %d, route#
>> %d\n",
>> - i, j);
>> + /* Wait for writer threads */
>> + for (i = 0; i < j; i++)
>> + if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
>> goto error;
>> - }
>>
>> - /* Delete all the entries */
>> - for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
>> - if (rte_lpm_delete(lpm,
>> large_ldepth_route_table[j].ip,
>> - large_ldepth_route_table[j].depth) != 0) {
>> - printf("Failed to delete iteration %d, route#
>> %d\n",
>> - i, j);
>> - goto error;
>> - }
>> + printf("Total LPM Adds: %d\n", TOTAL_WRITES);
>> + printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>> + printf("Average LPM Add/Del: %"PRIu64" cycles\n",
>> + __atomic_load_n(&gwrite_cycles,
>> __ATOMIC_RELAXED)
>> + / TOTAL_WRITES);
>> +
>> + writer_done = 1;
>> + /* Wait until all readers have exited */
>> + for (i = j; i < num_cores; i++)
>> + rte_eal_wait_lcore(enabled_core_ids[i]);
>> +
>> + rte_lpm_free(lpm);
>> + rte_free(rv);
>> + lpm = NULL;
>> + rv = NULL;
>> }
>> - total_cycles = rte_rdtsc_precise() - begin;
>> -
>> - printf("Total LPM Adds: %d\n", TOTAL_WRITES);
>> - printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>> - printf("Average LPM Add/Del: %g cycles\n",
>> - (double)total_cycles / TOTAL_WRITES);
>> -
>> - writer_done = 1;
>> - /* Wait until all readers have exited */
>> - for (i = 0; i < num_cores; i++)
>> - rte_eal_wait_lcore(enabled_core_ids[i]);
>> -
>> - rte_lpm_free(lpm);
>>
>> return 0;
>>
>> @@ -946,9 +767,8 @@ test_lpm_perf(void)
>> rte_lpm_delete_all(lpm);
>> rte_lpm_free(lpm);
>>
>> - test_lpm_rcu_perf();
>> -
>> - test_lpm_rcu_perf_multi_writer();
>> + test_lpm_rcu_perf_multi_writer(0);
>> + test_lpm_rcu_perf_multi_writer(1);
>>
>> return 0;
>> }
>> --
>> 2.17.1
<snip>
> >>
> >> Avoid code duplication by combining single and multi threaded tests
> >>
> >> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
> >> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> >> ---
> >> app/test/test_lpm_perf.c | 362
> >> ++++++++++-----------------------------
> >> 1 file changed, 91 insertions(+), 271 deletions(-)
> >>
> >> diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
> >> index
> >> 224c92fa3d65..229c835c23f7 100644
> >> --- a/app/test/test_lpm_perf.c
> >> +++ b/app/test/test_lpm_perf.c
> >> @@ -67,6 +67,12 @@ enum {
> >> IP_CLASS_C
> >> };
> >>
> >> +enum {
> >> +SINGLE_WRITER = 1,
> >> +MULTI_WRITER_1,
> >> +MULTI_WRITER_2
> >> +};
> > Do we need this? Can we use the number of cores instead?
> >
>
> There are 3 combinations of writes (adds/deletes):
> 1. Write all the entries - in case of a single writer
> 2. Write half of the entries - in case of multiple writers
> 3. Write the remaining half of the entries - in case of multiple writers
>
> So, I think this is required.
IMO, this is not scalable. Essentially, we need 2 parameters to divide the routes among the writer threads: 1) the total number of writers and 2) the writer's ID in a linear space.
Creating a structure with these 2 parameters and passing it to the writer thread would be better and scalable.
>
> >> +
> >> /* struct route_rule_count defines the total number of rules in
> >> following a/b/c
> >> * each item in a[]/b[]/c[] is the number of common IP address class
> >> A/B/C, not
> >> * including the ones for private local network.
> >> @@ -430,11 +436,16 @@ test_lpm_rcu_qsbr_writer(void *arg) {
> unsigned
> >> int i, j, si, ei; uint64_t begin, total_cycles; -uint8_t core_id =
> >> (uint8_t)((uintptr_t)arg);
> >> +uint8_t writer_id = (uint8_t)((uintptr_t)arg);
> >> uint32_t next_hop_add = 0xAA;
> >>
> >> -/* 2 writer threads are used */
> >> -if (core_id % 2 == 0) {
> >> +/* Single writer (writer_id = 1) */
> >> +if (writer_id == SINGLE_WRITER) {
> >> +si = 0;
> >> +ei = NUM_LDEPTH_ROUTE_ENTRIES;
> >> +}
> >> +/* 2 Writers (writer_id = 2/3)*/
> >> +else if (writer_id == MULTI_WRITER_1) {
> >> si = 0;
> >> ei = NUM_LDEPTH_ROUTE_ENTRIES / 2;
> >> } else {
> >> @@ -447,29 +458,35 @@ test_lpm_rcu_qsbr_writer(void *arg) for (i = 0;
> >> i < RCU_ITERATIONS; i++) {
> >> /* Add all the entries */
> >> for (j = si; j < ei; j++) {
> >> -pthread_mutex_lock(&lpm_mutex);
> >> +if (writer_id != SINGLE_WRITER)
> >> +pthread_mutex_lock(&lpm_mutex);
> >> if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
> >> large_ldepth_route_table[j].depth,
> >> next_hop_add) != 0) {
> >> printf("Failed to add iteration %d, route# %d\n", i, j);
> >> -pthread_mutex_unlock(&lpm_mutex);
> >> +if (writer_id != SINGLE_WRITER)
> >> +
> >> pthread_mutex_unlock(&lpm_mutex);
> >> return -1;
> >> }
> >> -pthread_mutex_unlock(&lpm_mutex);
> >> +if (writer_id != SINGLE_WRITER)
> >> +pthread_mutex_unlock(&lpm_mutex);
> >> }
> >>
> >> /* Delete all the entries */
> >> for (j = si; j < ei; j++) {
> >> -pthread_mutex_lock(&lpm_mutex);
> >> +if (writer_id != SINGLE_WRITER)
> >> +pthread_mutex_lock(&lpm_mutex);
> >> if (rte_lpm_delete(lpm,
> >> large_ldepth_route_table[j].ip,
> >> large_ldepth_route_table[j].depth) != 0) { printf("Failed to delete
> >> iteration %d, route# %d\n", i, j); -pthread_mutex_unlock(&lpm_mutex);
> >> +if (writer_id != SINGLE_WRITER)
> >> +
> >> pthread_mutex_unlock(&lpm_mutex);
> >> return -1;
> >> }
> >> -pthread_mutex_unlock(&lpm_mutex);
> >> +if (writer_id != SINGLE_WRITER)
> >> +pthread_mutex_unlock(&lpm_mutex);
> >> }
> >> }
> >>
> >> @@ -482,16 +499,17 @@ test_lpm_rcu_qsbr_writer(void *arg)
> >>
> >> /*
> >> * Functional test:
> >> - * 2 writers, rest are readers
> >> + * 1/2 writers, rest are readers
> >> */
> >> static int
> >> -test_lpm_rcu_perf_multi_writer(void)
> >> +test_lpm_rcu_perf_multi_writer(uint8_t use_rcu)
> >> {
> >> struct rte_lpm_config config;
> >> size_t sz;
> >> -unsigned int i;
> >> +unsigned int i, j;
> >> uint16_t core_id;
> >> struct rte_lpm_rcu_config rcu_cfg = {0};
> >> +int (*reader_f)(void *arg) = NULL;
> >>
> >> if (rte_lcore_count() < 3) {
> >> printf("Not enough cores for lpm_rcu_perf_autotest, expecting at
> >> least 3\n"); @@ -504,273 +522,76 @@
> >> test_lpm_rcu_perf_multi_writer(void)
> >> num_cores++;
> >> }
> >>
> >> -printf("\nPerf test: 2 writers, %d readers, RCU integration
> >> enabled\n", -num_cores - 2);
> >> -
> >> -/* Create LPM table */
> >> -config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; -
> config.number_tbl8s =
> >> NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm =
> >> rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> >> -TEST_LPM_ASSERT(lpm != NULL);
> >> -
> >> -/* Init RCU variable */
> >> -sz = rte_rcu_qsbr_get_memsize(num_cores);
> >> -rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
> >> -RTE_CACHE_LINE_SIZE); -rte_rcu_qsbr_init(rv, num_cores);
> >> -
> >> -rcu_cfg.v = rv;
> >> -/* Assign the RCU variable to LPM */ -if (rte_lpm_rcu_qsbr_add(lpm,
> >> &rcu_cfg) != 0) { -printf("RCU variable assignment failed\n"); -goto
> >> error; -}
> >> -
> >> -writer_done = 0;
> >> -__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
> >> -
> >> -__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> >> -
> >> -/* Launch reader threads */
> >> -for (i = 2; i < num_cores; i++)
> >> -rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
> >> -enabled_core_ids[i]);
> >> -
> >> -/* Launch writer threads */
> >> -for (i = 0; i < 2; i++)
> >> -rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
> >> -(void *)(uintptr_t)i,
> >> -enabled_core_ids[i]);
> >> -
> >> -/* Wait for writer threads */
> >> -for (i = 0; i < 2; i++)
> >> -if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) -goto error;
> >> -
> >> -printf("Total LPM Adds: %d\n", TOTAL_WRITES); -printf("Total LPM
> >> Deletes: %d\n", TOTAL_WRITES); -printf("Average LPM Add/Del:
> >> %"PRIu64" cycles\n", -__atomic_load_n(&gwrite_cycles,
> >> __ATOMIC_RELAXED) -/ TOTAL_WRITES);
> >> -
> >> -writer_done = 1;
> >> -/* Wait until all readers have exited */ -for (i = 2; i < num_cores;
> >> i++) -rte_eal_wait_lcore(enabled_core_ids[i]);
> >> -
> >> -rte_lpm_free(lpm);
> >> -rte_free(rv);
> >> -lpm = NULL;
> >> -rv = NULL;
> >> -
> >> -/* Test without RCU integration */
> >> -printf("\nPerf test: 2 writers, %d readers, RCU integration
> >> disabled\n", -num_cores - 2);
> >> -
> >> -/* Create LPM table */
> >> -config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; -
> config.number_tbl8s =
> >> NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm =
> >> rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> >> -TEST_LPM_ASSERT(lpm != NULL);
> >> -
> >> -writer_done = 0;
> >> -__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
> >> -__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> >> -
> >> -/* Launch reader threads */
> >> -for (i = 2; i < num_cores; i++)
> >> -rte_eal_remote_launch(test_lpm_reader, NULL, -enabled_core_ids[i]);
> >> -
> >> -/* Launch writer threads */
> >> -for (i = 0; i < 2; i++)
> >> -rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
> >> -(void *)(uintptr_t)i,
> >> -enabled_core_ids[i]);
> >> -
> >> -/* Wait for writer threads */
> >> -for (i = 0; i < 2; i++)
> >> -if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) -goto error;
> >> -
> >> -printf("Total LPM Adds: %d\n", TOTAL_WRITES); -printf("Total LPM
> >> Deletes: %d\n", TOTAL_WRITES); -printf("Average LPM Add/Del:
> >> %"PRIu64" cycles\n", -__atomic_load_n(&gwrite_cycles,
> >> __ATOMIC_RELAXED) -/ TOTAL_WRITES);
> >> -
> >> -writer_done = 1;
> >> -/* Wait until all readers have exited */ -for (i = 2; i < num_cores;
> >> i++) -rte_eal_wait_lcore(enabled_core_ids[i]);
> >> -
> >> -rte_lpm_free(lpm);
> >> -
> >> -return 0;
> >> -
> >> -error:
> >> -writer_done = 1;
> >> -/* Wait until all readers have exited */ -rte_eal_mp_wait_lcore();
> >> -
> >> -rte_lpm_free(lpm);
> >> -rte_free(rv);
> >> -
> >> -return -1;
> >> -}
> >> -
> >> -/*
> >> - * Functional test:
> >> - * Single writer, rest are readers
> >> - */
> >> -static int
> >> -test_lpm_rcu_perf(void)
> >> -{
> >> -struct rte_lpm_config config;
> >> -uint64_t begin, total_cycles;
> >> -size_t sz;
> >> -unsigned int i, j;
> >> -uint16_t core_id;
> >> -uint32_t next_hop_add = 0xAA;
> >> -struct rte_lpm_rcu_config rcu_cfg = {0};
> >> -
> >> -if (rte_lcore_count() < 2) {
> >> -printf("Not enough cores for lpm_rcu_perf_autotest, expecting at
> >> least 2\n"); -return TEST_SKIPPED; -}
> >> -
> >> -num_cores = 0;
> >> -RTE_LCORE_FOREACH_WORKER(core_id) {
> >> -enabled_core_ids[num_cores] = core_id; -num_cores++; -}
> >> -
> >> -printf("\nPerf test: 1 writer, %d readers, RCU integration
> >> enabled\n", -num_cores);
> >> -
> >> -/* Create LPM table */
> >> -config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; -
> config.number_tbl8s =
> >> NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm =
> >> rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> >> -TEST_LPM_ASSERT(lpm != NULL);
> >> -
> >> -/* Init RCU variable */
> >> -sz = rte_rcu_qsbr_get_memsize(num_cores);
> >> -rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
> >> -RTE_CACHE_LINE_SIZE); -rte_rcu_qsbr_init(rv, num_cores);
> >> -
> >> -rcu_cfg.v = rv;
> >> -/* Assign the RCU variable to LPM */ -if (rte_lpm_rcu_qsbr_add(lpm,
> >> &rcu_cfg) != 0) { -printf("RCU variable assignment failed\n"); -goto
> >> error; -}
> >> -
> >> -writer_done = 0;
> >> -__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> >> -
> >> -/* Launch reader threads */
> >> -for (i = 0; i < num_cores; i++)
> >> -rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
> >> -enabled_core_ids[i]);
> >> -
> >> -/* Measure add/delete. */
> >> -begin = rte_rdtsc_precise();
> >> -for (i = 0; i < RCU_ITERATIONS; i++) {
> >> -/* Add all the entries */
> >> -for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) -if (rte_lpm_add(lpm,
> >> large_ldepth_route_table[j].ip, -large_ldepth_route_table[j].depth,
> >> -next_hop_add) != 0) {
> >> -printf("Failed to add iteration %d, route# %d\n", -i, j);
> >> +for (j = 1; j < 3; j++) {
> >> +if (use_rcu)
> >> +printf("\nPerf test: %d writer(s), %d reader(s),"
> >> + " RCU integration enabled\n", j, num_cores - j); else
> >> +printf("\nPerf test: %d writer(s), %d reader(s),"
> >> + " RCU integration disabled\n", j, num_cores - j);
> >> +
> >> +/* Create LPM table */
> >> +config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
> config.number_tbl8s =
> >> +NUM_LDEPTH_ROUTE_ENTRIES; config.flags = 0; lpm =
> >> +rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> >> +TEST_LPM_ASSERT(lpm != NULL);
> >> +
> >> +/* Init RCU variable */
> >> +if (use_rcu) {
> >> +sz = rte_rcu_qsbr_get_memsize(num_cores);
> >> +rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
> >> +
> >> RTE_CACHE_LINE_SIZE);
> >> +rte_rcu_qsbr_init(rv, num_cores);
> >> +
> >> +rcu_cfg.v = rv;
> >> +/* Assign the RCU variable to LPM */ if (rte_lpm_rcu_qsbr_add(lpm,
> >> +&rcu_cfg) != 0) { printf("RCU variable assignment failed\n");
> >> goto error;
> >> }
> >>
> >> -/* Delete all the entries */
> >> -for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) -if
> >> (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
> >> -large_ldepth_route_table[j].depth) != 0) { -printf("Failed to delete
> >> iteration %d, route# %d\n", -i, j); -goto error; -} -} -total_cycles
> >> = rte_rdtsc_precise() - begin;
> >> +reader_f = test_lpm_rcu_qsbr_reader; } else reader_f =
> >> +test_lpm_reader;
> >>
> >> -printf("Total LPM Adds: %d\n", TOTAL_WRITES); -printf("Total LPM
> >> Deletes: %d\n", TOTAL_WRITES); -printf("Average LPM Add/Del: %g
> >> cycles\n", -(double)total_cycles / TOTAL_WRITES);
> >> +writer_done = 0;
> >> +__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
> >>
> >> -writer_done = 1;
> >> -/* Wait until all readers have exited */ -for (i = 0; i < num_cores;
> >> i++) -if (rte_eal_wait_lcore(enabled_core_ids[i]);
> >> -
> >> -rte_lpm_free(lpm);
> >> -rte_free(rv);
> >> -lpm = NULL;
> >> -rv = NULL;
> >> -
> >> -/* Test without RCU integration */
> >> -printf("\nPerf test: 1 writer, %d readers, RCU integration
> >> disabled\n", -num_cores);
> >> -
> >> -/* Create LPM table */
> >> -config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; -
> config.number_tbl8s =
> >> NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm =
> >> rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> >> -TEST_LPM_ASSERT(lpm != NULL);
> >> +__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> >>
> >> -writer_done = 0;
> >> -__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> >> +/* Launch reader threads */
> >> +for (i = j; i < num_cores; i++)
> >> +rte_eal_remote_launch(reader_f, NULL,
> >> +enabled_core_ids[i]);
> >>
> >> -/* Launch reader threads */
> >> -for (i = 0; i < num_cores; i++)
> >> -rte_eal_remote_launch(test_lpm_reader, NULL,
> >> -enabled_core_ids[i]);
> >> +/* Launch writer threads */
> >> +for (i = 0; i < j; i++)
> >> +rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
> >> +(void *)(uintptr_t)(i + j),
> > This can be just 'j'?
> >
> >> +enabled_core_ids[i]);
> >>
> >> -/* Measure add/delete. */
> >> -begin = rte_rdtsc_precise();
> >> -for (i = 0; i < RCU_ITERATIONS; i++) {
> >> -/* Add all the entries */
> >> -for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
> >> -if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
> >> -large_ldepth_route_table[j].depth,
> >> -next_hop_add) != 0) {
> >> -printf("Failed to add iteration %d, route#
> >> %d\n",
> >> -i, j);
> >> +/* Wait for writer threads */
> >> +for (i = 0; i < j; i++)
> >> +if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
> >> goto error;
> >> -}
> >>
> >> -/* Delete all the entries */
> >> -for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
> >> -if (rte_lpm_delete(lpm,
> >> large_ldepth_route_table[j].ip,
> >> -large_ldepth_route_table[j].depth) != 0) {
> >> -printf("Failed to delete iteration %d, route#
> >> %d\n",
> >> -i, j);
> >> -goto error;
> >> -}
> >> +printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> >> +printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
> >> +printf("Average LPM Add/Del: %"PRIu64" cycles\n",
> >> +__atomic_load_n(&gwrite_cycles,
> >> __ATOMIC_RELAXED)
> >> +/ TOTAL_WRITES);
> >> +
> >> +writer_done = 1;
> >> +/* Wait until all readers have exited */
> >> +for (i = j; i < num_cores; i++)
> >> +rte_eal_wait_lcore(enabled_core_ids[i]);
> >> +
> >> +rte_lpm_free(lpm);
> >> +rte_free(rv);
> >> +lpm = NULL;
> >> +rv = NULL;
> >> }
> >> -total_cycles = rte_rdtsc_precise() - begin;
> >> -
> >> -printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> >> -printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
> >> -printf("Average LPM Add/Del: %g cycles\n",
> >> -(double)total_cycles / TOTAL_WRITES);
> >> -
> >> -writer_done = 1;
> >> -/* Wait until all readers have exited */
> >> -for (i = 0; i < num_cores; i++)
> >> -rte_eal_wait_lcore(enabled_core_ids[i]);
> >> -
> >> -rte_lpm_free(lpm);
> >>
> >> return 0;
> >>
> >> @@ -946,9 +767,8 @@ test_lpm_perf(void)
> >> rte_lpm_delete_all(lpm);
> >> rte_lpm_free(lpm);
> >>
> >> -test_lpm_rcu_perf();
> >> -
> >> -test_lpm_rcu_perf_multi_writer();
> >> +test_lpm_rcu_perf_multi_writer(0);
> >> +test_lpm_rcu_perf_multi_writer(1);
> >>
> >> return 0;
> >> }
> >> --
> >> 2.17.1
>
> On Nov 2, 2020, at 11:32 PM, Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com> wrote:
>
> <snip>
>
>>>>
>>>> Avoid code duplication by combining single and multi threaded tests
>>>>
>>>> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
>>>> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
>>>> ---
>>>> app/test/test_lpm_perf.c | 362
>>>> ++++++++++-----------------------------
>>>> 1 file changed, 91 insertions(+), 271 deletions(-)
>>>>
>>>> diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
>>>> index
>>>> 224c92fa3d65..229c835c23f7 100644
>>>> --- a/app/test/test_lpm_perf.c
>>>> +++ b/app/test/test_lpm_perf.c
>>>> @@ -67,6 +67,12 @@ enum {
>>>> IP_CLASS_C
>>>> };
>>>>
>>>> +enum {
>>>> +SINGLE_WRITER = 1,
>>>> +MULTI_WRITER_1,
>>>> +MULTI_WRITER_2
>>>> +};
>>> Do we need this? Can we use the number of cores instead?
>>>
>>
>> There are 3 combinations of writes (adds/deletes):
>> 1. Write all the entries - in case of a single writer 2. Write half of the entries -
>> in case of multiple writers 3. Write remaining half of the entries - in case of
>> multiple writers
>>
>> So, I think this is required.
> IMO, this is not scalable. Essentially, we need 2 parameters to divide the routes among each writer thread. We need 2 parameters, 1) total number of writers 2) the core ID in the linear space.
> Creating a structure with these 2 and passing that to the writer thread would be better and scalable.
Yes, agreed this is only applicable for 2 writers. Currently, the multi writer test is only limited to a maximum of 2 writers.
To support a larger number of writers, we need something like this (which I believe is in line with your suggestion):
1. Calculate what each writer will write: single_insert = TOTAL_WRITES / num_writers
2. Pass core ID in linear space as an argument to the writer function: pos_core
3. Calculate si and ei in the writer function: si = pos_core * single_insert; ei = si + single_insert
I can update the patch to enable more than 2 writers.
Do you also suggest we expand the scope of the test to test with more than 2 writers?
This will increase the time for which the test is running (which currently is significant even with 2 writers).
>
>>
>>>> +
>>>> /* struct route_rule_count defines the total number of rules in
>>>> following a/b/c
>>>> * each item in a[]/b[]/c[] is the number of common IP address class
>>>> A/B/C, not
>>>> * including the ones for private local network.
>>>> @@ -430,11 +436,16 @@ test_lpm_rcu_qsbr_writer(void *arg) {
>> unsigned
>>>> int i, j, si, ei; uint64_t begin, total_cycles; -uint8_t core_id =
>>>> (uint8_t)((uintptr_t)arg);
>>>> +uint8_t writer_id = (uint8_t)((uintptr_t)arg);
>>>> uint32_t next_hop_add = 0xAA;
>>>>
>>>> -/* 2 writer threads are used */
>>>> -if (core_id % 2 == 0) {
>>>> +/* Single writer (writer_id = 1) */
>>>> +if (writer_id == SINGLE_WRITER) {
>>>> +si = 0;
>>>> +ei = NUM_LDEPTH_ROUTE_ENTRIES;
>>>> +}
>>>> +/* 2 Writers (writer_id = 2/3)*/
>>>> +else if (writer_id == MULTI_WRITER_1) {
>>>> si = 0;
>>>> ei = NUM_LDEPTH_ROUTE_ENTRIES / 2;
>>>> } else {
>>>> @@ -447,29 +458,35 @@ test_lpm_rcu_qsbr_writer(void *arg) for (i = 0;
>>>> i < RCU_ITERATIONS; i++) {
>>>> /* Add all the entries */
>>>> for (j = si; j < ei; j++) {
>>>> -pthread_mutex_lock(&lpm_mutex);
>>>> +if (writer_id != SINGLE_WRITER)
>>>> +pthread_mutex_lock(&lpm_mutex);
>>>> if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
>>>> large_ldepth_route_table[j].depth,
>>>> next_hop_add) != 0) {
>>>> printf("Failed to add iteration %d, route# %d\n", i, j);
>>>> -pthread_mutex_unlock(&lpm_mutex);
>>>> +if (writer_id != SINGLE_WRITER)
>>>> +
>>>> pthread_mutex_unlock(&lpm_mutex);
>>>> return -1;
>>>> }
>>>> -pthread_mutex_unlock(&lpm_mutex);
>>>> +if (writer_id != SINGLE_WRITER)
>>>> +pthread_mutex_unlock(&lpm_mutex);
>>>> }
>>>>
>>>> /* Delete all the entries */
>>>> for (j = si; j < ei; j++) {
>>>> -pthread_mutex_lock(&lpm_mutex);
>>>> +if (writer_id != SINGLE_WRITER)
>>>> +pthread_mutex_lock(&lpm_mutex);
>>>> if (rte_lpm_delete(lpm,
>>>> large_ldepth_route_table[j].ip,
>>>> large_ldepth_route_table[j].depth) != 0) { printf("Failed to delete
>>>> iteration %d, route# %d\n", i, j); -pthread_mutex_unlock(&lpm_mutex);
>>>> +if (writer_id != SINGLE_WRITER)
>>>> +
>>>> pthread_mutex_unlock(&lpm_mutex);
>>>> return -1;
>>>> }
>>>> -pthread_mutex_unlock(&lpm_mutex);
>>>> +if (writer_id != SINGLE_WRITER)
>>>> +pthread_mutex_unlock(&lpm_mutex);
>>>> }
>>>> }
>>>>
>>>> @@ -482,16 +499,17 @@ test_lpm_rcu_qsbr_writer(void *arg)
>>>>
>>>> /*
>>>> * Functional test:
>>>> - * 2 writers, rest are readers
>>>> + * 1/2 writers, rest are readers
>>>> */
>>>> static int
>>>> -test_lpm_rcu_perf_multi_writer(void)
>>>> +test_lpm_rcu_perf_multi_writer(uint8_t use_rcu)
>>>> {
>>>> struct rte_lpm_config config;
>>>> size_t sz;
>>>> -unsigned int i;
>>>> +unsigned int i, j;
>>>> uint16_t core_id;
>>>> struct rte_lpm_rcu_config rcu_cfg = {0};
>>>> +int (*reader_f)(void *arg) = NULL;
>>>>
>>>> if (rte_lcore_count() < 3) {
>>>> printf("Not enough cores for lpm_rcu_perf_autotest, expecting at
>>>> least 3\n"); @@ -504,273 +522,76 @@
>>>> test_lpm_rcu_perf_multi_writer(void)
>>>> num_cores++;
>>>> }
>>>>
>>>> -printf("\nPerf test: 2 writers, %d readers, RCU integration
>>>> enabled\n", -num_cores - 2);
>>>> -
>>>> -/* Create LPM table */
>>>> -config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; -
>> config.number_tbl8s =
>>>> NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm =
>>>> rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>>>> -TEST_LPM_ASSERT(lpm != NULL);
>>>> -
>>>> -/* Init RCU variable */
>>>> -sz = rte_rcu_qsbr_get_memsize(num_cores);
>>>> -rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
>>>> -RTE_CACHE_LINE_SIZE); -rte_rcu_qsbr_init(rv, num_cores);
>>>> -
>>>> -rcu_cfg.v = rv;
>>>> -/* Assign the RCU variable to LPM */ -if (rte_lpm_rcu_qsbr_add(lpm,
>>>> &rcu_cfg) != 0) { -printf("RCU variable assignment failed\n"); -goto
>>>> error; -}
>>>> -
>>>> -writer_done = 0;
>>>> -__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
>>>> -
>>>> -__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>>>> -
>>>> -/* Launch reader threads */
>>>> -for (i = 2; i < num_cores; i++)
>>>> -rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
>>>> -enabled_core_ids[i]);
>>>> -
>>>> -/* Launch writer threads */
>>>> -for (i = 0; i < 2; i++)
>>>> -rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
>>>> -(void *)(uintptr_t)i,
>>>> -enabled_core_ids[i]);
>>>> -
>>>> -/* Wait for writer threads */
>>>> -for (i = 0; i < 2; i++)
>>>> -if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) -goto error;
>>>> -
>>>> -printf("Total LPM Adds: %d\n", TOTAL_WRITES); -printf("Total LPM
>>>> Deletes: %d\n", TOTAL_WRITES); -printf("Average LPM Add/Del:
>>>> %"PRIu64" cycles\n", -__atomic_load_n(&gwrite_cycles,
>>>> __ATOMIC_RELAXED) -/ TOTAL_WRITES);
>>>> -
>>>> -writer_done = 1;
>>>> -/* Wait until all readers have exited */ -for (i = 2; i < num_cores;
>>>> i++) -rte_eal_wait_lcore(enabled_core_ids[i]);
>>>> -
>>>> -rte_lpm_free(lpm);
>>>> -rte_free(rv);
>>>> -lpm = NULL;
>>>> -rv = NULL;
>>>> -
>>>> -/* Test without RCU integration */
>>>> -printf("\nPerf test: 2 writers, %d readers, RCU integration
>>>> disabled\n", -num_cores - 2);
>>>> -
>>>> -/* Create LPM table */
>>>> -config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; -
>> config.number_tbl8s =
>>>> NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm =
>>>> rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>>>> -TEST_LPM_ASSERT(lpm != NULL);
>>>> -
>>>> -writer_done = 0;
>>>> -__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
>>>> -__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>>>> -
>>>> -/* Launch reader threads */
>>>> -for (i = 2; i < num_cores; i++)
>>>> -rte_eal_remote_launch(test_lpm_reader, NULL, -enabled_core_ids[i]);
>>>> -
>>>> -/* Launch writer threads */
>>>> -for (i = 0; i < 2; i++)
>>>> -rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
>>>> -(void *)(uintptr_t)i,
>>>> -enabled_core_ids[i]);
>>>> -
>>>> -/* Wait for writer threads */
>>>> -for (i = 0; i < 2; i++)
>>>> -if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) -goto error;
>>>> -
>>>> -printf("Total LPM Adds: %d\n", TOTAL_WRITES); -printf("Total LPM
>>>> Deletes: %d\n", TOTAL_WRITES); -printf("Average LPM Add/Del:
>>>> %"PRIu64" cycles\n", -__atomic_load_n(&gwrite_cycles,
>>>> __ATOMIC_RELAXED) -/ TOTAL_WRITES);
>>>> -
>>>> -writer_done = 1;
>>>> -/* Wait until all readers have exited */ -for (i = 2; i < num_cores;
>>>> i++) -rte_eal_wait_lcore(enabled_core_ids[i]);
>>>> -
>>>> -rte_lpm_free(lpm);
>>>> -
>>>> -return 0;
>>>> -
>>>> -error:
>>>> -writer_done = 1;
>>>> -/* Wait until all readers have exited */ -rte_eal_mp_wait_lcore();
>>>> -
>>>> -rte_lpm_free(lpm);
>>>> -rte_free(rv);
>>>> -
>>>> -return -1;
>>>> -}
>>>> -
>>>> -/*
>>>> - * Functional test:
>>>> - * Single writer, rest are readers
>>>> - */
>>>> -static int
>>>> -test_lpm_rcu_perf(void)
>>>> -{
>>>> -struct rte_lpm_config config;
>>>> -uint64_t begin, total_cycles;
>>>> -size_t sz;
>>>> -unsigned int i, j;
>>>> -uint16_t core_id;
>>>> -uint32_t next_hop_add = 0xAA;
>>>> -struct rte_lpm_rcu_config rcu_cfg = {0};
>>>> -
>>>> -if (rte_lcore_count() < 2) {
>>>> -printf("Not enough cores for lpm_rcu_perf_autotest, expecting at
>>>> least 2\n"); -return TEST_SKIPPED; -}
>>>> -
>>>> -num_cores = 0;
>>>> -RTE_LCORE_FOREACH_WORKER(core_id) {
>>>> -enabled_core_ids[num_cores] = core_id; -num_cores++; -}
>>>> -
>>>> -printf("\nPerf test: 1 writer, %d readers, RCU integration
>>>> enabled\n", -num_cores);
>>>> -
>>>> -/* Create LPM table */
>>>> -config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; -
>> config.number_tbl8s =
>>>> NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm =
>>>> rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>>>> -TEST_LPM_ASSERT(lpm != NULL);
>>>> -
>>>> -/* Init RCU variable */
>>>> -sz = rte_rcu_qsbr_get_memsize(num_cores);
>>>> -rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
>>>> -RTE_CACHE_LINE_SIZE); -rte_rcu_qsbr_init(rv, num_cores);
>>>> -
>>>> -rcu_cfg.v = rv;
>>>> -/* Assign the RCU variable to LPM */ -if (rte_lpm_rcu_qsbr_add(lpm,
>>>> &rcu_cfg) != 0) { -printf("RCU variable assignment failed\n"); -goto
>>>> error; -}
>>>> -
>>>> -writer_done = 0;
>>>> -__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>>>> -
>>>> -/* Launch reader threads */
>>>> -for (i = 0; i < num_cores; i++)
>>>> -rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
>>>> -enabled_core_ids[i]);
>>>> -
>>>> -/* Measure add/delete. */
>>>> -begin = rte_rdtsc_precise();
>>>> -for (i = 0; i < RCU_ITERATIONS; i++) {
>>>> -/* Add all the entries */
>>>> -for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) -if (rte_lpm_add(lpm,
>>>> large_ldepth_route_table[j].ip, -large_ldepth_route_table[j].depth,
>>>> -next_hop_add) != 0) {
>>>> -printf("Failed to add iteration %d, route# %d\n", -i, j);
>>>> +for (j = 1; j < 3; j++) {
>>>> +if (use_rcu)
>>>> +printf("\nPerf test: %d writer(s), %d reader(s),"
>>>> + " RCU integration enabled\n", j, num_cores - j); else
>>>> +printf("\nPerf test: %d writer(s), %d reader(s),"
>>>> + " RCU integration disabled\n", j, num_cores - j);
>>>> +
>>>> +/* Create LPM table */
>>>> +config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
>> config.number_tbl8s =
>>>> +NUM_LDEPTH_ROUTE_ENTRIES; config.flags = 0; lpm =
>>>> +rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>>>> +TEST_LPM_ASSERT(lpm != NULL);
>>>> +
>>>> +/* Init RCU variable */
>>>> +if (use_rcu) {
>>>> +sz = rte_rcu_qsbr_get_memsize(num_cores);
>>>> +rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
>>>> +
>>>> RTE_CACHE_LINE_SIZE);
>>>> +rte_rcu_qsbr_init(rv, num_cores);
>>>> +
>>>> +rcu_cfg.v = rv;
>>>> +/* Assign the RCU variable to LPM */ if (rte_lpm_rcu_qsbr_add(lpm,
>>>> +&rcu_cfg) != 0) { printf("RCU variable assignment failed\n");
>>>> goto error;
>>>> }
>>>>
>>>> -/* Delete all the entries */
>>>> -for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) -if
>>>> (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
>>>> -large_ldepth_route_table[j].depth) != 0) { -printf("Failed to delete
>>>> iteration %d, route# %d\n", -i, j); -goto error; -} -} -total_cycles
>>>> = rte_rdtsc_precise() - begin;
>>>> +reader_f = test_lpm_rcu_qsbr_reader; } else reader_f =
>>>> +test_lpm_reader;
>>>>
>>>> -printf("Total LPM Adds: %d\n", TOTAL_WRITES); -printf("Total LPM
>>>> Deletes: %d\n", TOTAL_WRITES); -printf("Average LPM Add/Del: %g
>>>> cycles\n", -(double)total_cycles / TOTAL_WRITES);
>>>> +writer_done = 0;
>>>> +__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
>>>>
>>>> -writer_done = 1;
>>>> -/* Wait until all readers have exited */ -for (i = 0; i < num_cores;
>>>> i++) -if (rte_eal_wait_lcore(enabled_core_ids[i]);
>>>> -
>>>> -rte_lpm_free(lpm);
>>>> -rte_free(rv);
>>>> -lpm = NULL;
>>>> -rv = NULL;
>>>> -
>>>> -/* Test without RCU integration */
>>>> -printf("\nPerf test: 1 writer, %d readers, RCU integration
>>>> disabled\n", -num_cores);
>>>> -
>>>> -/* Create LPM table */
>>>> -config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; -
>> config.number_tbl8s =
>>>> NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm =
>>>> rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>>>> -TEST_LPM_ASSERT(lpm != NULL);
>>>> +__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>>>>
>>>> -writer_done = 0;
>>>> -__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>>>> +/* Launch reader threads */
>>>> +for (i = j; i < num_cores; i++)
>>>> +rte_eal_remote_launch(reader_f, NULL,
>>>> +enabled_core_ids[i]);
>>>>
>>>> -/* Launch reader threads */
>>>> -for (i = 0; i < num_cores; i++)
>>>> -rte_eal_remote_launch(test_lpm_reader, NULL,
>>>> -enabled_core_ids[i]);
>>>> +/* Launch writer threads */
>>>> +for (i = 0; i < j; i++)
>>>> +rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
>>>> +(void *)(uintptr_t)(i + j),
>>> This can be just 'j'?
>>>
>>>> +enabled_core_ids[i]);
>>>>
>>>> -/* Measure add/delete. */
>>>> -begin = rte_rdtsc_precise();
>>>> -for (i = 0; i < RCU_ITERATIONS; i++) {
>>>> -/* Add all the entries */
>>>> -for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
>>>> -if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
>>>> -large_ldepth_route_table[j].depth,
>>>> -next_hop_add) != 0) {
>>>> -printf("Failed to add iteration %d, route#
>>>> %d\n",
>>>> -i, j);
>>>> +/* Wait for writer threads */
>>>> +for (i = 0; i < j; i++)
>>>> +if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
>>>> goto error;
>>>> -}
>>>>
>>>> -/* Delete all the entries */
>>>> -for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
>>>> -if (rte_lpm_delete(lpm,
>>>> large_ldepth_route_table[j].ip,
>>>> -large_ldepth_route_table[j].depth) != 0) {
>>>> -printf("Failed to delete iteration %d, route#
>>>> %d\n",
>>>> -i, j);
>>>> -goto error;
>>>> -}
>>>> +printf("Total LPM Adds: %d\n", TOTAL_WRITES);
>>>> +printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>>>> +printf("Average LPM Add/Del: %"PRIu64" cycles\n",
>>>> +__atomic_load_n(&gwrite_cycles,
>>>> __ATOMIC_RELAXED)
>>>> +/ TOTAL_WRITES);
>>>> +
>>>> +writer_done = 1;
>>>> +/* Wait until all readers have exited */
>>>> +for (i = j; i < num_cores; i++)
>>>> +rte_eal_wait_lcore(enabled_core_ids[i]);
>>>> +
>>>> +rte_lpm_free(lpm);
>>>> +rte_free(rv);
>>>> +lpm = NULL;
>>>> +rv = NULL;
>>>> }
>>>> -total_cycles = rte_rdtsc_precise() - begin;
>>>> -
>>>> -printf("Total LPM Adds: %d\n", TOTAL_WRITES);
>>>> -printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>>>> -printf("Average LPM Add/Del: %g cycles\n",
>>>> -(double)total_cycles / TOTAL_WRITES);
>>>> -
>>>> -writer_done = 1;
>>>> -/* Wait until all readers have exited */
>>>> -for (i = 0; i < num_cores; i++)
>>>> -rte_eal_wait_lcore(enabled_core_ids[i]);
>>>> -
>>>> -rte_lpm_free(lpm);
>>>>
>>>> return 0;
>>>>
>>>> @@ -946,9 +767,8 @@ test_lpm_perf(void)
>>>> rte_lpm_delete_all(lpm);
>>>> rte_lpm_free(lpm);
>>>>
>>>> -test_lpm_rcu_perf();
>>>> -
>>>> -test_lpm_rcu_perf_multi_writer();
>>>> +test_lpm_rcu_perf_multi_writer(0);
>>>> +test_lpm_rcu_perf_multi_writer(1);
>>>>
>>>> return 0;
>>>> }
>>>> --
>>>> 2.17.1
<snip>
> >>>>
> >>>> Avoid code duplication by combining single and multi threaded tests
> >>>>
> >>>> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
> >>>> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> >>>> ---
> >>>> app/test/test_lpm_perf.c | 362
> >>>> ++++++++++-----------------------------
> >>>> 1 file changed, 91 insertions(+), 271 deletions(-)
> >>>>
> >>>> diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
> >>>> index
> >>>> 224c92fa3d65..229c835c23f7 100644
> >>>> --- a/app/test/test_lpm_perf.c
> >>>> +++ b/app/test/test_lpm_perf.c
> >>>> @@ -67,6 +67,12 @@ enum {
> >>>> IP_CLASS_C
> >>>> };
> >>>>
> >>>> +enum {
> >>>> +SINGLE_WRITER = 1,
> >>>> +MULTI_WRITER_1,
> >>>> +MULTI_WRITER_2
> >>>> +};
> >>> Do we need this? Can we use the number of cores instead?
> >>>
> >>
> >> There are 3 combinations of writes (adds/deletes):
> >> 1. Write all the entries - in case of a single writer 2. Write half
> >> of the entries - in case of multiple writers 3. Write remaining half
> >> of the entries - in case of multiple writers
> >>
> >> So, I think this is required.
> > IMO, this is not scalable. Essentially, we need 2 parameters to divide the
> routes among each writer thread. We need 2 parameters, 1) total number of
> writers 2) the core ID in the linear space.
> > Creating a structure with these 2 and passing that to the writer thread
> would be better and scalable.
>
> Yes, agreed this is only applicable for 2 writers. Currently, the multi writer
> test is only limited to a maximum of 2 writers.
> To support more number of writers, we need something like this (which I
> believe is in lines with your suggestion):
> 1. Calculate what each writer will write: single_insert = TOTAL_WRITES /
> num_writers 2. Pass core ID in linear space as an argument to the writer
> function: pos_core 3. Calculate si and ei in the writer function: si = pos_core *
> single_insert; ei = si + single_insert
>
> I can update the patch to enable more than 2 writers.
> Do you also suggest we expand the scope of the test to test with more than
> 2 writers?
> This will increase the time for which the test is running (which currently is
> significant even with 2 writers).
Agreed — no to increasing the number of writers, but yes to making the code more generic.
>
> >
> >>
> >>>> +
> >>>> /* struct route_rule_count defines the total number of rules in
> >>>> following a/b/c
> >>>> * each item in a[]/b[]/c[] is the number of common IP address class
> >>>> A/B/C, not
> >>>> * including the ones for private local network.
> >>>> @@ -430,11 +436,16 @@ test_lpm_rcu_qsbr_writer(void *arg) {
> >> unsigned
> >>>> int i, j, si, ei; uint64_t begin, total_cycles; -uint8_t core_id =
> >>>> (uint8_t)((uintptr_t)arg);
> >>>> +uint8_t writer_id = (uint8_t)((uintptr_t)arg);
> >>>> uint32_t next_hop_add = 0xAA;
> >>>>
> >>>> -/* 2 writer threads are used */
> >>>> -if (core_id % 2 == 0) {
> >>>> +/* Single writer (writer_id = 1) */ if (writer_id ==
> >>>> +SINGLE_WRITER) { si = 0; ei = NUM_LDEPTH_ROUTE_ENTRIES; }
> >>>> +/* 2 Writers (writer_id = 2/3)*/
> >>>> +else if (writer_id == MULTI_WRITER_1) {
> >>>> si = 0;
> >>>> ei = NUM_LDEPTH_ROUTE_ENTRIES / 2;
> >>>> } else {
> >>>> @@ -447,29 +458,35 @@ test_lpm_rcu_qsbr_writer(void *arg) for (i =
> >>>> 0; i < RCU_ITERATIONS; i++) {
> >>>> /* Add all the entries */
> >>>> for (j = si; j < ei; j++) {
> >>>> -pthread_mutex_lock(&lpm_mutex);
> >>>> +if (writer_id != SINGLE_WRITER)
> >>>> +pthread_mutex_lock(&lpm_mutex);
> >>>> if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
> >>>> large_ldepth_route_table[j].depth,
> >>>> next_hop_add) != 0) {
> >>>> printf("Failed to add iteration %d, route# %d\n", i, j);
> >>>> -pthread_mutex_unlock(&lpm_mutex);
> >>>> +if (writer_id != SINGLE_WRITER)
> >>>> +
> >>>> pthread_mutex_unlock(&lpm_mutex);
> >>>> return -1;
> >>>> }
> >>>> -pthread_mutex_unlock(&lpm_mutex);
> >>>> +if (writer_id != SINGLE_WRITER)
> >>>> +pthread_mutex_unlock(&lpm_mutex);
> >>>> }
> >>>>
> >>>> /* Delete all the entries */
> >>>> for (j = si; j < ei; j++) {
> >>>> -pthread_mutex_lock(&lpm_mutex);
> >>>> +if (writer_id != SINGLE_WRITER)
> >>>> +pthread_mutex_lock(&lpm_mutex);
> >>>> if (rte_lpm_delete(lpm,
> >>>> large_ldepth_route_table[j].ip,
> >>>> large_ldepth_route_table[j].depth) != 0) { printf("Failed to delete
> >>>> iteration %d, route# %d\n", i, j);
> >>>> -pthread_mutex_unlock(&lpm_mutex);
> >>>> +if (writer_id != SINGLE_WRITER)
> >>>> +
> >>>> pthread_mutex_unlock(&lpm_mutex);
> >>>> return -1;
> >>>> }
> >>>> -pthread_mutex_unlock(&lpm_mutex);
> >>>> +if (writer_id != SINGLE_WRITER)
> >>>> +pthread_mutex_unlock(&lpm_mutex);
> >>>> }
> >>>> }
> >>>>
> >>>> @@ -482,16 +499,17 @@ test_lpm_rcu_qsbr_writer(void *arg)
> >>>>
> >>>> /*
> >>>> * Functional test:
> >>>> - * 2 writers, rest are readers
> >>>> + * 1/2 writers, rest are readers
> >>>> */
> >>>> static int
> >>>> -test_lpm_rcu_perf_multi_writer(void)
> >>>> +test_lpm_rcu_perf_multi_writer(uint8_t use_rcu)
> >>>> {
> >>>> struct rte_lpm_config config;
> >>>> size_t sz;
> >>>> -unsigned int i;
> >>>> +unsigned int i, j;
> >>>> uint16_t core_id;
> >>>> struct rte_lpm_rcu_config rcu_cfg = {0};
> >>>> +int (*reader_f)(void *arg) = NULL;
> >>>>
> >>>> if (rte_lcore_count() < 3) {
> >>>> printf("Not enough cores for lpm_rcu_perf_autotest, expecting at
> >>>> least 3\n"); @@ -504,273 +522,76 @@
> >>>> test_lpm_rcu_perf_multi_writer(void)
> >>>> num_cores++;
> >>>> }
> >>>>
> >>>> -printf("\nPerf test: 2 writers, %d readers, RCU integration
> >>>> enabled\n", -num_cores - 2);
> >>>> -
> >>>> -/* Create LPM table */
> >>>> -config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; -
> >> config.number_tbl8s =
> >>>> NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm =
> >>>> rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> >>>> -TEST_LPM_ASSERT(lpm != NULL);
> >>>> -
> >>>> -/* Init RCU variable */
> >>>> -sz = rte_rcu_qsbr_get_memsize(num_cores);
> >>>> -rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
> >>>> -RTE_CACHE_LINE_SIZE); -rte_rcu_qsbr_init(rv, num_cores);
> >>>> -
> >>>> -rcu_cfg.v = rv;
> >>>> -/* Assign the RCU variable to LPM */ -if
> >>>> (rte_lpm_rcu_qsbr_add(lpm,
> >>>> &rcu_cfg) != 0) { -printf("RCU variable assignment failed\n");
> >>>> -goto error; -}
> >>>> -
> >>>> -writer_done = 0;
> >>>> -__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
> >>>> -
> >>>> -__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> >>>> -
> >>>> -/* Launch reader threads */
> >>>> -for (i = 2; i < num_cores; i++)
> >>>> -rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
> >>>> -enabled_core_ids[i]);
> >>>> -
> >>>> -/* Launch writer threads */
> >>>> -for (i = 0; i < 2; i++)
> >>>> -rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
> >>>> -(void *)(uintptr_t)i,
> >>>> -enabled_core_ids[i]);
> >>>> -
> >>>> -/* Wait for writer threads */
> >>>> -for (i = 0; i < 2; i++)
> >>>> -if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) -goto error;
> >>>> -
> >>>> -printf("Total LPM Adds: %d\n", TOTAL_WRITES); -printf("Total LPM
> >>>> Deletes: %d\n", TOTAL_WRITES); -printf("Average LPM Add/Del:
> >>>> %"PRIu64" cycles\n", -__atomic_load_n(&gwrite_cycles,
> >>>> __ATOMIC_RELAXED) -/ TOTAL_WRITES);
> >>>> -
> >>>> -writer_done = 1;
> >>>> -/* Wait until all readers have exited */ -for (i = 2; i <
> >>>> num_cores;
> >>>> i++) -rte_eal_wait_lcore(enabled_core_ids[i]);
> >>>> -
> >>>> -rte_lpm_free(lpm);
> >>>> -rte_free(rv);
> >>>> -lpm = NULL;
> >>>> -rv = NULL;
> >>>> -
> >>>> -/* Test without RCU integration */ -printf("\nPerf test: 2
> >>>> writers, %d readers, RCU integration disabled\n", -num_cores - 2);
> >>>> -
> >>>> -/* Create LPM table */
> >>>> -config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; -
> >> config.number_tbl8s =
> >>>> NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm =
> >>>> rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> >>>> -TEST_LPM_ASSERT(lpm != NULL);
> >>>> -
> >>>> -writer_done = 0;
> >>>> -__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
> >>>> -__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> >>>> -
> >>>> -/* Launch reader threads */
> >>>> -for (i = 2; i < num_cores; i++)
> >>>> -rte_eal_remote_launch(test_lpm_reader, NULL,
> >>>> -enabled_core_ids[i]);
> >>>> -
> >>>> -/* Launch writer threads */
> >>>> -for (i = 0; i < 2; i++)
> >>>> -rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
> >>>> -(void *)(uintptr_t)i,
> >>>> -enabled_core_ids[i]);
> >>>> -
> >>>> -/* Wait for writer threads */
> >>>> -for (i = 0; i < 2; i++)
> >>>> -if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) -goto error;
> >>>> -
> >>>> -printf("Total LPM Adds: %d\n", TOTAL_WRITES); -printf("Total LPM
> >>>> Deletes: %d\n", TOTAL_WRITES); -printf("Average LPM Add/Del:
> >>>> %"PRIu64" cycles\n", -__atomic_load_n(&gwrite_cycles,
> >>>> __ATOMIC_RELAXED) -/ TOTAL_WRITES);
> >>>> -
> >>>> -writer_done = 1;
> >>>> -/* Wait until all readers have exited */ -for (i = 2; i <
> >>>> num_cores;
> >>>> i++) -rte_eal_wait_lcore(enabled_core_ids[i]);
> >>>> -
> >>>> -rte_lpm_free(lpm);
> >>>> -
> >>>> -return 0;
> >>>> -
> >>>> -error:
> >>>> -writer_done = 1;
> >>>> -/* Wait until all readers have exited */ -rte_eal_mp_wait_lcore();
> >>>> -
> >>>> -rte_lpm_free(lpm);
> >>>> -rte_free(rv);
> >>>> -
> >>>> -return -1;
> >>>> -}
> >>>> -
> >>>> -/*
> >>>> - * Functional test:
> >>>> - * Single writer, rest are readers
> >>>> - */
> >>>> -static int
> >>>> -test_lpm_rcu_perf(void)
> >>>> -{
> >>>> -struct rte_lpm_config config;
> >>>> -uint64_t begin, total_cycles;
> >>>> -size_t sz;
> >>>> -unsigned int i, j;
> >>>> -uint16_t core_id;
> >>>> -uint32_t next_hop_add = 0xAA;
> >>>> -struct rte_lpm_rcu_config rcu_cfg = {0};
> >>>> -
> >>>> -if (rte_lcore_count() < 2) {
> >>>> -printf("Not enough cores for lpm_rcu_perf_autotest, expecting at
> >>>> least 2\n"); -return TEST_SKIPPED; -}
> >>>> -
> >>>> -num_cores = 0;
> >>>> -RTE_LCORE_FOREACH_WORKER(core_id) { -
> enabled_core_ids[num_cores] =
> >>>> core_id; -num_cores++; -}
> >>>> -
> >>>> -printf("\nPerf test: 1 writer, %d readers, RCU integration
> >>>> enabled\n", -num_cores);
> >>>> -
> >>>> -/* Create LPM table */
> >>>> -config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; -
> >> config.number_tbl8s =
> >>>> NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm =
> >>>> rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> >>>> -TEST_LPM_ASSERT(lpm != NULL);
> >>>> -
> >>>> -/* Init RCU variable */
> >>>> -sz = rte_rcu_qsbr_get_memsize(num_cores);
> >>>> -rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
> >>>> -RTE_CACHE_LINE_SIZE); -rte_rcu_qsbr_init(rv, num_cores);
> >>>> -
> >>>> -rcu_cfg.v = rv;
> >>>> -/* Assign the RCU variable to LPM */ -if
> >>>> (rte_lpm_rcu_qsbr_add(lpm,
> >>>> &rcu_cfg) != 0) { -printf("RCU variable assignment failed\n");
> >>>> -goto error; -}
> >>>> -
> >>>> -writer_done = 0;
> >>>> -__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> >>>> -
> >>>> -/* Launch reader threads */
> >>>> -for (i = 0; i < num_cores; i++)
> >>>> -rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
> >>>> -enabled_core_ids[i]);
> >>>> -
> >>>> -/* Measure add/delete. */
> >>>> -begin = rte_rdtsc_precise();
> >>>> -for (i = 0; i < RCU_ITERATIONS; i++) {
> >>>> -/* Add all the entries */
> >>>> -for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) -if
> >>>> (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
> >>>> -large_ldepth_route_table[j].depth,
> >>>> -next_hop_add) != 0) {
> >>>> -printf("Failed to add iteration %d, route# %d\n", -i, j);
> >>>> +for (j = 1; j < 3; j++) {
> >>>> +if (use_rcu)
> >>>> +printf("\nPerf test: %d writer(s), %d reader(s),"
> >>>> + " RCU integration enabled\n", j, num_cores - j); else
> >>>> +printf("\nPerf test: %d writer(s), %d reader(s),"
> >>>> + " RCU integration disabled\n", j, num_cores - j);
> >>>> +
> >>>> +/* Create LPM table */
> >>>> +config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
> >> config.number_tbl8s =
> >>>> +NUM_LDEPTH_ROUTE_ENTRIES; config.flags = 0; lpm =
> >>>> +rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> >>>> +TEST_LPM_ASSERT(lpm != NULL);
> >>>> +
> >>>> +/* Init RCU variable */
> >>>> +if (use_rcu) {
> >>>> +sz = rte_rcu_qsbr_get_memsize(num_cores);
> >>>> +rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
> >>>> +
> >>>> RTE_CACHE_LINE_SIZE);
> >>>> +rte_rcu_qsbr_init(rv, num_cores);
> >>>> +
> >>>> +rcu_cfg.v = rv;
> >>>> +/* Assign the RCU variable to LPM */ if (rte_lpm_rcu_qsbr_add(lpm,
> >>>> +&rcu_cfg) != 0) { printf("RCU variable assignment failed\n");
> >>>> goto error;
> >>>> }
> >>>>
> >>>> -/* Delete all the entries */
> >>>> -for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) -if
> >>>> (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
> >>>> -large_ldepth_route_table[j].depth) != 0) { -printf("Failed to
> >>>> delete iteration %d, route# %d\n", -i, j); -goto error; -} -}
> >>>> -total_cycles = rte_rdtsc_precise() - begin;
> >>>> +reader_f = test_lpm_rcu_qsbr_reader; } else reader_f =
> >>>> +test_lpm_reader;
> >>>>
> >>>> -printf("Total LPM Adds: %d\n", TOTAL_WRITES); -printf("Total LPM
> >>>> Deletes: %d\n", TOTAL_WRITES); -printf("Average LPM Add/Del: %g
> >>>> cycles\n", -(double)total_cycles / TOTAL_WRITES);
> >>>> +writer_done = 0;
> >>>> +__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
> >>>>
> >>>> -writer_done = 1;
> >>>> -/* Wait until all readers have exited */ -for (i = 0; i <
> >>>> num_cores;
> >>>> i++) -if (rte_eal_wait_lcore(enabled_core_ids[i]);
> >>>> -
> >>>> -rte_lpm_free(lpm);
> >>>> -rte_free(rv);
> >>>> -lpm = NULL;
> >>>> -rv = NULL;
> >>>> -
> >>>> -/* Test without RCU integration */ -printf("\nPerf test: 1 writer,
> >>>> %d readers, RCU integration disabled\n", -num_cores);
> >>>> -
> >>>> -/* Create LPM table */
> >>>> -config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; -
> >> config.number_tbl8s =
> >>>> NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm =
> >>>> rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> >>>> -TEST_LPM_ASSERT(lpm != NULL);
> >>>> +__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> >>>>
> >>>> -writer_done = 0;
> >>>> -__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> >>>> +/* Launch reader threads */
> >>>> +for (i = j; i < num_cores; i++)
> >>>> +rte_eal_remote_launch(reader_f, NULL, enabled_core_ids[i]);
> >>>>
> >>>> -/* Launch reader threads */
> >>>> -for (i = 0; i < num_cores; i++)
> >>>> -rte_eal_remote_launch(test_lpm_reader, NULL,
> >>>> -enabled_core_ids[i]);
> >>>> +/* Launch writer threads */
> >>>> +for (i = 0; i < j; i++)
> >>>> +rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
> >>>> +(void *)(uintptr_t)(i + j),
> >>> This can be just 'j'?
> >>>
> >>>> +enabled_core_ids[i]);
> >>>>
> >>>> -/* Measure add/delete. */
> >>>> -begin = rte_rdtsc_precise();
> >>>> -for (i = 0; i < RCU_ITERATIONS; i++) {
> >>>> -/* Add all the entries */
> >>>> -for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) -if
> >>>> (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
> >>>> -large_ldepth_route_table[j].depth,
> >>>> -next_hop_add) != 0) {
> >>>> -printf("Failed to add iteration %d, route# %d\n", -i, j);
> >>>> +/* Wait for writer threads */
> >>>> +for (i = 0; i < j; i++)
> >>>> +if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
> >>>> goto error;
> >>>> -}
> >>>>
> >>>> -/* Delete all the entries */
> >>>> -for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) -if
> >>>> (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
> >>>> -large_ldepth_route_table[j].depth) != 0) { -printf("Failed to
> >>>> delete iteration %d, route# %d\n", -i, j); -goto error; -}
> >>>> +printf("Total LPM Adds: %d\n", TOTAL_WRITES); printf("Total LPM
> >>>> +Deletes: %d\n", TOTAL_WRITES); printf("Average LPM Add/Del:
> >>>> +%"PRIu64" cycles\n", __atomic_load_n(&gwrite_cycles,
> >>>> __ATOMIC_RELAXED)
> >>>> +/ TOTAL_WRITES);
> >>>> +
> >>>> +writer_done = 1;
> >>>> +/* Wait until all readers have exited */ for (i = j; i <
> >>>> +num_cores; i++) rte_eal_wait_lcore(enabled_core_ids[i]);
> >>>> +
> >>>> +rte_lpm_free(lpm);
> >>>> +rte_free(rv);
> >>>> +lpm = NULL;
> >>>> +rv = NULL;
> >>>> }
> >>>> -total_cycles = rte_rdtsc_precise() - begin;
> >>>> -
> >>>> -printf("Total LPM Adds: %d\n", TOTAL_WRITES); -printf("Total LPM
> >>>> Deletes: %d\n", TOTAL_WRITES); -printf("Average LPM Add/Del: %g
> >>>> cycles\n", -(double)total_cycles / TOTAL_WRITES);
> >>>> -
> >>>> -writer_done = 1;
> >>>> -/* Wait until all readers have exited */ -for (i = 0; i <
> >>>> num_cores; i++) -rte_eal_wait_lcore(enabled_core_ids[i]);
> >>>> -
> >>>> -rte_lpm_free(lpm);
> >>>>
> >>>> return 0;
> >>>>
> >>>> @@ -946,9 +767,8 @@ test_lpm_perf(void) rte_lpm_delete_all(lpm);
> >>>> rte_lpm_free(lpm);
> >>>>
> >>>> -test_lpm_rcu_perf();
> >>>> -
> >>>> -test_lpm_rcu_perf_multi_writer();
> >>>> +test_lpm_rcu_perf_multi_writer(0);
> >>>> +test_lpm_rcu_perf_multi_writer(1);
> >>>>
> >>>> return 0;
> >>>> }
> >>>> --
> >>>> 2.17.1
>
Hi,
On 03/11/2020 14:03, Dharmik Thakkar wrote:
>
>
>> On Nov 2, 2020, at 11:32 PM, Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com> wrote:
>>
>> <snip>
>>
>>>>>
>>>>> Avoid code duplication by combining single and multi threaded tests
>>>>>
>>>>> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
>>>>> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
>>>>> ---
>>>>> app/test/test_lpm_perf.c | 362
>>>>> ++++++++++-----------------------------
>>>>> 1 file changed, 91 insertions(+), 271 deletions(-)
>>>>>
>>>>> diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
>>>>> index
>>>>> 224c92fa3d65..229c835c23f7 100644
>>>>> --- a/app/test/test_lpm_perf.c
>>>>> +++ b/app/test/test_lpm_perf.c
>>>>> @@ -67,6 +67,12 @@ enum {
>>>>> IP_CLASS_C
>>>>> };
>>>>>
>>>>> +enum {
>>>>> +SINGLE_WRITER = 1,
>>>>> +MULTI_WRITER_1,
>>>>> +MULTI_WRITER_2
>>>>> +};
>>>> Do we need this? Can we use the number of cores instead?
>>>>
>>>
>>> There are 3 combinations of writes (adds/deletes):
>>> 1. Write all the entries - in case of a single writer
>>> 2. Write half of the entries - in case of multiple writers
>>> 3. Write the remaining half of the entries - in case of multiple writers
>>>
>>> So, I think this is required.
>> IMO, this is not scalable. Essentially, we need 2 parameters to divide the routes among the writer threads: 1) the total number of writers and 2) the core ID in the linear space.
>> Creating a structure with these 2 parameters and passing that to the writer thread would be better and more scalable.
>
> Yes, agreed this is only applicable for 2 writers. Currently, the multi writer test is limited to a maximum of 2 writers.
> To support more number of writers, we need something like this (which I believe is in lines with your suggestion):
> 1. Calculate what each writer will write: single_insert = TOTAL_WRITES / num_writers
> 2. Pass core ID in linear space as an argument to the writer function: pos_core
> 3. Calculate si and ei in the writer function: si = pos_core * single_insert; ei = si + single_insert
>
I agree with Honnappa's suggestion; it looks good to me — better than the
previous implementation.
> I can update the patch to enable more than 2 writers.
> Do you also suggest we expand the scope of the test to test with more than 2 writers?
> This will increase the time for which the test is running (which currently is significant even with 2 writers).
>
I don't see any reason to increase the number of writers to more than 2.
>>
>>>
>>>>> +
>>>>> /* struct route_rule_count defines the total number of rules in
>>>>> following a/b/c
>>>>> * each item in a[]/b[]/c[] is the number of common IP address class
>>>>> A/B/C, not
>>>>> * including the ones for private local network.
>>>>> @@ -430,11 +436,16 @@ test_lpm_rcu_qsbr_writer(void *arg) {
>>> unsigned
>>>>> int i, j, si, ei; uint64_t begin, total_cycles; -uint8_t core_id =
>>>>> (uint8_t)((uintptr_t)arg);
>>>>> +uint8_t writer_id = (uint8_t)((uintptr_t)arg);
>>>>> uint32_t next_hop_add = 0xAA;
>>>>>
>>>>> -/* 2 writer threads are used */
>>>>> -if (core_id % 2 == 0) {
>>>>> +/* Single writer (writer_id = 1) */
>>>>> +if (writer_id == SINGLE_WRITER) {
>>>>> +si = 0;
>>>>> +ei = NUM_LDEPTH_ROUTE_ENTRIES;
>>>>> +}
>>>>> +/* 2 Writers (writer_id = 2/3)*/
>>>>> +else if (writer_id == MULTI_WRITER_1) {
>>>>> si = 0;
>>>>> ei = NUM_LDEPTH_ROUTE_ENTRIES / 2;
>>>>> } else {
>>>>> @@ -447,29 +458,35 @@ test_lpm_rcu_qsbr_writer(void *arg) for (i = 0;
>>>>> i < RCU_ITERATIONS; i++) {
>>>>> /* Add all the entries */
>>>>> for (j = si; j < ei; j++) {
>>>>> -pthread_mutex_lock(&lpm_mutex);
>>>>> +if (writer_id != SINGLE_WRITER)
>>>>> +pthread_mutex_lock(&lpm_mutex);
>>>>> if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
>>>>> large_ldepth_route_table[j].depth,
>>>>> next_hop_add) != 0) {
>>>>> printf("Failed to add iteration %d, route# %d\n", i, j);
>>>>> -pthread_mutex_unlock(&lpm_mutex);
>>>>> +if (writer_id != SINGLE_WRITER)
>>>>> +
>>>>> pthread_mutex_unlock(&lpm_mutex);
>>>>> return -1;
>>>>> }
>>>>> -pthread_mutex_unlock(&lpm_mutex);
>>>>> +if (writer_id != SINGLE_WRITER)
>>>>> +pthread_mutex_unlock(&lpm_mutex);
>>>>> }
>>>>>
>>>>> /* Delete all the entries */
>>>>> for (j = si; j < ei; j++) {
>>>>> -pthread_mutex_lock(&lpm_mutex);
>>>>> +if (writer_id != SINGLE_WRITER)
>>>>> +pthread_mutex_lock(&lpm_mutex);
>>>>> if (rte_lpm_delete(lpm,
>>>>> large_ldepth_route_table[j].ip,
>>>>> large_ldepth_route_table[j].depth) != 0) { printf("Failed to delete
>>>>> iteration %d, route# %d\n", i, j); -pthread_mutex_unlock(&lpm_mutex);
>>>>> +if (writer_id != SINGLE_WRITER)
>>>>> +
>>>>> pthread_mutex_unlock(&lpm_mutex);
>>>>> return -1;
>>>>> }
>>>>> -pthread_mutex_unlock(&lpm_mutex);
>>>>> +if (writer_id != SINGLE_WRITER)
>>>>> +pthread_mutex_unlock(&lpm_mutex);
>>>>> }
>>>>> }
>>>>>
>>>>> @@ -482,16 +499,17 @@ test_lpm_rcu_qsbr_writer(void *arg)
>>>>>
>>>>> /*
>>>>> * Functional test:
>>>>> - * 2 writers, rest are readers
>>>>> + * 1/2 writers, rest are readers
>>>>> */
>>>>> static int
>>>>> -test_lpm_rcu_perf_multi_writer(void)
>>>>> +test_lpm_rcu_perf_multi_writer(uint8_t use_rcu)
>>>>> {
>>>>> struct rte_lpm_config config;
>>>>> size_t sz;
>>>>> -unsigned int i;
>>>>> +unsigned int i, j;
>>>>> uint16_t core_id;
>>>>> struct rte_lpm_rcu_config rcu_cfg = {0};
>>>>> +int (*reader_f)(void *arg) = NULL;
>>>>>
>>>>> if (rte_lcore_count() < 3) {
>>>>> printf("Not enough cores for lpm_rcu_perf_autotest, expecting at
>>>>> least 3\n"); @@ -504,273 +522,76 @@
>>>>> test_lpm_rcu_perf_multi_writer(void)
>>>>> num_cores++;
>>>>> }
>>>>>
>>>>> -printf("\nPerf test: 2 writers, %d readers, RCU integration
>>>>> enabled\n", -num_cores - 2);
>>>>> -
>>>>> -/* Create LPM table */
>>>>> -config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; -
>>> config.number_tbl8s =
>>>>> NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm =
>>>>> rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>>>>> -TEST_LPM_ASSERT(lpm != NULL);
>>>>> -
>>>>> -/* Init RCU variable */
>>>>> -sz = rte_rcu_qsbr_get_memsize(num_cores);
>>>>> -rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
>>>>> -RTE_CACHE_LINE_SIZE); -rte_rcu_qsbr_init(rv, num_cores);
>>>>> -
>>>>> -rcu_cfg.v = rv;
>>>>> -/* Assign the RCU variable to LPM */ -if (rte_lpm_rcu_qsbr_add(lpm,
>>>>> &rcu_cfg) != 0) { -printf("RCU variable assignment failed\n"); -goto
>>>>> error; -}
>>>>> -
>>>>> -writer_done = 0;
>>>>> -__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
>>>>> -
>>>>> -__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>>>>> -
>>>>> -/* Launch reader threads */
>>>>> -for (i = 2; i < num_cores; i++)
>>>>> -rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
>>>>> -enabled_core_ids[i]);
>>>>> -
>>>>> -/* Launch writer threads */
>>>>> -for (i = 0; i < 2; i++)
>>>>> -rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
>>>>> -(void *)(uintptr_t)i,
>>>>> -enabled_core_ids[i]);
>>>>> -
>>>>> -/* Wait for writer threads */
>>>>> -for (i = 0; i < 2; i++)
>>>>> -if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) -goto error;
>>>>> -
>>>>> -printf("Total LPM Adds: %d\n", TOTAL_WRITES); -printf("Total LPM
>>>>> Deletes: %d\n", TOTAL_WRITES); -printf("Average LPM Add/Del:
>>>>> %"PRIu64" cycles\n", -__atomic_load_n(&gwrite_cycles,
>>>>> __ATOMIC_RELAXED) -/ TOTAL_WRITES);
>>>>> -
>>>>> -writer_done = 1;
>>>>> -/* Wait until all readers have exited */ -for (i = 2; i < num_cores;
>>>>> i++) -rte_eal_wait_lcore(enabled_core_ids[i]);
>>>>> -
>>>>> -rte_lpm_free(lpm);
>>>>> -rte_free(rv);
>>>>> -lpm = NULL;
>>>>> -rv = NULL;
>>>>> -
>>>>> -/* Test without RCU integration */
>>>>> -printf("\nPerf test: 2 writers, %d readers, RCU integration
>>>>> disabled\n", -num_cores - 2);
>>>>> -
>>>>> -/* Create LPM table */
>>>>> -config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; -
>>> config.number_tbl8s =
>>>>> NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm =
>>>>> rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>>>>> -TEST_LPM_ASSERT(lpm != NULL);
>>>>> -
>>>>> -writer_done = 0;
>>>>> -__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
>>>>> -__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>>>>> -
>>>>> -/* Launch reader threads */
>>>>> -for (i = 2; i < num_cores; i++)
>>>>> -rte_eal_remote_launch(test_lpm_reader, NULL, -enabled_core_ids[i]);
>>>>> -
>>>>> -/* Launch writer threads */
>>>>> -for (i = 0; i < 2; i++)
>>>>> -rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
>>>>> -(void *)(uintptr_t)i,
>>>>> -enabled_core_ids[i]);
>>>>> -
>>>>> -/* Wait for writer threads */
>>>>> -for (i = 0; i < 2; i++)
>>>>> -if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) -goto error;
>>>>> -
>>>>> -printf("Total LPM Adds: %d\n", TOTAL_WRITES); -printf("Total LPM
>>>>> Deletes: %d\n", TOTAL_WRITES); -printf("Average LPM Add/Del:
>>>>> %"PRIu64" cycles\n", -__atomic_load_n(&gwrite_cycles,
>>>>> __ATOMIC_RELAXED) -/ TOTAL_WRITES);
>>>>> -
>>>>> -writer_done = 1;
>>>>> -/* Wait until all readers have exited */ -for (i = 2; i < num_cores;
>>>>> i++) -rte_eal_wait_lcore(enabled_core_ids[i]);
>>>>> -
>>>>> -rte_lpm_free(lpm);
>>>>> -
>>>>> -return 0;
>>>>> -
>>>>> -error:
>>>>> -writer_done = 1;
>>>>> -/* Wait until all readers have exited */ -rte_eal_mp_wait_lcore();
>>>>> -
>>>>> -rte_lpm_free(lpm);
>>>>> -rte_free(rv);
>>>>> -
>>>>> -return -1;
>>>>> -}
>>>>> -
>>>>> -/*
>>>>> - * Functional test:
>>>>> - * Single writer, rest are readers
>>>>> - */
>>>>> -static int
>>>>> -test_lpm_rcu_perf(void)
>>>>> -{
>>>>> -struct rte_lpm_config config;
>>>>> -uint64_t begin, total_cycles;
>>>>> -size_t sz;
>>>>> -unsigned int i, j;
>>>>> -uint16_t core_id;
>>>>> -uint32_t next_hop_add = 0xAA;
>>>>> -struct rte_lpm_rcu_config rcu_cfg = {0};
>>>>> -
>>>>> -if (rte_lcore_count() < 2) {
>>>>> -printf("Not enough cores for lpm_rcu_perf_autotest, expecting at
>>>>> least 2\n"); -return TEST_SKIPPED; -}
>>>>> -
>>>>> -num_cores = 0;
>>>>> -RTE_LCORE_FOREACH_WORKER(core_id) {
>>>>> -enabled_core_ids[num_cores] = core_id; -num_cores++; -}
>>>>> -
>>>>> -printf("\nPerf test: 1 writer, %d readers, RCU integration
>>>>> enabled\n", -num_cores);
>>>>> -
>>>>> -/* Create LPM table */
>>>>> -config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; -
>>> config.number_tbl8s =
>>>>> NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm =
>>>>> rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>>>>> -TEST_LPM_ASSERT(lpm != NULL);
>>>>> -
>>>>> -/* Init RCU variable */
>>>>> -sz = rte_rcu_qsbr_get_memsize(num_cores);
>>>>> -rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
>>>>> -RTE_CACHE_LINE_SIZE); -rte_rcu_qsbr_init(rv, num_cores);
>>>>> -
>>>>> -rcu_cfg.v = rv;
>>>>> -/* Assign the RCU variable to LPM */ -if (rte_lpm_rcu_qsbr_add(lpm,
>>>>> &rcu_cfg) != 0) { -printf("RCU variable assignment failed\n"); -goto
>>>>> error; -}
>>>>> -
>>>>> -writer_done = 0;
>>>>> -__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>>>>> -
>>>>> -/* Launch reader threads */
>>>>> -for (i = 0; i < num_cores; i++)
>>>>> -rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
>>>>> -enabled_core_ids[i]);
>>>>> -
>>>>> -/* Measure add/delete. */
>>>>> -begin = rte_rdtsc_precise();
>>>>> -for (i = 0; i < RCU_ITERATIONS; i++) {
>>>>> -/* Add all the entries */
>>>>> -for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) -if (rte_lpm_add(lpm,
>>>>> large_ldepth_route_table[j].ip, -large_ldepth_route_table[j].depth,
>>>>> -next_hop_add) != 0) {
>>>>> -printf("Failed to add iteration %d, route# %d\n", -i, j);
>>>>> +for (j = 1; j < 3; j++) {
>>>>> +if (use_rcu)
>>>>> +printf("\nPerf test: %d writer(s), %d reader(s),"
>>>>> + " RCU integration enabled\n", j, num_cores - j); else
>>>>> +printf("\nPerf test: %d writer(s), %d reader(s),"
>>>>> + " RCU integration disabled\n", j, num_cores - j);
>>>>> +
>>>>> +/* Create LPM table */
>>>>> +config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
>>> config.number_tbl8s =
>>>>> +NUM_LDEPTH_ROUTE_ENTRIES; config.flags = 0; lpm =
>>>>> +rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>>>>> +TEST_LPM_ASSERT(lpm != NULL);
>>>>> +
>>>>> +/* Init RCU variable */
>>>>> +if (use_rcu) {
>>>>> +sz = rte_rcu_qsbr_get_memsize(num_cores);
>>>>> +rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
>>>>> +
>>>>> RTE_CACHE_LINE_SIZE);
>>>>> +rte_rcu_qsbr_init(rv, num_cores);
>>>>> +
>>>>> +rcu_cfg.v = rv;
>>>>> +/* Assign the RCU variable to LPM */ if (rte_lpm_rcu_qsbr_add(lpm,
>>>>> +&rcu_cfg) != 0) { printf("RCU variable assignment failed\n");
>>>>> goto error;
>>>>> }
>>>>>
>>>>> -/* Delete all the entries */
>>>>> -for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) -if
>>>>> (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
>>>>> -large_ldepth_route_table[j].depth) != 0) { -printf("Failed to delete
>>>>> iteration %d, route# %d\n", -i, j); -goto error; -} -} -total_cycles
>>>>> = rte_rdtsc_precise() - begin;
>>>>> +reader_f = test_lpm_rcu_qsbr_reader; } else reader_f =
>>>>> +test_lpm_reader;
>>>>>
>>>>> -printf("Total LPM Adds: %d\n", TOTAL_WRITES); -printf("Total LPM
>>>>> Deletes: %d\n", TOTAL_WRITES); -printf("Average LPM Add/Del: %g
>>>>> cycles\n", -(double)total_cycles / TOTAL_WRITES);
>>>>> +writer_done = 0;
>>>>> +__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
>>>>>
>>>>> -writer_done = 1;
>>>>> -/* Wait until all readers have exited */ -for (i = 0; i < num_cores;
>>>>> i++) -if (rte_eal_wait_lcore(enabled_core_ids[i]);
>>>>> -
>>>>> -rte_lpm_free(lpm);
>>>>> -rte_free(rv);
>>>>> -lpm = NULL;
>>>>> -rv = NULL;
>>>>> -
>>>>> -/* Test without RCU integration */
>>>>> -printf("\nPerf test: 1 writer, %d readers, RCU integration
>>>>> disabled\n", -num_cores);
>>>>> -
>>>>> -/* Create LPM table */
>>>>> -config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; -
>>> config.number_tbl8s =
>>>>> NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm =
>>>>> rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>>>>> -TEST_LPM_ASSERT(lpm != NULL);
>>>>> +__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>>>>>
>>>>> -writer_done = 0;
>>>>> -__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>>>>> +/* Launch reader threads */
>>>>> +for (i = j; i < num_cores; i++)
>>>>> +rte_eal_remote_launch(reader_f, NULL,
>>>>> +enabled_core_ids[i]);
>>>>>
>>>>> -/* Launch reader threads */
>>>>> -for (i = 0; i < num_cores; i++)
>>>>> -rte_eal_remote_launch(test_lpm_reader, NULL,
>>>>> -enabled_core_ids[i]);
>>>>> +/* Launch writer threads */
>>>>> +for (i = 0; i < j; i++)
>>>>> +rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
>>>>> +(void *)(uintptr_t)(i + j),
>>>> This can be just 'j'?
>>>>
>>>>> +enabled_core_ids[i]);
>>>>>
>>>>> -/* Measure add/delete. */
>>>>> -begin = rte_rdtsc_precise();
>>>>> -for (i = 0; i < RCU_ITERATIONS; i++) {
>>>>> -/* Add all the entries */
>>>>> -for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
>>>>> -if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
>>>>> -large_ldepth_route_table[j].depth,
>>>>> -next_hop_add) != 0) {
>>>>> -printf("Failed to add iteration %d, route#
>>>>> %d\n",
>>>>> -i, j);
>>>>> +/* Wait for writer threads */
>>>>> +for (i = 0; i < j; i++)
>>>>> +if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
>>>>> goto error;
>>>>> -}
>>>>>
>>>>> -/* Delete all the entries */
>>>>> -for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
>>>>> -if (rte_lpm_delete(lpm,
>>>>> large_ldepth_route_table[j].ip,
>>>>> -large_ldepth_route_table[j].depth) != 0) {
>>>>> -printf("Failed to delete iteration %d, route#
>>>>> %d\n",
>>>>> -i, j);
>>>>> -goto error;
>>>>> -}
>>>>> +printf("Total LPM Adds: %d\n", TOTAL_WRITES);
>>>>> +printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>>>>> +printf("Average LPM Add/Del: %"PRIu64" cycles\n",
>>>>> +__atomic_load_n(&gwrite_cycles,
>>>>> __ATOMIC_RELAXED)
>>>>> +/ TOTAL_WRITES);
>>>>> +
>>>>> +writer_done = 1;
>>>>> +/* Wait until all readers have exited */
>>>>> +for (i = j; i < num_cores; i++)
>>>>> +rte_eal_wait_lcore(enabled_core_ids[i]);
>>>>> +
>>>>> +rte_lpm_free(lpm);
>>>>> +rte_free(rv);
>>>>> +lpm = NULL;
>>>>> +rv = NULL;
>>>>> }
>>>>> -total_cycles = rte_rdtsc_precise() - begin;
>>>>> -
>>>>> -printf("Total LPM Adds: %d\n", TOTAL_WRITES);
>>>>> -printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>>>>> -printf("Average LPM Add/Del: %g cycles\n",
>>>>> -(double)total_cycles / TOTAL_WRITES);
>>>>> -
>>>>> -writer_done = 1;
>>>>> -/* Wait until all readers have exited */
>>>>> -for (i = 0; i < num_cores; i++)
>>>>> -rte_eal_wait_lcore(enabled_core_ids[i]);
>>>>> -
>>>>> -rte_lpm_free(lpm);
>>>>>
>>>>> return 0;
>>>>>
>>>>> @@ -946,9 +767,8 @@ test_lpm_perf(void)
>>>>> rte_lpm_delete_all(lpm);
>>>>> rte_lpm_free(lpm);
>>>>>
>>>>> -test_lpm_rcu_perf();
>>>>> -
>>>>> -test_lpm_rcu_perf_multi_writer();
>>>>> +test_lpm_rcu_perf_multi_writer(0);
>>>>> +test_lpm_rcu_perf_multi_writer(1);
>>>>>
>>>>> return 0;
>>>>> }
>>>>> --
>>>>> 2.17.1
>
@@ -67,6 +67,12 @@ enum {
IP_CLASS_C
};
+enum {
+ SINGLE_WRITER = 1,
+ MULTI_WRITER_1,
+ MULTI_WRITER_2
+};
+
/* struct route_rule_count defines the total number of rules in following a/b/c
* each item in a[]/b[]/c[] is the number of common IP address class A/B/C, not
* including the ones for private local network.
@@ -430,11 +436,16 @@ test_lpm_rcu_qsbr_writer(void *arg)
{
unsigned int i, j, si, ei;
uint64_t begin, total_cycles;
- uint8_t core_id = (uint8_t)((uintptr_t)arg);
+ uint8_t writer_id = (uint8_t)((uintptr_t)arg);
uint32_t next_hop_add = 0xAA;
- /* 2 writer threads are used */
- if (core_id % 2 == 0) {
+ /* Single writer (writer_id = 1) */
+ if (writer_id == SINGLE_WRITER) {
+ si = 0;
+ ei = NUM_LDEPTH_ROUTE_ENTRIES;
+ }
+ /* 2 Writers (writer_id = 2/3)*/
+ else if (writer_id == MULTI_WRITER_1) {
si = 0;
ei = NUM_LDEPTH_ROUTE_ENTRIES / 2;
} else {
@@ -447,29 +458,35 @@ test_lpm_rcu_qsbr_writer(void *arg)
for (i = 0; i < RCU_ITERATIONS; i++) {
/* Add all the entries */
for (j = si; j < ei; j++) {
- pthread_mutex_lock(&lpm_mutex);
+ if (writer_id != SINGLE_WRITER)
+ pthread_mutex_lock(&lpm_mutex);
if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
large_ldepth_route_table[j].depth,
next_hop_add) != 0) {
printf("Failed to add iteration %d, route# %d\n",
i, j);
- pthread_mutex_unlock(&lpm_mutex);
+ if (writer_id != SINGLE_WRITER)
+ pthread_mutex_unlock(&lpm_mutex);
return -1;
}
- pthread_mutex_unlock(&lpm_mutex);
+ if (writer_id != SINGLE_WRITER)
+ pthread_mutex_unlock(&lpm_mutex);
}
/* Delete all the entries */
for (j = si; j < ei; j++) {
- pthread_mutex_lock(&lpm_mutex);
+ if (writer_id != SINGLE_WRITER)
+ pthread_mutex_lock(&lpm_mutex);
if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
large_ldepth_route_table[j].depth) != 0) {
printf("Failed to delete iteration %d, route# %d\n",
i, j);
- pthread_mutex_unlock(&lpm_mutex);
+ if (writer_id != SINGLE_WRITER)
+ pthread_mutex_unlock(&lpm_mutex);
return -1;
}
- pthread_mutex_unlock(&lpm_mutex);
+ if (writer_id != SINGLE_WRITER)
+ pthread_mutex_unlock(&lpm_mutex);
}
}
@@ -482,16 +499,17 @@ test_lpm_rcu_qsbr_writer(void *arg)
/*
* Functional test:
- * 2 writers, rest are readers
+ * 1/2 writers, rest are readers
*/
static int
-test_lpm_rcu_perf_multi_writer(void)
+test_lpm_rcu_perf_multi_writer(uint8_t use_rcu)
{
struct rte_lpm_config config;
size_t sz;
- unsigned int i;
+ unsigned int i, j;
uint16_t core_id;
struct rte_lpm_rcu_config rcu_cfg = {0};
+ int (*reader_f)(void *arg) = NULL;
if (rte_lcore_count() < 3) {
printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 3\n");
@@ -504,273 +522,76 @@ test_lpm_rcu_perf_multi_writer(void)
num_cores++;
}
- printf("\nPerf test: 2 writers, %d readers, RCU integration enabled\n",
- num_cores - 2);
-
- /* Create LPM table */
- config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
- config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
- config.flags = 0;
- lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
- TEST_LPM_ASSERT(lpm != NULL);
-
- /* Init RCU variable */
- sz = rte_rcu_qsbr_get_memsize(num_cores);
- rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
- RTE_CACHE_LINE_SIZE);
- rte_rcu_qsbr_init(rv, num_cores);
-
- rcu_cfg.v = rv;
- /* Assign the RCU variable to LPM */
- if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
- printf("RCU variable assignment failed\n");
- goto error;
- }
-
- writer_done = 0;
- __atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
-
- __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
-
- /* Launch reader threads */
- for (i = 2; i < num_cores; i++)
- rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
- enabled_core_ids[i]);
-
- /* Launch writer threads */
- for (i = 0; i < 2; i++)
- rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
- (void *)(uintptr_t)i,
- enabled_core_ids[i]);
-
- /* Wait for writer threads */
- for (i = 0; i < 2; i++)
- if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
- goto error;
-
- printf("Total LPM Adds: %d\n", TOTAL_WRITES);
- printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
- printf("Average LPM Add/Del: %"PRIu64" cycles\n",
- __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
- / TOTAL_WRITES);
-
- writer_done = 1;
- /* Wait until all readers have exited */
- for (i = 2; i < num_cores; i++)
- rte_eal_wait_lcore(enabled_core_ids[i]);
-
- rte_lpm_free(lpm);
- rte_free(rv);
- lpm = NULL;
- rv = NULL;
-
- /* Test without RCU integration */
- printf("\nPerf test: 2 writers, %d readers, RCU integration disabled\n",
- num_cores - 2);
-
- /* Create LPM table */
- config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
- config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
- config.flags = 0;
- lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
- TEST_LPM_ASSERT(lpm != NULL);
-
- writer_done = 0;
- __atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
- __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
-
- /* Launch reader threads */
- for (i = 2; i < num_cores; i++)
- rte_eal_remote_launch(test_lpm_reader, NULL,
- enabled_core_ids[i]);
-
- /* Launch writer threads */
- for (i = 0; i < 2; i++)
- rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
- (void *)(uintptr_t)i,
- enabled_core_ids[i]);
-
- /* Wait for writer threads */
- for (i = 0; i < 2; i++)
- if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
- goto error;
-
- printf("Total LPM Adds: %d\n", TOTAL_WRITES);
- printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
- printf("Average LPM Add/Del: %"PRIu64" cycles\n",
- __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
- / TOTAL_WRITES);
-
- writer_done = 1;
- /* Wait until all readers have exited */
- for (i = 2; i < num_cores; i++)
- rte_eal_wait_lcore(enabled_core_ids[i]);
-
- rte_lpm_free(lpm);
-
- return 0;
-
-error:
- writer_done = 1;
- /* Wait until all readers have exited */
- rte_eal_mp_wait_lcore();
-
- rte_lpm_free(lpm);
- rte_free(rv);
-
- return -1;
-}
-
-/*
- * Functional test:
- * Single writer, rest are readers
- */
-static int
-test_lpm_rcu_perf(void)
-{
- struct rte_lpm_config config;
- uint64_t begin, total_cycles;
- size_t sz;
- unsigned int i, j;
- uint16_t core_id;
- uint32_t next_hop_add = 0xAA;
- struct rte_lpm_rcu_config rcu_cfg = {0};
-
- if (rte_lcore_count() < 2) {
- printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 2\n");
- return TEST_SKIPPED;
- }
-
- num_cores = 0;
- RTE_LCORE_FOREACH_WORKER(core_id) {
- enabled_core_ids[num_cores] = core_id;
- num_cores++;
- }
-
- printf("\nPerf test: 1 writer, %d readers, RCU integration enabled\n",
- num_cores);
-
- /* Create LPM table */
- config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
- config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
- config.flags = 0;
- lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
- TEST_LPM_ASSERT(lpm != NULL);
-
- /* Init RCU variable */
- sz = rte_rcu_qsbr_get_memsize(num_cores);
- rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
- RTE_CACHE_LINE_SIZE);
- rte_rcu_qsbr_init(rv, num_cores);
-
- rcu_cfg.v = rv;
- /* Assign the RCU variable to LPM */
- if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
- printf("RCU variable assignment failed\n");
- goto error;
- }
-
- writer_done = 0;
- __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
-
- /* Launch reader threads */
- for (i = 0; i < num_cores; i++)
- rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
- enabled_core_ids[i]);
-
- /* Measure add/delete. */
- begin = rte_rdtsc_precise();
- for (i = 0; i < RCU_ITERATIONS; i++) {
- /* Add all the entries */
- for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
- if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
- large_ldepth_route_table[j].depth,
- next_hop_add) != 0) {
- printf("Failed to add iteration %d, route# %d\n",
- i, j);
+ for (j = 1; j < 3; j++) {
+ if (use_rcu)
+ printf("\nPerf test: %d writer(s), %d reader(s),"
+ " RCU integration enabled\n", j, num_cores - j);
+ else
+ printf("\nPerf test: %d writer(s), %d reader(s),"
+ " RCU integration disabled\n", j, num_cores - j);
+
+ /* Create LPM table */
+ config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
+ config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
+ config.flags = 0;
+ lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
+ TEST_LPM_ASSERT(lpm != NULL);
+
+ /* Init RCU variable */
+ if (use_rcu) {
+ sz = rte_rcu_qsbr_get_memsize(num_cores);
+ rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
+ RTE_CACHE_LINE_SIZE);
+ rte_rcu_qsbr_init(rv, num_cores);
+
+ rcu_cfg.v = rv;
+ /* Assign the RCU variable to LPM */
+ if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
+ printf("RCU variable assignment failed\n");
goto error;
}
- /* Delete all the entries */
- for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
- if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
- large_ldepth_route_table[j].depth) != 0) {
- printf("Failed to delete iteration %d, route# %d\n",
- i, j);
- goto error;
- }
- }
- total_cycles = rte_rdtsc_precise() - begin;
+ reader_f = test_lpm_rcu_qsbr_reader;
+ } else
+ reader_f = test_lpm_reader;
- printf("Total LPM Adds: %d\n", TOTAL_WRITES);
- printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
- printf("Average LPM Add/Del: %g cycles\n",
- (double)total_cycles / TOTAL_WRITES);
+ writer_done = 0;
+ __atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
- writer_done = 1;
- /* Wait until all readers have exited */
- for (i = 0; i < num_cores; i++)
- 		rte_eal_wait_lcore(enabled_core_ids[i]);
-
- rte_lpm_free(lpm);
- rte_free(rv);
- lpm = NULL;
- rv = NULL;
-
- /* Test without RCU integration */
- printf("\nPerf test: 1 writer, %d readers, RCU integration disabled\n",
- num_cores);
-
- /* Create LPM table */
- config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
- config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
- config.flags = 0;
- lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
- TEST_LPM_ASSERT(lpm != NULL);
+ __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
- writer_done = 0;
- __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
+ /* Launch reader threads */
+ for (i = j; i < num_cores; i++)
+ rte_eal_remote_launch(reader_f, NULL,
+ enabled_core_ids[i]);
- /* Launch reader threads */
- for (i = 0; i < num_cores; i++)
- rte_eal_remote_launch(test_lpm_reader, NULL,
- enabled_core_ids[i]);
+ /* Launch writer threads */
+ for (i = 0; i < j; i++)
+ rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
+ (void *)(uintptr_t)(i + j),
+ enabled_core_ids[i]);
- /* Measure add/delete. */
- begin = rte_rdtsc_precise();
- for (i = 0; i < RCU_ITERATIONS; i++) {
- /* Add all the entries */
- for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
- if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
- large_ldepth_route_table[j].depth,
- next_hop_add) != 0) {
- printf("Failed to add iteration %d, route# %d\n",
- i, j);
+ /* Wait for writer threads */
+ for (i = 0; i < j; i++)
+ if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
goto error;
- }
- /* Delete all the entries */
- for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
- if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
- large_ldepth_route_table[j].depth) != 0) {
- printf("Failed to delete iteration %d, route# %d\n",
- i, j);
- goto error;
- }
+ printf("Total LPM Adds: %d\n", TOTAL_WRITES);
+ printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
+ printf("Average LPM Add/Del: %"PRIu64" cycles\n",
+ __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
+ / TOTAL_WRITES);
+
+ writer_done = 1;
+ /* Wait until all readers have exited */
+ for (i = j; i < num_cores; i++)
+ rte_eal_wait_lcore(enabled_core_ids[i]);
+
+ rte_lpm_free(lpm);
+ rte_free(rv);
+ lpm = NULL;
+ rv = NULL;
}
- total_cycles = rte_rdtsc_precise() - begin;
-
- printf("Total LPM Adds: %d\n", TOTAL_WRITES);
- printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
- printf("Average LPM Add/Del: %g cycles\n",
- (double)total_cycles / TOTAL_WRITES);
-
- writer_done = 1;
- /* Wait until all readers have exited */
- for (i = 0; i < num_cores; i++)
- rte_eal_wait_lcore(enabled_core_ids[i]);
-
- rte_lpm_free(lpm);
return 0;
@@ -946,9 +767,8 @@ test_lpm_perf(void)
rte_lpm_delete_all(lpm);
rte_lpm_free(lpm);
- test_lpm_rcu_perf();
-
- test_lpm_rcu_perf_multi_writer();
+ test_lpm_rcu_perf_multi_writer(0);
+ test_lpm_rcu_perf_multi_writer(1);
return 0;
}