[EXT] [PATCH v5] app/dma-perf: introduce dma-perf application
Amit Prakash Shukla
amitprakashs at marvell.com
Fri Jun 9 16:03:12 CEST 2023
> -----Original Message-----
> From: Cheng Jiang <cheng1.jiang at intel.com>
> Sent: Thursday, June 8, 2023 2:14 PM
> To: thomas at monjalon.net; bruce.richardson at intel.com;
> mb at smartsharesystems.com; chenbo.xia at intel.com
> Cc: dev at dpdk.org; jiayu.hu at intel.com; xuan.ding at intel.com;
> wenwux.ma at intel.com; yuanx.wang at intel.com; xingguang.he at intel.com;
> Cheng Jiang <cheng1.jiang at intel.com>
> Subject: [EXT] [PATCH v5] app/dma-perf: introduce dma-perf application
>
> External Email
>
> ----------------------------------------------------------------------
> There are many high-performance DMA devices supported in DPDK now,
> and these DMA devices can also be integrated into other modules of DPDK as
> accelerators, such as Vhost. Before integrating DMA into applications,
> developers need to know the performance of these DMA devices in various
> scenarios and the performance of CPUs in the same scenario, such as
> different buffer lengths. Only in this way can we know the target
> performance of the application accelerated by using them. This patch
> introduces a high-performance testing tool, which supports comparing the
> performance of CPU and DMA in different scenarios automatically with a pre-
> set config file. Only the memory copy performance test is supported for now.
>
> Signed-off-by: Cheng Jiang <cheng1.jiang at intel.com>
> Signed-off-by: Jiayu Hu <jiayu.hu at intel.com>
> Signed-off-by: Yuan Wang <yuanx.wang at intel.com>
> Acked-by: Morten Brørup <mb at smartsharesystems.com>
> Acked-by: Chenbo Xia <chenbo.xia at intel.com>
> ---
> v5:
> fixed some LONG_LINE warnings;
> v4:
> fixed inaccuracy of the memory footprint display;
> v3:
> fixed some typos;
> v2:
> added lcore/dmadev designation;
> added error case process;
> removed worker_threads parameter from config.ini;
> improved the logs;
> improved config file;
>
> app/meson.build | 1 +
> app/test-dma-perf/benchmark.c | 472 ++++++++++++++++++++++++++++
> app/test-dma-perf/config.ini | 59 ++++
> app/test-dma-perf/main.c | 569 ++++++++++++++++++++++++++++++++++
> app/test-dma-perf/main.h | 69 +++++
> app/test-dma-perf/meson.build | 17 +
> 6 files changed, 1187 insertions(+)
> create mode 100644 app/test-dma-perf/benchmark.c
> create mode 100644 app/test-dma-perf/config.ini
> create mode 100644 app/test-dma-perf/main.c
> create mode 100644 app/test-dma-perf/main.h
> create mode 100644 app/test-dma-perf/meson.build
>
<snip>
> +
> +static inline int
> +do_dma_mem_copy(void *p)
> +{
> + uint16_t *para_idx = (uint16_t *)p;
> + volatile struct lcore_params *para = worker_params[*para_idx];
> + volatile struct worker_info *worker_info = &(para->worker_info);
> + uint16_t dev_id = para->dev_id;
> + uint32_t nr_buf = para->nr_buf;
> + uint16_t kick_batch = para->kick_batch;
> + uint32_t buf_size = para->buf_size;
> + struct rte_mbuf **srcs = para->srcs;
> + struct rte_mbuf **dsts = para->dsts;
> + int64_t async_cnt = 0;
> + int nr_cpl = 0;
> + uint32_t i;
> + uint32_t poll_cnt = 0;
> +
> + worker_info->stop_flag = false;
> + worker_info->ready_flag = true;
> +
> + while (!worker_info->start_flag)
> + ;
> +
> + while (1) {
> + for (i = 0; i < nr_buf; i++) {
> + if (unlikely(rte_dma_copy(dev_id,
> + 0,
> + rte_pktmbuf_iova(srcs[i]),
> + rte_pktmbuf_iova(dsts[i]),
> + buf_size,
> + 0) < 0)) {
> + rte_dma_submit(dev_id, 0);
> + while (rte_dma_burst_capacity(dev_id, 0) ==
> 0) {
> + nr_cpl = rte_dma_completed(dev_id,
> 0, MAX_DMA_CPL_NB,
> + NULL, NULL);
> + async_cnt -= nr_cpl;
> + worker_info->total_cpl += nr_cpl;
> + }
> + if (rte_dma_copy(dev_id,
> + 0,
> + rte_pktmbuf_iova(srcs[i]),
> + rte_pktmbuf_iova(dsts[i]),
> + buf_size,
> + 0) < 0) {
> + printf("enqueue fail again at %u\n",
> i);
> + printf("space:%d\n",
> rte_dma_burst_capacity(dev_id, 0));
> + rte_exit(EXIT_FAILURE, "DMA
> enqueue failed\n");
[Amit]: On every exit path, whether success or failure, please call rte_dma_stop() and rte_dma_close() so that the application exits cleanly.
> + }
> + }
> + async_cnt++;
> +
> + if ((async_cnt % kick_batch) == 0) {
> + rte_dma_submit(dev_id, 0);
> + /* add a poll to avoid ring full */
> + nr_cpl = rte_dma_completed(dev_id, 0,
> MAX_DMA_CPL_NB, NULL, NULL);
> + async_cnt -= nr_cpl;
> + worker_info->total_cpl += nr_cpl;
> + }
> + }
> +
> + if (worker_info->stop_flag)
> + break;
> + }
> +
> + rte_dma_submit(dev_id, 0);
> + while ((async_cnt > 0) && (poll_cnt++ < POLL_MAX)) {
> + nr_cpl = rte_dma_completed(dev_id, 0,
> MAX_DMA_CPL_NB, NULL, NULL);
> + async_cnt -= nr_cpl;
> + }
> +
> + return 0;
> +}
> +
<snip>
> +
> +void
> +mem_copy_benchmark(struct test_configure *cfg, bool is_dma) {
> + uint16_t i;
> + uint32_t offset;
> + unsigned int lcore_id = 0;
> + struct rte_mbuf **srcs = NULL, **dsts = NULL;
> + struct lcore_dma_map_t *ldm = &cfg->lcore_dma_map;
> + unsigned int buf_size = cfg->buf_size.cur;
> + uint16_t kick_batch = cfg->kick_batch.cur;
> + uint32_t nr_buf = cfg->nr_buf = (cfg->mem_size.cur * 1024 * 1024) /
> (cfg->buf_size.cur * 2);
> + uint16_t nb_workers = ldm->cnt;
> + uint16_t test_secs = cfg->test_secs;
> + float memory;
> + uint32_t avg_cycles = 0;
> + float mops;
> + float bandwidth;
> +
> + if (setup_memory_env(cfg, &srcs, &dsts) < 0)
> + goto out;
> +
> + if (is_dma)
> + if (config_dmadevs(cfg) < 0)
> + goto out;
> +
> + if (cfg->cache_flush) {
> + cache_flush_buf(srcs, buf_size, nr_buf);
> + cache_flush_buf(dsts, buf_size, nr_buf);
> + rte_mb();
> + }
> +
> + printf("Start testing....\n");
> +
> + for (i = 0; i < nb_workers; i++) {
> + lcore_id = ldm->lcores[i];
> + offset = nr_buf / nb_workers * i;
> +
> + worker_params[i] = rte_malloc(NULL, sizeof(struct
> lcore_params), 0);
> + if (!worker_params[i]) {
> + printf("lcore parameters malloc failure for lcore
> %d\n", lcore_id);
> + break;
> + }
> + if (is_dma) {
> + worker_params[i]->dma_name = ldm-
> >dma_names[i];
> + worker_params[i]->dev_id = ldm->dma_ids[i];
> + worker_params[i]->kick_batch = kick_batch;
> + }
> + worker_params[i]->worker_id = i;
> + worker_params[i]->nr_buf = (uint32_t)(nr_buf /
> nb_workers);
> + worker_params[i]->buf_size = buf_size;
> + worker_params[i]->test_secs = test_secs;
> + worker_params[i]->srcs = srcs + offset;
> + worker_params[i]->dsts = dsts + offset;
> + worker_params[i]->scenario_id = cfg->scenario_id;
> + worker_params[i]->lcore_id = lcore_id;
> +
> + if (is_dma)
> + rte_eal_remote_launch(do_dma_mem_copy, (void
> *)(&i), lcore_id);
> + else
> + rte_eal_remote_launch(do_cpu_mem_copy, (void
> *)(&i), lcore_id);
> + }
> +
> + while (1) {
> + bool ready = true;
> + for (i = 0; i < nb_workers; i++) {
> + if (worker_params[i]->worker_info.ready_flag ==
> false) {
> + ready = 0;
> + break;
> + }
> + }
> + if (ready)
> + break;
> + }
> +
> + for (i = 0; i < nb_workers; i++)
> + worker_params[i]->worker_info.start_flag = true;
> +
> + usleep(TEST_WAIT_U_SECOND);
> + for (i = 0; i < nb_workers; i++)
> + worker_params[i]->worker_info.test_cpl =
> +worker_params[i]->worker_info.total_cpl;
> +
> + usleep(test_secs * 1000 * 1000);
> + for (i = 0; i < nb_workers; i++)
> + worker_params[i]->worker_info.test_cpl =
> worker_params[i]->worker_info.total_cpl -
> + worker_params[i]-
> >worker_info.test_cpl;
> +
> + for (i = 0; i < nb_workers; i++)
> + worker_params[i]->worker_info.stop_flag = true;
> +
> + rte_eal_mp_wait_lcore();
> +
> + for (i = 0; i < nb_workers; i++) {
> + calc_result(buf_size, nr_buf, nb_workers, test_secs,
> + worker_params[i]->worker_info.test_cpl,
> + &memory, &avg_cycles, &bandwidth, &mops);
> + output_result(cfg->scenario_id, worker_params[i]->lcore_id,
> + worker_params[i]->dma_name,
> avg_cycles, buf_size,
> + nr_buf / nb_workers, memory,
> bandwidth, mops, is_dma);
> + }
> +
> +out:
> + /* free env */
> + if (srcs)
> + rte_pktmbuf_free_bulk(srcs, nr_buf);
> + if (dsts)
> + rte_pktmbuf_free_bulk(dsts, nr_buf);
> +
> + if (src_pool)
> + rte_mempool_free(src_pool);
> + if (dst_pool)
> + rte_mempool_free(dst_pool);
> +
> + if (is_dma) {
> + for (i = 0; i < nb_workers; i++) {
> + printf("Stopping dmadev %d\n", ldm->dma_ids[i]);
> + rte_dma_stop(ldm->dma_ids[i]);
[Amit]: After the rte_dma_stop() call, please also call rte_dma_close() for a clean exit.
<snip>
> +#endif /* _MAIN_H_ */
> diff --git a/app/test-dma-perf/meson.build b/app/test-dma-perf/meson.build
> new file mode 100644
> index 0000000000..bd6c264002
> --- /dev/null
> +++ b/app/test-dma-perf/meson.build
> @@ -0,0 +1,17 @@
> +# SPDX-License-Identifier: BSD-3-Clause
> +# Copyright(c) 2019-2023 Intel Corporation
> +
> +# meson file, for building this app as part of a main DPDK build.
> +
> +if is_windows
> + build = false
> + reason = 'not supported on Windows'
> + subdir_done()
> +endif
> +
> +deps += ['dmadev', 'mbuf', 'cfgfile']
> +
> +sources = files(
> + 'main.c',
> + 'benchmark.c',
> +)
> --
> 2.40.1
More information about the dev
mailing list