[PATCH v6] app/dma-perf: introduce dma-perf application

Jiang, Cheng1 cheng1.jiang at intel.com
Wed Jun 14 08:40:08 CEST 2023


Hi,

Thanks for your comments, replies are inline.

Thanks a again,
Cheng

> -----Original Message-----
> From: huangdengdui <huangdengdui at huawei.com>
> Sent: Tuesday, June 13, 2023 8:55 PM
> To: Jiang, Cheng1 <cheng1.jiang at intel.com>; thomas at monjalon.net;
> Richardson, Bruce <bruce.richardson at intel.com>;
> mb at smartsharesystems.com; Xia, Chenbo <chenbo.xia at intel.com>;
> amitprakashs at marvell.com; anoobj at marvell.com
> Cc: dev at dpdk.org; Hu, Jiayu <jiayu.hu at intel.com>; Ding, Xuan
> <xuan.ding at intel.com>; Ma, WenwuX <wenwux.ma at intel.com>; Wang,
> YuanX <yuanx.wang at intel.com>; He, Xingguang <xingguang.he at intel.com>
> Subject: Re: [PATCH v6] app/dma-perf: introduce dma-perf application
> 
> Hi Cheng,
> 
> Few comments inline. Please check.
> 
> Thanks,
> Dengdui
> 
> On 2023/6/13 12:31, Cheng Jiang wrote:
> > There are many high-performance DMA devices supported in DPDK now,
> and
> > these DMA devices can also be integrated into other modules of DPDK as
> > accelerators, such as Vhost. Before integrating DMA into applications,
> > developers need to know the performance of these DMA devices in
> > various scenarios and the performance of CPUs in the same scenario,
> > such as different buffer lengths. Only in this way can we know the
> > target performance of the application accelerated by using them. This
> > patch introduces a high-performance testing tool, which supports
> > comparing the performance of CPU and DMA in different scenarios
> > automatically with a pre-set config file. Memory Copy performance test are
> supported for now.
> >
> > Signed-off-by: Cheng Jiang <cheng1.jiang at intel.com>
> > Signed-off-by: Jiayu Hu <jiayu.hu at intel.com>
> > Signed-off-by: Yuan Wang <yuanx.wang at intel.com>
> > Acked-by: Morten Brørup <mb at smartsharesystems.com>
> > Acked-by: Chenbo Xia <chenbo.xia at intel.com>
> > ---
> > v6:
> >   improved code based on Anoob's comments;
> >   fixed some code structure issues;
> > v5:
> >   fixed some LONG_LINE warnings;
> > v4:
> >   fixed inaccuracy of the memory footprint display;
> > v3:
> >   fixed some typos;
> > v2:
> >   added lcore/dmadev designation;
> >   added error case process;
> >   removed worker_threads parameter from config.ini;
> >   improved the logs;
> >   improved config file;
> >
> >  app/meson.build               |   1 +
> >  app/test-dma-perf/benchmark.c | 477 ++++++++++++++++++++++++++++
> > app/test-dma-perf/config.ini  |  59 ++++
> >  app/test-dma-perf/main.c      | 569
> ++++++++++++++++++++++++++++++++++
> >  app/test-dma-perf/main.h      |  69 +++++
> >  app/test-dma-perf/meson.build |  17 +
> >  6 files changed, 1192 insertions(+)
> >  create mode 100644 app/test-dma-perf/benchmark.c  create mode
> 100644
> > app/test-dma-perf/config.ini  create mode 100644
> > app/test-dma-perf/main.c  create mode 100644 app/test-dma-perf/main.h
> > create mode 100644 app/test-dma-perf/meson.build
> >
> > diff --git a/app/meson.build b/app/meson.build index
> > 74d2420f67..4fc1a83eba 100644
> > --- a/app/meson.build
> > +++ b/app/meson.build
> > @@ -19,6 +19,7 @@ apps = [
> >          'test-cmdline',
> >          'test-compress-perf',
> >          'test-crypto-perf',
> > +        'test-dma-perf',
> >          'test-eventdev',
> >          'test-fib',
> >          'test-flow-perf',
> > diff --git a/app/test-dma-perf/benchmark.c
> > b/app/test-dma-perf/benchmark.c new file mode 100644 index
> > 0000000000..bc1ca82297
> > --- /dev/null
> > +++ b/app/test-dma-perf/benchmark.c
> > @@ -0,0 +1,477 @@
> > +/* SPDX-License-Identifier: BSD-3-Clause
> > + * Copyright(c) 2023 Intel Corporation  */
> > +
> 
> <snip>
> 
> > +static inline int
> > +__rte_format_printf(3, 4)
> > +print_err(const char *func, int lineno, const char *format, ...) {
> > +	va_list ap;
> > +	int ret;
> > +
> > +	ret = fprintf(stderr, "In %s:%d - ", func, lineno);
> > +	va_start(ap, format);
> > +	ret += vfprintf(stderr, format, ap);
> > +	va_end(ap);
> > +
> > +	return ret;
> > +}
> > +
> > +static inline void
> > +calc_result(uint32_t buf_size, uint32_t nr_buf, uint16_t nb_workers,
> uint16_t test_secs,
> > +				uint32_t total_cnt, float *memory, uint32_t
> *ave_cycle,
> > +				float *bandwidth, float *mops)
> > +{
> > +	*memory = (float)(buf_size * (nr_buf / nb_workers) * 2) / (1024 *
> 1024);
> > +	*ave_cycle = test_secs * rte_get_timer_hz() / total_cnt;
> > +	*bandwidth = (buf_size * 8 * (rte_get_timer_hz() / (float)*ave_cycle))
> / 1000000000;
> > +	*mops = (float)rte_get_timer_hz() / *ave_cycle / 1000000;
> 
> The value of ave_cycle may be 0.
> 
> *mops = (float)(total_cnt / test_secs) / 1000000; ?

OK, it makes sense to me. I'll fix it in the next version.

> 
> > +}
> > +
> > +static void
> > +output_result(uint8_t scenario_id, uint32_t lcore_id, char *dma_name,
> uint64_t ave_cycle,
> > +			uint32_t buf_size, uint32_t nr_buf, float memory,
> > +			float bandwidth, float mops, bool is_dma) {
> > +	if (is_dma)
> > +		printf("lcore %u, DMA %s:\n", lcore_id, dma_name);
> > +	else
> > +		printf("lcore %u\n", lcore_id);
> > +
> > +	printf("average cycles/op: %" PRIu64 ", buffer size: %u, nr_buf: %u,
> memory: %.2lfMB, frequency: %" PRIu64 ".\n",
> > +			ave_cycle, buf_size, nr_buf, memory,
> rte_get_timer_hz());
> > +	printf("Average bandwidth: %.3lfGbps, MOps: %.3lf\n", bandwidth,
> > +mops);
> > +
> > +	if (is_dma)
> > +		snprintf(output_str[lcore_id], MAX_OUTPUT_STR_LEN,
> CSV_LINE_DMA_FMT,
> > +			scenario_id, lcore_id, dma_name, buf_size,
> > +			nr_buf, memory, ave_cycle, bandwidth, mops);
> > +	else
> > +		snprintf(output_str[lcore_id], MAX_OUTPUT_STR_LEN,
> CSV_LINE_CPU_FMT,
> > +			scenario_id, lcore_id, buf_size,
> > +			nr_buf, memory, ave_cycle, bandwidth, mops); }
> > +
> > +static inline void
> > +cache_flush_buf(__maybe_unused struct rte_mbuf **array,
> > +		__maybe_unused uint32_t buf_size,
> > +		__maybe_unused uint32_t nr_buf)
> > +{
> > +#ifdef RTE_ARCH_X86_64
> > +	char *data;
> > +	struct rte_mbuf **srcs = array;
> > +	uint32_t i, offset;
> > +
> > +	for (i = 0; i < nr_buf; i++) {
> > +		data = rte_pktmbuf_mtod(srcs[i], char *);
> > +		for (offset = 0; offset < buf_size; offset += 64)
> > +			__builtin_ia32_clflush(data + offset);
> > +	}
> > +#endif
> > +}
> > +
> 
> <snip>
> 
> > +
> > +/* Parse the argument given in the command line of the application */
> > +static int append_eal_args(int argc, char **argv, const char
> > +*eal_args, char **new_argv) {
> > +	int i;
> > +	char *tokens[MAX_EAL_PARAM_NB];
> > +	char args[MAX_EAL_PARAM_LEN] = {0};
> > +	int token_nb, new_argc = 0;
> > +
> > +	for (i = 0; i < argc; i++) {
> > +		if ((strcmp(argv[i], CMDLINE_CONFIG_ARG) == 0) ||
> > +				(strcmp(argv[i], CMDLINE_RESULT_ARG) == 0))
> {
> > +			i++;
> > +			continue;
> > +		}
> > +		strlcpy(new_argv[new_argc], argv[i],
> sizeof(new_argv[new_argc]));
> 
> The type_of argv[new_argc] is *char. Cannot use sizeof().
> 
> strcpy(new_argv[new_argc], argv[i]); or strlcpy(new_argv[new_argc], argv[i],
> MAX_EAL_PARAM_LEN); ?

Yes, it's a mistake, thanks for pointing out! I'll fix it in the next version.

> 
> > +		new_argc++;
> > +	}
> > +
> > +	if (eal_args) {
> > +		strlcpy(args, eal_args, sizeof(args));
> > +		token_nb = rte_strsplit(args, strlen(args),
> > +					tokens, MAX_EAL_PARAM_NB, ' ');
> > +		for (i = 0; i < token_nb; i++)
> > +			strcpy(new_argv[new_argc++], tokens[i]);
> > +	}
> > +
> > +	return new_argc;
> > +}
> > +
> > +int
> > +main(int argc, char *argv[])
> > +{
> > +	int ret;
> > +	uint16_t case_nb;
> > +	uint32_t i, nb_lcores;
> > +	pid_t cpid, wpid;
> > +	int wstatus;
> > +	char args[MAX_EAL_PARAM_NB][MAX_EAL_PARAM_LEN];
> > +	char *pargs[MAX_EAL_PARAM_NB];
> > +	char *cfg_path_ptr = NULL;
> > +	char *rst_path_ptr = NULL;
> > +	char rst_path[PATH_MAX];
> > +	int new_argc;
> > +	bool is_first_case = true;
> > +
> > +	memset(args, 0, sizeof(args));
> > +
> > +	for (i = 0; i < RTE_DIM(pargs); i++)
> > +		pargs[i] = args[i];
> > +
> > +	for (i = 0; i < (uint32_t)argc; i++) {
> > +		if (strncmp(argv[i], CMDLINE_CONFIG_ARG,
> MAX_LONG_OPT_SZ) == 0)
> > +			cfg_path_ptr = argv[i + 1];
> > +		if (strncmp(argv[i], CMDLINE_RESULT_ARG,
> MAX_LONG_OPT_SZ) == 0)
> > +			rst_path_ptr = argv[i + 1];
> > +	}
> > +	if (cfg_path_ptr == NULL) {
> > +		printf("Config file not assigned.\n");
> > +		return -1;
> > +	}
> > +	if (rst_path_ptr == NULL) {
> > +		strlcpy(rst_path, cfg_path_ptr, PATH_MAX);
> > +		strcat(strtok(basename(rst_path), "."), "_result.csv");
> > +		rst_path_ptr = rst_path;
> > +	}
> > +
> > +	case_nb = load_configs(cfg_path_ptr);
> > +	fd = fopen(rst_path_ptr, "w");
> > +	if (fd == NULL) {
> > +		printf("Open output CSV file error.\n");
> > +		return -1;
> > +	}
> > +	fclose(fd);
> > +
> > +	for (i = 0; i < case_nb; i++) {
> > +		if (test_cases[i].test_type == TEST_TYPE_NONE) {
> > +			printf("No test type in test case %d.\n\n", i + 1);
> > +			continue;
> > +		}
> > +		if (!test_cases[i].is_valid) {
> > +			printf("Invalid test case %d.\n\n", i + 1);
> > +			continue;
> > +		}
> > +
> > +		cpid = fork();
> > +		if (cpid < 0) {
> > +			printf("Fork case %d failed.\n", i + 1);
> > +			exit(EXIT_FAILURE);
> > +		} else if (cpid == 0) {
> > +			printf("\nRunning case %u\n\n", i + 1);
> > +
> > +			new_argc = append_eal_args(argc, argv,
> test_cases[i].eal_args, pargs);
> > +			ret = rte_eal_init(new_argc, pargs);
> > +			if (ret < 0)
> > +				rte_exit(EXIT_FAILURE, "Invalid EAL
> arguments\n");
> > +
> > +			/* Check lcores. */
> > +			nb_lcores = rte_lcore_count();
> > +			if (nb_lcores < 2)
> > +				rte_exit(EXIT_FAILURE,
> > +					"There should be at least 2 worker
> lcores.\n");
> > +
> > +			fd = fopen(rst_path_ptr, "a");
> > +			if (!fd) {
> > +				printf("Open output CSV file error.\n");
> > +				return 0;
> > +			}
> > +
> > +			if (is_first_case) {
> > +				output_env_info();
> > +				is_first_case = false;
> > +			}
> > +			run_test(i + 1, &test_cases[i]);
> > +
> > +			/* clean up the EAL */
> > +			rte_eal_cleanup();
> > +
> > +			fclose(fd);
> > +
> > +			printf("\nCase %u completed.\n\n", i + 1);
> > +
> > +			exit(EXIT_SUCCESS);
> > +		} else {
> > +			wpid = waitpid(cpid, &wstatus, 0);
> > +			if (wpid == -1) {
> > +				printf("waitpid error.\n");
> > +				exit(EXIT_FAILURE);
> > +			}
> > +
> > +			if (WIFEXITED(wstatus))
> > +				printf("Case process exited. status %d\n\n",
> > +					WEXITSTATUS(wstatus));
> > +			else if (WIFSIGNALED(wstatus))
> > +				printf("Case process killed by signal %d\n\n",
> > +					WTERMSIG(wstatus));
> > +			else if (WIFSTOPPED(wstatus))
> > +				printf("Case process stopped by
> signal %d\n\n",
> > +					WSTOPSIG(wstatus));
> > +			else if (WIFCONTINUED(wstatus))
> > +				printf("Case process continued.\n\n");
> > +			else
> > +				printf("Case process unknown
> terminated.\n\n");
> > +		}
> > +	}
> > +
> > +	printf("Bye...\n");
> > +	return 0;
> > +}
> 
> <snip>


More information about the dev mailing list