[PATCH v5 1/4] eal: add lcore info in telemetry
Kevin Laatz
kevin.laatz at intel.com
Wed Jan 18 10:42:07 CET 2023
On 16/12/2022 10:21, Robin Jarry wrote:
> Report the same information as rte_lcore_dump() in the telemetry
> API into /eal/lcore/list and /eal/lcore/info,ID.
>
> Example:
>
> --> /eal/lcore/info,3
> {
> "/eal/lcore/info": {
> "lcore_id": 3,
> "socket": 0,
> "role": "RTE",
> "cpuset": [
> 3
> ]
> }
> }
>
> Signed-off-by: Robin Jarry <rjarry at redhat.com>
> Acked-by: Morten Brørup <mb at smartsharesystems.com>
> ---
> v4 -> v5: No change
>
> lib/eal/common/eal_common_lcore.c | 96 +++++++++++++++++++++++++++++++
> 1 file changed, 96 insertions(+)
>
> diff --git a/lib/eal/common/eal_common_lcore.c b/lib/eal/common/eal_common_lcore.c
> index 06c594b0224f..16548977dce8 100644
> --- a/lib/eal/common/eal_common_lcore.c
> +++ b/lib/eal/common/eal_common_lcore.c
> @@ -10,6 +10,9 @@
> #include <rte_errno.h>
> #include <rte_lcore.h>
> #include <rte_log.h>
> +#ifndef RTE_EXEC_ENV_WINDOWS
> +#include <rte_telemetry.h>
> +#endif
>
> #include "eal_private.h"
> #include "eal_thread.h"
> @@ -456,3 +459,96 @@ rte_lcore_dump(FILE *f)
> {
> rte_lcore_iterate(lcore_dump_cb, f);
> }
> +
> +#ifndef RTE_EXEC_ENV_WINDOWS
> +static int
> +lcore_telemetry_id_cb(unsigned int lcore_id, void *arg)
> +{
> + struct rte_tel_data *d = arg;
> + return rte_tel_data_add_array_int(d, lcore_id);
> +}
> +
> +static int
> +handle_lcore_list(const char *cmd __rte_unused,
> + const char *params __rte_unused,
> + struct rte_tel_data *d)
> +{
> + int ret = rte_tel_data_start_array(d, RTE_TEL_INT_VAL);
> + if (ret)
> + return ret;
> + return rte_lcore_iterate(lcore_telemetry_id_cb, d);
> +}
> +
> +struct lcore_telemetry_info {
> + unsigned int lcore_id;
> + struct rte_tel_data *d;
> +};
> +
> +static int
> +lcore_telemetry_info_cb(unsigned int lcore_id, void *arg)
> +{
> + struct lcore_telemetry_info *info = arg;
> + struct rte_config *cfg = rte_eal_get_configuration();
> + struct rte_tel_data *cpuset;
> + const char *role;
> + unsigned int cpu;
> +
> + if (info->lcore_id != lcore_id)
> + return 0;
> +
> + switch (cfg->lcore_role[lcore_id]) {
> + case ROLE_RTE:
> + role = "RTE";
> + break;
> + case ROLE_SERVICE:
> + role = "SERVICE";
> + break;
> + case ROLE_NON_EAL:
> + role = "NON_EAL";
> + break;
> + default:
> + role = "UNKNOWN";
> + break;
> + }
> + rte_tel_data_start_dict(info->d);
> + rte_tel_data_add_dict_int(info->d, "lcore_id", lcore_id);
> + rte_tel_data_add_dict_int(info->d, "socket", rte_lcore_to_socket_id(lcore_id));
> + rte_tel_data_add_dict_string(info->d, "role", role);
> + cpuset = rte_tel_data_alloc();
> + if (!cpuset)
> + return -ENOMEM;
> + rte_tel_data_start_array(cpuset, RTE_TEL_INT_VAL);
> + for (cpu = 0; cpu < CPU_SETSIZE; cpu++)
> + if (CPU_ISSET(cpu, &lcore_config[lcore_id].cpuset))
> + rte_tel_data_add_array_int(cpuset, cpu);
> + rte_tel_data_add_dict_container(info->d, "cpuset", cpuset, 0);
> +
> + return 0;
> +}
> +
> +static int
> +handle_lcore_info(const char *cmd __rte_unused, const char *params, struct rte_tel_data *d)
> +{
> + struct lcore_telemetry_info info = { .d = d };
> + char *endptr = NULL;
> + if (params == NULL || strlen(params) == 0)
> + return -EINVAL;
> + errno = 0;
> + info.lcore_id = strtoul(params, &endptr, 10);
> + if (errno)
> + return -errno;
> + if (endptr == params)
> + return -EINVAL;
> + return rte_lcore_iterate(lcore_telemetry_info_cb, &info);
> +}
> +
> +RTE_INIT(lcore_telemetry)
> +{
> + rte_telemetry_register_cmd(
> + "/eal/lcore/list", handle_lcore_list,
> + "List of lcore ids. Takes no parameters");
> + rte_telemetry_register_cmd(
> + "/eal/lcore/info", handle_lcore_info,
> + "Returns lcore info. Parameters: int lcore_id");
> +}
> +#endif /* !RTE_EXEC_ENV_WINDOWS */
Hi Robin,
Thanks for taking the time to work on this. It is a good implementation
for debug use-cases.
I have 2 suggestions which would improve the usability of the data:
1. Could we make the lcore_id parameter on /eal/lcore/info optional?
This would allow users to read info for all lcores in the application at
once.
2. Could we add 2 additional telemetry endpoints? One would return an
array of busy_cycles values and the other would return an array of
total_cycles values. These arrays could be used in conjunction with the
/eal/lcore/list endpoint to quickly read the usage related metrics. I've
included an example diff below [1].
We have a use-case beyond debugging in which we read telemetry every few
milliseconds. From a performance point of view, adding the 2 additional
endpoints would be very beneficial.
Thanks,
Kevin
[1]
diff --git a/lib/eal/common/eal_common_lcore.c b/lib/eal/common/eal_common_lcore.c
index 210636d21d..94ddb276c5 100644
--- a/lib/eal/common/eal_common_lcore.c
+++ b/lib/eal/common/eal_common_lcore.c
@@ -569,6 +569,32 @@ handle_lcore_info(const char *cmd __rte_unused, const char *params, struct rte_t
return rte_lcore_iterate(lcore_telemetry_info_cb, &info);
}
+static int
+lcore_telemetry_busy_cycles_cb(unsigned int lcore_id, void *arg)
+{
+ struct rte_tel_data *d = arg;
+ struct rte_lcore_usage usage;
+ rte_lcore_usage_cb usage_cb;
+ unsigned long cycles = 0;
+
+ memset(&usage, 0, sizeof(usage));
+ usage_cb = lcore_usage_cb;
+ if (usage_cb != NULL && usage_cb(lcore_id, &usage) == 0)
+ cycles = usage.busy_cycles;
+
+ return rte_tel_data_add_array_u64(d, cycles);
+}
+
+static int
+handle_lcore_busy_cycles(const char *cmd __rte_unused,
+ const char *params __rte_unused, struct rte_tel_data *d)
+{
+ int ret = rte_tel_data_start_array(d, RTE_TEL_U64_VAL);
+ if (ret)
+ return ret;
+ return rte_lcore_iterate(lcore_telemetry_busy_cycles_cb, d);
+}
+
RTE_INIT(lcore_telemetry)
{
rte_telemetry_register_cmd(
@@ -577,5 +603,8 @@ RTE_INIT(lcore_telemetry)
rte_telemetry_register_cmd(
"/eal/lcore/info", handle_lcore_info,
"Returns lcore info. Parameters: int lcore_id");
+ rte_telemetry_register_cmd(
+ "/eal/lcore/busy_cycles", handle_lcore_busy_cycles,
+ "List of busy cycle values. Takes no parameters");
}
#endif /* !RTE_EXEC_ENV_WINDOWS */
More information about the dev
mailing list