[dpdk-dev] [PATCH v1] examples/distributor: detect high frequency cores
Burakov, Anatoly
anatoly.burakov at intel.com
Wed Mar 27 14:58:32 CET 2019
On 22-Feb-19 11:45 AM, David Hunt wrote:
> The distributor application is bottlenecked by the distributor core,
> so if we can give more frequency to this core, then the overall
> performance of the application may increase.
>
> This patch uses the rte_power_get_capabilities() API to query the cores
> provided in the core mask, and if any high frequency cores are found
> (e.g. Turbo Boost is enabled), we will pin the distributor workload to
> that core.
>
> Signed-off-by: Liang Ma <liang.j.ma at intel.com>
> Signed-off-by: David Hunt <david.hunt at intel.com>
> ---
> examples/distributor/main.c | 185 ++++++++++++++++++++++++-------
> examples/distributor/meson.build | 2 +-
> 2 files changed, 149 insertions(+), 38 deletions(-)
>
> diff --git a/examples/distributor/main.c b/examples/distributor/main.c
> index 03a05e3d9..0541c50b0 100644
> --- a/examples/distributor/main.c
> +++ b/examples/distributor/main.c
> @@ -16,6 +16,7 @@
> #include <rte_prefetch.h>
> #include <rte_distributor.h>
> #include <rte_pause.h>
> +#include <rte_power.h>
>
> #define RX_RING_SIZE 1024
> #define TX_RING_SIZE 1024
> @@ -281,6 +282,7 @@ lcore_rx(struct lcore_params *p)
> if (++port == nb_ports)
> port = 0;
> }
> + rte_power_exit(rte_lcore_id());
why is this being added? it doesn't seem relevant to neither the commit
message nor the feature. if this was missing before, please add it in a
separate patch. same applies to all other instances where
rte_power_exit() is added.
also, your app seems to support power and non-power operation. what
happens when rte_power_exit is called on an lcore that's not been
initialized (i.e. the fallback to non-power mode)? does this (and other
rte_power_exit() instances) code only get called when in power mode?
> /* set worker & tx threads quit flag */
> printf("\nCore %u exiting rx task.\n", rte_lcore_id());
> quit_signal = 1;
> @@ -364,6 +366,8 @@ lcore_distributor(struct lcore_params *p)
> printf("\nCore %u exiting distributor task.\n", rte_lcore_id());
> quit_signal_work = 1;
>
> + rte_power_exit(rte_lcore_id());
> +
> rte_distributor_flush(d);
> /* Unblock any returns so workers can exit */
> rte_distributor_clear_returns(d);
> @@ -435,6 +439,7 @@ lcore_tx(struct rte_ring *in_r)
> }
> }
> }
> + rte_power_exit(rte_lcore_id());
> printf("\nCore %u exiting tx task.\n", rte_lcore_id());
> return 0;
> }
> @@ -575,9 +580,32 @@ lcore_worker(struct lcore_params *p)
> if (num > 0)
> app_stats.worker_bursts[p->worker_id][num-1]++;
> }
> + rte_power_exit(rte_lcore_id());
> return 0;
> }
>
> +static int
> +init_power_library(void)
> +{
> + int ret = 0, lcore_id;
> + for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
RTE_LCORE_FOREACH?
> + if (rte_lcore_is_enabled(lcore_id)) {
> + /* init power management library */
> + ret = rte_power_init(lcore_id);
> + if (ret)
> + RTE_LOG(ERR, POWER,
> + "Library initialization failed on core %u\n",
> + lcore_id);
> + /*
> + * Return on first failure, we'll fall back
> + * to non-power operation
> + */
> + return ret;
You'll probably want to fix indentation here, it's misleading.
> + }
> + }
> + return ret;
> +}
> +
> /* display usage */
> static void
> print_usage(const char *prgname)
<...>
> + * Here we'll pre-assign lcore ids to the rx, tx and
> + * distributor workloads if there's higher frequency
> + * on those cores e.g. if Turbo Boost is enabled.
> + * It's also worth mentioning that it will assign cores in a
> + * specific order, so that if there's less than three
> + * available, the higher frequency cores will go to the
> + * distributor first, then rx, then tx.
> + */
> + RTE_LCORE_FOREACH_SLAVE(lcore_id) {
> +
> + rte_power_get_capabilities(lcore_id, &lcore_cap);
> +
> + if (lcore_cap.turbo == 1) {
> + priority_num++;
> + switch (priority_num) {
> + case 1:
> + distr_core_id = lcore_id;
> + printf("Distributor on priority core %d\n",
This says "priority", other instances say "preferred". Which is it? :)
> + lcore_id);
> + break;
> + case 2:
> + rx_core_id = lcore_id;
> + printf("Rx on preferred core %d\n",
> + lcore_id);
> + break;
> + case 3:
> + tx_core_id = lcore_id;
> + printf("Tx on preferred core %d\n",
> + lcore_id);
> + break;
> + default:
> + break;
> + }
> + }
> + }
> + }
> +
> + /*
> + * If there's any of the key workloads left without an lcore_id
Double space after "there's".
> + * after the higer frequency core assignment above, pre-assign
> + * them here.
> + */
> RTE_LCORE_FOREACH_SLAVE(lcore_id) {
> - if (worker_id == rte_lcore_count() - 3) {
> - printf("Starting distributor on lcore_id %d\n",
> - lcore_id);
> - /* distributor core */
> - struct lcore_params *p =
> - rte_malloc(NULL, sizeof(*p), 0);
> - if (!p)
> - rte_panic("malloc failure\n");
> - *p = (struct lcore_params){worker_id, d,
> - rx_dist_ring, dist_tx_ring, mbuf_pool};
> - rte_eal_remote_launch(
> - (lcore_function_t *)lcore_distributor,
> - p, lcore_id);
> - } else if (worker_id == rte_lcore_count() - 4) {
> - printf("Starting tx on worker_id %d, lcore_id %d\n",
> - worker_id, lcore_id);
> - /* tx core */
> - rte_eal_remote_launch((lcore_function_t *)lcore_tx,
> - dist_tx_ring, lcore_id);
> - } else if (worker_id == rte_lcore_count() - 2) {
> - printf("Starting rx on worker_id %d, lcore_id %d\n",
> - worker_id, lcore_id);
> - /* rx core */
> - struct lcore_params *p =
> - rte_malloc(NULL, sizeof(*p), 0);
> - if (!p)
> - rte_panic("malloc failure\n");
> - *p = (struct lcore_params){worker_id, d, rx_dist_ring,
> - dist_tx_ring, mbuf_pool};
> - rte_eal_remote_launch((lcore_function_t *)lcore_rx,
> - p, lcore_id);
> +
> + if (distr_core_id == 0) {
0 is a valid core id. You would probably want to use -1 here.
> + distr_core_id = lcore_id;
> + printf("Distributor on core %d\n", lcore_id);
> + }
> + if ((rx_core_id == 0) &&
> + (lcore_id != distr_core_id)) {
You could just check if (lcore_id == distr_core_id || lcore_id ==
rx_core_id || lcore_id == tx_core_id) and skip the iteration entirely,
rather than checking at every step.
> + rx_core_id = lcore_id;
> + printf("Rx on core %d\n", lcore_id);
> + }
> + if ((tx_core_id == 0) &&
> + (lcore_id != distr_core_id) &&
> + (lcore_id != rx_core_id)) {
> + tx_core_id = lcore_id;
> + printf("Tx on core %d\n", lcore_id);
> + }
> + counter++;
> + }
> +
> + printf(" tx id %d, dist id %d, rx id %d\n",
> + tx_core_id,
> + distr_core_id,
> + rx_core_id);
> +
> + /*
> + * Kick off all the worker threads first, avoiding the pre-assigned
> + * lcore_ids for tx, rx and distributor workloads.
> + */
> + RTE_LCORE_FOREACH_SLAVE(lcore_id) {
> +
> + if ((lcore_id == distr_core_id) ||
> + (lcore_id == rx_core_id) ||
> + (lcore_id == tx_core_id)) {
> +
> } else {
This is a very unorthodox way of skipping an iteration :)
--
Thanks,
Anatoly
More information about the dev
mailing list