[dpdk-dev] [PATCH v3 6/7] power: support monitoring multiple Rx queues

Ananyev, Konstantin konstantin.ananyev at intel.com
Tue Jun 29 14:14:38 CEST 2021



> -----Original Message-----
> From: Burakov, Anatoly <anatoly.burakov at intel.com>
> Sent: Tuesday, June 29, 2021 12:40 PM
> To: Ananyev, Konstantin <konstantin.ananyev at intel.com>; dev at dpdk.org; Hunt, David <david.hunt at intel.com>
> Cc: Loftus, Ciara <ciara.loftus at intel.com>
> Subject: Re: [dpdk-dev] [PATCH v3 6/7] power: support monitoring multiple Rx queues
> 
> On 29-Jun-21 1:07 AM, Ananyev, Konstantin wrote:
> >
> >
> >>>> Use the new multi-monitor intrinsic to allow monitoring multiple ethdev
> >>>> Rx queues while entering the energy efficient power state. The multi
> >>>> version will be used unconditionally if supported, and the UMWAIT one
> >>>> will only be used when multi-monitor is not supported by the hardware.
> >>>>
> >>>> Signed-off-by: Anatoly Burakov <anatoly.burakov at intel.com>
> >>>> ---
> >>>>    doc/guides/prog_guide/power_man.rst |  9 ++--
> >>>>    lib/power/rte_power_pmd_mgmt.c      | 76 ++++++++++++++++++++++++++++-
> >>>>    2 files changed, 80 insertions(+), 5 deletions(-)
> >>>>
> >>>> diff --git a/doc/guides/prog_guide/power_man.rst b/doc/guides/prog_guide/power_man.rst
> >>>> index fac2c19516..3245a5ebed 100644
> >>>> --- a/doc/guides/prog_guide/power_man.rst
> >>>> +++ b/doc/guides/prog_guide/power_man.rst
> >>>> @@ -221,13 +221,16 @@ power saving whenever empty poll count reaches a certain number.
> >>>>    The "monitor" mode is only supported in the following configurations and scenarios:
> >>>>
> >>>>    * If ``rte_cpu_get_intrinsics_support()`` function indicates that
> >>>> +  ``rte_power_monitor_multi()`` function is supported by the platform, then
> >>>> +  monitoring multiple Ethernet Rx queues for traffic will be supported.
> >>>> +
> >>>> +* If ``rte_cpu_get_intrinsics_support()`` function indicates that only
> >>>>      ``rte_power_monitor()`` is supported by the platform, then monitoring will be
> >>>>      limited to a mapping of 1 core 1 queue (thus, each Rx queue will have to be
> >>>>      monitored from a different lcore).
> >>>>
> >>>> -* If ``rte_cpu_get_intrinsics_support()`` function indicates that the
> >>>> -  ``rte_power_monitor()`` function is not supported, then monitor mode will not
> >>>> -  be supported.
> >>>> +* If ``rte_cpu_get_intrinsics_support()`` function indicates that neither of the
> >>>> +  two monitoring functions is supported, then monitor mode will not be supported.
> >>>>
> >>>>    * Not all Ethernet devices support monitoring, even if the underlying
> >>>>      platform may support the necessary CPU instructions. Please refer to
> >>>> diff --git a/lib/power/rte_power_pmd_mgmt.c b/lib/power/rte_power_pmd_mgmt.c
> >>>> index 7762cd39b8..aab2d4f1ee 100644
> >>>> --- a/lib/power/rte_power_pmd_mgmt.c
> >>>> +++ b/lib/power/rte_power_pmd_mgmt.c
> >>>> @@ -155,6 +155,24 @@ queue_list_remove(struct pmd_core_cfg *cfg, const union queue *q)
> >>>>         return 0;
> >>>>    }
> >>>>
> >>>> +static inline int
> >>>> +get_monitor_addresses(struct pmd_core_cfg *cfg,
> >>>> +             struct rte_power_monitor_cond *pmc)
> >>>> +{
> >>>> +     const struct queue_list_entry *qle;
> >>>> +     size_t i = 0;
> >>>> +     int ret;
> >>>> +
> >>>> +     TAILQ_FOREACH(qle, &cfg->head, next) {
> >>>> +             struct rte_power_monitor_cond *cur = &pmc[i];
> >>>
> >>> Looks like you never increment 'i' value inside that function.
> >>> Also, it would probably be safer to add a 'num' parameter to check that
> >>> we never over-run the pmc[] boundaries.
> >>
> >> Will fix in v4, good catch!
> >>
> >>>
> >>>> +             const union queue *q = &qle->queue;
> >>>> +             ret = rte_eth_get_monitor_addr(q->portid, q->qid, cur);
> >>>> +             if (ret < 0)
> >>>> +                     return ret;
> >>>> +     }
> >>>> +     return 0;
> >>>> +}
> >>>> +
> >>>>    static void
> >>>>    calc_tsc(void)
> >>>>    {
> >>>> @@ -183,6 +201,48 @@ calc_tsc(void)
> >>>>         }
> >>>>    }
> >>>>
> >>>> +static uint16_t
> >>>> +clb_multiwait(uint16_t port_id, uint16_t qidx,
> >>>> +             struct rte_mbuf **pkts __rte_unused, uint16_t nb_rx,
> >>>> +             uint16_t max_pkts __rte_unused, void *addr __rte_unused)
> >>>> +{
> >>>> +     const unsigned int lcore = rte_lcore_id();
> >>>> +     const union queue q = {.portid = port_id, .qid = qidx};
> >>>> +     const bool empty = nb_rx == 0;
> >>>> +     struct pmd_core_cfg *q_conf;
> >>>> +
> >>>> +     q_conf = &lcore_cfg[lcore];
> >>>> +
> >>>> +     /* early exit */
> >>>> +     if (likely(!empty)) {
> >>>> +             q_conf->empty_poll_stats = 0;
> >>>> +     } else {
> >>>> +             /* do we care about this particular queue? */
> >>>> +             if (!queue_is_power_save(q_conf, &q))
> >>>> +                     return nb_rx;
> >>>
> >>> I still don't understand the need for a 'special' power_save queue here...
> >>> Why can't we just have a function:
> >>>
> >>> get_number_of_queues_whose_sequential_empty_polls_less_than_threshold(struct pmd_core_cfg *lcore_cfg),
> >>> and then just:
> >>>
> >>> /* all queues have at least EMPTYPOLL_MAX sequential empty polls */
> >>> if (get_number_of_queues_whose_sequential_empty_polls_less_than_threshold(q_conf) == 0) {
> >>>       /* go into power-save mode here */
> >>> }
> >>
> >> Okay, let's go through this step by step :)
> >>
> >> Let's suppose we have three queues - q0, q1 and q2. We want to sleep
> >> whenever there's no traffic on *all of them*, however we cannot know
> >> that until we have checked all of them.
> >>
> >> So, let's suppose that q0, q1 and q2 were empty all this time, but now
> >> some traffic arrived at q2 while we're still checking q0. We see that q0
> >> is empty, and all of the queues were empty for the last N polls, so we
> >> think we will be safe to sleep at q0 despite the fact that traffic has
> >> just arrived at q2.
> >> This is not an issue with MONITOR mode because we will be able to see if
> >> current Rx ring descriptor is busy or not via the NIC callback, *but
> >> this is not possible* with PAUSE and SCALE modes, because they don't
> >> have the sneaky lookahead function of MONITOR! So, with PAUSE and SCALE
> >> modes, it is possible to end up in a situation where you *think* you
> >> don't have any traffic, but you actually do, you just haven't checked
> >> the relevant queue yet.
> >
> > I think such situation is unavoidable.
> > Yes, traffic can arrive to *any* queue at *any* time.
> > With your example above - the user chose q2 as the 'special' queue, but
> > traffic actually arrives on q0 or q1.
> > And yes, if the user chooses the PAUSE or SCALE methods, he *can* miss the traffic,
> > because, as you said, for these methods there is no notification mechanism.
> > I think these are just unavoidable limitations of these power-save methods.
> >
> >> In order to prevent this from happening, we do not sleep on every queue,
> >> instead we sleep *once* per loop.
> >
> > Yes, I totally agree we shouldn't sleep on *every* queue.
> > We need to go to sleep when there is no traffic on *any* of the queues we monitor.
> >
> >> That is, we check q0, check q1, check
> >> q2, and only then we decide whether we want to sleep or not.
> >
> >> Of course, with such scheme it is still possible to e.g. sleep in q2
> >> while there's traffic waiting in q0,
> >
> > Yes, exactly.
> >
> >> but worst case is less bad with
> >> this scheme, because we'll be doing at worst 1 extra sleep.
> >
> > Hmm, I think it would be one extra sleep anyway.
> >
> >> Whereas with what you're suggesting, if we had e.g. 10 queues to poll,
> >> and we checked q1 but traffic has just arrived at q0, we'll be sleeping
> >> at q1, then we'll be sleeping at q2, then we'll be sleeping at q3, then
> >> we'll be sleeping at q4, then we'll be sleeping at q5.... and 9 sleeps
> >> later we finally reach q0 and find out after all this time that we
> >> shouldn't have slept in the first place.
> >
> > Ah ok, I think I understand now what you are saying.
> > Sure, to avoid such a situation, we'll need to maintain extra counters and
> > update them properly when we go to sleep.
> > I should have stated that clearly at the beginning.
> > It might be easier to explain what I meant with a code snippet:
> >
> > lcore_conf needs 2 counters:
> > uint64_t   nb_queues_ready_to_sleep;
> > uint64_t   nb_sleeps;
> >
> > Plus each queue needs 2 counters:
> > uint64_t nb_empty_polls;
> > uint64_t nb_sleeps;
> >
> > Now, at rx_callback():
> >
> > /* check whether a sleep happened since the previous call;
> >    if yes, reset the queue counters */
> > if (queue->nb_sleeps != lcore_conf->nb_sleeps) {
> >      queue->nb_sleeps = lcore_conf->nb_sleeps;
> >      queue->nb_empty_polls = 0;
> > }
> >
> >   /* packet arrived, reset counters */
> >   if (nb_rx != 0) {
> >     /* queue is not 'ready_to_sleep' any more */
> >     if (queue->nb_empty_polls > EMPTYPOLL_MAX)
> >         lcore_conf->nb_queues_ready_to_sleep--;
> >     queue->nb_empty_polls = 0;
> >
> > /* empty poll */
> > } else {
> >      /* queue reaches EMPTYPOLL_MAX threshold, mark it as 'ready_to_sleep' */
> >      if (queue->nb_empty_polls == EMPTYPOLL_MAX)
> >         lcore_conf->nb_queues_ready_to_sleep++;
> >      queue->nb_empty_polls++;
> > }
> >
> >     /* no traffic on any queue for at least EMPTYPOLL_MAX iterations */
> >     if (lcore_conf->nb_queues_ready_to_sleep == lcore_conf->n_queues) {
> >        /* update counters and sleep */
> >        lcore_conf->nb_sleeps++;
> >        lcore_conf->nb_queues_ready_to_sleep = 0;
> >        goto_sleep();
> >     }
> >
> Actually, I don't think this is going to work, because I can see no
> (easy) way to get from the lcore to a specific queue. I mean, you could have
> an O(N) for loop that iterates over the list of queues every time we
> enter the callback, but I don't think that's such a good idea.

I think something like this will work:

struct queue_list_entry {
        TAILQ_ENTRY(queue_list_entry) next;
        union queue queue;
+       /* pointer to the lcore config that this queue is managed by */
+       struct pmd_core_cfg *lcore_cfg;
+       /* queue Rx callback */
+       const struct rte_eth_rxtx_callback *cur_cb;
};
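
To make that work, queue_list_add() would also have to hand back the new
entry instead of just a status code, so that the caller can store it.
A rough sketch only - assuming malloc-based allocation, the n_queues counter
from the snippet above, and the two per-queue counters living in the entry:

static struct queue_list_entry *
queue_list_add(struct pmd_core_cfg *cfg, const union queue *q)
{
        struct queue_list_entry *qle;

        qle = malloc(sizeof(*qle));
        if (qle == NULL)
                return NULL;

        qle->queue = *q;
        /* remember which lcore config this queue is managed by */
        qle->lcore_cfg = cfg;
        /* per-queue empty-poll/sleep counters start from zero */
        qle->nb_empty_polls = 0;
        qle->nb_sleeps = 0;

        TAILQ_INSERT_TAIL(&cfg->head, qle, next);
        cfg->n_queues++;

        return qle;
}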

At rte_power_ethdev_pmgmt_queue_enable():

+       struct queue_list_entry *qle;
...
-       ret = queue_list_add(queue_cfg, &qdata);
+       qle = queue_list_add(queue_cfg, &qdata);
...
-       queue_cfg->cur_cb = rte_eth_add_rx_callback(port_id, queue_id, clb, NULL);
+       qle->cur_cb = rte_eth_add_rx_callback(port_id, queue_id, clb, qle);
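
And at rte_power_ethdev_pmgmt_queue_disable() the callback removal would
become per-queue as well. Roughly (queue_list_take() is a made-up helper
here that unlinks and returns the matching entry):

        struct queue_list_entry *qle;
        ...
        /* unlink the entry for this queue and drop its Rx callback */
        qle = queue_list_take(queue_cfg, &qdata);
        if (qle != NULL) {
                rte_eth_remove_rx_callback(port_id, queue_id, qle->cur_cb);
                /* in practice freeing might have to be deferred until
                 * no Rx burst can still be inside the callback */
                free(qle);
        }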

At the actual clb_xxx(uint16_t port_id, uint16_t qidx,
                struct rte_mbuf **pkts __rte_unused, uint16_t nb_rx,
                uint16_t max_pkts __rte_unused, void *addr)
{
   ...
   struct queue_list_entry *qle = addr;
   struct pmd_core_cfg *lcore_cfg = qle->lcore_cfg;
   ...
}
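
Putting the two parts together, the callback body could then look roughly
like below - counters as in the earlier snippet (with the two per-queue ones
added to struct queue_list_entry), and EMPTYPOLL_MAX and goto_sleep() being
placeholders for the real threshold and power-save action:

static uint16_t
clb_umwait(uint16_t port_id __rte_unused, uint16_t qidx __rte_unused,
                struct rte_mbuf **pkts __rte_unused, uint16_t nb_rx,
                uint16_t max_pkts __rte_unused, void *addr)
{
        struct queue_list_entry *qle = addr;
        struct pmd_core_cfg *lcore_cfg = qle->lcore_cfg;

        /* a sleep happened since we last saw this queue - start over */
        if (qle->nb_sleeps != lcore_cfg->nb_sleeps) {
                qle->nb_sleeps = lcore_cfg->nb_sleeps;
                qle->nb_empty_polls = 0;
        }

        if (nb_rx != 0) {
                /* traffic arrived - queue is not 'ready_to_sleep' any more */
                if (qle->nb_empty_polls > EMPTYPOLL_MAX)
                        lcore_cfg->nb_queues_ready_to_sleep--;
                qle->nb_empty_polls = 0;
        } else {
                /* reached EMPTYPOLL_MAX - mark the queue as 'ready_to_sleep' */
                if (qle->nb_empty_polls == EMPTYPOLL_MAX)
                        lcore_cfg->nb_queues_ready_to_sleep++;
                qle->nb_empty_polls++;
        }

        /* no traffic on any queue for at least EMPTYPOLL_MAX polls - sleep once */
        if (lcore_cfg->nb_queues_ready_to_sleep == lcore_cfg->n_queues) {
                lcore_cfg->nb_sleeps++;
                lcore_cfg->nb_queues_ready_to_sleep = 0;
                goto_sleep();
        }

        return nb_rx;
}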



