[dpdk-dev] [PATCH v4 1/6] lib: distributor performance enhancements

Bruce Richardson bruce.richardson at intel.com
Mon Jan 16 17:36:54 CET 2017


On Mon, Jan 09, 2017 at 07:50:43AM +0000, David Hunt wrote:
> Now sends bursts of up to 8 mbufs to each worker, and tracks
> the in-flight flow-ids (atomic scheduling)
> 
> New file with a new api, similar to the old API except with _burst
> at the end of the function names
>

Can you explain why this is necessary, and also how the new version
works compared to the old. I know this is explained in the cover letter,
but the cover letter does not make the git commit log.

> Signed-off-by: David Hunt <david.hunt at intel.com>
> ---
<snip>
> diff --git a/lib/librte_distributor/rte_distributor_burst.c b/lib/librte_distributor/rte_distributor_burst.c
> new file mode 100644
> index 0000000..ae7cf9d
> --- /dev/null
> +++ b/lib/librte_distributor/rte_distributor_burst.c
> @@ -0,0 +1,558 @@
> +/*-
> + *   BSD LICENSE
> + *
> + *   Copyright(c) 2016 Intel Corporation. All rights reserved.

Update year since we aren't in 2016 any more.

> + *
> + *   Redistribution and use in source and binary forms, with or without
> + *   modification, are permitted provided that the following conditions
> + *   are met:
> + *
> + *     * Redistributions of source code must retain the above copyright
> + *       notice, this list of conditions and the following disclaimer.
> + *     * Redistributions in binary form must reproduce the above copyright
> + *       notice, this list of conditions and the following disclaimer in
> + *       the documentation and/or other materials provided with the
> + *       distribution.
> + *     * Neither the name of Intel Corporation nor the names of its
> + *       contributors may be used to endorse or promote products derived
> + *       from this software without specific prior written permission.
> + *
> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +#include <stdio.h>
> +#include <sys/queue.h>
> +#include <string.h>
> +#include <rte_mbuf.h>
> +#include <rte_memory.h>
> +#include <rte_cycles.h>
> +#include <rte_memzone.h>
> +#include <rte_errno.h>
> +#include <rte_string_fns.h>
> +#include <rte_eal_memconfig.h>
> +#include "rte_distributor_priv.h"
> +#include "rte_distributor_burst.h"
> +
> +TAILQ_HEAD(rte_dist_burst_list, rte_distributor_burst);
> +
> +static struct rte_tailq_elem rte_dist_burst_tailq = {
> +	.name = "RTE_DIST_BURST",
> +};
> +EAL_REGISTER_TAILQ(rte_dist_burst_tailq)
> +
> +/**** APIs called by workers ****/
> +
> +/**** Burst Packet APIs called by workers ****/
> +
> +/* This function should really be called return_pkt_burst() */
1) Why should it be? 
2) Why isn't it called that? 
Please explain the naming. :-)

> +void
> +rte_distributor_request_pkt_burst(struct rte_distributor_burst *d,
> +		unsigned int worker_id, struct rte_mbuf **oldpkt,
> +		unsigned int count)
> +{
> +	struct rte_distributor_buffer_burst *buf = &(d->bufs[worker_id]);
> +	unsigned int i;
> +
> +	volatile int64_t *retptr64;
> +
> +
> +	/* if we dont' have any packets to return, return. */
> +	if (count == 0)
> +		return;
> +
So if we don't return anything we don't get any more packets, right?
What happens if we return fewer packets than we were previously given?
If that is allowed, why the restriction on returning at least one?

> +	retptr64 = &(buf->retptr64[0]);
<snip>
> +
> +int
> +rte_distributor_get_pkt_burst(struct rte_distributor_burst *d,
> +		unsigned int worker_id, struct rte_mbuf **pkts,
> +		struct rte_mbuf **oldpkt, unsigned int return_count)
> +{
> +	unsigned int count;
> +	uint64_t retries = 0;
> +
> +	rte_distributor_request_pkt_burst(d, worker_id, oldpkt, return_count);
> +
> +	count = rte_distributor_poll_pkt_burst(d, worker_id, pkts);
> +	while (count == 0) {
> +		rte_pause();
> +		retries++;
> +		if (retries > 1000)
> +			return 0;

This behaviour is different to the original get_pkt() behaviour in that
it has a timeout. Why the change to add the timeout, and should the
timeout not be user configurable in some way?

> +
> +		uint64_t t = rte_rdtsc()+100;

need spaces around the "+"

> +
> +		while (rte_rdtsc() < t)
> +			rte_pause();
> +
> +		count = rte_distributor_poll_pkt_burst(d, worker_id, pkts);
> +	}
> +	return count;
> +}
> +
> +int
> +rte_distributor_return_pkt_burst(struct rte_distributor_burst *d,
> +		unsigned int worker_id, struct rte_mbuf **oldpkt, int num)
> +{
> +	struct rte_distributor_buffer_burst *buf = &d->bufs[worker_id];
> +	unsigned int i;
> +
> +	for (i = 0; i < RTE_DIST_BURST_SIZE; i++)
> +		/* Switch off the return bit first */
> +		buf->retptr64[i] &= ~RTE_DISTRIB_RETURN_BUF;
> +
> +	for (i = num; i-- > 0; )
> +		buf->retptr64[i] = (((int64_t)(uintptr_t)oldpkt[i]) <<
> +			RTE_DISTRIB_FLAG_BITS) | RTE_DISTRIB_RETURN_BUF;
> +
> +	/* set the GET_BUF but even if we got no returns */
> +	buf->retptr64[0] |= RTE_DISTRIB_GET_BUF;

Does this mean we are requesting more packets here?

> +
> +	return 0;
> +}
> +
> +/**** APIs called on distributor core ***/
> +
<snip>
> +
> +static unsigned int
> +release(struct rte_distributor_burst *d, unsigned int wkr)

I think this function needs a comment describing what it is doing,
and where is it called from and why. Other functions on distributor side
probably need the same thing too.

> +{
> +	struct rte_distributor_buffer_burst *buf = &(d->bufs[wkr]);
> +	unsigned int i;
> +
> +	if (d->backlog[wkr].count == 0)
> +		return 0;
> +
> +	while (!(d->bufs[wkr].bufptr64[0] & RTE_DISTRIB_GET_BUF))
> +		rte_pause();
> +
> +	handle_returns(d, wkr);
> +
> +	buf->count = 0;
> +
> +	for (i = 0; i < d->backlog[wkr].count; i++) {
> +		d->bufs[wkr].bufptr64[i] = d->backlog[wkr].pkts[i] |
> +				RTE_DISTRIB_GET_BUF | RTE_DISTRIB_VALID_BUF;
> +		d->in_flight_tags[wkr][i] = d->backlog[wkr].tags[i];
> +	}
> +	buf->count = i;
> +	for ( ; i < RTE_DIST_BURST_SIZE ; i++) {
> +		buf->bufptr64[i] = RTE_DISTRIB_GET_BUF;
> +		d->in_flight_tags[wkr][i] = 0;
> +	}
> +
> +	d->backlog[wkr].count = 0;
> +
> +	/* Clear the GET bit */
> +	buf->bufptr64[0] &= ~RTE_DISTRIB_GET_BUF;
> +	return  buf->count;
> +
> +}
<snip>
> +/**
> + * API called by a worker to get new packets to process. Any previous packets
> + * given to the worker is assumed to have completed processing, and may be
> + * optionally returned to the distributor via the oldpkt parameter.
> + *
> + * @param d
> + *   The distributor instance to be used
> + * @param worker_id
> + *   The worker instance number to use - must be less that num_workers passed
> + *   at distributor creation time.
> + * @param pkts
> + *   The mbufs pointer array to be filled in (up to 8 packets)
> + * @param oldpkt
> + *   The previous packet, if any, being processed by the worker
> + * @param retcount
> + *   The number of packets being returneda

I think you need to document that it can't be zero, if I read the above
C implementation correctly.

> + *
> + * @return
> + *   The number of packets in the pkts array
> + */
> +int
> +rte_distributor_get_pkt_burst(struct rte_distributor_burst *d,
> +	unsigned int worker_id, struct rte_mbuf **pkts,
> +	struct rte_mbuf **oldpkt, unsigned int retcount);
> +
> +/**
<snip>
> +
> +/**
> + * Number of packets to deal with in bursts. Needs to be 8 so as to
> + * fit in one cache line.
> + */
> +#define RTE_DIST_BURST_SIZE (sizeof(__m128i) / sizeof(uint16_t))

Does this compile for non-x86 with the references to __m128i?

> +
<snip>
> +
> +	struct rte_distributor_returned_pkts returns;
> +};
> +
> +/* All different signature compare functions */
> +enum rte_distributor_match_function {
> +	RTE_DIST_MATCH_SCALAR = 0,
> +	RTE_DIST_MATCH_NUM

I think this last entry should be "RTE_DIST_NUM_MATCH_FNS", as
"NUM" is not a match function, and the define doesn't ready right.

> +};
> +
> +struct rte_distributor_burst {
> +	TAILQ_ENTRY(rte_distributor_burst) next;    /**< Next in list. */
> +
> +	char name[RTE_DISTRIBUTOR_NAMESIZE];  /**< Name of the ring. */
> +	unsigned int num_workers;             /**< Number of workers polling */
> +
> +	/**>
> +	 * First cache line in the this array are the tags inflight
> +	 * on the worker core. Second cache line are the backlog
> +	 * that are going to go to the worker core.
> +	 */
> +	uint16_t in_flight_tags[RTE_DISTRIB_MAX_WORKERS][RTE_DIST_BURST_SIZE*2]
> +			__rte_cache_aligned;
> +
> +	struct rte_distributor_backlog backlog[RTE_DISTRIB_MAX_WORKERS]
> +			__rte_cache_aligned;
> +
> +	struct rte_distributor_buffer_burst bufs[RTE_DISTRIB_MAX_WORKERS];
> +
> +	struct rte_distributor_returned_pkts returns;
> +
> +	enum rte_distributor_match_function dist_match_fn;
> +};
> +
> +#ifdef __cplusplus
> +}
> +#endif
> +
> +#endif
> diff --git a/lib/librte_distributor/rte_distributor_version.map b/lib/librte_distributor/rte_distributor_version.map
> index 73fdc43..39795a1 100644
> --- a/lib/librte_distributor/rte_distributor_version.map
> +++ b/lib/librte_distributor/rte_distributor_version.map
> @@ -2,14 +2,23 @@ DPDK_2.0 {
>  	global:
>  
>  	rte_distributor_clear_returns;
> +	rte_distributor_clear_returns_burst;
>  	rte_distributor_create;
> +	rte_distributor_create_burst;
>  	rte_distributor_flush;
> +	rte_distributor_flush_burst;
>  	rte_distributor_get_pkt;
> +	rte_distributor_get_pkt_burst;
>  	rte_distributor_poll_pkt;
> +	rte_distributor_poll_pkt_burst;
>  	rte_distributor_process;
> +	rte_distributor_process_burst;
>  	rte_distributor_request_pkt;
> +	rte_distributor_request_pkt_burst;
>  	rte_distributor_return_pkt;
> +	rte_distributor_return_pkt_burst;
>  	rte_distributor_returned_pkts;
> +	rte_distributor_returned_pkts_burst;
>  
>  	local: *;
>  };

The new functions are not present in DPDK 2.0, so you need a new node
for the 17.02 release.

Regards,
/Bruce



More information about the dev mailing list