[dpdk-dev] [PATCH v1 1/6] app/test: fix deadlock in distributor test

David Hunt david.hunt at intel.com
Thu Sep 17 13:21:02 CEST 2020


Hi Lukasz,

On 15/9/2020 8:34 PM, Lukasz Wojciechowski wrote:
> The sanity test with worker shutdown delegates all bufs
> to be processed by a single lcore worker, then it freezes
> one of the lcore workers and continues to send more bufs.
>
> Problem occurred if freezed lcore is the same as the one
> that is processing the mbufs. The lcore processing mbufs
> might be different every time test is launched.
> This is caused by keeping the value of wkr static variable
> in rte_distributor_process function between running test cases.
>
> Test freezed always lcore with 0 id. The patch changes avoids
> possible collision by freezing lcore with zero_idx. The lcore
> that receives the data updates the zero_idx, so it is not freezed
> itself.
>
> To reproduce the issue fixed by this patch, please run
> distributor_autotest command in test app several times in a row.
>
> Fixes: c3eabff124e6 ("distributor: add unit tests")
> Cc: bruce.richardson at intel.com
> Cc: stable at dpdk.org
>
> Signed-off-by: Lukasz Wojciechowski <l.wojciechow at partner.samsung.com>
> ---
>   app/test/test_distributor.c | 22 ++++++++++++++++++++--
>   1 file changed, 20 insertions(+), 2 deletions(-)
>
> diff --git a/app/test/test_distributor.c b/app/test/test_distributor.c
> index ba1f81cf8..35b25463a 100644
> --- a/app/test/test_distributor.c
> +++ b/app/test/test_distributor.c
> @@ -28,6 +28,7 @@ struct worker_params worker_params;
>   static volatile int quit;      /**< general quit variable for all threads */
>   static volatile int zero_quit; /**< var for when we just want thr0 to quit*/
>   static volatile unsigned worker_idx;
> +static volatile unsigned zero_idx;
>   
>   struct worker_stats {
>   	volatile unsigned handled_packets;
> @@ -346,27 +347,43 @@ handle_work_for_shutdown_test(void *arg)
>   	unsigned int total = 0;
>   	unsigned int i;
>   	unsigned int returned = 0;
> +	unsigned int zero_id = 0;
>   	const unsigned int id = __atomic_fetch_add(&worker_idx, 1,
>   			__ATOMIC_RELAXED);
>   
>   	num = rte_distributor_get_pkt(d, id, buf, buf, num);
>   
> +	zero_id = __atomic_load_n(&zero_idx, __ATOMIC_ACQUIRE);
> +	if (id == zero_id && num > 0) {
> +		zero_id = (zero_id + 1) %  __atomic_load_n(&worker_idx,
> +			__ATOMIC_ACQUIRE);
> +		__atomic_store_n(&zero_idx, zero_id, __ATOMIC_RELEASE);
> +	}
> +
>   	/* wait for quit single globally, or for worker zero, wait
>   	 * for zero_quit */
> -	while (!quit && !(id == 0 && zero_quit)) {
> +	while (!quit && !(id == zero_id && zero_quit)) {
>   		worker_stats[id].handled_packets += num;
>   		count += num;
>   		for (i = 0; i < num; i++)
>   			rte_pktmbuf_free(buf[i]);
>   		num = rte_distributor_get_pkt(d,
>   				id, buf, buf, num);
> +
> +		zero_id = __atomic_load_n(&zero_idx, __ATOMIC_ACQUIRE);
> +		if (id == zero_id && num > 0) {
> +			zero_id = (zero_id + 1) %  __atomic_load_n(&worker_idx,
> +				__ATOMIC_ACQUIRE);
> +			__atomic_store_n(&zero_idx, zero_id, __ATOMIC_RELEASE);
> +		}
> +
>   		total += num;
>   	}
>   	worker_stats[id].handled_packets += num;
>   	count += num;
>   	returned = rte_distributor_return_pkt(d, id, buf, num);
>   
> -	if (id == 0) {
> +	if (id == zero_id) {
>   		/* for worker zero, allow it to restart to pick up last packet
>   		 * when all workers are shutting down.
>   		 */
> @@ -586,6 +603,7 @@ quit_workers(struct worker_params *wp, struct rte_mempool *p)
>   	rte_eal_mp_wait_lcore();
>   	quit = 0;
>   	worker_idx = 0;
> +	zero_idx = 0;
>   }
>   
>   static int


Lockup reproducable if you run the distributor_autotest 19 times in 
succesion. I was able to run the test many times more than that with the 
patch applied. Thanks.

Tested-by: David Hunt <david.hunt at intel.com>






More information about the dev mailing list