[dpdk-stable] patch 'lib/distributor: fix deadlock on aarch64' has been queued to LTS release 18.11.6

Kevin Traynor ktraynor at redhat.com
Tue Dec 3 19:27:01 CET 2019


Hi,

FYI, your patch has been queued to LTS release 18.11.6

Note it hasn't been pushed to http://dpdk.org/browse/dpdk-stable yet.
It will be pushed if I get no objections before December 10, 2019 (12/10/19).
So please shout if anyone has objections.

Also note that after the patch there's a diff of the upstream commit vs the
patch applied to the branch. This will indicate if there was any rebasing
needed to apply to the stable branch. If there were code changes for rebasing
(i.e. not only metadata diffs), please double-check that the rebase was
correctly done.

Queued patches are on a temporary branch at:
https://github.com/kevintraynor/dpdk-stable-queue

This queued commit can be viewed at:
https://github.com/kevintraynor/dpdk-stable-queue/commit/01b5ea180ab7e79a3a5524f2b4bef3a3cd0c25f3

Thanks.

Kevin.

---
From 01b5ea180ab7e79a3a5524f2b4bef3a3cd0c25f3 Mon Sep 17 00:00:00 2001
From: Ruifeng Wang <ruifeng.wang at arm.com>
Date: Tue, 15 Oct 2019 17:28:25 +0800
Subject: [PATCH] lib/distributor: fix deadlock on aarch64

[ upstream commit 52833924822490391df3dce3eec3a2ee7777acc5 ]

Distributor and worker threads rely on data structs in cache line
for synchronization. The shared data structs were not protected.
This caused a deadlock on platforms with weaker memory ordering, such as
aarch64.
Fix this issue by adding memory barriers to ensure synchronization
among cores.

Bugzilla ID: 342
Fixes: 775003ad2f96 ("distributor: add new burst-capable library")

Signed-off-by: Ruifeng Wang <ruifeng.wang at arm.com>
Reviewed-by: Gavin Hu <gavin.hu at arm.com>
Acked-by: David Hunt <david.hunt at intel.com>
---
 lib/librte_distributor/meson.build           |  5 ++
 lib/librte_distributor/rte_distributor.c     | 68 ++++++++++++++------
 lib/librte_distributor/rte_distributor_v20.c | 59 ++++++++++++-----
 3 files changed, 97 insertions(+), 35 deletions(-)

diff --git a/lib/librte_distributor/meson.build b/lib/librte_distributor/meson.build
index dba7e3b2a..26577dbc1 100644
--- a/lib/librte_distributor/meson.build
+++ b/lib/librte_distributor/meson.build
@@ -10,2 +10,7 @@ endif
 headers = files('rte_distributor.h')
 deps += ['mbuf']
+
+# for clang 32-bit compiles we need libatomic for 64-bit atomic ops
+if cc.get_id() == 'clang' and dpdk_conf.get('RTE_ARCH_64') == false
+	ext_deps += cc.find_library('atomic')
+endif
diff --git a/lib/librte_distributor/rte_distributor.c b/lib/librte_distributor/rte_distributor.c
index b60acdeed..62a763404 100644
--- a/lib/librte_distributor/rte_distributor.c
+++ b/lib/librte_distributor/rte_distributor.c
@@ -49,6 +49,9 @@ rte_distributor_request_pkt_v1705(struct rte_distributor *d,
 
 	retptr64 = &(buf->retptr64[0]);
-	/* Spin while handshake bits are set (scheduler clears it) */
-	while (unlikely(*retptr64 & RTE_DISTRIB_GET_BUF)) {
+	/* Spin while handshake bits are set (scheduler clears it).
+	 * Sync with worker on GET_BUF flag.
+	 */
+	while (unlikely(__atomic_load_n(retptr64, __ATOMIC_ACQUIRE)
+			& RTE_DISTRIB_GET_BUF)) {
 		rte_pause();
 		uint64_t t = rte_rdtsc()+100;
@@ -75,6 +78,8 @@ rte_distributor_request_pkt_v1705(struct rte_distributor *d,
 	 * Finally, set the GET_BUF  to signal to distributor that cache
 	 * line is ready for processing
+	 * Sync with distributor to release retptrs
 	 */
-	*retptr64 |= RTE_DISTRIB_GET_BUF;
+	__atomic_store_n(retptr64, *retptr64 | RTE_DISTRIB_GET_BUF,
+			__ATOMIC_RELEASE);
 }
 BIND_DEFAULT_SYMBOL(rte_distributor_request_pkt, _v1705, 17.05);
@@ -98,6 +103,9 @@ rte_distributor_poll_pkt_v1705(struct rte_distributor *d,
 	}
 
-	/* If bit is set, return */
-	if (buf->bufptr64[0] & RTE_DISTRIB_GET_BUF)
+	/* If bit is set, return
+	 * Sync with distributor to acquire bufptrs
+	 */
+	if (__atomic_load_n(&(buf->bufptr64[0]), __ATOMIC_ACQUIRE)
+		& RTE_DISTRIB_GET_BUF)
 		return -1;
 
@@ -114,6 +122,8 @@ rte_distributor_poll_pkt_v1705(struct rte_distributor *d,
 	 * mbuf pointers, so toggle the bit so scheduler can start working
 	 * on the next cacheline while we're working.
+	 * Sync with distributor on GET_BUF flag. Release bufptrs.
 	 */
-	buf->bufptr64[0] |= RTE_DISTRIB_GET_BUF;
+	__atomic_store_n(&(buf->bufptr64[0]),
+		buf->bufptr64[0] | RTE_DISTRIB_GET_BUF, __ATOMIC_RELEASE);
 
 	return count;
@@ -174,4 +184,6 @@ rte_distributor_return_pkt_v1705(struct rte_distributor *d,
 	}
 
+	/* Sync with distributor to acquire retptrs */
+	__atomic_thread_fence(__ATOMIC_ACQUIRE);
 	for (i = 0; i < RTE_DIST_BURST_SIZE; i++)
 		/* Switch off the return bit first */
@@ -182,6 +194,9 @@ rte_distributor_return_pkt_v1705(struct rte_distributor *d,
 			RTE_DISTRIB_FLAG_BITS) | RTE_DISTRIB_RETURN_BUF;
 
-	/* set the GET_BUF but even if we got no returns */
-	buf->retptr64[0] |= RTE_DISTRIB_GET_BUF;
+	/* set the GET_BUF but even if we got no returns.
+	 * Sync with distributor on GET_BUF flag. Release retptrs.
+	 */
+	__atomic_store_n(&(buf->retptr64[0]),
+		buf->retptr64[0] | RTE_DISTRIB_GET_BUF, __ATOMIC_RELEASE);
 
 	return 0;
@@ -273,5 +288,7 @@ handle_returns(struct rte_distributor *d, unsigned int wkr)
 	unsigned int i;
 
-	if (buf->retptr64[0] & RTE_DISTRIB_GET_BUF) {
+	/* Sync on GET_BUF flag. Acquire retptrs. */
+	if (__atomic_load_n(&(buf->retptr64[0]), __ATOMIC_ACQUIRE)
+		& RTE_DISTRIB_GET_BUF) {
 		for (i = 0; i < RTE_DIST_BURST_SIZE; i++) {
 			if (buf->retptr64[i] & RTE_DISTRIB_RETURN_BUF) {
@@ -286,6 +303,8 @@ handle_returns(struct rte_distributor *d, unsigned int wkr)
 		d->returns.start = ret_start;
 		d->returns.count = ret_count;
-		/* Clear for the worker to populate with more returns */
-		buf->retptr64[0] = 0;
+		/* Clear for the worker to populate with more returns.
+		 * Sync with distributor on GET_BUF flag. Release retptrs.
+		 */
+		__atomic_store_n(&(buf->retptr64[0]), 0, __ATOMIC_RELEASE);
 	}
 	return count;
@@ -307,5 +326,7 @@ release(struct rte_distributor *d, unsigned int wkr)
 	unsigned int i;
 
-	while (!(d->bufs[wkr].bufptr64[0] & RTE_DISTRIB_GET_BUF))
+	/* Sync with worker on GET_BUF flag */
+	while (!(__atomic_load_n(&(d->bufs[wkr].bufptr64[0]), __ATOMIC_ACQUIRE)
+		& RTE_DISTRIB_GET_BUF))
 		rte_pause();
 
@@ -327,6 +348,9 @@ release(struct rte_distributor *d, unsigned int wkr)
 	d->backlog[wkr].count = 0;
 
-	/* Clear the GET bit */
-	buf->bufptr64[0] &= ~RTE_DISTRIB_GET_BUF;
+	/* Clear the GET bit.
+	 * Sync with worker on GET_BUF flag. Release bufptrs.
+	 */
+	__atomic_store_n(&(buf->bufptr64[0]),
+		buf->bufptr64[0] & ~RTE_DISTRIB_GET_BUF, __ATOMIC_RELEASE);
 	return  buf->count;
 
@@ -355,5 +379,7 @@ rte_distributor_process_v1705(struct rte_distributor *d,
 		/* Flush out all non-full cache-lines to workers. */
 		for (wid = 0 ; wid < d->num_workers; wid++) {
-			if (d->bufs[wid].bufptr64[0] & RTE_DISTRIB_GET_BUF) {
+			/* Sync with worker on GET_BUF flag. */
+			if (__atomic_load_n(&(d->bufs[wid].bufptr64[0]),
+				__ATOMIC_ACQUIRE) & RTE_DISTRIB_GET_BUF) {
 				release(d, wid);
 				handle_returns(d, wid);
@@ -367,5 +393,7 @@ rte_distributor_process_v1705(struct rte_distributor *d,
 		unsigned int pkts;
 
-		if (d->bufs[wkr].bufptr64[0] & RTE_DISTRIB_GET_BUF)
+		/* Sync with worker on GET_BUF flag. */
+		if (__atomic_load_n(&(d->bufs[wkr].bufptr64[0]),
+			__ATOMIC_ACQUIRE) & RTE_DISTRIB_GET_BUF)
 			d->bufs[wkr].count = 0;
 
@@ -465,5 +493,7 @@ rte_distributor_process_v1705(struct rte_distributor *d,
 	/* Flush out all non-full cache-lines to workers. */
 	for (wid = 0 ; wid < d->num_workers; wid++)
-		if ((d->bufs[wid].bufptr64[0] & RTE_DISTRIB_GET_BUF))
+		/* Sync with worker on GET_BUF flag. */
+		if ((__atomic_load_n(&(d->bufs[wid].bufptr64[0]),
+			__ATOMIC_ACQUIRE) & RTE_DISTRIB_GET_BUF))
 			release(d, wid);
 
@@ -574,5 +604,7 @@ rte_distributor_clear_returns_v1705(struct rte_distributor *d)
 	/* throw away returns, so workers can exit */
 	for (wkr = 0; wkr < d->num_workers; wkr++)
-		d->bufs[wkr].retptr64[0] = 0;
+		/* Sync with worker. Release retptrs. */
+		__atomic_store_n(&(d->bufs[wkr].retptr64[0]), 0,
+				__ATOMIC_RELEASE);
 }
 BIND_DEFAULT_SYMBOL(rte_distributor_clear_returns, _v1705, 17.05);
diff --git a/lib/librte_distributor/rte_distributor_v20.c b/lib/librte_distributor/rte_distributor_v20.c
index 9566b53f2..35adc8ea8 100644
--- a/lib/librte_distributor/rte_distributor_v20.c
+++ b/lib/librte_distributor/rte_distributor_v20.c
@@ -34,7 +34,10 @@ rte_distributor_request_pkt_v20(struct rte_distributor_v20 *d,
 	int64_t req = (((int64_t)(uintptr_t)oldpkt) << RTE_DISTRIB_FLAG_BITS)
 			| RTE_DISTRIB_GET_BUF;
-	while (unlikely(buf->bufptr64 & RTE_DISTRIB_FLAGS_MASK))
+	while (unlikely(__atomic_load_n(&buf->bufptr64, __ATOMIC_RELAXED)
+			& RTE_DISTRIB_FLAGS_MASK))
 		rte_pause();
-	buf->bufptr64 = req;
+
+	/* Sync with distributor on GET_BUF flag. */
+	__atomic_store_n(&(buf->bufptr64), req, __ATOMIC_RELEASE);
 }
 VERSION_SYMBOL(rte_distributor_request_pkt, _v20, 2.0);
@@ -45,5 +48,7 @@ rte_distributor_poll_pkt_v20(struct rte_distributor_v20 *d,
 {
 	union rte_distributor_buffer_v20 *buf = &d->bufs[worker_id];
-	if (buf->bufptr64 & RTE_DISTRIB_GET_BUF)
+	/* Sync with distributor. Acquire bufptr64. */
+	if (__atomic_load_n(&buf->bufptr64, __ATOMIC_ACQUIRE)
+		& RTE_DISTRIB_GET_BUF)
 		return NULL;
 
@@ -73,5 +78,6 @@ rte_distributor_return_pkt_v20(struct rte_distributor_v20 *d,
 	uint64_t req = (((int64_t)(uintptr_t)oldpkt) << RTE_DISTRIB_FLAG_BITS)
 			| RTE_DISTRIB_RETURN_BUF;
-	buf->bufptr64 = req;
+	/* Sync with distributor on RETURN_BUF flag. */
+	__atomic_store_n(&(buf->bufptr64), req, __ATOMIC_RELEASE);
 	return 0;
 }
@@ -117,5 +123,6 @@ handle_worker_shutdown(struct rte_distributor_v20 *d, unsigned int wkr)
 	d->in_flight_tags[wkr] = 0;
 	d->in_flight_bitmask &= ~(1UL << wkr);
-	d->bufs[wkr].bufptr64 = 0;
+	/* Sync with worker. Release bufptr64. */
+	__atomic_store_n(&(d->bufs[wkr].bufptr64), 0, __ATOMIC_RELEASE);
 	if (unlikely(d->backlog[wkr].count != 0)) {
 		/* On return of a packet, we need to move the
@@ -161,15 +168,21 @@ process_returns(struct rte_distributor_v20 *d)
 
 	for (wkr = 0; wkr < d->num_workers; wkr++) {
-
-		const int64_t data = d->bufs[wkr].bufptr64;
 		uintptr_t oldbuf = 0;
+		/* Sync with worker. Acquire bufptr64. */
+		const int64_t data = __atomic_load_n(&(d->bufs[wkr].bufptr64),
+							__ATOMIC_ACQUIRE);
 
 		if (data & RTE_DISTRIB_GET_BUF) {
 			flushed++;
 			if (d->backlog[wkr].count)
-				d->bufs[wkr].bufptr64 =
-						backlog_pop(&d->backlog[wkr]);
+				/* Sync with worker. Release bufptr64. */
+				__atomic_store_n(&(d->bufs[wkr].bufptr64),
+					backlog_pop(&d->backlog[wkr]),
+					__ATOMIC_RELEASE);
 			else {
-				d->bufs[wkr].bufptr64 = RTE_DISTRIB_GET_BUF;
+				/* Sync with worker on GET_BUF flag. */
+				__atomic_store_n(&(d->bufs[wkr].bufptr64),
+					RTE_DISTRIB_GET_BUF,
+					__ATOMIC_RELEASE);
 				d->in_flight_tags[wkr] = 0;
 				d->in_flight_bitmask &= ~(1UL << wkr);
@@ -207,7 +220,8 @@ rte_distributor_process_v20(struct rte_distributor_v20 *d,
 
 	while (next_idx < num_mbufs || next_mb != NULL) {
-
-		int64_t data = d->bufs[wkr].bufptr64;
 		uintptr_t oldbuf = 0;
+		/* Sync with worker. Acquire bufptr64. */
+		int64_t data = __atomic_load_n(&(d->bufs[wkr].bufptr64),
+						__ATOMIC_ACQUIRE);
 
 		if (!next_mb) {
@@ -255,9 +269,14 @@ rte_distributor_process_v20(struct rte_distributor_v20 *d,
 
 			if (d->backlog[wkr].count)
-				d->bufs[wkr].bufptr64 =
-						backlog_pop(&d->backlog[wkr]);
+				/* Sync with worker. Release bufptr64. */
+				__atomic_store_n(&(d->bufs[wkr].bufptr64),
+						backlog_pop(&d->backlog[wkr]),
+						__ATOMIC_RELEASE);
 
 			else {
-				d->bufs[wkr].bufptr64 = next_value;
+				/* Sync with worker. Release bufptr64.  */
+				__atomic_store_n(&(d->bufs[wkr].bufptr64),
+						next_value,
+						__ATOMIC_RELEASE);
 				d->in_flight_tags[wkr] = new_tag;
 				d->in_flight_bitmask |= (1UL << wkr);
@@ -280,11 +299,17 @@ rte_distributor_process_v20(struct rte_distributor_v20 *d,
 	for (wkr = 0; wkr < d->num_workers; wkr++)
 		if (d->backlog[wkr].count &&
-				(d->bufs[wkr].bufptr64 & RTE_DISTRIB_GET_BUF)) {
+				/* Sync with worker. Acquire bufptr64. */
+				(__atomic_load_n(&(d->bufs[wkr].bufptr64),
+				__ATOMIC_ACQUIRE) & RTE_DISTRIB_GET_BUF)) {
 
 			int64_t oldbuf = d->bufs[wkr].bufptr64 >>
 					RTE_DISTRIB_FLAG_BITS;
+
 			store_return(oldbuf, d, &ret_start, &ret_count);
 
-			d->bufs[wkr].bufptr64 = backlog_pop(&d->backlog[wkr]);
+			/* Sync with worker. Release bufptr64. */
+			__atomic_store_n(&(d->bufs[wkr].bufptr64),
+				backlog_pop(&d->backlog[wkr]),
+				__ATOMIC_RELEASE);
 		}
 
-- 
2.21.0

---
  Diff of the applied patch vs upstream commit (please double-check if non-empty):
---
--- -	2019-12-03 17:29:54.758306253 +0000
+++ 0052-lib-distributor-fix-deadlock-on-aarch64.patch	2019-12-03 17:29:51.776749412 +0000
@@ -1 +1 @@
-From 52833924822490391df3dce3eec3a2ee7777acc5 Mon Sep 17 00:00:00 2001
+From 01b5ea180ab7e79a3a5524f2b4bef3a3cd0c25f3 Mon Sep 17 00:00:00 2001
@@ -5,0 +6,2 @@
+[ upstream commit 52833924822490391df3dce3eec3a2ee7777acc5 ]
+
@@ -15 +16,0 @@
-Cc: stable at dpdk.org
@@ -39 +40 @@
-index 21eb1fb0a..0a03625c9 100644
+index b60acdeed..62a763404 100644
@@ -42 +43 @@
-@@ -50,6 +50,9 @@ rte_distributor_request_pkt_v1705(struct rte_distributor *d,
+@@ -49,6 +49,9 @@ rte_distributor_request_pkt_v1705(struct rte_distributor *d,
@@ -54 +55 @@
-@@ -76,6 +79,8 @@ rte_distributor_request_pkt_v1705(struct rte_distributor *d,
+@@ -75,6 +78,8 @@ rte_distributor_request_pkt_v1705(struct rte_distributor *d,
@@ -64 +65 @@
-@@ -99,6 +104,9 @@ rte_distributor_poll_pkt_v1705(struct rte_distributor *d,
+@@ -98,6 +103,9 @@ rte_distributor_poll_pkt_v1705(struct rte_distributor *d,
@@ -76 +77 @@
-@@ -115,6 +123,8 @@ rte_distributor_poll_pkt_v1705(struct rte_distributor *d,
+@@ -114,6 +122,8 @@ rte_distributor_poll_pkt_v1705(struct rte_distributor *d,
@@ -86 +87 @@
-@@ -175,4 +185,6 @@ rte_distributor_return_pkt_v1705(struct rte_distributor *d,
+@@ -174,4 +184,6 @@ rte_distributor_return_pkt_v1705(struct rte_distributor *d,
@@ -93 +94 @@
-@@ -183,6 +195,9 @@ rte_distributor_return_pkt_v1705(struct rte_distributor *d,
+@@ -182,6 +194,9 @@ rte_distributor_return_pkt_v1705(struct rte_distributor *d,
@@ -105 +106 @@
-@@ -274,5 +289,7 @@ handle_returns(struct rte_distributor *d, unsigned int wkr)
+@@ -273,5 +288,7 @@ handle_returns(struct rte_distributor *d, unsigned int wkr)
@@ -114 +115 @@
-@@ -287,6 +304,8 @@ handle_returns(struct rte_distributor *d, unsigned int wkr)
+@@ -286,6 +303,8 @@ handle_returns(struct rte_distributor *d, unsigned int wkr)
@@ -125 +126 @@
-@@ -308,5 +327,7 @@ release(struct rte_distributor *d, unsigned int wkr)
+@@ -307,5 +326,7 @@ release(struct rte_distributor *d, unsigned int wkr)
@@ -134 +135 @@
-@@ -328,6 +349,9 @@ release(struct rte_distributor *d, unsigned int wkr)
+@@ -327,6 +348,9 @@ release(struct rte_distributor *d, unsigned int wkr)
@@ -146 +147 @@
-@@ -356,5 +380,7 @@ rte_distributor_process_v1705(struct rte_distributor *d,
+@@ -355,5 +379,7 @@ rte_distributor_process_v1705(struct rte_distributor *d,
@@ -155 +156 @@
-@@ -368,5 +394,7 @@ rte_distributor_process_v1705(struct rte_distributor *d,
+@@ -367,5 +393,7 @@ rte_distributor_process_v1705(struct rte_distributor *d,
@@ -164 +165 @@
-@@ -466,5 +494,7 @@ rte_distributor_process_v1705(struct rte_distributor *d,
+@@ -465,5 +493,7 @@ rte_distributor_process_v1705(struct rte_distributor *d,
@@ -173 +174 @@
-@@ -575,5 +605,7 @@ rte_distributor_clear_returns_v1705(struct rte_distributor *d)
+@@ -574,5 +604,7 @@ rte_distributor_clear_returns_v1705(struct rte_distributor *d)
@@ -183 +184 @@
-index cdc0969a8..ef6d5cb4b 100644
+index 9566b53f2..35adc8ea8 100644
@@ -186 +187 @@
-@@ -35,7 +35,10 @@ rte_distributor_request_pkt_v20(struct rte_distributor_v20 *d,
+@@ -34,7 +34,10 @@ rte_distributor_request_pkt_v20(struct rte_distributor_v20 *d,
@@ -199 +200 @@
-@@ -46,5 +49,7 @@ rte_distributor_poll_pkt_v20(struct rte_distributor_v20 *d,
+@@ -45,5 +48,7 @@ rte_distributor_poll_pkt_v20(struct rte_distributor_v20 *d,
@@ -208 +209 @@
-@@ -74,5 +79,6 @@ rte_distributor_return_pkt_v20(struct rte_distributor_v20 *d,
+@@ -73,5 +78,6 @@ rte_distributor_return_pkt_v20(struct rte_distributor_v20 *d,
@@ -216 +217 @@
-@@ -118,5 +124,6 @@ handle_worker_shutdown(struct rte_distributor_v20 *d, unsigned int wkr)
+@@ -117,5 +123,6 @@ handle_worker_shutdown(struct rte_distributor_v20 *d, unsigned int wkr)
@@ -224 +225 @@
-@@ -162,15 +169,21 @@ process_returns(struct rte_distributor_v20 *d)
+@@ -161,15 +168,21 @@ process_returns(struct rte_distributor_v20 *d)
@@ -251 +252 @@
-@@ -208,7 +221,8 @@ rte_distributor_process_v20(struct rte_distributor_v20 *d,
+@@ -207,7 +220,8 @@ rte_distributor_process_v20(struct rte_distributor_v20 *d,
@@ -262 +263 @@
-@@ -256,9 +270,14 @@ rte_distributor_process_v20(struct rte_distributor_v20 *d,
+@@ -255,9 +269,14 @@ rte_distributor_process_v20(struct rte_distributor_v20 *d,
@@ -280 +281 @@
-@@ -281,11 +300,17 @@ rte_distributor_process_v20(struct rte_distributor_v20 *d,
+@@ -280,11 +299,17 @@ rte_distributor_process_v20(struct rte_distributor_v20 *d,



More information about the stable mailing list