patch 'ring: optimize corner case for enqueue/dequeue' has been queued to stable release 20.11.5

luca.boccassi at gmail.com
Fri Feb 18 13:38:42 CET 2022


Hi,

FYI, your patch has been queued to stable release 20.11.5

Note it hasn't been pushed to http://dpdk.org/browse/dpdk-stable yet.
It will be pushed if I get no objections before 02/20/22, so please
shout if you have any.

Also note that after the patch there's a diff of the upstream commit vs the
patch applied to the branch. This indicates whether any rebasing was needed
to apply it to the stable branch. If the rebase required code changes
(i.e. not only metadata diffs), please double-check that it was done
correctly.

Queued patches are on a temporary branch at:
https://github.com/bluca/dpdk-stable

This queued commit can be viewed at:
https://github.com/bluca/dpdk-stable/commit/6c4c4398a4398fdc311496d5a5f6ae80c3179629

Thanks.

Luca Boccassi

---
From 6c4c4398a4398fdc311496d5a5f6ae80c3179629 Mon Sep 17 00:00:00 2001
From: Andrzej Ostruszka <amo at semihalf.com>
Date: Tue, 11 Jan 2022 12:37:39 +0100
Subject: [PATCH] ring: optimize corner case for enqueue/dequeue
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ upstream commit 97ed4cb6fb324f4277ee754d4b6f3c7a0d96400b ]

When enqueueing/dequeueing to/from the ring we try to optimize by manual
loop unrolling.  The check for this optimization looks like:

	if (likely(idx + n < size)) {

where 'idx' points to the first usable element (empty slot for enqueue,
data for dequeue).  The correct comparison here should be '<=' instead
of '<'.

This is not a functional error, since we fall back to the loop with
correct index checks; it is merely suboptimal behaviour for the case
where we want to enqueue/dequeue exactly the number of elements left in
the ring before wrapping around to its beginning.
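To illustrate the corner case, here is a standalone sketch (not part of the
patch and not DPDK code; the ring size, index and count are made up for the
example). With size == 8, idx == 4 and n == 4, the four elements fit exactly
in slots 4..7 without wrapping, yet the old '<' check rejects the unrolled
fast path, while the fixed '<=' check accepts it:

    /* Hypothetical illustration of the boundary check, not DPDK code. */
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        const uint32_t size = 8; /* ring capacity (power of two) */
        const uint32_t idx = 4;  /* first free/used slot */
        const uint32_t n = 4;    /* elements to enqueue/dequeue */

        /* Old check: 4 + 4 < 8 is false, so the exact-fit case
         * falls back to the slower wrap-around loop. */
        printf("old fast path taken: %d\n", idx + n < size);

        /* Fixed check: slots idx..size-1 hold all n elements,
         * so the unrolled copy is safe and is now taken. */
        printf("new fast path taken: %d\n", idx + n <= size);
        return 0;
    }

Compiled as-is, this prints 0 for the old check and 1 for the new one, i.e.
only the '<=' form lets the exact-fit case use the unrolled copy.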

Fixes: cc4b218790f6 ("ring: support configurable element size")
Fixes: 286bd05bf70d ("ring: optimisations")

Signed-off-by: Andrzej Ostruszka <amo at semihalf.com>
Reviewed-by: Olivier Matz <olivier.matz at 6wind.com>
Acked-by: Konstantin Ananyev <konstantin.ananyev at intel.com>
Reviewed-by: Morten Brørup <mb at smartsharesystems.com>
---
 lib/librte_ring/rte_ring_elem.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/lib/librte_ring/rte_ring_elem.h b/lib/librte_ring/rte_ring_elem.h
index 7034d29c07..0057da3597 100644
--- a/lib/librte_ring/rte_ring_elem.h
+++ b/lib/librte_ring/rte_ring_elem.h
@@ -112,7 +112,7 @@ __rte_ring_enqueue_elems_32(struct rte_ring *r, const uint32_t size,
 	unsigned int i;
 	uint32_t *ring = (uint32_t *)&r[1];
 	const uint32_t *obj = (const uint32_t *)obj_table;
-	if (likely(idx + n < size)) {
+	if (likely(idx + n <= size)) {
 		for (i = 0; i < (n & ~0x7); i += 8, idx += 8) {
 			ring[idx] = obj[i];
 			ring[idx + 1] = obj[i + 1];
@@ -157,7 +157,7 @@ __rte_ring_enqueue_elems_64(struct rte_ring *r, uint32_t prod_head,
 	uint32_t idx = prod_head & r->mask;
 	uint64_t *ring = (uint64_t *)&r[1];
 	const unaligned_uint64_t *obj = (const unaligned_uint64_t *)obj_table;
-	if (likely(idx + n < size)) {
+	if (likely(idx + n <= size)) {
 		for (i = 0; i < (n & ~0x3); i += 4, idx += 4) {
 			ring[idx] = obj[i];
 			ring[idx + 1] = obj[i + 1];
@@ -190,7 +190,7 @@ __rte_ring_enqueue_elems_128(struct rte_ring *r, uint32_t prod_head,
 	uint32_t idx = prod_head & r->mask;
 	rte_int128_t *ring = (rte_int128_t *)&r[1];
 	const rte_int128_t *obj = (const rte_int128_t *)obj_table;
-	if (likely(idx + n < size)) {
+	if (likely(idx + n <= size)) {
 		for (i = 0; i < (n & ~0x1); i += 2, idx += 2)
 			memcpy((void *)(ring + idx),
 				(const void *)(obj + i), 32);
@@ -246,7 +246,7 @@ __rte_ring_dequeue_elems_32(struct rte_ring *r, const uint32_t size,
 	unsigned int i;
 	uint32_t *ring = (uint32_t *)&r[1];
 	uint32_t *obj = (uint32_t *)obj_table;
-	if (likely(idx + n < size)) {
+	if (likely(idx + n <= size)) {
 		for (i = 0; i < (n & ~0x7); i += 8, idx += 8) {
 			obj[i] = ring[idx];
 			obj[i + 1] = ring[idx + 1];
@@ -291,7 +291,7 @@ __rte_ring_dequeue_elems_64(struct rte_ring *r, uint32_t prod_head,
 	uint32_t idx = prod_head & r->mask;
 	uint64_t *ring = (uint64_t *)&r[1];
 	unaligned_uint64_t *obj = (unaligned_uint64_t *)obj_table;
-	if (likely(idx + n < size)) {
+	if (likely(idx + n <= size)) {
 		for (i = 0; i < (n & ~0x3); i += 4, idx += 4) {
 			obj[i] = ring[idx];
 			obj[i + 1] = ring[idx + 1];
@@ -324,7 +324,7 @@ __rte_ring_dequeue_elems_128(struct rte_ring *r, uint32_t prod_head,
 	uint32_t idx = prod_head & r->mask;
 	rte_int128_t *ring = (rte_int128_t *)&r[1];
 	rte_int128_t *obj = (rte_int128_t *)obj_table;
-	if (likely(idx + n < size)) {
+	if (likely(idx + n <= size)) {
 		for (i = 0; i < (n & ~0x1); i += 2, idx += 2)
 			memcpy((void *)(obj + i), (void *)(ring + idx), 32);
 		switch (n & 0x1) {
-- 
2.30.2

---
  Diff of the applied patch vs upstream commit (please double-check if non-empty):
---
--- -	2022-02-18 12:37:40.766001111 +0000
+++ 0073-ring-optimize-corner-case-for-enqueue-dequeue.patch	2022-02-18 12:37:37.750793260 +0000
@@ -1 +1 @@
-From 97ed4cb6fb324f4277ee754d4b6f3c7a0d96400b Mon Sep 17 00:00:00 2001
+From 6c4c4398a4398fdc311496d5a5f6ae80c3179629 Mon Sep 17 00:00:00 2001
@@ -8,0 +9,2 @@
+[ upstream commit 97ed4cb6fb324f4277ee754d4b6f3c7a0d96400b ]
+
@@ -31 +33 @@
- lib/ring/rte_ring_elem_pvt.h | 12 ++++++------
+ lib/librte_ring/rte_ring_elem.h | 12 ++++++------
@@ -34,5 +36,5 @@
-diff --git a/lib/ring/rte_ring_elem_pvt.h b/lib/ring/rte_ring_elem_pvt.h
-index 275ec55393..83788c56e6 100644
---- a/lib/ring/rte_ring_elem_pvt.h
-+++ b/lib/ring/rte_ring_elem_pvt.h
-@@ -17,7 +17,7 @@ __rte_ring_enqueue_elems_32(struct rte_ring *r, const uint32_t size,
+diff --git a/lib/librte_ring/rte_ring_elem.h b/lib/librte_ring/rte_ring_elem.h
+index 7034d29c07..0057da3597 100644
+--- a/lib/librte_ring/rte_ring_elem.h
++++ b/lib/librte_ring/rte_ring_elem.h
+@@ -112,7 +112,7 @@ __rte_ring_enqueue_elems_32(struct rte_ring *r, const uint32_t size,
@@ -47 +49 @@
-@@ -62,7 +62,7 @@ __rte_ring_enqueue_elems_64(struct rte_ring *r, uint32_t prod_head,
+@@ -157,7 +157,7 @@ __rte_ring_enqueue_elems_64(struct rte_ring *r, uint32_t prod_head,
@@ -56 +58 @@
-@@ -95,7 +95,7 @@ __rte_ring_enqueue_elems_128(struct rte_ring *r, uint32_t prod_head,
+@@ -190,7 +190,7 @@ __rte_ring_enqueue_elems_128(struct rte_ring *r, uint32_t prod_head,
@@ -65 +67 @@
-@@ -151,7 +151,7 @@ __rte_ring_dequeue_elems_32(struct rte_ring *r, const uint32_t size,
+@@ -246,7 +246,7 @@ __rte_ring_dequeue_elems_32(struct rte_ring *r, const uint32_t size,
@@ -74 +76 @@
-@@ -196,7 +196,7 @@ __rte_ring_dequeue_elems_64(struct rte_ring *r, uint32_t prod_head,
+@@ -291,7 +291,7 @@ __rte_ring_dequeue_elems_64(struct rte_ring *r, uint32_t prod_head,
@@ -83 +85 @@
-@@ -229,7 +229,7 @@ __rte_ring_dequeue_elems_128(struct rte_ring *r, uint32_t prod_head,
+@@ -324,7 +324,7 @@ __rte_ring_dequeue_elems_128(struct rte_ring *r, uint32_t prod_head,

