[v2,2/2] virtio: one way barrier for packed vring desc used flags
Checks
Commit Message
If VIRTIO_F_ORDER_PLATFORM (36) is not negotiated, then the
frontend and backend are assumed to be implemented in software,
that is, they can run on identical CPUs in an SMP configuration.
Thus a weak form of memory barriers like rte_smp_r/wmb, rather
than rte_cio_r/wmb, is sufficient for this case (vq->hw->weak_barriers
== 1) and yields better performance.
For the above case, this patch helps yield even better performance
by replacing the two-way barriers with C11 one-way barriers.
Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
Reviewed-by: Phil Yang <phil.yang@arm.com>
---
drivers/net/virtio/virtio_rxtx.c | 12 +++++++++---
drivers/net/virtio/virtio_user/virtio_user_dev.c | 4 ++--
drivers/net/virtio/virtqueue.h | 7 ++++++-
lib/librte_vhost/virtio_net.c | 5 ++---
4 files changed, 19 insertions(+), 9 deletions(-)
@@ -122,9 +122,11 @@ virtqueue_dequeue_burst_rx_packed(struct virtqueue *vq,
for (i = 0; i < num; i++) {
used_idx = vq->vq_used_cons_idx;
+ /* desc_is_used has a load-acquire or rte_cio_rmb inside
+ * and waits for the used desc in the virtqueue.
+ */
if (!desc_is_used(&desc[used_idx], vq))
return i;
- virtio_rmb(vq->hw->weak_barriers);
len[i] = desc[used_idx].len;
id = desc[used_idx].id;
cookie = (struct rte_mbuf *)vq->vq_descx[id].cookie;
@@ -233,8 +235,10 @@ virtio_xmit_cleanup_inorder_packed(struct virtqueue *vq, int num)
struct vq_desc_extra *dxp;
used_idx = vq->vq_used_cons_idx;
+ /* desc_is_used has a load-acquire or rte_cio_rmb inside
+ * and waits for the used desc in the virtqueue.
+ */
while (num > 0 && desc_is_used(&desc[used_idx], vq)) {
- virtio_rmb(vq->hw->weak_barriers);
id = desc[used_idx].id;
do {
curr_id = used_idx;
@@ -265,8 +269,10 @@ virtio_xmit_cleanup_normal_packed(struct virtqueue *vq, int num)
struct vq_desc_extra *dxp;
used_idx = vq->vq_used_cons_idx;
+ /* desc_is_used has a load-acquire or rte_cio_rmb inside
+ * and waits for the used desc in the virtqueue.
+ */
while (num-- && desc_is_used(&desc[used_idx], vq)) {
- virtio_rmb(vq->hw->weak_barriers);
id = desc[used_idx].id;
dxp = &vq->vq_descx[id];
vq->vq_used_cons_idx += dxp->ndescs;
@@ -698,8 +698,8 @@ virtio_user_handle_cq_packed(struct virtio_user_dev *dev, uint16_t queue_idx)
if (vq->used_wrap_counter)
flags |= VRING_PACKED_DESC_F_AVAIL_USED;
- rte_smp_wmb();
- vring->desc[vq->used_idx].flags = flags;
+ __atomic_store_n(&vring->desc[vq->used_idx].flags, flags,
+ __ATOMIC_RELEASE);
vq->used_idx += n_descs;
if (vq->used_idx >= dev->queue_size) {
@@ -286,7 +286,12 @@ desc_is_used(struct vring_packed_desc *desc, struct virtqueue *vq)
{
uint16_t used, avail, flags;
- flags = desc->flags;
+ if (vq->hw->weak_barriers)
+ flags = __atomic_load_n(&desc->flags, __ATOMIC_ACQUIRE);
+ else {
+ flags = desc->flags;
+ rte_cio_rmb();
+ }
used = !!(flags & VRING_PACKED_DESC_F_USED);
avail = !!(flags & VRING_PACKED_DESC_F_AVAIL);
@@ -110,8 +110,6 @@ flush_shadow_used_ring_packed(struct virtio_net *dev,
used_idx -= vq->size;
}
- rte_smp_wmb();
-
for (i = 0; i < vq->shadow_used_idx; i++) {
uint16_t flags;
@@ -147,7 +145,8 @@ flush_shadow_used_ring_packed(struct virtio_net *dev,
}
}
- vq->desc_packed[head_idx].flags = head_flags;
+ __atomic_store_n(&vq->desc_packed[head_idx].flags, head_flags,
+ __ATOMIC_RELEASE);
vhost_log_cache_used_vring(dev, vq,
head_idx *