[RFC] net/virtio: use real barriers for vDPA

Message ID 20181214123704.20110-1-i.maximets@samsung.com (mailing list archive)
State Superseded, archived
Delegated to: Maxime Coquelin
Series [RFC] net/virtio: use real barriers for vDPA

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK

Commit Message

Ilya Maximets Dec. 14, 2018, 12:37 p.m. UTC
  SMP barriers are OK for a software vhost implementation, but vDPA is
DMA-capable hardware, so we have to use at least DMA barriers to
ensure correct memory ordering.

This patch mimics the kernel virtio-net driver in how it chooses the
barriers we need.

Fixes: a3f8150eac6d ("net/ifcvf: add ifcvf vDPA driver")
Cc: stable@dpdk.org

Signed-off-by: Ilya Maximets <i.maximets@samsung.com>
---

Sending as RFC because the patch is completely untested. I have no
HW to test the real behaviour, and I actually do not know whether the
subsystem_vendor/device_id's are available at that time and hold the
IFCVF_SUBSYS_* values inside a real guest system.

One more thing I want to mention is that cross net client Live Migration
is actually not possible without any support from the guest driver,
because migration from software vhost to vDPA would lead to using
weaker barriers than required.

A similar change (weak_barriers = false) should be made in the Linux
kernel virtio-net driver.

Another dirty solution would be to restrict vDPA support to x86
systems only.

 drivers/net/virtio/virtio_ethdev.c      |  9 +++++++
 drivers/net/virtio/virtio_pci.h         |  1 +
 drivers/net/virtio/virtio_rxtx.c        | 14 +++++-----
 drivers/net/virtio/virtio_user_ethdev.c |  1 +
 drivers/net/virtio/virtqueue.h          | 35 +++++++++++++++++++++----
 5 files changed, 48 insertions(+), 12 deletions(-)
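
For reference, the Linux helpers this patch mimics look like this (from
include/linux/virtio_ring.h, slightly abridged). The DPDK variant in the
patch below substitutes rte_smp_*() for virt_*(), rte_cio_*() for dma_*()
and rte_mb() for mb():

    static inline void virtio_mb(bool weak_barriers)
    {
            if (weak_barriers)
                    virt_mb();
            else
                    mb();
    }

    static inline void virtio_rmb(bool weak_barriers)
    {
            if (weak_barriers)
                    virt_rmb();
            else
                    dma_rmb();
    }

    static inline void virtio_wmb(bool weak_barriers)
    {
            if (weak_barriers)
                    virt_wmb();
            else
                    dma_wmb();
    }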
  

Comments

Michael S. Tsirkin Dec. 14, 2018, 12:58 p.m. UTC | #1
On Fri, Dec 14, 2018 at 03:37:04PM +0300, Ilya Maximets wrote:
> SMP barriers are OK for a software vhost implementation, but vDPA is
> DMA-capable hardware, so we have to use at least DMA barriers to
> ensure correct memory ordering.
> 
> This patch mimics the kernel virtio-net driver in how it chooses the
> barriers we need.
> 
> Fixes: a3f8150eac6d ("net/ifcvf: add ifcvf vDPA driver")
> Cc: stable@dpdk.org
> 
> Signed-off-by: Ilya Maximets <i.maximets@samsung.com>

Instead of this vendor specific hack, can you please
use VIRTIO_F_ORDER_PLATFORM?


> ---
> 
> Sending as RFC because the patch is completely untested. I have no
> HW to test the real behaviour, and I actually do not know whether the
> subsystem_vendor/device_id's are available at that time and hold the
> IFCVF_SUBSYS_* values inside a real guest system.
> 
> One more thing I want to mention is that cross net client Live Migration
> is actually not possible without any support from the guest driver,
> because migration from software vhost to vDPA would lead to using
> weaker barriers than required.
> 
> A similar change (weak_barriers = false) should be made in the Linux
> kernel virtio-net driver.

IMHO no, it should use VIRTIO_F_ORDER_PLATFORM.

> diff --git a/drivers/net/virtio/virtqueue.h b/drivers/net/virtio/virtqueue.h
> index 26518ed98..7bf17d3bf 100644
> --- a/drivers/net/virtio/virtqueue.h
> +++ b/drivers/net/virtio/virtqueue.h
> @@ -19,15 +19,40 @@
>  struct rte_mbuf;
>  
>  /*
> - * Per virtio_config.h in Linux.
> + * Per virtio_ring.h in Linux.
>   *     For virtio_pci on SMP, we don't need to order with respect to MMIO
>   *     accesses through relaxed memory I/O windows, so smp_mb() et al are
>   *     sufficient.
>   *
> + *     For using virtio to talk to real devices (eg. vDPA) we do need real
> + *     barriers.
>   */
> -#define virtio_mb()	rte_smp_mb()
> -#define virtio_rmb()	rte_smp_rmb()
> -#define virtio_wmb()	rte_smp_wmb()
> +static inline void
> +virtio_mb(uint8_t weak_barriers)
> +{
> +	if (weak_barriers)
> +		rte_smp_mb();
> +	else
> +		rte_mb();
> +}
> +
> +static inline void
> +virtio_rmb(uint8_t weak_barriers)
> +{
> +	if (weak_barriers)
> +		rte_smp_rmb();
> +	else
> +		rte_cio_rmb();
> +}
> +
> +static inline void
> +virtio_wmb(uint8_t weak_barriers)
> +{
> +	if (weak_barriers)
> +		rte_smp_wmb();
> +	else
> +		rte_cio_wmb();
> +}
>  
>  #ifdef RTE_PMD_PACKET_PREFETCH
>  #define rte_packet_prefetch(p)  rte_prefetch1(p)


Note kernel uses dma_ barriers not full barriers.
I don't know whether it applies to dpdk.

> @@ -312,7 +337,7 @@ void vq_ring_free_inorder(struct virtqueue *vq, uint16_t desc_idx,
>  static inline void
>  vq_update_avail_idx(struct virtqueue *vq)
>  {
> -	virtio_wmb();
> +	virtio_wmb(vq->hw->weak_barriers);
>  	vq->vq_ring.avail->idx = vq->vq_avail_idx;
>  }
>  
> -- 
> 2.17.1
  
Ilya Maximets Dec. 14, 2018, 2:03 p.m. UTC | #2
On 14.12.2018 15:58, Michael S. Tsirkin wrote:
> On Fri, Dec 14, 2018 at 03:37:04PM +0300, Ilya Maximets wrote:
>> SMP barriers are OK for a software vhost implementation, but vDPA is
>> DMA-capable hardware, so we have to use at least DMA barriers to
>> ensure correct memory ordering.
>>
>> This patch mimics the kernel virtio-net driver in how it chooses the
>> barriers we need.
>>
>> Fixes: a3f8150eac6d ("net/ifcvf: add ifcvf vDPA driver")
>> Cc: stable@dpdk.org
>>
>> Signed-off-by: Ilya Maximets <i.maximets@samsung.com>
> 
> Instead of this vendor specific hack, can you please
> use VIRTIO_F_ORDER_PLATFORM?

Hmm. Yes. You're probably right.
But VIRTIO_F_ORDER_PLATFORM, a.k.a. VIRTIO_F_IO_BARRIER, is not implemented
anywhere at the moment. This will require changes to the whole stack (vhost,
QEMU, virtio drivers) to allow the negotiation. Moreover, I guess that it's
not offered/correctly treated by current vDPA HW, i.e. it allows working
with weak barriers (it is allowed to do that by the spec, but I don't think
that's right). This vendor-specific hack could allow things to work right now.

Actually, I'm not sure if vDPA is usable with current QEMU?
Does anybody know? If it's usable, then having a hack could be acceptable
for now. If not, of course, we could drop this and implement a virtio-native
solution.

But yes, I agree that a virtio-native solution should be done anyway, regardless
of having or not having hacks like this.

Maybe I can split the patch in two:
1. Add ability to use real barriers. ('weak_barriers' always 'true')
2. Hack to initialize 'weak_barriers' according to subsystem_device_id.

The first patch will be needed anyway. The second one could be dropped, or
applied and replaced in the future by a proper solution based on VIRTIO_F_ORDER_PLATFORM.

Thoughts?

One more thing I have concerns about is that vDPA offers support for earlier
versions of the virtio protocol, which looks strange.
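
For context, the virtio-native solution amounts to deriving 'weak_barriers'
from the negotiated feature set instead of from PCI IDs. A minimal sketch,
assuming the feature bit number defined by the virtio spec (36) and DPDK's
existing vtpci_with_feature() helper:

    #define VIRTIO_F_ORDER_PLATFORM 36

    /* Weak (SMP) barriers are sufficient only when the device did not
     * request platform-ordering semantics. */
    hw->weak_barriers = !vtpci_with_feature(hw, VIRTIO_F_ORDER_PLATFORM);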

> 
> 
>> ---
>>
>> Sending as RFC because the patch is completely untested. I have no
>> HW to test the real behaviour, and I actually do not know whether the
>> subsystem_vendor/device_id's are available at that time and hold the
>> IFCVF_SUBSYS_* values inside a real guest system.
>>
>> One more thing I want to mention is that cross net client Live Migration
>> is actually not possible without any support from the guest driver,
>> because migration from software vhost to vDPA would lead to using
>> weaker barriers than required.
>>
>> A similar change (weak_barriers = false) should be made in the Linux
>> kernel virtio-net driver.
> 
> IMHO no, it should use VIRTIO_F_ORDER_PLATFORM.

Sure. I'm not a fan of hacks either, especially inside the kernel.

>> diff --git a/drivers/net/virtio/virtqueue.h b/drivers/net/virtio/virtqueue.h
>> index 26518ed98..7bf17d3bf 100644
>> --- a/drivers/net/virtio/virtqueue.h
>> +++ b/drivers/net/virtio/virtqueue.h
>> @@ -19,15 +19,40 @@
>>  struct rte_mbuf;
>>  
>>  /*
>> - * Per virtio_config.h in Linux.
>> + * Per virtio_ring.h in Linux.
>>   *     For virtio_pci on SMP, we don't need to order with respect to MMIO
>>   *     accesses through relaxed memory I/O windows, so smp_mb() et al are
>>   *     sufficient.
>>   *
>> + *     For using virtio to talk to real devices (eg. vDPA) we do need real
>> + *     barriers.
>>   */
>> -#define virtio_mb()	rte_smp_mb()
>> -#define virtio_rmb()	rte_smp_rmb()
>> -#define virtio_wmb()	rte_smp_wmb()
>> +static inline void
>> +virtio_mb(uint8_t weak_barriers)
>> +{
>> +	if (weak_barriers)
>> +		rte_smp_mb();
>> +	else
>> +		rte_mb();
>> +}
>> +
>> +static inline void
>> +virtio_rmb(uint8_t weak_barriers)
>> +{
>> +	if (weak_barriers)
>> +		rte_smp_rmb();
>> +	else
>> +		rte_cio_rmb();
>> +}
>> +
>> +static inline void
>> +virtio_wmb(uint8_t weak_barriers)
>> +{
>> +	if (weak_barriers)
>> +		rte_smp_wmb();
>> +	else
>> +		rte_cio_wmb();
>> +}
>>  
>>  #ifdef RTE_PMD_PACKET_PREFETCH
>>  #define rte_packet_prefetch(p)  rte_prefetch1(p)
> 
> 
> Note kernel uses dma_ barriers not full barriers.
> I don't know whether it applies to dpdk.

Yes, I know. I used rte_cio_* barriers, which are the DPDK equivalent of
the dma_* barriers from the kernel.

  
Michael S. Tsirkin Dec. 14, 2018, 3:01 p.m. UTC | #3
On Fri, Dec 14, 2018 at 05:03:43PM +0300, Ilya Maximets wrote:
> On 14.12.2018 15:58, Michael S. Tsirkin wrote:
> > On Fri, Dec 14, 2018 at 03:37:04PM +0300, Ilya Maximets wrote:
> >> SMP barriers are OK for a software vhost implementation, but vDPA is
> >> DMA-capable hardware, so we have to use at least DMA barriers to
> >> ensure correct memory ordering.
> >>
> >> This patch mimics the kernel virtio-net driver in how it chooses the
> >> barriers we need.
> >>
> >> Fixes: a3f8150eac6d ("net/ifcvf: add ifcvf vDPA driver")
> >> Cc: stable@dpdk.org
> >>
> >> Signed-off-by: Ilya Maximets <i.maximets@samsung.com>
> > 
> > Instead of this vendor specific hack, can you please
> > use VIRTIO_F_ORDER_PLATFORM?
> 
> Hmm. Yes. You're probably right.
> But VIRTIO_F_ORDER_PLATFORM, a.k.a. VIRTIO_F_IO_BARRIER, is not implemented
> anywhere at the moment. This will require changes to the whole stack (vhost,
> QEMU, virtio drivers) to allow the negotiation.

You are changing virtio drivers anyway, aren't you?

> Moreover, I guess that it's
> not offered/correctly treated by current vDPA HW, i.e. it allows working
> with weak barriers (it is allowed to do that by the spec, but I don't think
> that's right).

The idea is that the device (e.g. a vDPA backend) can detect that
- platform is weakly ordered
- VIRTIO_F_ORDER_PLATFORM is not negotiated
and then use a software fallback. Slower but safe.
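
In sketch form, with hypothetical helper names, that backend-side decision
would be:

    /* Fall back to software ring processing when a weakly ordered platform
     * cannot rely on the guest using strong enough barriers. */
    if (platform_is_weakly_ordered() &&
        !(features & (1ULL << VIRTIO_F_ORDER_PLATFORM)))
            use_software_relay();    /* slower, but ordering-safe */
    else
            use_hw_datapath();       /* vDPA HW consumes the rings directly */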

> This vendor-specific hack could allow things to work right now.
> 
> Actually, I'm not sure if vDPA is usable with current QEMU?
> Does anybody know? If it's usable, then having a hack could be acceptable
> for now. If not, of course, we could drop this and implement a virtio-native
> solution.

I think it's only usable on x86 right now.

> But yes, I agree that a virtio-native solution should be done anyway, regardless
> of having or not having hacks like this.
> 
> Maybe I can split the patch in two:
> 1. Add ability to use real barriers. ('weak_barriers' always 'true')
> 2. Hack to initialize 'weak_barriers' according to subsystem_device_id.
> 
> The first patch will be needed anyway. The second one could be dropped, or
> applied and replaced in the future by a proper solution based on VIRTIO_F_ORDER_PLATFORM.
> 
> Thoughts?

I'd rather not do 2 at all since otherwise we will just
get more vendors asking us to special-case their hardware.
Exposing a new feature bit is not a lot of work at all.

> One more thing I have concerns about is that vDPA offers support for earlier
> versions of the virtio protocol, which looks strange.

I suspect things like BE just don't work. Again x86 is fine.


  
Ilya Maximets Dec. 14, 2018, 3:56 p.m. UTC | #4
On 14.12.2018 18:01, Michael S. Tsirkin wrote:
> On Fri, Dec 14, 2018 at 05:03:43PM +0300, Ilya Maximets wrote:
>> On 14.12.2018 15:58, Michael S. Tsirkin wrote:
>>> On Fri, Dec 14, 2018 at 03:37:04PM +0300, Ilya Maximets wrote:
>>>> SMP barriers are OK for a software vhost implementation, but vDPA is
>>>> DMA-capable hardware, so we have to use at least DMA barriers to
>>>> ensure correct memory ordering.
>>>>
>>>> This patch mimics the kernel virtio-net driver in how it chooses the
>>>> barriers we need.
>>>>
>>>> Fixes: a3f8150eac6d ("net/ifcvf: add ifcvf vDPA driver")
>>>> Cc: stable@dpdk.org
>>>>
>>>> Signed-off-by: Ilya Maximets <i.maximets@samsung.com>
>>>
>>> Instead of this vendor specific hack, can you please
>>> use VIRTIO_F_ORDER_PLATFORM?
>>
>> Hmm. Yes. You're probably right.
>> But VIRTIO_F_ORDER_PLATFORM, a.k.a. VIRTIO_F_IO_BARRIER, is not implemented
>> anywhere at the moment. This will require changes to the whole stack (vhost,
>> QEMU, virtio drivers) to allow the negotiation.
> 
> You are changing virtio drivers anyway, aren't you?

But this hack allows us not to change QEMU.

> 
>> Moreover, I guess that it's
>> not offered/correctly treated by current vDPA HW, i.e. it allows working
>> with weak barriers (it is allowed to do that by the spec, but I don't think
>> that's right).
> 
> The idea is that the device (e.g. a vDPA backend) can detect that
> - platform is weakly ordered
> - VIRTIO_F_ORDER_PLATFORM is not negotiated
> and then use a software fallback. Slower but safe.

OK.

> 
>> This vendor-specific hack could allow things to work right now.
>>
>> Actually, I'm not sure if vDPA is usable with current QEMU?
>> Does anybody know? If it's usable, then having a hack could be acceptable
>> for now. If not, of course, we could drop this and implement a virtio-native
>> solution.
> 
> I think it's only usable on x86 right now.
> 
>> But yes, I agree that a virtio-native solution should be done anyway, regardless
>> of having or not having hacks like this.
>>
>> Maybe I can split the patch in two:
>> 1. Add ability to use real barriers. ('weak_barriers' always 'true')
>> 2. Hack to initialize 'weak_barriers' according to subsystem_device_id.
>>
>> The first patch will be needed anyway. The second one could be dropped, or
>> applied and replaced in the future by a proper solution based on VIRTIO_F_ORDER_PLATFORM.
>>
>> Thoughts?
> 
> I'd rather not do 2 at all since otherwise we will just
> get more vendors asking us to special-case their hardware.

OK. So, I prepared the patch that actually implements VIRTIO_F_ORDER_PLATFORM:
    http://mails.dpdk.org/archives/dev/2018-December/121031.html
This should be enough from the DPDK side, provided the vDPA HW offers the feature.

> Exposing a new feature bit is not a lot of work at all.

It depends. However, this one should be simple.
I'll prepare a patch for QEMU.

> 
> >> One more thing I have concerns about is that vDPA offers support for earlier
> >> versions of the virtio protocol, which looks strange.
> 
> I suspect things like BE just don't work. Again x86 is fine.

OK.

  

Patch

diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index cb2b2e0bf..249b536fd 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -1448,6 +1448,9 @@  virtio_configure_intr(struct rte_eth_dev *dev)
 	return 0;
 }
 
+#define IFCVF_SUBSYS_VENDOR_ID	0x8086
+#define IFCVF_SUBSYS_DEVICE_ID	0x001A
+
 /* reset device and renegotiate features if needed */
 static int
 virtio_init_device(struct rte_eth_dev *eth_dev, uint64_t req_features)
@@ -1477,6 +1480,12 @@  virtio_init_device(struct rte_eth_dev *eth_dev, uint64_t req_features)
 	if (!hw->virtio_user_dev) {
 		pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
 		rte_eth_copy_pci_info(eth_dev, pci_dev);
+
+		if (pci_dev->id.subsystem_vendor_id == IFCVF_SUBSYS_VENDOR_ID &&
+		    pci_dev->id.subsystem_device_id == IFCVF_SUBSYS_DEVICE_ID)
+			hw->weak_barriers = 0;
+		else
+			hw->weak_barriers = 1;
 	}
 
 	/* If host does not support both status and MSI-X then disable LSC */
diff --git a/drivers/net/virtio/virtio_pci.h b/drivers/net/virtio/virtio_pci.h
index e961a58ca..1f8e719a9 100644
--- a/drivers/net/virtio/virtio_pci.h
+++ b/drivers/net/virtio/virtio_pci.h
@@ -240,6 +240,7 @@  struct virtio_hw {
 	uint8_t     use_simple_rx;
 	uint8_t     use_inorder_rx;
 	uint8_t     use_inorder_tx;
+	uint8_t     weak_barriers;
 	bool        has_tx_offload;
 	bool        has_rx_offload;
 	uint16_t    port_id;
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index cb8f89f18..66195bf47 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -906,7 +906,7 @@  virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 
 	nb_used = VIRTQUEUE_NUSED(vq);
 
-	virtio_rmb();
+	virtio_rmb(hw->weak_barriers);
 
 	num = likely(nb_used <= nb_pkts) ? nb_used : nb_pkts;
 	if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
@@ -1017,7 +1017,7 @@  virtio_recv_mergeable_pkts_inorder(void *rx_queue,
 	nb_used = RTE_MIN(nb_used, nb_pkts);
 	nb_used = RTE_MIN(nb_used, VIRTIO_MBUF_BURST_SZ);
 
-	virtio_rmb();
+	virtio_rmb(hw->weak_barriers);
 
 	PMD_RX_LOG(DEBUG, "used:%d", nb_used);
 
@@ -1202,7 +1202,7 @@  virtio_recv_mergeable_pkts(void *rx_queue,
 
 	nb_used = VIRTQUEUE_NUSED(vq);
 
-	virtio_rmb();
+	virtio_rmb(hw->weak_barriers);
 
 	PMD_RX_LOG(DEBUG, "used:%d", nb_used);
 
@@ -1365,7 +1365,7 @@  virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 	PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
 	nb_used = VIRTQUEUE_NUSED(vq);
 
-	virtio_rmb();
+	virtio_rmb(hw->weak_barriers);
 	if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
 		virtio_xmit_cleanup(vq, nb_used);
 
@@ -1407,7 +1407,7 @@  virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 		/* Positive value indicates it need free vring descriptors */
 		if (unlikely(need > 0)) {
 			nb_used = VIRTQUEUE_NUSED(vq);
-			virtio_rmb();
+			virtio_rmb(hw->weak_barriers);
 			need = RTE_MIN(need, (int)nb_used);
 
 			virtio_xmit_cleanup(vq, need);
@@ -1463,7 +1463,7 @@  virtio_xmit_pkts_inorder(void *tx_queue,
 	PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
 	nb_used = VIRTQUEUE_NUSED(vq);
 
-	virtio_rmb();
+	virtio_rmb(hw->weak_barriers);
 	if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
 		virtio_xmit_cleanup_inorder(vq, nb_used);
 
@@ -1511,7 +1511,7 @@  virtio_xmit_pkts_inorder(void *tx_queue,
 		need = slots - vq->vq_free_cnt;
 		if (unlikely(need > 0)) {
 			nb_used = VIRTQUEUE_NUSED(vq);
-			virtio_rmb();
+			virtio_rmb(hw->weak_barriers);
 			need = RTE_MIN(need, (int)nb_used);
 
 			virtio_xmit_cleanup_inorder(vq, need);
diff --git a/drivers/net/virtio/virtio_user_ethdev.c b/drivers/net/virtio/virtio_user_ethdev.c
index f8791391a..f075774b4 100644
--- a/drivers/net/virtio/virtio_user_ethdev.c
+++ b/drivers/net/virtio/virtio_user_ethdev.c
@@ -434,6 +434,7 @@  virtio_user_eth_dev_alloc(struct rte_vdev_device *vdev)
 	hw->use_simple_rx = 0;
 	hw->use_inorder_rx = 0;
 	hw->use_inorder_tx = 0;
+	hw->weak_barriers = 1;
 	hw->virtio_user_dev = dev;
 	return eth_dev;
 }
diff --git a/drivers/net/virtio/virtqueue.h b/drivers/net/virtio/virtqueue.h
index 26518ed98..7bf17d3bf 100644
--- a/drivers/net/virtio/virtqueue.h
+++ b/drivers/net/virtio/virtqueue.h
@@ -19,15 +19,40 @@ 
 struct rte_mbuf;
 
 /*
- * Per virtio_config.h in Linux.
+ * Per virtio_ring.h in Linux.
  *     For virtio_pci on SMP, we don't need to order with respect to MMIO
  *     accesses through relaxed memory I/O windows, so smp_mb() et al are
  *     sufficient.
  *
+ *     For using virtio to talk to real devices (eg. vDPA) we do need real
+ *     barriers.
  */
-#define virtio_mb()	rte_smp_mb()
-#define virtio_rmb()	rte_smp_rmb()
-#define virtio_wmb()	rte_smp_wmb()
+static inline void
+virtio_mb(uint8_t weak_barriers)
+{
+	if (weak_barriers)
+		rte_smp_mb();
+	else
+		rte_mb();
+}
+
+static inline void
+virtio_rmb(uint8_t weak_barriers)
+{
+	if (weak_barriers)
+		rte_smp_rmb();
+	else
+		rte_cio_rmb();
+}
+
+static inline void
+virtio_wmb(uint8_t weak_barriers)
+{
+	if (weak_barriers)
+		rte_smp_wmb();
+	else
+		rte_cio_wmb();
+}
 
 #ifdef RTE_PMD_PACKET_PREFETCH
 #define rte_packet_prefetch(p)  rte_prefetch1(p)
@@ -312,7 +337,7 @@  void vq_ring_free_inorder(struct virtqueue *vq, uint16_t desc_idx,
 static inline void
 vq_update_avail_idx(struct virtqueue *vq)
 {
-	virtio_wmb();
+	virtio_wmb(vq->hw->weak_barriers);
 	vq->vq_ring.avail->idx = vq->vq_avail_idx;
 }
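
To illustrate what the parameterized barriers protect, here is the split-ring
publish/consume pattern the hunks above touch, in sketch form (variable names
illustrative):

    /* Producer (driver) side, as in vq_update_avail_idx(): the avail-ring
     * entry must be visible before the index that publishes it. */
    vq->vq_ring.avail->ring[avail_idx] = desc_idx;
    virtio_wmb(vq->hw->weak_barriers);   /* order: ring entry before idx */
    vq->vq_ring.avail->idx = vq->vq_avail_idx;

    /* Consumer side, as in the rx/tx burst functions: the used index must
     * be read before the used-ring entries it covers. */
    nb_used = VIRTQUEUE_NUSED(vq);
    virtio_rmb(vq->hw->weak_barriers);   /* order: idx read before entries */

With weak_barriers set, virtio_rmb()/virtio_wmb() reduce to rte_smp_*()
barriers (plain compiler barriers on x86); with it cleared, the rte_cio_*()
barriers are strong enough to order accesses as seen by a DMA-capable device.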