[dpdk-dev,v2] net/virtio: cache Rx/Tx offload ability check

Message ID 1478269793-11082-1-git-send-email-yuanhan.liu@linux.intel.com (mailing list archive)
State Changes Requested, archived
Delegated to: Yuanhan Liu
Headers

Checks

Context Check Description
tmonjalo/checkpatch success coding style OK

Commit Message

Yuanhan Liu Nov. 4, 2016, 2:29 p.m. UTC
It's not a good idea to check whether Rx/Tx offload is enabled in the
data path. Instead, we can do the check at the init stage and store
the result, so that we avoid repeating the check again and again in
the critical data path.

Cc: Olivier Matz <olivier.matz@6wind.com>
Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
---
v2: - rebase on top of the bug fix patches
    - define rx/tx_offload as uint8_t instead of int

 drivers/net/virtio/virtio_ethdev.c | 19 +++++++++++++++++++
 drivers/net/virtio/virtio_pci.h    |  2 ++
 drivers/net/virtio/virtio_rxtx.c   | 31 +++++--------------------------
 3 files changed, 26 insertions(+), 26 deletions(-)
  

Comments

Olivier Matz Nov. 8, 2016, 8:43 a.m. UTC | #1
Hi Yuanhan,

On 11/04/2016 03:29 PM, Yuanhan Liu wrote:
> It's not a good idea to do the check of whether Rx/Tx offload is
> enabled at the data path. Instead, we could do the check at init
> stage and store the result, so that we could avoid the check again
> and again at the critical datapath.
> 
> Cc: Olivier Matz <olivier.matz@6wind.com>
> Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
> ---
> v2: - rebase on top of the bug fix patches
>     - define rx/tx_offload as uint8_t instead of int
> 
>  drivers/net/virtio/virtio_ethdev.c | 19 +++++++++++++++++++
>  drivers/net/virtio/virtio_pci.h    |  2 ++
>  drivers/net/virtio/virtio_rxtx.c   | 31 +++++--------------------------
>  3 files changed, 26 insertions(+), 26 deletions(-)
> 
> diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
> index 1505f67..2adae58 100644
> --- a/drivers/net/virtio/virtio_ethdev.c
> +++ b/drivers/net/virtio/virtio_ethdev.c
> @@ -1188,6 +1188,22 @@ rx_func_get(struct rte_eth_dev *eth_dev)
>  		eth_dev->rx_pkt_burst = &virtio_recv_pkts;
>  }
>  
> +static inline int
> +rx_offload_enabled(struct virtio_hw *hw)
> +{
> +	return vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_CSUM) ||
> +		vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO4) ||
> +		vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO6);
> +}
> +
> +static inline int
> +tx_offload_enabled(struct virtio_hw *hw)
> +{
> +	return vtpci_with_feature(hw, VIRTIO_NET_F_CSUM) ||
> +		vtpci_with_feature(hw, VIRTIO_NET_F_HOST_TSO4) ||
> +		vtpci_with_feature(hw, VIRTIO_NET_F_HOST_TSO6);
> +}

Do we need these functions to be inlined?

It looks better done this way, but out of curiosity, do you see a
performance improvement?

Regards,
Olivier
  
Yuanhan Liu Nov. 9, 2016, 5:24 a.m. UTC | #2
On Tue, Nov 08, 2016 at 09:43:14AM +0100, Olivier Matz wrote:
> Hi Yuanhan,
> 
> On 11/04/2016 03:29 PM, Yuanhan Liu wrote:
> > It's not a good idea to do the check of whether Rx/Tx offload is
> > enabled at the data path. Instead, we could do the check at init
> > stage and store the result, so that we could avoid the check again
> > and again at the critical datapath.
> > 
> > Cc: Olivier Matz <olivier.matz@6wind.com>
> > Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
> > ---
> > v2: - rebase on top of the bug fix patches
> >     - define rx/tx_offload as uint8_t instead of int
> > 
> >  drivers/net/virtio/virtio_ethdev.c | 19 +++++++++++++++++++
> >  drivers/net/virtio/virtio_pci.h    |  2 ++
> >  drivers/net/virtio/virtio_rxtx.c   | 31 +++++--------------------------
> >  3 files changed, 26 insertions(+), 26 deletions(-)
> > 
> > diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
> > index 1505f67..2adae58 100644
> > --- a/drivers/net/virtio/virtio_ethdev.c
> > +++ b/drivers/net/virtio/virtio_ethdev.c
> > @@ -1188,6 +1188,22 @@ rx_func_get(struct rte_eth_dev *eth_dev)
> >  		eth_dev->rx_pkt_burst = &virtio_recv_pkts;
> >  }
> >  
> > +static inline int
> > +rx_offload_enabled(struct virtio_hw *hw)
> > +{
> > +	return vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_CSUM) ||
> > +		vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO4) ||
> > +		vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO6);
> > +}
> > +
> > +static inline int
> > +tx_offload_enabled(struct virtio_hw *hw)
> > +{
> > +	return vtpci_with_feature(hw, VIRTIO_NET_F_CSUM) ||
> > +		vtpci_with_feature(hw, VIRTIO_NET_F_HOST_TSO4) ||
> > +		vtpci_with_feature(hw, VIRTIO_NET_F_HOST_TSO6);
> > +}
> 
> Do we need these functions to be inlined?

Nope, that was simply the result of copy & paste. I could drop the
inline qualifiers in a future version.

> It looks better to do like this, but out of curiosity, do you see a
> performance improvement?

I didn't bother to try it: I'd assume it brings no (or at least no
obvious) improvement.

	--yliu
  

Patch

diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index 1505f67..2adae58 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -1188,6 +1188,22 @@  rx_func_get(struct rte_eth_dev *eth_dev)
 		eth_dev->rx_pkt_burst = &virtio_recv_pkts;
 }
 
+static inline int
+rx_offload_enabled(struct virtio_hw *hw)
+{
+	return vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_CSUM) ||
+		vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO4) ||
+		vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO6);
+}
+
+static inline int
+tx_offload_enabled(struct virtio_hw *hw)
+{
+	return vtpci_with_feature(hw, VIRTIO_NET_F_CSUM) ||
+		vtpci_with_feature(hw, VIRTIO_NET_F_HOST_TSO4) ||
+		vtpci_with_feature(hw, VIRTIO_NET_F_HOST_TSO6);
+}
+
 /* reset device and renegotiate features if needed */
 static int
 virtio_init_device(struct rte_eth_dev *eth_dev, uint64_t req_features)
@@ -1209,6 +1225,9 @@  virtio_init_device(struct rte_eth_dev *eth_dev, uint64_t req_features)
 	if (virtio_negotiate_features(hw, req_features) < 0)
 		return -1;
 
+	hw->tx_offload = tx_offload_enabled(hw);
+	hw->rx_offload = rx_offload_enabled(hw);
+
 	/* If host does not support status then disable LSC */
 	if (!vtpci_with_feature(hw, VIRTIO_NET_F_STATUS))
 		eth_dev->data->dev_flags &= ~RTE_ETH_DEV_INTR_LSC;
diff --git a/drivers/net/virtio/virtio_pci.h b/drivers/net/virtio/virtio_pci.h
index de271bf..7d1dd9b 100644
--- a/drivers/net/virtio/virtio_pci.h
+++ b/drivers/net/virtio/virtio_pci.h
@@ -254,6 +254,8 @@  struct virtio_hw {
 	uint8_t	    use_msix;
 	uint8_t     modern;
 	uint8_t     use_simple_rxtx;
+	uint8_t     tx_offload;
+	uint8_t     rx_offload;
 	uint8_t     mac_addr[ETHER_ADDR_LEN];
 	uint32_t    notify_off_multiplier;
 	uint8_t     *isr;
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index 4cb2ce7..7571191 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -250,14 +250,6 @@  virtio_tso_fix_cksum(struct rte_mbuf *m)
 	}
 }
 
-static inline int
-tx_offload_enabled(struct virtio_hw *hw)
-{
-	return vtpci_with_feature(hw, VIRTIO_NET_F_CSUM) ||
-		vtpci_with_feature(hw, VIRTIO_NET_F_HOST_TSO4) ||
-		vtpci_with_feature(hw, VIRTIO_NET_F_HOST_TSO6);
-}
-
 static inline void
 virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
 		       uint16_t needed, int use_indirect, int can_push)
@@ -270,9 +262,7 @@  virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
 	uint16_t head_idx, idx;
 	uint16_t head_size = vq->hw->vtnet_hdr_size;
 	struct virtio_net_hdr *hdr;
-	int offload;
 
-	offload = tx_offload_enabled(vq->hw);
 	head_idx = vq->vq_desc_head_idx;
 	idx = head_idx;
 	dxp = &vq->vq_descx[idx];
@@ -286,7 +276,7 @@  virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
 		hdr = (struct virtio_net_hdr *)
 			rte_pktmbuf_prepend(cookie, head_size);
 		/* if offload disabled, it is not zeroed below, do it now */
-		if (offload == 0)
+		if (vq->hw->tx_offload == 0)
 			memset(hdr, 0, head_size);
 	} else if (use_indirect) {
 		/* setup tx ring slot to point to indirect
@@ -318,7 +308,7 @@  virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
 	}
 
 	/* Checksum Offload / TSO */
-	if (offload) {
+	if (vq->hw->tx_offload) {
 		if (cookie->ol_flags & PKT_TX_TCP_SEG)
 			cookie->ol_flags |= PKT_TX_TCP_CKSUM;
 
@@ -735,14 +725,6 @@  virtio_rx_offload(struct rte_mbuf *m, struct virtio_net_hdr *hdr)
 	return 0;
 }
 
-static inline int
-rx_offload_enabled(struct virtio_hw *hw)
-{
-	return vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_CSUM) ||
-		vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO4) ||
-		vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO6);
-}
-
 #define VIRTIO_MBUF_BURST_SZ 64
 #define DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_desc))
 uint16_t
@@ -758,7 +740,6 @@  virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 	int error;
 	uint32_t i, nb_enqueued;
 	uint32_t hdr_size;
-	int offload;
 	struct virtio_net_hdr *hdr;
 
 	nb_used = VIRTQUEUE_NUSED(vq);
@@ -777,7 +758,6 @@  virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 	nb_rx = 0;
 	nb_enqueued = 0;
 	hdr_size = hw->vtnet_hdr_size;
-	offload = rx_offload_enabled(hw);
 
 	for (i = 0; i < num ; i++) {
 		rxm = rcv_pkts[i];
@@ -808,7 +788,7 @@  virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 		if (hw->vlan_strip)
 			rte_vlan_strip(rxm);
 
-		if (offload && virtio_rx_offload(rxm, hdr) < 0) {
+		if (hw->rx_offload && virtio_rx_offload(rxm, hdr) < 0) {
 			virtio_discard_rxbuf(vq, rxm);
 			rxvq->stats.errors++;
 			continue;
@@ -873,7 +853,6 @@  virtio_recv_mergeable_pkts(void *rx_queue,
 	uint16_t extra_idx;
 	uint32_t seg_res;
 	uint32_t hdr_size;
-	int offload;
 
 	nb_used = VIRTQUEUE_NUSED(vq);
 
@@ -889,7 +868,6 @@  virtio_recv_mergeable_pkts(void *rx_queue,
 	extra_idx = 0;
 	seg_res = 0;
 	hdr_size = hw->vtnet_hdr_size;
-	offload = rx_offload_enabled(hw);
 
 	while (i < nb_used) {
 		struct virtio_net_hdr_mrg_rxbuf *header;
@@ -935,7 +913,8 @@  virtio_recv_mergeable_pkts(void *rx_queue,
 		rx_pkts[nb_rx] = rxm;
 		prev = rxm;
 
-		if (offload && virtio_rx_offload(rxm, &header->hdr) < 0) {
+		if (hw->rx_offload &&
+		    virtio_rx_offload(rxm, &header->hdr) < 0) {
 			virtio_discard_rxbuf(vq, rxm);
 			rxvq->stats.errors++;
 			continue;