[dpdk-dev] [PATCH 5/5] net/virtio: fix TSO when mbuf is shared
Olivier Matz
olivier.matz at 6wind.com
Thu Nov 24 09:56:38 CET 2016
With virtio, doing TSO requires modifying the network
packet data:
- the DPDK API requires the L4 checksum to be set to an
  Intel-NIC-like pseudo-header checksum that does
  not include the IP length
- the virtio peer expects the L4 checksum to be
  a standard pseudo-header checksum.
This is a problem for shared packets, because they
must not be modified.
This patch fixes the issue by copying the headers into
a linear buffer in that case. This buffer is located in
the virtio_tx_region, right after the virtio header.
The size of this buffer is set to 256, which should
be enough in all cases:
sizeof(ethernet) + sizeof(vlan) * 2 + sizeof(ip6) +
sizeof(ip6-ext) + sizeof(tcp) + sizeof(tcp-opts)
= 14 + 8 + 40 + sizeof(ip6-ext) + 20 + sizeof(tcp-opts)
= 82 + sizeof(ip6-ext) + sizeof(tcp-opts)
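
For reference, going from the Intel-NIC-style value back to the
standard pseudo-header checksum only requires folding the L4 payload
length into the existing 16-bit one's complement sum, which is what
virtio_tso_fix_cksum() does below. A minimal standalone sketch of the
adjustment (the helper name is illustrative, not part of the patch):

    /* Fold a big-endian payload length into an existing 16-bit
     * one's complement checksum (both in network byte order). */
    static inline uint16_t
    phdr_cksum_add_paylen(uint16_t cksum, uint16_t ip_paylen)
    {
        uint32_t tmp = (uint32_t)cksum + ip_paylen;

        /* end-around carry; a single fold is enough here */
        tmp = (tmp & 0xffff) + (tmp >> 16);
        return (uint16_t)tmp;
    }
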
Fixes: 696573046e9e ("net/virtio: support TSO")
Signed-off-by: Olivier Matz <olivier.matz at 6wind.com>
---
drivers/net/virtio/virtio_rxtx.c | 119 +++++++++++++++++++++++++++------------
drivers/net/virtio/virtqueue.h | 2 +
2 files changed, 85 insertions(+), 36 deletions(-)
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index 22d97a4..577c775 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -211,43 +211,73 @@ virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf *cookie)
/* When doing TSO, the IP length is not included in the pseudo header
* checksum of the packet given to the PMD, but for virtio it is
- * expected.
+ * expected. Fix the checksum in the mbuf, or in the given linear
+ * buffer if the mbuf is shared. Return the number of bytes copied
+ * into the linear buffer, or 0 if the mbuf was fixed in place.
*/
-static void
-virtio_tso_fix_cksum(struct rte_mbuf *m)
+static unsigned int
+virtio_tso_fix_cksum(struct rte_mbuf *m, char *hdr, size_t hdr_sz)
{
- /* common case: header is not fragmented */
- if (likely(rte_pktmbuf_data_len(m) >= m->l2_len + m->l3_len +
- m->l4_len)) {
- struct ipv4_hdr *iph;
- struct ipv6_hdr *ip6h;
- struct tcp_hdr *th;
- uint16_t prev_cksum, new_cksum, ip_len, ip_paylen;
- uint32_t tmp;
-
- iph = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len);
- th = RTE_PTR_ADD(iph, m->l3_len);
- if ((iph->version_ihl >> 4) == 4) {
- iph->hdr_checksum = 0;
- iph->hdr_checksum = rte_ipv4_cksum(iph);
- ip_len = iph->total_length;
- ip_paylen = rte_cpu_to_be_16(rte_be_to_cpu_16(ip_len) -
- m->l3_len);
- } else {
- ip6h = (struct ipv6_hdr *)iph;
- ip_paylen = ip6h->payload_len;
+ struct ipv4_hdr *iph, iph_copy;
+ struct ipv6_hdr *ip6h = NULL, ip6h_copy;
+ struct tcp_hdr *th, th_copy;
+ size_t hdrlen = m->l2_len + m->l3_len + m->l4_len;
+ uint16_t prev_cksum, new_cksum, ip_len, ip_paylen;
+ uint32_t tmp;
+ int shared = 0;
+
+ /* mbuf is read-only: copy the headers into a linear buffer */
+ if (unlikely(rte_pktmbuf_data_is_shared(m, 0, hdrlen))) {
+ shared = 1;
+
+ /* network headers are too big, there's nothing we can do */
+ if (hdrlen > hdr_sz)
+ return 0;
+
+ rte_pktmbuf_read_copy(m, 0, hdrlen, hdr);
+ iph = (struct ipv4_hdr *)(hdr + m->l2_len);
+ ip6h = (struct ipv6_hdr *)(hdr + m->l2_len);
+ th = (struct tcp_hdr *)(hdr + m->l2_len + m->l3_len);
+ } else {
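+ /* rte_pktmbuf_read() returns a pointer inside the mbuf data when
+ * the headers are contiguous, else it copies them into the given
+ * buffer and returns that buffer */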
+ iph = rte_pktmbuf_read(m, m->l2_len, sizeof(*iph), &iph_copy);
+ th = rte_pktmbuf_read(m, m->l2_len + m->l3_len, sizeof(*th),
+ &th_copy);
+ }
+
+ if ((iph->version_ihl >> 4) == 4) {
+ iph->hdr_checksum = 0;
+ iph->hdr_checksum = rte_ipv4_cksum(iph);
+ ip_len = iph->total_length;
+ ip_paylen = rte_cpu_to_be_16(rte_be_to_cpu_16(ip_len) -
+ m->l3_len);
+ } else {
+ if (!shared) {
+ ip6h = rte_pktmbuf_read(m, m->l2_len, sizeof(*ip6h),
+ &ip6h_copy);
}
+ ip_paylen = ip6h->payload_len;
+ }
- /* calculate the new phdr checksum not including ip_paylen */
- prev_cksum = th->cksum;
- tmp = prev_cksum;
- tmp += ip_paylen;
- tmp = (tmp & 0xffff) + (tmp >> 16);
- new_cksum = tmp;
+ /* calculate the new phdr checksum, now including ip_paylen */
+ prev_cksum = th->cksum;
+ tmp = prev_cksum;
+ tmp += ip_paylen;
+ tmp = (tmp & 0xffff) + (tmp >> 16);
+ new_cksum = tmp;
- /* replace it in the packet */
- th->cksum = new_cksum;
- }
+ /* replace it in the header */
+ th->cksum = new_cksum;
+
+ /* the update was done in the linear buffer, return */
+ if (shared)
+ return hdrlen;
+
+ /* copy from local buffer into mbuf if required */
+ if ((iph->version_ihl >> 4) == 4)
+ rte_pktmbuf_write(m, m->l2_len, sizeof(*iph), iph);
+ else
+ rte_pktmbuf_write(m, m->l2_len, sizeof(*ip6h), ip6h);
+ rte_pktmbuf_write(m, m->l2_len + m->l3_len, sizeof(*th), th);
+
+ return 0;
}
static inline int
@@ -268,7 +298,9 @@ virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
struct vring_desc *start_dp;
uint16_t seg_num = cookie->nb_segs;
uint16_t head_idx, idx;
+ uint16_t hdr_idx = 0;
uint16_t head_size = vq->hw->vtnet_hdr_size;
+ unsigned int offset = 0;
struct virtio_net_hdr *hdr;
int offload;
@@ -303,6 +335,8 @@ virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
/* loop below will fill in rest of the indirect elements */
start_dp = txr[idx].tx_indir;
+ hdr_idx = 0;
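+ /* reset the header descriptor length: it may have been
+ * extended for a previous packet with copied headers */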
+ start_dp[hdr_idx].len = vq->hw->vtnet_hdr_size;
idx = 1;
} else {
/* setup first tx ring slot to point to header
@@ -313,7 +347,7 @@ virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
start_dp[idx].len = vq->hw->vtnet_hdr_size;
start_dp[idx].flags = VRING_DESC_F_NEXT;
hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
-
+ hdr_idx = idx;
idx = start_dp[idx].next;
}
@@ -345,7 +379,14 @@ virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
/* TCP Segmentation Offload */
if (cookie->ol_flags & PKT_TX_TCP_SEG) {
- virtio_tso_fix_cksum(cookie);
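+ /* when the headers were copied next to the virtio header
+ * (shared mbuf), extend the header descriptor to cover them */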
+ offset = virtio_tso_fix_cksum(cookie,
+ RTE_PTR_ADD(hdr, start_dp[hdr_idx].len),
+ VIRTIO_MAX_HDR_SZ);
+ if (offset > 0) {
+ RTE_ASSERT(can_push == 0);
+ start_dp[hdr_idx].len += offset;
+ }
+
hdr->gso_type = (cookie->ol_flags & PKT_TX_IPV6) ?
VIRTIO_NET_HDR_GSO_TCPV6 :
VIRTIO_NET_HDR_GSO_TCPV4;
@@ -362,10 +403,16 @@ virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
}
do {
- start_dp[idx].addr = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
- start_dp[idx].len = cookie->data_len;
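+ /* the first bytes of this segment were already sent as
+ * part of the header descriptor: skip them */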
+ if (offset >= cookie->data_len) {
+ offset -= cookie->data_len;
+ continue;
+ }
+ start_dp[idx].addr = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq) +
+ offset;
+ start_dp[idx].len = cookie->data_len - offset;
start_dp[idx].flags = cookie->next ? VRING_DESC_F_NEXT : 0;
idx = start_dp[idx].next;
+ offset = 0;
} while ((cookie = cookie->next) != NULL);
if (use_indirect)
diff --git a/drivers/net/virtio/virtqueue.h b/drivers/net/virtio/virtqueue.h
index f0bb089..edfe0dd 100644
--- a/drivers/net/virtio/virtqueue.h
+++ b/drivers/net/virtio/virtqueue.h
@@ -254,8 +254,10 @@ struct virtio_net_hdr_mrg_rxbuf {
/* Region reserved to allow for transmit header and indirect ring */
#define VIRTIO_MAX_TX_INDIRECT 8
+#define VIRTIO_MAX_HDR_SZ 256
struct virtio_tx_region {
struct virtio_net_hdr_mrg_rxbuf tx_hdr;
+ char net_headers[VIRTIO_MAX_HDR_SZ]; /* for offload if mbuf is read-only */
struct vring_desc tx_indir[VIRTIO_MAX_TX_INDIRECT]
__attribute__((__aligned__(16)));
};
--
2.8.1