[v7,5/5] net/af_xdp: enable zero copy
Checks
Commit Message
Try to check if external mempool (from rx_queue_setup) is fit for
af_xdp, if it is, it will be registered to af_xdp socket directly and
there will be no packet data copy on Rx and Tx.
Signed-off-by: Xiaolong Ye <xiaolong.ye@intel.com>
---
drivers/net/af_xdp/rte_eth_af_xdp.c | 130 ++++++++++++++++++++--------
1 file changed, 96 insertions(+), 34 deletions(-)
Comments
On 3/27/2019 9:00 AM, Xiaolong Ye wrote:
> Try to check if external mempool (from rx_queue_setup) is fit for
> af_xdp, if it is, it will be registered to af_xdp socket directly and
> there will be no packet data copy on Rx and Tx.
>
> Signed-off-by: Xiaolong Ye <xiaolong.ye@intel.com>
> ---
> drivers/net/af_xdp/rte_eth_af_xdp.c | 130 ++++++++++++++++++++--------
> 1 file changed, 96 insertions(+), 34 deletions(-)
>
> diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c
> index a1fda9212..c6ade4c94 100644
> --- a/drivers/net/af_xdp/rte_eth_af_xdp.c
> +++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
> @@ -67,6 +67,7 @@ struct xsk_umem_info {
> struct xsk_umem *umem;
> struct rte_mempool *mb_pool;
> void *buffer;
> + uint8_t zc;
> };
>
> struct rx_stats {
> @@ -85,6 +86,7 @@ struct pkt_rx_queue {
>
> struct pkt_tx_queue *pair;
> uint16_t queue_idx;
> + uint8_t zc;
> };
>
> struct tx_stats {
<...>
> @@ -630,6 +685,13 @@ eth_rx_queue_setup(struct rte_eth_dev *dev,
>
> internals->umem = rxq->umem;
>
> + if (mb_pool == internals->umem->mb_pool)
> + rxq->zc = internals->umem->zc;
> +
> + if (rxq->zc)
> + AF_XDP_LOG(INFO,
> + "zero copy enabled on rx queue %d\n", rx_queue_id);
> +
The "zero copy" implemented in this patch, also the variable 'zc', is for the
copy from the 'af_xdp' umem to mbuf data (and vice versa), right?
There is also another "zero copy" support in af_xdp, device to buffers...
Do you think these can be confused with each other? Should we have another log
message and variable name for this one?
Indeed I can't think of a good name, but something like, "pmd/driver zero copy"
& 'pmd_zc' ??
On 03/28, Ferruh Yigit wrote:
>On 3/27/2019 9:00 AM, Xiaolong Ye wrote:
>> Try to check if external mempool (from rx_queue_setup) is fit for
>> af_xdp, if it is, it will be registered to af_xdp socket directly and
>> there will be no packet data copy on Rx and Tx.
>>
>> Signed-off-by: Xiaolong Ye <xiaolong.ye@intel.com>
>> ---
>> drivers/net/af_xdp/rte_eth_af_xdp.c | 130 ++++++++++++++++++++--------
>> 1 file changed, 96 insertions(+), 34 deletions(-)
>>
>> diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c
>> index a1fda9212..c6ade4c94 100644
>> --- a/drivers/net/af_xdp/rte_eth_af_xdp.c
>> +++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
>> @@ -67,6 +67,7 @@ struct xsk_umem_info {
>> struct xsk_umem *umem;
>> struct rte_mempool *mb_pool;
>> void *buffer;
>> + uint8_t zc;
>> };
>>
>> struct rx_stats {
>> @@ -85,6 +86,7 @@ struct pkt_rx_queue {
>>
>> struct pkt_tx_queue *pair;
>> uint16_t queue_idx;
>> + uint8_t zc;
>> };
>>
>> struct tx_stats {
>
><...>
>
>> @@ -630,6 +685,13 @@ eth_rx_queue_setup(struct rte_eth_dev *dev,
>>
>> internals->umem = rxq->umem;
>>
>> + if (mb_pool == internals->umem->mb_pool)
>> + rxq->zc = internals->umem->zc;
>> +
>> + if (rxq->zc)
>> + AF_XDP_LOG(INFO,
>> + "zero copy enabled on rx queue %d\n", rx_queue_id);
>> +
>
>The "zero copy" implemented in this patch, also the variable 'zc', is for the
>copy from the 'af_xdp' umem to mbuf data (and vice versa), right?
>There is also another "zero copy" support in af_xdp, device to buffers...
>Do you think these can be confused with each other? Should we have another log
>message and variable name for this one?
>Indeed I can't think of a good name, but something like, "pmd/driver zero copy"
>& 'pmd_zc' ??
That's a good suggestion, will adopt it.
Thanks,
Xiaolong
@@ -67,6 +67,7 @@ struct xsk_umem_info {
struct xsk_umem *umem;
struct rte_mempool *mb_pool;
void *buffer;
+ uint8_t zc;
};
struct rx_stats {
@@ -85,6 +86,7 @@ struct pkt_rx_queue {
struct pkt_tx_queue *pair;
uint16_t queue_idx;
+ uint8_t zc;
};
struct tx_stats {
@@ -202,7 +204,9 @@ eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
if (xsk_prod_nb_free(fq, free_thresh) >= free_thresh)
(void)reserve_fill_queue(umem, ETH_AF_XDP_RX_BATCH_SIZE);
- if (unlikely(rte_pktmbuf_alloc_bulk(rxq->mb_pool, mbufs, rcvd) != 0))
+ if (!rxq->zc &&
+ unlikely(rte_pktmbuf_alloc_bulk(rxq->mb_pool,
+ mbufs, rcvd) != 0))
return 0;
for (i = 0; i < rcvd; i++) {
@@ -216,13 +220,23 @@ eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
len = desc->len;
pkt = xsk_umem__get_data(rxq->umem->buffer, addr);
- rte_memcpy(rte_pktmbuf_mtod(mbufs[i], void *), pkt, len);
- rte_pktmbuf_pkt_len(mbufs[i]) = len;
- rte_pktmbuf_data_len(mbufs[i]) = len;
- rx_bytes += len;
- bufs[count++] = mbufs[i];
-
- rte_pktmbuf_free(addr_to_mbuf(umem, addr));
+ if (rxq->zc) {
+ struct rte_mbuf *mbuf;
+ mbuf = addr_to_mbuf(rxq->umem, addr);
+ rte_pktmbuf_pkt_len(mbuf) = len;
+ rte_pktmbuf_data_len(mbuf) = len;
+ rx_bytes += len;
+ bufs[count++] = mbuf;
+ } else {
+ rte_memcpy(rte_pktmbuf_mtod(mbufs[i], void *),
+ pkt, len);
+ rte_pktmbuf_pkt_len(mbufs[i]) = len;
+ rte_pktmbuf_data_len(mbufs[i]) = len;
+ rx_bytes += len;
+ bufs[count++] = mbufs[i];
+
+ rte_pktmbuf_free(addr_to_mbuf(umem, addr));
+ }
}
xsk_ring_cons__release(rx, rcvd);
@@ -295,22 +309,29 @@ eth_af_xdp_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
- ETH_AF_XDP_DATA_HEADROOM;
desc = xsk_ring_prod__tx_desc(&txq->tx, idx_tx + i);
mbuf = bufs[i];
- if (mbuf->pkt_len <= buf_len) {
- mbuf_to_tx = rte_pktmbuf_alloc(umem->mb_pool);
- if (unlikely(mbuf_to_tx == NULL)) {
- rte_pktmbuf_free(mbuf);
- continue;
- }
- desc->addr = mbuf_to_addr(umem, mbuf_to_tx);
+ if (txq->pair->zc && mbuf->pool == umem->mb_pool) {
+ desc->addr = mbuf_to_addr(umem, mbuf);
desc->len = mbuf->pkt_len;
- pkt = xsk_umem__get_data(umem->buffer,
- desc->addr);
- rte_memcpy(pkt, rte_pktmbuf_mtod(mbuf, void *),
- desc->len);
valid++;
tx_bytes += mbuf->pkt_len;
+ } else {
+ if (mbuf->pkt_len <= buf_len) {
+ mbuf_to_tx = rte_pktmbuf_alloc(umem->mb_pool);
+ if (unlikely(mbuf_to_tx == NULL)) {
+ rte_pktmbuf_free(mbuf);
+ continue;
+ }
+ desc->addr = mbuf_to_addr(umem, mbuf_to_tx);
+ desc->len = mbuf->pkt_len;
+ pkt = xsk_umem__get_data(umem->buffer,
+ desc->addr);
+ memcpy(pkt, rte_pktmbuf_mtod(mbuf, void *),
+ desc->len);
+ valid++;
+ tx_bytes += mbuf->pkt_len;
+ }
+ rte_pktmbuf_free(mbuf);
}
- rte_pktmbuf_free(mbuf);
}
xsk_ring_prod__submit(&txq->tx, nb_pkts);
@@ -488,7 +509,7 @@ static inline uint64_t get_len(struct rte_mempool *mp)
return (uint64_t)(memhdr->len);
}
-static struct xsk_umem_info *xdp_umem_configure(void)
+static struct xsk_umem_info *xdp_umem_configure(struct rte_mempool *mb_pool)
{
struct xsk_umem_info *umem;
struct xsk_umem_config usr_config = {
@@ -505,16 +526,23 @@ static struct xsk_umem_info *xdp_umem_configure(void)
return NULL;
}
- umem->mb_pool = rte_pktmbuf_pool_create_with_flags("af_xdp_mempool",
- ETH_AF_XDP_NUM_BUFFERS,
- 250, 0,
- ETH_AF_XDP_FRAME_SIZE -
- ETH_AF_XDP_MBUF_OVERHEAD,
- MEMPOOL_F_NO_SPREAD | MEMPOOL_CHUNK_F_PAGE_ALIGN,
- SOCKET_ID_ANY);
- if (umem->mb_pool == NULL || umem->mb_pool->nb_mem_chunks != 1) {
- AF_XDP_LOG(ERR, "Failed to create mempool\n");
- goto err;
+ if (!mb_pool) {
+ umem->mb_pool = rte_pktmbuf_pool_create_with_flags("af_xdp_mempool",
+ ETH_AF_XDP_NUM_BUFFERS,
+ 250, 0,
+ ETH_AF_XDP_FRAME_SIZE -
+ ETH_AF_XDP_MBUF_OVERHEAD,
+ MEMPOOL_F_NO_SPREAD | MEMPOOL_CHUNK_F_PAGE_ALIGN,
+ SOCKET_ID_ANY);
+
+ if (umem->mb_pool == NULL ||
+ umem->mb_pool->nb_mem_chunks != 1) {
+ AF_XDP_LOG(ERR, "Failed to create mempool\n");
+ goto err;
+ }
+ } else {
+ umem->mb_pool = mb_pool;
+ umem->zc = 1;
}
base_addr = (void *)get_base_addr(umem->mb_pool);
@@ -536,16 +564,43 @@ static struct xsk_umem_info *xdp_umem_configure(void)
return NULL;
}
+static uint8_t
+check_mempool_zc(struct rte_mempool *mp)
+{
+ RTE_ASSERT(mp);
+
+ /* memory must be contiguous (single chunk) */
+ if (mp->nb_mem_chunks > 1)
+ return 0;
+
+ /* check header size */
+ if (mp->header_size != RTE_CACHE_LINE_SIZE)
+ return 0;
+
+ /* check base address */
+ if ((uint64_t)get_base_addr(mp) % getpagesize() != 0)
+ return 0;
+
+ /* check chunk size */
+ if ((mp->elt_size + mp->header_size + mp->trailer_size) %
+ ETH_AF_XDP_FRAME_SIZE != 0)
+ return 0;
+
+ return 1;
+}
+
static int
xsk_configure(struct pmd_internals *internals, struct pkt_rx_queue *rxq,
- int ring_size)
+ int ring_size, struct rte_mempool *mb_pool)
{
struct xsk_socket_config cfg;
struct pkt_tx_queue *txq = rxq->pair;
+ struct rte_mempool *mp;
int ret = 0;
int reserve_size;
- rxq->umem = xdp_umem_configure();
+ mp = check_mempool_zc(mb_pool) ? mb_pool : NULL;
+ rxq->umem = xdp_umem_configure(mp);
if (rxq->umem == NULL)
return -ENOMEM;
@@ -622,7 +677,7 @@ eth_rx_queue_setup(struct rte_eth_dev *dev,
rxq->mb_pool = mb_pool;
- if (xsk_configure(internals, rxq, nb_rx_desc)) {
+ if (xsk_configure(internals, rxq, nb_rx_desc, mb_pool)) {
AF_XDP_LOG(ERR, "Failed to configure xdp socket\n");
ret = -EINVAL;
goto err;
@@ -630,6 +685,13 @@ eth_rx_queue_setup(struct rte_eth_dev *dev,
internals->umem = rxq->umem;
+ if (mb_pool == internals->umem->mb_pool)
+ rxq->zc = internals->umem->zc;
+
+ if (rxq->zc)
+ AF_XDP_LOG(INFO,
+ "zero copy enabled on rx queue %d\n", rx_queue_id);
+
dev->data->rx_queues[rx_queue_id] = rxq;
return 0;