[dpdk-dev,4/4] net/dpaa2: support parallel recv mode

Message ID 1492607395-5922-4-git-send-email-hemant.agrawal@nxp.com (mailing list archive)
State Changes Requested, archived
Delegated to: Ferruh Yigit
Checks

Context Check Description
ci/checkpatch warning coding style issues
ci/Intel-compilation success Compilation OK

Commit Message

Hemant Agrawal April 19, 2017, 1:09 p.m. UTC
  Typically, when the PMD issues an RX command to the DPAA2 hardware,
the hw writes the available descriptors into the given memory.
The RX function then processes the frames and prepares them as
mbufs.

This patch adds support for issuing another pull request to the
hardware in another memory location, before we start processing the
output of the first request. This helps in reducing the CPU cycles
wasted while waiting for the hardware to write the descriptors.

During hw debugging, it may be desirable to keep the original
mode; it is therefore preserved and can be selected with an env flag.

Signed-off-by: Hemant Agrawal <hemant.agrawal@nxp.com>
---
 drivers/net/dpaa2/dpaa2_ethdev.c |  11 +++-
 drivers/net/dpaa2/dpaa2_ethdev.h |   2 +
 drivers/net/dpaa2/dpaa2_rxtx.c   | 133 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 144 insertions(+), 2 deletions(-)
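
The commit message describes a ping-pong (double-buffer) scheme. Below is a
minimal illustrative sketch of that pattern, not the driver code itself:
process_descriptors() and issue_pull() are hypothetical stand-ins for the
QBMAN calls used in the actual patch.

int  process_descriptors(void *dq_storage); /* hypothetical helper */
void issue_pull(void *dq_storage);          /* hypothetical helper */

struct rx_ctx {
	void *dq_storage[2]; /* two result areas written by the hw */
	int toggle;          /* area targeted by the outstanding pull */
};

static int
recv_burst(struct rx_ctx *ctx)
{
	void *cur = ctx->dq_storage[ctx->toggle];
	int nb_rx;

	/* process the batch the hw wrote for the previously issued pull */
	nb_rx = process_descriptors(cur);

	/* flip areas and issue the next pull before returning, so the
	 * hw fills the other area while the caller consumes this batch
	 */
	ctx->toggle ^= 1;
	issue_pull(ctx->dq_storage[ctx->toggle]);

	return nb_rx;
}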
  

Comments

Ferruh Yigit May 12, 2017, 1:48 p.m. UTC | #1
On 4/19/2017 2:09 PM, Hemant Agrawal wrote:
> Typically, when the PMD issues an RX command to the DPAA2 hardware,
> the hw writes the available descriptors into the given memory.
> The RX function then processes the frames and prepares them as
> mbufs.
> 
> This patch adds support for issuing another pull request to the
> hardware in another memory location, before we start processing the
> output of the first request. This helps in reducing the CPU cycles
> wasted while waiting for the hardware to write the descriptors.
> 
> During hw debugging, it may be desirable to keep the original
> mode; it is therefore preserved and can be selected with an env flag.
> 
> Signed-off-by: Hemant Agrawal <hemant.agrawal@nxp.com>

<...>

>  
> +	/*If no prefetch is configured. */
> +	if (getenv("DPAA2_RX_NO_PREFETCH")) {

Instead of getting the configuration option from an environment variable,
can you please make it an argument to the driver?
This would be more consistent with DPDK usage.

> +		eth_dev->rx_pkt_burst = dpaa2_dev_rx;
> +		PMD_INIT_LOG(INFO, "No Prefetch enabled");
> +	}
> +

<...>
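
(For reference, a minimal sketch of what such a devargs-based option could
look like, using the rte_kvargs API. The key name "no_prefetch" and the
helper names are illustrative only, not part of the patch.)

#include <stdlib.h>

#include <rte_kvargs.h>

#define NO_PREFETCH_ARG "no_prefetch"

static int
parse_no_prefetch(const char *key, const char *value, void *opaque)
{
	(void)key;
	*(int *)opaque = atoi(value);
	return 0;
}

/* Returns nonzero when "no_prefetch=1" is present in the device
 * argument string, e.g. a device probed with "...,no_prefetch=1".
 */
static int
dpaa2_no_prefetch(const char *devargs_str)
{
	static const char * const valid[] = { NO_PREFETCH_ARG, NULL };
	struct rte_kvargs *kvlist;
	int no_prefetch = 0;

	if (devargs_str == NULL)
		return 0;
	kvlist = rte_kvargs_parse(devargs_str, valid);
	if (kvlist == NULL)
		return 0;
	rte_kvargs_process(kvlist, NO_PREFETCH_ARG,
			   parse_no_prefetch, &no_prefetch);
	rte_kvargs_free(kvlist);
	return no_prefetch;
}

(Driver init could then choose between dpaa2_dev_rx and
dpaa2_dev_prefetch_rx based on the returned value instead of calling
getenv().)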
  
Hemant Agrawal May 15, 2017, 8:26 a.m. UTC | #2
On 5/12/2017 7:18 PM, Ferruh Yigit wrote:
> On 4/19/2017 2:09 PM, Hemant Agrawal wrote:
>> Typically, when the PMD issues an RX command to the DPAA2 hardware,
>> the hw writes the available descriptors into the given memory.
>> The RX function then processes the frames and prepares them as
>> mbufs.
>>
>> This patch adds support for issuing another pull request to the
>> hardware in another memory location, before we start processing the
>> output of the first request. This helps in reducing the CPU cycles
>> wasted while waiting for the hardware to write the descriptors.
>>
>> During hw debugging, it may be desirable to keep the original
>> mode; it is therefore preserved and can be selected with an env flag.
>>
>> Signed-off-by: Hemant Agrawal <hemant.agrawal@nxp.com>
>
> <...>
>
>>
>> +	/*If no prefetch is configured. */
>> +	if (getenv("DPAA2_RX_NO_PREFETCH")) {
>
> Instead of getting the configuration option from an environment variable,
> can you please make it an argument to the driver?
> This would be more consistent with DPDK usage.

I guess you are talking about devargs. That code is still not 
cleaned up for non-PCI devices.

In any case, we will work on that cleanup.

>
>> +		eth_dev->rx_pkt_burst = dpaa2_dev_rx;
>> +		PMD_INIT_LOG(INFO, "No Prefetch enabled");
>> +	}
>> +
>
> <...>
>
  

Patch

diff --git a/drivers/net/dpaa2/dpaa2_ethdev.c b/drivers/net/dpaa2/dpaa2_ethdev.c
index e9800f4..5569400 100644
--- a/drivers/net/dpaa2/dpaa2_ethdev.c
+++ b/drivers/net/dpaa2/dpaa2_ethdev.c
@@ -391,7 +391,8 @@ 
 		RTE_PTYPE_UNKNOWN
 	};
 
-	if (dev->rx_pkt_burst == dpaa2_dev_rx)
+	if (dev->rx_pkt_burst == dpaa2_dev_rx ||
+	    dev->rx_pkt_burst == dpaa2_dev_prefetch_rx)
 		return ptypes;
 	return NULL;
 }
@@ -883,10 +884,16 @@  void dpaa2_dev_stats_reset(struct rte_eth_dev *dev)
 	eth_dev->dev_ops = &dpaa2_ethdev_ops;
 	eth_dev->data->drv_name = rte_dpaa2_pmd.driver.name;
 
-	eth_dev->rx_pkt_burst = dpaa2_dev_rx;
+	eth_dev->rx_pkt_burst = dpaa2_dev_prefetch_rx;
 	eth_dev->tx_pkt_burst = dpaa2_dev_tx;
 	rte_fslmc_vfio_dmamap();
 
+	/*If no prefetch is configured. */
+	if (getenv("DPAA2_RX_NO_PREFETCH")) {
+		eth_dev->rx_pkt_burst = dpaa2_dev_rx;
+		PMD_INIT_LOG(INFO, "No Prefetch enabled");
+	}
+
 	return 0;
 init_err:
 	dpaa2_dev_uninit(eth_dev);
diff --git a/drivers/net/dpaa2/dpaa2_ethdev.h b/drivers/net/dpaa2/dpaa2_ethdev.h
index 7196398..ec27b84 100644
--- a/drivers/net/dpaa2/dpaa2_ethdev.h
+++ b/drivers/net/dpaa2/dpaa2_ethdev.h
@@ -78,6 +78,8 @@  int dpaa2_remove_flow_dist(struct rte_eth_dev *eth_dev,
 int dpaa2_attach_bp_list(struct dpaa2_dev_priv *priv, void *blist);
 
 uint16_t dpaa2_dev_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts);
+uint16_t dpaa2_dev_prefetch_rx(void *queue, struct rte_mbuf **bufs,
+			       uint16_t nb_pkts);
 uint16_t dpaa2_dev_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts);
 
 #endif /* _DPAA2_ETHDEV_H */
diff --git a/drivers/net/dpaa2/dpaa2_rxtx.c b/drivers/net/dpaa2/dpaa2_rxtx.c
index c5d49cb..8365703 100644
--- a/drivers/net/dpaa2/dpaa2_rxtx.c
+++ b/drivers/net/dpaa2/dpaa2_rxtx.c
@@ -51,6 +51,7 @@ 
 #include "dpaa2_ethdev.h"
 #include "base/dpaa2_hw_dpni_annot.h"
 
+struct swp_active_dqs global_active_dqs_list[NUM_MAX_SWP];
 static inline uint32_t __attribute__((hot))
 dpaa2_dev_rx_parse(uint64_t hw_annot_addr)
 {
@@ -335,6 +336,138 @@  static inline int __attribute__((hot))
 	return num_rx;
 }
 
+uint16_t
+dpaa2_dev_prefetch_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
+{
+	/* Function is responsible for receiving frames for a given device and VQ. */
+	struct dpaa2_queue *dpaa2_q = (struct dpaa2_queue *)queue;
+	struct qbman_result *dq_storage;
+	uint32_t fqid = dpaa2_q->fqid;
+	int ret, num_rx = 0;
+	uint8_t is_last = 0, status;
+	struct qbman_swp *swp;
+	const struct qbman_fd *fd[DPAA2_DQRR_RING_SIZE];
+	struct qbman_pull_desc pulldesc;
+	struct queue_storage_info_t *q_storage = dpaa2_q->q_storage;
+	struct rte_eth_dev *dev = dpaa2_q->dev;
+
+	if (unlikely(!DPAA2_PER_LCORE_DPIO)) {
+		ret = dpaa2_affine_qbman_swp();
+		if (ret) {
+			RTE_LOG(ERR, PMD, "Failure in affining portal\n");
+			return 0;
+		}
+	}
+	swp = DPAA2_PER_LCORE_PORTAL;
+	if (!q_storage->active_dqs) {
+		q_storage->toggle = 0;
+		dq_storage = q_storage->dq_storage[q_storage->toggle];
+		qbman_pull_desc_clear(&pulldesc);
+		qbman_pull_desc_set_numframes(&pulldesc,
+					      (nb_pkts > DPAA2_DQRR_RING_SIZE) ?
+					       DPAA2_DQRR_RING_SIZE : nb_pkts);
+		qbman_pull_desc_set_fq(&pulldesc, fqid);
+		qbman_pull_desc_set_storage(&pulldesc, dq_storage,
+			(dma_addr_t)(DPAA2_VADDR_TO_IOVA(dq_storage)), 1);
+		if (check_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index)) {
+			while (!qbman_check_command_complete(swp,
+			       get_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index)))
+				;
+			clear_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index);
+		}
+		while (1) {
+			if (qbman_swp_pull(swp, &pulldesc)) {
+				PMD_RX_LOG(WARNING, "VDQ command is not issued. "
+					   "QBMAN is busy\n");
+				/* Portal was busy, try again */
+				continue;
+			}
+			break;
+		}
+		q_storage->active_dqs = dq_storage;
+		q_storage->active_dpio_id = DPAA2_PER_LCORE_DPIO->index;
+		set_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index, dq_storage);
+	}
+	dq_storage = q_storage->active_dqs;
+	/* Check if the previously issued command is completed.
+	 * The SWP also appears to be shared between the Ethernet driver
+	 * and the SEC driver.
+	 */
+	while (!qbman_check_command_complete(swp, dq_storage))
+		;
+	if (dq_storage == get_swp_active_dqs(q_storage->active_dpio_id))
+		clear_swp_active_dqs(q_storage->active_dpio_id);
+	while (!is_last) {
+		/* Loop until the dq_storage is updated with
+		 * a new token by QBMAN
+		 */
+		struct rte_mbuf *mbuf;
+
+		while (!qbman_result_has_new_result(swp, dq_storage))
+			;
+		rte_prefetch0((void *)((uint64_t)(dq_storage + 1)));
+		/* Check whether the last pull command has expired,
+		 * and set the condition for loop termination
+		 */
+		if (qbman_result_DQ_is_pull_complete(dq_storage)) {
+			is_last = 1;
+			/* Check for valid frame. */
+			status = (uint8_t)qbman_result_DQ_flags(dq_storage);
+			if (unlikely((status & QBMAN_DQ_STAT_VALIDFRAME) == 0))
+				continue;
+		}
+		fd[num_rx] = qbman_result_DQ_fd(dq_storage);
+		mbuf = (struct rte_mbuf *)DPAA2_IOVA_TO_VADDR(
+			DPAA2_GET_FD_ADDR(fd[num_rx])
+			 - rte_dpaa2_bpid_info[DPAA2_GET_FD_BPID(fd[num_rx])].meta_data_size);
+		/* Prefetch mbuf */
+		rte_prefetch0(mbuf);
+		/* Prefetch Annotation address for the parse results */
+		rte_prefetch0((void *)((uint64_t)DPAA2_GET_FD_ADDR(fd[num_rx])
+				+ DPAA2_FD_PTA_SIZE + 16));
+
+		bufs[num_rx] = eth_fd_to_mbuf(fd[num_rx]);
+		bufs[num_rx]->port = dev->data->port_id;
+		if (dev->data->dev_conf.rxmode.hw_vlan_strip)
+			rte_vlan_strip(bufs[num_rx]);
+
+		dq_storage++;
+		num_rx++;
+
+	} /* End of Packet Rx loop */
+
+	if (check_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index)) {
+		while (!qbman_check_command_complete(swp,
+		       get_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index)))
+			;
+		clear_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index);
+	}
+	q_storage->toggle ^= 1;
+	dq_storage = q_storage->dq_storage[q_storage->toggle];
+	qbman_pull_desc_clear(&pulldesc);
+	qbman_pull_desc_set_numframes(&pulldesc, DPAA2_DQRR_RING_SIZE);
+	qbman_pull_desc_set_fq(&pulldesc, fqid);
+	qbman_pull_desc_set_storage(&pulldesc, dq_storage,
+			(dma_addr_t)(DPAA2_VADDR_TO_IOVA(dq_storage)), 1);
+	/*Issue a volatile dequeue command. */
+	while (1) {
+		if (qbman_swp_pull(swp, &pulldesc)) {
+			PMD_RX_LOG(WARNING, "VDQ command is not issued. "
+				   "QBMAN is busy\n");
+			continue;
+		}
+		break;
+	}
+	q_storage->active_dqs = dq_storage;
+	q_storage->active_dpio_id = DPAA2_PER_LCORE_DPIO->index;
+	set_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index, dq_storage);
+
+	dpaa2_q->rx_pkts += num_rx;
+
+	/*Return the total number of packets received to DPAA2 app*/
+	return num_rx;
+}
+
 /*
  * Callback to handle sending packets through WRIOP based interface
  */
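
(One detail of the env-flag approach worth noting: the patch checks only
whether DPAA2_RX_NO_PREFETCH is set at all, so even DPAA2_RX_NO_PREFETCH=0
would disable prefetch. A value-sensitive check would be a small variation;
the sketch below is illustrative only, not part of the patch.)

#include <stdlib.h>

static int
rx_prefetch_disabled(void)
{
	const char *env = getenv("DPAA2_RX_NO_PREFETCH");

	/* unset, empty, or "0" all keep the prefetch mode enabled */
	return env != NULL && env[0] != '\0' && atoi(env) != 0;
}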