[dpdk-dev] [PATCH] [VIRTIO] Support multiple queues feature in DPDK based virtio-net frontend.

Ouyang Changchun changchun.ouyang at intel.com
Fri May 16 15:45:51 CEST 2014


This patch supports the multiple queues feature in the DPDK-based virtio-net frontend.
It first reads the maximum queue number of virtio-net from the virtio PCI configuration space and then sends a control command to negotiate the queue number with the backend;
once negotiated, the multiple virtio-net queues serve packet reception and transmission.
To use this feature, the backend must also support the multiple queues feature and have it enabled. (A usage sketch follows the sign-off below.)

Signed-off-by: Ouyang Changchun <changchun.ouyang at intel.com>
---
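Reviewer note (not part of the commit): below is a minimal sketch of how an
application might exercise the negotiated queues through the standard ethdev
API. The helper name, port id, queue count, and descriptor counts are
illustrative assumptions, not code from this patch. For reference, ethdev rx
queue N maps to virtqueue 2N and tx queue N to virtqueue 2N+1, as set up in
virtio_rxtx.c below.

#include <string.h>
#include <rte_memory.h>
#include <rte_mempool.h>
#include <rte_ethdev.h>

/* Hypothetical helper: configure nb_queues rx/tx queue pairs on port_id.
 * The PMD caps hw->max_rx/tx_queues at the max_virtqueue_pairs value read
 * from the device config space; on start it negotiates the count with the
 * backend via VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET on the control queue. */
static int
setup_virtio_mq(uint8_t port_id, uint16_t nb_queues, struct rte_mempool *mp)
{
	struct rte_eth_conf conf;
	uint16_t q;
	int ret;

	memset(&conf, 0, sizeof(conf));
	ret = rte_eth_dev_configure(port_id, nb_queues, nb_queues, &conf);
	if (ret < 0)
		return ret;

	for (q = 0; q < nb_queues; q++) {
		/* the virtio PMD ignores the rxconf/txconf arguments */
		ret = rte_eth_rx_queue_setup(port_id, q, 128, SOCKET_ID_ANY,
				NULL, mp);
		if (ret < 0)
			return ret;
		ret = rte_eth_tx_queue_setup(port_id, q, 128, SOCKET_ID_ANY,
				NULL);
		if (ret < 0)
			return ret;
	}

	/* dev_start sends the VQ_PAIRS_SET command when nb_queues > 1 */
	return rte_eth_dev_start(port_id);
}
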
 lib/librte_pmd_virtio/virtio_ethdev.c | 326 ++++++++++++++++++++++++++++------
 lib/librte_pmd_virtio/virtio_ethdev.h |  10 +-
 lib/librte_pmd_virtio/virtio_pci.h    |   4 +-
 lib/librte_pmd_virtio/virtio_rxtx.c   |  79 +++++---
 lib/librte_pmd_virtio/virtqueue.h     |  61 +++++--
 5 files changed, 388 insertions(+), 92 deletions(-)

diff --git a/lib/librte_pmd_virtio/virtio_ethdev.c b/lib/librte_pmd_virtio/virtio_ethdev.c
index c6a1df5..a3616ea 100644
--- a/lib/librte_pmd_virtio/virtio_ethdev.c
+++ b/lib/librte_pmd_virtio/virtio_ethdev.c
@@ -80,6 +80,9 @@ static void virtio_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *
 static void virtio_dev_stats_reset(struct rte_eth_dev *dev);
 static void virtio_dev_free_mbufs(struct rte_eth_dev *dev);
 
+static int virtio_dev_queue_stats_mapping_set(__rte_unused struct rte_eth_dev *eth_dev,
+	__rte_unused uint16_t queue_id, __rte_unused uint8_t stat_idx, __rte_unused uint8_t is_rx);
+
 /*
  * The set of PCI devices this driver supports
  */
@@ -91,6 +94,130 @@ static struct rte_pci_id pci_id_virtio_map[] = {
 { .vendor_id = 0, /* sentinel */ },
 };
 
+static int
+virtio_send_command(struct virtqueue *vq, struct virtio_pmd_ctrl *ctrl,
+		    int *dlen, int pkt_num)
+{
+	uint32_t head = vq->vq_desc_head_idx, i;
+	int k, sum = 0;
+	virtio_net_ctrl_ack status = ~0;
+	struct virtio_pmd_ctrl result;
+
+	ctrl->status = status;
+
+	if (!vq->hw->cvq) {
+		PMD_INIT_LOG(ERR, "%s(): Control queue is "
+                        "not supported by this device.\n", __func__);
+		return -1;
+	}
+
+	PMD_INIT_LOG(DEBUG, "vq->vq_desc_head_idx = %d, status = %d, vq->hw->cvq = %p \n"
+		"vq = %p \n", vq->vq_desc_head_idx, status, vq->hw->cvq, vq);
+
+	if ((vq->vq_free_cnt < ((uint32_t)pkt_num + 2)) || (pkt_num < 1)) {
+		return -1;
+	}
+
+	memcpy(vq->virtio_net_hdr_mz->addr, ctrl, sizeof(struct virtio_pmd_ctrl));
+
+	/*
+	 * Format is enforced in qemu code:
+	 * One TX packet for header;
+	 * At least one TX packet per argument;
+	 * One RX packet for ACK.
+	 */
+	vq->vq_ring.desc[head].flags = VRING_DESC_F_NEXT;
+	vq->vq_ring.desc[head].addr = vq->virtio_net_hdr_mz->phys_addr;
+	vq->vq_ring.desc[head].len = sizeof(struct virtio_net_ctrl_hdr);
+	vq->vq_free_cnt--;
+	i = vq->vq_ring.desc[head].next;
+
+	for (k = 0; k < pkt_num; k++) {
+		vq->vq_ring.desc[i].flags = VRING_DESC_F_NEXT;
+		vq->vq_ring.desc[i].addr = vq->virtio_net_hdr_mz->phys_addr +
+			sizeof(struct virtio_net_ctrl_hdr) + sizeof(ctrl->status) + sizeof(uint8_t) * sum;
+		vq->vq_ring.desc[i].len = dlen[k];
+		sum += dlen[k];
+		vq->vq_free_cnt--;
+		i = vq->vq_ring.desc[i].next;
+	}
+
+	vq->vq_ring.desc[i].flags = VRING_DESC_F_WRITE;
+	vq->vq_ring.desc[i].addr = vq->virtio_net_hdr_mz->phys_addr + sizeof(struct virtio_net_ctrl_hdr);
+	vq->vq_ring.desc[i].len = sizeof(ctrl->status);
+	vq->vq_free_cnt--;
+
+	vq->vq_desc_head_idx = vq->vq_ring.desc[i].next;
+
+	vq_update_avail_ring(vq, head);
+	vq_update_avail_idx(vq);
+
+	PMD_INIT_LOG(DEBUG, "vq->vq_queue_index = %d \n", vq->vq_queue_index);
+
+	virtqueue_notify(vq);
+
+	while (vq->vq_used_cons_idx == vq->vq_ring.used->idx) {
+		usleep(100);
+	}
+
+	while (vq->vq_used_cons_idx != vq->vq_ring.used->idx) {
+		uint32_t idx, desc_idx, used_idx;
+		struct vring_used_elem *uep;
+
+		rmb();
+
+		used_idx = (uint32_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
+		uep = &vq->vq_ring.used->ring[used_idx];
+		idx = (uint32_t) uep->id;
+		desc_idx = idx;
+
+		while (vq->vq_ring.desc[desc_idx].flags & VRING_DESC_F_NEXT) {
+			desc_idx = vq->vq_ring.desc[desc_idx].next;
+			vq->vq_free_cnt++;
+		}
+
+		vq->vq_ring.desc[desc_idx].next = vq->vq_desc_head_idx;
+		vq->vq_desc_head_idx = idx;
+
+		vq->vq_used_cons_idx++;
+		vq->vq_free_cnt++;
+	}
+
+	PMD_INIT_LOG(DEBUG, "vq->vq_free_cnt=%d\nvq->vq_desc_head_idx=%d\n",
+			vq->vq_free_cnt, vq->vq_desc_head_idx);
+
+	memcpy(&result, vq->virtio_net_hdr_mz->addr, sizeof(struct virtio_pmd_ctrl));
+
+	return result.status;
+}
+
+static int
+virtio_set_multiple_queues(struct rte_eth_dev *dev, uint16_t nb_queues)
+{
+	struct virtio_hw *hw = VIRTIO_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+	struct virtio_pmd_ctrl ctrl;
+	int dlen[1];
+	int ret;
+
+	ctrl.hdr.class = VIRTIO_NET_CTRL_MQ;
+	ctrl.hdr.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET;
+	memcpy(ctrl.data, &nb_queues, sizeof(uint16_t));
+
+	PMD_INIT_LOG(DEBUG, "ctrl.data=%d\n", *(int*)ctrl.data);
+
+	dlen[0] = sizeof(uint16_t);
+
+	ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
+
+	if (ret) {
+		PMD_INIT_LOG(ERR, "Multiqueue configured but send command "
+			  "failed, this is too late now...\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 int virtio_dev_queue_setup(struct rte_eth_dev *dev,
 			int queue_type,
 			uint16_t queue_idx,
@@ -133,7 +260,7 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
 
 	if (queue_type == VTNET_RQ) {
 		rte_snprintf(vq_name, sizeof(vq_name), "port%d_rvq%d",
-				dev->data->port_id, queue_idx);
+			dev->data->port_id, queue_idx);
 		vq = rte_zmalloc(vq_name, sizeof(struct virtqueue) +
 			vq_size * sizeof(struct vq_desc_extra), CACHE_LINE_SIZE);
 		memcpy(vq->vq_name, vq_name, sizeof(vq->vq_name));
@@ -145,15 +272,16 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
 		memcpy(vq->vq_name, vq_name, sizeof(vq->vq_name));
 	} else if(queue_type == VTNET_CQ) {
 		rte_snprintf(vq_name, sizeof(vq_name), "port%d_cvq",
-				dev->data->port_id);
-		vq = rte_zmalloc(vq_name, sizeof(struct virtqueue),
-			CACHE_LINE_SIZE);
+			dev->data->port_id);
+		vq = rte_zmalloc(vq_name, sizeof(struct virtqueue) +
+			vq_size * sizeof(struct vq_desc_extra), CACHE_LINE_SIZE);
 		memcpy(vq->vq_name, vq_name, sizeof(vq->vq_name));
 	}
 	if (vq == NULL) {
 		PMD_INIT_LOG(ERR, "%s: Can not allocate virtqueue\n", __func__);
 		return (-ENOMEM); 
 	}
+
 	vq->hw = hw;
 	vq->port_id = dev->data->port_id;
 	vq->queue_id = queue_idx;
@@ -170,11 +298,12 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
 	PMD_INIT_LOG(DEBUG, "vring_size: %d, rounded_vring_size: %d\n", size, vq->vq_ring_size);
 
 	mz = rte_memzone_reserve_aligned(vq_name, vq->vq_ring_size,
-			socket_id, 0, VIRTIO_PCI_VRING_ALIGN);
+		socket_id, 0, VIRTIO_PCI_VRING_ALIGN);
 	if (mz == NULL) {
 		rte_free(vq);
 		return (-ENOMEM);
 	}
+
 	/*
  	* Virtio PCI device VIRTIO_PCI_QUEUE_PF register is 32bit,
  	* and only accepts 32 bit page frame number. 
@@ -185,6 +314,7 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
 		rte_free(vq);
 		return (-ENOMEM);
 	}
+
 	memset(mz->addr, 0, sizeof(mz->len));
 	vq->mz = mz;
 	vq->vq_ring_mem = mz->phys_addr;
@@ -196,8 +326,8 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
 
 	if (queue_type == VTNET_TQ) {
 		/* 
-		* For each xmit packet, allocate a virtio_net_hdr
-		*/
+		 * For each xmit packet, allocate a virtio_net_hdr
+		 */
 		rte_snprintf(vq_name, sizeof(vq_name), "port%d_tvq%d_hdrzone",
 			dev->data->port_id, queue_idx);
 		vq->virtio_net_hdr_mz = rte_memzone_reserve_aligned(vq_name,
@@ -234,8 +364,8 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
 }
 
 static int
-virtio_dev_cq_queue_setup(struct rte_eth_dev *dev,
-		unsigned int socket_id)
+virtio_dev_cq_queue_setup(struct rte_eth_dev *dev, uint16_t vtpci_queue_idx,
+		uint32_t socket_id)
 {
 	struct virtqueue *vq;
 	uint16_t nb_desc = 0;
@@ -244,8 +374,9 @@ virtio_dev_cq_queue_setup(struct rte_eth_dev *dev,
 		VIRTIO_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 
 	PMD_INIT_FUNC_TRACE();
-	ret = virtio_dev_queue_setup(dev, VTNET_CQ, 0, VTNET_SQ_CQ_QUEUE_IDX,
-			nb_desc, socket_id, &vq);
+	ret = virtio_dev_queue_setup(dev, VTNET_CQ, VTNET_SQ_CQ_QUEUE_IDX,
+			vtpci_queue_idx, nb_desc, socket_id, &vq);
+
 	if (ret < 0) {
 		PMD_INIT_LOG(ERR, "control vq initialization failed\n");
 		return ret;
@@ -263,26 +394,26 @@ virtio_dev_close(struct rte_eth_dev *dev)
 	virtio_dev_stop(dev);
 }
 
-
 /*
  * dev_ops for virtio, bare necessities for basic operation
  */
 static struct eth_dev_ops virtio_eth_dev_ops = {
-	.dev_configure         = virtio_dev_configure,
-	.dev_start             = virtio_dev_start,
-	.dev_stop              = virtio_dev_stop,
-	.dev_close             = virtio_dev_close,
-
-	.dev_infos_get         = virtio_dev_info_get,
-	.stats_get             = virtio_dev_stats_get,
-	.stats_reset           = virtio_dev_stats_reset,
-	.link_update           = virtio_dev_link_update,
-	.mac_addr_add          = NULL,
-	.mac_addr_remove       = NULL,
-	.rx_queue_setup        = virtio_dev_rx_queue_setup,
-	.rx_queue_release      = virtio_dev_rx_queue_release,  /* meaningfull only to multiple queue */
-	.tx_queue_setup        = virtio_dev_tx_queue_setup,
-	.tx_queue_release      = virtio_dev_tx_queue_release /* meaningfull only to multiple queue */
+	.dev_configure           = virtio_dev_configure,
+	.dev_start               = virtio_dev_start,
+	.dev_stop                = virtio_dev_stop,
+	.dev_close               = virtio_dev_close,
+
+	.dev_infos_get           = virtio_dev_info_get,
+	.stats_get               = virtio_dev_stats_get,
+	.stats_reset             = virtio_dev_stats_reset,
+	.link_update             = virtio_dev_link_update,
+	.mac_addr_add            = NULL,
+	.mac_addr_remove         = NULL,
+	.rx_queue_setup          = virtio_dev_rx_queue_setup,
+	.rx_queue_release        = virtio_dev_rx_queue_release,  /* meaningful only for multiple queues */
+	.tx_queue_setup          = virtio_dev_tx_queue_setup,
+	.tx_queue_release        = virtio_dev_tx_queue_release,  /* meaningful only for multiple queues */
+	.queue_stats_mapping_set = virtio_dev_queue_stats_mapping_set   /* collect stats per queue */
 };
 
 static inline int
@@ -369,7 +500,7 @@ static void
 virtio_negotiate_features(struct virtio_hw *hw)
 {
 	uint32_t guest_features, mask;
-	mask = VIRTIO_NET_F_CTRL_VQ | VIRTIO_NET_F_CTRL_RX | VIRTIO_NET_F_CTRL_VLAN;
+	mask = VIRTIO_NET_F_CTRL_RX | VIRTIO_NET_F_CTRL_VLAN;
 	mask |= VIRTIO_NET_F_CSUM | VIRTIO_NET_F_GUEST_CSUM ;
 
 	/* TSO and LRO are only available when their corresponding
@@ -387,12 +518,15 @@ virtio_negotiate_features(struct virtio_hw *hw)
 
 	/* Prepare guest_features: feature that driver wants to support */
 	guest_features = VTNET_FEATURES & ~mask;
+	PMD_INIT_LOG(DEBUG, "guest_features before negotiate = %x\n", guest_features);
 
 	/* Read device(host) feature bits */
 	hw->host_features = VIRTIO_READ_REG_4(hw, VIRTIO_PCI_HOST_FEATURES);
+	PMD_INIT_LOG(DEBUG, "host_features before negotiate = %x\n", hw->host_features);
 
 	/* Negotiate features: Subset of device feature bits are written back (guest feature bits) */
 	hw->guest_features = vtpci_negotiate_features(hw, guest_features);
+	PMD_INIT_LOG(DEBUG, "features after negotiate = %x\n", hw->guest_features);
 }
 
 #ifdef RTE_EXEC_ENV_LINUXAPP
@@ -500,9 +634,13 @@ static int
 eth_virtio_dev_init(__rte_unused struct eth_driver *eth_drv,
 		struct rte_eth_dev *eth_dev)
 {
+	struct virtio_net_config *config;
+	struct virtio_net_config local_config;
+	uint32_t offset_conf = sizeof(config->mac);
 	struct rte_pci_device *pci_dev;
 	struct virtio_hw *hw =
 		VIRTIO_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
+
 	if (RTE_PKTMBUF_HEADROOM < sizeof(struct virtio_net_hdr) ) {
 		PMD_INIT_LOG(ERR, 
 			"MBUF HEADROOM should be enough to hold virtio net hdr\n");
@@ -560,9 +698,6 @@ eth_virtio_dev_init(__rte_unused struct eth_driver *eth_drv,
 #endif
 	hw->io_base = (uint32_t)(uintptr_t)pci_dev->mem_resource[0].addr;
 
-	hw->max_rx_queues = VIRTIO_MAX_RX_QUEUES;
-	hw->max_tx_queues = VIRTIO_MAX_TX_QUEUES;
-
 	/* Reset the device although not necessary at startup */
 	vtpci_reset(hw);
 
@@ -572,6 +707,7 @@ eth_virtio_dev_init(__rte_unused struct eth_driver *eth_drv,
 	/* Tell the host we've known how to drive the device. */
 	vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_DRIVER);
 	virtio_negotiate_features(hw);
+
 	/* Setting up rx_header size for the device */
 	if(vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF))
 		hw->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
@@ -586,6 +722,7 @@ eth_virtio_dev_init(__rte_unused struct eth_driver *eth_drv,
 			ETHER_ADDR_LEN);
 		return (-ENOMEM);
 	}
+
 	/* Copy the permanent MAC address to: virtio_hw */
 	virtio_get_hwaddr(hw);
 	ether_addr_copy((struct ether_addr *) hw->mac_addr,
@@ -593,9 +730,46 @@ eth_virtio_dev_init(__rte_unused struct eth_driver *eth_drv,
 	PMD_INIT_LOG(DEBUG, "PORT MAC: %02X:%02X:%02X:%02X:%02X:%02X\n", hw->mac_addr[0],
 			hw->mac_addr[1],hw->mac_addr[2], hw->mac_addr[3], hw->mac_addr[4], hw->mac_addr[5]);
 
-	if(vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VQ))
-		virtio_dev_cq_queue_setup(eth_dev, SOCKET_ID_ANY);
+	if (vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VQ)) {
+		config = &local_config;
+
+		if (vtpci_with_feature(hw, VIRTIO_NET_F_STATUS)) {
+			offset_conf += sizeof(config->status);
+		} else {
+			PMD_INIT_LOG(DEBUG, "VIRTIO_NET_F_STATUS is not supported\n");
+			config->status = 0;
+		}
+
+		if (vtpci_with_feature(hw, VIRTIO_NET_F_MQ)) {
+			offset_conf += sizeof(config->max_virtqueue_pairs);
+		} else {
+			PMD_INIT_LOG(DEBUG, "VIRTIO_NET_F_MQ is not supported!!!\n");
+			config->max_virtqueue_pairs = 1;
+		}
+
+		vtpci_read_dev_config(hw, 0, (uint8_t*)config, offset_conf);
+
+		hw->max_rx_queues = (VIRTIO_MAX_RX_QUEUES < config->max_virtqueue_pairs) ?
+					VIRTIO_MAX_RX_QUEUES : config->max_virtqueue_pairs;
+		hw->max_tx_queues = (VIRTIO_MAX_TX_QUEUES < config->max_virtqueue_pairs) ?
+					VIRTIO_MAX_TX_QUEUES : config->max_virtqueue_pairs;
+
+		virtio_dev_cq_queue_setup(eth_dev, config->max_virtqueue_pairs * 2, SOCKET_ID_ANY);
 
+		PMD_INIT_LOG(DEBUG, "config->max_virtqueue_pairs=%d\n", config->max_virtqueue_pairs);
+		PMD_INIT_LOG(DEBUG, "config->status=%d\n", config->status);
+		PMD_INIT_LOG(DEBUG, "PORT MAC: %02X:%02X:%02X:%02X:%02X:%02X\n", config->mac[0],
+			config->mac[1],config->mac[2], config->mac[3], config->mac[4], config->mac[5]);
+	} else {
+		hw->max_rx_queues = 1;
+		hw->max_tx_queues = 1;
+	}
+
+	eth_dev->data->nb_rx_queues = hw->max_rx_queues;
+	eth_dev->data->nb_tx_queues = hw->max_tx_queues;
+
+	PMD_INIT_LOG(DEBUG, "hw->max_rx_queues=%d   hw->max_tx_queues=%d\n",
+			hw->max_rx_queues, hw->max_tx_queues);
 	PMD_INIT_LOG(DEBUG, "port %d vendorID=0x%x deviceID=0x%x",
 			eth_dev->data->port_id, pci_dev->id.vendor_id,
 			pci_dev->id.device_id);
@@ -606,6 +780,7 @@ static struct eth_driver rte_virtio_pmd = {
 	{
 		.name = "rte_virtio_pmd",
 		.id_table = pci_id_virtio_map,
+		.drv_flags = RTE_PCI_DRV_NEED_IGB_UIO,
 	},
 	.eth_dev_init = eth_virtio_dev_init,
 	.dev_private_size = sizeof(struct virtio_adapter),
@@ -651,6 +826,7 @@ virtio_dev_configure(__rte_unused struct rte_eth_dev *dev)
 static int
 virtio_dev_start(struct rte_eth_dev *dev)
 {
+	uint16_t nb_queues, i;
 	uint16_t status;
 	struct virtio_hw *hw =
 		VIRTIO_DEV_PRIVATE_TO_HW(dev->data->dev_private);
@@ -663,12 +839,13 @@ virtio_dev_start(struct rte_eth_dev *dev)
 
 	hw->adapter_stopped = 0;
 
+	virtio_dev_cq_start(dev);
+
 	/* Do final configuration before rx/tx engine starts */
 	virtio_dev_rxtx_start(dev);
 
 	/* Check VIRTIO_NET_F_STATUS for link status*/
 	if(vtpci_with_feature(hw, VIRTIO_NET_F_STATUS)) {
-
 		vtpci_read_dev_config(hw,
 				offsetof(struct virtio_net_config, status),
 				&status, sizeof(status));
@@ -685,34 +862,63 @@ virtio_dev_start(struct rte_eth_dev *dev)
 	 *Otherwise the tap backend might already stop its queue due to fullness.
 	 *vhost backend will have no chance to be waked up
 	 */
-	virtqueue_notify(dev->data->rx_queues[0]);
+	nb_queues = dev->data->nb_rx_queues;
+	if (nb_queues > 1) {
+		if (virtio_set_multiple_queues(dev, nb_queues) != 0)
+			return -EINVAL;
+	}
+
+	PMD_INIT_LOG(DEBUG, "nb_queues=%d\n",nb_queues);
+
+	for (i = 0; i < nb_queues; i++) {
+		virtqueue_notify(dev->data->rx_queues[i]);
+	}
+
 	PMD_INIT_LOG(DEBUG, "Notified backend at initialization\n");
+
+	for (i = 0; i < dev->data->nb_rx_queues; i++) {
+		VIRTQUEUE_DUMP((struct virtqueue *)dev->data->rx_queues[i]);
+	}
+
+	for (i = 0; i < dev->data->nb_tx_queues; i++) {
+		VIRTQUEUE_DUMP((struct virtqueue *)dev->data->tx_queues[i]);
+	}
+
 	return (0);
 }
 
 static void virtio_dev_free_mbufs(struct rte_eth_dev *dev)
 {
 	struct rte_mbuf * buf;
-	int i = 0;
-	PMD_INIT_LOG(DEBUG, "Before freeing rxq used and unused buf \n");
-	VIRTQUEUE_DUMP((struct virtqueue *)dev->data->rx_queues[0]);
-	while( (buf =(struct rte_mbuf *)virtqueue_detatch_unused(dev->data->rx_queues[0])) != NULL) {
-		rte_pktmbuf_free_seg(buf);
-		i++;
-	}
-	PMD_INIT_LOG(DEBUG, "free %d mbufs\n", i);
-	PMD_INIT_LOG(DEBUG, "After freeing rxq used and unused buf\n");
-	VIRTQUEUE_DUMP((struct virtqueue *)dev->data->rx_queues[0]);
-	PMD_INIT_LOG(DEBUG, "Before freeing txq used and unused bufs\n");
-	VIRTQUEUE_DUMP((struct virtqueue *)dev->data->tx_queues[0]);
- 	i = 0;
-	while( (buf = (struct rte_mbuf *)virtqueue_detatch_unused(dev->data->tx_queues[0])) != NULL) {
-		rte_pktmbuf_free_seg(buf);
-		i++;
-	}
-	PMD_INIT_LOG(DEBUG, "free %d mbufs\n", i);
-	PMD_INIT_LOG(DEBUG, "After freeing txq used and unused buf\n");
-	VIRTQUEUE_DUMP((struct virtqueue *)dev->data->tx_queues[0]);
+	int i, mbuf_num;
+	for (i = 0; i < dev->data->nb_rx_queues; i++) {
+		PMD_INIT_LOG(DEBUG, "Before freeing rxq[%d] used and unused buf\n", i);
+		VIRTQUEUE_DUMP((struct virtqueue *)dev->data->rx_queues[i]);
+
+		mbuf_num = 0;
+		while ((buf = (struct rte_mbuf *)virtqueue_detatch_unused(dev->data->rx_queues[i])) != NULL) {
+			rte_pktmbuf_free_seg(buf);
+			mbuf_num++;
+		}
+		PMD_INIT_LOG(DEBUG, "free %d mbufs\n", mbuf_num);
+		PMD_INIT_LOG(DEBUG, "After freeing rxq[%d] used and unused buf\n", i);
+		VIRTQUEUE_DUMP((struct virtqueue *)dev->data->rx_queues[i]);
+	}
+
+	for (i = 0; i < dev->data->nb_tx_queues; i++) {
+		PMD_INIT_LOG(DEBUG, "Before freeing txq[%d] used and unused bufs\n", i);
+		VIRTQUEUE_DUMP((struct virtqueue *)dev->data->tx_queues[i]);
+
+		mbuf_num = 0;
+		while ((buf = (struct rte_mbuf *)virtqueue_detatch_unused(dev->data->tx_queues[i])) != NULL) {
+			rte_pktmbuf_free_seg(buf);
+			mbuf_num++;
+		}
+
+		PMD_INIT_LOG(DEBUG, "free %d mbufs\n", mbuf_num);
+		PMD_INIT_LOG(DEBUG, "After freeing txq[%d] used and unused buf\n", i);
+		VIRTQUEUE_DUMP((struct virtqueue *)dev->data->tx_queues[i]);
+	}
 }
 
 /*
@@ -774,3 +980,13 @@ virtio_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 	dev_info->max_rx_pktlen = VIRTIO_MAX_RX_PKTLEN;
 	dev_info->max_mac_addrs = VIRTIO_MAX_MAC_ADDRS;
 }
+
+/*
+ * It enables testpmd to collect per queue stats.
+ */
+static int
+virtio_dev_queue_stats_mapping_set(__rte_unused struct rte_eth_dev *eth_dev,
+	__rte_unused uint16_t queue_id, __rte_unused uint8_t stat_idx, __rte_unused uint8_t is_rx)
+{
+	return 0;
+}
diff --git a/lib/librte_pmd_virtio/virtio_ethdev.h b/lib/librte_pmd_virtio/virtio_ethdev.h
index 80c9d63..afb77c0 100644
--- a/lib/librte_pmd_virtio/virtio_ethdev.h
+++ b/lib/librte_pmd_virtio/virtio_ethdev.h
@@ -49,8 +49,8 @@
 #define PAGE_SIZE 4096
 #endif
 
-#define VIRTIO_MAX_RX_QUEUES 1
-#define VIRTIO_MAX_TX_QUEUES 1
+#define VIRTIO_MAX_RX_QUEUES 128
+#define VIRTIO_MAX_TX_QUEUES 128
 #define VIRTIO_MAX_MAC_ADDRS 1
 #define VIRTIO_MIN_RX_BUFSIZE 64
 #define VIRTIO_MAX_RX_PKTLEN  1518
@@ -59,6 +59,7 @@
 #define VTNET_FEATURES \
     (VIRTIO_NET_F_MAC        | \
      VIRTIO_NET_F_STATUS     | \
+     VIRTIO_NET_F_MQ         | \
      VIRTIO_NET_F_CTRL_VQ    | \
      VIRTIO_NET_F_CTRL_RX    | \
      VIRTIO_NET_F_CTRL_VLAN  | \
@@ -74,6 +75,11 @@
      VIRTIO_RING_F_INDIRECT_DESC)
 
 /*
+ * CQ function prototype
+ */
+void virtio_dev_cq_start(struct rte_eth_dev *dev);
+
+/*
  * RX/TX function prototypes
  */
 void virtio_dev_rxtx_start(struct rte_eth_dev *dev);
diff --git a/lib/librte_pmd_virtio/virtio_pci.h b/lib/librte_pmd_virtio/virtio_pci.h
index f163877..c37a32c 100644
--- a/lib/librte_pmd_virtio/virtio_pci.h
+++ b/lib/librte_pmd_virtio/virtio_pci.h
@@ -192,7 +192,9 @@ struct virtio_net_config {
 	uint8_t    mac[ETHER_ADDR_LEN];
 	/* See VIRTIO_NET_F_STATUS and VIRTIO_NET_S_* above */
 	uint16_t   status;
-};
+	uint16_t   max_virtqueue_pairs;
+} __attribute__((packed));
+
 /* Value indicated in device config */
 #define VIRTIO_PCI_FLAG_MSIX  0x0020
 /*
diff --git a/lib/librte_pmd_virtio/virtio_rxtx.c b/lib/librte_pmd_virtio/virtio_rxtx.c
index fe94a3f..0824f4c 100644
--- a/lib/librte_pmd_virtio/virtio_rxtx.c
+++ b/lib/librte_pmd_virtio/virtio_rxtx.c
@@ -82,14 +82,14 @@ virtio_dev_vring_start(struct rte_eth_dev *dev, struct virtqueue *vq, int queue_
 	PMD_INIT_FUNC_TRACE();
 
 	/*
-	* Reinitialise since virtio port might have been stopped and restarted
-	*/
+	 * Reinitialise since virtio port might have been stopped and restarted
+	 */
 	memset(vq->vq_ring_virt_mem, 0, vq->vq_ring_size);
 	vring_init(vr, size, ring_mem, vq->vq_alignment);
 	vq->vq_used_cons_idx = 0;
 	vq->vq_desc_head_idx = 0;
 	vq->vq_avail_idx = 0;
-	vq->vq_desc_tail_idx = vq->vq_nentries - 1;
+	vq->vq_desc_tail_idx = (uint16_t)(vq->vq_nentries - 1);
 	vq->vq_free_cnt = vq->vq_nentries;
 	memset(vq->vq_descx, 0, sizeof(struct vq_desc_extra) * vq->vq_nentries);
 
@@ -110,55 +110,78 @@ virtio_dev_vring_start(struct rte_eth_dev *dev, struct virtqueue *vq, int queue_
 	/* Only rx virtqueue needs mbufs to be allocated at initialization */
 	if (queue_type == VTNET_RQ) {
 		if (vq->mpool == NULL)
-				rte_exit(EXIT_FAILURE, "Cannot allocate initial mbufs for rx virtqueue\n");
-		 /* Allocate blank mbufs for the each rx descriptor */
+			rte_exit(EXIT_FAILURE, "Cannot allocate initial mbufs for rx virtqueue\n");
+
+		/* Allocate blank mbufs for each rx descriptor */
 		nbufs = 0;
 		error = ENOSPC;
 		while (!virtqueue_full(vq)) {
 			m = rte_rxmbuf_alloc(vq->mpool);
 			if (m == NULL)
 				break;
+
 			/******************************************
 			*         Enqueue allocated buffers        *
 			*******************************************/
 			error = virtqueue_enqueue_recv_refill(vq, m);
+
 			if (error) {
  				rte_pktmbuf_free_seg(m);
 				break;
 			}
 			nbufs++;
 		}
+
 		vq_update_avail_idx(vq);
+
 		PMD_INIT_LOG(DEBUG, "Allocated %d bufs\n", nbufs);
-		VIRTIO_WRITE_REG_2(vq->hw, VIRTIO_PCI_QUEUE_SEL, VTNET_SQ_RQ_QUEUE_IDX);
+
+		VIRTIO_WRITE_REG_2(vq->hw, VIRTIO_PCI_QUEUE_SEL, vq->vq_queue_index);
+		VIRTIO_WRITE_REG_4(vq->hw, VIRTIO_PCI_QUEUE_PFN,
+			vq->mz->phys_addr >> VIRTIO_PCI_QUEUE_ADDR_SHIFT);
+	} else if (queue_type == VTNET_TQ) {
+		VIRTIO_WRITE_REG_2(vq->hw, VIRTIO_PCI_QUEUE_SEL, vq->vq_queue_index);
 		VIRTIO_WRITE_REG_4(vq->hw, VIRTIO_PCI_QUEUE_PFN,
 			vq->mz->phys_addr >> VIRTIO_PCI_QUEUE_ADDR_SHIFT);
 	} else {
-		VIRTIO_WRITE_REG_2(vq->hw, VIRTIO_PCI_QUEUE_SEL, VTNET_SQ_TQ_QUEUE_IDX);
+		VIRTIO_WRITE_REG_2(vq->hw, VIRTIO_PCI_QUEUE_SEL, vq->vq_queue_index);
 		VIRTIO_WRITE_REG_4(vq->hw, VIRTIO_PCI_QUEUE_PFN,
 			vq->mz->phys_addr >> VIRTIO_PCI_QUEUE_ADDR_SHIFT);
 	}
 }
 
 void
+virtio_dev_cq_start(struct rte_eth_dev *dev)
+{
+	struct virtio_hw *hw = VIRTIO_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+	virtio_dev_vring_start(dev, hw->cvq, VTNET_CQ);
+	VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq);
+}
+
+void
 virtio_dev_rxtx_start(struct rte_eth_dev *dev)
 {
 	/*
 	 * Start recieve and transmit vrings
-	 * -	Setup vring structure for all queues
-	 * -	Initialize descriptor for the rx vring
-	 * -	Allocate blank mbufs for the each rx descriptor
-	 *
+	 * Setup vring structure for all queues
+	 * Initialize descriptor for the rx vring
+	 * Allocate blank mbufs for each rx descriptor
 	 */
+	int i;
 	PMD_INIT_FUNC_TRACE();
 
-	/* Start rx vring: by default we have 1 rx virtqueue. */
-	virtio_dev_vring_start(dev, dev->data->rx_queues[0], VTNET_RQ);
-	VIRTQUEUE_DUMP((struct virtqueue *)dev->data->rx_queues[0]);
+	/* Start rx vring. */
+	for (i = 0; i < dev->data->nb_rx_queues; i++) {
+		virtio_dev_vring_start(dev, dev->data->rx_queues[i], VTNET_RQ);
+		VIRTQUEUE_DUMP((struct virtqueue *)dev->data->rx_queues[i]);
+	}
 
-	/* Start tx vring: by default we have 1 tx virtqueue. */
-	virtio_dev_vring_start(dev, dev->data->tx_queues[0], VTNET_TQ);
-	VIRTQUEUE_DUMP((struct virtqueue *)dev->data->tx_queues[0]);
+	/* Start tx vring. */
+	for (i = 0; i < dev->data->nb_tx_queues; i++) {
+		virtio_dev_vring_start(dev, dev->data->tx_queues[i], VTNET_TQ);
+		VIRTQUEUE_DUMP((struct virtqueue *)dev->data->tx_queues[i]);
+	}
 }
 
 int
@@ -169,7 +192,7 @@ virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
 			__rte_unused const struct rte_eth_rxconf *rx_conf,
 			struct rte_mempool *mp)
 {
-	uint8_t vtpci_queue_idx = VTNET_SQ_RQ_QUEUE_IDX;
+	uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
 	struct virtqueue *vq;
 	int ret;
 
@@ -180,6 +203,7 @@ virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
 		PMD_INIT_LOG(ERR, "tvq initialization failed\n");
 		return ret;
 	}
+
 	/* Create mempool for rx mbuf allocation */
 	vq->mpool = mp;
 
@@ -201,7 +225,7 @@ virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
 			unsigned int socket_id,
 			__rte_unused const struct rte_eth_txconf *tx_conf)
 {
-	uint8_t vtpci_queue_idx = VTNET_SQ_TQ_QUEUE_IDX;
+	uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
 	struct virtqueue *vq;
 	int ret;
 
@@ -254,12 +278,16 @@ virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 	num = (uint16_t)(likely(num <= VIRTIO_MBUF_BURST_SZ) ? num : VIRTIO_MBUF_BURST_SZ);
 	if (likely(num > DESC_PER_CACHELINE))
 		num = num - ((rxvq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
+
+	if (num == 0) return 0;
+
 	num = virtqueue_dequeue_burst_rx(rxvq, rcv_pkts, len, num);
 	PMD_RX_LOG(DEBUG, "used:%d dequeue:%d\n", nb_used, num);
 	for (i = 0; i < num ; i ++) {
 		rxm = rcv_pkts[i];
+
 		PMD_RX_LOG(DEBUG, "packet len:%d\n", len[i]);
+
 		if (unlikely(len[i] < (uint32_t)hw->vtnet_hdr_size + ETHER_HDR_LEN)) {
 			PMD_RX_LOG(ERR, "Packet drop\n");
 			nb_enqueued++;
@@ -267,17 +295,23 @@ virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 			hw->eth_stats.ierrors++;
 			continue;
 		}
+
 		rxm->pkt.in_port = rxvq->port_id;
 		rxm->pkt.data = (char *)rxm->buf_addr + RTE_PKTMBUF_HEADROOM;
 		rxm->pkt.nb_segs = 1;
 		rxm->pkt.next = NULL;
 		rxm->pkt.pkt_len  = (uint32_t)(len[i] - sizeof(struct virtio_net_hdr));
 		rxm->pkt.data_len = (uint16_t)(len[i] - sizeof(struct virtio_net_hdr));
+
 		VIRTIO_DUMP_PACKET(rxm, rxm->pkt.data_len);
+
 		rx_pkts[nb_rx++] = rxm;
 		hw->eth_stats.ibytes += len[i] - sizeof(struct virtio_net_hdr);
+		hw->eth_stats.q_ibytes[rxvq->queue_id] += len[i] - sizeof(struct virtio_net_hdr);
 	}
+
 	hw->eth_stats.ipackets += nb_rx;
+	hw->eth_stats.q_ipackets[rxvq->queue_id] += nb_rx;
 
 	/* Allocate new mbuf for the used descriptor */
 	error = ENOSPC;
@@ -300,8 +334,9 @@ virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 			PMD_RX_LOG(DEBUG, "Notified\n");
 		}
 	}
+
 	vq_update_avail_idx(rxvq);
-	
+
 	return (nb_rx);
 }
 
@@ -332,6 +367,7 @@ virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 			virtqueue_dequeue_pkt_tx(txvq);
 			num--;
 		}
+
 		if(!virtqueue_full(txvq)) {
 			txm = tx_pkts[nb_tx];
 			/* Enqueue Packet buffers */
@@ -347,6 +383,7 @@ virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 			}
 	 		nb_tx++;
 			hw->eth_stats.obytes += txm->pkt.data_len;
+			hw->eth_stats.q_obytes[txvq->queue_id] += txm->pkt.data_len;
 		} else {
 			PMD_TX_LOG(ERR, "No free tx descriptors to transmit\n");
 			break;
@@ -355,10 +392,12 @@ virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 	vq_update_avail_idx(txvq);
 
 	hw->eth_stats.opackets += nb_tx;
+	hw->eth_stats.q_opackets[txvq->queue_id] += nb_tx;
 
 	if(unlikely(virtqueue_kick_prepare(txvq))) {
  		virtqueue_notify(txvq);
 		PMD_TX_LOG(DEBUG, "Notified backend after xmit\n");
 	}
+
 	return (nb_tx);
 }
diff --git a/lib/librte_pmd_virtio/virtqueue.h b/lib/librte_pmd_virtio/virtqueue.h
index b67c223..5ce61eb 100644
--- a/lib/librte_pmd_virtio/virtqueue.h
+++ b/lib/librte_pmd_virtio/virtqueue.h
@@ -103,6 +103,24 @@ enum { VTNET_RQ = 0, VTNET_TQ = 1, VTNET_CQ = 2 };
 #define VIRTIO_NET_CTRL_VLAN_ADD 0
 #define VIRTIO_NET_CTRL_VLAN_DEL 1
 
+struct virtio_net_ctrl_hdr {
+	uint8_t class;
+	uint8_t cmd;
+} __attribute__((packed));
+
+typedef uint8_t virtio_net_ctrl_ack;
+
+#define VIRTIO_NET_OK     0
+#define VIRTIO_NET_ERR    1
+
+#define VIRTIO_MAX_CTRL_DATA 128
+
+struct virtio_pmd_ctrl {
+	struct virtio_net_ctrl_hdr hdr;
+	virtio_net_ctrl_ack status;
+	uint8_t data[VIRTIO_MAX_CTRL_DATA];
+};
+
 struct virtqueue {
 	char        vq_name[VIRTQUEUE_MAX_NAME_SZ];
 	struct virtio_hw         *hw;     /**< virtio_hw structure pointer. */
@@ -142,6 +160,15 @@ struct virtqueue {
 	} vq_descx[0];
 };
 
+/* If multiqueue is provided by host, then we support it. */
+#ifndef VIRTIO_NET_F_MQ
+#define VIRTIO_NET_F_MQ 0x400000      /* Device supports Receive Flow Steering */
+#define VIRTIO_NET_CTRL_MQ   4
+#define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET        0
+#define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN        1
+#define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX        0x8000
+#endif
+
 /**
  * This is the first element of the scatter-gather list.  If you don't
  * specify GSO or CSUM features, you can simply ignore the header.
@@ -198,15 +225,17 @@ vq_update_avail_idx(struct virtqueue *vq)
 	vq->vq_ring.avail->idx = vq->vq_avail_idx;
 }
 
+
 static inline void __attribute__((always_inline))
 vq_update_avail_ring(struct virtqueue *vq, uint16_t desc_idx)
 {
 	uint16_t avail_idx;
 	/*
 	 * Place the head of the descriptor chain into the next slot and make
-	 * it usable to the host. We wait to inform the host until after the burst 
-	 * is complete to avoid cache alignment issues with descriptors. This 
-	 * also helps to avoid any contention on the available index.
+	 * it usable to the host. The chain is made available now rather than
+	 * deferring to virtqueue_notify() in the hopes that if the host is
+	 * currently running on another CPU, we can keep it processing the new
+	 * descriptor.
 	 */
 	avail_idx = (uint16_t)(vq->vq_avail_idx & (vq->vq_nentries - 1));
 	vq->vq_ring.avail->ring[avail_idx] = desc_idx;
@@ -242,7 +271,7 @@ vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
 	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt + dxp->ndescs);
 	if ((dp->flags & VRING_DESC_F_INDIRECT) == 0) {
 		while (dp->flags & VRING_DESC_F_NEXT) {
-			desc_idx_last = dp->next; 
+			desc_idx_last = dp->next;
 			dp = &vq->vq_ring.desc[dp->next];
 		}
 	}
@@ -259,6 +288,7 @@ vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
 		dp_tail = &vq->vq_ring.desc[vq->vq_desc_tail_idx];
 		dp_tail->next = desc_idx;
 	}
+
 	vq->vq_desc_tail_idx = desc_idx_last;
 	dp->next = VQ_RING_DESC_CHAIN_END;
 }
@@ -294,7 +324,7 @@ virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf *cookie)
 	idx = start_dp[idx].next;
 	vq->vq_desc_head_idx = idx;
 	if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
-		vq->vq_desc_tail_idx = idx; 
+		vq->vq_desc_tail_idx = idx;
 	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
 	vq_update_avail_ring(vq, head_idx);
 
@@ -335,7 +365,7 @@ virtqueue_enqueue_xmit(struct virtqueue *txvq, struct rte_mbuf *cookie)
 	idx = start_dp[idx].next;
 	txvq->vq_desc_head_idx = idx;
 	if (txvq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
-		txvq->vq_desc_tail_idx = idx; 
+		txvq->vq_desc_tail_idx = idx;
 	txvq->vq_free_cnt = (uint16_t)(txvq->vq_free_cnt - needed);
 	vq_update_avail_ring(txvq, head_idx);
 
@@ -357,11 +387,13 @@ virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts, uint
 		desc_idx = (uint16_t) uep->id;
 		len[i] = uep->len;
 		cookie = (struct rte_mbuf *)vq->vq_descx[desc_idx].cookie;
+
 		if (unlikely(cookie == NULL)) {
 			PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u\n", 
 				vq->vq_used_cons_idx);
 			break;
 		}
+
 		rte_prefetch0(cookie);
 		rte_packet_prefetch(cookie->pkt.data);
 		rx_pkts[i]  = cookie;
@@ -369,22 +401,23 @@ virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts, uint
 		vq_ring_free_chain(vq, desc_idx);
 		vq->vq_descx[desc_idx].cookie = NULL;
 	}
+
 	return (i);
 }
 
 static inline uint16_t __attribute__((always_inline))
 virtqueue_dequeue_pkt_tx(struct virtqueue *vq)
 {
-        struct vring_used_elem *uep;
-        uint16_t used_idx, desc_idx;
+	struct vring_used_elem *uep;
+	uint16_t used_idx, desc_idx;
 
-        used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
-        uep = &vq->vq_ring.used->ring[used_idx];
-        desc_idx = (uint16_t) uep->id;
-		vq->vq_used_cons_idx++;
-        vq_ring_free_chain(vq, desc_idx);
+	used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
+	uep = &vq->vq_ring.used->ring[used_idx];
+	desc_idx = (uint16_t) uep->id;
+	vq->vq_used_cons_idx++;
+	vq_ring_free_chain(vq, desc_idx);
 
-        return 0;
+	return 0;
 }
 
 #ifdef  RTE_LIBRTE_VIRTIO_DEBUG_DUMP
-- 
1.9.0


