[dpdk-dev] [PATCH v2] virtio: Support multiple queues feature in DPDK based virtio-net frontend
Ouyang Changchun
changchun.ouyang at intel.com
Fri May 23 09:59:27 CEST 2014
This patch adds support for the multiple queues feature to the DPDK based virtio-net frontend.
It first reads the maximum queue number of virtio-net from the virtio PCI configuration and
then sends a command to negotiate the queue number with the backend. When receiving and
transmitting packets, it uses the negotiated virtio-net queues to serve RX/TX.
To utilize this feature, the backend also needs to support the multiple queues feature
and have it enabled.
It also fixes some patch style issues.
Signed-off-by: Ouyang Changchun <changchun.ouyang at intel.com>
---
lib/librte_pmd_virtio/virtio_ethdev.c | 326 ++++++++++++++++++++++++++++------
lib/librte_pmd_virtio/virtio_ethdev.h | 10 +-
lib/librte_pmd_virtio/virtio_pci.h | 4 +-
lib/librte_pmd_virtio/virtio_rxtx.c | 72 ++++++--
lib/librte_pmd_virtio/virtqueue.h | 60 +++++--
5 files changed, 384 insertions(+), 88 deletions(-)
diff --git a/lib/librte_pmd_virtio/virtio_ethdev.c b/lib/librte_pmd_virtio/virtio_ethdev.c
index 49e236b..79693f4 100644
--- a/lib/librte_pmd_virtio/virtio_ethdev.c
+++ b/lib/librte_pmd_virtio/virtio_ethdev.c
@@ -81,6 +81,9 @@ static void virtio_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *
static void virtio_dev_stats_reset(struct rte_eth_dev *dev);
static void virtio_dev_free_mbufs(struct rte_eth_dev *dev);
+static int virtio_dev_queue_stats_mapping_set(__rte_unused struct rte_eth_dev *eth_dev,
+__rte_unused uint16_t queue_id, __rte_unused uint8_t stat_idx, __rte_unused uint8_t is_rx);
+
/*
* The set of PCI devices this driver supports
*/
@@ -92,6 +95,130 @@ static struct rte_pci_id pci_id_virtio_map[] = {
{ .vendor_id = 0, /* sentinel */ },
};
+/*
+ * Send one command over the control virtqueue and wait for the device's ack.
+ *
+ * vq:      control virtqueue (callers pass hw->cvq); may be NULL when no
+ *          control queue was set up, in which case -1 is returned.
+ * ctrl:    command to send; ctrl->status is overwritten with ~0 before the
+ *          command is copied into the queue's header memzone.
+ * dlen:    pkt_num argument lengths, in bytes, laid out back to back in
+ *          ctrl->data.
+ * pkt_num: number of argument descriptors (must be >= 1).
+ *
+ * Returns -1 if the request cannot be queued, otherwise the status byte
+ * found in the header memzone after the device consumed the chain.
+ */
+static int
+virtio_send_command(struct virtqueue *vq, struct virtio_pmd_ctrl *ctrl,
+	int *dlen, int pkt_num)
+{
+	uint32_t head, i;
+	int k, sum = 0, total = 0;
+	virtio_net_ctrl_ack status = ~0;
+	struct virtio_pmd_ctrl result;
+
+	ctrl->status = status;
+
+	/*
+	 * Callers hand in hw->cvq as vq, so a missing control queue shows up
+	 * as vq == NULL; check that before touching any vq field.
+	 */
+	if (vq == NULL || !vq->hw->cvq) {
+		PMD_INIT_LOG(ERR, "%s(): Control queue is "
+			"not supported by this device.\n", __func__);
+		return -1;
+	}
+
+	head = vq->vq_desc_head_idx;
+
+	PMD_INIT_LOG(DEBUG, "vq->vq_desc_head_idx = %d, status = %d, vq->hw->cvq = %p \n"
+		"vq = %p \n", vq->vq_desc_head_idx, status, vq->hw->cvq, vq);
+
+	/* Need one descriptor for the header, pkt_num for data, one for the ack. */
+	if ((vq->vq_free_cnt < ((uint32_t)pkt_num + 2)) || (pkt_num < 1)) {
+		return -1;
+	}
+
+	/*
+	 * Only sizeof(*ctrl) bytes are copied below, so the argument
+	 * descriptors must not reach past the end of ctrl->data.
+	 */
+	for (k = 0; k < pkt_num; k++) {
+		if (dlen[k] < 0 || dlen[k] > (int)sizeof(ctrl->data) - total) {
+			PMD_INIT_LOG(ERR, "%s(): control command arguments "
+				"exceed %d bytes.\n", __func__,
+				(int)sizeof(ctrl->data));
+			return -1;
+		}
+		total += dlen[k];
+	}
+
+	memcpy(vq->virtio_net_hdr_mz->addr, ctrl, sizeof(struct virtio_pmd_ctrl));
+
+	/*
+	 * Format is enforced in qemu code:
+	 * One TX packet for header;
+	 * At least one TX packet per argument;
+	 * One RX packet for ACK.
+	 */
+	vq->vq_ring.desc[head].flags = VRING_DESC_F_NEXT;
+	vq->vq_ring.desc[head].addr = vq->virtio_net_hdr_mz->phys_addr;
+	vq->vq_ring.desc[head].len = sizeof(struct virtio_net_ctrl_hdr);
+	vq->vq_free_cnt--;
+	i = vq->vq_ring.desc[head].next;
+
+	/* Argument descriptors: data follows hdr and status in the copied struct. */
+	for (k = 0; k < pkt_num; k++) {
+		vq->vq_ring.desc[i].flags = VRING_DESC_F_NEXT;
+		vq->vq_ring.desc[i].addr = vq->virtio_net_hdr_mz->phys_addr +
+			sizeof(struct virtio_net_ctrl_hdr) + sizeof(ctrl->status) + sizeof(uint8_t)*sum;
+		vq->vq_ring.desc[i].len = dlen[k];
+		sum += dlen[k];
+		vq->vq_free_cnt--;
+		i = vq->vq_ring.desc[i].next;
+	}
+
+	/* Last descriptor is device-writable and covers the status byte. */
+	vq->vq_ring.desc[i].flags = VRING_DESC_F_WRITE;
+	vq->vq_ring.desc[i].addr = vq->virtio_net_hdr_mz->phys_addr + sizeof(struct virtio_net_ctrl_hdr);
+	vq->vq_ring.desc[i].len = sizeof(ctrl->status);
+	vq->vq_free_cnt--;
+
+	vq->vq_desc_head_idx = vq->vq_ring.desc[i].next;
+
+	vq_update_avail_ring(vq, head);
+	vq_update_avail_idx(vq);
+
+	PMD_INIT_LOG(DEBUG, "vq->vq_queue_index = %d \n", vq->vq_queue_index);
+
+	virtqueue_notify(vq);
+
+	/*
+	 * Poll until the used index moves.
+	 * NOTE(review): this wait has no timeout, so a backend that never
+	 * answers blocks the caller forever.
+	 */
+	while (vq->vq_used_cons_idx == vq->vq_ring.used->idx) {
+		usleep(100);
+	}
+
+	/* Return every used chain to the free list. */
+	while (vq->vq_used_cons_idx != vq->vq_ring.used->idx) {
+		uint32_t idx, desc_idx, used_idx;
+		struct vring_used_elem *uep;
+
+		rmb();
+
+		used_idx = (uint32_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
+		uep = &vq->vq_ring.used->ring[used_idx];
+		idx = (uint32_t) uep->id;
+		desc_idx = idx;
+
+		/* Walk to the chain tail, counting each chained descriptor. */
+		while (vq->vq_ring.desc[desc_idx].flags & VRING_DESC_F_NEXT) {
+			desc_idx = vq->vq_ring.desc[desc_idx].next;
+			vq->vq_free_cnt++;
+		}
+
+		/* Splice the chain in front of the current free list. */
+		vq->vq_ring.desc[desc_idx].next = vq->vq_desc_head_idx;
+		vq->vq_desc_head_idx = idx;
+
+		vq->vq_used_cons_idx++;
+		vq->vq_free_cnt++;
+	}
+
+	PMD_INIT_LOG(DEBUG, "vq->vq_free_cnt=%d\nvq->vq_desc_head_idx=%d\n",
+		vq->vq_free_cnt, vq->vq_desc_head_idx);
+
+	/* Read back the command so the status byte written by the device is seen. */
+	memcpy(&result, vq->virtio_net_hdr_mz->addr, sizeof(struct virtio_pmd_ctrl));
+
+	return result.status;
+}
+
+/*
+ * Ask the device, through the control queue, to use nb_queues queue pairs
+ * (VIRTIO_NET_CTRL_MQ / VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET).
+ *
+ * Returns 0 on success, -EINVAL if virtio_send_command() reports a non-zero
+ * result.
+ */
+static int
+virtio_set_multiple_queues(struct rte_eth_dev *dev, uint16_t nb_queues)
+{
+	struct virtio_hw *hw = VIRTIO_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+	struct virtio_pmd_ctrl ctrl;
+	int dlen[1];
+	int ret;
+
+	/*
+	 * The whole structure is copied into the control queue's header
+	 * memzone, so clear it first instead of handing over stack garbage.
+	 */
+	memset(&ctrl, 0, sizeof(ctrl));
+
+	ctrl.hdr.class = VIRTIO_NET_CTRL_MQ;
+	ctrl.hdr.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET;
+	memcpy(ctrl.data, &nb_queues, sizeof(uint16_t));
+
+	/* Log the 16-bit value itself; only two bytes of ctrl.data are payload. */
+	PMD_INIT_LOG(DEBUG, "ctrl.data=%d\n", nb_queues);
+
+	dlen[0] = sizeof(uint16_t);
+
+	ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
+
+	if (ret) {
+		PMD_INIT_LOG(ERR, "Multiqueue configured but send command "
+			"failed, this is too late now...\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
int virtio_dev_queue_setup(struct rte_eth_dev *dev,
int queue_type,
uint16_t queue_idx,
@@ -134,7 +261,7 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
if (queue_type == VTNET_RQ) {
rte_snprintf(vq_name, sizeof(vq_name), "port%d_rvq%d",
- dev->data->port_id, queue_idx);
+ dev->data->port_id, queue_idx);
vq = rte_zmalloc(vq_name, sizeof(struct virtqueue) +
vq_size * sizeof(struct vq_desc_extra), CACHE_LINE_SIZE);
memcpy(vq->vq_name, vq_name, sizeof(vq->vq_name));
@@ -146,15 +273,16 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
memcpy(vq->vq_name, vq_name, sizeof(vq->vq_name));
} else if(queue_type == VTNET_CQ) {
rte_snprintf(vq_name, sizeof(vq_name), "port%d_cvq",
- dev->data->port_id);
- vq = rte_zmalloc(vq_name, sizeof(struct virtqueue),
- CACHE_LINE_SIZE);
+ dev->data->port_id);
+ vq = rte_zmalloc(vq_name, sizeof(struct virtqueue) +
+ vq_size * sizeof(struct vq_desc_extra), CACHE_LINE_SIZE);
memcpy(vq->vq_name, vq_name, sizeof(vq->vq_name));
}
if (vq == NULL) {
PMD_INIT_LOG(ERR, "%s: Can not allocate virtqueue\n", __func__);
return (-ENOMEM);
}
+
vq->hw = hw;
vq->port_id = dev->data->port_id;
vq->queue_id = queue_idx;
@@ -171,11 +299,12 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
PMD_INIT_LOG(DEBUG, "vring_size: %d, rounded_vring_size: %d\n", size, vq->vq_ring_size);
mz = rte_memzone_reserve_aligned(vq_name, vq->vq_ring_size,
- socket_id, 0, VIRTIO_PCI_VRING_ALIGN);
+ socket_id, 0, VIRTIO_PCI_VRING_ALIGN);
if (mz == NULL) {
rte_free(vq);
return (-ENOMEM);
}
+
/*
* Virtio PCI device VIRTIO_PCI_QUEUE_PF register is 32bit,
* and only accepts 32 bit page frame number.
@@ -186,6 +315,7 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
rte_free(vq);
return (-ENOMEM);
}
+
memset(mz->addr, 0, sizeof(mz->len));
vq->mz = mz;
vq->vq_ring_mem = mz->phys_addr;
@@ -197,8 +327,8 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
if (queue_type == VTNET_TQ) {
/*
- * For each xmit packet, allocate a virtio_net_hdr
- */
+ * For each xmit packet, allocate a virtio_net_hdr
+ */
rte_snprintf(vq_name, sizeof(vq_name), "port%d_tvq%d_hdrzone",
dev->data->port_id, queue_idx);
vq->virtio_net_hdr_mz = rte_memzone_reserve_aligned(vq_name,
@@ -235,8 +365,8 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
}
static int
-virtio_dev_cq_queue_setup(struct rte_eth_dev *dev,
- unsigned int socket_id)
+virtio_dev_cq_queue_setup(struct rte_eth_dev *dev, uint16_t vtpci_queue_idx,
+ uint32_t socket_id)
{
struct virtqueue *vq;
uint16_t nb_desc = 0;
@@ -245,8 +375,9 @@ virtio_dev_cq_queue_setup(struct rte_eth_dev *dev,
VIRTIO_DEV_PRIVATE_TO_HW(dev->data->dev_private);
PMD_INIT_FUNC_TRACE();
- ret = virtio_dev_queue_setup(dev, VTNET_CQ, 0, VTNET_SQ_CQ_QUEUE_IDX,
- nb_desc, socket_id, &vq);
+ ret = virtio_dev_queue_setup(dev, VTNET_CQ, VTNET_SQ_CQ_QUEUE_IDX,
+ vtpci_queue_idx, nb_desc, socket_id, &vq);
+
if (ret < 0) {
PMD_INIT_LOG(ERR, "control vq initialization failed\n");
return ret;
@@ -264,26 +395,26 @@ virtio_dev_close(struct rte_eth_dev *dev)
virtio_dev_stop(dev);
}
-
/*
* dev_ops for virtio, bare necessities for basic operation
*/
static struct eth_dev_ops virtio_eth_dev_ops = {
- .dev_configure = virtio_dev_configure,
- .dev_start = virtio_dev_start,
- .dev_stop = virtio_dev_stop,
- .dev_close = virtio_dev_close,
-
- .dev_infos_get = virtio_dev_info_get,
- .stats_get = virtio_dev_stats_get,
- .stats_reset = virtio_dev_stats_reset,
- .link_update = virtio_dev_link_update,
- .mac_addr_add = NULL,
- .mac_addr_remove = NULL,
- .rx_queue_setup = virtio_dev_rx_queue_setup,
- .rx_queue_release = virtio_dev_rx_queue_release, /* meaningfull only to multiple queue */
- .tx_queue_setup = virtio_dev_tx_queue_setup,
- .tx_queue_release = virtio_dev_tx_queue_release /* meaningfull only to multiple queue */
+ .dev_configure = virtio_dev_configure,
+ .dev_start = virtio_dev_start,
+ .dev_stop = virtio_dev_stop,
+ .dev_close = virtio_dev_close,
+
+ .dev_infos_get = virtio_dev_info_get,
+ .stats_get = virtio_dev_stats_get,
+ .stats_reset = virtio_dev_stats_reset,
+ .link_update = virtio_dev_link_update,
+ .mac_addr_add = NULL,
+ .mac_addr_remove = NULL,
+ .rx_queue_setup = virtio_dev_rx_queue_setup,
+ .rx_queue_release = virtio_dev_rx_queue_release, /* meaningful only with multiple queues */
+ .tx_queue_setup = virtio_dev_tx_queue_setup,
+ .tx_queue_release = virtio_dev_tx_queue_release, /* meaningful only with multiple queues */
+ .queue_stats_mapping_set = virtio_dev_queue_stats_mapping_set /* stub that always returns 0; stats are counted per queue in the rx/tx paths */
};
static inline int
@@ -370,7 +501,7 @@ static void
virtio_negotiate_features(struct virtio_hw *hw)
{
uint32_t guest_features, mask;
- mask = VIRTIO_NET_F_CTRL_VQ | VIRTIO_NET_F_CTRL_RX | VIRTIO_NET_F_CTRL_VLAN;
+ mask = VIRTIO_NET_F_CTRL_RX | VIRTIO_NET_F_CTRL_VLAN;
mask |= VIRTIO_NET_F_CSUM | VIRTIO_NET_F_GUEST_CSUM ;
/* TSO and LRO are only available when their corresponding
@@ -388,12 +519,15 @@ virtio_negotiate_features(struct virtio_hw *hw)
/* Prepare guest_features: feature that driver wants to support */
guest_features = VTNET_FEATURES & ~mask;
+ PMD_INIT_LOG(DEBUG, "guest_features before negotiate = %x\n", guest_features);
/* Read device(host) feature bits */
hw->host_features = VIRTIO_READ_REG_4(hw, VIRTIO_PCI_HOST_FEATURES);
+ PMD_INIT_LOG(DEBUG, "host_features before negotiate = %x\n", hw->host_features);
/* Negotiate features: Subset of device feature bits are written back (guest feature bits) */
hw->guest_features = vtpci_negotiate_features(hw, guest_features);
+ PMD_INIT_LOG(DEBUG, "features after negotiate = %x\n", hw->guest_features);
}
#ifdef RTE_EXEC_ENV_LINUXAPP
@@ -501,9 +635,13 @@ static int
eth_virtio_dev_init(__rte_unused struct eth_driver *eth_drv,
struct rte_eth_dev *eth_dev)
{
+ struct virtio_net_config *config;
+ struct virtio_net_config local_config;
+ uint32_t offset_conf = sizeof(config->mac);
struct rte_pci_device *pci_dev;
struct virtio_hw *hw =
VIRTIO_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
+
if (RTE_PKTMBUF_HEADROOM < sizeof(struct virtio_net_hdr) ) {
PMD_INIT_LOG(ERR,
"MBUF HEADROOM should be enough to hold virtio net hdr\n");
@@ -561,9 +699,6 @@ eth_virtio_dev_init(__rte_unused struct eth_driver *eth_drv,
#endif
hw->io_base = (uint32_t)(uintptr_t)pci_dev->mem_resource[0].addr;
- hw->max_rx_queues = VIRTIO_MAX_RX_QUEUES;
- hw->max_tx_queues = VIRTIO_MAX_TX_QUEUES;
-
/* Reset the device although not necessary at startup */
vtpci_reset(hw);
@@ -573,6 +708,7 @@ eth_virtio_dev_init(__rte_unused struct eth_driver *eth_drv,
/* Tell the host we've known how to drive the device. */
vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_DRIVER);
virtio_negotiate_features(hw);
+
/* Setting up rx_header size for the device */
if(vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF))
hw->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
@@ -587,6 +723,7 @@ eth_virtio_dev_init(__rte_unused struct eth_driver *eth_drv,
ETHER_ADDR_LEN);
return (-ENOMEM);
}
+
/* Copy the permanent MAC address to: virtio_hw */
virtio_get_hwaddr(hw);
ether_addr_copy((struct ether_addr *) hw->mac_addr,
@@ -594,9 +731,46 @@ eth_virtio_dev_init(__rte_unused struct eth_driver *eth_drv,
PMD_INIT_LOG(DEBUG, "PORT MAC: %02X:%02X:%02X:%02X:%02X:%02X\n", hw->mac_addr[0],
hw->mac_addr[1],hw->mac_addr[2], hw->mac_addr[3], hw->mac_addr[4], hw->mac_addr[5]);
- if(vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VQ))
- virtio_dev_cq_queue_setup(eth_dev, SOCKET_ID_ANY);
+ if(vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VQ)) {
+ config = &local_config;
+
+ if (vtpci_with_feature(hw, VIRTIO_NET_F_STATUS)) {
+ offset_conf += sizeof(config->status);
+ } else {
+ PMD_INIT_LOG(DEBUG, "VIRTIO_NET_F_STATUS is not supported\n");
+ config->status = 0;
+ }
+
+ if (vtpci_with_feature(hw, VIRTIO_NET_F_MQ)) {
+ offset_conf += sizeof(config->max_virtqueue_pairs);
+ } else {
+ PMD_INIT_LOG(DEBUG, "VIRTIO_NET_F_MQ is not supported!!!\n");
+ config->max_virtqueue_pairs = 1;
+ }
+
+ vtpci_read_dev_config(hw, 0, (uint8_t*)config, offset_conf);
+
+ hw->max_rx_queues = (VIRTIO_MAX_RX_QUEUES < config->max_virtqueue_pairs) ?
+ VIRTIO_MAX_RX_QUEUES : config->max_virtqueue_pairs;
+ hw->max_tx_queues = (VIRTIO_MAX_TX_QUEUES < config->max_virtqueue_pairs) ?
+ VIRTIO_MAX_TX_QUEUES : config->max_virtqueue_pairs;
+
+ virtio_dev_cq_queue_setup(eth_dev,config->max_virtqueue_pairs * 2,SOCKET_ID_ANY);
+ PMD_INIT_LOG(DEBUG, "config->max_virtqueue_pairs=%d\n", config->max_virtqueue_pairs);
+ PMD_INIT_LOG(DEBUG, "config->status=%d\n", config->status);
+ PMD_INIT_LOG(DEBUG, "PORT MAC: %02X:%02X:%02X:%02X:%02X:%02X\n", config->mac[0],
+ config->mac[1],config->mac[2], config->mac[3], config->mac[4], config->mac[5]);
+ } else {
+ hw->max_rx_queues = 1;
+ hw->max_tx_queues = 1;
+ }
+
+ eth_dev->data->nb_rx_queues = hw->max_rx_queues;
+ eth_dev->data->nb_tx_queues = hw->max_tx_queues;
+
+ PMD_INIT_LOG(DEBUG, "hw->max_rx_queues=%d hw->max_tx_queues=%d\n",
+ hw->max_rx_queues, hw->max_tx_queues);
PMD_INIT_LOG(DEBUG, "port %d vendorID=0x%x deviceID=0x%x",
eth_dev->data->port_id, pci_dev->id.vendor_id,
pci_dev->id.device_id);
@@ -607,6 +781,7 @@ static struct eth_driver rte_virtio_pmd = {
{
.name = "rte_virtio_pmd",
.id_table = pci_id_virtio_map,
+ .drv_flags = RTE_PCI_DRV_NEED_IGB_UIO,
},
.eth_dev_init = eth_virtio_dev_init,
.dev_private_size = sizeof(struct virtio_adapter),
@@ -652,6 +827,7 @@ virtio_dev_configure(__rte_unused struct rte_eth_dev *dev)
static int
virtio_dev_start(struct rte_eth_dev *dev)
{
+ uint16_t nb_queues, i;
uint16_t status;
struct virtio_hw *hw =
VIRTIO_DEV_PRIVATE_TO_HW(dev->data->dev_private);
@@ -664,12 +840,13 @@ virtio_dev_start(struct rte_eth_dev *dev)
hw->adapter_stopped = 0;
+ virtio_dev_cq_start(dev);
+
/* Do final configuration before rx/tx engine starts */
virtio_dev_rxtx_start(dev);
/* Check VIRTIO_NET_F_STATUS for link status*/
if(vtpci_with_feature(hw, VIRTIO_NET_F_STATUS)) {
-
vtpci_read_dev_config(hw,
offsetof(struct virtio_net_config, status),
&status, sizeof(status));
@@ -686,34 +863,63 @@ virtio_dev_start(struct rte_eth_dev *dev)
*Otherwise the tap backend might already stop its queue due to fullness.
*vhost backend will have no chance to be waked up
*/
- virtqueue_notify(dev->data->rx_queues[0]);
+ nb_queues = dev->data->nb_rx_queues;
+ if (nb_queues > 1) {
+ if (virtio_set_multiple_queues(dev, nb_queues) != 0)
+ return -EINVAL;
+ }
+
+ PMD_INIT_LOG(DEBUG, "nb_queues=%d\n",nb_queues);
+
+ for(i = 0; i < nb_queues; i++){
+ virtqueue_notify(dev->data->rx_queues[i]);
+ }
+
PMD_INIT_LOG(DEBUG, "Notified backend at initialization\n");
+
+ for( i = 0; i < dev->data->nb_rx_queues; i++){
+ VIRTQUEUE_DUMP((struct virtqueue *)dev->data->rx_queues[i]);
+ }
+
+ for(i = 0; i < dev->data->nb_tx_queues; i++){
+ VIRTQUEUE_DUMP((struct virtqueue *)dev->data->tx_queues[i]);
+ }
+
return (0);
}
static void virtio_dev_free_mbufs(struct rte_eth_dev *dev)
{
struct rte_mbuf * buf;
- int i = 0;
- PMD_INIT_LOG(DEBUG, "Before freeing rxq used and unused buf \n");
- VIRTQUEUE_DUMP((struct virtqueue *)dev->data->rx_queues[0]);
- while( (buf =(struct rte_mbuf *)virtqueue_detatch_unused(dev->data->rx_queues[0])) != NULL) {
- rte_pktmbuf_free_seg(buf);
- i++;
- }
- PMD_INIT_LOG(DEBUG, "free %d mbufs\n", i);
- PMD_INIT_LOG(DEBUG, "After freeing rxq used and unused buf\n");
- VIRTQUEUE_DUMP((struct virtqueue *)dev->data->rx_queues[0]);
- PMD_INIT_LOG(DEBUG, "Before freeing txq used and unused bufs\n");
- VIRTQUEUE_DUMP((struct virtqueue *)dev->data->tx_queues[0]);
- i = 0;
- while( (buf = (struct rte_mbuf *)virtqueue_detatch_unused(dev->data->tx_queues[0])) != NULL) {
- rte_pktmbuf_free_seg(buf);
- i++;
- }
- PMD_INIT_LOG(DEBUG, "free %d mbufs\n", i);
- PMD_INIT_LOG(DEBUG, "After freeing txq used and unused buf\n");
- VIRTQUEUE_DUMP((struct virtqueue *)dev->data->tx_queues[0]);
+ int i, mbuf_num = 0;
+ for( i = 0; i < dev->data->nb_rx_queues; i++) {
+ PMD_INIT_LOG(DEBUG, "Before freeing rxq[%d] used and unused buf \n", i);
+ VIRTQUEUE_DUMP((struct virtqueue *)dev->data->rx_queues[i]);
+
+ while( (buf =(struct rte_mbuf *)virtqueue_detatch_unused(dev->data->rx_queues[i])) != NULL) {
+ rte_pktmbuf_free_seg(buf);
+ mbuf_num ++;
+ }
+
+ PMD_INIT_LOG(DEBUG, "free %d mbufs\n", mbuf_num);
+ PMD_INIT_LOG(DEBUG, "After freeing rxq[%d] used and unused buf\n", i);
+ VIRTQUEUE_DUMP((struct virtqueue *)dev->data->rx_queues[i]);
+ }
+
+ for( i = 0; i < dev->data->nb_tx_queues; i++) {
+ PMD_INIT_LOG(DEBUG, "Before freeing txq[%d] used and unused bufs\n", i);
+ VIRTQUEUE_DUMP((struct virtqueue *)dev->data->tx_queues[i]);
+
+ mbuf_num = 0;
+ while( (buf = (struct rte_mbuf *)virtqueue_detatch_unused(dev->data->tx_queues[i])) != NULL) {
+ rte_pktmbuf_free_seg(buf);
+ mbuf_num ++;
+ }
+
+ PMD_INIT_LOG(DEBUG, "free %d mbufs\n", mbuf_num);
+ PMD_INIT_LOG(DEBUG, "After freeing txq[%d] used and unused buf\n", i);
+ VIRTQUEUE_DUMP((struct virtqueue *)dev->data->tx_queues[i]);
+ }
}
/*
@@ -776,6 +982,16 @@ virtio_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
dev_info->max_mac_addrs = VIRTIO_MAX_MAC_ADDRS;
}
+/*
+ * queue_stats_mapping_set callback for virtio.
+ * All arguments are ignored and success is always reported, which lets
+ * applications such as testpmd collect per queue stats.
+ */
+static int
+virtio_dev_queue_stats_mapping_set(__rte_unused struct rte_eth_dev *eth_dev,
+	__rte_unused uint16_t queue_id,
+	__rte_unused uint8_t stat_idx,
+	__rte_unused uint8_t is_rx)
+{
+	return (0);
+}
+
static struct rte_driver rte_virtio_driver = {
.type = PMD_PDEV,
.init = rte_virtio_pmd_init,
diff --git a/lib/librte_pmd_virtio/virtio_ethdev.h b/lib/librte_pmd_virtio/virtio_ethdev.h
index 80c9d63..afb77c0 100644
--- a/lib/librte_pmd_virtio/virtio_ethdev.h
+++ b/lib/librte_pmd_virtio/virtio_ethdev.h
@@ -49,8 +49,8 @@
#define PAGE_SIZE 4096
#endif
-#define VIRTIO_MAX_RX_QUEUES 1
-#define VIRTIO_MAX_TX_QUEUES 1
+#define VIRTIO_MAX_RX_QUEUES 128
+#define VIRTIO_MAX_TX_QUEUES 128
#define VIRTIO_MAX_MAC_ADDRS 1
#define VIRTIO_MIN_RX_BUFSIZE 64
#define VIRTIO_MAX_RX_PKTLEN 1518
@@ -59,6 +59,7 @@
#define VTNET_FEATURES \
(VIRTIO_NET_F_MAC | \
VIRTIO_NET_F_STATUS | \
+ VIRTIO_NET_F_MQ | \
VIRTIO_NET_F_CTRL_VQ | \
VIRTIO_NET_F_CTRL_RX | \
VIRTIO_NET_F_CTRL_VLAN | \
@@ -74,6 +75,11 @@
VIRTIO_RING_F_INDIRECT_DESC)
/*
+ * CQ function prototype
+ */
+void virtio_dev_cq_start(struct rte_eth_dev *dev);
+
+/*
* RX/TX function prototypes
*/
void virtio_dev_rxtx_start(struct rte_eth_dev *dev);
diff --git a/lib/librte_pmd_virtio/virtio_pci.h b/lib/librte_pmd_virtio/virtio_pci.h
index f163877..c37a32c 100644
--- a/lib/librte_pmd_virtio/virtio_pci.h
+++ b/lib/librte_pmd_virtio/virtio_pci.h
@@ -192,7 +192,9 @@ struct virtio_net_config {
uint8_t mac[ETHER_ADDR_LEN];
/* See VIRTIO_NET_F_STATUS and VIRTIO_NET_S_* above */
uint16_t status;
-};
+ uint16_t max_virtqueue_pairs;
+}__attribute__((packed));
+
/* Value indicated in device config */
#define VIRTIO_PCI_FLAG_MSIX 0x0020
/*
diff --git a/lib/librte_pmd_virtio/virtio_rxtx.c b/lib/librte_pmd_virtio/virtio_rxtx.c
index 2dd2102..e1153ab 100644
--- a/lib/librte_pmd_virtio/virtio_rxtx.c
+++ b/lib/librte_pmd_virtio/virtio_rxtx.c
@@ -82,14 +82,14 @@ virtio_dev_vring_start(struct rte_eth_dev *dev, struct virtqueue *vq, int queue_
PMD_INIT_FUNC_TRACE();
/*
- * Reinitialise since virtio port might have been stopped and restarted
- */
+ * Reinitialise since virtio port might have been stopped and restarted
+ */
memset(vq->vq_ring_virt_mem, 0, vq->vq_ring_size);
vring_init(vr, size, ring_mem, vq->vq_alignment);
vq->vq_used_cons_idx = 0;
vq->vq_desc_head_idx = 0;
vq->vq_avail_idx = 0;
- vq->vq_desc_tail_idx = vq->vq_nentries - 1;
+ vq->vq_desc_tail_idx = (uint16_t)(vq->vq_nentries - 1);
vq->vq_free_cnt = vq->vq_nentries;
memset(vq->vq_descx, 0, sizeof(struct vq_desc_extra) * vq->vq_nentries);
@@ -110,37 +110,56 @@ virtio_dev_vring_start(struct rte_eth_dev *dev, struct virtqueue *vq, int queue_
/* Only rx virtqueue needs mbufs to be allocated at initialization */
if (queue_type == VTNET_RQ) {
if (vq->mpool == NULL)
- rte_exit(EXIT_FAILURE, "Cannot allocate initial mbufs for rx virtqueue\n");
- /* Allocate blank mbufs for the each rx descriptor */
+ rte_exit(EXIT_FAILURE, "Cannot allocate initial mbufs for rx virtqueue\n");
+
+ /* Allocate blank mbufs for the each rx descriptor */
nbufs = 0;
error = ENOSPC;
while (!virtqueue_full(vq)) {
m = rte_rxmbuf_alloc(vq->mpool);
if (m == NULL)
break;
+
/******************************************
* Enqueue allocated buffers *
*******************************************/
error = virtqueue_enqueue_recv_refill(vq, m);
+
if (error) {
rte_pktmbuf_free_seg(m);
break;
}
nbufs++;
}
+
vq_update_avail_idx(vq);
+
PMD_INIT_LOG(DEBUG, "Allocated %d bufs\n", nbufs);
- VIRTIO_WRITE_REG_2(vq->hw, VIRTIO_PCI_QUEUE_SEL, VTNET_SQ_RQ_QUEUE_IDX);
+
+ VIRTIO_WRITE_REG_2(vq->hw, VIRTIO_PCI_QUEUE_SEL, vq->vq_queue_index);
+ VIRTIO_WRITE_REG_4(vq->hw, VIRTIO_PCI_QUEUE_PFN,
+ vq->mz->phys_addr >> VIRTIO_PCI_QUEUE_ADDR_SHIFT);
+ } else if(queue_type == VTNET_TQ) {
+ VIRTIO_WRITE_REG_2(vq->hw, VIRTIO_PCI_QUEUE_SEL, vq->vq_queue_index);
VIRTIO_WRITE_REG_4(vq->hw, VIRTIO_PCI_QUEUE_PFN,
vq->mz->phys_addr >> VIRTIO_PCI_QUEUE_ADDR_SHIFT);
} else {
- VIRTIO_WRITE_REG_2(vq->hw, VIRTIO_PCI_QUEUE_SEL, VTNET_SQ_TQ_QUEUE_IDX);
+ VIRTIO_WRITE_REG_2(vq->hw, VIRTIO_PCI_QUEUE_SEL, vq->vq_queue_index);
VIRTIO_WRITE_REG_4(vq->hw, VIRTIO_PCI_QUEUE_PFN,
vq->mz->phys_addr >> VIRTIO_PCI_QUEUE_ADDR_SHIFT);
}
}
+/*
+ * Start the vring of the control virtqueue, if the port has one.
+ */
void
+virtio_dev_cq_start(struct rte_eth_dev *dev)
+{
+	struct virtio_hw *hw = VIRTIO_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+	/*
+	 * The control queue is only set up at init time when
+	 * VIRTIO_NET_F_CTRL_VQ was negotiated, while this function is called
+	 * on every device start; virtio_dev_vring_start() dereferences its vq
+	 * argument, so skip it when there is no control queue.
+	 * NOTE(review): assumes hw->cvq stays NULL when the queue was never
+	 * created (zero-initialised private data) - confirm.
+	 */
+	if (hw->cvq == NULL)
+		return;
+
+	virtio_dev_vring_start(dev, hw->cvq, VTNET_CQ);
+	VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq);
+}
+
+void
virtio_dev_rxtx_start(struct rte_eth_dev *dev)
{
/*
@@ -150,15 +169,20 @@ virtio_dev_rxtx_start(struct rte_eth_dev *dev)
* - Allocate blank mbufs for the each rx descriptor
*
*/
+ int i;
PMD_INIT_FUNC_TRACE();
- /* Start rx vring: by default we have 1 rx virtqueue. */
- virtio_dev_vring_start(dev, dev->data->rx_queues[0], VTNET_RQ);
- VIRTQUEUE_DUMP((struct virtqueue *)dev->data->rx_queues[0]);
+ /* Start rx vring. */
+ for( i = 0; i < dev->data->nb_rx_queues; i++){
+ virtio_dev_vring_start(dev, dev->data->rx_queues[i], VTNET_RQ);
+ VIRTQUEUE_DUMP((struct virtqueue *)dev->data->rx_queues[i]);
+ }
- /* Start tx vring: by default we have 1 tx virtqueue. */
- virtio_dev_vring_start(dev, dev->data->tx_queues[0], VTNET_TQ);
- VIRTQUEUE_DUMP((struct virtqueue *)dev->data->tx_queues[0]);
+ /* Start tx vring. */
+ for(i = 0; i < dev->data->nb_tx_queues; i++){
+ virtio_dev_vring_start(dev, dev->data->tx_queues[i], VTNET_TQ);
+ VIRTQUEUE_DUMP((struct virtqueue *)dev->data->tx_queues[i]);
+ }
}
int
@@ -169,7 +193,7 @@ virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
__rte_unused const struct rte_eth_rxconf *rx_conf,
struct rte_mempool *mp)
{
- uint8_t vtpci_queue_idx = VTNET_SQ_RQ_QUEUE_IDX;
+ uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
struct virtqueue *vq;
int ret;
@@ -180,6 +204,7 @@ virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
PMD_INIT_LOG(ERR, "tvq initialization failed\n");
return ret;
}
+
/* Create mempool for rx mbuf allocation */
vq->mpool = mp;
@@ -201,7 +226,7 @@ virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
unsigned int socket_id,
__rte_unused const struct rte_eth_txconf *tx_conf)
{
- uint8_t vtpci_queue_idx = VTNET_SQ_TQ_QUEUE_IDX;
+ uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
struct virtqueue *vq;
int ret;
@@ -254,12 +279,16 @@ virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
num = (uint16_t)(likely(num <= VIRTIO_MBUF_BURST_SZ) ? num : VIRTIO_MBUF_BURST_SZ);
if (likely(num > DESC_PER_CACHELINE))
num = num - ((rxvq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
+
if(num == 0) return 0;
+
num = virtqueue_dequeue_burst_rx(rxvq, rcv_pkts, len, num);
PMD_RX_LOG(DEBUG, "used:%d dequeue:%d\n", nb_used, num);
for (i = 0; i < num ; i ++) {
rxm = rcv_pkts[i];
+
PMD_RX_LOG(DEBUG, "packet len:%d\n", len[i]);
+
if (unlikely(len[i] < (uint32_t)hw->vtnet_hdr_size + ETHER_HDR_LEN)) {
PMD_RX_LOG(ERR, "Packet drop\n");
nb_enqueued++;
@@ -267,17 +296,23 @@ virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
hw->eth_stats.ierrors++;
continue;
}
+
rxm->pkt.in_port = rxvq->port_id;
rxm->pkt.data = (char *)rxm->buf_addr + RTE_PKTMBUF_HEADROOM;
rxm->pkt.nb_segs = 1;
rxm->pkt.next = NULL;
rxm->pkt.pkt_len = (uint32_t)(len[i] - sizeof(struct virtio_net_hdr));
rxm->pkt.data_len = (uint16_t)(len[i] - sizeof(struct virtio_net_hdr));
+
VIRTIO_DUMP_PACKET(rxm, rxm->pkt.data_len);
+
rx_pkts[nb_rx++] = rxm;
hw->eth_stats.ibytes += len[i] - sizeof(struct virtio_net_hdr);
+ hw->eth_stats.q_ibytes[rxvq->queue_id] += len[i] - sizeof(struct virtio_net_hdr);
}
+
hw->eth_stats.ipackets += nb_rx;
+ hw->eth_stats.q_ipackets[rxvq->queue_id] += nb_rx;
/* Allocate new mbuf for the used descriptor */
error = ENOSPC;
@@ -300,8 +335,9 @@ virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
PMD_RX_LOG(DEBUG, "Notified\n");
}
}
+
vq_update_avail_idx(rxvq);
-
+
return (nb_rx);
}
@@ -332,6 +368,7 @@ virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
virtqueue_dequeue_pkt_tx(txvq);
num--;
}
+
if(!virtqueue_full(txvq)) {
txm = tx_pkts[nb_tx];
/* Enqueue Packet buffers */
@@ -347,6 +384,7 @@ virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
}
nb_tx++;
hw->eth_stats.obytes += txm->pkt.data_len;
+ hw->eth_stats.q_obytes[txvq->queue_id] += txm->pkt.data_len;
} else {
PMD_TX_LOG(ERR, "No free tx descriptors to transmit\n");
break;
@@ -355,10 +393,12 @@ virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
vq_update_avail_idx(txvq);
hw->eth_stats.opackets += nb_tx;
+ hw->eth_stats.q_opackets[txvq->queue_id] += nb_tx;
if(unlikely(virtqueue_kick_prepare(txvq))) {
virtqueue_notify(txvq);
PMD_TX_LOG(DEBUG, "Notified backend after xmit\n");
}
+
return (nb_tx);
}
diff --git a/lib/librte_pmd_virtio/virtqueue.h b/lib/librte_pmd_virtio/virtqueue.h
index b67c223..a21fef7 100644
--- a/lib/librte_pmd_virtio/virtqueue.h
+++ b/lib/librte_pmd_virtio/virtqueue.h
@@ -103,6 +103,24 @@ enum { VTNET_RQ = 0, VTNET_TQ = 1, VTNET_CQ = 2 };
#define VIRTIO_NET_CTRL_VLAN_ADD 0
#define VIRTIO_NET_CTRL_VLAN_DEL 1
+/* Header placed at the start of every control queue command. */
+struct virtio_net_ctrl_hdr {
+ uint8_t class; /* command class, e.g. VIRTIO_NET_CTRL_MQ */
+ uint8_t cmd; /* command within the class */
+} __attribute__((packed));
+
+/* One-byte result of a control command, written back by the device. */
+typedef uint8_t virtio_net_ctrl_ack;
+
+#define VIRTIO_NET_OK 0
+#define VIRTIO_NET_ERR 1
+
+/* Size in bytes of the argument area of struct virtio_pmd_ctrl. */
+#define VIRTIO_MAX_CTRL_DATA 128
+
+/*
+ * Control command as built by the PMD. virtio_send_command() copies the
+ * whole structure into the control queue's header memzone and addresses
+ * hdr, status and data by their byte offsets (data is taken to start at
+ * sizeof(hdr) + sizeof(status)), so the members must stay in this order
+ * with no padding between them.
+ */
+struct virtio_pmd_ctrl {
+ struct virtio_net_ctrl_hdr hdr;
+ virtio_net_ctrl_ack status;
+ uint8_t data[VIRTIO_MAX_CTRL_DATA];
+};
+
struct virtqueue {
char vq_name[VIRTQUEUE_MAX_NAME_SZ];
struct virtio_hw *hw; /**< virtio_hw structure pointer. */
@@ -142,6 +160,15 @@ struct virtqueue {
} vq_descx[0];
};
+/* If multiqueue is provided by host, then we support it. */
+#ifndef VIRTIO_NET_F_MQ
+#define VIRTIO_NET_F_MQ 0x400000 /* Device supports Receive Flow Steering */
+#define VIRTIO_NET_CTRL_MQ 4
+#define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET 0
+#define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN 1
+#define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX 0x8000
+#endif
+
/**
* This is the first element of the scatter-gather list. If you don't
* specify GSO or CSUM features, you can simply ignore the header.
@@ -204,9 +231,10 @@ vq_update_avail_ring(struct virtqueue *vq, uint16_t desc_idx)
uint16_t avail_idx;
/*
* Place the head of the descriptor chain into the next slot and make
- * it usable to the host. We wait to inform the host until after the burst
- * is complete to avoid cache alignment issues with descriptors. This
- * also helps to avoid any contention on the available index.
+ * it usable to the host. The chain is made available now rather than
+ * deferring to virtqueue_notify() in the hopes that if the host is
+ * currently running on another CPU, we can keep it processing the new
+ * descriptor.
*/
avail_idx = (uint16_t)(vq->vq_avail_idx & (vq->vq_nentries - 1));
vq->vq_ring.avail->ring[avail_idx] = desc_idx;
@@ -242,7 +270,7 @@ vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt + dxp->ndescs);
if ((dp->flags & VRING_DESC_F_INDIRECT) == 0) {
while (dp->flags & VRING_DESC_F_NEXT) {
- desc_idx_last = dp->next;
+ desc_idx_last = dp->next;
dp = &vq->vq_ring.desc[dp->next];
}
}
@@ -259,6 +287,7 @@ vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
dp_tail = &vq->vq_ring.desc[vq->vq_desc_tail_idx];
dp_tail->next = desc_idx;
}
+
vq->vq_desc_tail_idx = desc_idx_last;
dp->next = VQ_RING_DESC_CHAIN_END;
}
@@ -294,7 +323,7 @@ virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf *cookie)
idx = start_dp[idx].next;
vq->vq_desc_head_idx = idx;
if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
- vq->vq_desc_tail_idx = idx;
+ vq->vq_desc_tail_idx = idx;
vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
vq_update_avail_ring(vq, head_idx);
@@ -335,7 +364,7 @@ virtqueue_enqueue_xmit(struct virtqueue *txvq, struct rte_mbuf *cookie)
idx = start_dp[idx].next;
txvq->vq_desc_head_idx = idx;
if (txvq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
- txvq->vq_desc_tail_idx = idx;
+ txvq->vq_desc_tail_idx = idx;
txvq->vq_free_cnt = (uint16_t)(txvq->vq_free_cnt - needed);
vq_update_avail_ring(txvq, head_idx);
@@ -357,11 +386,13 @@ virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts, uint
desc_idx = (uint16_t) uep->id;
len[i] = uep->len;
cookie = (struct rte_mbuf *)vq->vq_descx[desc_idx].cookie;
+
if (unlikely(cookie == NULL)) {
PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u\n",
vq->vq_used_cons_idx);
break;
}
+
rte_prefetch0(cookie);
rte_packet_prefetch(cookie->pkt.data);
rx_pkts[i] = cookie;
@@ -369,22 +400,23 @@ virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts, uint
vq_ring_free_chain(vq, desc_idx);
vq->vq_descx[desc_idx].cookie = NULL;
}
+
return (i);
}
static inline uint16_t __attribute__((always_inline))
virtqueue_dequeue_pkt_tx(struct virtqueue *vq)
{
- struct vring_used_elem *uep;
- uint16_t used_idx, desc_idx;
+ struct vring_used_elem *uep;
+ uint16_t used_idx, desc_idx;
- used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
- uep = &vq->vq_ring.used->ring[used_idx];
- desc_idx = (uint16_t) uep->id;
- vq->vq_used_cons_idx++;
- vq_ring_free_chain(vq, desc_idx);
+ used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
+ uep = &vq->vq_ring.used->ring[used_idx];
+ desc_idx = (uint16_t) uep->id;
+ vq->vq_used_cons_idx++;
+ vq_ring_free_chain(vq, desc_idx);
- return 0;
+ return 0;
}
#ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
--
1.9.0
More information about the dev
mailing list