[dpdk-dev,11/12] net/vhost: support to run in the secondary process
Checks
Commit Message
Support to run vhost-pmd vdev in the secondary process. We obtain
information, like memory regions, kickfd, callfd, through
primary/secondary communication channel.
And by invoking rte_vhost_set_vring_effective_fd, we can set the
kickfd which can be recognized by the secondary process.
Signed-off-by: Jianfeng Tan <jianfeng.tan@intel.com>
---
drivers/net/vhost/rte_eth_vhost.c | 158 +++++++++++++++++++++++++++++++++++---
1 file changed, 146 insertions(+), 12 deletions(-)
Comments
On Fri, Aug 25, 2017 at 09:40:51AM +0000, Jianfeng Tan wrote:
> static int
> +share_device(int vid)
> +{
> + uint32_t i, vring_num;
> + int len;
> + int fds[8];
> + struct rte_vhost_memory *mem;
> + struct vhost_params *params;
> + struct rte_vhost_vring vring;
> +
> + /* share mem table */
> + if (rte_vhost_get_mem_table(vid, &mem) < 0) {
> + printf("Failed to get mem table\n");
No printf in DPDK lib and pmd, use RTE_LOG instead.
[...]
> +static int
> +eth_dev_vhost_attach(struct rte_vdev_device *dev)
> +{
> + struct rte_eth_dev *eth_dev = NULL;
> + struct rte_eth_dev_data *data = NULL;
> +
> + RTE_LOG(INFO, PMD, "Attach vhost user port\n");
> +
> + /* reserve an ethdev entry */
> + eth_dev = rte_eth_vdev_allocate(dev, sizeof(struct pmd_internal));
> + if (eth_dev == NULL)
> + goto error;
I'd suggest to "return -1" directly here, without introducing goto.
> +
> + eth_dev->dev_ops = &ops;
> +
> + /* finally assign rx and tx ops */
> + eth_dev->rx_pkt_burst = eth_vhost_rx;
> + eth_dev->tx_pkt_burst = eth_vhost_tx;
> +
> + data = eth_dev->data;
> +
> + return data->port_id;
> +
> +error:
> + return -1;
> +}
> +
> static inline int
> open_iface(const char *key __rte_unused, const char *value, void *extra_args)
> {
> @@ -1154,6 +1244,39 @@ open_int(const char *key __rte_unused, const char *value, void *extra_args)
> }
>
> static int
> +vhost_pmd_action(const char *params, int len __rte_unused,
> + int fds[], int fds_num)
> +{
> + int i;
> + void *base_addr;
> + const struct vhost_params *p = (const struct vhost_params *)params;
The cast could be avoided if you define the action prototype with
"const void *params".
> + const struct rte_vhost_mem_region *regions;
> +
> + switch (p->type) {
> + case VHOST_MSG_TYPE_REGIONS:
> + regions = (const void *)p->regions;
Unnecessary cast.
> + for (i = 0; i < fds_num; ++i) {
> + base_addr = mmap(regions[i].mmap_addr,
> + regions[i].mmap_size,
> + PROT_READ | PROT_WRITE,
> + MAP_FIXED | MAP_SHARED, fds[i], 0);
> + if (base_addr != regions[i].mmap_addr) {
> + RTE_LOG(INFO, PMD, "mmap error");
The log level should be error, nothing will work if it happens. Also, the
log message should be more informative.
--yliu
@@ -33,6 +33,7 @@
#include <unistd.h>
#include <pthread.h>
#include <stdbool.h>
+#include <sys/mman.h>
#include <rte_mbuf.h>
#include <rte_ethdev.h>
@@ -46,6 +47,16 @@
#include "rte_eth_vhost.h"
+#define VHOST_MSG_TYPE_REGIONS 1
+#define VHOST_MSG_TYPE_SET_FDS 2
+
+struct vhost_params {
+ int type;
+ int vid;
+ int vring_idx;
+ struct rte_vhost_mem_region regions[0];
+};
+
enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM};
#define ETH_VHOST_IFACE_ARG "iface"
@@ -550,6 +561,67 @@ update_queuing_status(struct rte_eth_dev *dev)
}
static int
+share_device(int vid)
+{
+ uint32_t i, vring_num;
+ int len;
+ int fds[8];
+ struct rte_vhost_memory *mem;
+ struct vhost_params *params;
+ struct rte_vhost_vring vring;
+
+ /* share mem table */
+ if (rte_vhost_get_mem_table(vid, &mem) < 0) {
+ printf("Failed to get mem table\n");
+ return 0;
+ }
+ for (i = 0; i < mem->nregions; ++i)
+ fds[i] = mem->regions[i].fd;
+
+ len = sizeof(struct rte_vhost_mem_region) * mem->nregions;
+ params = malloc(sizeof(*params) + len);
+ if (params == NULL) {
+ printf("Failed to allocate memory\n");
+ return -1;
+ }
+
+ params->type = VHOST_MSG_TYPE_REGIONS;
+ params->vid = vid;
+ memcpy(params->regions, mem->regions, len);
+
+ if (rte_eal_primary_secondary_sendmsg("vhost pmd", params,
+ sizeof(*params) + len,
+ fds, mem->nregions) < 0) {
+ printf("Failed to share mem table\n");
+ free(params);
+ return -1;
+ }
+
+ /* share callfd and kickfd */
+ params->type = VHOST_MSG_TYPE_SET_FDS;
+ vring_num = rte_vhost_get_vring_num(vid);
+ for (i = 0; i < vring_num; i++) {
+ if (rte_vhost_get_vhost_vring(vid, i, &vring) < 0) {
+ printf("Failed to get vring, idx = %d\n", i);
+ free(params);
+ return -1;
+ }
+
+ params->vring_idx = i;
+ fds[0] = vring.callfd;
+ fds[1] = vring.kickfd;
+ if (rte_eal_primary_secondary_sendmsg("vhost pmd", params,
+ sizeof(*params),
+ fds, 2) < 0) {
+ printf("Failed to set fds\n");
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+static int
new_device(int vid)
{
struct rte_eth_dev *eth_dev;
@@ -610,6 +682,8 @@ new_device(int vid)
_rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_INTR_LSC,
NULL, NULL);
+ share_device(vid);
+
return 0;
}
@@ -1025,13 +1099,6 @@ eth_dev_vhost_create(struct rte_vdev_device *dev, char *iface_name,
RTE_LOG(INFO, PMD, "Creating VHOST-USER backend on numa socket %u\n",
numa_node);
- /* now do all data allocation - for eth_dev structure and internal
- * (private) data
- */
- data = rte_zmalloc_socket(name, sizeof(*data), 0, numa_node);
- if (data == NULL)
- goto error;
-
list = rte_zmalloc_socket(name, sizeof(*list), 0, numa_node);
if (list == NULL)
goto error;
@@ -1073,11 +1140,7 @@ eth_dev_vhost_create(struct rte_vdev_device *dev, char *iface_name,
rte_spinlock_init(&vring_state->lock);
vring_states[eth_dev->data->port_id] = vring_state;
- /* We'll replace the 'data' originally allocated by eth_dev. So the
- * vhost PMD resources won't be shared between multi processes.
- */
- rte_memcpy(data, eth_dev->data, sizeof(*data));
- eth_dev->data = data;
+ data = eth_dev->data;
data->nb_rx_queues = queues;
data->nb_tx_queues = queues;
@@ -1125,6 +1188,33 @@ eth_dev_vhost_create(struct rte_vdev_device *dev, char *iface_name,
return -1;
}
+static int
+eth_dev_vhost_attach(struct rte_vdev_device *dev)
+{
+ struct rte_eth_dev *eth_dev = NULL;
+ struct rte_eth_dev_data *data = NULL;
+
+ RTE_LOG(INFO, PMD, "Attach vhost user port\n");
+
+ /* reserve an ethdev entry */
+ eth_dev = rte_eth_vdev_allocate(dev, sizeof(struct pmd_internal));
+ if (eth_dev == NULL)
+ goto error;
+
+ eth_dev->dev_ops = &ops;
+
+ /* finally assign rx and tx ops */
+ eth_dev->rx_pkt_burst = eth_vhost_rx;
+ eth_dev->tx_pkt_burst = eth_vhost_tx;
+
+ data = eth_dev->data;
+
+ return data->port_id;
+
+error:
+ return -1;
+}
+
static inline int
open_iface(const char *key __rte_unused, const char *value, void *extra_args)
{
@@ -1154,6 +1244,39 @@ open_int(const char *key __rte_unused, const char *value, void *extra_args)
}
static int
+vhost_pmd_action(const char *params, int len __rte_unused,
+ int fds[], int fds_num)
+{
+ int i;
+ void *base_addr;
+ const struct vhost_params *p = (const struct vhost_params *)params;
+ const struct rte_vhost_mem_region *regions;
+
+ switch (p->type) {
+ case VHOST_MSG_TYPE_REGIONS:
+ regions = (const void *)p->regions;
+ for (i = 0; i < fds_num; ++i) {
+ base_addr = mmap(regions[i].mmap_addr,
+ regions[i].mmap_size,
+ PROT_READ | PROT_WRITE,
+ MAP_FIXED | MAP_SHARED, fds[i], 0);
+ if (base_addr != regions[i].mmap_addr) {
+ RTE_LOG(INFO, PMD, "mmap error");
+ break;
+ }
+ }
+ break;
+ case VHOST_MSG_TYPE_SET_FDS:
+ rte_vhost_set_vring_effective_fd(p->vid,
+ p->vring_idx,
+ fds[0], fds[1]);
+ break;
+ }
+
+ return 0;
+}
+
+static int
rte_pmd_vhost_probe(struct rte_vdev_device *dev)
{
struct rte_kvargs *kvlist = NULL;
@@ -1167,6 +1290,17 @@ rte_pmd_vhost_probe(struct rte_vdev_device *dev)
RTE_LOG(INFO, PMD, "Initializing pmd_vhost for %s\n",
rte_vdev_device_name(dev));
+ if (rte_eal_primary_secondary_add_action("vhost pmd",
+ vhost_pmd_action) < 0) {
+ RTE_LOG(ERR, PMD, "vhost fails to add action\n");
+ return -1;
+ }
+
+ if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
+ ret = eth_dev_vhost_attach(dev);
+ return (ret >= 0) ? 0 : -1;
+ }
+
kvlist = rte_kvargs_parse(rte_vdev_device_args(dev), valid_arguments);
if (kvlist == NULL)
return -1;