[dpdk-dev] [PATCH 2/2] hyperv: VMBUS support infrastructure

Shreyansh Jain shreyansh.jain at nxp.com
Thu Dec 15 07:49:44 CET 2016


On Thursday 15 December 2016 05:29 AM, Stephen Hemminger wrote:
> Generalize existing bus support to handle VMBUS in Hyper-V.
> Most of the code is based on the existing model for PCI; the difference
> is how the bus is represented in sysfs and how addressing works.
>
> This is based on earlier code contributed by Brocade.
> It supports only 4.9 or later versions of the Linux kernel
> at this time (not older kernels or BSD).
>
> Signed-off-by: Stephen Hemminger <sthemmin at microsoft.com>
> ---
>  lib/librte_eal/common/Makefile              |   2 +-
>  lib/librte_eal/common/eal_common_devargs.c  |   7 +
>  lib/librte_eal/common/eal_common_options.c  |  38 ++
>  lib/librte_eal/common/eal_internal_cfg.h    |   3 +-
>  lib/librte_eal/common/eal_options.h         |   6 +
>  lib/librte_eal/common/eal_private.h         |   5 +
>  lib/librte_eal/common/include/rte_devargs.h |   8 +
>  lib/librte_eal/common/include/rte_vmbus.h   | 247 ++++++++
>  lib/librte_eal/linuxapp/eal/Makefile        |   6 +
>  lib/librte_eal/linuxapp/eal/eal.c           |  11 +
>  lib/librte_eal/linuxapp/eal/eal_vmbus.c     | 906 ++++++++++++++++++++++++++++
>  lib/librte_ether/rte_ethdev.c               |  90 +++
>  lib/librte_ether/rte_ethdev.h               |  28 +-
>  mk/rte.app.mk                               |   1 +
>  14 files changed, 1354 insertions(+), 4 deletions(-)
>  create mode 100644 lib/librte_eal/common/include/rte_vmbus.h
>  create mode 100644 lib/librte_eal/linuxapp/eal/eal_vmbus.c
>
> diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile
> index a92c984..9254bae 100644
> --- a/lib/librte_eal/common/Makefile
> +++ b/lib/librte_eal/common/Makefile
> @@ -33,7 +33,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
>
>  INC := rte_branch_prediction.h rte_common.h
>  INC += rte_debug.h rte_eal.h rte_errno.h rte_launch.h rte_lcore.h
> -INC += rte_log.h rte_memory.h rte_memzone.h rte_pci.h
> +INC += rte_log.h rte_memory.h rte_memzone.h rte_pci.h rte_vmbus.h
>  INC += rte_per_lcore.h rte_random.h
>  INC += rte_tailq.h rte_interrupts.h rte_alarm.h
>  INC += rte_string_fns.h rte_version.h
> diff --git a/lib/librte_eal/common/eal_common_devargs.c b/lib/librte_eal/common/eal_common_devargs.c
> index e403717..934ca84 100644
> --- a/lib/librte_eal/common/eal_common_devargs.c
> +++ b/lib/librte_eal/common/eal_common_devargs.c
> @@ -113,6 +113,13 @@ rte_eal_devargs_add(enum rte_devtype devtype, const char *devargs_str)
>  			goto fail;
>
>  		break;
> +	case RTE_DEVTYPE_WHITELISTED_VMBUS:
> +	case RTE_DEVTYPE_BLACKLISTED_VMBUS:
> +#ifdef RTE_LIBRTE_HV_PMD
> +		if (uuid_parse(buf, devargs->uuid) == 0)
> +			break;
> +#endif
> +		goto fail;
>  	}
>
>  	free(buf);
> diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c
> index 6ca8af1..6aea87d 100644
> --- a/lib/librte_eal/common/eal_common_options.c
> +++ b/lib/librte_eal/common/eal_common_options.c
> @@ -95,6 +95,11 @@ eal_long_options[] = {
>  	{OPT_VFIO_INTR,         1, NULL, OPT_VFIO_INTR_NUM        },
>  	{OPT_VMWARE_TSC_MAP,    0, NULL, OPT_VMWARE_TSC_MAP_NUM   },
>  	{OPT_XEN_DOM0,          0, NULL, OPT_XEN_DOM0_NUM         },
> +#ifdef RTE_LIBRTE_HV_PMD
> +	{OPT_NO_VMBUS,          0, NULL, OPT_NO_VMBUS_NUM         },
> +	{OPT_VMBUS_BLACKLIST,   1, NULL, OPT_VMBUS_BLACKLIST_NUM  },
> +	{OPT_VMBUS_WHITELIST,   1, NULL, OPT_VMBUS_WHITELIST_NUM  },
> +#endif
>  	{0,                     0, NULL, 0                        }
>  };
>
> @@ -855,6 +860,21 @@ eal_parse_common_option(int opt, const char *optarg,
>  		conf->no_pci = 1;
>  		break;
>
> +#ifdef RTE_LIBRTE_HV_PMD
> +	case OPT_NO_VMBUS_NUM:
> +		conf->no_vmbus = 1;
> +		break;
> +	case OPT_VMBUS_BLACKLIST_NUM:
> +		if (rte_eal_devargs_add(RTE_DEVTYPE_BLACKLISTED_VMBUS,
> +					optarg) < 0)
> +			return -1;
> +		break;
> +	case OPT_VMBUS_WHITELIST_NUM:
> +		if (rte_eal_devargs_add(RTE_DEVTYPE_WHITELISTED_VMBUS,
> +				optarg) < 0)
> +			return -1;
> +		break;
> +#endif
>  	case OPT_NO_HPET_NUM:
>  		conf->no_hpet = 1;
>  		break;
> @@ -987,6 +1007,14 @@ eal_check_common_options(struct internal_config *internal_cfg)
>  		return -1;
>  	}
>
> +#ifdef RTE_LIBRTE_HV_PMD
> +	if (rte_eal_devargs_type_count(RTE_DEVTYPE_WHITELISTED_VMBUS) != 0 &&
> +		rte_eal_devargs_type_count(RTE_DEVTYPE_BLACKLISTED_VMBUS) != 0) {
> +		RTE_LOG(ERR, EAL, "Options vmbus blacklist and whitelist "
> +			"cannot be used at the same time\n");
> +		return -1;
> +	}
> +#endif
>  	return 0;
>  }
>
> @@ -1036,5 +1064,15 @@ eal_common_usage(void)
>  	       "  --"OPT_NO_PCI"            Disable PCI\n"
>  	       "  --"OPT_NO_HPET"           Disable HPET\n"
>  	       "  --"OPT_NO_SHCONF"         No shared config (mmap'd files)\n"
> +#ifdef RTE_LIBRTE_HV_PMD
> +	       "  --"OPT_NO_VMBUS"          Disable VMBUS\n"
> +	       "  --"OPT_VMBUS_BLACKLIST" Add a VMBUS device to black list.\n"
> +	       "                      Prevent EAL from using this PCI device. The argument\n"
> +	       "                      format is device UUID.\n"
> +	       "  --"OPT_VMBUS_WHITELIST" Add a VMBUS device to white list.\n"
> +	       "                      Only use the specified VMBUS devices. The argument format\n"
> +	       "                      is device UUID This option can be present\n"
> +	       "                      several times (once per device).\n"
> +#endif
>  	       "\n", RTE_MAX_LCORE);
>  }
> diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h
> index 5f1367e..1827194 100644
> --- a/lib/librte_eal/common/eal_internal_cfg.h
> +++ b/lib/librte_eal/common/eal_internal_cfg.h
> @@ -69,7 +69,8 @@ struct internal_config {
>  	volatile unsigned no_pci;         /**< true to disable PCI */
>  	volatile unsigned no_hpet;        /**< true to disable HPET */
>  	volatile unsigned vmware_tsc_map; /**< true to use VMware TSC mapping
> -										* instead of native TSC */
> +					   * instead of native TSC */
> +	volatile unsigned no_vmbus;       /**< true to disable VMBUS */
>  	volatile unsigned no_shconf;      /**< true if there is no shared config */
>  	volatile unsigned create_uio_dev; /**< true to create /dev/uioX devices */
>  	volatile enum rte_proc_type_t process_type; /**< multi-process proc type */
> diff --git a/lib/librte_eal/common/eal_options.h b/lib/librte_eal/common/eal_options.h
> index a881c62..156727e 100644
> --- a/lib/librte_eal/common/eal_options.h
> +++ b/lib/librte_eal/common/eal_options.h
> @@ -83,6 +83,12 @@ enum {
>  	OPT_VMWARE_TSC_MAP_NUM,
>  #define OPT_XEN_DOM0          "xen-dom0"
>  	OPT_XEN_DOM0_NUM,
> +#define OPT_NO_VMBUS          "no-vmbus"
> +	OPT_NO_VMBUS_NUM,
> +#define OPT_VMBUS_BLACKLIST   "vmbus-blacklist"
> +	OPT_VMBUS_BLACKLIST_NUM,
> +#define OPT_VMBUS_WHITELIST   "vmbus-whitelist"
> +	OPT_VMBUS_WHITELIST_NUM,
>  	OPT_LONG_MAX_NUM
>  };
>
> diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
> index 9e7d8f6..c856c63 100644
> --- a/lib/librte_eal/common/eal_private.h
> +++ b/lib/librte_eal/common/eal_private.h
> @@ -210,6 +210,11 @@ int pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx,
>  		struct mapped_pci_resource *uio_res, int map_idx);
>
>  /**
> + * VMBUS related functions and structures
> + */
> +int rte_eal_vmbus_init(void);
> +
> +/**
>   * Init tail queues for non-EAL library structures. This is to allow
>   * the rings, mempools, etc. lists to be shared among multiple processes
>   *
> diff --git a/lib/librte_eal/common/include/rte_devargs.h b/lib/librte_eal/common/include/rte_devargs.h
> index 88120a1..c079d28 100644
> --- a/lib/librte_eal/common/include/rte_devargs.h
> +++ b/lib/librte_eal/common/include/rte_devargs.h
> @@ -51,6 +51,9 @@ extern "C" {
>  #include <stdio.h>
>  #include <sys/queue.h>
>  #include <rte_pci.h>
> +#ifdef RTE_LIBRTE_HV_PMD
> +#include <uuid/uuid.h>
> +#endif
>
>  /**
>   * Type of generic device
> @@ -59,6 +62,8 @@ enum rte_devtype {
>  	RTE_DEVTYPE_WHITELISTED_PCI,
>  	RTE_DEVTYPE_BLACKLISTED_PCI,
>  	RTE_DEVTYPE_VIRTUAL,
> +	RTE_DEVTYPE_WHITELISTED_VMBUS,
> +	RTE_DEVTYPE_BLACKLISTED_VMBUS,
>  };
>
>  /**
> @@ -88,6 +93,9 @@ struct rte_devargs {
>  			/** Driver name. */
>  			char drv_name[32];
>  		} virt;
> +#ifdef RTE_LIBRTE_HV_PMD
> +		uuid_t uuid;
> +#endif
>  	};
>  	/** Arguments string as given by user or "" for no argument. */
>  	char *args;
> diff --git a/lib/librte_eal/common/include/rte_vmbus.h b/lib/librte_eal/common/include/rte_vmbus.h
> new file mode 100644
> index 0000000..8540539
> --- /dev/null
> +++ b/lib/librte_eal/common/include/rte_vmbus.h
> @@ -0,0 +1,247 @@
> +/*-
> + *   BSD LICENSE
> + *
> + *   Copyright(c) 2013-2016 Brocade Communications Systems, Inc.
> + *   Copyright(c) 2016 Microsoft Corporation
> + *   All rights reserved.
> + *
> + *   Redistribution and use in source and binary forms, with or without
> + *   modification, are permitted provided that the following conditions
> + *   are met:
> + *
> + *     * Redistributions of source code must retain the above copyright
> + *       notice, this list of conditions and the following disclaimer.
> + *     * Redistributions in binary form must reproduce the above copyright
> + *       notice, this list of conditions and the following disclaimer in
> + *       the documentation and/or other materials provided with the
> + *       distribution.
> + *     * Neither the name of Intel Corporation nor the names of its
> + *       contributors may be used to endorse or promote products derived
> + *       from this software without specific prior written permission.
> + *
> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + *
> + */
> +
> +#ifndef _RTE_VMBUS_H_
> +#define _RTE_VMBUS_H_
> +
> +/**
> + * @file
> + *
> + * RTE VMBUS Interface
> + */
> +#ifdef __cplusplus
> +extern "C" {
> +#endif
> +
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <limits.h>
> +#include <errno.h>
> +#include <uuid/uuid.h>
> +#include <sys/queue.h>
> +#include <stdint.h>
> +#include <inttypes.h>
> +
> +#include <rte_debug.h>
> +#include <rte_interrupts.h>
> +#include <rte_dev.h>
> +
> +/** List head type for VMBUS devices (linked through rte_vmbus_device.next). */
> +TAILQ_HEAD(vmbus_device_list, rte_vmbus_device);
> +/** List head type for VMBUS drivers (linked through rte_vmbus_driver.next). */
> +TAILQ_HEAD(vmbus_driver_list, rte_vmbus_driver);
> +
> +/** Global list of VMBUS drivers. */
> +extern struct vmbus_driver_list vmbus_driver_list;
> +/** Global list of VMBUS devices. */
> +extern struct vmbus_device_list vmbus_device_list;
> +
> +/** Pathname of VMBUS devices directory. */
> +#define SYSFS_VMBUS_DEVICES "/sys/bus/vmbus/devices"
> +
> +#define UUID_BUF_SZ	(36 + 1)
> +	
> +
> +/** Maximum number of VMBUS resources. */
> +#define VMBUS_MAX_RESOURCE 7
> +
> +/**
> + * A structure describing a VMBUS device.
> + *
> + * The structure ends in a flexible array member (sysfs_name), so it must
> + * be allocated with extra room for the name string and cannot be embedded
> + * by value in another structure or array.
> + */
> +struct rte_vmbus_device {
> +	TAILQ_ENTRY(rte_vmbus_device) next;     /**< Next probed VMBUS device. */
> +	struct rte_device device;               /**< Inherit core device */
> +	uuid_t device_id;			/**< VMBUS device id */
> +	uuid_t class_id;			/**< VMBUS device type */
> +	uint32_t relid;				/**< VMBUS id for notification */
> +	/* NOTE(review): monitor_id is not documented by this patch; presumably
> +	 * the VMBUS monitor id of the channel -- confirm with the author. */
> +	uint8_t	monitor_id;
> +	struct rte_intr_handle intr_handle;     /**< Interrupt handle */
> +	const struct rte_vmbus_driver *driver;  /**< Associated driver */
> +
> +	struct rte_mem_resource mem_resource[VMBUS_MAX_RESOURCE];
> +						/**< VMBUS Memory Resource */
> +	char sysfs_name[];			/**< Name in sysfs bus directory */
> +};
> +
> +struct rte_vmbus_driver;
> +
> +/**
> + * Initialisation function for the driver called during VMBUS probing.
> + */
> +typedef int (vmbus_probe_t)(struct rte_vmbus_driver *, struct rte_vmbus_device *);
> +
> +/**
> + * Uninitialisation function for the driver called during hotplugging.
> + */
> +typedef int (vmbus_remove_t)(struct rte_vmbus_device *);
> +
> +/**
> + * A structure describing a VMBUS driver.
> + *
> + * Instances are linked into a vmbus_driver_list through the next field.
> + */
> +struct rte_vmbus_driver {
> +	TAILQ_ENTRY(rte_vmbus_driver) next;     /**< Next in list. */
> +	struct rte_driver driver;               /**< Inherit core driver. */
> +	vmbus_probe_t *probe;                   /**< Device Probe function. */
> +	vmbus_remove_t *remove;                 /**< Device Remove function. */
> +
> +	/* NOTE(review): id_table is an array of uuid_t values, so the
> +	 * terminator is presumably an all-zero (null) UUID rather than a NULL
> +	 * pointer -- confirm against the matching code. */
> +	const uuid_t *id_table;			/**< ID table, NULL terminated. */
> +};
> +
> +/**
> + * Description of one mapped memory region of a VMBUS device.
> + */
> +struct vmbus_map {
> +	void *addr;		/**< Virtual address the region is mapped at */
> +	char *path;		/**< Path of the file the region was mapped from */
> +	uint64_t offset;	/**< Offset passed to mmap() for this region */
> +	uint64_t size;		/**< Length of the region in bytes */
> +	uint64_t phaddr;	/**< Physical address of the region */
> +};
> +
> +/*
> + * For multi-process we need to reproduce all vmbus mappings in secondary
> + * processes, so save them in a tailq.
> + *
> + * One entry is kept per device and is looked up by the device UUID.
> + */
> +struct mapped_vmbus_resource {
> +	TAILQ_ENTRY(mapped_vmbus_resource) next;	/* next entry in the tailq */
> +
> +	uuid_t uuid;			/* device id this entry belongs to */
> +	char path[PATH_MAX];		/* path of the UIO device node */
> +	int nb_maps;			/* number of valid entries in maps[] */
> +	struct vmbus_map maps[VMBUS_MAX_RESOURCE];	/* recorded mappings */
> +};
> +
> +/* List head type for the recorded mappings. */
> +TAILQ_HEAD(mapped_vmbus_res_list, mapped_vmbus_resource);
> +
> +/**
> + * Scan the content of the VMBUS bus, and the devices in the devices list
> + *
> + * @return
> + *  0 on success, negative on error
> + */
> +int rte_eal_vmbus_scan(void);
> +
> +/**
> + * Probe the VMBUS bus for registered drivers.
> + *
> + * Scan the content of the VMBUS bus, and call the probe() function for
> + * all registered drivers that have a matching entry in its id_table
> + * for discovered devices.
> + *
> + * @return
> + *   - 0 on success.
> + *   - Negative on error.
> + */
> +int rte_eal_vmbus_probe(void);
> +
> +/**
> + * Map the VMBUS device resources in user space virtual memory address
> + *
> + * @param dev
> + *   A pointer to a rte_vmbus_device structure describing the device
> + *   to use
> + *
> + * @return
> + *   0 on success, negative on error and positive if no driver
> + *   is found for the device.
> + */
> +int rte_eal_vmbus_map_device(struct rte_vmbus_device *dev);
> +
> +/**
> + * Unmap this device
> + *
> + * @param dev
> + *   A pointer to a rte_vmbus_device structure describing the device
> + *   to use
> + */
> +void rte_eal_vmbus_unmap_device(struct rte_vmbus_device *dev);
> +
> +/**
> + * Probe the single VMBUS device.
> + *
> + * Scan the content of the VMBUS bus, and find the vmbus device
> + * specified by device uuid, then call the probe() function for
> + * registered driver that has a matching entry in its id_table for
> + * discovered device.
> + *
> + * @param id
> + * 	The VMBUS device uuid.
> + * @return
> + *   - 0 on success.
> + *   - Negative on error.
> + */
> +int rte_eal_vmbus_probe_one(uuid_t id);
> +
> +/**
> + * Close the single VMBUS device.
> + *
> + * Scan the content of the VMBUS bus, and find the vmbus device id,
> + * then call the remove() function for registered driver that has a
> + * matching entry in its id_table for discovered device.
> + *
> + * @param id
> + * 	The VMBUS device uuid.
> + * @return
> + *   - 0 on success.
> + *   - Negative on error.
> + */
> +int rte_eal_vmbus_detach(uuid_t id);
> +
> +/**
> + * Register a VMBUS driver.
> + *
> + * @param driver
> + *   A pointer to a rte_vmbus_driver structure describing the driver
> + *   to be registered.
> + */
> +void rte_eal_vmbus_register(struct rte_vmbus_driver *driver);
> +
> +/**
> + * Helper for VMBUS device registration from driver instance.
> + *
> + * Declares an RTE_INIT function named vmbusinitfn_<nm> that sets the
> + * driver name to the stringified nm and passes vmbus_drv to
> + * rte_eal_vmbus_register(), then exports the driver name through
> + * RTE_PMD_EXPORT_NAME.
> + *
> + * @param nm
> + *   Driver name (bare token, not a string).
> + * @param vmbus_drv
> + *   The struct rte_vmbus_driver object (not a pointer) to register.
> + */
> +#define RTE_PMD_REGISTER_VMBUS(nm, vmbus_drv) \
> +RTE_INIT(vmbusinitfn_ ##nm); \
> +static void vmbusinitfn_ ##nm(void) \
> +{\
> +	(vmbus_drv).driver.name = RTE_STR(nm);\
> +	rte_eal_vmbus_register(&vmbus_drv); \
> +} \
> +RTE_PMD_EXPORT_NAME(nm, __COUNTER__)
> +
> +/**
> + * Unregister a VMBUS driver.
> + *
> + * @param driver
> + *   A pointer to a rte_vmbus_driver structure describing the driver
> + *   to be unregistered.
> + */
> +void rte_eal_vmbus_unregister(struct rte_vmbus_driver *driver);
> +
> +#ifdef __cplusplus
> +}
> +#endif
> +
> +#endif /* _RTE_VMBUS_H_ */
> diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile
> index 4e206f0..f6ca384 100644
> --- a/lib/librte_eal/linuxapp/eal/Makefile
> +++ b/lib/librte_eal/linuxapp/eal/Makefile
> @@ -71,6 +71,11 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_timer.c
>  SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_interrupts.c
>  SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_alarm.c
>
> +ifeq ($(CONFIG_RTE_LIBRTE_HV_PMD),y)
> +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_vmbus.c
> +LDLIBS += -luuid
> +endif
> +
>  # from common dir
>  SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_lcore.c
>  SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_timer.c
> @@ -114,6 +119,7 @@ CFLAGS_eal_hugepage_info.o := -D_GNU_SOURCE
>  CFLAGS_eal_pci.o := -D_GNU_SOURCE
>  CFLAGS_eal_pci_uio.o := -D_GNU_SOURCE
>  CFLAGS_eal_pci_vfio.o := -D_GNU_SOURCE
> +CFLAGS_eal_vmbus.o := -D_GNU_SOURCE
>  CFLAGS_eal_common_whitelist.o := -D_GNU_SOURCE
>  CFLAGS_eal_common_options.o := -D_GNU_SOURCE
>  CFLAGS_eal_common_thread.o := -D_GNU_SOURCE
> diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c
> index 2075282..71083ec 100644
> --- a/lib/librte_eal/linuxapp/eal/eal.c
> +++ b/lib/librte_eal/linuxapp/eal/eal.c
> @@ -70,6 +70,7 @@
>  #include <rte_cpuflags.h>
>  #include <rte_interrupts.h>
>  #include <rte_pci.h>
> +#include <rte_vmbus.h>
>  #include <rte_dev.h>
>  #include <rte_devargs.h>
>  #include <rte_common.h>
> @@ -830,6 +831,11 @@ rte_eal_init(int argc, char **argv)
>
>  	eal_check_mem_on_local_socket();
>
> +#ifdef RTE_LIBRTE_HV_PMD
> +	if (rte_eal_vmbus_init() < 0)
> +		RTE_LOG(ERR, EAL, "Cannot init VMBUS\n");
> +#endif
> +
>  	if (eal_plugins_init() < 0)
>  		rte_panic("Cannot init plugins\n");
>
> @@ -887,6 +893,11 @@ rte_eal_init(int argc, char **argv)
>  	if (rte_eal_pci_probe())
>  		rte_panic("Cannot probe PCI\n");
>
> +#ifdef RTE_LIBRTE_HV_PMD
> +	if (rte_eal_vmbus_probe() < 0)
> +		rte_panic("Cannot probe VMBUS\n");
> +#endif
> +
>  	rte_eal_mcfg_complete();
>
>  	return fctret;
> diff --git a/lib/librte_eal/linuxapp/eal/eal_vmbus.c b/lib/librte_eal/linuxapp/eal/eal_vmbus.c
> new file mode 100644
> index 0000000..cbd8bd1
> --- /dev/null
> +++ b/lib/librte_eal/linuxapp/eal/eal_vmbus.c
> @@ -0,0 +1,906 @@
> +/*-
> + *   BSD LICENSE
> + *
> + *   Copyright(c) 2013-2016 Brocade Communications Systems, Inc.
> + *   Copyright(c) 2016 Microsoft Corporation
> + *   All rights reserved.
> + *
> + *   Redistribution and use in source and binary forms, with or without
> + *   modification, are permitted provided that the following conditions
> + *   are met:
> + *
> + *     * Redistributions of source code must retain the above copyright
> + *       notice, this list of conditions and the following disclaimer.
> + *     * Redistributions in binary form must reproduce the above copyright
> + *       notice, this list of conditions and the following disclaimer in
> + *       the documentation and/or other materials provided with the
> + *       distribution.
> + *     * Neither the name of Intel Corporation nor the names of its
> + *       contributors may be used to endorse or promote products derived
> + *       from this software without specific prior written permission.
> + *
> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + *
> + */
> +
> +#include <string.h>
> +#include <unistd.h>
> +#include <dirent.h>
> +#include <fcntl.h>
> +#include <sys/mman.h>
> +
> +#include <rte_eal.h>
> +#include <rte_tailq.h>
> +#include <rte_log.h>
> +#include <rte_devargs.h>
> +#include <rte_vmbus.h>
> +#include <rte_malloc.h>
> +
> +#include "eal_private.h"
> +#include "eal_pci_init.h"
> +#include "eal_filesystem.h"
> +
> +/* Global list of VMBUS drivers, initially empty. */
> +struct vmbus_driver_list vmbus_driver_list =
> +	TAILQ_HEAD_INITIALIZER(vmbus_driver_list);
> +/* Global list of VMBUS devices, initially empty. */
> +struct vmbus_device_list vmbus_device_list =
> +	TAILQ_HEAD_INITIALIZER(vmbus_device_list);
> +
> +/*
> + * Address hint for the next resource mapping; advanced past each mapping
> + * made by vmbus_uio_map_resource_by_index().
> + */
> +static void *vmbus_map_addr;
> +
> +/*
> + * Tailq holding the mapped_vmbus_resource entries.
> + * NOTE(review): the name "UIO_RESOURCE_LIST" looks like it may also be the
> + * name used by the PCI UIO tailq; confirm the two registrations cannot
> + * collide.
> + */
> +static struct rte_tailq_elem rte_vmbus_uio_tailq = {
> +	.name = "UIO_RESOURCE_LIST",
> +};
> +EAL_REGISTER_TAILQ(rte_vmbus_uio_tailq);
> +
> +/*
> + * Parse a sysfs file containing a single UUID.
> + *
> + * The first line of the file is read; the trailing newline is removed and,
> + * if the value is wrapped in { } braces, both braces are stripped before
> + * the text is handed to uuid_parse().
> + *
> + * @param filename
> + *   Path of the sysfs attribute to read.
> + * @param uu
> + *   Filled with the parsed UUID on success.
> + * @return
> + *   0 on success, -1 if the file cannot be opened, read or parsed.
> + */
> +static int
> +vmbus_get_sysfs_uuid(const char *filename, uuid_t uu)
> +{
> +	char buf[BUFSIZ];
> +	char *str = buf;
> +	char *cp;
> +	FILE *f;
> +
> +	f = fopen(filename, "r");
> +	if (f == NULL) {
> +		RTE_LOG(ERR, EAL, "%s(): cannot open sysfs value %s\n",
> +				__func__, filename);
> +		return -1;
> +	}
> +
> +	if (fgets(buf, sizeof(buf), f) == NULL) {
> +		RTE_LOG(ERR, EAL, "%s(): cannot read sysfs value %s\n",
> +				__func__, filename);
> +		fclose(f);
> +		return -1;
> +	}
> +	fclose(f);
> +
> +	/* drop the newline kept by fgets(); search buf, not the NULL cp */
> +	cp = strchr(buf, '\n');
> +	if (cp)
> +		*cp = '\0';
> +
> +	/* strip { } notation: cut at the closing brace, skip the opening one */
> +	if (buf[0] == '{') {
> +		cp = strchr(buf, '}');
> +		if (cp) {
> +			*cp = '\0';
> +			str = buf + 1;
> +		}
> +	}
> +
> +	if (uuid_parse(str, uu) < 0) {
> +		RTE_LOG(ERR, EAL, "%s %s not a valid UUID\n",
> +			filename, str);
> +		return -1;
> +	}
> +
> +	return 0;
> +}
> +
> +/*
> + * mmap() a region of an open file as a shared read/write mapping.
> + *
> + * An error is logged when mmap() fails, and also when a non-NULL
> + * requested_addr was given but the mapping landed elsewhere.  In every case
> + * the raw mmap() result is returned, so callers must compare it against
> + * MAP_FAILED (and against the requested address if that matters to them).
> + */
> +static void *
> +vmbus_map_resource(void *requested_addr, int fd, off_t offset, size_t size,
> +		   int flags)
> +{
> +	void *va;
> +	int bad;
> +
> +	va = mmap(requested_addr, size, PROT_READ | PROT_WRITE,
> +		  MAP_SHARED | flags, fd, offset);
> +
> +	bad = (va == MAP_FAILED);
> +	if (!bad && requested_addr != NULL)
> +		bad = (va != requested_addr);
> +
> +	if (!bad) {
> +		RTE_LOG(DEBUG, EAL, "  VMBUS memory mapped at %p\n", va);
> +		return va;
> +	}
> +
> +	RTE_LOG(ERR, EAL,
> +		"%s(): cannot mmap(%d, %p, 0x%lx, 0x%lx): %s)\n",
> +		__func__, fd, requested_addr,
> +		(unsigned long)size, (unsigned long)offset,
> +		strerror(errno));
> +	return va;
> +}
> +
> +/*
> + * munmap() a previously mapped region.
> + *
> + * A NULL address is silently ignored; a munmap() failure is only logged.
> + */
> +static void
> +vmbus_unmap_resource(void *requested_addr, size_t size)
> +{
> +	if (requested_addr != NULL) {
> +		/* Unmap the VMBUS memory resource of device */
> +		if (munmap(requested_addr, size) == 0) {
> +			RTE_LOG(DEBUG, EAL, "  VMBUS memory unmapped at %p\n",
> +					requested_addr);
> +			return;
> +		}
> +
> +		RTE_LOG(ERR, EAL, "%s(): cannot munmap(%p, 0x%lx): %s\n",
> +			__func__, requested_addr, (unsigned long)size,
> +			strerror(errno));
> +	}
> +}
> +
> +/*
> + * Locate the UIO instance attached to a VMBUS device.
> + *
> + * Only the sysfs layout of current kernels is handled.  Unlike PCI there
> + * is no option (or need) to create the UIO device.
> + *
> + * The device's "uio" sysfs directory is scanned and the first entry named
> + * "uio<N>" is taken; its full sysfs path is written to dstbuf.
> + *
> + * Returns the UIO number N, or -1 when the directory cannot be opened or
> + * holds no such entry.
> + */
> +static int vmbus_get_uio_dev(const char *name,
> +			     char *dstbuf, size_t buflen)
> +{
> +	char uio_dir[PATH_MAX];
> +	struct dirent *ent;
> +	unsigned int idx;
> +	int found = -1;
> +	DIR *dp;
> +
> +	snprintf(uio_dir, sizeof(uio_dir),
> +		 "/sys/bus/vmbus/devices/%s/uio", name);
> +
> +	dp = opendir(uio_dir);
> +	if (dp == NULL) {
> +		RTE_LOG(ERR, EAL, "Cannot map uio resources for %s: %s\n",
> +			name, strerror(errno));
> +		return -1;
> +	}
> +
> +	/* stop at the first entry whose name parses as "uio<N>" */
> +	for (ent = readdir(dp); ent != NULL; ent = readdir(dp)) {
> +		if (sscanf(ent->d_name, "uio%u", &idx) == 1) {
> +			snprintf(dstbuf, buflen, "%s/uio%u", uio_dir, idx);
> +			found = (int) idx;
> +			break;
> +		}
> +	}
> +	closedir(dp);
> +
> +	return found;
> +}
> +
> +/*
> + * Read one unsigned integer from the sysfs file "<dir>/<name>".
> + *
> + * This differs from the generic EAL helper in that it yields a 64-bit
> + * value.  The number is converted with strtoull() using base 0 and must be
> + * followed directly by a newline.
> + *
> + * Returns 0 with *val set on success, -1 on open, read or parse failure
> + * (*val may already have been written in the parse-failure case).
> + */
> +static int
> +vmbus_parse_sysfs_value(const char *dir, const char *name,
> +			uint64_t *val)
> +{
> +	char path[PATH_MAX];
> +	char line[BUFSIZ];
> +	char *stop = NULL;
> +	char *got;
> +	FILE *fp;
> +
> +	snprintf(path, sizeof(path), "%s/%s", dir, name);
> +
> +	fp = fopen(path, "r");
> +	if (fp == NULL) {
> +		RTE_LOG(ERR, EAL, "%s(): cannot open sysfs value %s\n",
> +				__func__, path);
> +		return -1;
> +	}
> +
> +	got = fgets(line, sizeof(line), fp);
> +	fclose(fp);
> +	if (got == NULL) {
> +		RTE_LOG(ERR, EAL, "%s(): cannot read sysfs value %s\n",
> +				__func__, path);
> +		return -1;
> +	}
> +
> +	*val = strtoull(line, &stop, 0);
> +	if (line[0] != '\0' && stop != NULL && *stop == '\n')
> +		return 0;
> +
> +	RTE_LOG(ERR, EAL, "%s(): cannot parse sysfs value %s\n",
> +			__func__, path);
> +	return -1;
> +}
> +
> +/*
> + * Get mappings out of values provided by uio.
> + *
> + * Walks <uioname>/maps/map0, map1, ... until a directory is missing or
> + * VMBUS_MAX_RESOURCE entries have been read, filling offset, size and
> + * phaddr of the matching maps[] element from the sysfs attributes
> + * "offset", "size" and "addr".
> + *
> + * @param uioname
> + *   sysfs directory of the UIO device.
> + * @param maps
> + *   Array of at least VMBUS_MAX_RESOURCE elements to fill.
> + * @return
> + *   Number of maps found, or -1 if an attribute cannot be read.
> + */
> +static int
> +vmbus_uio_get_mappings(const char *uioname,
> +		       struct vmbus_map maps[])
> +{
> +	int i;
> +
> +	for (i = 0; i != VMBUS_MAX_RESOURCE; i++) {
> +		struct vmbus_map *map = &maps[i];
> +		char dirname[PATH_MAX];
> +
> +		/* check if map directory exists */
> +		snprintf(dirname, sizeof(dirname),
> +			 "%s/maps/map%d", uioname, i);
> +
> +		if (access(dirname, F_OK) != 0)
> +			break;
> +
> +		/* get mapping offset */
> +		if (vmbus_parse_sysfs_value(dirname, "offset",
> +					    &map->offset) < 0)
> +			return -1;
> +
> +		/* get mapping size */
> +		if (vmbus_parse_sysfs_value(dirname, "size",
> +					    &map->size) < 0)
> +			return -1;
> +
> +		/* get mapping physical address (of this map, not maps[0]) */
> +		if (vmbus_parse_sysfs_value(dirname, "addr",
> +					    &map->phaddr) < 0)
> +			return -1;
> +	}
> +
> +	return i;
> +}
> +
> +/*
> + * Release what vmbus_uio_alloc_resource() acquired.
> + *
> + * Frees uio_res (which may be NULL) and, when the device's interrupt
> + * handle holds an open descriptor, closes it and resets the handle.
> + * "Not open" is represented by fd == -1, so the test is fd >= 0: a plain
> + * truth test would try to close -1 and would skip a valid descriptor 0.
> + */
> +static void
> +vmbus_uio_free_resource(struct rte_vmbus_device *dev,
> +		struct mapped_vmbus_resource *uio_res)
> +{
> +	rte_free(uio_res);
> +
> +	if (dev->intr_handle.fd >= 0) {
> +		close(dev->intr_handle.fd);
> +		dev->intr_handle.fd = -1;
> +		dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
> +	}
> +}
> +
> +/*
> + * Create the mapping record for a device handled by a UIO driver.
> + *
> + * Finds the device's UIO instance, allocates a zeroed record, opens
> + * /dev/uio<N> into dev->intr_handle.fd, stores the device node path and the
> + * device id in the record, and reads the map descriptions from sysfs.
> + *
> + * Returns the new record, or NULL on any failure.  Once the UIO instance
> + * has been found, every failure path goes through
> + * vmbus_uio_free_resource().
> + */
> +static struct mapped_vmbus_resource *
> +vmbus_uio_alloc_resource(struct rte_vmbus_device *dev)
> +{
> +	char uio_sysfs[PATH_MAX];
> +	char uio_node[PATH_MAX];
> +	struct mapped_vmbus_resource *res;
> +	int uio_idx;
> +	int found;
> +
> +	uio_idx = vmbus_get_uio_dev(dev->sysfs_name, uio_sysfs,
> +				    sizeof(uio_sysfs));
> +	if (uio_idx < 0) {
> +		RTE_LOG(WARNING, EAL,
> +			"  %s not managed by UIO driver, skipping\n",
> +			dev->sysfs_name);
> +		return NULL;
> +	}
> +
> +	/* the record is what secondary processes later replay */
> +	res = rte_zmalloc("UIO_RES", sizeof(*res), 0);
> +	if (res == NULL) {
> +		RTE_LOG(ERR, EAL,
> +			"%s(): cannot store uio mmap details\n", __func__);
> +		vmbus_uio_free_resource(dev, res);
> +		return NULL;
> +	}
> +
> +	snprintf(uio_node, sizeof(uio_node), "/dev/uio%u", uio_idx);
> +	dev->intr_handle.fd = open(uio_node, O_RDWR);
> +	if (dev->intr_handle.fd < 0) {
> +		RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
> +			uio_node, strerror(errno));
> +		vmbus_uio_free_resource(dev, res);
> +		return NULL;
> +	}
> +
> +	dev->intr_handle.type = RTE_INTR_HANDLE_UIO_INTX;
> +
> +	snprintf(res->path, sizeof(res->path), "%s", uio_node);
> +	uuid_copy(res->uuid, dev->device_id);
> +
> +	found = vmbus_uio_get_mappings(uio_sysfs, res->maps);
> +	if (found < 0) {
> +		vmbus_uio_free_resource(dev, res);
> +		return NULL;
> +	}
> +
> +	RTE_LOG(DEBUG, EAL, "Found %d memory maps for device %s\n",
> +		found, dev->sysfs_name);
> +
> +	return res;
> +}
> +
> +/*
> + * Map one memory resource of a device and record it in uio_res.
> + *
> + * Opens the resource file of resource res_idx, maps
> + * dev->mem_resource[res_idx].len bytes of it at the running address hint
> + * vmbus_map_addr, then stores address, size, physical address and file
> + * path in uio_res->maps[map_idx] and the address in
> + * dev->mem_resource[res_idx].addr.
> + *
> + * @return
> + *   0 on success, -1 if the file cannot be opened, memory for the path
> + *   cannot be allocated, or mmap() fails.
> + */
> +static int
> +vmbus_uio_map_resource_by_index(struct rte_vmbus_device *dev,
> +				unsigned int res_idx,
> +				struct mapped_vmbus_resource *uio_res,
> +				unsigned int map_idx)
> +{
> +	struct vmbus_map *maps = uio_res->maps;
> +	char devname[PATH_MAX];
> +	void *mapaddr;
> +	int fd;
> +
> +	/*
> +	 * NOTE(review): SYSFS_VMBUS_DEVICES is "/sys/bus/vmbus/devices" but
> +	 * this path has no "devices/" component -- confirm it is intended.
> +	 */
> +	snprintf(devname, sizeof(devname),
> +		 "/sys/bus/vmbus/%s/resource%u", dev->sysfs_name, res_idx);
> +
> +	fd = open(devname, O_RDWR);
> +	if (fd < 0) {
> +		RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
> +				devname, strerror(errno));
> +		return -1;
> +	}
> +
> +	/* allocate memory to keep path */
> +	maps[map_idx].path = rte_malloc(NULL, strlen(devname) + 1, 0);
> +	if (maps[map_idx].path == NULL) {
> +		RTE_LOG(ERR, EAL, "Cannot allocate memory for path: %s\n",
> +				strerror(errno));
> +		/* do not leak the descriptor opened above */
> +		close(fd);
> +		return -1;
> +	}
> +
> +	/* try mapping somewhere close to the end of hugepages */
> +	if (vmbus_map_addr == NULL)
> +		vmbus_map_addr = pci_find_max_end_va();
> +
> +	mapaddr = vmbus_map_resource(vmbus_map_addr, fd, 0,
> +				     dev->mem_resource[res_idx].len, 0);
> +	/* the mapping stays valid after the descriptor is closed */
> +	close(fd);
> +	if (mapaddr == MAP_FAILED) {
> +		rte_free(maps[map_idx].path);
> +		return -1;
> +	}
> +
> +	/* next resource is placed right after this one */
> +	vmbus_map_addr = RTE_PTR_ADD(mapaddr,
> +				     dev->mem_resource[res_idx].len);
> +
> +	maps[map_idx].phaddr = dev->mem_resource[res_idx].phys_addr;
> +	maps[map_idx].size = dev->mem_resource[res_idx].len;
> +	maps[map_idx].addr = mapaddr;
> +	maps[map_idx].offset = 0;
> +	strcpy(maps[map_idx].path, devname);
> +	dev->mem_resource[res_idx].addr = mapaddr;
> +
> +	return 0;
> +}
> +
> +/*
> + * Unmap every region recorded in uio_res.
> + *
> + * A NULL record is ignored.  The stored path strings are released only
> + * when running as the primary process.
> + */
> +static void
> +vmbus_uio_unmap(struct mapped_vmbus_resource *uio_res)
> +{
> +	struct vmbus_map *m;
> +	int idx = 0;
> +
> +	if (uio_res == NULL)
> +		return;
> +
> +	while (idx != uio_res->nb_maps) {
> +		m = &uio_res->maps[idx];
> +
> +		vmbus_unmap_resource(m->addr, m->size);
> +		if (rte_eal_process_type() == RTE_PROC_PRIMARY)
> +			rte_free(m->path);
> +
> +		idx++;
> +	}
> +}
> +
> +/*
> + * Look up the mapping record whose uuid equals the device id of dev.
> + *
> + * Returns the record, or NULL when dev is NULL or nothing matches.
> + */
> +static struct mapped_vmbus_resource *
> +vmbus_uio_find_resource(struct rte_vmbus_device *dev)
> +{
> +	struct mapped_vmbus_res_list *list =
> +			RTE_TAILQ_CAST(rte_vmbus_uio_tailq.head, mapped_vmbus_res_list);
> +	struct mapped_vmbus_resource *cur;
> +
> +	if (dev == NULL)
> +		return NULL;
> +
> +	for (cur = TAILQ_FIRST(list); cur != NULL;
> +	     cur = TAILQ_NEXT(cur, next)) {
> +		if (uuid_compare(cur->uuid, dev->device_id) != 0)
> +			continue;
> +		return cur;
> +	}
> +	return NULL;
> +}
> +
> +/*
> + * Unmap the VMBUS resources of a VMBUS device in virtual memory.
> + *
> + * Nothing is done when dev is NULL or no mapping record exists for it.
> + * A secondary process only unmaps its own view of the regions.  The
> + * primary process also removes the record from the tailq, frees it,
> + * closes the interrupt descriptors and resets the interrupt handle.
> + */
> +static void
> +vmbus_uio_unmap_resource(struct rte_vmbus_device *dev)
> +{
> +	struct mapped_vmbus_resource *uio_res;
> +	struct mapped_vmbus_res_list *uio_res_list =
> +			RTE_TAILQ_CAST(rte_vmbus_uio_tailq.head, mapped_vmbus_res_list);
> +
> +	if (dev == NULL)
> +		return;
> +
> +	/* find an entry for the device */
> +	uio_res = vmbus_uio_find_resource(dev);
> +	if (uio_res == NULL)
> +		return;
> +
> +	/* secondary processes - just unmap, the record belongs to primary */
> +	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
> +		/* a void function may not "return <expression>" in ISO C */
> +		vmbus_uio_unmap(uio_res);
> +		return;
> +	}
> +
> +	TAILQ_REMOVE(uio_res_list, uio_res, next);
> +
> +	/* unmap all resources */
> +	vmbus_uio_unmap(uio_res);
> +
> +	/* free uio resource */
> +	rte_free(uio_res);
> +
> +	/* close fd if in primary process */
> +	close(dev->intr_handle.fd);
> +	if (dev->intr_handle.uio_cfg_fd >= 0) {
> +		close(dev->intr_handle.uio_cfg_fd);
> +		dev->intr_handle.uio_cfg_fd = -1;
> +	}
> +
> +	dev->intr_handle.fd = -1;
> +	dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
> +}
> +
> +/*
> + * Reproduce, in a secondary process, the mappings recorded for a device.
> + *
> + * The record matching dev->device_id is looked up in the tailq and each of
> + * its regions is mapped again from the stored path, at the stored address.
> + * If any region cannot be opened the function fails immediately; if it
> + * cannot be mapped at exactly the recorded address, the regions mapped so
> + * far are unmapped again before failing.
> + *
> + * @return
> + *   0 on success, -1 on open or mmap failure, 1 if no record exists for
> + *   the device.
> + */
> +static int
> +vmbus_uio_map_secondary(struct rte_vmbus_device *dev)
> +{
> +	struct mapped_vmbus_resource *uio_res;
> +	struct mapped_vmbus_res_list *uio_res_list =
> +			RTE_TAILQ_CAST(rte_vmbus_uio_tailq.head,
> +				       mapped_vmbus_res_list);
> +
> +	TAILQ_FOREACH(uio_res, uio_res_list, next) {
> +		int i;
> +
> +		/* skip this element if it doesn't match our id */
> +		if (uuid_compare(uio_res->uuid, dev->device_id))
> +			continue;
> +
> +		for (i = 0; i != uio_res->nb_maps; i++) {
> +			void *mapaddr;
> +			int fd;
> +
> +			fd = open(uio_res->maps[i].path, O_RDWR);
> +			if (fd < 0) {
> +				RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
> +					uio_res->maps[i].path, strerror(errno));
> +				return -1;
> +			}
> +
> +			mapaddr = vmbus_map_resource(uio_res->maps[i].addr, fd,
> +						     uio_res->maps[i].offset,
> +						     uio_res->maps[i].size, 0);
> +			/* fd is not needed in slave process, close it */
> +			close(fd);
> +
> +			if (mapaddr == uio_res->maps[i].addr)
> +				continue;
> +
> +			RTE_LOG(ERR, EAL,
> +				"Cannot mmap device resource file %s to address: %p\n",
> +				uio_res->maps[i].path,
> +				uio_res->maps[i].addr);
> +
> +			/* a mapping placed at another address is unusable; drop it */
> +			if (mapaddr != MAP_FAILED)
> +				vmbus_unmap_resource(mapaddr,
> +						     uio_res->maps[i].size);
> +
> +			/* unmap addrs correctly mapped */
> +			while (i != 0) {
> +				--i;
> +				vmbus_unmap_resource(uio_res->maps[i].addr,
> +						     uio_res->maps[i].size);
> +			}
> +			return -1;
> +
> +		}
> +		return 0;
> +	}
> +
> +	RTE_LOG(ERR, EAL, "Cannot find resource for device\n");
> +	return 1;
> +}
> +
> +/*
> + * Map the memory resources of a vmbus device into virtual memory.
> + *
> + * The interrupt handle is reset first.  In a non-primary process the work
> + * is delegated to vmbus_uio_map_secondary().  In the primary process a UIO
> + * resource record is allocated, every mem_resource slot with a non-zero
> + * physical address is mapped, and the record is appended to the resource
> + * list.
> + *
> + * Returns 0 on success.  In the primary, -1 is returned on failure after
> + * the mappings made so far have been undone and the record released.
> + */
> +int
> +rte_eal_vmbus_map_device(struct rte_vmbus_device *dev)
> +{
> +	struct mapped_vmbus_res_list *res_list =
> +		RTE_TAILQ_CAST(rte_vmbus_uio_tailq.head, mapped_vmbus_res_list);
> +	struct mapped_vmbus_resource *res;
> +	int slot, undo, nmapped = 0;
> +
> +	dev->intr_handle.fd = -1;
> +	dev->intr_handle.uio_cfg_fd = -1;
> +	dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
> +
> +	/* secondary processes reuse what the primary recorded */
> +	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
> +		return vmbus_uio_map_secondary(dev);
> +
> +	res = vmbus_uio_alloc_resource(dev);
> +	if (res == NULL)
> +		return -1;
> +
> +	for (slot = 0; slot != VMBUS_MAX_RESOURCE; slot++) {
> +		/* an unset physical address marks an unused slot */
> +		if (dev->mem_resource[slot].phys_addr == 0)
> +			continue;
> +
> +		if (vmbus_uio_map_resource_by_index(dev, slot,
> +						    res, nmapped) != 0) {
> +			/* roll back everything mapped before the failure */
> +			for (undo = 0; undo < nmapped; undo++) {
> +				vmbus_unmap_resource(res->maps[undo].addr,
> +						     res->maps[undo].size);
> +				rte_free(res->maps[undo].path);
> +			}
> +			vmbus_uio_free_resource(dev, res);
> +			return -1;
> +		}
> +
> +		nmapped++;
> +	}
> +
> +	res->nb_maps = nmapped;
> +	TAILQ_INSERT_TAIL(res_list, res, next);
> +
> +	return 0;
> +}
> +
> +/*
> + * Scan one vmbus sysfs entry and add the device it describes to the
> + * device list.
> + *
> + * Reads device_id, class_id, id (relid), monitor_id and numa_node from
> + * SYSFS_VMBUS_DEVICES/<name>; a missing numa_node defaults to 0.  The list
> + * is kept sorted by device UUID.  If a device with the same UUID is already
> + * listed, its mem_resource table is overwritten from the new entry and the
> + * new entry is discarded.
> + *
> + * Returns 0 on success, -1 on allocation failure or when one of the
> + * mandatory sysfs attributes cannot be read.
> + */
> +static int
> +vmbus_scan_one(const char *name)
> +{
> +	struct rte_vmbus_device *dev, *dev2;
> +	char filename[PATH_MAX];
> +	char dirname[PATH_MAX];
> +	unsigned long tmp;
> +
> +	/* the sysfs name is stored in the same allocation, after the struct */
> +	dev = malloc(sizeof(*dev) + strlen(name) + 1);
> +	if (dev == NULL)
> +		return -1;
> +
> +	memset(dev, 0, sizeof(*dev));
> +	strcpy(dev->sysfs_name, name);
> +
> +	/* sysfs base directory
> +	 *   /sys/bus/vmbus/devices/7a08391f-f5a0-4ac0-9802-d13fd964f8df
> +	 * or on older kernel
> +	 *   /sys/bus/vmbus/devices/vmbus_1
> +	 */
> +	snprintf(dirname, sizeof(dirname), "%s/%s",
> +		 SYSFS_VMBUS_DEVICES, name);
> +
> +	/* get device id */
> +	snprintf(filename, sizeof(filename), "%s/device_id", dirname);
> +	if (vmbus_get_sysfs_uuid(filename, dev->device_id) < 0)
> +		goto error;
> +
> +	/* get device class  */
> +	snprintf(filename, sizeof(filename), "%s/class_id", dirname);
> +	if (vmbus_get_sysfs_uuid(filename, dev->class_id) < 0)
> +		goto error;
> +
> +	/* get relid */
> +	snprintf(filename, sizeof(filename), "%s/id", dirname);
> +	if (eal_parse_sysfs_value(filename, &tmp) < 0)
> +		goto error;
> +	dev->relid = tmp;
> +
> +	/* get monitor id */
> +	snprintf(filename, sizeof(filename), "%s/monitor_id", dirname);
> +	if (eal_parse_sysfs_value(filename, &tmp) < 0)
> +		goto error;
> +	dev->monitor_id = tmp;
> +
> +	/* get numa node */
> +	snprintf(filename, sizeof(filename), "%s/numa_node",
> +		 dirname);
> +	if (eal_parse_sysfs_value(filename, &tmp) < 0)
> +		/* if no NUMA support, set default to 0 */
> +		dev->device.numa_node = 0;
> +	else
> +		dev->device.numa_node = tmp;
> +
> +	/* device is valid, add in list (sorted) */
> +	RTE_LOG(DEBUG, EAL, "Adding vmbus device %s\n", name);
> +
> +	TAILQ_FOREACH(dev2, &vmbus_device_list, next) {
> +		int ret;
> +
> +		/* compare the new device against the listed one */
> +		ret = uuid_compare(dev->device_id, dev2->device_id);
> +		if (ret > 0)
> +			continue;
> +
> +		if (ret < 0) {
> +			TAILQ_INSERT_BEFORE(dev2, dev, next);
> +			rte_eal_device_insert(&dev->device);
> +		} else { /* already registered */
> +			memmove(dev2->mem_resource, dev->mem_resource,
> +				sizeof(dev->mem_resource));
> +			free(dev);
> +		}
> +		return 0;
> +	}
> +
> +	/* sorts after every listed device (or the list is empty) */
> +	rte_eal_device_insert(&dev->device);
> +	TAILQ_INSERT_TAIL(&vmbus_device_list, dev, next);
> +
> +	return 0;
> +error:
> +	free(dev);
> +	return -1;
> +}
> +
> +/*
> + * Walk SYSFS_VMBUS_DEVICES and hand every entry whose name does not start
> + * with '.' to vmbus_scan_one().
> + *
> + * Returns 0 when the directory does not exist or every entry was scanned,
> + * -1 if the directory cannot be opened for another reason or if scanning
> + * an entry fails (the walk stops at the first failing entry).
> + */
> +static int
> +vmbus_scan(void)
> +{
> +	DIR *sysfs_dir = opendir(SYSFS_VMBUS_DEVICES);
> +	struct dirent *entry;
> +	int status = 0;
> +
> +	if (sysfs_dir == NULL) {
> +		/* no vmbus directory at all is not an error */
> +		if (errno == ENOENT)
> +			return 0;
> +
> +		RTE_LOG(ERR, EAL, "%s(): opendir failed: %s\n",
> +				__func__, strerror(errno));
> +		return -1;
> +	}
> +
> +	while ((entry = readdir(sysfs_dir)) != NULL) {
> +		if (entry->d_name[0] == '.')
> +			continue;
> +
> +		if (vmbus_scan_one(entry->d_name) < 0) {
> +			status = -1;
> +			break;
> +		}
> +	}
> +
> +	closedir(sysfs_dir);
> +	return status;
> +}
> +
> +/*
> + * Initialise the VMBUS EAL subsystem by scanning the bus.
> + *
> + * Nothing is scanned when internal_config.no_vmbus is set.
> + * Returns 0 on success (or when VMBUS is disabled), -1 if the scan fails.
> + */
> +int rte_eal_vmbus_init(void)
> +{
> +	if (!internal_config.no_vmbus && vmbus_scan() < 0) {
> +		RTE_LOG(ERR, EAL, "%s(): Cannot scan vmbus\n", __func__);
> +		return -1;
> +	}
> +
> +	return 0;
> +}
> +
> +/* Below is PROBE part of eal_vmbus library */
> +
> +/*
> + * If the device class ID matches an entry of the driver's id_table, map
> + * the device and call the probe() function of the driver.
> + *
> + * Returns the result of the mapping or of probe(): 0 on success, a
> + * negative value on error.  A positive value is returned when the driver
> + * does not support the device or the device is blacklisted.  Resources
> + * mapped here are unmapped again if probe() does not succeed.
> + */
> +static int
> +rte_eal_vmbus_probe_one_driver(struct rte_vmbus_driver *dr,
> +			       struct rte_vmbus_device *dev)
> +{
> +	const uuid_t *id_table;
> +
> +	RTE_LOG(DEBUG, EAL, "  probe driver: %s\n", dr->driver.name);
> +
> +	for (id_table = dr->id_table; !uuid_is_null(*id_table); ++id_table) {
> +		struct rte_devargs *args;
> +		char guid[UUID_BUF_SZ];
> +		int ret;
> +
> +		/* skip table entries not associated with this device class */
> +		if (uuid_compare(*id_table, dev->class_id) != 0)
> +			continue;
> +
> +		uuid_unparse(dev->device_id, guid);
> +		RTE_LOG(INFO, EAL, "VMBUS device %s on NUMA socket %i\n",
> +			guid, dev->device.numa_node);
> +
> +		/* no initialization when blacklisted, return without error */
> +		args = dev->device.devargs;
> +		if (args && args->type == RTE_DEVTYPE_BLACKLISTED_VMBUS) {
> +			RTE_LOG(INFO, EAL, "  Device is blacklisted, not initializing\n");
> +			return 1;
> +		}
> +
> +		RTE_LOG(INFO, EAL, "  probe driver: %s\n", dr->driver.name);
> +
> +		/* map resources for device */
> +		ret = rte_eal_vmbus_map_device(dev);
> +		if (ret != 0)
> +			return ret;
> +
> +		/* reference driver structure */
> +		dev->driver = dr;
> +
> +		/* call the driver probe() function */
> +		ret = dr->probe(dr, dev);
> +		if (ret) {
> +			/* probe did not take the device: undo the steps above */
> +			dev->driver = NULL;
> +			vmbus_uio_unmap_resource(dev);
> +		}
> +
> +		return ret;
> +	}
> +
> +	/* return positive value if driver doesn't support this device */
> +	return 1;
> +}
> +
> +
> +/*
> + * If the device class ID matches an entry of the driver's id_table, call
> + * the remove() function of the driver (when it has one), drop the driver
> + * reference and unmap the device resources.
> + *
> + * Returns 0 when the device was detached, -1 when remove() reports an
> + * error, and 1 when the driver does not support this device.
> + */
> +static int
> +vmbus_detach_dev(struct rte_vmbus_driver *dr,
> +		 struct rte_vmbus_device *dev)
> +{
> +	const uuid_t *entry = dr->id_table;
> +
> +	while (!uuid_is_null(*entry)) {
> +		if (uuid_compare(*entry, dev->class_id) == 0) {
> +			char id_str[UUID_BUF_SZ];
> +
> +			uuid_unparse(dev->device_id, id_str);
> +			RTE_LOG(INFO, EAL, "VMBUS device %s on NUMA socket %i\n",
> +				id_str, dev->device.numa_node);
> +
> +			RTE_LOG(DEBUG, EAL, "  remove driver: %s\n", dr->driver.name);
> +
> +			/* a failing remove() leaves the device attached */
> +			if (dr->remove != NULL && dr->remove(dev) < 0)
> +				return -1;
> +
> +			dev->driver = NULL;
> +			vmbus_uio_unmap_resource(dev);
> +			return 0;
> +		}
> +		++entry;
> +	}
> +
> +	/* no table entry matched the device class */
> +	return 1;
> +}
> +
> +/*
> + * Offer the device to each registered vmbus driver in turn, stopping at
> + * the first driver that takes it.
> + *
> + * Returns 0 when a driver probed the device successfully, -1 when a driver
> + * reported an error, and 1 when no registered driver supports the device.
> + */
> +static int
> +vmbus_probe_all_drivers(struct rte_vmbus_device *dev)
> +{
> +	struct rte_vmbus_driver *drv;
> +
> +	TAILQ_FOREACH(drv, &vmbus_driver_list, next) {
> +		int rc = rte_eal_vmbus_probe_one_driver(drv, dev);
> +
> +		/* the driver accepted the device */
> +		if (rc == 0)
> +			return 0;
> +
> +		if (rc < 0) {
> +			RTE_LOG(ERR, EAL, "Failed to probe driver %s\n",
> +				drv->driver.name);
> +			return -1;
> +		}
> +
> +		/* rc > 0: not supported by this driver, try the next one */
> +	}
> +
> +	return 1;
> +}
> +
> +
> +/*
> + * Ask each registered vmbus driver in turn to detach the device, stopping
> + * at the first driver that handles it.
> + *
> + * Returns 0 when a driver detached the device, -1 when dev is NULL or a
> + * driver reported an error, and 1 when no registered driver supports the
> + * device.
> + */
> +static int
> +vmbus_detach_all_drivers(struct rte_vmbus_device *dev)
> +{
> +	struct rte_vmbus_driver *drv;
> +
> +	if (dev == NULL)
> +		return -1;
> +
> +	TAILQ_FOREACH(drv, &vmbus_driver_list, next) {
> +		int status = vmbus_detach_dev(drv, dev);
> +
> +		/* positive value means driver doesn't support it */
> +		if (status > 0)
> +			continue;
> +
> +		/* negative value is an error, zero is success */
> +		return (status < 0) ? -1 : 0;
> +	}
> +
> +	return 1;
> +}
> +
> +/*
> + * Detach the device with the given VMBUS device id.
> + *
> + * The device is looked up in the device list, detached from its driver,
> + * removed from the list and freed.
> + *
> + * Returns 0 on success, -1 if no device has this id or if detaching
> + * failed (a warning is logged in the latter case only).
> + */
> +int
> +rte_eal_vmbus_detach(uuid_t device_id)
> +{
> +	struct rte_vmbus_device *match;
> +
> +	/* locate the device; 'match' is NULL if the walk finds nothing */
> +	TAILQ_FOREACH(match, &vmbus_device_list, next) {
> +		if (uuid_compare(match->device_id, device_id) == 0)
> +			break;
> +	}
> +
> +	if (match == NULL)
> +		return -1;
> +
> +	if (vmbus_detach_all_drivers(match) < 0) {
> +		char id_str[UUID_BUF_SZ];
> +
> +		uuid_unparse(device_id, id_str);
> +		RTE_LOG(WARNING, EAL, "Requested device %s cannot be used\n",
> +			id_str);
> +		return -1;
> +	}
> +
> +	TAILQ_REMOVE(&vmbus_device_list, match, next);
> +	free(match);
> +	return 0;
> +}
> +
> +/*
> + * Walk the list of discovered vmbus devices and offer each one to the
> + * registered drivers via vmbus_probe_all_drivers().
> + *
> + * The per-device result is discarded; this function always returns 0.
> + */
> +int
> +rte_eal_vmbus_probe(void)
> +{
> +	struct rte_vmbus_device *vdev;
> +	char id_str[UUID_BUF_SZ];
> +
> +	TAILQ_FOREACH(vdev, &vmbus_device_list, next) {
> +		uuid_unparse(vdev->device_id, id_str);
> +		RTE_LOG(DEBUG, EAL, "Probing driver for device %s ...\n",
> +			id_str);
> +
> +		(void)vmbus_probe_all_drivers(vdev);
> +	}
> +
> +	return 0;
> +}
> +
> +/*
> + * Register a vmbus driver: append it to the tail of vmbus_driver_list,
> + * the list walked when devices are probed or detached.
> + */
> +void
> +rte_eal_vmbus_register(struct rte_vmbus_driver *driver)
> +{
> +	TAILQ_INSERT_TAIL(&vmbus_driver_list, driver, next);
> +}
> +
> +/*
> + * Unregister a vmbus driver: unlink it from vmbus_driver_list.
> + * NOTE(review): presumably the driver must currently be on the list;
> + * nothing here checks that - confirm against callers.
> + */
> +void
> +rte_eal_vmbus_unregister(struct rte_vmbus_driver *driver)
> +{
> +	TAILQ_REMOVE(&vmbus_driver_list, driver, next);
> +}
> diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
> index 1e0f206..6298a8d 100644
> --- a/lib/librte_ether/rte_ethdev.c
> +++ b/lib/librte_ether/rte_ethdev.c
> @@ -3282,3 +3282,93 @@ rte_eth_dev_l2_tunnel_offload_set(uint8_t port_id,
>  				-ENOTSUP);
>  	return (*dev->dev_ops->l2_tunnel_offload_set)(dev, l2_tunnel, mask, en);
>  }
> +
> +
> +#ifdef RTE_LIBRTE_HV_PMD
> +/*
> + * Probe wrapper for vmbus ethernet drivers: allocate an ethdev port named
> + * after the device UUID, allocate the driver private data (primary process
> + * only) and invoke the PMD's eth_dev_init().
> + *
> + * Returns 0 on success, -ENOMEM if no port could be allocated, or the
> + * value returned by eth_dev_init() on failure, in which case the private
> + * data is freed and the port is released again.
> + */
> +int
> +rte_eth_dev_vmbus_probe(struct rte_vmbus_driver *vmbus_drv,
> +			struct rte_vmbus_device *vmbus_dev)
> +{
> +	struct eth_driver  *eth_drv = (struct eth_driver *)vmbus_drv;
> +	struct rte_eth_dev *eth_dev;
> +	char ustr[UUID_BUF_SZ];
> +	int diag;
> +
> +	uuid_unparse(vmbus_dev->device_id, ustr);
> +
> +	eth_dev = rte_eth_dev_allocate(ustr);
> +	if (eth_dev == NULL)
> +		return -ENOMEM;
> +
> +	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
> +		eth_dev->data->dev_private = rte_zmalloc("ethdev private structure",
> +				  eth_drv->dev_private_size,
> +				  RTE_CACHE_LINE_SIZE);
> +		if (eth_dev->data->dev_private == NULL)
> +			rte_panic("Cannot allocate memzone for private port data\n");
> +	}
> +
> +	eth_dev->vmbus_dev = vmbus_dev;
> +	eth_dev->driver = eth_drv;
> +	eth_dev->data->rx_mbuf_alloc_failed = 0;
> +
> +	/* init user callbacks */
> +	TAILQ_INIT(&(eth_dev->link_intr_cbs));
> +
> +	/*
> +	 * Set the default maximum frame size.
> +	 */
> +	eth_dev->data->mtu = ETHER_MTU;
> +
> +	/* Invoke PMD device initialization function */
> +	diag = (*eth_drv->eth_dev_init)(eth_dev);
> +	if (diag == 0)
> +		return 0;
> +
> +	RTE_PMD_DEBUG_TRACE("driver %s: eth_dev_init(%s) failed\n",
> +			    vmbus_drv->driver.name, ustr);
> +
> +	if (rte_eal_process_type() == RTE_PROC_PRIMARY)
> +		rte_free(eth_dev->data->dev_private);
> +
> +	/* give the port back, otherwise it stays allocated after the failure */
> +	rte_eth_dev_release_port(eth_dev);
> +
> +	return diag;
> +}
> +
> +/*
> + * Remove wrapper for vmbus ethernet drivers: look up the ethdev port named
> + * after the device UUID, invoke the PMD's eth_dev_uninit() when it has
> + * one, release the port and free the private data (primary process only).
> + *
> + * Returns 0 on success, -EINVAL if vmbus_dev is NULL, -ENODEV if no port
> + * is allocated for the device, or the non-zero value returned by
> + * eth_dev_uninit().
> + */
> +int
> +rte_eth_dev_vmbus_remove(struct rte_vmbus_device *vmbus_dev)
> +{
> +	const struct eth_driver *eth_drv;
> +	struct rte_eth_dev *eth_dev;
> +	char ustr[UUID_BUF_SZ];
> +	int ret;
> +
> +	if (vmbus_dev == NULL)
> +		return -EINVAL;
> +
> +	uuid_unparse(vmbus_dev->device_id, ustr);
> +	eth_dev = rte_eth_dev_allocated(ustr);
> +	if (eth_dev == NULL)
> +		return -ENODEV;
> +
> +	eth_drv = (const struct eth_driver *)vmbus_dev->driver;
> +
> +	/* Invoke PMD device uninit function */
> +	if (eth_drv->eth_dev_uninit != NULL) {
> +		ret = (*eth_drv->eth_dev_uninit)(eth_dev);
> +		if (ret)
> +			return ret;
> +	}
> +
> +	/* free ether device */
> +	rte_eth_dev_release_port(eth_dev);
> +
> +	if (rte_eal_process_type() == RTE_PROC_PRIMARY)
> +		rte_free(eth_dev->data->dev_private);
> +
> +	/* this is the VMBUS path: clear the vmbus member, not pci_dev */
> +	eth_dev->vmbus_dev = NULL;
> +	eth_dev->driver = NULL;
> +	eth_dev->data = NULL;
> +
> +	return 0;
> +}
> +#endif
> diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
> index 3c85e33..5050087 100644
> --- a/lib/librte_ether/rte_ethdev.h
> +++ b/lib/librte_ether/rte_ethdev.h
> @@ -180,6 +180,7 @@ extern "C" {
>  #include <rte_log.h>
>  #include <rte_interrupts.h>
>  #include <rte_pci.h>
> +#include <rte_vmbus.h>
>  #include <rte_dev.h>
>  #include <rte_devargs.h>
>  #include "rte_ether.h"
> @@ -1628,7 +1629,11 @@ struct rte_eth_dev {
>  	struct rte_eth_dev_data *data;  /**< Pointer to device data */
>  	const struct eth_driver *driver;/**< Driver for this device */
>  	const struct eth_dev_ops *dev_ops; /**< Functions exported by PMD */
> -	struct rte_pci_device *pci_dev; /**< PCI info. supplied by probing */
> +	union {
> +		struct rte_pci_device *pci_dev; /**< PCI info. */
> +		struct rte_vmbus_device *vmbus_dev; /**< VMBUS info. */
> +	};
> +
>  	/** User application callbacks for NIC interrupts */
>  	struct rte_eth_dev_cb_list link_intr_cbs;
>  	/**
> @@ -1866,7 +1871,11 @@ typedef int (*eth_dev_uninit_t)(struct rte_eth_dev *eth_dev);
>   * - The size of the private data to allocate for each matching device.
>   */
>  struct eth_driver {
> -	struct rte_pci_driver pci_drv;    /**< The PMD is also a PCI driver. */
> +	union {
> +		struct rte_pci_driver pci_drv;    /**< The PMD PCI driver. */
> +		struct rte_vmbus_driver vmbus_drv;/**< The PMD VMBUS drv. */
> +	};
> +
>  	eth_dev_init_t eth_dev_init;      /**< Device init function. */
>  	eth_dev_uninit_t eth_dev_uninit;  /**< Device uninit function. */
>  	unsigned int dev_private_size;    /**< Size of device private data. */

This is not a scalable model: eth_driver/rte_eth_dev would have to change
for every new device type other than PCI. VMBus may be _very_ close to
PCI, so that no changes are required in the PCI layer (common, linuxapp,
bsdapp) - but for other buses it won't stop there.

At the very least, rte_pci_driver and rte_pci_device should be removed
from eth_driver and rte_eth_dev, respectively, so that they rely on
rte_driver and rte_device instead.

This is the primary reason for the work on the SoC patchset and, now, on
the new Bus model.

> @@ -4383,6 +4392,21 @@ int rte_eth_dev_pci_probe(struct rte_pci_driver *pci_drv,
>   */
>  int rte_eth_dev_pci_remove(struct rte_pci_device *pci_dev);
>
> +/**
> + * @internal
> + * Wrapper for use by vmbus drivers as a .probe function to attach to a ethdev
> + * interface.
> + */
> +int rte_eth_dev_vmbus_probe(struct rte_vmbus_driver *vmbus_drv,
> +			  struct rte_vmbus_device *vmbus_dev);
> +
> +/**
> + * @internal
> + * Wrapper for use by vmbus drivers as a .remove function to detach a ethdev
> + * interface.
> + */
> +int rte_eth_dev_vmbus_remove(struct rte_vmbus_device *vmbus_dev);
> +
>  #ifdef __cplusplus
>  }
>  #endif
> diff --git a/mk/rte.app.mk b/mk/rte.app.mk
> index f75f0e2..6b30408 100644
> --- a/mk/rte.app.mk
> +++ b/mk/rte.app.mk
> @@ -130,6 +130,7 @@ ifeq ($(CONFIG_RTE_LIBRTE_VHOST),y)
>  _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_VHOST)      += -lrte_pmd_vhost
>  endif # $(CONFIG_RTE_LIBRTE_VHOST)
>  _LDLIBS-$(CONFIG_RTE_LIBRTE_VMXNET3_PMD)    += -lrte_pmd_vmxnet3_uio
> +_LDLIBS-$(CONFIG_RTE_LIBRTE_HV_PMD)	    += -luuid
>
>  ifeq ($(CONFIG_RTE_LIBRTE_CRYPTODEV),y)
>  _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_AESNI_MB)    += -lrte_pmd_aesni_mb
>

-
Shreyansh



More information about the dev mailing list