[dpdk-dev,2/2] hyperv: VMBUS support infrastructure

Message ID 20161214235920.12877-3-sthemmin@microsoft.com (mailing list archive)
State Superseded, archived
Headers

Checks

Context Check Description
ci/checkpatch warning coding style issues
ci/Intel compilation fail Compilation issue

Commit Message

Stephen Hemminger Dec. 14, 2016, 11:59 p.m. UTC
  Generalize existing bus support to handle VMBUS in Hyper-V.
Most of the code is based on the existing model for PCI; the difference
is how the bus is represented in sysfs and how addressing works.

This is based on earlier code contributed by Brocade.
It supports only 4.9 or later versions of the Linux kernel
at this time (not older kernels or BSD).

Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
---
 lib/librte_eal/common/Makefile              |   2 +-
 lib/librte_eal/common/eal_common_devargs.c  |   7 +
 lib/librte_eal/common/eal_common_options.c  |  38 ++
 lib/librte_eal/common/eal_internal_cfg.h    |   3 +-
 lib/librte_eal/common/eal_options.h         |   6 +
 lib/librte_eal/common/eal_private.h         |   5 +
 lib/librte_eal/common/include/rte_devargs.h |   8 +
 lib/librte_eal/common/include/rte_vmbus.h   | 247 ++++++++
 lib/librte_eal/linuxapp/eal/Makefile        |   6 +
 lib/librte_eal/linuxapp/eal/eal.c           |  11 +
 lib/librte_eal/linuxapp/eal/eal_vmbus.c     | 906 ++++++++++++++++++++++++++++
 lib/librte_ether/rte_ethdev.c               |  90 +++
 lib/librte_ether/rte_ethdev.h               |  28 +-
 mk/rte.app.mk                               |   1 +
 14 files changed, 1354 insertions(+), 4 deletions(-)
 create mode 100644 lib/librte_eal/common/include/rte_vmbus.h
 create mode 100644 lib/librte_eal/linuxapp/eal/eal_vmbus.c
  

Comments

Shreyansh Jain Dec. 15, 2016, 6:49 a.m. UTC | #1
On Thursday 15 December 2016 05:29 AM, Stephen Hemminger wrote:
> Generalize existing bus support to handle VMBUS in Hyper-V.
> Most of the code is based on the existing model for PCI; the difference
> is how the bus is represented in sysfs and how addressing works.
>
> This is based on earlier code contributed by Brocade.
> It supports only 4.9 or later versions of the Linux kernel
> at this time (not older kernels or BSD).
>
> Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
> ---
>  lib/librte_eal/common/Makefile              |   2 +-
>  lib/librte_eal/common/eal_common_devargs.c  |   7 +
>  lib/librte_eal/common/eal_common_options.c  |  38 ++
>  lib/librte_eal/common/eal_internal_cfg.h    |   3 +-
>  lib/librte_eal/common/eal_options.h         |   6 +
>  lib/librte_eal/common/eal_private.h         |   5 +
>  lib/librte_eal/common/include/rte_devargs.h |   8 +
>  lib/librte_eal/common/include/rte_vmbus.h   | 247 ++++++++
>  lib/librte_eal/linuxapp/eal/Makefile        |   6 +
>  lib/librte_eal/linuxapp/eal/eal.c           |  11 +
>  lib/librte_eal/linuxapp/eal/eal_vmbus.c     | 906 ++++++++++++++++++++++++++++
>  lib/librte_ether/rte_ethdev.c               |  90 +++
>  lib/librte_ether/rte_ethdev.h               |  28 +-
>  mk/rte.app.mk                               |   1 +
>  14 files changed, 1354 insertions(+), 4 deletions(-)
>  create mode 100644 lib/librte_eal/common/include/rte_vmbus.h
>  create mode 100644 lib/librte_eal/linuxapp/eal/eal_vmbus.c
>
> diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile
> index a92c984..9254bae 100644
> --- a/lib/librte_eal/common/Makefile
> +++ b/lib/librte_eal/common/Makefile
> @@ -33,7 +33,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
>
>  INC := rte_branch_prediction.h rte_common.h
>  INC += rte_debug.h rte_eal.h rte_errno.h rte_launch.h rte_lcore.h
> -INC += rte_log.h rte_memory.h rte_memzone.h rte_pci.h
> +INC += rte_log.h rte_memory.h rte_memzone.h rte_pci.h rte_vmbus.h
>  INC += rte_per_lcore.h rte_random.h
>  INC += rte_tailq.h rte_interrupts.h rte_alarm.h
>  INC += rte_string_fns.h rte_version.h
> diff --git a/lib/librte_eal/common/eal_common_devargs.c b/lib/librte_eal/common/eal_common_devargs.c
> index e403717..934ca84 100644
> --- a/lib/librte_eal/common/eal_common_devargs.c
> +++ b/lib/librte_eal/common/eal_common_devargs.c
> @@ -113,6 +113,13 @@ rte_eal_devargs_add(enum rte_devtype devtype, const char *devargs_str)
>  			goto fail;
>
>  		break;
> +	case RTE_DEVTYPE_WHITELISTED_VMBUS:
> +	case RTE_DEVTYPE_BLACKLISTED_VMBUS:
> +#ifdef RTE_LIBRTE_HV_PMD
> +		if (uuid_parse(buf, devargs->uuid) == 0)
> +			break;
> +#endif
> +		goto fail;
>  	}
>
>  	free(buf);
> diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c
> index 6ca8af1..6aea87d 100644
> --- a/lib/librte_eal/common/eal_common_options.c
> +++ b/lib/librte_eal/common/eal_common_options.c
> @@ -95,6 +95,11 @@ eal_long_options[] = {
>  	{OPT_VFIO_INTR,         1, NULL, OPT_VFIO_INTR_NUM        },
>  	{OPT_VMWARE_TSC_MAP,    0, NULL, OPT_VMWARE_TSC_MAP_NUM   },
>  	{OPT_XEN_DOM0,          0, NULL, OPT_XEN_DOM0_NUM         },
> +#ifdef RTE_LIBRTE_HV_PMD
> +	{OPT_NO_VMBUS,          0, NULL, OPT_NO_VMBUS_NUM         },
> +	{OPT_VMBUS_BLACKLIST,   1, NULL, OPT_VMBUS_BLACKLIST_NUM  },
> +	{OPT_VMBUS_WHITELIST,   1, NULL, OPT_VMBUS_WHITELIST_NUM  },
> +#endif
>  	{0,                     0, NULL, 0                        }
>  };
>
> @@ -855,6 +860,21 @@ eal_parse_common_option(int opt, const char *optarg,
>  		conf->no_pci = 1;
>  		break;
>
> +#ifdef RTE_LIBRTE_HV_PMD
> +	case OPT_NO_VMBUS_NUM:
> +		conf->no_vmbus = 1;
> +		break;
> +	case OPT_VMBUS_BLACKLIST_NUM:
> +		if (rte_eal_devargs_add(RTE_DEVTYPE_BLACKLISTED_VMBUS,
> +					optarg) < 0)
> +			return -1;
> +		break;
> +	case OPT_VMBUS_WHITELIST_NUM:
> +		if (rte_eal_devargs_add(RTE_DEVTYPE_WHITELISTED_VMBUS,
> +				optarg) < 0)
> +			return -1;
> +		break;
> +#endif
>  	case OPT_NO_HPET_NUM:
>  		conf->no_hpet = 1;
>  		break;
> @@ -987,6 +1007,14 @@ eal_check_common_options(struct internal_config *internal_cfg)
>  		return -1;
>  	}
>
> +#ifdef RTE_LIBRTE_HV_PMD
> +	if (rte_eal_devargs_type_count(RTE_DEVTYPE_WHITELISTED_VMBUS) != 0 &&
> +		rte_eal_devargs_type_count(RTE_DEVTYPE_BLACKLISTED_VMBUS) != 0) {
> +		RTE_LOG(ERR, EAL, "Options vmbus blacklist and whitelist "
> +			"cannot be used at the same time\n");
> +		return -1;
> +	}
> +#endif
>  	return 0;
>  }
>
> @@ -1036,5 +1064,15 @@ eal_common_usage(void)
>  	       "  --"OPT_NO_PCI"            Disable PCI\n"
>  	       "  --"OPT_NO_HPET"           Disable HPET\n"
>  	       "  --"OPT_NO_SHCONF"         No shared config (mmap'd files)\n"
> +#ifdef RTE_LIBRTE_HV_PMD
> +	       "  --"OPT_NO_VMBUS"          Disable VMBUS\n"
> +	       "  --"OPT_VMBUS_BLACKLIST" Add a VMBUS device to black list.\n"
> +	       "                      Prevent EAL from using this PCI device. The argument\n"
> +	       "                      format is device UUID.\n"
> +	       "  --"OPT_VMBUS_WHITELIST" Add a VMBUS device to white list.\n"
> +	       "                      Only use the specified VMBUS devices. The argument format\n"
> +	       "                      is device UUID This option can be present\n"
> +	       "                      several times (once per device).\n"
> +#endif
>  	       "\n", RTE_MAX_LCORE);
>  }
> diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h
> index 5f1367e..1827194 100644
> --- a/lib/librte_eal/common/eal_internal_cfg.h
> +++ b/lib/librte_eal/common/eal_internal_cfg.h
> @@ -69,7 +69,8 @@ struct internal_config {
>  	volatile unsigned no_pci;         /**< true to disable PCI */
>  	volatile unsigned no_hpet;        /**< true to disable HPET */
>  	volatile unsigned vmware_tsc_map; /**< true to use VMware TSC mapping
> -										* instead of native TSC */
> +					   * instead of native TSC */
> +	volatile unsigned no_vmbus;       /**< true to disable VMBUS */
>  	volatile unsigned no_shconf;      /**< true if there is no shared config */
>  	volatile unsigned create_uio_dev; /**< true to create /dev/uioX devices */
>  	volatile enum rte_proc_type_t process_type; /**< multi-process proc type */
> diff --git a/lib/librte_eal/common/eal_options.h b/lib/librte_eal/common/eal_options.h
> index a881c62..156727e 100644
> --- a/lib/librte_eal/common/eal_options.h
> +++ b/lib/librte_eal/common/eal_options.h
> @@ -83,6 +83,12 @@ enum {
>  	OPT_VMWARE_TSC_MAP_NUM,
>  #define OPT_XEN_DOM0          "xen-dom0"
>  	OPT_XEN_DOM0_NUM,
> +#define OPT_NO_VMBUS          "no-vmbus"
> +	OPT_NO_VMBUS_NUM,
> +#define OPT_VMBUS_BLACKLIST   "vmbus-blacklist"
> +	OPT_VMBUS_BLACKLIST_NUM,
> +#define OPT_VMBUS_WHITELIST   "vmbus-whitelist"
> +	OPT_VMBUS_WHITELIST_NUM,
>  	OPT_LONG_MAX_NUM
>  };
>
> diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
> index 9e7d8f6..c856c63 100644
> --- a/lib/librte_eal/common/eal_private.h
> +++ b/lib/librte_eal/common/eal_private.h
> @@ -210,6 +210,11 @@ int pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx,
>  		struct mapped_pci_resource *uio_res, int map_idx);
>
>  /**
> + * VMBUS related functions and structures
> + */
> +int rte_eal_vmbus_init(void);
> +
> +/**
>   * Init tail queues for non-EAL library structures. This is to allow
>   * the rings, mempools, etc. lists to be shared among multiple processes
>   *
> diff --git a/lib/librte_eal/common/include/rte_devargs.h b/lib/librte_eal/common/include/rte_devargs.h
> index 88120a1..c079d28 100644
> --- a/lib/librte_eal/common/include/rte_devargs.h
> +++ b/lib/librte_eal/common/include/rte_devargs.h
> @@ -51,6 +51,9 @@ extern "C" {
>  #include <stdio.h>
>  #include <sys/queue.h>
>  #include <rte_pci.h>
> +#ifdef RTE_LIBRTE_HV_PMD
> +#include <uuid/uuid.h>
> +#endif
>
>  /**
>   * Type of generic device
> @@ -59,6 +62,8 @@ enum rte_devtype {
>  	RTE_DEVTYPE_WHITELISTED_PCI,
>  	RTE_DEVTYPE_BLACKLISTED_PCI,
>  	RTE_DEVTYPE_VIRTUAL,
> +	RTE_DEVTYPE_WHITELISTED_VMBUS,
> +	RTE_DEVTYPE_BLACKLISTED_VMBUS,
>  };
>
>  /**
> @@ -88,6 +93,9 @@ struct rte_devargs {
>  			/** Driver name. */
>  			char drv_name[32];
>  		} virt;
> +#ifdef RTE_LIBRTE_HV_PMD
> +		uuid_t uuid;
> +#endif
>  	};
>  	/** Arguments string as given by user or "" for no argument. */
>  	char *args;
> diff --git a/lib/librte_eal/common/include/rte_vmbus.h b/lib/librte_eal/common/include/rte_vmbus.h
> new file mode 100644
> index 0000000..8540539
> --- /dev/null
> +++ b/lib/librte_eal/common/include/rte_vmbus.h
> @@ -0,0 +1,247 @@
> +/*-
> + *   BSD LICENSE
> + *
> + *   Copyright(c) 2013-2016 Brocade Communications Systems, Inc.
> + *   Copyright(c) 2016 Microsoft Corporation
> + *   All rights reserved.
> + *
> + *   Redistribution and use in source and binary forms, with or without
> + *   modification, are permitted provided that the following conditions
> + *   are met:
> + *
> + *     * Redistributions of source code must retain the above copyright
> + *       notice, this list of conditions and the following disclaimer.
> + *     * Redistributions in binary form must reproduce the above copyright
> + *       notice, this list of conditions and the following disclaimer in
> + *       the documentation and/or other materials provided with the
> + *       distribution.
> + *     * Neither the name of Intel Corporation nor the names of its
> + *       contributors may be used to endorse or promote products derived
> + *       from this software without specific prior written permission.
> + *
> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + *
> + */
> +
> +#ifndef _RTE_VMBUS_H_
> +#define _RTE_VMBUS_H_
> +
> +/**
> + * @file
> + *
> + * RTE VMBUS Interface
> + */
> +#ifdef __cplusplus
> +extern "C" {
> +#endif
> +
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <limits.h>
> +#include <errno.h>
> +#include <uuid/uuid.h>
> +#include <sys/queue.h>
> +#include <stdint.h>
> +#include <inttypes.h>
> +
> +#include <rte_debug.h>
> +#include <rte_interrupts.h>
> +#include <rte_dev.h>
> +
> +TAILQ_HEAD(vmbus_device_list, rte_vmbus_device);
> +TAILQ_HEAD(vmbus_driver_list, rte_vmbus_driver);
> +
> +extern struct vmbus_driver_list vmbus_driver_list;
> +extern struct vmbus_device_list vmbus_device_list;
> +
> +/** Pathname of VMBUS devices directory. */
> +#define SYSFS_VMBUS_DEVICES "/sys/bus/vmbus/devices"
> +
> +#define UUID_BUF_SZ	(36 + 1)
> +	
> +
> +/** Maximum number of VMBUS resources. */
> +#define VMBUS_MAX_RESOURCE 7
> +
> +/**
> + * A structure describing a VMBUS device.
> + *
> + * The structure ends in a flexible array member (sysfs_name), so the
> + * storage for the name has to be allocated together with the structure.
> + */
> +struct rte_vmbus_device {
> +	TAILQ_ENTRY(rte_vmbus_device) next;     /**< Next probed VMBUS device. */
> +	struct rte_device device;               /**< Inherit core device */
> +	uuid_t device_id;			/**< VMBUS device id */
> +	uuid_t class_id;			/**< VMBUS device type */
> +	uint32_t relid;				/**< VMBUS id for notification */
> +	uint8_t	monitor_id;			/**< NOTE(review): undocumented; presumably the VMBUS monitor id - confirm */
> +	struct rte_intr_handle intr_handle;     /**< Interrupt handle */
> +	const struct rte_vmbus_driver *driver;  /**< Associated driver */
> +
> +	struct rte_mem_resource mem_resource[VMBUS_MAX_RESOURCE];
> +						/**< VMBUS Memory Resource */
> +	char sysfs_name[];			/**< Name in sysfs bus directory */
> +};
> +
> +struct rte_vmbus_driver;
> +
> +/**
> + * Initialisation function for the driver called during VMBUS probing.
> + */
> +typedef int (vmbus_probe_t)(struct rte_vmbus_driver *, struct rte_vmbus_device *);
> +
> +/**
> + * Uninitialisation function for the driver called during hotplugging.
> + */
> +typedef int (vmbus_remove_t)(struct rte_vmbus_device *);
> +
> +/**
> + * A structure describing a VMBUS driver.
> + */
> +struct rte_vmbus_driver {
> +	TAILQ_ENTRY(rte_vmbus_driver) next;     /**< Next in list. */
> +	struct rte_driver driver;               /**< Inherit core driver. */
> +	vmbus_probe_t *probe;                   /**< Device Probe function. */
> +	vmbus_remove_t *remove;                 /**< Device Remove function. */
> +
> +	const uuid_t *id_table;			/**< ID table, NULL terminated. */
> +};
> +
> +/**
> + * Description of one memory mapping of a VMBUS device.
> + */
> +struct vmbus_map {
> +	void *addr;	/**< Virtual address the region is mapped at */
> +	char *path;	/**< File that is opened to create the mapping */
> +	uint64_t offset;	/**< Offset within the file, passed to mmap() */
> +	uint64_t size;	/**< Length of the mapping in bytes */
> +	uint64_t phaddr;	/**< Physical address of the region */
> +};
> +
> +/*
> + * For multi-process we need to reproduce all vmbus mappings in secondary
> + * processes, so save them in a tailq.
> + *
> + * One entry describes all mappings of a single VMBUS device.
> + */
> +struct mapped_vmbus_resource {
> +	TAILQ_ENTRY(mapped_vmbus_resource) next;	/* tailq linkage */
> +
> +	uuid_t uuid;	/* device_id of the device the mappings belong to */
> +	char path[PATH_MAX];	/* UIO device node (/dev/uioN) */
> +	int nb_maps;	/* number of entries of maps[] in use */
> +	struct vmbus_map maps[VMBUS_MAX_RESOURCE];	/* per-region details */
> +};
> +
> +/* List type holding one mapped_vmbus_resource per mapped device. */
> +TAILQ_HEAD(mapped_vmbus_res_list, mapped_vmbus_resource);
> +
> +/**
> + * Scan the content of the VMBUS bus, and the devices in the devices list
> + *
> + * @return
> + *  0 on success, negative on error
> + */
> +int rte_eal_vmbus_scan(void);
> +
> +/**
> + * Probe the VMBUS bus for registered drivers.
> + *
> + * Scan the content of the VMBUS bus, and call the probe() function for
> + * all registered drivers that have a matching entry in its id_table
> + * for discovered devices.
> + *
> + * @return
> + *   - 0 on success.
> + *   - Negative on error.
> + */
> +int rte_eal_vmbus_probe(void);
> +
> +/**
> + * Map the VMBUS device resources in user space virtual memory address
> + *
> + * @param dev
> + *   A pointer to a rte_vmbus_device structure describing the device
> + *   to use
> + *
> + * @return
> + *   0 on success, negative on error and positive if no driver
> + *   is found for the device.
> + */
> +int rte_eal_vmbus_map_device(struct rte_vmbus_device *dev);
> +
> +/**
> + * Unmap this device
> + *
> + * @param dev
> + *   A pointer to a rte_vmbus_device structure describing the device
> + *   to use
> + */
> +void rte_eal_vmbus_unmap_device(struct rte_vmbus_device *dev);
> +
> +/**
> + * Probe the single VMBUS device.
> + *
> + * Scan the content of the VMBUS bus, and find the vmbus device
> + * specified by device uuid, then call the probe() function for
> + * registered driver that has a matching entry in its id_table for
> + * discovered device.
> + *
> + * @param id
> + * 	The VMBUS device uuid.
> + * @return
> + *   - 0 on success.
> + *   - Negative on error.
> + */
> +int rte_eal_vmbus_probe_one(uuid_t id);
> +
> +/**
> + * Close the single VMBUS device.
> + *
> + * Scan the content of the VMBUS bus, and find the vmbus device id,
> + * then call the remove() function for registered driver that has a
> + * matching entry in its id_table for discovered device.
> + *
> + * @param id
> + * 	The VMBUS device uuid.
> + * @return
> + *   - 0 on success.
> + *   - Negative on error.
> + */
> +int rte_eal_vmbus_detach(uuid_t id);
> +
> +/**
> + * Register a VMBUS driver.
> + *
> + * @param driver
> + *   A pointer to a rte_vmbus_driver structure describing the driver
> + *   to be registered.
> + */
> +void rte_eal_vmbus_register(struct rte_vmbus_driver *driver);
> +
> +/**
> + * Helper for VMBUS driver registration from a driver instance.
> + *
> + * Declares vmbusinitfn_<nm> through RTE_INIT; that function sets the
> + * driver name to the stringified @p nm and passes @p vmbus_drv to
> + * rte_eal_vmbus_register().
> + */
> +#define RTE_PMD_REGISTER_VMBUS(nm, vmbus_drv) \
> +RTE_INIT(vmbusinitfn_ ##nm); \
> +static void vmbusinitfn_ ##nm(void) \
> +{\
> +	(vmbus_drv).driver.name = RTE_STR(nm);\
> +	rte_eal_vmbus_register(&vmbus_drv); \
> +} \
> +RTE_PMD_EXPORT_NAME(nm, __COUNTER__)
> +
> +/**
> + * Unregister a VMBUS driver.
> + *
> + * @param driver
> + *   A pointer to a rte_vmbus_driver structure describing the driver
> + *   to be unregistered.
> + */
> +void rte_eal_vmbus_unregister(struct rte_vmbus_driver *driver);
> +
> +#ifdef __cplusplus
> +}
> +#endif
> +
> +#endif /* _RTE_VMBUS_H_ */
> diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile
> index 4e206f0..f6ca384 100644
> --- a/lib/librte_eal/linuxapp/eal/Makefile
> +++ b/lib/librte_eal/linuxapp/eal/Makefile
> @@ -71,6 +71,11 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_timer.c
>  SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_interrupts.c
>  SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_alarm.c
>
> +ifeq ($(CONFIG_RTE_LIBRTE_HV_PMD),y)
> +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_vmbus.c
> +LDLIBS += -luuid
> +endif
> +
>  # from common dir
>  SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_lcore.c
>  SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_timer.c
> @@ -114,6 +119,7 @@ CFLAGS_eal_hugepage_info.o := -D_GNU_SOURCE
>  CFLAGS_eal_pci.o := -D_GNU_SOURCE
>  CFLAGS_eal_pci_uio.o := -D_GNU_SOURCE
>  CFLAGS_eal_pci_vfio.o := -D_GNU_SOURCE
> +CFLAGS_eal_vmbus.o := -D_GNU_SOURCE
>  CFLAGS_eal_common_whitelist.o := -D_GNU_SOURCE
>  CFLAGS_eal_common_options.o := -D_GNU_SOURCE
>  CFLAGS_eal_common_thread.o := -D_GNU_SOURCE
> diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c
> index 2075282..71083ec 100644
> --- a/lib/librte_eal/linuxapp/eal/eal.c
> +++ b/lib/librte_eal/linuxapp/eal/eal.c
> @@ -70,6 +70,7 @@
>  #include <rte_cpuflags.h>
>  #include <rte_interrupts.h>
>  #include <rte_pci.h>
> +#include <rte_vmbus.h>
>  #include <rte_dev.h>
>  #include <rte_devargs.h>
>  #include <rte_common.h>
> @@ -830,6 +831,11 @@ rte_eal_init(int argc, char **argv)
>
>  	eal_check_mem_on_local_socket();
>
> +#ifdef RTE_LIBRTE_HV_PMD
> +	if (rte_eal_vmbus_init() < 0)
> +		RTE_LOG(ERR, EAL, "Cannot init VMBUS\n");
> +#endif
> +
>  	if (eal_plugins_init() < 0)
>  		rte_panic("Cannot init plugins\n");
>
> @@ -887,6 +893,11 @@ rte_eal_init(int argc, char **argv)
>  	if (rte_eal_pci_probe())
>  		rte_panic("Cannot probe PCI\n");
>
> +#ifdef RTE_LIBRTE_HV_PMD
> +	if (rte_eal_vmbus_probe() < 0)
> +		rte_panic("Cannot probe VMBUS\n");
> +#endif
> +
>  	rte_eal_mcfg_complete();
>
>  	return fctret;
> diff --git a/lib/librte_eal/linuxapp/eal/eal_vmbus.c b/lib/librte_eal/linuxapp/eal/eal_vmbus.c
> new file mode 100644
> index 0000000..cbd8bd1
> --- /dev/null
> +++ b/lib/librte_eal/linuxapp/eal/eal_vmbus.c
> @@ -0,0 +1,906 @@
> +/*-
> + *   BSD LICENSE
> + *
> + *   Copyright(c) 2013-2016 Brocade Communications Systems, Inc.
> + *   Copyright(c) 2016 Microsoft Corporation
> + *   All rights reserved.
> + *
> + *   Redistribution and use in source and binary forms, with or without
> + *   modification, are permitted provided that the following conditions
> + *   are met:
> + *
> + *     * Redistributions of source code must retain the above copyright
> + *       notice, this list of conditions and the following disclaimer.
> + *     * Redistributions in binary form must reproduce the above copyright
> + *       notice, this list of conditions and the following disclaimer in
> + *       the documentation and/or other materials provided with the
> + *       distribution.
> + *     * Neither the name of Intel Corporation nor the names of its
> + *       contributors may be used to endorse or promote products derived
> + *       from this software without specific prior written permission.
> + *
> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + *
> + */
> +
> +#include <string.h>
> +#include <unistd.h>
> +#include <dirent.h>
> +#include <fcntl.h>
> +#include <sys/mman.h>
> +
> +#include <rte_eal.h>
> +#include <rte_tailq.h>
> +#include <rte_log.h>
> +#include <rte_devargs.h>
> +#include <rte_vmbus.h>
> +#include <rte_malloc.h>
> +
> +#include "eal_private.h"
> +#include "eal_pci_init.h"
> +#include "eal_filesystem.h"
> +
> +/* Global driver and device lists, declared extern in rte_vmbus.h. */
> +struct vmbus_driver_list vmbus_driver_list =
> +	TAILQ_HEAD_INITIALIZER(vmbus_driver_list);
> +struct vmbus_device_list vmbus_device_list =
> +	TAILQ_HEAD_INITIALIZER(vmbus_device_list);
> +
> +/*
> + * Address hint for the next resource mapping: set on first use and then
> + * advanced past every region that gets mapped.
> + */
> +static void *vmbus_map_addr;
> +
> +/*
> + * Shared tailq of struct mapped_vmbus_resource entries.
> + * NOTE(review): confirm the name "UIO_RESOURCE_LIST" is not already used
> + * by another tailq registration (e.g. the PCI UIO code).
> + */
> +static struct rte_tailq_elem rte_vmbus_uio_tailq = {
> +	.name = "UIO_RESOURCE_LIST",
> +};
> +EAL_REGISTER_TAILQ(rte_vmbus_uio_tailq);
> +
> +/*
> + * Read a UUID from a sysfs attribute file.
> + *
> + * Only the first line of the file is used. The trailing newline is
> + * removed and, when the value is written in "{...}" notation, the
> + * surrounding braces are stripped before the text is parsed.
> + *
> + * @param filename
> + *   Path of the sysfs file to read.
> + * @param uu
> + *   Receives the parsed UUID on success.
> + * @return
> + *   0 on success, -1 if the file cannot be opened or read, or if its
> + *   content is not a valid UUID.
> + */
> +static int
> +vmbus_get_sysfs_uuid(const char *filename, uuid_t uu)
> +{
> +	char buf[BUFSIZ];
> +	char *cp, *in = buf;
> +	FILE *f;
> +
> +	f = fopen(filename, "r");
> +	if (f == NULL) {
> +		RTE_LOG(ERR, EAL, "%s(): cannot open sysfs value %s\n",
> +				__func__, filename);
> +		return -1;
> +	}
> +
> +	if (fgets(buf, sizeof(buf), f) == NULL) {
> +		RTE_LOG(ERR, EAL, "%s(): cannot read sysfs value %s\n",
> +				__func__, filename);
> +		fclose(f);
> +		return -1;
> +	}
> +	fclose(f);
> +
> +	/* drop the newline kept by fgets(); search the buffer, not cp */
> +	cp = strchr(buf, '\n');
> +	if (cp)
> +		*cp = '\0';
> +
> +	/* strip { } notation: skip the opening brace, cut at the closing one */
> +	if (buf[0] == '{') {
> +		in = buf + 1;
> +		cp = strchr(in, '}');
> +		if (cp)
> +			*cp = '\0';
> +	}
> +
> +	if (uuid_parse(in, uu) < 0) {
> +		RTE_LOG(ERR, EAL, "%s %s not a valid UUID\n",
> +			filename, in);
> +		return -1;
> +	}
> +
> +	return 0;
> +}
> +
> +/*
> + * mmap() a region of an open file, readable, writable and shared.
> + *
> + * An error is logged when mmap() fails, or when a specific address was
> + * requested and the mapping ended up somewhere else; otherwise the
> + * resulting address is logged at debug level. The value returned by
> + * mmap() is handed back unchanged in every case.
> + */
> +static void *
> +vmbus_map_resource(void *requested_addr, int fd, off_t offset, size_t size,
> +		   int flags)
> +{
> +	void *va;
> +	int placed;
> +
> +	va = mmap(requested_addr, size, PROT_READ | PROT_WRITE,
> +		  MAP_SHARED | flags, fd, offset);
> +
> +	/* mapped, and at the requested address if one was given */
> +	placed = (va != MAP_FAILED) &&
> +		 (requested_addr == NULL || va == requested_addr);
> +
> +	if (placed) {
> +		RTE_LOG(DEBUG, EAL, "  VMBUS memory mapped at %p\n", va);
> +		return va;
> +	}
> +
> +	RTE_LOG(ERR, EAL,
> +		"%s(): cannot mmap(%d, %p, 0x%lx, 0x%lx): %s)\n",
> +		__func__, fd, requested_addr,
> +		(unsigned long)size, (unsigned long)offset,
> +		strerror(errno));
> +	return va;
> +}
> +
> +/*
> + * munmap() a previously mapped region. A NULL address is ignored.
> + * The outcome is only logged; nothing is reported to the caller.
> + */
> +static void
> +vmbus_unmap_resource(void *requested_addr, size_t size)
> +{
> +	int rc;
> +
> +	if (requested_addr == NULL)
> +		return;
> +
> +	/* Unmap the VMBUS memory resource of device */
> +	rc = munmap(requested_addr, size);
> +	if (rc == 0) {
> +		RTE_LOG(DEBUG, EAL, "  VMBUS memory unmapped at %p\n",
> +			requested_addr);
> +		return;
> +	}
> +
> +	RTE_LOG(ERR, EAL, "%s(): cannot munmap(%p, 0x%lx): %s\n",
> +		__func__, requested_addr, (unsigned long)size,
> +		strerror(errno));
> +}
> +
> +/*
> + * Locate the UIO device that belongs to a VMBUS device.
> + *
> + * Only supports current kernel version.
> + * Unlike PCI there is no option (or need) to create UIO device.
> + *
> + * The first "uioN" entry below /sys/bus/vmbus/devices/<name>/uio is used:
> + * its sysfs path is written to dstbuf and N is returned. Returns -1 if
> + * the directory cannot be opened or contains no such entry.
> + */
> +static int vmbus_get_uio_dev(const char *name,
> +			     char *dstbuf, size_t buflen)
> +{
> +	char uio_dir[PATH_MAX];
> +	unsigned int uio_num;
> +	struct dirent *ent;
> +	int found = -1;
> +	DIR *dp;
> +
> +	snprintf(uio_dir, sizeof(uio_dir),
> +		 "/sys/bus/vmbus/devices/%s/uio", name);
> +
> +	dp = opendir(uio_dir);
> +	if (dp == NULL) {
> +		RTE_LOG(ERR, EAL, "Cannot map uio resources for %s: %s\n",
> +			name, strerror(errno));
> +		return -1;
> +	}
> +
> +	/* take the first file starting with "uio" */
> +	for (ent = readdir(dp); ent != NULL; ent = readdir(dp)) {
> +		if (sscanf(ent->d_name, "uio%u", &uio_num) == 1) {
> +			snprintf(dstbuf, buflen, "%s/uio%u", uio_dir, uio_num);
> +			found = (int) uio_num;
> +			break;
> +		}
> +	}
> +	closedir(dp);
> +
> +	return found;
> +}
> +
> +/*
> + * Read one integer from the sysfs file <dir>/<name> into *val.
> + *
> + * Different to the eal version, as it needs to work with 64-bit values.
> + * The number is parsed with automatic base detection and has to be
> + * followed directly by a newline. Returns 0 on success, -1 on failure.
> + */
> +static int
> +vmbus_parse_sysfs_value(const char *dir, const char *name,
> +			uint64_t *val)
> +{
> +	char path[PATH_MAX];
> +	char line[BUFSIZ];
> +	char *tail = NULL;
> +	int got_line;
> +	FILE *fp;
> +
> +	snprintf(path, sizeof(path), "%s/%s", dir, name);
> +
> +	fp = fopen(path, "r");
> +	if (fp == NULL) {
> +		RTE_LOG(ERR, EAL, "%s(): cannot open sysfs value %s\n",
> +				__func__, path);
> +		return -1;
> +	}
> +
> +	got_line = (fgets(line, sizeof(line), fp) != NULL);
> +	fclose(fp);
> +	if (!got_line) {
> +		RTE_LOG(ERR, EAL, "%s(): cannot read sysfs value %s\n",
> +				__func__, path);
> +		return -1;
> +	}
> +
> +	*val = strtoull(line, &tail, 0);
> +
> +	/* accept only a non-empty line whose number ends at the newline */
> +	if (line[0] != '\0' && tail != NULL && *tail == '\n')
> +		return 0;
> +
> +	RTE_LOG(ERR, EAL, "%s(): cannot parse sysfs value %s\n",
> +			__func__, path);
> +	return -1;
> +}
> +
> +/*
> + * Get mappings out of values provided by uio.
> + *
> + * For each <uioname>/maps/mapN directory, N counting up from 0, the
> + * "offset", "size" and "addr" attributes are read into maps[N]. Scanning
> + * stops at the first missing directory or after VMBUS_MAX_RESOURCE maps.
> + *
> + * @param uioname
> + *   sysfs directory of the UIO device.
> + * @param maps
> + *   Array with room for VMBUS_MAX_RESOURCE entries.
> + * @return
> + *   Number of maps found, or -1 if an attribute cannot be read or parsed.
> + */
> +static int
> +vmbus_uio_get_mappings(const char *uioname,
> +		       struct vmbus_map maps[])
> +{
> +	int i;
> +
> +	for (i = 0; i != VMBUS_MAX_RESOURCE; i++) {
> +		struct vmbus_map *map = &maps[i];
> +		char dirname[PATH_MAX];
> +
> +		/* check if map directory exists */
> +		snprintf(dirname, sizeof(dirname),
> +			 "%s/maps/map%d", uioname, i);
> +
> +		if (access(dirname, F_OK) != 0)
> +			break;
> +
> +		/* get mapping offset */
> +		if (vmbus_parse_sysfs_value(dirname, "offset",
> +					    &map->offset) < 0)
> +			return -1;
> +
> +		/* get mapping size */
> +		if (vmbus_parse_sysfs_value(dirname, "size",
> +					    &map->size) < 0)
> +			return -1;
> +
> +		/* get mapping physical address, stored in this map's entry */
> +		if (vmbus_parse_sysfs_value(dirname, "addr",
> +					    &map->phaddr) < 0)
> +			return -1;
> +	}
> +
> +	return i;
> +}
> +
> +/*
> + * Free a mapping record and close the interrupt fd of the device.
> + *
> + * uio_res is released with rte_free(). If the fd stored in the interrupt
> + * handle is non-zero it is closed and the handle is reset to fd -1 and
> + * type RTE_INTR_HANDLE_UNKNOWN; a zero fd is left alone.
> + */
> +static void
> +vmbus_uio_free_resource(struct rte_vmbus_device *dev,
> +		struct mapped_vmbus_resource *uio_res)
> +{
> +	struct rte_intr_handle *ih = &dev->intr_handle;
> +
> +	rte_free(uio_res);
> +
> +	if (ih->fd == 0)
> +		return;
> +
> +	close(ih->fd);
> +	ih->fd = -1;
> +	ih->type = RTE_INTR_HANDLE_UNKNOWN;
> +}
> +
> +/*
> + * Set up the UIO bookkeeping for a device.
> + *
> + * Looks up the UIO device of dev, allocates a zeroed mapping record,
> + * opens /dev/uioN as the interrupt fd of the device and reads the map
> + * descriptions exported in sysfs into the record.
> + *
> + * Returns the new record, or NULL if the device has no UIO device or any
> + * step fails; on failure after the lookup the partially built state is
> + * released through vmbus_uio_free_resource().
> + */
> +static struct mapped_vmbus_resource *
> +vmbus_uio_alloc_resource(struct rte_vmbus_device *dev)
> +{
> +	char uio_sysfs[PATH_MAX], uio_node[PATH_MAX];
> +	struct mapped_vmbus_resource *res;
> +	int uio_num, nb_maps;
> +
> +	uio_num = vmbus_get_uio_dev(dev->sysfs_name, uio_sysfs,
> +				    sizeof(uio_sysfs));
> +	if (uio_num < 0) {
> +		RTE_LOG(WARNING, EAL,
> +			"  %s not managed by UIO driver, skipping\n",
> +			dev->sysfs_name);
> +		return NULL;
> +	}
> +
> +	/* allocate the mapping details for secondary processes*/
> +	res = rte_zmalloc("UIO_RES", sizeof(*res), 0);
> +	if (res == NULL) {
> +		RTE_LOG(ERR, EAL,
> +			"%s(): cannot store uio mmap details\n", __func__);
> +		vmbus_uio_free_resource(dev, res);
> +		return NULL;
> +	}
> +
> +	snprintf(uio_node, sizeof(uio_node), "/dev/uio%u", uio_num);
> +	dev->intr_handle.fd = open(uio_node, O_RDWR);
> +	if (dev->intr_handle.fd < 0) {
> +		RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
> +			uio_node, strerror(errno));
> +		vmbus_uio_free_resource(dev, res);
> +		return NULL;
> +	}
> +
> +	dev->intr_handle.type = RTE_INTR_HANDLE_UIO_INTX;
> +
> +	/* remember which node and which device this record belongs to */
> +	snprintf(res->path, sizeof(res->path), "%s", uio_node);
> +	uuid_copy(res->uuid, dev->device_id);
> +
> +	nb_maps = vmbus_uio_get_mappings(uio_sysfs, res->maps);
> +	if (nb_maps < 0) {
> +		vmbus_uio_free_resource(dev, res);
> +		return NULL;
> +	}
> +
> +	RTE_LOG(DEBUG, EAL, "Found %d memory maps for device %s\n",
> +		nb_maps, dev->sysfs_name);
> +
> +	return res;
> +}
> +
> +/*
> + * Map one memory resource of a device and record it in uio_res.
> + *
> + * Opens the resource file of the device and maps
> + * dev->mem_resource[res_idx].len bytes of it at the next free address
> + * after the previous mapping (the first mapping starts at
> + * pci_find_max_end_va()). Address, size, physical address and file path
> + * are stored in uio_res->maps[map_idx].
> + *
> + * @return
> + *   0 on success, -1 on failure. On failure the file descriptor is
> + *   closed and no path allocation is left behind.
> + */
> +static int
> +vmbus_uio_map_resource_by_index(struct rte_vmbus_device *dev,
> +				unsigned int res_idx,
> +				struct mapped_vmbus_resource *uio_res,
> +				unsigned int map_idx)
> +{
> +	struct vmbus_map *maps = uio_res->maps;
> +	char devname[PATH_MAX];
> +	void *mapaddr;
> +	int fd;
> +
> +	/*
> +	 * NOTE(review): this path has no "devices/" component, unlike the
> +	 * one built in vmbus_get_uio_dev() - confirm it is intended.
> +	 */
> +	snprintf(devname, sizeof(devname),
> +		 "/sys/bus/vmbus/%s/resource%u", dev->sysfs_name, res_idx);
> +
> +	fd = open(devname, O_RDWR);
> +	if (fd < 0) {
> +		RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
> +				devname, strerror(errno));
> +		return -1;
> +	}
> +
> +	/* allocate memory to keep path */
> +	maps[map_idx].path = rte_malloc(NULL, strlen(devname) + 1, 0);
> +	if (maps[map_idx].path == NULL) {
> +		RTE_LOG(ERR, EAL, "Cannot allocate memory for path: %s\n",
> +				strerror(errno));
> +		/* do not leak the descriptor opened above */
> +		close(fd);
> +		return -1;
> +	}
> +
> +	/* try mapping somewhere close to the end of hugepages */
> +	if (vmbus_map_addr == NULL)
> +		vmbus_map_addr = pci_find_max_end_va();
> +
> +	mapaddr = vmbus_map_resource(vmbus_map_addr, fd, 0,
> +				     dev->mem_resource[res_idx].len, 0);
> +	/* the mapping stays valid after the descriptor is closed */
> +	close(fd);
> +	if (mapaddr == MAP_FAILED) {
> +		rte_free(maps[map_idx].path);
> +		/* avoid a dangling pointer that could be freed again later */
> +		maps[map_idx].path = NULL;
> +		return -1;
> +	}
> +
> +	/* next mapping goes directly behind this one */
> +	vmbus_map_addr = RTE_PTR_ADD(mapaddr,
> +				     dev->mem_resource[res_idx].len);
> +
> +	maps[map_idx].phaddr = dev->mem_resource[res_idx].phys_addr;
> +	maps[map_idx].size = dev->mem_resource[res_idx].len;
> +	maps[map_idx].addr = mapaddr;
> +	maps[map_idx].offset = 0;
> +	strcpy(maps[map_idx].path, devname);
> +	dev->mem_resource[res_idx].addr = mapaddr;
> +
> +	return 0;
> +}
> +
> +/*
> + * Unmap every region recorded in uio_res (a NULL record is ignored).
> + * In the primary process the path string of each map is freed as well.
> + */
> +static void
> +vmbus_uio_unmap(struct mapped_vmbus_resource *uio_res)
> +{
> +	struct vmbus_map *m;
> +	int idx = 0;
> +
> +	if (uio_res == NULL)
> +		return;
> +
> +	while (idx != uio_res->nb_maps) {
> +		m = &uio_res->maps[idx];
> +
> +		vmbus_unmap_resource(m->addr, m->size);
> +
> +		if (rte_eal_process_type() == RTE_PROC_PRIMARY)
> +			rte_free(m->path);
> +
> +		idx++;
> +	}
> +}
> +
> +/*
> + * Look up the mapping record whose uuid equals dev->device_id.
> + * Returns NULL if dev is NULL or no record matches.
> + */
> +static struct mapped_vmbus_resource *
> +vmbus_uio_find_resource(struct rte_vmbus_device *dev)
> +{
> +	struct mapped_vmbus_res_list *head;
> +	struct mapped_vmbus_resource *cur;
> +
> +	if (dev == NULL)
> +		return NULL;
> +
> +	head = RTE_TAILQ_CAST(rte_vmbus_uio_tailq.head, mapped_vmbus_res_list);
> +
> +	for (cur = TAILQ_FIRST(head); cur != NULL;
> +	     cur = TAILQ_NEXT(cur, next)) {
> +		if (uuid_compare(cur->uuid, dev->device_id) != 0)
> +			continue;
> +		return cur;
> +	}
> +
> +	return NULL;
> +}
> +
> +/* unmap the VMBUS resource of a VMBUS device in virtual memory */
> +static void
> +vmbus_uio_unmap_resource(struct rte_vmbus_device *dev)
> +{
> +	struct mapped_vmbus_resource *uio_res;
> +	struct mapped_vmbus_res_list *uio_res_list =
> +			RTE_TAILQ_CAST(rte_vmbus_uio_tailq.head, mapped_vmbus_res_list);
> +
> +	if (dev == NULL)
> +		return;
> +
> +	/* find an entry for the device */
> +	uio_res = vmbus_uio_find_resource(dev);
> +	if (uio_res == NULL)
> +		return;
> +
> +	/* secondary processes - just free maps */
> +	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
> +		return vmbus_uio_unmap(uio_res);
> +
> +	TAILQ_REMOVE(uio_res_list, uio_res, next);
> +
> +	/* unmap all resources */
> +	vmbus_uio_unmap(uio_res);
> +
> +	/* free uio resource */
> +	rte_free(uio_res);
> +
> +	/* close fd if in primary process */
> +	close(dev->intr_handle.fd);
> +	if (dev->intr_handle.uio_cfg_fd >= 0) {
> +		close(dev->intr_handle.uio_cfg_fd);
> +		dev->intr_handle.uio_cfg_fd = -1;
> +	}
> +
> +	dev->intr_handle.fd = -1;
> +	dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
> +}
> +
> +static int
> +vmbus_uio_map_secondary(struct rte_vmbus_device *dev)
> +{
> +	struct mapped_vmbus_resource *uio_res;
> +	struct mapped_vmbus_res_list *uio_res_list =
> +			RTE_TAILQ_CAST(rte_vmbus_uio_tailq.head,
> +				       mapped_vmbus_res_list);
> +
> +	TAILQ_FOREACH(uio_res, uio_res_list, next) {
> +		int i;
> +
> +		/* skip this element if it doesn't match our id */
> +		if (uuid_compare(uio_res->uuid, dev->device_id))
> +			continue;
> +
> +		for (i = 0; i != uio_res->nb_maps; i++) {
> +			void *mapaddr;
> +			int fd;
> +
> +			fd = open(uio_res->maps[i].path, O_RDWR);
> +			if (fd < 0) {
> +				RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
> +					uio_res->maps[i].path, strerror(errno));
> +				return -1;
> +			}
> +
> +			mapaddr = vmbus_map_resource(uio_res->maps[i].addr, fd,
> +						     uio_res->maps[i].offset,
> +						     uio_res->maps[i].size, 0);
> +			/* fd is not needed in slave process, close it */
> +			close(fd);
> +
> +			if (mapaddr == uio_res->maps[i].addr)
> +				continue;
> +
> +			RTE_LOG(ERR, EAL,
> +				"Cannot mmap device resource file %s to address: %p\n",
> +				uio_res->maps[i].path,
> +				uio_res->maps[i].addr);
> +
> +			/* unmap addrs correctly mapped */
> +			while (i != 0) {
> +				--i;
> + 				vmbus_unmap_resource(uio_res->maps[i].addr,
> +						     uio_res->maps[i].size);
> +			}
> +			return -1;
> +
> +		}
> +		return 0;
> +	}
> +
> +	RTE_LOG(ERR, EAL, "Cannot find resource for device\n");
> +	return 1;
> +}
> +
> +/* map the resources of a vmbus device in virtual memory */
> +int
> +rte_eal_vmbus_map_device(struct rte_vmbus_device *dev)
> +{
> +	struct mapped_vmbus_resource *uio_res;
> +	struct mapped_vmbus_res_list *uio_res_list =
> +		RTE_TAILQ_CAST(rte_vmbus_uio_tailq.head, mapped_vmbus_res_list);
> +	int i, ret, map_idx = 0;
> +
> +	dev->intr_handle.fd = -1;
> +	dev->intr_handle.uio_cfg_fd = -1;
> +	dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
> +
> +	/* secondary processes - use already recorded details */
> +	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
> +		return vmbus_uio_map_secondary(dev);
> +
> +	/* allocate uio resource */
> +	uio_res = vmbus_uio_alloc_resource(dev);
> +	if (uio_res == NULL)
> +		return -1;
> +
> +	/* Map all BARs */
> +	for (i = 0; i != VMBUS_MAX_RESOURCE; i++) {
> +		uint64_t phaddr;
> +
> +		/* skip empty BAR */
> +		phaddr = dev->mem_resource[i].phys_addr;
> +		if (phaddr == 0)
> +			continue;
> +
> +		ret = vmbus_uio_map_resource_by_index(dev, i,
> +						      uio_res, map_idx);
> +		if (ret)
> +			goto error;
> +
> +		map_idx++;
> +	}
> +
> +	uio_res->nb_maps = map_idx;
> +
> +	TAILQ_INSERT_TAIL(uio_res_list, uio_res, next);
> +
> +	return 0;
> +error:
> +	for (i = 0; i < map_idx; i++) {
> +		vmbus_unmap_resource(uio_res->maps[i].addr,
> +				     uio_res->maps[i].size);
> +		rte_free(uio_res->maps[i].path);
> +	}
> +	vmbus_uio_free_resource(dev, uio_res);
> +	return -1;
> +}
> +
> +/* Scan one vmbus sysfs entry, and fill the devices list from it. */
> +static int
> +vmbus_scan_one(const char *name)
> +{
> +	struct rte_vmbus_device *dev, *dev2;
> +	char filename[PATH_MAX];
> +	char dirname[PATH_MAX];
> +	unsigned long tmp;
> +
> +	dev = malloc(sizeof(*dev) + strlen(name) + 1);
> +	if (dev == NULL)
> +		return -1;
> +
> +	memset(dev, 0, sizeof(*dev));
> +	strcpy(dev->sysfs_name, name);
> +	if (dev->sysfs_name == NULL)
> +		goto error;
> +
> +	/* sysfs base directory
> +	 *   /sys/bus/vmbus/devices/7a08391f-f5a0-4ac0-9802-d13fd964f8df
> +	 * or on older kernel
> +	 *   /sys/bus/vmbus/devices/vmbus_1
> +	 */
> +	snprintf(dirname, sizeof(dirname), "%s/%s",
> +		 SYSFS_VMBUS_DEVICES, name);
> +
> +	/* get device id */
> +	snprintf(filename, sizeof(filename), "%s/device_id", dirname);
> +	if (vmbus_get_sysfs_uuid(filename, dev->device_id) < 0)
> +		goto error;
> +
> +	/* get device class  */
> +	snprintf(filename, sizeof(filename), "%s/class_id", dirname);
> +	if (vmbus_get_sysfs_uuid(filename, dev->class_id) < 0)
> +		goto error;
> +
> +	/* get relid */
> +	snprintf(filename, sizeof(filename), "%s/id", dirname);
> +	if (eal_parse_sysfs_value(filename, &tmp) < 0)
> +		goto error;
> +	dev->relid = tmp;
> +
> +	/* get monitor id */
> +	snprintf(filename, sizeof(filename), "%s/monitor_id", dirname);
> +	if (eal_parse_sysfs_value(filename, &tmp) < 0)
> +		goto error;
> +	dev->monitor_id = tmp;
> +
> +	/* get numa node */
> +	snprintf(filename, sizeof(filename), "%s/numa_node",
> +		 dirname);
> +	if (eal_parse_sysfs_value(filename, &tmp) < 0)
> +		/* if no NUMA support, set default to 0 */
> +		dev->device.numa_node = 0;
> +	else
> +		dev->device.numa_node = tmp;
> +
> +	/* device is valid, add in list (sorted) */
> +	RTE_LOG(DEBUG, EAL, "Adding vmbus device %s\n", name);
> +
> +	TAILQ_FOREACH(dev2, &vmbus_device_list, next) {
> +		int ret;
> +
> +		ret = uuid_compare(dev->device_id, dev->device_id);
> +		if (ret > 0)
> +			continue;
> +
> +		if (ret < 0) {
> +			TAILQ_INSERT_BEFORE(dev2, dev, next);
> +			rte_eal_device_insert(&dev->device);
> +		} else { /* already registered */
> +			memmove(dev2->mem_resource, dev->mem_resource,
> +				sizeof(dev->mem_resource));
> +			free(dev);
> +		}
> +		return 0;
> +	}
> +
> +	rte_eal_device_insert(&dev->device);
> +	TAILQ_INSERT_TAIL(&vmbus_device_list, dev, next);
> +
> +	return 0;
> +error:
> +	free(dev);
> +	return -1;
> +}
> +
> +/*
> + * Scan the content of the vmbus, and the devices in the devices list
> + */
> +static int
> +vmbus_scan(void)
> +{
> +	struct dirent *e;
> +	DIR *dir;
> +
> +	dir = opendir(SYSFS_VMBUS_DEVICES);
> +	if (dir == NULL) {
> +		if (errno == ENOENT)
> +			return 0;
> +		else {
> +			RTE_LOG(ERR, EAL, "%s(): opendir failed: %s\n",
> +					__func__, strerror(errno));
> +			return -1;
> +		}
> +	}
> +
> +	while ((e = readdir(dir)) != NULL) {
> +		if (e->d_name[0] == '.')
> +			continue;
> +
> +		if (vmbus_scan_one(e->d_name) < 0)
> +			goto error;
> +	}
> +	closedir(dir);
> +	return 0;
> +
> +error:
> +	closedir(dir);
> +	return -1;
> +}
> +
> +/* Init the VMBUS EAL subsystem */
> +int rte_eal_vmbus_init(void)
> +{
> +	/* VMBUS can be disabled */
> +	if (internal_config.no_vmbus)
> +		return 0;
> +
> +	if (vmbus_scan() < 0) {
> +		RTE_LOG(ERR, EAL, "%s(): Cannot scan vmbus\n", __func__);
> +		return -1;
> +	}
> +	return 0;
> +}
> +
> +/* Below is PROBE part of eal_vmbus library */
> +
> +/*
> + * If device ID match, call the devinit() function of the driver.
> + */
> +static int
> +rte_eal_vmbus_probe_one_driver(struct rte_vmbus_driver *dr,
> +			       struct rte_vmbus_device *dev)
> +{
> +	const uuid_t *id_table;
> +
> +	RTE_LOG(DEBUG, EAL, "  probe driver: %s\n", dr->driver.name);
> +
> +	for (id_table = dr->id_table; !uuid_is_null(*id_table); ++id_table) {
> +		struct rte_devargs *args;
> +		char guid[UUID_BUF_SZ];
> +		int ret;
> +
> +		/* skip devices not associated with this device class */
> +		if (uuid_compare(*id_table, dev->class_id) != 0)
> +			continue;
> +
> +		uuid_unparse(dev->device_id, guid);
> +		RTE_LOG(INFO, EAL, "VMBUS device %s on NUMA socket %i\n",
> +			guid, dev->device.numa_node);
> +
> +		/* no initialization when blacklisted, return without error */
> +		args = dev->device.devargs;
> +		if (args && args->type == RTE_DEVTYPE_BLACKLISTED_VMBUS) {
> +			RTE_LOG(INFO, EAL, "  Device is blacklisted, not initializing\n");
> +			return 1;
> +		}
> +
> +		RTE_LOG(INFO, EAL, "  probe driver: %s\n", dr->driver.name);
> +
> +		/* map resources for device */
> +		ret = rte_eal_vmbus_map_device(dev);
> +		if (ret != 0)
> +			return ret;
> +
> +		/* reference driver structure */
> +		dev->driver = dr;
> +
> +		/* call the driver probe() function */
> +		ret = dr->probe(dr, dev);
> +		if (ret)
> +			dev->driver = NULL;
> +
> +		return ret;
> +	}
> +
> +	/* return positive value if driver doesn't support this device */
> +	return 1;
> +}
> +
> +
> +/*
> + * If vendor/device ID match, call the remove() function of the
> + * driver.
> + */
> +static int
> +vmbus_detach_dev(struct rte_vmbus_driver *dr,
> +		 struct rte_vmbus_device *dev)
> +{
> +	const uuid_t *id_table;
> +
> +	for (id_table = dr->id_table; !uuid_is_null(*id_table); ++id_table) {
> +		char guid[UUID_BUF_SZ];
> +
> +		/* skip devices not associated with this device class */
> +		if (uuid_compare(*id_table, dev->class_id) != 0)
> +			continue;
> +
> +		uuid_unparse(dev->device_id, guid);
> +		RTE_LOG(INFO, EAL, "VMBUS device %s on NUMA socket %i\n",
> +			guid, dev->device.numa_node);
> +
> +		RTE_LOG(DEBUG, EAL, "  remove driver: %s\n", dr->driver.name);
> +
> +		if (dr->remove && (dr->remove(dev) < 0))
> +			return -1;	/* negative value is an error */
> +
> +		/* clear driver structure */
> +		dev->driver = NULL;
> +
> +		vmbus_uio_unmap_resource(dev);
> +		return 0;
> +	}
> +
> +	/* return positive value if driver doesn't support this device */
> +	return 1;
> +}
> +
> +/*
> + * call the devinit() function of all
> + * registered drivers for the vmbus device. Return -1 if no driver is
> + * found for this class of vmbus device.
> + * The present assumption is that we have drivers only for vmbus network
> + * devices. That's why we don't check driver's id_table now.
> + */
> +static int
> +vmbus_probe_all_drivers(struct rte_vmbus_device *dev)
> +{
> +	struct rte_vmbus_driver *dr = NULL;
> +	int ret;
> +
> +	TAILQ_FOREACH(dr, &vmbus_driver_list, next) {
> +		ret = rte_eal_vmbus_probe_one_driver(dr, dev);
> +		if (ret < 0) {
> +			/* negative value is an error */
> +			RTE_LOG(ERR, EAL, "Failed to probe driver %s\n",
> +				dr->driver.name);
> +			return -1;
> +		}
> +		/* positive value means driver doesn't support it */
> +		if (ret > 0)
> +			continue;
> +
> +		return 0;
> +	}
> +
> +	return 1;
> +}
> +
> +
> +/*
> + * If device ID matches, call the remove() function of all
> + * registered driver for the given device. Return -1 if initialization
> + * failed, return 1 if no driver is found for this device.
> + */
> +static int
> +vmbus_detach_all_drivers(struct rte_vmbus_device *dev)
> +{
> +	struct rte_vmbus_driver *dr;
> +	int rc = 0;
> +
> +	if (dev == NULL)
> +		return -1;
> +
> +	TAILQ_FOREACH(dr, &vmbus_driver_list, next) {
> +		rc = vmbus_detach_dev(dr, dev);
> +		if (rc < 0)
> +			/* negative value is an error */
> +			return -1;
> +		if (rc > 0)
> +			/* positive value means driver doesn't support it */
> +			continue;
> +		return 0;
> +	}
> +	return 1;
> +}
> +
> +/* Detach device specified by its VMBUS id */
> +int
> +rte_eal_vmbus_detach(uuid_t device_id)
> +{
> +	struct rte_vmbus_device *dev;
> +	char ubuf[UUID_BUF_SZ];
> +
> +	TAILQ_FOREACH(dev, &vmbus_device_list, next) {
> +		if (uuid_compare(dev->device_id, device_id) != 0)
> +			continue;
> +
> +		if (vmbus_detach_all_drivers(dev) < 0)
> +			goto err_return;
> +
> +		TAILQ_REMOVE(&vmbus_device_list, dev, next);
> +		free(dev);
> +		return 0;
> +	}
> +	return -1;
> +
> +err_return:
> +	uuid_unparse(device_id, ubuf);
> +	RTE_LOG(WARNING, EAL, "Requested device %s cannot be used\n",
> +		ubuf);
> +	return -1;
> +}
> +
> +/*
> + * Scan the vmbus, and call the devinit() function for
> + * all registered drivers that have a matching entry in its id_table
> + * for discovered devices.
> + */
> +int
> +rte_eal_vmbus_probe(void)
> +{
> +	struct rte_vmbus_device *dev = NULL;
> +
> +	TAILQ_FOREACH(dev, &vmbus_device_list, next) {
> +		char ubuf[UUID_BUF_SZ];
> +
> +		uuid_unparse(dev->device_id, ubuf);
> +
> +		RTE_LOG(DEBUG, EAL, "Probing driver for device %s ...\n",
> +			ubuf);
> +		vmbus_probe_all_drivers(dev);
> +	}
> +	return 0;
> +}
> +
> +/* register vmbus driver */
> +void
> +rte_eal_vmbus_register(struct rte_vmbus_driver *driver)
> +{
> +	TAILQ_INSERT_TAIL(&vmbus_driver_list, driver, next);
> +}
> +
> +/* unregister vmbus driver */
> +void
> +rte_eal_vmbus_unregister(struct rte_vmbus_driver *driver)
> +{
> +	TAILQ_REMOVE(&vmbus_driver_list, driver, next);
> +}
> diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
> index 1e0f206..6298a8d 100644
> --- a/lib/librte_ether/rte_ethdev.c
> +++ b/lib/librte_ether/rte_ethdev.c
> @@ -3282,3 +3282,93 @@ rte_eth_dev_l2_tunnel_offload_set(uint8_t port_id,
>  				-ENOTSUP);
>  	return (*dev->dev_ops->l2_tunnel_offload_set)(dev, l2_tunnel, mask, en);
>  }
> +
> +
> +#ifdef RTE_LIBRTE_HV_PMD
> +int
> +rte_eth_dev_vmbus_probe(struct rte_vmbus_driver *vmbus_drv,
> +			struct rte_vmbus_device *vmbus_dev)
> +{
> +	struct eth_driver  *eth_drv = (struct eth_driver *)vmbus_drv;
> +	struct rte_eth_dev *eth_dev;
> +	char ustr[UUID_BUF_SZ];
> +	int diag;
> +
> +	uuid_unparse(vmbus_dev->device_id, ustr);
> +
> +	eth_dev = rte_eth_dev_allocate(ustr);
> +	if (eth_dev == NULL)
> +		return -ENOMEM;
> +
> +	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
> +		eth_dev->data->dev_private = rte_zmalloc("ethdev private structure",
> +				  eth_drv->dev_private_size,
> +				  RTE_CACHE_LINE_SIZE);
> +		if (eth_dev->data->dev_private == NULL)
> +			rte_panic("Cannot allocate memzone for private port data\n");
> +	}
> +
> +	eth_dev->vmbus_dev = vmbus_dev;
> +	eth_dev->driver = eth_drv;
> +	eth_dev->data->rx_mbuf_alloc_failed = 0;
> +
> +	/* init user callbacks */
> +	TAILQ_INIT(&(eth_dev->link_intr_cbs));
> +
> +	/*
> +	 * Set the default maximum frame size.
> +	 */
> +	eth_dev->data->mtu = ETHER_MTU;
> +
> +	/* Invoke PMD device initialization function */
> +	diag = (*eth_drv->eth_dev_init)(eth_dev);
> +	if (diag == 0)
> +		return 0;
> +
> +	RTE_PMD_DEBUG_TRACE("driver %s: eth_dev_init(%s) failed\n",
> +			    vmbus_drv->driver.name, ustr);
> +
> +	if (rte_eal_process_type() == RTE_PROC_PRIMARY)
> +		rte_free(eth_dev->data->dev_private);
> +
> +	return diag;
> +}
> +
> +int
> +rte_eth_dev_vmbus_remove(struct rte_vmbus_device *vmbus_dev)
> +{
> +	const struct eth_driver *eth_drv;
> +	struct rte_eth_dev *eth_dev;
> +	char ustr[UUID_BUF_SZ];
> +	int ret;
> +
> +	if (vmbus_dev == NULL)
> +		return -EINVAL;
> +
> +	uuid_unparse(vmbus_dev->device_id, ustr);
> +	eth_dev = rte_eth_dev_allocated(ustr);
> +	if (eth_dev == NULL)
> +		return -ENODEV;
> +
> +	eth_drv = (const struct eth_driver *)vmbus_dev->driver;
> +
> +	/* Invoke PMD device uninit function */
> +	if (*eth_drv->eth_dev_uninit) {
> +		ret = (*eth_drv->eth_dev_uninit)(eth_dev);
> +		if (ret)
> +			return ret;
> +	}
> +
> +	/* free ether device */
> +	rte_eth_dev_release_port(eth_dev);
> +
> +	if (rte_eal_process_type() == RTE_PROC_PRIMARY)
> +		rte_free(eth_dev->data->dev_private);
> +
> +	eth_dev->pci_dev = NULL;
> +	eth_dev->driver = NULL;
> +	eth_dev->data = NULL;
> +
> +	return 0;
> +}
> +#endif
> diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
> index 3c85e33..5050087 100644
> --- a/lib/librte_ether/rte_ethdev.h
> +++ b/lib/librte_ether/rte_ethdev.h
> @@ -180,6 +180,7 @@ extern "C" {
>  #include <rte_log.h>
>  #include <rte_interrupts.h>
>  #include <rte_pci.h>
> +#include <rte_vmbus.h>
>  #include <rte_dev.h>
>  #include <rte_devargs.h>
>  #include "rte_ether.h"
> @@ -1628,7 +1629,11 @@ struct rte_eth_dev {
>  	struct rte_eth_dev_data *data;  /**< Pointer to device data */
>  	const struct eth_driver *driver;/**< Driver for this device */
>  	const struct eth_dev_ops *dev_ops; /**< Functions exported by PMD */
> -	struct rte_pci_device *pci_dev; /**< PCI info. supplied by probing */
> +	union {
> +		struct rte_pci_device *pci_dev; /**< PCI info. */
> +		struct rte_vmbus_device *vmbus_dev; /**< VMBUS info. */
> +	};
> +
>  	/** User application callbacks for NIC interrupts */
>  	struct rte_eth_dev_cb_list link_intr_cbs;
>  	/**
> @@ -1866,7 +1871,11 @@ typedef int (*eth_dev_uninit_t)(struct rte_eth_dev *eth_dev);
>   * - The size of the private data to allocate for each matching device.
>   */
>  struct eth_driver {
> -	struct rte_pci_driver pci_drv;    /**< The PMD is also a PCI driver. */
> +	union {
> +		struct rte_pci_driver pci_drv;    /**< The PMD PCI driver. */
> +		struct rte_vmbus_driver vmbus_drv;/**< The PMD VMBUS drv. */
> +	};
> +
>  	eth_dev_init_t eth_dev_init;      /**< Device init function. */
>  	eth_dev_uninit_t eth_dev_uninit;  /**< Device uninit function. */
>  	unsigned int dev_private_size;    /**< Size of device private data. */

It is not a scale-able model where we have to change eth_driver/eth_dev 
for every new device type, other than PCI. Maybe VMBus is _very_ close 
to PCI so no changes are required in PCI layer (common, linuxapp, 
bsdapp) - but, for others it won't stop there.

At the least, rte_pci_driver/rte_pci_device should be removed from 
eth_driver & rte_eth_dev, respectively - relying on rte_driver and 
rte_device.

This is the primary reason work on the SoC patchset and now the new Bus 
model is being done.

> @@ -4383,6 +4392,21 @@ int rte_eth_dev_pci_probe(struct rte_pci_driver *pci_drv,
>   */
>  int rte_eth_dev_pci_remove(struct rte_pci_device *pci_dev);
>
> +/**
> + * @internal
> + * Wrapper for use by vmbus drivers as a .probe function to attach to a ethdev
> + * interface.
> + */
> +int rte_eth_dev_vmbus_probe(struct rte_vmbus_driver *vmbus_drv,
> +			  struct rte_vmbus_device *vmbus_dev);
> +
> +/**
> + * @internal
> + * Wrapper for use by vmbus drivers as a .remove function to detach a ethdev
> + * interface.
> + */
> +int rte_eth_dev_vmbus_remove(struct rte_vmbus_device *vmbus_dev);
> +
>  #ifdef __cplusplus
>  }
>  #endif
> diff --git a/mk/rte.app.mk b/mk/rte.app.mk
> index f75f0e2..6b30408 100644
> --- a/mk/rte.app.mk
> +++ b/mk/rte.app.mk
> @@ -130,6 +130,7 @@ ifeq ($(CONFIG_RTE_LIBRTE_VHOST),y)
>  _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_VHOST)      += -lrte_pmd_vhost
>  endif # $(CONFIG_RTE_LIBRTE_VHOST)
>  _LDLIBS-$(CONFIG_RTE_LIBRTE_VMXNET3_PMD)    += -lrte_pmd_vmxnet3_uio
> +_LDLIBS-$(CONFIG_RTE_LIBRTE_HV_PMD)	    += -luuid
>
>  ifeq ($(CONFIG_RTE_LIBRTE_CRYPTODEV),y)
>  _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_AESNI_MB)    += -lrte_pmd_aesni_mb
>

-
Shreyansh
  
Stephen Hemminger Dec. 15, 2016, 5:26 p.m. UTC | #2
On Thu, 15 Dec 2016 12:19:44 +0530
Shreyansh Jain <shreyansh.jain@nxp.com> wrote:

> > @@ -1866,7 +1871,11 @@ typedef int (*eth_dev_uninit_t)(struct rte_eth_dev *eth_dev);
> >   * - The size of the private data to allocate for each matching device.
> >   */
> >  struct eth_driver {
> > -	struct rte_pci_driver pci_drv;    /**< The PMD is also a PCI driver. */
> > +	union {
> > +		struct rte_pci_driver pci_drv;    /**< The PMD PCI driver. */
> > +		struct rte_vmbus_driver vmbus_drv;/**< The PMD VMBUS drv. */
> > +	};
> > +
> >  	eth_dev_init_t eth_dev_init;      /**< Device init function. */
> >  	eth_dev_uninit_t eth_dev_uninit;  /**< Device uninit function. */
> >  	unsigned int dev_private_size;    /**< Size of device private data. */  
> 
> It is not a scale-able model where we have to change eth_driver/eth_dev 
> for every new device type, other than PCI. Maybe VMBus is _very_ close 
> to PCI so no changes are required in PCI layer (common, linuxapp, 
> bsdapp) - but, for others it won't stop there.
> 
> At the least, rte_pci_driver/rte_pci_device should be removed from 
> eth_driver & rte_eth_dev, respectively - relying on rte_driver and 
> rte_device.
> 
> This is the primary reason work on the SoC patchset and now the new Bus 
> model is being done.

Agreed. the better long term model is to use C style inheritance where
rte_pci_driver has eth_driver inside. 
The other alternative is to make the second element an opaque pointer.

But that was too big a change, and not necessary to get VMBUS to work.
Longer term refactoring will take more effort. Go ahead and address it
with a better bus model, but that probably isn't going to be ready for
a couple of releases.
  
Thomas Monjalon Dec. 16, 2016, 6:09 p.m. UTC | #3
2016-12-15 09:26, Stephen Hemminger:
> On Thu, 15 Dec 2016 12:19:44 +0530
> Shreyansh Jain <shreyansh.jain@nxp.com> wrote:
> > It is not a scale-able model where we have to change eth_driver/eth_dev 
> > for every new device type, other than PCI. Maybe VMBus is _very_ close 
> > to PCI so no changes are required in PCI layer (common, linuxapp, 
> > bsdapp) - but, for others it won't stop there.
> > 
> > At the least, rte_pci_driver/rte_pci_device should be removed from 
> > eth_driver & rte_eth_dev, respectively - relying on rte_driver and 
> > rte_device.
> > 
> > This is the primary reason work on the SoC patchset and now the new Bus 
> > model is being done.
> 
> Agreed. the better long term model is to use C style inheritance where
> rte_pci_driver has eth_driver inside. 
> The other alternative is to make the second element an opaque pointer.
> 
> But that was too big a change, and not necessary to get VMBUS to work.
> Longer term refactoring will take more effort. Go ahead and address it
> with a better bus model, but that probably isn't going to be ready for
> a couple of releases.

We'll consider only the approach of generalizing the bus model for integration.
Stephen, you are welcome to help make it happen and rebase your work
on top of this new model.
Thanks
  
Stephen Hemminger Dec. 16, 2016, 8:15 p.m. UTC | #4
On Fri, 16 Dec 2016 19:09:02 +0100
Thomas Monjalon <thomas.monjalon@6wind.com> wrote:

> 2016-12-15 09:26, Stephen Hemminger:
> > On Thu, 15 Dec 2016 12:19:44 +0530
> > Shreyansh Jain <shreyansh.jain@nxp.com> wrote:  
> > > It is not a scale-able model where we have to change eth_driver/eth_dev 
> > > for every new device type, other than PCI. Maybe VMBus is _very_ close 
> > > to PCI so no changes are required in PCI layer (common, linuxapp, 
> > > bsdapp) - but, for others it won't stop there.
> > > 
> > > At the least, rte_pci_driver/rte_pci_device should be removed from 
> > > eth_driver & rte_eth_dev, respectively - relying on rte_driver and 
> > > rte_device.
> > > 
> > > This is the primary reason work on the SoC patchset and now the new Bus 
> > > model is being done.  
> > 
> > Agreed. the better long term model is to use C style inheritance where
> > rte_pci_driver has eth_driver inside. 
> > The other alternative is to make the second element an opaque pointer.
> > 
> > But that was too big a change, and not necessary to get VMBUS to work.
> > Longer term refactoring will take more effort. Go ahead and address it
> > with a better bus model, but that probably isn't going to be ready for
> > a couple of releases.  
> 
> We'll consider only the approach of generalizing the bus model for integration.
> Stephen, you are welcome to help make it happen and rebase your work
> on top of this new model.
> Thanks

I will generalize it to PCI and VMBUS only. I am not inventing a generic SOC
model since that is something I don't have sufficient knowledge of. This
fits the YAGNI principle.
  
Thomas Monjalon Dec. 17, 2016, 9:17 a.m. UTC | #5
2016-12-16 12:15, Stephen Hemminger:
> On Fri, 16 Dec 2016 19:09:02 +0100
> Thomas Monjalon <thomas.monjalon@6wind.com> wrote:
> 
> > 2016-12-15 09:26, Stephen Hemminger:
> > > On Thu, 15 Dec 2016 12:19:44 +0530
> > > Shreyansh Jain <shreyansh.jain@nxp.com> wrote:  
> > > > It is not a scale-able model where we have to change eth_driver/eth_dev 
> > > > for every new device type, other than PCI. Maybe VMBus is _very_ close 
> > > > to PCI so no changes are required in PCI layer (common, linuxapp, 
> > > > bsdapp) - but, for others it won't stop there.
> > > > 
> > > > At the least, rte_pci_driver/rte_pci_device should be removed from 
> > > > eth_driver & rte_eth_dev, respectively - relying on rte_driver and 
> > > > rte_device.
> > > > 
> > > > This is the primary reason work on the SoC patchset and now the new Bus 
> > > > model is being done.  
> > > 
> > > Agreed. the better long term model is to use C style inheritance where
> > > rte_pci_driver has eth_driver inside. 
> > > The other alternative is to make the second element an opaque pointer.
> > > 
> > > But that was too big a change, and not necessary to get VMBUS to work.
> > > Longer term refactoring will take more effort. Go ahead and address it
> > > with a better bus model, but that probably isn't going to be ready for
> > > a couple of releases.  
> > 
> > > We'll consider only the approach of generalizing the bus model for integration.
> > Stephen, you are welcome to help make it happen and rebase your work
> > on top of this new model.
> > Thanks
> 
> I will generalize it to PCI and VMBUS only. I am not inventing a generic SOC
> model since that is something that I don't have sufficient knowledge. This
> fits the YAGNI principle. 

There is already a work in progress to generalize bus handling. It is not
specific to SoC design. It is just a better design to add new buses.
  

Patch

diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile
index a92c984..9254bae 100644
--- a/lib/librte_eal/common/Makefile
+++ b/lib/librte_eal/common/Makefile
@@ -33,7 +33,7 @@  include $(RTE_SDK)/mk/rte.vars.mk
 
 INC := rte_branch_prediction.h rte_common.h
 INC += rte_debug.h rte_eal.h rte_errno.h rte_launch.h rte_lcore.h
-INC += rte_log.h rte_memory.h rte_memzone.h rte_pci.h
+INC += rte_log.h rte_memory.h rte_memzone.h rte_pci.h rte_vmbus.h
 INC += rte_per_lcore.h rte_random.h
 INC += rte_tailq.h rte_interrupts.h rte_alarm.h
 INC += rte_string_fns.h rte_version.h
diff --git a/lib/librte_eal/common/eal_common_devargs.c b/lib/librte_eal/common/eal_common_devargs.c
index e403717..934ca84 100644
--- a/lib/librte_eal/common/eal_common_devargs.c
+++ b/lib/librte_eal/common/eal_common_devargs.c
@@ -113,6 +113,13 @@  rte_eal_devargs_add(enum rte_devtype devtype, const char *devargs_str)
 			goto fail;
 
 		break;
+	case RTE_DEVTYPE_WHITELISTED_VMBUS:
+	case RTE_DEVTYPE_BLACKLISTED_VMBUS:
+#ifdef RTE_LIBRTE_HV_PMD
+		if (uuid_parse(buf, devargs->uuid) == 0)
+			break;
+#endif
+		goto fail;
 	}
 
 	free(buf);
diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c
index 6ca8af1..6aea87d 100644
--- a/lib/librte_eal/common/eal_common_options.c
+++ b/lib/librte_eal/common/eal_common_options.c
@@ -95,6 +95,11 @@  eal_long_options[] = {
 	{OPT_VFIO_INTR,         1, NULL, OPT_VFIO_INTR_NUM        },
 	{OPT_VMWARE_TSC_MAP,    0, NULL, OPT_VMWARE_TSC_MAP_NUM   },
 	{OPT_XEN_DOM0,          0, NULL, OPT_XEN_DOM0_NUM         },
+#ifdef RTE_LIBRTE_HV_PMD
+	{OPT_NO_VMBUS,          0, NULL, OPT_NO_VMBUS_NUM         },
+	{OPT_VMBUS_BLACKLIST,   1, NULL, OPT_VMBUS_BLACKLIST_NUM  },
+	{OPT_VMBUS_WHITELIST,   1, NULL, OPT_VMBUS_WHITELIST_NUM  },
+#endif
 	{0,                     0, NULL, 0                        }
 };
 
@@ -855,6 +860,21 @@  eal_parse_common_option(int opt, const char *optarg,
 		conf->no_pci = 1;
 		break;
 
+#ifdef RTE_LIBRTE_HV_PMD
+	case OPT_NO_VMBUS_NUM:
+		conf->no_vmbus = 1;
+		break;
+	case OPT_VMBUS_BLACKLIST_NUM:
+		if (rte_eal_devargs_add(RTE_DEVTYPE_BLACKLISTED_VMBUS,
+					optarg) < 0)
+			return -1;
+		break;
+	case OPT_VMBUS_WHITELIST_NUM:
+		if (rte_eal_devargs_add(RTE_DEVTYPE_WHITELISTED_VMBUS,
+				optarg) < 0)
+			return -1;
+		break;
+#endif
 	case OPT_NO_HPET_NUM:
 		conf->no_hpet = 1;
 		break;
@@ -987,6 +1007,14 @@  eal_check_common_options(struct internal_config *internal_cfg)
 		return -1;
 	}
 
+#ifdef RTE_LIBRTE_HV_PMD
+	if (rte_eal_devargs_type_count(RTE_DEVTYPE_WHITELISTED_VMBUS) != 0 &&
+		rte_eal_devargs_type_count(RTE_DEVTYPE_BLACKLISTED_VMBUS) != 0) {
+		RTE_LOG(ERR, EAL, "Options vmbus blacklist and whitelist "
+			"cannot be used at the same time\n");
+		return -1;
+	}
+#endif
 	return 0;
 }
 
@@ -1036,5 +1064,15 @@  eal_common_usage(void)
 	       "  --"OPT_NO_PCI"            Disable PCI\n"
 	       "  --"OPT_NO_HPET"           Disable HPET\n"
 	       "  --"OPT_NO_SHCONF"         No shared config (mmap'd files)\n"
+#ifdef RTE_LIBRTE_HV_PMD
+	       "  --"OPT_NO_VMBUS"          Disable VMBUS\n"
+	       "  --"OPT_VMBUS_BLACKLIST" Add a VMBUS device to black list.\n"
+	       "                      Prevent EAL from using this PCI device. The argument\n"
+	       "                      format is device UUID.\n"
+	       "  --"OPT_VMBUS_WHITELIST" Add a VMBUS device to white list.\n"
+	       "                      Only use the specified VMBUS devices. The argument format\n"
+	       "                      is device UUID This option can be present\n"
+	       "                      several times (once per device).\n"
+#endif
 	       "\n", RTE_MAX_LCORE);
 }
diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h
index 5f1367e..1827194 100644
--- a/lib/librte_eal/common/eal_internal_cfg.h
+++ b/lib/librte_eal/common/eal_internal_cfg.h
@@ -69,7 +69,8 @@  struct internal_config {
 	volatile unsigned no_pci;         /**< true to disable PCI */
 	volatile unsigned no_hpet;        /**< true to disable HPET */
 	volatile unsigned vmware_tsc_map; /**< true to use VMware TSC mapping
-										* instead of native TSC */
+					   * instead of native TSC */
+	volatile unsigned no_vmbus;       /**< true to disable VMBUS */
 	volatile unsigned no_shconf;      /**< true if there is no shared config */
 	volatile unsigned create_uio_dev; /**< true to create /dev/uioX devices */
 	volatile enum rte_proc_type_t process_type; /**< multi-process proc type */
diff --git a/lib/librte_eal/common/eal_options.h b/lib/librte_eal/common/eal_options.h
index a881c62..156727e 100644
--- a/lib/librte_eal/common/eal_options.h
+++ b/lib/librte_eal/common/eal_options.h
@@ -83,6 +83,12 @@  enum {
 	OPT_VMWARE_TSC_MAP_NUM,
 #define OPT_XEN_DOM0          "xen-dom0"
 	OPT_XEN_DOM0_NUM,
+#define OPT_NO_VMBUS          "no-vmbus"
+	OPT_NO_VMBUS_NUM,
+#define OPT_VMBUS_BLACKLIST   "vmbus-blacklist"
+	OPT_VMBUS_BLACKLIST_NUM,
+#define OPT_VMBUS_WHITELIST   "vmbus-whitelist"
+	OPT_VMBUS_WHITELIST_NUM,
 	OPT_LONG_MAX_NUM
 };
 
diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
index 9e7d8f6..c856c63 100644
--- a/lib/librte_eal/common/eal_private.h
+++ b/lib/librte_eal/common/eal_private.h
@@ -210,6 +210,11 @@  int pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx,
 		struct mapped_pci_resource *uio_res, int map_idx);
 
 /**
+ * VMBUS related functions and structures
+ */
+int rte_eal_vmbus_init(void);
+
+/**
  * Init tail queues for non-EAL library structures. This is to allow
  * the rings, mempools, etc. lists to be shared among multiple processes
  *
diff --git a/lib/librte_eal/common/include/rte_devargs.h b/lib/librte_eal/common/include/rte_devargs.h
index 88120a1..c079d28 100644
--- a/lib/librte_eal/common/include/rte_devargs.h
+++ b/lib/librte_eal/common/include/rte_devargs.h
@@ -51,6 +51,9 @@  extern "C" {
 #include <stdio.h>
 #include <sys/queue.h>
 #include <rte_pci.h>
+#ifdef RTE_LIBRTE_HV_PMD
+#include <uuid/uuid.h>
+#endif
 
 /**
  * Type of generic device
@@ -59,6 +62,8 @@  enum rte_devtype {
 	RTE_DEVTYPE_WHITELISTED_PCI,
 	RTE_DEVTYPE_BLACKLISTED_PCI,
 	RTE_DEVTYPE_VIRTUAL,
+	RTE_DEVTYPE_WHITELISTED_VMBUS,
+	RTE_DEVTYPE_BLACKLISTED_VMBUS,
 };
 
 /**
@@ -88,6 +93,9 @@  struct rte_devargs {
 			/** Driver name. */
 			char drv_name[32];
 		} virt;
+#ifdef RTE_LIBRTE_HV_PMD
+		uuid_t uuid;
+#endif
 	};
 	/** Arguments string as given by user or "" for no argument. */
 	char *args;
diff --git a/lib/librte_eal/common/include/rte_vmbus.h b/lib/librte_eal/common/include/rte_vmbus.h
new file mode 100644
index 0000000..8540539
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_vmbus.h
@@ -0,0 +1,247 @@ 
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2013-2016 Brocade Communications Systems, Inc.
+ *   Copyright(c) 2016 Microsoft Corporation
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef _RTE_VMBUS_H_
+#define _RTE_VMBUS_H_
+
+/**
+ * @file
+ *
+ * RTE VMBUS Interface
+ */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <errno.h>
+#include <uuid/uuid.h>
+#include <sys/queue.h>
+#include <stdint.h>
+#include <inttypes.h>
+
+#include <rte_debug.h>
+#include <rte_interrupts.h>
+#include <rte_dev.h>
+
+TAILQ_HEAD(vmbus_device_list, rte_vmbus_device);
+TAILQ_HEAD(vmbus_driver_list, rte_vmbus_driver);
+
+extern struct vmbus_driver_list vmbus_driver_list;
+extern struct vmbus_device_list vmbus_device_list;
+
+/** Pathname of VMBUS devices directory. */
+#define SYSFS_VMBUS_DEVICES "/sys/bus/vmbus/devices"
+
+#define UUID_BUF_SZ	(36 + 1)
+	
+
+/** Maximum number of VMBUS resources. */
+#define VMBUS_MAX_RESOURCE 7
+
+/**
+ * A structure describing a VMBUS device.
+ *
+ * The structure ends in a flexible array member: it must be allocated with
+ * extra room after the fixed part for the NUL-terminated sysfs name.
+ */
+struct rte_vmbus_device {
+	TAILQ_ENTRY(rte_vmbus_device) next;     /**< Next probed VMBUS device. */
+	struct rte_device device;               /**< Inherit core device */
+	uuid_t device_id;			/**< VMBUS device id */
+	uuid_t class_id;			/**< VMBUS device type */
+	uint32_t relid;				/**< VMBUS id for notification */
+	uint8_t	monitor_id;			/**< Value of the sysfs "monitor_id" attribute */
+	struct rte_intr_handle intr_handle;     /**< Interrupt handle */
+	const struct rte_vmbus_driver *driver;  /**< Associated driver */
+
+	struct rte_mem_resource mem_resource[VMBUS_MAX_RESOURCE];
+						/**< VMBUS Memory Resource */
+	char sysfs_name[];			/**< Name in sysfs bus directory */
+};
+
+struct rte_vmbus_driver;
+
+/**
+ * Initialisation function for the driver called during VMBUS probing.
+ */
+typedef int (vmbus_probe_t)(struct rte_vmbus_driver *, struct rte_vmbus_device *);
+
+/**
+ * Uninitialisation function for the driver called during hotplugging.
+ */
+typedef int (vmbus_remove_t)(struct rte_vmbus_device *);
+
+/**
+ * A structure describing a VMBUS driver.
+ */
+struct rte_vmbus_driver {
+	TAILQ_ENTRY(rte_vmbus_driver) next;     /**< Next in list. */
+	struct rte_driver driver;
+	vmbus_probe_t *probe;                   /**< Device Probe function. */
+	vmbus_remove_t *remove;                 /**< Device Remove function. */
+
+	const uuid_t *id_table;			/**< ID table, NULL terminated. */
+};
+
+struct vmbus_map {
+	void *addr;
+	char *path;
+	uint64_t offset;
+	uint64_t size;
+	uint64_t phaddr;
+};
+
+/*
+ * For multi-process we need to reproduce all vmbus mappings in secondary
+ * processes, so save them in a tailq.
+ */
+struct mapped_vmbus_resource {
+	TAILQ_ENTRY(mapped_vmbus_resource) next;
+
+	uuid_t uuid;
+	char path[PATH_MAX];
+	int nb_maps;
+	struct vmbus_map maps[VMBUS_MAX_RESOURCE];
+};
+
+TAILQ_HEAD(mapped_vmbus_res_list, mapped_vmbus_resource);
+
+/**
+ * Scan the content of the VMBUS bus, and the devices in the devices list
+ *
+ * @return
+ *  0 on success, negative on error
+ */
+int rte_eal_vmbus_scan(void);
+
+/**
+ * Probe the VMBUS bus for registered drivers.
+ *
+ * Scan the content of the VMBUS bus, and call the probe() function for
+ * all registered drivers that have a matching entry in its id_table
+ * for discovered devices.
+ *
+ * @return
+ *   - 0 on success.
+ *   - Negative on error.
+ */
+int rte_eal_vmbus_probe(void);
+
+/**
+ * Map the VMBUS device resources in user space virtual memory address
+ *
+ * @param dev
+ *   A pointer to a rte_vmbus_device structure describing the device
+ *   to use
+ *
+ * @return
+ *   0 on success, negative on error and positive if no driver
+ *   is found for the device.
+ */
+int rte_eal_vmbus_map_device(struct rte_vmbus_device *dev);
+
+/**
+ * Unmap this device
+ *
+ * @param dev
+ *   A pointer to a rte_vmbus_device structure describing the device
+ *   to use
+ */
+void rte_eal_vmbus_unmap_device(struct rte_vmbus_device *dev);
+
+/**
+ * Probe the single VMBUS device.
+ *
+ * Scan the content of the VMBUS bus, and find the vmbus device
+ * specified by device uuid, then call the probe() function for
+ * registered driver that has a matching entry in its id_table for
+ * discovered device.
+ *
+ * @param id
+ * 	The VMBUS device uuid.
+ * @return
+ *   - 0 on success.
+ *   - Negative on error.
+ */
+int rte_eal_vmbus_probe_one(uuid_t id);
+
+/**
+ * Close the single VMBUS device.
+ *
+ * Scan the content of the VMBUS bus, and find the vmbus device id,
+ * then call the remove() function for registered driver that has a
+ * matching entry in its id_table for discovered device.
+ *
+ * @param id
+ * 	The VMBUS device uuid.
+ * @return
+ *   - 0 on success.
+ *   - Negative on error.
+ */
+int rte_eal_vmbus_detach(uuid_t id);
+
+/**
+ * Register a VMBUS driver.
+ *
+ * @param driver
+ *   A pointer to a rte_vmbus_driver structure describing the driver
+ *   to be registered.
+ */
+void rte_eal_vmbus_register(struct rte_vmbus_driver *driver);
+
+/** Helper for VMBUS device registration from driver instance */
+#define RTE_PMD_REGISTER_VMBUS(nm, vmbus_drv) \
+RTE_INIT(vmbusinitfn_ ##nm); \
+static void vmbusinitfn_ ##nm(void) \
+{\
+	(vmbus_drv).driver.name = RTE_STR(nm);\
+	rte_eal_vmbus_register(&vmbus_drv); \
+} \
+RTE_PMD_EXPORT_NAME(nm, __COUNTER__)
+
+/**
+ * Unregister a VMBUS driver.
+ *
+ * @param driver
+ *   A pointer to a rte_vmbus_driver structure describing the driver
+ *   to be unregistered.
+ */
+void rte_eal_vmbus_unregister(struct rte_vmbus_driver *driver);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_VMBUS_H_ */
diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile
index 4e206f0..f6ca384 100644
--- a/lib/librte_eal/linuxapp/eal/Makefile
+++ b/lib/librte_eal/linuxapp/eal/Makefile
@@ -71,6 +71,11 @@  SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_timer.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_interrupts.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_alarm.c
 
+ifeq ($(CONFIG_RTE_LIBRTE_HV_PMD),y)
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_vmbus.c
+LDLIBS += -luuid
+endif
+
 # from common dir
 SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_lcore.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_timer.c
@@ -114,6 +119,7 @@  CFLAGS_eal_hugepage_info.o := -D_GNU_SOURCE
 CFLAGS_eal_pci.o := -D_GNU_SOURCE
 CFLAGS_eal_pci_uio.o := -D_GNU_SOURCE
 CFLAGS_eal_pci_vfio.o := -D_GNU_SOURCE
+CFLAGS_eal_vmbus.o := -D_GNU_SOURCE
 CFLAGS_eal_common_whitelist.o := -D_GNU_SOURCE
 CFLAGS_eal_common_options.o := -D_GNU_SOURCE
 CFLAGS_eal_common_thread.o := -D_GNU_SOURCE
diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c
index 2075282..71083ec 100644
--- a/lib/librte_eal/linuxapp/eal/eal.c
+++ b/lib/librte_eal/linuxapp/eal/eal.c
@@ -70,6 +70,7 @@ 
 #include <rte_cpuflags.h>
 #include <rte_interrupts.h>
 #include <rte_pci.h>
+#include <rte_vmbus.h>
 #include <rte_dev.h>
 #include <rte_devargs.h>
 #include <rte_common.h>
@@ -830,6 +831,11 @@  rte_eal_init(int argc, char **argv)
 
 	eal_check_mem_on_local_socket();
 
+#ifdef RTE_LIBRTE_HV_PMD
+	if (rte_eal_vmbus_init() < 0)
+		RTE_LOG(ERR, EAL, "Cannot init VMBUS\n");
+#endif
+
 	if (eal_plugins_init() < 0)
 		rte_panic("Cannot init plugins\n");
 
@@ -887,6 +893,11 @@  rte_eal_init(int argc, char **argv)
 	if (rte_eal_pci_probe())
 		rte_panic("Cannot probe PCI\n");
 
+#ifdef RTE_LIBRTE_HV_PMD
+	if (rte_eal_vmbus_probe() < 0)
+		rte_panic("Cannot probe VMBUS\n");
+#endif
+
 	rte_eal_mcfg_complete();
 
 	return fctret;
diff --git a/lib/librte_eal/linuxapp/eal/eal_vmbus.c b/lib/librte_eal/linuxapp/eal/eal_vmbus.c
new file mode 100644
index 0000000..cbd8bd1
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/eal_vmbus.c
@@ -0,0 +1,906 @@ 
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2013-2016 Brocade Communications Systems, Inc.
+ *   Copyright(c) 2016 Microsoft Corporation
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <string.h>
+#include <unistd.h>
+#include <dirent.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+
+#include <rte_eal.h>
+#include <rte_tailq.h>
+#include <rte_log.h>
+#include <rte_devargs.h>
+#include <rte_vmbus.h>
+#include <rte_malloc.h>
+
+#include "eal_private.h"
+#include "eal_pci_init.h"
+#include "eal_filesystem.h"
+
+struct vmbus_driver_list vmbus_driver_list =
+	TAILQ_HEAD_INITIALIZER(vmbus_driver_list);
+struct vmbus_device_list vmbus_device_list =
+	TAILQ_HEAD_INITIALIZER(vmbus_device_list);
+
+static void *vmbus_map_addr;
+
+static struct rte_tailq_elem rte_vmbus_uio_tailq = {
+	.name = "UIO_RESOURCE_LIST",
+};
+EAL_REGISTER_TAILQ(rte_vmbus_uio_tailq);
+
+/*
+ * Read a UUID from a sysfs attribute file.
+ *
+ * Only the first line of the file is used. A trailing newline is dropped,
+ * and a value wrapped in braces ("{...}") is accepted in addition to the
+ * bare textual form understood by uuid_parse().
+ *
+ * Returns 0 and fills in uu on success; returns -1 if the file cannot be
+ * opened or read, or if its content is not a valid UUID.
+ */
+static int
+vmbus_get_sysfs_uuid(const char *filename, uuid_t uu)
+{
+	char buf[BUFSIZ];
+	const char *str = buf;
+	char *cp;
+	FILE *f;
+
+	f = fopen(filename, "r");
+	if (f == NULL) {
+		RTE_LOG(ERR, EAL, "%s(): cannot open sysfs value %s\n",
+				__func__, filename);
+		return -1;
+	}
+
+	if (fgets(buf, sizeof(buf), f) == NULL) {
+		RTE_LOG(ERR, EAL, "%s(): cannot read sysfs value %s\n",
+				__func__, filename);
+		fclose(f);
+		return -1;
+	}
+	fclose(f);
+
+	/* drop the trailing newline; the search must start from buf */
+	cp = strchr(buf, '\n');
+	if (cp)
+		*cp = '\0';
+
+	/* strip { } notation: cut at '}' and skip over the leading '{' */
+	if (buf[0] == '{') {
+		cp = strchr(buf, '}');
+		if (cp) {
+			*cp = '\0';
+			str = buf + 1;
+		}
+	}
+
+	if (uuid_parse(str, uu) < 0) {
+		RTE_LOG(ERR, EAL, "%s %s not a valid UUID\n",
+			filename, str);
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * map a particular resource from a file
+ *
+ * mmap()s size bytes of fd at offset, readable and writable, MAP_SHARED
+ * plus any extra flags given by the caller.
+ *
+ * Returns whatever mmap() returned, so callers must compare the result
+ * against MAP_FAILED. An error is also logged when a non-NULL
+ * requested_addr was given and the mapping landed elsewhere; in that case
+ * the (valid) mapping is still returned and errno was not set by mmap().
+ */
+static void *
+vmbus_map_resource(void *requested_addr, int fd, off_t offset, size_t size,
+		   int flags)
+{
+	void *mapaddr;
+
+	/* Map the memory resource of device */
+	mapaddr = mmap(requested_addr, size, PROT_READ | PROT_WRITE,
+		       MAP_SHARED | flags, fd, offset);
+	if (mapaddr == MAP_FAILED ||
+	    (requested_addr != NULL && mapaddr != requested_addr)) {
+		RTE_LOG(ERR, EAL,
+			"%s(): cannot mmap(%d, %p, 0x%lx, 0x%lx): %s)\n",
+			__func__, fd, requested_addr,
+			(unsigned long)size, (unsigned long)offset,
+			strerror(errno));
+	} else
+		RTE_LOG(DEBUG, EAL, "  VMBUS memory mapped at %p\n", mapaddr);
+
+	return mapaddr;
+}
+
+/*
+ * Release a mapping previously set up for a VMBUS resource.
+ * A NULL address is silently ignored; a munmap() failure is only logged.
+ */
+static void
+vmbus_unmap_resource(void *requested_addr, size_t size)
+{
+	int rc;
+
+	if (!requested_addr)
+		return;
+
+	rc = munmap(requested_addr, size);
+	if (rc == 0) {
+		RTE_LOG(DEBUG, EAL, "  VMBUS memory unmapped at %p\n",
+				requested_addr);
+		return;
+	}
+
+	RTE_LOG(ERR, EAL, "%s(): cannot munmap(%p, 0x%lx): %s\n",
+		__func__, requested_addr, (unsigned long)size,
+		strerror(errno));
+}
+
+/* Only supports current kernel version
+ * Unlike PCI there is no option (or need) to create UIO device.
+ *
+ * Looks in /sys/bus/vmbus/devices/<name>/uio for the first entry named
+ * "uio<N>" and writes the full path of that entry into dstbuf (at most
+ * buflen bytes, possibly truncated).
+ *
+ * Returns N on success, -1 if the directory cannot be opened or holds no
+ * matching entry; dstbuf is left untouched in the failure cases.
+ */
+static int vmbus_get_uio_dev(const char *name,
+			     char *dstbuf, size_t buflen)
+{
+	char dirname[PATH_MAX];
+	unsigned int uio_num;
+	struct dirent *e;
+	DIR *dir;
+
+	snprintf(dirname, sizeof(dirname),
+		 "/sys/bus/vmbus/devices/%s/uio", name);
+
+	dir = opendir(dirname);
+	if (dir == NULL) {
+		RTE_LOG(ERR, EAL, "Cannot map uio resources for %s: %s\n",
+			name, strerror(errno));
+		return -1;
+	}
+
+	/* take the first file starting with "uio" */
+	while ((e = readdir(dir)) != NULL) {
+		if (sscanf(e->d_name, "uio%u", &uio_num) != 1)
+			continue;
+
+		snprintf(dstbuf, buflen, "%s/uio%u", dirname, uio_num);
+		break;
+	}
+	closedir(dir);
+
+	/* e is only tested against NULL here, never dereferenced */
+	return e ? (int) uio_num : -1;
+}
+
+/*
+ * parse a sysfs file containing one integer value
+ * different to the eal version, as it needs to work with 64-bit values
+ *
+ * The file read is <dir>/<name>. Its first line must be a number (any base
+ * accepted by strtoull() with base 0) immediately followed by a newline.
+ *
+ * Returns 0 on success, -1 if the file cannot be opened, read or parsed.
+ * Note that *val is written before the format check, so it may be
+ * modified even when -1 is returned for a parse error.
+ */
+static int
+vmbus_parse_sysfs_value(const char *dir, const char *name,
+			uint64_t *val)
+{
+	char filename[PATH_MAX];
+	FILE *f;
+	char buf[BUFSIZ];
+	char *end = NULL;
+
+	snprintf(filename, sizeof(filename), "%s/%s", dir, name);
+	f = fopen(filename, "r");
+	if (f == NULL) {
+		RTE_LOG(ERR, EAL, "%s(): cannot open sysfs value %s\n",
+				__func__, filename);
+		return -1;
+	}
+
+	if (fgets(buf, sizeof(buf), f) == NULL) {
+		RTE_LOG(ERR, EAL, "%s(): cannot read sysfs value %s\n",
+				__func__, filename);
+		fclose(f);
+		return -1;
+	}
+	fclose(f);
+
+	*val = strtoull(buf, &end, 0);
+	if ((buf[0] == '\0') || (end == NULL) || (*end != '\n')) {
+		RTE_LOG(ERR, EAL, "%s(): cannot parse sysfs value %s\n",
+				__func__, filename);
+		return -1;
+	}
+	return 0;
+}
+
+/*
+ * Get mappings out of values provided by uio
+ *
+ * Walks <uioname>/maps/map0, map1, ... until a directory is missing or
+ * VMBUS_MAX_RESOURCE entries were read, filling in offset, size and
+ * physical address of the matching maps[] entry for each one.
+ *
+ * Returns the number of maps found, or -1 if an attribute of an existing
+ * map directory cannot be read.
+ */
+static int
+vmbus_uio_get_mappings(const char *uioname,
+		       struct vmbus_map maps[])
+{
+	int i;
+
+	for (i = 0; i != VMBUS_MAX_RESOURCE; i++) {
+		struct vmbus_map *map = &maps[i];
+		char dirname[PATH_MAX];
+
+		/* check if map directory exists */
+		snprintf(dirname, sizeof(dirname),
+			 "%s/maps/map%d", uioname, i);
+
+		if (access(dirname, F_OK) != 0)
+			break;
+
+		/* get mapping offset */
+		if (vmbus_parse_sysfs_value(dirname, "offset",
+					    &map->offset) < 0)
+			return -1;
+
+		/* get mapping size */
+		if (vmbus_parse_sysfs_value(dirname, "size",
+					    &map->size) < 0)
+			return -1;
+
+		/* get mapping physical address (of this map, not maps[0]) */
+		if (vmbus_parse_sysfs_value(dirname, "addr",
+					    &map->phaddr) < 0)
+			return -1;
+	}
+
+	return i;
+}
+
+/*
+ * Release what vmbus_uio_alloc_resource() set up: free the mapping record
+ * (uio_res may be NULL) and close the UIO interrupt fd if one is open.
+ */
+static void
+vmbus_uio_free_resource(struct rte_vmbus_device *dev,
+		struct mapped_vmbus_resource *uio_res)
+{
+	rte_free(uio_res);
+
+	/* -1 marks "not open"; 0 is a valid descriptor and must be closed */
+	if (dev->intr_handle.fd >= 0) {
+		close(dev->intr_handle.fd);
+		dev->intr_handle.fd = -1;
+		dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
+	}
+}
+
+/*
+ * Locate the UIO device bound to dev, open /dev/uio<N> as the interrupt
+ * fd and allocate the record that remembers the device's mappings.
+ *
+ * Returns the zeroed record with path and uuid filled in, or NULL on any
+ * failure (after undoing the partial setup).
+ */
+static struct mapped_vmbus_resource *
+vmbus_uio_alloc_resource(struct rte_vmbus_device *dev)
+{
+	struct mapped_vmbus_resource *uio_res;
+	char dirname[PATH_MAX], devname[PATH_MAX];
+	int uio_num, nb_maps;
+
+	uio_num = vmbus_get_uio_dev(dev->sysfs_name, dirname, sizeof(dirname));
+	if (uio_num < 0) {
+		RTE_LOG(WARNING, EAL,
+			"  %s not managed by UIO driver, skipping\n",
+			dev->sysfs_name);
+		return NULL;
+	}
+
+	/* allocate the mapping details for secondary processes*/
+	uio_res = rte_zmalloc("UIO_RES", sizeof(*uio_res), 0);
+	if (uio_res == NULL) {
+		RTE_LOG(ERR, EAL,
+			"%s(): cannot store uio mmap details\n", __func__);
+		goto error;
+	}
+
+	snprintf(devname, sizeof(devname), "/dev/uio%u", uio_num);
+	dev->intr_handle.fd = open(devname, O_RDWR);
+	if (dev->intr_handle.fd < 0) {
+		RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
+			devname, strerror(errno));
+		goto error;
+	}
+
+	dev->intr_handle.type = RTE_INTR_HANDLE_UIO_INTX;
+
+	snprintf(uio_res->path, sizeof(uio_res->path), "%s", devname);
+	uuid_copy(uio_res->uuid, dev->device_id);
+
+	/* the count is only logged here; uio_res->nb_maps is not set */
+	nb_maps = vmbus_uio_get_mappings(dirname, uio_res->maps);
+	if (nb_maps < 0)
+		goto error;
+
+	RTE_LOG(DEBUG, EAL, "Found %d memory maps for device %s\n",
+		nb_maps, dev->sysfs_name);
+
+	return uio_res;
+
+ error:
+	vmbus_uio_free_resource(dev, uio_res);
+	return NULL;
+}
+
+/*
+ * Map memory resource res_idx of dev and record the mapping in slot
+ * map_idx of uio_res->maps (path, address, size, physical address).
+ *
+ * Mappings are placed one after another, starting from the address
+ * returned by pci_find_max_end_va() for the first one.
+ *
+ * Returns 0 on success, -1 on failure; nothing is left open or allocated
+ * by this call when it fails.
+ */
+static int
+vmbus_uio_map_resource_by_index(struct rte_vmbus_device *dev,
+				unsigned int res_idx,
+				struct mapped_vmbus_resource *uio_res,
+				unsigned int map_idx)
+{
+	struct vmbus_map *maps = uio_res->maps;
+	char devname[PATH_MAX];
+	void *mapaddr;
+	int fd;
+
+	/* device directories live under SYSFS_VMBUS_DEVICES */
+	snprintf(devname, sizeof(devname),
+		 SYSFS_VMBUS_DEVICES "/%s/resource%u",
+		 dev->sysfs_name, res_idx);
+
+	fd = open(devname, O_RDWR);
+	if (fd < 0) {
+		RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
+				devname, strerror(errno));
+		return -1;
+	}
+
+	/* allocate memory to keep path */
+	maps[map_idx].path = rte_malloc(NULL, strlen(devname) + 1, 0);
+	if (maps[map_idx].path == NULL) {
+		RTE_LOG(ERR, EAL, "Cannot allocate memory for path: %s\n",
+				strerror(errno));
+		/* do not leak the resource file descriptor */
+		close(fd);
+		return -1;
+	}
+
+	/* try mapping somewhere close to the end of hugepages */
+	if (vmbus_map_addr == NULL)
+		vmbus_map_addr = pci_find_max_end_va();
+
+	mapaddr = vmbus_map_resource(vmbus_map_addr, fd, 0,
+				     dev->mem_resource[res_idx].len, 0);
+	/* the mapping stays valid after the descriptor is closed */
+	close(fd);
+	if (mapaddr == MAP_FAILED) {
+		rte_free(maps[map_idx].path);
+		return -1;
+	}
+
+	/* next resource is placed right behind this one */
+	vmbus_map_addr = RTE_PTR_ADD(mapaddr,
+				     dev->mem_resource[res_idx].len);
+
+	maps[map_idx].phaddr = dev->mem_resource[res_idx].phys_addr;
+	maps[map_idx].size = dev->mem_resource[res_idx].len;
+	maps[map_idx].addr = mapaddr;
+	maps[map_idx].offset = 0;
+	strcpy(maps[map_idx].path, devname);
+	dev->mem_resource[res_idx].addr = mapaddr;
+
+	return 0;
+}
+
+/*
+ * Unmap every mapping recorded in uio_res (NULL is accepted and ignored).
+ * The stored path strings are released only in the primary process.
+ */
+static void
+vmbus_uio_unmap(struct mapped_vmbus_resource *uio_res)
+{
+	struct vmbus_map *map;
+	int idx;
+
+	if (!uio_res)
+		return;
+
+	for (idx = 0; idx != uio_res->nb_maps; idx++) {
+		map = &uio_res->maps[idx];
+
+		vmbus_unmap_resource(map->addr, map->size);
+
+		if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+			continue;
+
+		rte_free(map->path);
+	}
+}
+
+/*
+ * Look up the mapping record whose UUID equals dev->device_id.
+ * Returns the record, or NULL when dev is NULL or nothing matches.
+ */
+static struct mapped_vmbus_resource *
+vmbus_uio_find_resource(struct rte_vmbus_device *dev)
+{
+	struct mapped_vmbus_res_list *res_list;
+	struct mapped_vmbus_resource *res;
+
+	if (!dev)
+		return NULL;
+
+	res_list = RTE_TAILQ_CAST(rte_vmbus_uio_tailq.head,
+				  mapped_vmbus_res_list);
+
+	for (res = TAILQ_FIRST(res_list); res != NULL;
+	     res = TAILQ_NEXT(res, next)) {
+		if (uuid_compare(res->uuid, dev->device_id) != 0)
+			continue;
+		return res;
+	}
+
+	return NULL;
+}
+
+/*
+ * unmap the VMBUS resource of a VMBUS device in virtual memory
+ *
+ * Does nothing when dev is NULL or no mapping record exists for it.
+ * A secondary process only drops its own mappings; the primary process
+ * also removes and frees the shared record and closes the UIO
+ * descriptors held in dev->intr_handle.
+ */
+static void
+vmbus_uio_unmap_resource(struct rte_vmbus_device *dev)
+{
+	struct mapped_vmbus_resource *uio_res;
+	struct mapped_vmbus_res_list *uio_res_list =
+			RTE_TAILQ_CAST(rte_vmbus_uio_tailq.head, mapped_vmbus_res_list);
+
+	if (dev == NULL)
+		return;
+
+	/* find an entry for the device */
+	uio_res = vmbus_uio_find_resource(dev);
+	if (uio_res == NULL)
+		return;
+
+	/* secondary processes - just free maps */
+	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+		vmbus_uio_unmap(uio_res);
+		return;
+	}
+
+	TAILQ_REMOVE(uio_res_list, uio_res, next);
+
+	/* unmap all resources */
+	vmbus_uio_unmap(uio_res);
+
+	/* free uio resource */
+	rte_free(uio_res);
+
+	/* close fd if in primary process; -1 means it was never opened */
+	if (dev->intr_handle.fd >= 0)
+		close(dev->intr_handle.fd);
+	if (dev->intr_handle.uio_cfg_fd >= 0) {
+		close(dev->intr_handle.uio_cfg_fd);
+		dev->intr_handle.uio_cfg_fd = -1;
+	}
+
+	dev->intr_handle.fd = -1;
+	dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
+}
+
+/*
+ * Re-create, in a secondary process, the mappings recorded for dev, each
+ * at exactly the address stored in its record.
+ *
+ * Returns 0 when every mapping was reproduced, -1 if a resource file
+ * cannot be opened or a mapping lands at a different address, and 1 if
+ * no record exists for the device.
+ */
+static int
+vmbus_uio_map_secondary(struct rte_vmbus_device *dev)
+{
+	struct mapped_vmbus_resource *uio_res;
+	struct mapped_vmbus_res_list *uio_res_list =
+			RTE_TAILQ_CAST(rte_vmbus_uio_tailq.head,
+				       mapped_vmbus_res_list);
+
+	TAILQ_FOREACH(uio_res, uio_res_list, next) {
+		int i;
+
+		/* skip this element if it doesn't match our id */
+		if (uuid_compare(uio_res->uuid, dev->device_id))
+			continue;
+
+		for (i = 0; i != uio_res->nb_maps; i++) {
+			void *mapaddr;
+			int fd;
+
+			fd = open(uio_res->maps[i].path, O_RDWR);
+			if (fd < 0) {
+				RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
+					uio_res->maps[i].path, strerror(errno));
+				return -1;
+			}
+
+			mapaddr = vmbus_map_resource(uio_res->maps[i].addr, fd,
+						     uio_res->maps[i].offset,
+						     uio_res->maps[i].size, 0);
+			/* fd is not needed in slave process, close it */
+			close(fd);
+
+			if (mapaddr == uio_res->maps[i].addr)
+				continue;
+
+			RTE_LOG(ERR, EAL,
+				"Cannot mmap device resource file %s to address: %p\n",
+				uio_res->maps[i].path,
+				uio_res->maps[i].addr);
+
+			/* unmap addrs correctly mapped */
+			while (i != 0) {
+				--i;
+ 				vmbus_unmap_resource(uio_res->maps[i].addr,
+						     uio_res->maps[i].size);
+			}
+			return -1;
+
+		}
+		return 0;
+	}
+
+	RTE_LOG(ERR, EAL, "Cannot find resource for device\n");
+	return 1;
+}
+
+/*
+ * map the resources of a vmbus device in virtual memory
+ *
+ * The interrupt handle of dev is reset first. A secondary process then
+ * replays the mappings recorded by the primary; the primary process maps
+ * every resource with a non-zero physical address and appends the
+ * resulting record to the shared list.
+ *
+ * Returns 0 on success and -1 on failure; in a secondary process the
+ * result of vmbus_uio_map_secondary() is returned unchanged.
+ */
+int
+rte_eal_vmbus_map_device(struct rte_vmbus_device *dev)
+{
+	struct mapped_vmbus_resource *uio_res;
+	struct mapped_vmbus_res_list *uio_res_list =
+		RTE_TAILQ_CAST(rte_vmbus_uio_tailq.head, mapped_vmbus_res_list);
+	int i, ret, map_idx = 0;
+
+	dev->intr_handle.fd = -1;
+	dev->intr_handle.uio_cfg_fd = -1;
+	dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
+
+	/* secondary processes - use already recorded details */
+	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+		return vmbus_uio_map_secondary(dev);
+
+	/* allocate uio resource */
+	uio_res = vmbus_uio_alloc_resource(dev);
+	if (uio_res == NULL)
+		return -1;
+
+	/* Map all BARs */
+	for (i = 0; i != VMBUS_MAX_RESOURCE; i++) {
+		uint64_t phaddr;
+
+		/* skip empty BAR */
+		phaddr = dev->mem_resource[i].phys_addr;
+		if (phaddr == 0)
+			continue;
+
+		ret = vmbus_uio_map_resource_by_index(dev, i,
+						      uio_res, map_idx);
+		if (ret)
+			goto error;
+
+		map_idx++;
+	}
+
+	uio_res->nb_maps = map_idx;
+
+	TAILQ_INSERT_TAIL(uio_res_list, uio_res, next);
+
+	return 0;
+error:
+	/* undo only the map_idx mappings that were completed */
+	for (i = 0; i < map_idx; i++) {
+		vmbus_unmap_resource(uio_res->maps[i].addr,
+				     uio_res->maps[i].size);
+		rte_free(uio_res->maps[i].path);
+	}
+	vmbus_uio_free_resource(dev, uio_res);
+	return -1;
+}
+
+/*
+ * Scan one vmbus sysfs entry, and fill the devices list from it.
+ *
+ * Reads device_id, class_id, id (relid), monitor_id and numa_node from
+ * the device's sysfs directory and inserts the new device into
+ * vmbus_device_list, which is kept sorted by device UUID. If a device
+ * with the same UUID is already listed, its memory resources are
+ * refreshed and the new object is discarded.
+ *
+ * Returns 0 on success, -1 on allocation failure or when a mandatory
+ * attribute cannot be read.
+ */
+static int
+vmbus_scan_one(const char *name)
+{
+	struct rte_vmbus_device *dev, *dev2;
+	char filename[PATH_MAX];
+	char dirname[PATH_MAX];
+	unsigned long tmp;
+
+	/* the sysfs name is stored in the trailing flexible array */
+	dev = malloc(sizeof(*dev) + strlen(name) + 1);
+	if (dev == NULL)
+		return -1;
+
+	memset(dev, 0, sizeof(*dev));
+	strcpy(dev->sysfs_name, name);
+
+	/* sysfs base directory
+	 *   /sys/bus/vmbus/devices/7a08391f-f5a0-4ac0-9802-d13fd964f8df
+	 * or on older kernel
+	 *   /sys/bus/vmbus/devices/vmbus_1
+	 */
+	snprintf(dirname, sizeof(dirname), "%s/%s",
+		 SYSFS_VMBUS_DEVICES, name);
+
+	/* get device id */
+	snprintf(filename, sizeof(filename), "%s/device_id", dirname);
+	if (vmbus_get_sysfs_uuid(filename, dev->device_id) < 0)
+		goto error;
+
+	/* get device class  */
+	snprintf(filename, sizeof(filename), "%s/class_id", dirname);
+	if (vmbus_get_sysfs_uuid(filename, dev->class_id) < 0)
+		goto error;
+
+	/* get relid */
+	snprintf(filename, sizeof(filename), "%s/id", dirname);
+	if (eal_parse_sysfs_value(filename, &tmp) < 0)
+		goto error;
+	dev->relid = tmp;
+
+	/* get monitor id */
+	snprintf(filename, sizeof(filename), "%s/monitor_id", dirname);
+	if (eal_parse_sysfs_value(filename, &tmp) < 0)
+		goto error;
+	dev->monitor_id = tmp;
+
+	/* get numa node */
+	snprintf(filename, sizeof(filename), "%s/numa_node",
+		 dirname);
+	if (eal_parse_sysfs_value(filename, &tmp) < 0)
+		/* if no NUMA support, set default to 0 */
+		dev->device.numa_node = 0;
+	else
+		dev->device.numa_node = tmp;
+
+	/* device is valid, add in list (sorted) */
+	RTE_LOG(DEBUG, EAL, "Adding vmbus device %s\n", name);
+
+	TAILQ_FOREACH(dev2, &vmbus_device_list, next) {
+		int ret;
+
+		/* compare the new device against the listed one */
+		ret = uuid_compare(dev->device_id, dev2->device_id);
+		if (ret > 0)
+			continue;
+
+		if (ret < 0) {
+			TAILQ_INSERT_BEFORE(dev2, dev, next);
+			rte_eal_device_insert(&dev->device);
+		} else { /* already registered */
+			memmove(dev2->mem_resource, dev->mem_resource,
+				sizeof(dev->mem_resource));
+			free(dev);
+		}
+		return 0;
+	}
+
+	/* larger than every listed device (or list empty): append */
+	rte_eal_device_insert(&dev->device);
+	TAILQ_INSERT_TAIL(&vmbus_device_list, dev, next);
+
+	return 0;
+error:
+	free(dev);
+	return -1;
+}
+
+/*
+ * Walk the VMBUS sysfs devices directory and register every entry found.
+ *
+ * A missing directory is not an error (returns 0). Returns -1 if the
+ * directory cannot be opened for another reason or if scanning an entry
+ * fails; scanning stops at the first failing entry.
+ */
+static int
+vmbus_scan(void)
+{
+	struct dirent *entry;
+	DIR *devdir;
+	int rc = 0;
+
+	devdir = opendir(SYSFS_VMBUS_DEVICES);
+	if (devdir == NULL) {
+		if (errno == ENOENT)
+			return 0;
+
+		RTE_LOG(ERR, EAL, "%s(): opendir failed: %s\n",
+				__func__, strerror(errno));
+		return -1;
+	}
+
+	while (rc == 0 && (entry = readdir(devdir)) != NULL) {
+		/* skip ".", ".." and hidden entries */
+		if (entry->d_name[0] == '.')
+			continue;
+
+		if (vmbus_scan_one(entry->d_name) < 0)
+			rc = -1;
+	}
+
+	closedir(devdir);
+	return rc;
+}
+
+/*
+ * Init the VMBUS EAL subsystem
+ *
+ * Scans sysfs for VMBUS devices unless VMBUS support was disabled in the
+ * internal configuration. Returns 0 on success (or when disabled), -1 if
+ * the scan fails.
+ */
+int rte_eal_vmbus_init(void)
+{
+	/* VMBUS can be disabled */
+	if (internal_config.no_vmbus)
+		return 0;
+
+	if (vmbus_scan() < 0) {
+		RTE_LOG(ERR, EAL, "%s(): Cannot scan vmbus\n", __func__);
+		return -1;
+	}
+	return 0;
+}
+
+/* Below is PROBE part of eal_vmbus library */
+
+/*
+ * If the device class ID matches an entry of the driver's id_table, map
+ * the device and call the probe() function of the driver.
+ *
+ * Returns 1 if the driver does not handle this device class or the device
+ * is blacklisted, the non-zero result of rte_eal_vmbus_map_device() if
+ * mapping fails, and otherwise whatever the driver's probe() returned.
+ */
+static int
+rte_eal_vmbus_probe_one_driver(struct rte_vmbus_driver *dr,
+			       struct rte_vmbus_device *dev)
+{
+	const uuid_t *id_table;
+
+	RTE_LOG(DEBUG, EAL, "  probe driver: %s\n", dr->driver.name);
+
+	/* the table is terminated by an all-zero (null) UUID */
+	for (id_table = dr->id_table; !uuid_is_null(*id_table); ++id_table) {
+		struct rte_devargs *args;
+		char guid[UUID_BUF_SZ];
+		int ret;
+
+		/* skip devices not associated with this device class */
+		if (uuid_compare(*id_table, dev->class_id) != 0)
+			continue;
+
+		uuid_unparse(dev->device_id, guid);
+		RTE_LOG(INFO, EAL, "VMBUS device %s on NUMA socket %i\n",
+			guid, dev->device.numa_node);
+
+		/* no initialization when blacklisted, return without error */
+		args = dev->device.devargs;
+		if (args && args->type == RTE_DEVTYPE_BLACKLISTED_VMBUS) {
+			RTE_LOG(INFO, EAL, "  Device is blacklisted, not initializing\n");
+			return 1;
+		}
+
+		RTE_LOG(INFO, EAL, "  probe driver: %s\n", dr->driver.name);
+
+		/* map resources for device */
+		ret = rte_eal_vmbus_map_device(dev);
+		if (ret != 0)
+			return ret;
+
+		/* reference driver structure */
+		dev->driver = dr;
+
+		/* call the driver probe() function */
+		ret = dr->probe(dr, dev);
+		/* NOTE(review): the mappings made above are kept on failure */
+		if (ret)
+			dev->driver = NULL;
+
+		return ret;
+	}
+
+	/* return positive value if driver doesn't support this device */
+	return 1;
+}
+
+
+/*
+ * If the device class ID matches an entry of the driver's id_table, call
+ * the remove() function of the driver (when it has one) and unmap the
+ * device resources.
+ *
+ * Returns 0 on success, -1 if remove() reports an error, and 1 if the
+ * driver does not handle this device class.
+ */
+static int
+vmbus_detach_dev(struct rte_vmbus_driver *dr,
+		 struct rte_vmbus_device *dev)
+{
+	const uuid_t *id_table;
+
+	for (id_table = dr->id_table; !uuid_is_null(*id_table); ++id_table) {
+		char guid[UUID_BUF_SZ];
+
+		/* skip devices not associated with this device class */
+		if (uuid_compare(*id_table, dev->class_id) != 0)
+			continue;
+
+		uuid_unparse(dev->device_id, guid);
+		RTE_LOG(INFO, EAL, "VMBUS device %s on NUMA socket %i\n",
+			guid, dev->device.numa_node);
+
+		RTE_LOG(DEBUG, EAL, "  remove driver: %s\n", dr->driver.name);
+
+		if (dr->remove && (dr->remove(dev) < 0))
+			return -1;	/* negative value is an error */
+
+		/* clear driver structure */
+		dev->driver = NULL;
+
+		vmbus_uio_unmap_resource(dev);
+		return 0;
+	}
+
+	/* return positive value if driver doesn't support this device */
+	return 1;
+}
+
+/*
+ * Offer the vmbus device to each registered driver in turn, stopping at
+ * the first one that accepts it.
+ *
+ * Returns 0 once a driver probed the device successfully, -1 as soon as a
+ * driver reports an error, and 1 if no registered driver supports the
+ * device. Matching against each driver's id_table is done by
+ * rte_eal_vmbus_probe_one_driver().
+ */
+static int
+vmbus_probe_all_drivers(struct rte_vmbus_device *dev)
+{
+	struct rte_vmbus_driver *dr = NULL;
+	int ret;
+
+	TAILQ_FOREACH(dr, &vmbus_driver_list, next) {
+		ret = rte_eal_vmbus_probe_one_driver(dr, dev);
+		if (ret < 0) {
+			/* negative value is an error */
+			RTE_LOG(ERR, EAL, "Failed to probe driver %s\n",
+				dr->driver.name);
+			return -1;
+		}
+		/* positive value means driver doesn't support it */
+		if (ret > 0)
+			continue;
+
+		return 0;
+	}
+
+	return 1;
+}
+
+
+/*
+ * Ask each registered driver in turn to release the given device,
+ * stopping at the first driver that handles it.
+ *
+ * Returns 0 once a driver detached the device, -1 if dev is NULL or a
+ * driver's remove() failed, and 1 if no driver is found for this device.
+ */
+static int
+vmbus_detach_all_drivers(struct rte_vmbus_device *dev)
+{
+	struct rte_vmbus_driver *dr;
+	int rc = 0;
+
+	if (dev == NULL)
+		return -1;
+
+	TAILQ_FOREACH(dr, &vmbus_driver_list, next) {
+		rc = vmbus_detach_dev(dr, dev);
+		if (rc < 0)
+			/* negative value is an error */
+			return -1;
+		if (rc > 0)
+			/* positive value means driver doesn't support it */
+			continue;
+		return 0;
+	}
+	return 1;
+}
+
+/*
+ * Detach device specified by its VMBUS id
+ *
+ * The first listed device with a matching UUID is detached from its
+ * driver, removed from vmbus_device_list and freed. The device is also
+ * removed when no driver claimed it (detach result 1).
+ *
+ * Returns 0 on success, -1 if no such device exists or the driver's
+ * remove() failed.
+ */
+int
+rte_eal_vmbus_detach(uuid_t device_id)
+{
+	struct rte_vmbus_device *dev;
+	char ubuf[UUID_BUF_SZ];
+
+	TAILQ_FOREACH(dev, &vmbus_device_list, next) {
+		if (uuid_compare(dev->device_id, device_id) != 0)
+			continue;
+
+		if (vmbus_detach_all_drivers(dev) < 0)
+			goto err_return;
+
+		/* safe inside TAILQ_FOREACH only because we return at once */
+		TAILQ_REMOVE(&vmbus_device_list, dev, next);
+		/* NOTE(review): dev->device was handed to rte_eal_device_insert() during scan; confirm it is unlinked before this free */
+		free(dev);
+		return 0;
+	}
+	return -1;
+
+err_return:
+	uuid_unparse(device_id, ubuf);
+	RTE_LOG(WARNING, EAL, "Requested device %s cannot be used\n",
+		ubuf);
+	return -1;
+}
+
+/*
+ * Walk the list of scanned vmbus devices and call the probe() function
+ * of the registered drivers that have a matching entry in their id_table.
+ *
+ * The result of probing an individual device is ignored, so this
+ * function always returns 0.
+ */
+int
+rte_eal_vmbus_probe(void)
+{
+	struct rte_vmbus_device *dev = NULL;
+
+	TAILQ_FOREACH(dev, &vmbus_device_list, next) {
+		char ubuf[UUID_BUF_SZ];
+
+		uuid_unparse(dev->device_id, ubuf);
+
+		RTE_LOG(DEBUG, EAL, "Probing driver for device %s ...\n",
+			ubuf);
+		vmbus_probe_all_drivers(dev);
+	}
+	return 0;
+}
+
+/* register vmbus driver: append it to the global vmbus_driver_list */
+void
+rte_eal_vmbus_register(struct rte_vmbus_driver *driver)
+{
+	TAILQ_INSERT_TAIL(&vmbus_driver_list, driver, next);
+}
+
+/* unregister vmbus driver: unlink it from the global vmbus_driver_list */
+void
+rte_eal_vmbus_unregister(struct rte_vmbus_driver *driver)
+{
+	TAILQ_REMOVE(&vmbus_driver_list, driver, next);
+}
diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index 1e0f206..6298a8d 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -3282,3 +3282,93 @@  rte_eth_dev_l2_tunnel_offload_set(uint8_t port_id,
 				-ENOTSUP);
 	return (*dev->dev_ops->l2_tunnel_offload_set)(dev, l2_tunnel, mask, en);
 }
+
+
+#ifdef RTE_LIBRTE_HV_PMD
+/*
+ * Probe wrapper for VMBUS based ethernet PMDs.
+ *
+ * Allocates an ethdev port named after the textual form of the device
+ * UUID, allocates the PMD private area (primary process only), fills in
+ * the generic ethdev fields and finally calls the PMD's eth_dev_init().
+ *
+ * vmbus_drv must be the vmbus_drv member embedded at the start of a
+ * struct eth_driver; it is cast back to that enclosing structure.
+ *
+ * Returns 0 on success, -ENOMEM when no port could be allocated, or the
+ * non-zero value returned by eth_dev_init(). On an eth_dev_init()
+ * failure the private area is freed and the port is released again so
+ * that it does not stay allocated for a device that never came up.
+ * Failure to allocate the private area is fatal (rte_panic).
+ */
+int
+rte_eth_dev_vmbus_probe(struct rte_vmbus_driver *vmbus_drv,
+			struct rte_vmbus_device *vmbus_dev)
+{
+	struct eth_driver  *eth_drv = (struct eth_driver *)vmbus_drv;
+	struct rte_eth_dev *eth_dev;
+	char ustr[UUID_BUF_SZ];
+	int diag;
+
+	/* The port name is the string representation of the device UUID. */
+	uuid_unparse(vmbus_dev->device_id, ustr);
+
+	eth_dev = rte_eth_dev_allocate(ustr);
+	if (eth_dev == NULL)
+		return -ENOMEM;
+
+	/* Only the primary process allocates the private data area. */
+	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+		eth_dev->data->dev_private = rte_zmalloc("ethdev private structure",
+				  eth_drv->dev_private_size,
+				  RTE_CACHE_LINE_SIZE);
+		if (eth_dev->data->dev_private == NULL)
+			rte_panic("Cannot allocate memzone for private port data\n");
+	}
+
+	eth_dev->vmbus_dev = vmbus_dev;
+	eth_dev->driver = eth_drv;
+	eth_dev->data->rx_mbuf_alloc_failed = 0;
+
+	/* init user callbacks */
+	TAILQ_INIT(&(eth_dev->link_intr_cbs));
+
+	/*
+	 * Set the default maximum frame size.
+	 */
+	eth_dev->data->mtu = ETHER_MTU;
+
+	/* Invoke PMD device initialization function */
+	diag = (*eth_drv->eth_dev_init)(eth_dev);
+	if (diag == 0)
+		return 0;
+
+	RTE_PMD_DEBUG_TRACE("driver %s: eth_dev_init(%s) failed\n",
+			    vmbus_drv->driver.name, ustr);
+
+	/* Undo the allocations made above, in reverse order. */
+	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+		rte_free(eth_dev->data->dev_private);
+		/* Do not leave a dangling pointer in the port data. */
+		eth_dev->data->dev_private = NULL;
+	}
+
+	/* Give the port back; otherwise every failed probe leaks one. */
+	rte_eth_dev_release_port(eth_dev);
+
+	return diag;
+}
+
+/*
+ * Remove wrapper for VMBUS based ethernet PMDs.
+ *
+ * Looks up the ethdev port by the textual form of the device UUID,
+ * calls the PMD's eth_dev_uninit() when one is provided, releases the
+ * port, frees the private data area (primary process only) and clears
+ * the bus, driver and data pointers of the port.
+ *
+ * Returns 0 on success, -EINVAL when vmbus_dev is NULL, -ENODEV when no
+ * port is allocated for this device, or the non-zero value returned by
+ * eth_dev_uninit(), in which case the port is left untouched.
+ */
+int
+rte_eth_dev_vmbus_remove(struct rte_vmbus_device *vmbus_dev)
+{
+	const struct eth_driver *eth_drv;
+	struct rte_eth_dev *eth_dev;
+	char ustr[UUID_BUF_SZ];
+	int ret;
+
+	if (vmbus_dev == NULL)
+		return -EINVAL;
+
+	/* Ports are looked up by the string form of the device UUID. */
+	uuid_unparse(vmbus_dev->device_id, ustr);
+	eth_dev = rte_eth_dev_allocated(ustr);
+	if (eth_dev == NULL)
+		return -ENODEV;
+
+	/*
+	 * The vmbus driver is embedded at the start of struct eth_driver,
+	 * so the bound driver pointer can be cast to the ethernet driver.
+	 */
+	eth_drv = (const struct eth_driver *)vmbus_dev->driver;
+
+	/* Invoke PMD device uninit function, if the PMD provides one */
+	if (eth_drv->eth_dev_uninit != NULL) {
+		ret = eth_drv->eth_dev_uninit(eth_dev);
+		if (ret)
+			return ret;
+	}
+
+	/* free ether device */
+	rte_eth_dev_release_port(eth_dev);
+
+	if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+		rte_free(eth_dev->data->dev_private);
+
+	/*
+	 * This port was attached through VMBUS, so clear the vmbus_dev
+	 * member of the bus union rather than its pci_dev alias.
+	 */
+	eth_dev->vmbus_dev = NULL;
+	eth_dev->driver = NULL;
+	eth_dev->data = NULL;
+
+	return 0;
+}
+#endif
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 3c85e33..5050087 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -180,6 +180,7 @@  extern "C" {
 #include <rte_log.h>
 #include <rte_interrupts.h>
 #include <rte_pci.h>
+#include <rte_vmbus.h>
 #include <rte_dev.h>
 #include <rte_devargs.h>
 #include "rte_ether.h"
@@ -1628,7 +1629,11 @@  struct rte_eth_dev {
 	struct rte_eth_dev_data *data;  /**< Pointer to device data */
 	const struct eth_driver *driver;/**< Driver for this device */
 	const struct eth_dev_ops *dev_ops; /**< Functions exported by PMD */
-	struct rte_pci_device *pci_dev; /**< PCI info. supplied by probing */
+	union {
+		struct rte_pci_device *pci_dev; /**< PCI info. */
+		struct rte_vmbus_device *vmbus_dev; /**< VMBUS info. */
+	};
+
 	/** User application callbacks for NIC interrupts */
 	struct rte_eth_dev_cb_list link_intr_cbs;
 	/**
@@ -1866,7 +1871,11 @@  typedef int (*eth_dev_uninit_t)(struct rte_eth_dev *eth_dev);
  * - The size of the private data to allocate for each matching device.
  */
 struct eth_driver {
-	struct rte_pci_driver pci_drv;    /**< The PMD is also a PCI driver. */
+	union {
+		struct rte_pci_driver pci_drv;    /**< The PMD PCI driver. */
+		struct rte_vmbus_driver vmbus_drv;/**< The PMD VMBUS drv. */
+	};
+
 	eth_dev_init_t eth_dev_init;      /**< Device init function. */
 	eth_dev_uninit_t eth_dev_uninit;  /**< Device uninit function. */
 	unsigned int dev_private_size;    /**< Size of device private data. */
@@ -4383,6 +4392,21 @@  int rte_eth_dev_pci_probe(struct rte_pci_driver *pci_drv,
  */
 int rte_eth_dev_pci_remove(struct rte_pci_device *pci_dev);
 
+/**
+ * @internal
+ * Wrapper for use by vmbus drivers as a .probe function to attach to an
+ * ethdev interface.
+ *
+ * @param vmbus_drv
+ *   The vmbus driver; it must be the one embedded in a struct eth_driver,
+ *   because the implementation casts it to that type.
+ * @param vmbus_dev
+ *   The vmbus device to attach; its device id, in string form, is used as
+ *   the ethdev port name.
+ * @return
+ *   - 0 on success.
+ *   - -ENOMEM if no ethdev port could be allocated.
+ *   - Otherwise the non-zero value returned by the PMD's eth_dev_init().
+ */
+int rte_eth_dev_vmbus_probe(struct rte_vmbus_driver *vmbus_drv,
+			  struct rte_vmbus_device *vmbus_dev);
+
+/**
+ * @internal
+ * Wrapper for use by vmbus drivers as a .remove function to detach an
+ * ethdev interface.
+ *
+ * @param vmbus_dev
+ *   The vmbus device whose ethdev port is to be removed.
+ * @return
+ *   - 0 on success.
+ *   - -EINVAL if vmbus_dev is NULL.
+ *   - -ENODEV if no ethdev port is allocated for this device.
+ *   - Otherwise the non-zero value returned by the PMD's eth_dev_uninit().
+ */
+int rte_eth_dev_vmbus_remove(struct rte_vmbus_device *vmbus_dev);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/mk/rte.app.mk b/mk/rte.app.mk
index f75f0e2..6b30408 100644
--- a/mk/rte.app.mk
+++ b/mk/rte.app.mk
@@ -130,6 +130,7 @@  ifeq ($(CONFIG_RTE_LIBRTE_VHOST),y)
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_VHOST)      += -lrte_pmd_vhost
 endif # $(CONFIG_RTE_LIBRTE_VHOST)
 _LDLIBS-$(CONFIG_RTE_LIBRTE_VMXNET3_PMD)    += -lrte_pmd_vmxnet3_uio
+_LDLIBS-$(CONFIG_RTE_LIBRTE_HV_PMD)	    += -luuid
 
 ifeq ($(CONFIG_RTE_LIBRTE_CRYPTODEV),y)
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_AESNI_MB)    += -lrte_pmd_aesni_mb