[dpdk-dev] [PATCH 8/8] eal: VMBUS infrastructure

Jan Blunck jblunck at infradead.org
Wed Jan 11 22:13:32 CET 2017


On Wed, Jan 11, 2017 at 3:49 PM, Jan Blunck <jblunck at infradead.org> wrote:
> On Sat, Jan 7, 2017 at 7:17 PM, Stephen Hemminger
> <stephen at networkplumber.org> wrote:
>> Add support for VMBUS on Hyper-V/Azure. VMBUS is similar to PCI
>> but has different addressing and internal API's.
>>
>> Signed-off-by: Stephen Hemminger <sthemmin at microsoft.com>
>> ---
>>  lib/librte_eal/common/Makefile              |   2 +-
>>  lib/librte_eal/common/eal_common_devargs.c  |   7 +
>>  lib/librte_eal/common/eal_common_options.c  |  38 ++
>>  lib/librte_eal/common/eal_internal_cfg.h    |   1 +
>>  lib/librte_eal/common/eal_options.h         |   6 +
>>  lib/librte_eal/common/eal_private.h         |   5 +
>>  lib/librte_eal/common/include/rte_devargs.h |   8 +
>>  lib/librte_eal/common/include/rte_vmbus.h   | 249 ++++++++
>>  lib/librte_eal/linuxapp/eal/Makefile        |   6 +
>>  lib/librte_eal/linuxapp/eal/eal.c           |  13 +
>>  lib/librte_eal/linuxapp/eal/eal_vmbus.c     | 911 ++++++++++++++++++++++++++++
>>  lib/librte_ether/rte_ethdev.c               |  90 +++
>>  lib/librte_ether/rte_ethdev.h               |  31 +
>>  mk/rte.app.mk                               |   1 +
>>  14 files changed, 1367 insertions(+), 1 deletion(-)
>>  create mode 100644 lib/librte_eal/common/include/rte_vmbus.h
>>  create mode 100644 lib/librte_eal/linuxapp/eal/eal_vmbus.c
>>
>> diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile
>> index 09a3d3af..ceb77bed 100644
>> --- a/lib/librte_eal/common/Makefile
>> +++ b/lib/librte_eal/common/Makefile
>> @@ -33,7 +33,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
>>
>>  INC := rte_branch_prediction.h rte_common.h
>>  INC += rte_debug.h rte_eal.h rte_errno.h rte_launch.h rte_lcore.h
>> -INC += rte_log.h rte_memory.h rte_memzone.h rte_pci.h
>> +INC += rte_log.h rte_memory.h rte_memzone.h rte_pci.h rte_vmbus.h
>>  INC += rte_per_lcore.h rte_random.h
>>  INC += rte_tailq.h rte_interrupts.h rte_alarm.h
>>  INC += rte_string_fns.h rte_version.h
>> diff --git a/lib/librte_eal/common/eal_common_devargs.c b/lib/librte_eal/common/eal_common_devargs.c
>> index e403717b..934ca840 100644
>> --- a/lib/librte_eal/common/eal_common_devargs.c
>> +++ b/lib/librte_eal/common/eal_common_devargs.c
>> @@ -113,6 +113,13 @@ rte_eal_devargs_add(enum rte_devtype devtype, const char *devargs_str)
>>                         goto fail;
>>
>>                 break;
>> +       case RTE_DEVTYPE_WHITELISTED_VMBUS:
>> +       case RTE_DEVTYPE_BLACKLISTED_VMBUS:
>> +#ifdef RTE_LIBRTE_HV_PMD
>> +               if (uuid_parse(buf, devargs->uuid) == 0)
>> +                       break;
>> +#endif
>> +               goto fail;
>>         }
>>
>>         free(buf);
>> diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c
>> index f36bc556..1a2b418c 100644
>> --- a/lib/librte_eal/common/eal_common_options.c
>> +++ b/lib/librte_eal/common/eal_common_options.c
>> @@ -95,6 +95,11 @@ eal_long_options[] = {
>>         {OPT_VFIO_INTR,         1, NULL, OPT_VFIO_INTR_NUM        },
>>         {OPT_VMWARE_TSC_MAP,    0, NULL, OPT_VMWARE_TSC_MAP_NUM   },
>>         {OPT_XEN_DOM0,          0, NULL, OPT_XEN_DOM0_NUM         },
>> +#ifdef RTE_LIBRTE_HV_PMD
>> +       {OPT_NO_VMBUS,          0, NULL, OPT_NO_VMBUS_NUM         },
>> +       {OPT_VMBUS_BLACKLIST,   1, NULL, OPT_VMBUS_BLACKLIST_NUM  },
>> +       {OPT_VMBUS_WHITELIST,   1, NULL, OPT_VMBUS_WHITELIST_NUM  },
>> +#endif
>>         {0,                     0, NULL, 0                        }
>>  };
>>
>> @@ -858,6 +863,21 @@ eal_parse_common_option(int opt, const char *optarg,
>>                 conf->no_pci = 1;
>>                 break;
>>
>> +#ifdef RTE_LIBRTE_HV_PMD
>> +       case OPT_NO_VMBUS_NUM:
>> +               conf->no_vmbus = 1;
>> +               break;
>> +       case OPT_VMBUS_BLACKLIST_NUM:
>> +               if (rte_eal_devargs_add(RTE_DEVTYPE_BLACKLISTED_VMBUS,
>> +                                       optarg) < 0)
>> +                       return -1;
>> +               break;
>> +       case OPT_VMBUS_WHITELIST_NUM:
>> +               if (rte_eal_devargs_add(RTE_DEVTYPE_WHITELISTED_VMBUS,
>> +                               optarg) < 0)
>> +                       return -1;
>> +               break;
>> +#endif
>>         case OPT_NO_HPET_NUM:
>>                 conf->no_hpet = 1;
>>                 break;
>> @@ -1017,6 +1037,14 @@ eal_check_common_options(struct internal_config *internal_cfg)
>>                 return -1;
>>         }
>>
>> +#ifdef RTE_LIBRTE_HV_PMD
>> +       if (rte_eal_devargs_type_count(RTE_DEVTYPE_WHITELISTED_VMBUS) != 0 &&
>> +               rte_eal_devargs_type_count(RTE_DEVTYPE_BLACKLISTED_VMBUS) != 0) {
>> +               RTE_LOG(ERR, EAL, "Options vmbus blacklist and whitelist "
>> +                       "cannot be used at the same time\n");
>> +               return -1;
>> +       }
>> +#endif
>>         return 0;
>>  }
>>
>> @@ -1066,5 +1094,15 @@ eal_common_usage(void)
>>                "  --"OPT_NO_PCI"            Disable PCI\n"
>>                "  --"OPT_NO_HPET"           Disable HPET\n"
>>                "  --"OPT_NO_SHCONF"         No shared config (mmap'd files)\n"
>> +#ifdef RTE_LIBRTE_HV_PMD
>> +              "  --"OPT_NO_VMBUS"          Disable VMBUS\n"
>> +              "  --"OPT_VMBUS_BLACKLIST" Add a VMBUS device to black list.\n"
>> +              "                      Prevent EAL from using this PCI device. The argument\n"
>> +              "                      format is device UUID.\n"
>> +              "  --"OPT_VMBUS_WHITELIST" Add a VMBUS device to white list.\n"
>> +              "                      Only use the specified VMBUS devices. The argument format\n"
>> +              "                      is device UUID This option can be present\n"
>> +              "                      several times (once per device).\n"
>> +#endif
>>                "\n", RTE_MAX_LCORE);
>>  }
>> diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h
>> index 5f1367eb..4b6af937 100644
>> --- a/lib/librte_eal/common/eal_internal_cfg.h
>> +++ b/lib/librte_eal/common/eal_internal_cfg.h
>> @@ -67,6 +67,7 @@ struct internal_config {
>>         unsigned hugepage_unlink;         /**< true to unlink backing files */
>>         volatile unsigned xen_dom0_support; /**< support app running on Xen Dom0*/
>>         volatile unsigned no_pci;         /**< true to disable PCI */
>> +       volatile unsigned no_vmbus;       /**< true to disable VMBUS */
>>         volatile unsigned no_hpet;        /**< true to disable HPET */
>>         volatile unsigned vmware_tsc_map; /**< true to use VMware TSC mapping
>>                                                                                 * instead of native TSC */
>> diff --git a/lib/librte_eal/common/eal_options.h b/lib/librte_eal/common/eal_options.h
>> index a881c62e..156727e7 100644
>> --- a/lib/librte_eal/common/eal_options.h
>> +++ b/lib/librte_eal/common/eal_options.h
>> @@ -83,6 +83,12 @@ enum {
>>         OPT_VMWARE_TSC_MAP_NUM,
>>  #define OPT_XEN_DOM0          "xen-dom0"
>>         OPT_XEN_DOM0_NUM,
>> +#define OPT_NO_VMBUS          "no-vmbus"
>> +       OPT_NO_VMBUS_NUM,
>> +#define OPT_VMBUS_BLACKLIST   "vmbus-blacklist"
>> +       OPT_VMBUS_BLACKLIST_NUM,
>> +#define OPT_VMBUS_WHITELIST   "vmbus-whitelist"
>> +       OPT_VMBUS_WHITELIST_NUM,
>>         OPT_LONG_MAX_NUM
>>  };
>>
>> diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
>> index 9e7d8f6b..c856c63e 100644
>> --- a/lib/librte_eal/common/eal_private.h
>> +++ b/lib/librte_eal/common/eal_private.h
>> @@ -210,6 +210,11 @@ int pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx,
>>                 struct mapped_pci_resource *uio_res, int map_idx);
>>
>>  /**
>> + * VMBUS related functions and structures
>> + */
>> +int rte_eal_vmbus_init(void);
>> +
>> +/**
>>   * Init tail queues for non-EAL library structures. This is to allow
>>   * the rings, mempools, etc. lists to be shared among multiple processes
>>   *
>> diff --git a/lib/librte_eal/common/include/rte_devargs.h b/lib/librte_eal/common/include/rte_devargs.h
>> index 88120a1c..c079d289 100644
>> --- a/lib/librte_eal/common/include/rte_devargs.h
>> +++ b/lib/librte_eal/common/include/rte_devargs.h
>> @@ -51,6 +51,9 @@ extern "C" {
>>  #include <stdio.h>
>>  #include <sys/queue.h>
>>  #include <rte_pci.h>
>> +#ifdef RTE_LIBRTE_HV_PMD
>> +#include <uuid/uuid.h>
>> +#endif
>>
>>  /**
>>   * Type of generic device
>> @@ -59,6 +62,8 @@ enum rte_devtype {
>>         RTE_DEVTYPE_WHITELISTED_PCI,
>>         RTE_DEVTYPE_BLACKLISTED_PCI,
>>         RTE_DEVTYPE_VIRTUAL,
>> +       RTE_DEVTYPE_WHITELISTED_VMBUS,
>> +       RTE_DEVTYPE_BLACKLISTED_VMBUS,
>>  };
>>
>>  /**
>> @@ -88,6 +93,9 @@ struct rte_devargs {
>>                         /** Driver name. */
>>                         char drv_name[32];
>>                 } virt;
>> +#ifdef RTE_LIBRTE_HV_PMD
>> +               uuid_t uuid;
>> +#endif
>>         };
>>         /** Arguments string as given by user or "" for no argument. */
>>         char *args;
>> diff --git a/lib/librte_eal/common/include/rte_vmbus.h b/lib/librte_eal/common/include/rte_vmbus.h
>> new file mode 100644
>> index 00000000..f96d753e
>> --- /dev/null
>> +++ b/lib/librte_eal/common/include/rte_vmbus.h
>> @@ -0,0 +1,249 @@
>> +/*-
>> + *   BSD LICENSE
>> + *
>> + *   Copyright(c) 2013-2016 Brocade Communications Systems, Inc.
>> + *   Copyright(c) 2016 Microsoft Corporation
>> + *   All rights reserved.
>> + *
>> + *   Redistribution and use in source and binary forms, with or without
>> + *   modification, are permitted provided that the following conditions
>> + *   are met:
>> + *
>> + *     * Redistributions of source code must retain the above copyright
>> + *       notice, this list of conditions and the following disclaimer.
>> + *     * Redistributions in binary form must reproduce the above copyright
>> + *       notice, this list of conditions and the following disclaimer in
>> + *       the documentation and/or other materials provided with the
>> + *       distribution.
>> + *     * Neither the name of Intel Corporation nor the names of its
>> + *       contributors may be used to endorse or promote products derived
>> + *       from this software without specific prior written permission.
>> + *
>> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
>> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
>> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
>> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
>> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
>> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
>> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
>> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
>> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
>> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
>> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
>> + *
>> + */
>> +
>> +#ifndef _RTE_VMBUS_H_
>> +#define _RTE_VMBUS_H_
>> +
>> +/**
>> + * @file
>> + *
>> + * RTE VMBUS Interface
>> + */
>> +#ifdef __cplusplus
>> +extern "C" {
>> +#endif
>> +
>> +#include <stdio.h>
>> +#include <stdlib.h>
>> +#include <limits.h>
>> +#include <errno.h>
>> +#include <uuid/uuid.h>
>> +#include <sys/queue.h>
>> +#include <stdint.h>
>> +#include <inttypes.h>
>> +
>> +#include <rte_debug.h>
>> +#include <rte_interrupts.h>
>> +#include <rte_dev.h>
>> +
>> +TAILQ_HEAD(vmbus_device_list, rte_vmbus_device);
>> +TAILQ_HEAD(vmbus_driver_list, rte_vmbus_driver);
>> +
>> +extern struct vmbus_driver_list vmbus_driver_list;
>> +extern struct vmbus_device_list vmbus_device_list;
>> +
>> +/** Pathname of VMBUS devices directory. */
>> +#define SYSFS_VMBUS_DEVICES "/sys/bus/vmbus/devices"
>> +
>> +#define UUID_BUF_SZ    (36 + 1)
>> +
>> +
>> +/** Maximum number of VMBUS resources. */
>> +#define VMBUS_MAX_RESOURCE 7
>> +
>> +/**
>> + * A structure describing a VMBUS device.
>> + *
>> + * The structure ends with a flexible array member (sysfs_name), so the
>> + * storage for the name has to be allocated together with the structure.
>> + */
>> +struct rte_vmbus_device {
>> +       TAILQ_ENTRY(rte_vmbus_device) next;     /**< Next probed VMBUS device. */
>> +       struct rte_device device;               /**< Inherit core device */
>> +       uuid_t device_id;                       /**< VMBUS device id */
>> +       uuid_t class_id;                        /**< VMBUS device type */
>> +       uint32_t relid;                         /**< VMBUS id for notification */
>> +       uint8_t monitor_id;                     /**< NOTE(review): undocumented in the patch; presumably the VMBUS monitor id - confirm */
>> +       struct rte_intr_handle intr_handle;     /**< Interrupt handle */
>> +       const struct rte_vmbus_driver *driver;  /**< Associated driver */
>> +
>> +       struct rte_mem_resource mem_resource[VMBUS_MAX_RESOURCE];
>> +                                               /**< VMBUS Memory Resource */
>> +       char sysfs_name[];                      /**< Name in sysfs bus directory */
>> +};
>> +
>> +struct rte_vmbus_driver;
>> +
>> +/**
>> + * Initialisation function for the driver called during VMBUS probing.
>> + */
>> +typedef int (vmbus_probe_t)(struct rte_vmbus_driver *,
>> +                           struct rte_vmbus_device *);
>> +
>> +/**
>> + * Uninitialisation function for the driver called during hotplugging.
>> + */
>> +typedef int (vmbus_remove_t)(struct rte_vmbus_device *);
>> +
>> +/**
>> + * A structure describing a VMBUS driver.
>> + */
>> +struct rte_vmbus_driver {
>> +       TAILQ_ENTRY(rte_vmbus_driver) next;     /**< Next in list. */
>> +       struct rte_driver driver;               /**< Generic driver; name and type are filled in by RTE_PMD_REGISTER_VMBUS */
>> +       vmbus_probe_t *probe;                   /**< Device Probe function. */
>> +       vmbus_remove_t *remove;                 /**< Device Remove function. */
>> +
>> +       const uuid_t *id_table;                 /**< ID table. */
>> +};
>> +
>> +/**
>> + * Description of one mapped VMBUS memory region.
>> + */
>> +struct vmbus_map {
>> +       void *addr;       /**< Address the region is mapped at in this process */
>> +       char *path;       /**< Path of the file the region was mapped from */
>> +       uint64_t offset;  /**< Offset of the mapping */
>> +       uint64_t size;    /**< Size of the mapping in bytes */
>> +       uint64_t phaddr;  /**< Physical address of the region */
>> +};
>> +
>> +/*
>> + * For multi-process we need to reproduce all vmbus mappings in secondary
>> + * processes, so save them in a tailq.
>> + *
>> + * One entry describes the mappings of a single VMBUS device.
>> + */
>> +struct mapped_vmbus_resource {
>> +       TAILQ_ENTRY(mapped_vmbus_resource) next; /* linkage in the tailq */
>> +
>> +       uuid_t uuid;            /* id of the device the mappings belong to */
>> +       char path[PATH_MAX];    /* path of the UIO device node (/dev/uioN) */
>> +       int nb_maps;            /* presumably the number of valid entries in maps[] - confirm */
>> +       struct vmbus_map maps[VMBUS_MAX_RESOURCE]; /* per-region details */
>> +};
>> +
>> +TAILQ_HEAD(mapped_vmbus_res_list, mapped_vmbus_resource);
>> +
>> +/**
>> + * Scan the content of the VMBUS bus, and the devices in the devices list
>> + *
>> + * @return
>> + *  0 on success, negative on error
>> + */
>> +int rte_eal_vmbus_scan(void);
>> +
>> +/**
>> + * Probe the VMBUS bus for registered drivers.
>> + *
>> + * Scan the content of the VMBUS bus, and call the probe() function for
>> + * all registered drivers that have a matching entry in its id_table
>> + * for discovered devices.
>> + *
>> + * @return
>> + *   - 0 on success.
>> + *   - Negative on error.
>> + */
>> +int rte_eal_vmbus_probe(void);
>> +
>> +/**
>> + * Map the VMBUS device resources in user space virtual memory address
>> + *
>> + * @param dev
>> + *   A pointer to a rte_vmbus_device structure describing the device
>> + *   to use
>> + *
>> + * @return
>> + *   0 on success, negative on error and positive if no driver
>> + *   is found for the device.
>> + */
>> +int rte_eal_vmbus_map_device(struct rte_vmbus_device *dev);
>> +
>> +/**
>> + * Unmap this device
>> + *
>> + * @param dev
>> + *   A pointer to a rte_vmbus_device structure describing the device
>> + *   to use
>> + */
>> +void rte_eal_vmbus_unmap_device(struct rte_vmbus_device *dev);
>> +
>> +/**
>> + * Probe the single VMBUS device.
>> + *
>> + * Scan the content of the VMBUS bus, and find the vmbus device
>> + * specified by device uuid, then call the probe() function for
>> + * registered driver that has a matching entry in its id_table for
>> + * discovered device.
>> + *
>> + * @param id
>> + *   The VMBUS device uuid.
>> + * @return
>> + *   - 0 on success.
>> + *   - Negative on error.
>> + */
>> +int rte_eal_vmbus_probe_one(uuid_t id);
>> +
>> +/**
>> + * Close the single VMBUS device.
>> + *
>> + * Scan the content of the VMBUS bus, and find the vmbus device id,
>> + * then call the remove() function for registered driver that has a
>> + * matching entry in its id_table for discovered device.
>> + *
>> + * @param id
>> + *   The VMBUS device uuid.
>> + * @return
>> + *   - 0 on success.
>> + *   - Negative on error.
>> + */
>> +int rte_eal_vmbus_detach(uuid_t id);
>> +
>> +/**
>> + * Register a VMBUS driver.
>> + *
>> + * @param driver
>> + *   A pointer to a rte_vmbus_driver structure describing the driver
>> + *   to be registered.
>> + */
>> +void rte_eal_vmbus_register(struct rte_vmbus_driver *driver);
>> +
>> +/** Helper for VMBUS device registration from driver instance */
>> +#define RTE_PMD_REGISTER_VMBUS(nm, vmbus_drv) \
>> +RTE_INIT(vmbusinitfn_ ##nm); \
>> +static void vmbusinitfn_ ##nm(void) \
>> +{\
>> +       (vmbus_drv).driver.name = RTE_STR(nm);\
>> +       (vmbus_drv).driver.type = PMD_VMBUS; \
>> +       rte_eal_vmbus_register(&vmbus_drv); \
>> +} \
>> +RTE_PMD_EXPORT_NAME(nm, __COUNTER__)
>> +
>> +/**
>> + * Unregister a VMBUS driver.
>> + *
>> + * @param driver
>> + *   A pointer to a rte_vmbus_driver structure describing the driver
>> + *   to be unregistered.
>> + */
>> +void rte_eal_vmbus_unregister(struct rte_vmbus_driver *driver);
>
> The register/unregister functions also need to be exported via the map file.
>
>> +
>> +#ifdef __cplusplus
>> +}
>> +#endif
>> +
>> +#endif /* _RTE_VMBUS_H_ */
>> diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile
>> index 4e206f09..f6ca3848 100644
>> --- a/lib/librte_eal/linuxapp/eal/Makefile
>> +++ b/lib/librte_eal/linuxapp/eal/Makefile
>> @@ -71,6 +71,11 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_timer.c
>>  SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_interrupts.c
>>  SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_alarm.c
>>
>> +ifeq ($(CONFIG_RTE_LIBRTE_HV_PMD),y)
>> +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_vmbus.c
>> +LDLIBS += -luuid
>> +endif
>> +
>>  # from common dir
>>  SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_lcore.c
>>  SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_timer.c
>> @@ -114,6 +119,7 @@ CFLAGS_eal_hugepage_info.o := -D_GNU_SOURCE
>>  CFLAGS_eal_pci.o := -D_GNU_SOURCE
>>  CFLAGS_eal_pci_uio.o := -D_GNU_SOURCE
>>  CFLAGS_eal_pci_vfio.o := -D_GNU_SOURCE
>> +CFLAGS_eal_vmbus.o := -D_GNU_SOURCE
>>  CFLAGS_eal_common_whitelist.o := -D_GNU_SOURCE
>>  CFLAGS_eal_common_options.o := -D_GNU_SOURCE
>>  CFLAGS_eal_common_thread.o := -D_GNU_SOURCE
>> diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c
>> index 16dd5b9c..1bc0814a 100644
>> --- a/lib/librte_eal/linuxapp/eal/eal.c
>> +++ b/lib/librte_eal/linuxapp/eal/eal.c
>> @@ -70,6 +70,9 @@
>>  #include <rte_cpuflags.h>
>>  #include <rte_interrupts.h>
>>  #include <rte_pci.h>
>> +#ifdef RTE_LIBRTE_HV_PMD
>> +#include <rte_vmbus.h>
>> +#endif
>>  #include <rte_dev.h>
>>  #include <rte_devargs.h>
>>  #include <rte_common.h>
>> @@ -830,6 +833,11 @@ rte_eal_init(int argc, char **argv)
>>
>>         eal_check_mem_on_local_socket();
>>
>> +#ifdef RTE_LIBRTE_HV_PMD
>> +       if (rte_eal_vmbus_init() < 0)
>> +               RTE_LOG(ERR, EAL, "Cannot init VMBUS\n");
>> +#endif
>> +
>>         if (eal_plugins_init() < 0)
>>                 rte_panic("Cannot init plugins\n");
>>
>> @@ -884,6 +892,11 @@ rte_eal_init(int argc, char **argv)
>>         if (rte_eal_pci_probe())
>>                 rte_panic("Cannot probe PCI\n");
>>
>> +#ifdef RTE_LIBRTE_HV_PMD
>> +       if (rte_eal_vmbus_probe() < 0)
>> +               rte_panic("Cannot probe VMBUS\n");
>> +#endif
>> +
>>         if (rte_eal_dev_init() < 0)
>>                 rte_panic("Cannot init pmd devices\n");
>>
>> diff --git a/lib/librte_eal/linuxapp/eal/eal_vmbus.c b/lib/librte_eal/linuxapp/eal/eal_vmbus.c
>> new file mode 100644
>> index 00000000..729f93a9
>> --- /dev/null
>> +++ b/lib/librte_eal/linuxapp/eal/eal_vmbus.c
>> @@ -0,0 +1,911 @@
>> +/*-
>> + *   BSD LICENSE
>> + *
>> + *   Copyright(c) 2013-2016 Brocade Communications Systems, Inc.
>> + *   Copyright(c) 2016 Microsoft Corporation
>> + *   All rights reserved.
>> + *
>> + *   Redistribution and use in source and binary forms, with or without
>> + *   modification, are permitted provided that the following conditions
>> + *   are met:
>> + *
>> + *     * Redistributions of source code must retain the above copyright
>> + *      notice, this list of conditions and the following disclaimer.
>> + *     * Redistributions in binary form must reproduce the above copyright
>> + *      notice, this list of conditions and the following disclaimer in
>> + *      the documentation and/or other materials provided with the
>> + *      distribution.
>> + *     * Neither the name of Intel Corporation nor the names of its
>> + *      contributors may be used to endorse or promote products derived
>> + *      from this software without specific prior written permission.
>> + *
>> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
>> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
>> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
>> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
>> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
>> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
>> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
>> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
>> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
>> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
>> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
>> + *
>> + */
>> +
>> +#include <string.h>
>> +#include <unistd.h>
>> +#include <dirent.h>
>> +#include <fcntl.h>
>> +#include <sys/mman.h>
>> +
>> +#include <rte_eal.h>
>> +#include <rte_tailq.h>
>> +#include <rte_log.h>
>> +#include <rte_devargs.h>
>> +#include <rte_vmbus.h>
>> +#include <rte_malloc.h>
>> +
>> +#include "eal_private.h"
>> +#include "eal_pci_init.h"
>> +#include "eal_filesystem.h"
>> +
>> +/* List of VMBUS drivers (declared extern in rte_vmbus.h) */
>> +struct vmbus_driver_list vmbus_driver_list =
>> +       TAILQ_HEAD_INITIALIZER(vmbus_driver_list);
>> +/* List of VMBUS devices (declared extern in rte_vmbus.h) */
>> +struct vmbus_device_list vmbus_device_list =
>> +       TAILQ_HEAD_INITIALIZER(vmbus_device_list);
>> +
>> +/*
>> + * Address hint for the next resource mapping. Set lazily from
>> + * pci_find_max_end_va() and then advanced past each new mapping by
>> + * vmbus_uio_map_resource_by_index().
>> + */
>> +static void *vmbus_map_addr;
>> +
>> +static struct rte_tailq_elem rte_vmbus_uio_tailq = {
>> +       .name = "UIO_RESOURCE_LIST",

This should be "VMBUS_UIO_RESOURCE_LIST" so that it does not collide with rte_uio_tailq.

>> +};
>> +EAL_REGISTER_TAILQ(rte_vmbus_uio_tailq);
>> +
>> +/*
>> + * Read the first line of a sysfs attribute and parse it as a UUID.
>> + * A leading '{' (and the matching '}') is stripped before parsing.
>> + * Returns 0 with the result stored in uu, or -1 after logging the failure.
>> + */
>> +static int
>> +vmbus_get_sysfs_uuid(const char *filename, uuid_t uu)
>> +{
>> +       char line[BUFSIZ];
>> +       char *text = line;
>> +       char *stop;
>> +       FILE *fp = fopen(filename, "r");
>> +
>> +       if (fp == NULL) {
>> +               RTE_LOG(ERR, EAL, "%s(): cannot open sysfs value %s\n",
>> +                               __func__, filename);
>> +               return -1;
>> +       }
>> +
>> +       if (fgets(line, sizeof(line), fp) == NULL) {
>> +               RTE_LOG(ERR, EAL, "%s(): cannot read sysfs value %s\n",
>> +                               __func__, filename);
>> +               fclose(fp);
>> +               return -1;
>> +       }
>> +       fclose(fp);
>> +
>> +       /* cut the line at the newline, if there is one */
>> +       stop = strchr(line, '\n');
>> +       if (stop != NULL)
>> +               *stop = '\0';
>> +
>> +       /* strip { } notation */
>> +       if (line[0] == '{') {
>> +               text = line + 1;
>> +               stop = strchr(text, '}');
>> +               if (stop != NULL)
>> +                       *stop = '\0';
>> +       }
>> +
>> +       if (uuid_parse(text, uu) >= 0)
>> +               return 0;
>> +
>> +       RTE_LOG(ERR, EAL, "%s %s not a valid UUID\n",
>> +               filename, line);
>> +       return -1;
>> +}
>> +
>> +/*
>> + * Map a particular resource from a file.
>> + *
>> + * requested_addr is handed to mmap() as the address hint (may be NULL)
>> + * and flags is OR-ed into MAP_SHARED; the mapping is readable and
>> + * writable.
>> + *
>> + * Returns whatever mmap() returned, i.e. MAP_FAILED on failure. If a
>> + * non-NULL requested_addr was not honoured the mapping is still returned
>> + * and the mismatch is logged.
>> + */
>> +static void *
>> +vmbus_map_resource(void *requested_addr, int fd, off_t offset, size_t size,
>> +                  int flags)
>> +{
>> +       void *mapaddr;
>> +
>> +       /* Map the memory resource of device */
>> +       mapaddr = mmap(requested_addr, size, PROT_READ | PROT_WRITE,
>> +                      MAP_SHARED | flags, fd, offset);
>> +       if (mapaddr == MAP_FAILED) {
>> +               RTE_LOG(ERR, EAL,
>> +                       "%s(): cannot mmap(%d, %p, 0x%lx, 0x%lx): %s\n",
>> +                       __func__, fd, requested_addr,
>> +                       (unsigned long)size, (unsigned long)offset,
>> +                       strerror(errno));
>> +       } else if (requested_addr != NULL && mapaddr != requested_addr) {
>> +               /* mmap() succeeded here, so errno carries no information */
>> +               RTE_LOG(ERR, EAL,
>> +                       "%s(): mmap(%d, %p, 0x%lx, 0x%lx) mapped at %p instead of the requested address\n",
>> +                       __func__, fd, requested_addr,
>> +                       (unsigned long)size, (unsigned long)offset,
>> +                       mapaddr);
>> +       } else {
>> +               RTE_LOG(DEBUG, EAL, "  VMBUS memory mapped at %p\n", mapaddr);
>> +       }
>> +
>> +       return mapaddr;
>> +}
>> +
>> +/*
>> + * Remove a mapping of size bytes starting at requested_addr.
>> + * A NULL address is ignored; the outcome of munmap() is only logged.
>> + */
>> +static void
>> +vmbus_unmap_resource(void *requested_addr, size_t size)
>> +{
>> +       if (requested_addr != NULL) {
>> +               /* Unmap the VMBUS memory resource of device */
>> +               if (munmap(requested_addr, size) == 0) {
>> +                       RTE_LOG(DEBUG, EAL, "  VMBUS memory unmapped at %p\n",
>> +                                       requested_addr);
>> +               } else {
>> +                       RTE_LOG(ERR, EAL, "%s(): cannot munmap(%p, 0x%lx): %s\n",
>> +                               __func__, requested_addr, (unsigned long)size,
>> +                               strerror(errno));
>> +               }
>> +       }
>> +}
>> +
>> +/*
>> + * Find the UIO device bound to the VMBUS device called name.
>> + *
>> + * Looks in /sys/bus/vmbus/devices/<name>/uio for the first entry named
>> + * "uio<N>", writes the path of that entry to dstbuf and returns N.
>> + * Returns -1 if the directory cannot be opened or has no such entry.
>> + *
>> + * Only the current kernel layout is supported. Unlike PCI there is no
>> + * option (or need) to create the UIO device.
>> + */
>> +static int vmbus_get_uio_dev(const char *name,
>> +                            char *dstbuf, size_t buflen)
>> +{
>> +       char uio_dir[PATH_MAX];
>> +       struct dirent *entry;
>> +       unsigned int idx;
>> +       int found = -1;
>> +       DIR *dp;
>> +
>> +       snprintf(uio_dir, sizeof(uio_dir),
>> +                "/sys/bus/vmbus/devices/%s/uio", name);
>> +
>> +       dp = opendir(uio_dir);
>> +       if (dp == NULL) {
>> +               RTE_LOG(ERR, EAL, "Cannot map uio resources for %s: %s\n",
>> +                       name, strerror(errno));
>> +               return -1;
>> +       }
>> +
>> +       /* the first entry whose name starts with "uio<number>" wins */
>> +       for (entry = readdir(dp); entry != NULL; entry = readdir(dp)) {
>> +               if (sscanf(entry->d_name, "uio%u", &idx) == 1) {
>> +                       snprintf(dstbuf, buflen, "%s/uio%u", uio_dir, idx);
>> +                       found = (int) idx;
>> +                       break;
>> +               }
>> +       }
>> +       closedir(dp);
>> +
>> +       return found;
>> +}
>> +
>> +/*
>> + * parse a sysfs file containing one integer value
>> + * different to the eal version, as it needs to work with 64-bit values
>> + *
>> + * The file <dir>/<name> must hold a single number (any base accepted by
>> + * strtoull() with base 0) followed by a newline.
>> + *
>> + * Returns 0 and stores the number in *val on success. Returns -1 after
>> + * logging the failure if the file cannot be opened or read, holds no
>> + * digits, has trailing characters before the newline, or the number does
>> + * not fit in 64 bits; *val is left untouched in that case.
>> + */
>> +static int
>> +vmbus_parse_sysfs_value(const char *dir, const char *name,
>> +                       uint64_t *val)
>> +{
>> +       char filename[PATH_MAX];
>> +       FILE *f;
>> +       char buf[BUFSIZ];
>> +       char *end = NULL;
>> +       unsigned long long parsed;
>> +
>> +       snprintf(filename, sizeof(filename), "%s/%s", dir, name);
>> +       f = fopen(filename, "r");
>> +       if (f == NULL) {
>> +               RTE_LOG(ERR, EAL, "%s(): cannot open sysfs value %s\n",
>> +                               __func__, filename);
>> +               return -1;
>> +       }
>> +
>> +       if (fgets(buf, sizeof(buf), f) == NULL) {
>> +               RTE_LOG(ERR, EAL, "%s(): cannot read sysfs value %s\n",
>> +                               __func__, filename);
>> +               fclose(f);
>> +               return -1;
>> +       }
>> +       fclose(f);
>> +
>> +       /* errno is the only way to tell overflow from a valid ULLONG_MAX */
>> +       errno = 0;
>> +       parsed = strtoull(buf, &end, 0);
>> +
>> +       /* end == buf means that no digits were consumed at all */
>> +       if (end == buf || *end != '\n' || errno == ERANGE) {
>> +               RTE_LOG(ERR, EAL, "%s(): cannot parse sysfs value %s\n",
>> +                               __func__, filename);
>> +               return -1;
>> +       }
>> +
>> +       *val = parsed;
>> +       return 0;
>> +}
>> +
>> +/*
>> + * Get mappings out of values provided by uio.
>> + *
>> + * Walks <uioname>/maps/map0, map1, ... (at most VMBUS_MAX_RESOURCE
>> + * entries), stopping at the first directory that does not exist, and
>> + * stores the offset, size and physical address of map N in maps[N].
>> + *
>> + * Returns the number of maps found, or -1 if a value cannot be read.
>> + */
>> +static int
>> +vmbus_uio_get_mappings(const char *uioname,
>> +                      struct vmbus_map maps[])
>> +{
>> +       int i;
>> +
>> +       for (i = 0; i != VMBUS_MAX_RESOURCE; i++) {
>> +               struct vmbus_map *map = &maps[i];
>> +               char dirname[PATH_MAX];
>> +
>> +               /* check if map directory exists */
>> +               snprintf(dirname, sizeof(dirname),
>> +                        "%s/maps/map%d", uioname, i);
>> +
>> +               if (access(dirname, F_OK) != 0)
>> +                       break;
>> +
>> +               /* get mapping offset */
>> +               if (vmbus_parse_sysfs_value(dirname, "offset",
>> +                                           &map->offset) < 0)
>> +                       return -1;
>> +
>> +               /* get mapping size */
>> +               if (vmbus_parse_sysfs_value(dirname, "size",
>> +                                           &map->size) < 0)
>> +                       return -1;
>> +
>> +               /*
>> +                * get mapping physical address; it belongs to the current
>> +                * entry (map), not to the first element of the array
>> +                */
>> +               if (vmbus_parse_sysfs_value(dirname, "addr",
>> +                                           &map->phaddr) < 0)
>> +                       return -1;
>> +       }
>> +
>> +       return i;
>> +}
>> +
>> +/*
>> + * Free the mapping bookkeeping of a device and close its interrupt fd.
>> + *
>> + * uio_res may be NULL (it is passed straight to rte_free()). The fd is
>> + * closed and reset to -1 whenever it is non-zero.
>> + *
>> + * NOTE(review): 0 is treated as "no fd" here, so a handle that is
>> + * already -1 still reaches close(); confirm that this is intended.
>> + */
>> +static void
>> +vmbus_uio_free_resource(struct rte_vmbus_device *dev,
>> +               struct mapped_vmbus_resource *uio_res)
>> +{
>> +       struct rte_intr_handle *handle = &dev->intr_handle;
>> +
>> +       rte_free(uio_res);
>> +
>> +       if (handle->fd == 0)
>> +               return;
>> +
>> +       close(handle->fd);
>> +       handle->fd = -1;
>> +       handle->type = RTE_INTR_HANDLE_UNKNOWN;
>> +}
>> +
>> +/*
>> + * Set up the UIO bookkeeping for a VMBUS device.
>> + *
>> + * Finds the UIO device of dev, opens /dev/uio<N> as the interrupt fd of
>> + * dev and records the node path, the device id and the attributes of the
>> + * UIO maps in a newly allocated mapped_vmbus_resource.
>> + *
>> + * Returns the new resource, or NULL on failure. Unless no UIO device was
>> + * found at all, failures are cleaned up via vmbus_uio_free_resource().
>> + */
>> +static struct mapped_vmbus_resource *
>> +vmbus_uio_alloc_resource(struct rte_vmbus_device *dev)
>> +{
>> +       char uio_sysfs[PATH_MAX], uio_node[PATH_MAX];
>> +       struct mapped_vmbus_resource *res;
>> +       int uio_idx, map_count;
>> +
>> +       uio_idx = vmbus_get_uio_dev(dev->sysfs_name, uio_sysfs,
>> +                                   sizeof(uio_sysfs));
>> +       if (uio_idx < 0) {
>> +               RTE_LOG(WARNING, EAL,
>> +                       "  %s not managed by UIO driver, skipping\n",
>> +                       dev->sysfs_name);
>> +               return NULL;
>> +       }
>> +
>> +       /* the mapping details are kept for the secondary processes */
>> +       res = rte_zmalloc("UIO_RES", sizeof(*res), 0);
>> +       if (res == NULL) {
>> +               RTE_LOG(ERR, EAL,
>> +                       "%s(): cannot store uio mmap details\n", __func__);
>> +               goto fail;
>> +       }
>> +
>> +       /* the UIO device node doubles as the interrupt file descriptor */
>> +       snprintf(uio_node, sizeof(uio_node), "/dev/uio%u", uio_idx);
>> +       dev->intr_handle.fd = open(uio_node, O_RDWR);
>> +       if (dev->intr_handle.fd < 0) {
>> +               RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
>> +                       uio_node, strerror(errno));
>> +               goto fail;
>> +       }
>> +       dev->intr_handle.type = RTE_INTR_HANDLE_UIO_INTX;
>> +
>> +       /* remember which device and which node the mappings belong to */
>> +       uuid_copy(res->uuid, dev->device_id);
>> +       snprintf(res->path, sizeof(res->path), "%s", uio_node);
>> +
>> +       map_count = vmbus_uio_get_mappings(uio_sysfs, res->maps);
>> +       if (map_count < 0)
>> +               goto fail;
>> +
>> +       RTE_LOG(DEBUG, EAL, "Found %d memory maps for device %s\n",
>> +               map_count, dev->sysfs_name);
>> +
>> +       return res;
>> +
>> + fail:
>> +       vmbus_uio_free_resource(dev, res);
>> +       return NULL;
>> +}
>> +
>> +static int
>> +vmbus_uio_map_resource_by_index(struct rte_vmbus_device *dev,
>> +                               unsigned int res_idx,
>> +                               struct mapped_vmbus_resource *uio_res,
>> +                               unsigned int map_idx)
>> +{
>> +       struct vmbus_map *maps = uio_res->maps;
>> +       char devname[PATH_MAX];
>> +       void *mapaddr;
>> +       int fd;
>> +
>> +       snprintf(devname, sizeof(devname),
>> +                "/sys/bus/vmbus/%s/resource%u", dev->sysfs_name, res_idx);
>> +
>> +       fd = open(devname, O_RDWR);
>> +       if (fd < 0) {
>> +               RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
>> +                               devname, strerror(errno));
>> +               return -1;
>> +       }
>> +
>> +       /* allocate memory to keep path */
>> +       maps[map_idx].path = rte_malloc(NULL, strlen(devname) + 1, 0);
>> +       if (maps[map_idx].path == NULL) {
>> +               RTE_LOG(ERR, EAL, "Cannot allocate memory for path: %s\n",
>> +                               strerror(errno));
>> +               return -1;
>> +       }
>> +
>> +       /* try mapping somewhere close to the end of hugepages */
>> +       if (vmbus_map_addr == NULL)
>> +               vmbus_map_addr = pci_find_max_end_va();
>> +
>> +       mapaddr = vmbus_map_resource(vmbus_map_addr, fd, 0,
>> +                                    dev->mem_resource[res_idx].len, 0);
>> +       close(fd);
>> +       if (mapaddr == MAP_FAILED) {
>> +               rte_free(maps[map_idx].path);
>> +               return -1;
>> +       }
>> +
>> +       vmbus_map_addr = RTE_PTR_ADD(mapaddr,
>> +                                    dev->mem_resource[res_idx].len);
>> +
>> +       maps[map_idx].phaddr = dev->mem_resource[res_idx].phys_addr;
>> +       maps[map_idx].size = dev->mem_resource[res_idx].len;
>> +       maps[map_idx].addr = mapaddr;
>> +       maps[map_idx].offset = 0;
>> +       strcpy(maps[map_idx].path, devname);
>> +       dev->mem_resource[res_idx].addr = mapaddr;
>> +
>> +       return 0;
>> +}
>> +
>> +static void
>> +vmbus_uio_unmap(struct mapped_vmbus_resource *uio_res)
>> +{
>> +       int i;
>> +
>> +       if (uio_res == NULL)
>> +               return;
>> +
>> +       for (i = 0; i != uio_res->nb_maps; i++) {
>> +               vmbus_unmap_resource(uio_res->maps[i].addr,
>> +                                    uio_res->maps[i].size);
>> +
>> +               if (rte_eal_process_type() == RTE_PROC_PRIMARY)
>> +                       rte_free(uio_res->maps[i].path);
>> +       }
>> +}
>> +
>> +static struct mapped_vmbus_resource *
>> +vmbus_uio_find_resource(struct rte_vmbus_device *dev)
>> +{
>> +       struct mapped_vmbus_resource *uio_res;
>> +       struct mapped_vmbus_res_list *uio_res_list =
>> +                       RTE_TAILQ_CAST(rte_vmbus_uio_tailq.head,
>> +                                      mapped_vmbus_res_list);
>> +
>> +       if (dev == NULL)
>> +               return NULL;
>> +
>> +       TAILQ_FOREACH(uio_res, uio_res_list, next) {
>> +               if (uuid_compare(uio_res->uuid, dev->device_id) == 0)
>> +                       return uio_res;
>> +       }
>> +       return NULL;
>> +}
>> +
>> +/* unmap the VMBUS resource of a VMBUS device in virtual memory */
>> +static void
>> +vmbus_uio_unmap_resource(struct rte_vmbus_device *dev)
>> +{
>> +       struct mapped_vmbus_resource *uio_res;
>> +       struct mapped_vmbus_res_list *uio_res_list =
>> +                       RTE_TAILQ_CAST(rte_vmbus_uio_tailq.head,
>> +                                      mapped_vmbus_res_list);
>> +
>> +       if (dev == NULL)
>> +               return;
>> +
>> +       /* find an entry for the device */
>> +       uio_res = vmbus_uio_find_resource(dev);
>> +       if (uio_res == NULL)
>> +               return;
>> +
>> +       /* secondary processes - just free maps */
>> +       if (rte_eal_process_type() != RTE_PROC_PRIMARY)
>> +               return vmbus_uio_unmap(uio_res);
>> +
>> +       TAILQ_REMOVE(uio_res_list, uio_res, next);
>> +
>> +       /* unmap all resources */
>> +       vmbus_uio_unmap(uio_res);
>> +
>> +       /* free uio resource */
>> +       rte_free(uio_res);
>> +
>> +       /* close fd if in primary process */
>> +       close(dev->intr_handle.fd);
>> +       if (dev->intr_handle.uio_cfg_fd >= 0) {
>> +               close(dev->intr_handle.uio_cfg_fd);
>> +               dev->intr_handle.uio_cfg_fd = -1;
>> +       }
>> +
>> +       dev->intr_handle.fd = -1;
>> +       dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
>> +}
>> +
>> +static int
>> +vmbus_uio_map_secondary(struct rte_vmbus_device *dev)
>> +{
>> +       struct mapped_vmbus_resource *uio_res;
>> +       struct mapped_vmbus_res_list *uio_res_list =
>> +                       RTE_TAILQ_CAST(rte_vmbus_uio_tailq.head,
>> +                                      mapped_vmbus_res_list);
>> +
>> +       TAILQ_FOREACH(uio_res, uio_res_list, next) {
>> +               int i;
>> +
>> +               /* skip this element if it doesn't match our id */
>> +               if (uuid_compare(uio_res->uuid, dev->device_id))
>> +                       continue;
>> +
>> +               for (i = 0; i != uio_res->nb_maps; i++) {
>> +                       void *mapaddr;
>> +                       int fd;
>> +
>> +                       fd = open(uio_res->maps[i].path, O_RDWR);
>> +                       if (fd < 0) {
>> +                               RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
>> +                                       uio_res->maps[i].path, strerror(errno));
>> +                               return -1;
>> +                       }
>> +
>> +                       mapaddr = vmbus_map_resource(uio_res->maps[i].addr, fd,
>> +                                                    uio_res->maps[i].offset,
>> +                                                    uio_res->maps[i].size, 0);
>> +                       /* fd is not needed in secondary process, close it */
>> +                       close(fd);
>> +
>> +                       if (mapaddr == uio_res->maps[i].addr)
>> +                               continue;
>> +
>> +                       RTE_LOG(ERR, EAL,
>> +                               "Cannot mmap device resource file %s to address: %p\n",
>> +                               uio_res->maps[i].path,
>> +                               uio_res->maps[i].addr);
>> +
>> +                       /* unmap addrs correctly mapped */
>> +                       while (i != 0) {
>> +                               --i;
>> +                               vmbus_unmap_resource(uio_res->maps[i].addr,
>> +                                                    uio_res->maps[i].size);
>> +                       }
>> +                       return -1;
>> +
>> +               }
>> +               return 0;
>> +       }
>> +
>> +       RTE_LOG(ERR, EAL, "Cannot find resource for device\n");
>> +       return 1;
>> +}
>> +
>> +/* map the resources of a vmbus device in virtual memory */
>> +int
>> +rte_eal_vmbus_map_device(struct rte_vmbus_device *dev)
>> +{
>> +       struct mapped_vmbus_resource *uio_res;
>> +       struct mapped_vmbus_res_list *uio_res_list =
>> +               RTE_TAILQ_CAST(rte_vmbus_uio_tailq.head, mapped_vmbus_res_list);
>> +       int i, ret, map_idx = 0;
>> +
>> +       dev->intr_handle.fd = -1;
>> +       dev->intr_handle.uio_cfg_fd = -1;
>> +       dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
>> +
>> +       /* secondary processes - use already recorded details */
>> +       if (rte_eal_process_type() != RTE_PROC_PRIMARY)
>> +               return vmbus_uio_map_secondary(dev);
>> +
>> +       /* allocate uio resource */
>> +       uio_res = vmbus_uio_alloc_resource(dev);
>> +       if (uio_res == NULL)
>> +               return -1;
>> +
>> +       /* Map all BARs */
>> +       for (i = 0; i != VMBUS_MAX_RESOURCE; i++) {
>> +               uint64_t phaddr;
>> +
>> +               /* skip empty BAR */
>> +               phaddr = dev->mem_resource[i].phys_addr;
>> +               if (phaddr == 0)
>> +                       continue;
>> +
>> +               ret = vmbus_uio_map_resource_by_index(dev, i,
>> +                                                     uio_res, map_idx);
>> +               if (ret)
>> +                       goto error;
>> +
>> +               map_idx++;
>> +       }
>> +
>> +       uio_res->nb_maps = map_idx;
>> +
>> +       TAILQ_INSERT_TAIL(uio_res_list, uio_res, next);
>> +
>> +       return 0;
>> +error:
>> +       for (i = 0; i < map_idx; i++) {
>> +               vmbus_unmap_resource(uio_res->maps[i].addr,
>> +                                    uio_res->maps[i].size);
>> +               rte_free(uio_res->maps[i].path);
>> +       }
>> +       vmbus_uio_free_resource(dev, uio_res);
>> +       return -1;
>> +}
>> +
>> +/* Scan one vmbus sysfs entry, and fill the devices list from it. */
>> +static int
>> +vmbus_scan_one(const char *name)
>> +{
>> +       struct rte_vmbus_device *dev, *dev2;
>> +       char filename[PATH_MAX];
>> +       char dirname[PATH_MAX];
>> +       unsigned long tmp;
>> +
>> +       dev = malloc(sizeof(*dev) + strlen(name) + 1);
>> +       if (dev == NULL)
>> +               return -1;
>> +
>> +       memset(dev, 0, sizeof(*dev));
>> +       strcpy(dev->sysfs_name, name);
>> +       if (dev->sysfs_name == NULL)
>> +               goto error;
>> +
>> +       /* sysfs base directory
>> +        *   /sys/bus/vmbus/devices/7a08391f-f5a0-4ac0-9802-d13fd964f8df
>> +        * or on older kernel
>> +        *   /sys/bus/vmbus/devices/vmbus_1
>> +        */
>> +       snprintf(dirname, sizeof(dirname), "%s/%s",
>> +                SYSFS_VMBUS_DEVICES, name);
>> +
>> +       /* get device id */
>> +       snprintf(filename, sizeof(filename), "%s/device_id", dirname);
>> +       if (vmbus_get_sysfs_uuid(filename, dev->device_id) < 0)
>> +               goto error;
>> +
>> +       /* get device class  */
>> +       snprintf(filename, sizeof(filename), "%s/class_id", dirname);
>> +       if (vmbus_get_sysfs_uuid(filename, dev->class_id) < 0)
>> +               goto error;
>> +
>> +       /* get relid */
>> +       snprintf(filename, sizeof(filename), "%s/id", dirname);
>> +       if (eal_parse_sysfs_value(filename, &tmp) < 0)
>> +               goto error;
>> +       dev->relid = tmp;
>> +
>> +       /* get monitor id */
>> +       snprintf(filename, sizeof(filename), "%s/monitor_id", dirname);
>> +       if (eal_parse_sysfs_value(filename, &tmp) < 0)
>> +               goto error;
>> +       dev->monitor_id = tmp;
>> +
>> +       /* get numa node */
>> +       snprintf(filename, sizeof(filename), "%s/numa_node",
>> +                dirname);
>> +       if (eal_parse_sysfs_value(filename, &tmp) < 0)
>> +               /* if no NUMA support, set default to 0 */
>> +               dev->device.numa_node = 0;
>> +       else
>> +               dev->device.numa_node = tmp;
>> +
>> +       /* device is valid, add in list (sorted) */
>> +       RTE_LOG(DEBUG, EAL, "Adding vmbus device %s\n", name);
>> +
>> +       TAILQ_FOREACH(dev2, &vmbus_device_list, next) {
>> +               int ret;
>> +
>> +               ret = uuid_compare(dev->device_id, dev->device_id);
>> +               if (ret > 0)
>> +                       continue;
>> +
>> +               if (ret < 0) {
>> +                       TAILQ_INSERT_BEFORE(dev2, dev, next);
>> +                       rte_eal_device_insert(&dev->device);
>> +               } else { /* already registered */
>> +                       memmove(dev2->mem_resource, dev->mem_resource,
>> +                               sizeof(dev->mem_resource));
>> +                       free(dev);
>> +               }
>> +               return 0;
>> +       }
>> +
>> +       rte_eal_device_insert(&dev->device);
>> +       TAILQ_INSERT_TAIL(&vmbus_device_list, dev, next);
>> +
>> +       return 0;
>> +error:
>> +       free(dev);
>> +       return -1;
>> +}
>> +
>> +/*
>> + * Scan the content of the vmbus, and add the devices to the devices list
>> + */
>> +static int
>> +vmbus_scan(void)
>> +{
>> +       struct dirent *e;
>> +       DIR *dir;
>> +
>> +       dir = opendir(SYSFS_VMBUS_DEVICES);
>> +       if (dir == NULL) {
>> +               if (errno == ENOENT)
>> +                       return 0;
>> +
>> +               RTE_LOG(ERR, EAL, "%s(): opendir failed: %s\n",
>> +                       __func__, strerror(errno));
>> +               return -1;
>> +       }
>> +
>> +       while ((e = readdir(dir)) != NULL) {
>> +               if (e->d_name[0] == '.')
>> +                       continue;
>> +
>> +               if (vmbus_scan_one(e->d_name) < 0)
>> +                       goto error;
>> +       }
>> +       closedir(dir);
>> +       return 0;
>> +
>> +error:
>> +       closedir(dir);
>> +       return -1;
>> +}
>> +
>> +/* Init the VMBUS EAL subsystem */
>> +int rte_eal_vmbus_init(void)
>> +{
>> +       /* VMBUS can be disabled */
>> +       if (internal_config.no_vmbus)
>> +               return 0;
>> +
>> +       if (vmbus_scan() < 0) {
>> +               RTE_LOG(ERR, EAL, "%s(): Cannot scan vmbus\n", __func__);
>> +               return -1;
>> +       }
>> +       return 0;
>> +}
>> +
>> +/* Below is PROBE part of eal_vmbus library */
>> +
>> +/*
>> + * If the device class ID matches, call the probe() function of the driver.
>> + */
>> +static int
>> +rte_eal_vmbus_probe_one_driver(struct rte_vmbus_driver *dr,
>> +                              struct rte_vmbus_device *dev)
>> +{
>> +       const uuid_t *id_table;
>> +
>> +       RTE_LOG(DEBUG, EAL, "  probe driver: %s\n", dr->driver.name);
>> +
>> +       for (id_table = dr->id_table; !uuid_is_null(*id_table); ++id_table) {
>> +               struct rte_devargs *args;
>> +               char guid[UUID_BUF_SZ];
>> +               int ret;
>> +
>> +               /* skip devices not associated with this device class */
>> +               if (uuid_compare(*id_table, dev->class_id) != 0)
>> +                       continue;
>> +
>> +               uuid_unparse(dev->device_id, guid);
>> +               RTE_LOG(INFO, EAL, "VMBUS device %s on NUMA socket %i\n",
>> +                       guid, dev->device.numa_node);
>> +
>> +               /* no initialization when blacklisted, return without error */
>> +               args = dev->device.devargs;
>> +               if (args && args->type == RTE_DEVTYPE_BLACKLISTED_VMBUS) {
>> +                       RTE_LOG(INFO, EAL, "  Device is blacklisted, not initializing\n");
>> +                       return 1;
>> +               }
>> +
>> +               RTE_LOG(INFO, EAL, "  probe driver: %s\n", dr->driver.name);
>> +
>> +               /* map resources for device */
>> +               ret = rte_eal_vmbus_map_device(dev);
>> +               if (ret != 0)
>> +                       return ret;
>> +
>> +               /* reference driver structure */
>> +               dev->driver = dr;
>> +
>> +               /* call the driver probe() function */
>> +               ret = dr->probe(dr, dev);
>> +               if (ret)
>> +                       dev->driver = NULL;
>> +
>> +               return ret;
>> +       }
>> +
>> +       /* return positive value if driver doesn't support this device */
>> +       return 1;
>> +}
>> +
>> +
>> +/*
>> + * If the device class ID matches, call the remove() function of the
>> + * driver.
>> + */
>> +static int
>> +vmbus_detach_dev(struct rte_vmbus_driver *dr,
>> +                struct rte_vmbus_device *dev)
>> +{
>> +       const uuid_t *id_table;
>> +
>> +       for (id_table = dr->id_table; !uuid_is_null(*id_table); ++id_table) {
>> +               char guid[UUID_BUF_SZ];
>> +
>> +               /* skip devices not associated with this device class */
>> +               if (uuid_compare(*id_table, dev->class_id) != 0)
>> +                       continue;
>> +
>> +               uuid_unparse(dev->device_id, guid);
>> +               RTE_LOG(INFO, EAL, "VMBUS device %s on NUMA socket %i\n",
>> +                       guid, dev->device.numa_node);
>> +
>> +               RTE_LOG(DEBUG, EAL, "  remove driver: %s\n", dr->driver.name);
>> +
>> +               if (dr->remove && (dr->remove(dev) < 0))
>> +                       return -1;      /* negative value is an error */
>> +
>> +               /* clear driver structure */
>> +               dev->driver = NULL;
>> +
>> +               vmbus_uio_unmap_resource(dev);
>> +               return 0;
>> +       }
>> +
>> +       /* return positive value if driver doesn't support this device */
>> +       return 1;
>> +}
>> +
>> +/*
>> + * Call the probe() function of all
>> + * registered drivers for the vmbus device. Return -1 if a driver fails
>> + * to probe, 1 if no driver is found for this class of vmbus device.
>> + * The present assumption is that we have drivers only for vmbus network
>> + * devices. That's why we don't check driver's id_table now.
>> + */
>> +static int
>> +vmbus_probe_all_drivers(struct rte_vmbus_device *dev)
>> +{
>> +       struct rte_vmbus_driver *dr = NULL;
>> +       int ret;
>> +
>> +       TAILQ_FOREACH(dr, &vmbus_driver_list, next) {
>> +               ret = rte_eal_vmbus_probe_one_driver(dr, dev);
>> +               if (ret < 0) {
>> +                       /* negative value is an error */
>> +                       RTE_LOG(ERR, EAL, "Failed to probe driver %s\n",
>> +                               dr->driver.name);
>> +                       return -1;
>> +               }
>> +               /* positive value means driver doesn't support it */
>> +               if (ret > 0)
>> +                       continue;
>> +
>> +               return 0;
>> +       }
>> +
>> +       return 1;
>> +}
>> +
>> +
>> +/*
>> + * If device ID matches, call the remove() function of all
>> + * registered drivers for the given device. Return -1 if removal
>> + * failed, return 1 if no driver is found for this device.
>> + */
>> +static int
>> +vmbus_detach_all_drivers(struct rte_vmbus_device *dev)
>> +{
>> +       struct rte_vmbus_driver *dr;
>> +       int rc = 0;
>> +
>> +       if (dev == NULL)
>> +               return -1;
>> +
>> +       TAILQ_FOREACH(dr, &vmbus_driver_list, next) {
>> +               rc = vmbus_detach_dev(dr, dev);
>> +               if (rc < 0)
>> +                       /* negative value is an error */
>> +                       return -1;
>> +               if (rc > 0)
>> +                       /* positive value means driver doesn't support it */
>> +                       continue;
>> +               return 0;
>> +       }
>> +       return 1;
>> +}
>> +
>> +/* Detach device specified by its VMBUS id */
>> +int
>> +rte_eal_vmbus_detach(uuid_t device_id)
>> +{
>> +       struct rte_vmbus_device *dev;
>> +       char ubuf[UUID_BUF_SZ];
>> +
>> +       TAILQ_FOREACH(dev, &vmbus_device_list, next) {
>> +               if (uuid_compare(dev->device_id, device_id) != 0)
>> +                       continue;
>> +
>> +               if (vmbus_detach_all_drivers(dev) < 0)
>> +                       goto err_return;
>> +
>> +               TAILQ_REMOVE(&vmbus_device_list, dev, next);
>> +               free(dev);
>> +               return 0;
>> +       }
>> +       return -1;
>> +
>> +err_return:
>> +       uuid_unparse(device_id, ubuf);
>> +       RTE_LOG(WARNING, EAL, "Requested device %s cannot be used\n",
>> +               ubuf);
>> +       return -1;
>> +}
>> +
>> +/*
>> + * Walk the list of discovered vmbus devices, and call the probe()
>> + * function for all registered drivers that have a matching entry in
>> + * their id_table for discovered devices.
>> + */
>> +int
>> +rte_eal_vmbus_probe(void)
>> +{
>> +       struct rte_vmbus_device *dev = NULL;
>> +
>> +       TAILQ_FOREACH(dev, &vmbus_device_list, next) {
>> +               char ubuf[UUID_BUF_SZ];
>> +
>> +               uuid_unparse(dev->device_id, ubuf);
>> +
>> +               RTE_LOG(DEBUG, EAL, "Probing driver for device %s ...\n",
>> +                       ubuf);
>> +               vmbus_probe_all_drivers(dev);
>> +       }
>> +       return 0;
>> +}
>> +
>> +/* register vmbus driver */
>> +void
>> +rte_eal_vmbus_register(struct rte_vmbus_driver *driver)
>> +{
>> +       TAILQ_INSERT_TAIL(&vmbus_driver_list, driver, next);
>> +}
>> +
>> +/* unregister vmbus driver */
>> +void
>> +rte_eal_vmbus_unregister(struct rte_vmbus_driver *driver)
>> +{
>> +       TAILQ_REMOVE(&vmbus_driver_list, driver, next);
>> +}
>> diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
>> index 7c212096..b69af0f0 100644
>> --- a/lib/librte_ether/rte_ethdev.c
>> +++ b/lib/librte_ether/rte_ethdev.c
>> @@ -3334,3 +3334,93 @@ rte_eth_dev_l2_tunnel_offload_set(uint8_t port_id,
>>                                 -ENOTSUP);
>>         return (*dev->dev_ops->l2_tunnel_offload_set)(dev, l2_tunnel, mask, en);
>>  }
>> +
>> +
>> +#ifdef RTE_LIBRTE_HV_PMD
>> +int
>> +rte_eth_dev_vmbus_probe(struct rte_vmbus_driver *vmbus_drv,
>> +                       struct rte_vmbus_device *vmbus_dev)
>> +{
>> +       struct eth_driver  *eth_drv = (struct eth_driver *)vmbus_drv;
>> +       struct rte_eth_dev *eth_dev;
>> +       char ustr[UUID_BUF_SZ];
>> +       int diag;
>> +
>> +       uuid_unparse(vmbus_dev->device_id, ustr);
>> +
>> +       eth_dev = rte_eth_dev_allocate(ustr);
>> +       if (eth_dev == NULL)
>> +               return -ENOMEM;
>> +
>> +       if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
>> +               eth_dev->data->dev_private = rte_zmalloc("ethdev private structure",
>> +                                 eth_drv->dev_private_size,
>> +                                 RTE_CACHE_LINE_SIZE);
>> +               if (eth_dev->data->dev_private == NULL)
>> +                       rte_panic("Cannot allocate memzone for private port data\n");
>> +       }
>> +
>> +       eth_dev->device = &vmbus_dev->device;
>> +       eth_dev->driver = eth_drv;
>> +       eth_dev->data->rx_mbuf_alloc_failed = 0;
>> +
>> +       /* init user callbacks */
>> +       TAILQ_INIT(&(eth_dev->link_intr_cbs));
>> +
>> +       /*
>> +        * Set the default maximum frame size.
>> +        */
>> +       eth_dev->data->mtu = ETHER_MTU;
>> +
>> +       /* Invoke PMD device initialization function */
>> +       diag = (*eth_drv->eth_dev_init)(eth_dev);
>> +       if (diag == 0)
>> +               return 0;
>> +
>> +       RTE_PMD_DEBUG_TRACE("driver %s: eth_dev_init(%s) failed\n",
>> +                           vmbus_drv->driver.name, ustr);
>> +
>> +       if (rte_eal_process_type() == RTE_PROC_PRIMARY)
>> +               rte_free(eth_dev->data->dev_private);
>> +
>> +       return diag;
>> +}
>> +
>> +int
>> +rte_eth_dev_vmbus_remove(struct rte_vmbus_device *vmbus_dev)
>> +{
>> +       const struct eth_driver *eth_drv;
>> +       struct rte_eth_dev *eth_dev;
>> +       char ustr[UUID_BUF_SZ];
>> +       int ret;
>> +
>> +       if (vmbus_dev == NULL)
>> +               return -EINVAL;
>> +
>> +       uuid_unparse(vmbus_dev->device_id, ustr);
>> +       eth_dev = rte_eth_dev_allocated(ustr);
>> +       if (eth_dev == NULL)
>> +               return -ENODEV;
>> +
>> +       eth_drv = (const struct eth_driver *)vmbus_dev->driver;
>> +
>> +       /* Invoke PMD device uninit function */
>> +       if (*eth_drv->eth_dev_uninit) {
>> +               ret = (*eth_drv->eth_dev_uninit)(eth_dev);
>> +               if (ret)
>> +                       return ret;
>> +       }
>> +
>> +       /* free ether device */
>> +       rte_eth_dev_release_port(eth_dev);
>> +
>> +       if (rte_eal_process_type() == RTE_PROC_PRIMARY)
>> +               rte_free(eth_dev->data->dev_private);
>> +
>> +       eth_dev->device = NULL;
>> +       eth_dev->driver = NULL;
>> +       eth_dev->data = NULL;
>> +
>> +       return 0;
>> +}
>> +#endif
>> diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
>> index 1a62a322..2a8c1eed 100644
>> --- a/lib/librte_ether/rte_ethdev.h
>> +++ b/lib/librte_ether/rte_ethdev.h
>> @@ -180,6 +180,9 @@ extern "C" {
>>  #include <rte_log.h>
>>  #include <rte_interrupts.h>
>>  #include <rte_pci.h>
>> +#ifdef RTE_LIBRTE_HV_PMD
>> +#include <rte_vmbus.h>
>> +#endif
>>  #include <rte_dev.h>
>>  #include <rte_devargs.h>
>>  #include <rte_errno.h>
>> @@ -1908,6 +1911,17 @@ struct rte_pci_eth_driver {
>>         struct eth_driver       eth_drv;        /**< Ethernet driver. */
>>  };
>>
>> +#ifdef RTE_LIBRTE_HV_PMD
>> +/**
>> + * @internal
>> + * The structure associated with a PMD VMBUS Ethernet driver.
>> + */
>> +struct rte_vmbus_eth_driver {
>> +       struct rte_vmbus_driver vmbus_drv;      /**< Underlying VMBUS driver. */
>> +       struct eth_driver       eth_drv;        /**< Ethernet driver. */
>> +};
>> +#endif
>> +
>>  /**
>>   * Convert a numerical speed in Mbps to a bitmap flag that can be used in
>>   * the bitmap link_speeds of the struct rte_eth_conf
>> @@ -4543,6 +4557,23 @@ int rte_eth_dev_pci_probe(struct rte_pci_driver *pci_drv,
>>   */
>>  int rte_eth_dev_pci_remove(struct rte_pci_device *pci_dev);
>>
>> +#ifdef RTE_LIBRTE_HV_PMD
>> +/**
>> + * @internal
>> + * Wrapper for use by vmbus drivers as a .probe function to attach to an ethdev
>> + * interface.
>> + */
>> +int rte_eth_dev_vmbus_probe(struct rte_vmbus_driver *vmbus_drv,
>> +                         struct rte_vmbus_device *vmbus_dev);
>> +
>> +/**
>> + * @internal
>> + * Wrapper for use by vmbus drivers as a .remove function to detach an ethdev
>> + * interface.
>> + */
>> +int rte_eth_dev_vmbus_remove(struct rte_vmbus_device *vmbus_dev);
>> +#endif
>> +
>>  #ifdef __cplusplus
>>  }
>>  #endif
>> diff --git a/mk/rte.app.mk b/mk/rte.app.mk
>> index f75f0e24..6b304084 100644
>> --- a/mk/rte.app.mk
>> +++ b/mk/rte.app.mk
>> @@ -130,6 +130,7 @@ ifeq ($(CONFIG_RTE_LIBRTE_VHOST),y)
>>  _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_VHOST)      += -lrte_pmd_vhost
>>  endif # $(CONFIG_RTE_LIBRTE_VHOST)
>>  _LDLIBS-$(CONFIG_RTE_LIBRTE_VMXNET3_PMD)    += -lrte_pmd_vmxnet3_uio
>> +_LDLIBS-$(CONFIG_RTE_LIBRTE_HV_PMD)        += -luuid
>>
>>  ifeq ($(CONFIG_RTE_LIBRTE_CRYPTODEV),y)
>>  _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_AESNI_MB)    += -lrte_pmd_aesni_mb
>> --
>> 2.11.0
>>


More information about the dev mailing list