[dpdk-dev] [PATCH V18 2/4] eal: add device event monitor framework
Guo, Jia
jia.guo at intel.com
Thu Apr 5 05:44:57 CEST 2018
On 4/4/2018 10:53 AM, Tan, Jianfeng wrote:
>
>> -----Original Message-----
>> From: Guo, Jia
>> Sent: Tuesday, April 3, 2018 6:34 PM
>> To: stephen at networkplumber.org; Richardson, Bruce; Yigit, Ferruh;
>> Ananyev, Konstantin; gaetan.rivet at 6wind.com; Wu, Jingjing;
>> thomas at monjalon.net; motih at mellanox.com; Van Haaren, Harry; Tan,
>> Jianfeng
>> Cc: jblunck at infradead.org; shreyansh.jain at nxp.com; dev at dpdk.org; Guo,
>> Jia; Zhang, Helin
>> Subject: [PATCH V18 2/4] eal: add device event monitor framework
>>
>> This patch aims to add a general device event monitor framework at
>> EAL device layer, for device hotplug awareness and actions adopted
>> accordingly. It could also expand for all other types of device event
>> monitor, but not in this scope at the stage.
>>
>> To get started, users firstly call below new added APIs to enable/disable
>> the device event monitor mechanism:
>> - rte_dev_event_monitor_start
>> - rte_dev_event_monitor_stop
>>
>> Then users shell register or unregister callbacks through the new added
>> APIs. Callbacks can be some device specific, or for all devices.
>> -rte_dev_event_callback_register
>> -rte_dev_event_callback_unregister
>>
>> Use hotplug case for example, when device hotplug insertion or hotplug
>> removal, we will get notified from kernel, then call user's callbacks
>> accordingly to handle it, such as detach or attach the device from the
>> bus, and could benefit further fail-safe or live-migration.
>>
>> Signed-off-by: Jeff Guo <jia.guo at intel.com>
>> ---
>> v18->v17:
>> add feature announcement in release document, fix bsp compile issue.
>> ---
>> doc/guides/rel_notes/release_18_05.rst | 9 ++
>> lib/librte_eal/bsdapp/eal/Makefile | 1 +
>> lib/librte_eal/bsdapp/eal/eal_dev.c | 21 ++++
>> lib/librte_eal/bsdapp/eal/meson.build | 1 +
>> lib/librte_eal/common/eal_common_dev.c | 168
>> ++++++++++++++++++++++++++++++++
>> lib/librte_eal/common/eal_private.h | 15 +++
>> lib/librte_eal/common/include/rte_dev.h | 94 ++++++++++++++++++
>> lib/librte_eal/linuxapp/eal/Makefile | 1 +
>> lib/librte_eal/linuxapp/eal/eal_dev.c | 22 +++++
>> lib/librte_eal/linuxapp/eal/meson.build | 1 +
>> lib/librte_eal/rte_eal_version.map | 8 ++
>> 11 files changed, 341 insertions(+)
>> create mode 100644 lib/librte_eal/bsdapp/eal/eal_dev.c
>> create mode 100644 lib/librte_eal/linuxapp/eal/eal_dev.c
>>
>> diff --git a/doc/guides/rel_notes/release_18_05.rst
>> b/doc/guides/rel_notes/release_18_05.rst
>> index 3923dc2..37e00c4 100644
>> --- a/doc/guides/rel_notes/release_18_05.rst
>> +++ b/doc/guides/rel_notes/release_18_05.rst
>> @@ -41,6 +41,15 @@ New Features
>> Also, make sure to start the actual text at the margin.
>>
>> =========================================================
>>
>> +* **Added device event monitor framework.**
>> +
>> + Added a general device event monitor framework at EAL, for device
>> dynamic management.
>> + Such as device hotplug awareness and actions adopted accordingly. The list
>> of new APIs:
>> +
>> + * ``rte_dev_event_monitor_start`` and ``rte_dev_event_monitor_stop``
>> are for
>> + the event monitor enable and disable.
>> + * ``rte_dev_event_callback_register`` and
>> ``rte_dev_event_callback_unregister``
>> + are for the user's callbacks register and unregister.
>>
>> API Changes
>> -----------
>> diff --git a/lib/librte_eal/bsdapp/eal/Makefile
>> b/lib/librte_eal/bsdapp/eal/Makefile
>> index dd455e6..c0921dd 100644
>> --- a/lib/librte_eal/bsdapp/eal/Makefile
>> +++ b/lib/librte_eal/bsdapp/eal/Makefile
>> @@ -33,6 +33,7 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) +=
>> eal_lcore.c
>> SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_timer.c
>> SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_interrupts.c
>> SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_alarm.c
>> +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_dev.c
>>
>> # from common dir
>> SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_lcore.c
>> diff --git a/lib/librte_eal/bsdapp/eal/eal_dev.c
>> b/lib/librte_eal/bsdapp/eal/eal_dev.c
>> new file mode 100644
>> index 0000000..1c6c51b
>> --- /dev/null
>> +++ b/lib/librte_eal/bsdapp/eal/eal_dev.c
>> @@ -0,0 +1,21 @@
>> +/* SPDX-License-Identifier: BSD-3-Clause
>> + * Copyright(c) 2018 Intel Corporation
>> + */
>> +
>> +#include <rte_log.h>
>> +#include <rte_compat.h>
>> +#include <rte_dev.h>
>> +
>> +int __rte_experimental
>> +rte_dev_event_monitor_start(void)
>> +{
>> + RTE_LOG(ERR, EAL, "Device event is not supported for FreeBSD\n");
>> + return -1;
>> +}
>> +
>> +int __rte_experimental
>> +rte_dev_event_monitor_stop(void)
>> +{
>> + RTE_LOG(ERR, EAL, "Device event is not supported for FreeBSD\n");
>> + return -1;
>> +}
>> diff --git a/lib/librte_eal/bsdapp/eal/meson.build
>> b/lib/librte_eal/bsdapp/eal/meson.build
>> index e83fc91..6dfc533 100644
>> --- a/lib/librte_eal/bsdapp/eal/meson.build
>> +++ b/lib/librte_eal/bsdapp/eal/meson.build
>> @@ -12,4 +12,5 @@ env_sources = files('eal_alarm.c',
>> 'eal_timer.c',
>> 'eal.c',
>> 'eal_memory.c',
>> + 'eal_dev.c'
>> )
>> diff --git a/lib/librte_eal/common/eal_common_dev.c
>> b/lib/librte_eal/common/eal_common_dev.c
>> index cd07144..e09e86f 100644
>> --- a/lib/librte_eal/common/eal_common_dev.c
>> +++ b/lib/librte_eal/common/eal_common_dev.c
>> @@ -14,9 +14,34 @@
>> #include <rte_devargs.h>
>> #include <rte_debug.h>
>> #include <rte_log.h>
>> +#include <rte_spinlock.h>
>> +#include <rte_malloc.h>
>>
>> #include "eal_private.h"
>>
>> +/* spinlock for device callbacks */
> It's for protect callback list.
>
>> +static rte_spinlock_t dev_event_lock = RTE_SPINLOCK_INITIALIZER;
> Put this spinlock where the list locates.
>
>> +
>> +/**
>> + * The device event callback description.
>> + *
>> + * It contains callback address to be registered by user application,
>> + * the pointer to the parameters for callback, and the device name.
>> + */
>> +struct dev_event_callback {
>> + TAILQ_ENTRY(dev_event_callback) next; /**< Callbacks list */
>> + rte_dev_event_cb_fn cb_fn; /**< Callback address */
>> + void *cb_arg; /**< Callback parameter */
>> + char *dev_name; /**< Callback device name, NULL is for all
>> device */
>> + uint32_t active; /**< Callback is executing */
>> +};
>> +
>> +/** @internal Structure to keep track of registered callbacks */
>> +TAILQ_HEAD(dev_event_cb_list, dev_event_callback);
>> +
>> +/* The device event callback list for all registered callbacks. */
>> +static struct dev_event_cb_list dev_event_cbs;
>> +
>> static int cmp_detached_dev_name(const struct rte_device *dev,
>> const void *_name)
>> {
>> @@ -207,3 +232,146 @@ rte_eal_hotplug_remove(const char *busname,
>> const char *devname)
>> rte_eal_devargs_remove(busname, devname);
>> return ret;
>> }
>> +
>> +int __rte_experimental
>> +rte_dev_event_callback_register(const char *device_name,
>> + rte_dev_event_cb_fn cb_fn,
>> + void *cb_arg)
>> +{
>> + struct dev_event_callback *event_cb;
>> + int ret;
>> +
>> + if (!cb_fn)
>> + return -EINVAL;
>> +
>> + rte_spinlock_lock(&dev_event_lock);
>> +
>> + if (TAILQ_EMPTY(&dev_event_cbs))
>> + TAILQ_INIT(&dev_event_cbs);
>> +
>> + TAILQ_FOREACH(event_cb, &dev_event_cbs, next) {
>> + if (event_cb->cb_fn == cb_fn && event_cb->cb_arg ==
>> cb_arg) {
>> + if (device_name == NULL && event_cb->dev_name
>> == NULL)
>> + break;
>> + if (device_name == NULL || event_cb->dev_name
>> == NULL)
>> + continue;
>> + if (!strcmp(event_cb->dev_name, device_name))
>> + break;
>> + }
>> + }
>> +
>> + /* create a new callback. */
>> + if (event_cb == NULL) {
>> + event_cb = malloc(sizeof(struct dev_event_callback));
>> + if (event_cb != NULL) {
>> + event_cb->cb_fn = cb_fn;
>> + event_cb->cb_arg = cb_arg;
>> + event_cb->active = 0;
>> + if (!device_name) {
>> + event_cb->dev_name = NULL;
>> + } else {
>> + event_cb->dev_name =
>> strdup(device_name);
>> + if (event_cb->dev_name == NULL) {
>> + ret = -ENOMEM;
>> + goto error;
>> + }
>> + }
>> + TAILQ_INSERT_TAIL(&dev_event_cbs, event_cb,
>> next);
>> + } else {
>> + RTE_LOG(ERR, EAL,
>> + "Failed to allocate memory for device "
>> + "event callback.");
>> + ret = -ENOMEM;
>> + goto error;
>> + }
>> + } else {
>> + RTE_LOG(ERR, EAL,
>> + "The callback is already exist, no need "
>> + "to register again.\n");
>> + ret = -EEXIST;
>> + goto error;
> Here is a bug that you will free an existing callback entry.
you are correct.
>> + }
>> +
>> + rte_spinlock_unlock(&dev_event_lock);
>> + return 0;
>> +error:
>> + free(event_cb);
>> + rte_spinlock_unlock(&dev_event_lock);
>> + return ret;
>> +}
>> +
>> +int __rte_experimental
>> +rte_dev_event_callback_unregister(const char *device_name,
>> + rte_dev_event_cb_fn cb_fn,
>> + void *cb_arg)
>> +{
> Let's clearly define the behavior and return of this API. If I understand it correctly,
>
> If cb_arg != -1, we use (dev_name, cb_fn, cb_arg) as the key to look up the registered API.
> If cb_arg == -1, we use (cb_fn) as the key to look up the registered API.
>
> For return value, we want to return the number of callbacks being removed. It could be:
> >=0, number of callbacks been removed. (When we encounter an active callback, we shall skip it or just return -EAGAIN, neither sounds good to me actually)
> <0, error encountered.
>
> If you agree with above statement, below implementation has lots of issues.
>
>> + int ret = 0;
>> + struct dev_event_callback *event_cb, *next;
>> +
>> + if (!cb_fn)
>> + return -EINVAL;
>> +
>> + rte_spinlock_lock(&dev_event_lock);
>> +
>> + /*walk through the callbacks and remove all that match. */
>> + for (event_cb = TAILQ_FIRST(&dev_event_cbs); event_cb != NULL;
>> + event_cb = next) {
>> +
>> + next = TAILQ_NEXT(event_cb, next);
> First of all, if cb_fn != event_cb->cb_fn, we shall continue.
might not, if cb_arg = -1 for all cb, don't need to care if cb_fn
equal event_cb->cb_fn or not.
>> +
>> + if (device_name != NULL && event_cb->dev_name != NULL) {
>> + if (!strcmp(event_cb->dev_name, device_name)) {
>> + if (event_cb->cb_fn != cb_fn ||
>> + (cb_arg != (void *)-1 &&
>> + event_cb->cb_arg != cb_arg))
>> + continue;
>> + }
>> + } else if (device_name != NULL) {
>> + continue;
>> + }
> What about device_name == NULL && event_cb->dev_name != NULL && cb_arg == -1?
if device_name == NULL, it mean for all device, just process any cb.
>
> What about device_name == NULL && event_cb->dev_name == NULL && cb_arg != -1 && cb_arg != event_cb->cb_arg?
if device_name == NULL, don't care about cb_arg, just remove the back.
>> +
>> + /*
>> + * if this callback is not executing right now,
>> + * then remove it.
>> + */
>> + if (event_cb->active == 0) {
>> + TAILQ_REMOVE(&dev_event_cbs, event_cb, next);
>> + free(event_cb);
>> + ret++;
>> + } else {
>> + ret = -EAGAIN;
> If you don't break here, next time you find another satisfied callback, you will ret++ on a (-EAGAIN) value.
here , i think you are correct. but since return ret value is for number
check, so that it would just continue here.
>> + }
>> + }
>> + rte_spinlock_unlock(&dev_event_lock);
>> + return ret;
>> +}
> BTW, don't know why DPDK has the tradition of using cb_arg==-1 to stand for multiple callbacks, it's not a good API design to me. Would like as others' opinions, shall we continue this?
i don't have obvious objection for the cb_arg=-1 usage, it might make
sense.
>> +
>> +void
>> +dev_callback_process(char *device_name, enum rte_dev_event_type
>> event)
>> +{
>> + struct dev_event_callback *cb_lst;
>> + int rc;
>> +
>> + if (device_name == NULL)
>> + return;
>> +
>> + rte_spinlock_lock(&dev_event_lock);
>> +
>> + TAILQ_FOREACH(cb_lst, &dev_event_cbs, next) {
>> + if (cb_lst->dev_name) {
>> + if (strcmp(cb_lst->dev_name, device_name))
>> + continue;
>> + }
>> + cb_lst->active = 1;
>> + rte_spinlock_unlock(&dev_event_lock);
>> + rc = cb_lst->cb_fn(device_name, event,
>> + cb_lst->cb_arg);
>> + if (rc) {
>> + RTE_LOG(ERR, EAL,
>> + "Failed to process callback function.");
>> + }
> I don't see a reason why we need the return value from callbacks. Probably, define it as void type.
>> + rte_spinlock_lock(&dev_event_lock);
>> + cb_lst->active = 0;
>> + }
>> + rte_spinlock_unlock(&dev_event_lock);
>> +}
>> diff --git a/lib/librte_eal/common/eal_private.h
>> b/lib/librte_eal/common/eal_private.h
>> index 0b28770..88e5a59 100644
>> --- a/lib/librte_eal/common/eal_private.h
>> +++ b/lib/librte_eal/common/eal_private.h
>> @@ -9,6 +9,8 @@
>> #include <stdint.h>
>> #include <stdio.h>
>>
>> +#include <rte_dev.h>
>> +
>> /**
>> * Initialize the memzone subsystem (private to eal).
>> *
>> @@ -205,4 +207,17 @@ struct rte_bus
>> *rte_bus_find_by_device_name(const char *str);
>>
>> int rte_mp_channel_init(void);
>>
>> +/**
>> + * Internal Executes all the user application registered callbacks for
>> + * the specific device. It is for DPDK internal user only. User
>> + * application should not call it directly.
>> + *
>> + * @param device_name
>> + * The device name.
>> + * @param event
>> + * the device event type.
>> + *
>> + */
>> +void
>> +dev_callback_process(char *device_name, enum rte_dev_event_type event);
> Too many *_process functions in this patch. Let's avoid using such ambiguous words.
>
> For example, you can rename this function to dev_event_callback_invoke().
hold on this define,will consider rename other side.
>> #endif /* _EAL_PRIVATE_H_ */
>> diff --git a/lib/librte_eal/common/include/rte_dev.h
>> b/lib/librte_eal/common/include/rte_dev.h
>> index b688f1e..4c78938 100644
>> --- a/lib/librte_eal/common/include/rte_dev.h
>> +++ b/lib/librte_eal/common/include/rte_dev.h
>> @@ -24,6 +24,25 @@ extern "C" {
>> #include <rte_compat.h>
>> #include <rte_log.h>
>>
>> +/**
>> + * The device event type.
>> + */
>> +enum rte_dev_event_type {
>> + RTE_DEV_EVENT_ADD, /**< device being added */
>> + RTE_DEV_EVENT_REMOVE, /**< device being removed */
>> + RTE_DEV_EVENT_MAX /**< max value of this enum */
>> +};
>> +
>> +struct rte_dev_event {
>> + enum rte_dev_event_type type; /**< device event type */
>> + int subsystem; /**< subsystem id */
>> + char *devname; /**< device name */
>> +};
>> +
>> +typedef int (*rte_dev_event_cb_fn)(char *device_name,
>> + enum rte_dev_event_type event,
>> + void *cb_arg);
>> +
>> __attribute__((format(printf, 2, 0)))
>> static inline void
>> rte_pmd_debug_trace(const char *func_name, const char *fmt, ...)
>> @@ -267,4 +286,79 @@ __attribute__((used)) = str
>> }
>> #endif
>>
>> +/**
>> + * @warning
>> + * @b EXPERIMENTAL: this API may change without prior notice
>> + *
>> + * It registers the callback for the specific device.
>> + * Multiple callbacks cal be registered at the same time.
>> + *
>> + * @param device_name
>> + * The device name, that is the param name of the struct rte_device,
>> + * null value means for all devices.
>> + * @param cb_fn
>> + * callback address.
>> + * @param cb_arg
>> + * address of parameter for callback.
>> + *
>> + * @return
>> + * - On success, zero.
>> + * - On failure, a negative value.
>> + */
>> +int __rte_experimental
>> +rte_dev_event_callback_register(const char *device_name,
>> + rte_dev_event_cb_fn cb_fn,
>> + void *cb_arg);
>> +
>> +/**
>> + * @warning
>> + * @b EXPERIMENTAL: this API may change without prior notice
>> + *
>> + * It unregisters the callback according to the specified device.
>> + *
>> + * @param device_name
>> + * The device name, that is the param name of the struct rte_device,
>> + * null value means for all devices.
>> + * @param cb_fn
>> + * callback address.
>> + * @param cb_arg
>> + * address of parameter for callback, (void *)-1 means to remove all
>> + * registered which has the same callback address.
>> + *
>> + * @return
>> + * - On success, return the number of callback entities removed.
>> + * - On failure, a negative value.
>> + */
>> +int __rte_experimental
>> +rte_dev_event_callback_unregister(const char *device_name,
>> + rte_dev_event_cb_fn cb_fn,
>> + void *cb_arg);
>> +
>> +/**
>> + * @warning
>> + * @b EXPERIMENTAL: this API may change without prior notice
>> + *
>> + * Start the device event monitoring.
>> + *
>> + * @param none
>> + * @return
>> + * - On success, zero.
>> + * - On failure, a negative value.
>> + */
>> +int __rte_experimental
>> +rte_dev_event_monitor_start(void);
>> +
>> +/**
>> + * @warning
>> + * @b EXPERIMENTAL: this API may change without prior notice
>> + *
>> + * Stop the device event monitoring .
>> + *
>> + * @param none
>> + * @return
>> + * - On success, zero.
>> + * - On failure, a negative value.
>> + */
>> +int __rte_experimental
>> +rte_dev_event_monitor_stop(void);
>> #endif /* _RTE_DEV_H_ */
>> diff --git a/lib/librte_eal/linuxapp/eal/Makefile
>> b/lib/librte_eal/linuxapp/eal/Makefile
>> index 7e5bbe8..8578796 100644
>> --- a/lib/librte_eal/linuxapp/eal/Makefile
>> +++ b/lib/librte_eal/linuxapp/eal/Makefile
>> @@ -41,6 +41,7 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) +=
>> eal_lcore.c
>> SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_timer.c
>> SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_interrupts.c
>> SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_alarm.c
>> +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_dev.c
>>
>> # from common dir
>> SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_lcore.c
>> diff --git a/lib/librte_eal/linuxapp/eal/eal_dev.c
>> b/lib/librte_eal/linuxapp/eal/eal_dev.c
>> new file mode 100644
>> index 0000000..9c8d1a0
>> --- /dev/null
>> +++ b/lib/librte_eal/linuxapp/eal/eal_dev.c
>> @@ -0,0 +1,22 @@
>> +/* SPDX-License-Identifier: BSD-3-Clause
>> + * Copyright(c) 2018 Intel Corporation
>> + */
>> +
>> +#include <rte_log.h>
>> +#include <rte_compat.h>
>> +#include <rte_dev.h>
>> +
>> +
>> +int __rte_experimental
>> +rte_dev_event_monitor_start(void)
>> +{
>> + /* TODO: start uevent monitor for linux */
>> + return 0;
>> +}
>> +
>> +int __rte_experimental
>> +rte_dev_event_monitor_stop(void)
>> +{
>> + /* TODO: stop uevent monitor for linux */
>> + return 0;
>> +}
>> diff --git a/lib/librte_eal/linuxapp/eal/meson.build
>> b/lib/librte_eal/linuxapp/eal/meson.build
>> index 03974ff..b222571 100644
>> --- a/lib/librte_eal/linuxapp/eal/meson.build
>> +++ b/lib/librte_eal/linuxapp/eal/meson.build
>> @@ -18,6 +18,7 @@ env_sources = files('eal_alarm.c',
>> 'eal_vfio_mp_sync.c',
>> 'eal.c',
>> 'eal_memory.c',
>> + 'eal_dev.c',
>> )
>>
>> if has_libnuma == 1
>> diff --git a/lib/librte_eal/rte_eal_version.map
>> b/lib/librte_eal/rte_eal_version.map
>> index d123602..d23f491 100644
>> --- a/lib/librte_eal/rte_eal_version.map
>> +++ b/lib/librte_eal/rte_eal_version.map
>> @@ -256,3 +256,11 @@ EXPERIMENTAL {
>> rte_service_start_with_defaults;
>>
>> } DPDK_18.02;
>> +
>> +EXPERIMENTAL {
>> + global:
>> +
>> + rte_dev_event_callback_register;
>> + rte_dev_event_callback_unregister;
>> +
>> +} DPDK_18.05;
>> --
>> 2.7.4
More information about the dev
mailing list