[dpdk-dev,V16,3/4] eal/linux: uevent parse and process

Message ID 1522063256-3997-4-git-send-email-jia.guo@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Headers

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation fail Compilation issues

Commit Message

Guo, Jia March 26, 2018, 11:20 a.m. UTC
  In order to handle the uevent which have been detected from the kernel
side, add uevent parse and process function to translate the uevent into
device event, which user has subscribe to monitor.

Signed-off-by: Jeff Guo <jia.guo@intel.com>
---
1.move all linux specific together
---
 lib/librte_eal/linuxapp/eal/eal_dev.c | 214 +++++++++++++++++++++++++++++++++-
 1 file changed, 211 insertions(+), 3 deletions(-)
  

Comments

Jianfeng Tan March 28, 2018, 4:15 p.m. UTC | #1
BTW, adding new .c file needs to update meson.build now.

On 3/26/2018 7:20 PM, Jeff Guo wrote:
> In order to handle the uevent which have been detected from the kernel
> side, add uevent parse and process function to translate the uevent into
> device event, which user has subscribe to monitor.
>
> Signed-off-by: Jeff Guo <jia.guo@intel.com>
> ---
> 1.move all linux specific together
> ---
>   lib/librte_eal/linuxapp/eal/eal_dev.c | 214 +++++++++++++++++++++++++++++++++-
>   1 file changed, 211 insertions(+), 3 deletions(-)
>
> diff --git a/lib/librte_eal/linuxapp/eal/eal_dev.c b/lib/librte_eal/linuxapp/eal/eal_dev.c
> index 5ab5830..90094c0 100644
> --- a/lib/librte_eal/linuxapp/eal/eal_dev.c
> +++ b/lib/librte_eal/linuxapp/eal/eal_dev.c
> @@ -2,19 +2,227 @@
>    * Copyright(c) 2018 Intel Corporation
>    */
>   
> -#include <rte_log.h>
> +#include <stdio.h>
> +#include <string.h>
> +#include <inttypes.h>
> +#include <sys/queue.h>
> +#include <sys/signalfd.h>
> +#include <sys/ioctl.h>
> +#include <sys/socket.h>
> +#include <linux/netlink.h>
> +#include <sys/epoll.h>
> +#include <unistd.h>
> +#include <signal.h>

Some header files are not necessary, the above one for example.

> +#include <stdbool.h>
> +#include <fcntl.h>
> +
> +#include <rte_malloc.h>
> +#include <rte_bus.h>
>   #include <rte_dev.h>
> +#include <rte_devargs.h>

We don't need this one neither.

> +#include <rte_debug.h>
> +#include <rte_log.h>
> +#include <rte_interrupts.h>
> +
> +#include "eal_private.h"
> +#include "eal_thread.h"

Ditto.

> +
> +static struct rte_intr_handle intr_handle = {.fd = -1 };

I don't think we need a static intr_handle, what we need is the monitor fd.

> +static bool monitor_not_started = true;
> +
> +#define EAL_UEV_MSG_LEN 4096
> +#define EAL_UEV_MSG_ELEM_LEN 128
> +
> +/* identify the system layer which event exposure from */
> +enum eal_dev_event_subsystem {
> +	EAL_DEV_EVENT_SUBSYSTEM_PCI, /* PCI bus device event */
> +	EAL_DEV_EVENT_SUBSYSTEM_UIO, /* UIO driver device event */
> +	EAL_DEV_EVENT_SUBSYSTEM_MAX
> +};
> +
> +static int
> +dev_uev_monitor_fd_new(void)
> +{
> +	int uevent_fd;
> +
> +	uevent_fd = socket(PF_NETLINK, SOCK_RAW | SOCK_CLOEXEC |
> +			SOCK_NONBLOCK,
> +			NETLINK_KOBJECT_UEVENT);
> +	if (uevent_fd < 0) {
> +		RTE_LOG(ERR, EAL, "create uevent fd failed\n");
> +		return -1;
> +	}
> +	return uevent_fd;
> +}
> +
> +static int
> +dev_uev_monitor_create(int netlink_fd)

I think we should merge this function with above function. I don't see a 
reason to split into two functions.

> +{
> +	struct sockaddr_nl addr;
> +	int ret;
> +	int size = 64 * 1024;
> +	int nonblock = 1;
> +
> +	memset(&addr, 0, sizeof(addr));
> +	addr.nl_family = AF_NETLINK;
> +	addr.nl_pid = 0;
> +	addr.nl_groups = 0xffffffff;
> +
> +	if (bind(netlink_fd, (struct sockaddr *) &addr, sizeof(addr)) < 0) {
> +		RTE_LOG(ERR, EAL, "bind failed\n");

Please print more information here, so that we don't have to check the 
code if we really encounter such an error.

> +		goto err;
> +	}
> +
> +	setsockopt(netlink_fd, SOL_SOCKET, SO_PASSCRED, &size, sizeof(size));
> +
> +	ret = ioctl(netlink_fd, FIONBIO, &nonblock);
> +	if (ret != 0) {
> +		RTE_LOG(ERR, EAL, "ioctl(FIONBIO) failed\n");
> +		goto err;
> +	}
> +	return 0;
> +err:
> +	close(netlink_fd);
> +	return -1;
> +}
> +
> +static void
> +dev_uev_parse(const char *buf, struct rte_dev_event *event, int length)

We always get an event we care? If no, we need return something so that 
the caller can skip this event.

> +{
> +	char action[EAL_UEV_MSG_ELEM_LEN];
> +	char subsystem[EAL_UEV_MSG_ELEM_LEN];
> +	char dev_path[EAL_UEV_MSG_ELEM_LEN];
> +	char pci_slot_name[EAL_UEV_MSG_ELEM_LEN];
> +	int i = 0;
> +
> +	memset(action, 0, EAL_UEV_MSG_ELEM_LEN);
> +	memset(subsystem, 0, EAL_UEV_MSG_ELEM_LEN);
> +	memset(dev_path, 0, EAL_UEV_MSG_ELEM_LEN);
> +	memset(pci_slot_name, 0, EAL_UEV_MSG_ELEM_LEN);

I did not see you need dev_path, why do we care to parse it?

> +
> +	while (i < length) {
> +		for (; i < length; i++) {
> +			if (*buf)
> +				break;
> +			buf++;
> +		}
> +		if (!strncmp(buf, "ACTION=", 7)) {
> +			buf += 7;
> +			i += 7;
> +			snprintf(action, sizeof(action), "%s", buf);
> +		} else if (!strncmp(buf, "DEVPATH=", 8)) {
> +			buf += 8;
> +			i += 8;
> +			snprintf(dev_path, sizeof(dev_path), "%s", buf);
> +		} else if (!strncmp(buf, "SUBSYSTEM=", 10)) {
> +			buf += 10;
> +			i += 10;
> +			snprintf(subsystem, sizeof(subsystem), "%s", buf);
> +		} else if (!strncmp(buf, "PCI_SLOT_NAME=", 14)) {
> +			buf += 14;
> +			i += 14;
> +			snprintf(pci_slot_name, sizeof(subsystem), "%s", buf);
> +			event->devname = pci_slot_name;

You are assigning a stack pointer for the caller to use; this is 
dangerous, we should never do that.

> +		}
> +		for (; i < length; i++) {
> +			if (*buf == '\0')
> +				break;
> +			buf++;
> +		}
> +	}
> +
> +	if (!strncmp(subsystem, "uio", 3))
> +		event->subsystem = EAL_DEV_EVENT_SUBSYSTEM_UIO;
> +	else if (!strncmp(subsystem, "pci", 3))
> +		event->subsystem = EAL_DEV_EVENT_SUBSYSTEM_PCI;
> +	if (!strncmp(action, "add", 3))
> +		event->type = RTE_DEV_EVENT_ADD;
> +	if (!strncmp(action, "remove", 6))
> +		event->type = RTE_DEV_EVENT_REMOVE;
> +}
> +
> +static int
> +dev_uev_receive(int fd, struct rte_dev_event *uevent)
> +{
> +	int ret;
> +	char buf[EAL_UEV_MSG_LEN];
> +
> +	memset(uevent, 0, sizeof(struct rte_dev_event));
> +	memset(buf, 0, EAL_UEV_MSG_LEN);
> +
> +	ret = recv(fd, buf, EAL_UEV_MSG_LEN, MSG_DONTWAIT);
> +	if (ret < 0) {
> +		RTE_LOG(ERR, EAL,
> +		"Socket read error(%d): %s\n",
> +		errno, strerror(errno));

The above three lines are in bad format.

> +		return -1;
> +	} else if (ret == 0)
> +		/* connection closed */
> +		return -1;
> +
> +	dev_uev_parse(buf, uevent, EAL_UEV_MSG_LEN);
> +
> +	return 0;
> +}
> +
> +static void
> +dev_uev_process(__rte_unused void *param)
> +{
> +	struct rte_dev_event uevent;
> +
> +	if (dev_uev_receive(intr_handle.fd, &uevent))
> +		return;
> +
> +	if (uevent.devname)
> +		dev_callback_process(uevent.devname, uevent.type, NULL);
> +}
>   
>   int __rte_experimental
>   rte_dev_event_monitor_start(void)
>   {
> -	/* TODO: start uevent monitor for linux */
> +	int ret;
> +
> +	if (!monitor_not_started)
> +		return 0;
> +
> +	intr_handle.fd = dev_uev_monitor_fd_new();
> +	intr_handle.type = RTE_INTR_HANDLE_DEV_EVENT;
> +
> +	ret = dev_uev_monitor_create(intr_handle.fd);
> +
> +	if (ret) {
> +		RTE_LOG(ERR, EAL, "error create device event monitor\n");
> +		return -1;
> +	}
> +
> +	ret = rte_intr_callback_register(&intr_handle, dev_uev_process, NULL);
> +
> +	if (ret) {
> +		RTE_LOG(ERR, EAL, "fail to register uevent callback\n");
> +		return -1;
> +	}
> +
> +	monitor_not_started = false;
> +
>   	return 0;
>   }
>   
>   int __rte_experimental
>   rte_dev_event_monitor_stop(void)
>   {
> -	/* TODO: stop uevent monitor for linux */
> +	int ret;
> +
> +	if (monitor_not_started)
> +		return 0;
> +
> +	ret = rte_intr_callback_unregister(&intr_handle, dev_uev_process, NULL);
> +	if (ret) {
> +		RTE_LOG(ERR, EAL, "fail to unregister uevent callback");
> +		return ret;
> +	}
> +
> +	close(intr_handle.fd);
> +	intr_handle.fd = -1;
> +	monitor_not_started = true;
>   	return 0;
>   }
  
Van Haaren, Harry March 29, 2018, 1:32 p.m. UTC | #2
Two additional input along with Jianfeng's existing comments;

> -----Original Message-----
> From: Tan, Jianfeng
> Sent: Wednesday, March 28, 2018 5:16 PM
> To: Guo, Jia <jia.guo@intel.com>; stephen@networkplumber.org; Richardson,
> Bruce <bruce.richardson@intel.com>; Yigit, Ferruh <ferruh.yigit@intel.com>;
> Ananyev, Konstantin <konstantin.ananyev@intel.com>; gaetan.rivet@6wind.com;
> Wu, Jingjing <jingjing.wu@intel.com>; thomas@monjalon.net;
> motih@mellanox.com; Van Haaren, Harry <harry.van.haaren@intel.com>
> Cc: jblunck@infradead.org; shreyansh.jain@nxp.com; dev@dpdk.org; Zhang,
> Helin <helin.zhang@intel.com>
> Subject: Re: [PATCH V16 3/4] eal/linux: uevent parse and process
> 
> BTW, adding new .c file needs to update meson.build now.
> 
> On 3/26/2018 7:20 PM, Jeff Guo wrote:
> > In order to handle the uevent which have been detected from the kernel
> > side, add uevent parse and process function to translate the uevent into
> > device event, which user has subscribe to monitor.
> >
> > Signed-off-by: Jeff Guo <jia.guo@intel.com>
> > ---
> > 1.move all linux specific together
> > ---
> >   lib/librte_eal/linuxapp/eal/eal_dev.c | 214
> +++++++++++++++++++++++++++++++++-
> >   1 file changed, 211 insertions(+), 3 deletions(-)
> >
> > diff --git a/lib/librte_eal/linuxapp/eal/eal_dev.c
> b/lib/librte_eal/linuxapp/eal/eal_dev.c
> 
> > +static bool monitor_not_started = true;

This variable should be named "monitor_started", as it is a static var it will be zero by default,
and the following code is easier to read:

if ( !not_started )   becomes    if (started)



> >   int __rte_experimental
> >   rte_dev_event_monitor_start(void)
> >   {
> > -	/* TODO: start uevent monitor for linux */
> > +	int ret;
> > +
> > +	if (!monitor_not_started)
> > +		return 0;
> > +
> > +	intr_handle.fd = dev_uev_monitor_fd_new();
> > +	intr_handle.type = RTE_INTR_HANDLE_DEV_EVENT;

dev_uev_monitor_fd_new() can return -1 on error, we should check for that case here.
  
Guo, Jia March 29, 2018, 3:03 p.m. UTC | #3
hi, harry

thanks for your review.
On 3/29/2018 9:32 PM, Van Haaren, Harry wrote:
> Two additional input along with Jianfeng's existing comments;
>
>> -----Original Message-----
>> From: Tan, Jianfeng
>> Sent: Wednesday, March 28, 2018 5:16 PM
>> To: Guo, Jia <jia.guo@intel.com>; stephen@networkplumber.org; Richardson,
>> Bruce <bruce.richardson@intel.com>; Yigit, Ferruh <ferruh.yigit@intel.com>;
>> Ananyev, Konstantin <konstantin.ananyev@intel.com>; gaetan.rivet@6wind.com;
>> Wu, Jingjing <jingjing.wu@intel.com>; thomas@monjalon.net;
>> motih@mellanox.com; Van Haaren, Harry <harry.van.haaren@intel.com>
>> Cc: jblunck@infradead.org; shreyansh.jain@nxp.com; dev@dpdk.org; Zhang,
>> Helin <helin.zhang@intel.com>
>> Subject: Re: [PATCH V16 3/4] eal/linux: uevent parse and process
>>
>> BTW, adding new .c file needs to update meson.build now.
>>
>> On 3/26/2018 7:20 PM, Jeff Guo wrote:
>>> In order to handle the uevent which have been detected from the kernel
>>> side, add uevent parse and process function to translate the uevent into
>>> device event, which user has subscribe to monitor.
>>>
>>> Signed-off-by: Jeff Guo <jia.guo@intel.com>
>>> ---
>>> 1.move all linux specific together
>>> ---
>>>    lib/librte_eal/linuxapp/eal/eal_dev.c | 214
>> +++++++++++++++++++++++++++++++++-
>>>    1 file changed, 211 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/lib/librte_eal/linuxapp/eal/eal_dev.c
>> b/lib/librte_eal/linuxapp/eal/eal_dev.c
>>
>>> +static bool monitor_not_started = true;
> This variable should be named "monitor_started", as it is a static var it will be zero by default,
> and the following code is easier to read:
>
> if ( !not_started )   becomes    if (started)
>
make sense.
>
>>>    int __rte_experimental
>>>    rte_dev_event_monitor_start(void)
>>>    {
>>> -	/* TODO: start uevent monitor for linux */
>>> +	int ret;
>>> +
>>> +	if (!monitor_not_started)
>>> +		return 0;
>>> +
>>> +	intr_handle.fd = dev_uev_monitor_fd_new();
>>> +	intr_handle.type = RTE_INTR_HANDLE_DEV_EVENT;
> dev_uev_monitor_fd_new() can return -1 on error, we should check for that case here.
>
you are right.
  
Guo, Jia March 29, 2018, 3:08 p.m. UTC | #4
jianfeng


On 3/29/2018 12:15 AM, Tan, Jianfeng wrote:
> BTW, adding new .c file needs to update meson.build now.
>
thanks for your info .
> On 3/26/2018 7:20 PM, Jeff Guo wrote:
>> In order to handle the uevent which have been detected from the kernel
>> side, add uevent parse and process function to translate the uevent into
>> device event, which user has subscribe to monitor.
>>
>> Signed-off-by: Jeff Guo <jia.guo@intel.com>
>> ---
>> 1.move all linux specific together
>> ---
>>   lib/librte_eal/linuxapp/eal/eal_dev.c | 214 
>> +++++++++++++++++++++++++++++++++-
>>   1 file changed, 211 insertions(+), 3 deletions(-)
>>
>> diff --git a/lib/librte_eal/linuxapp/eal/eal_dev.c 
>> b/lib/librte_eal/linuxapp/eal/eal_dev.c
>> index 5ab5830..90094c0 100644
>> --- a/lib/librte_eal/linuxapp/eal/eal_dev.c
>> +++ b/lib/librte_eal/linuxapp/eal/eal_dev.c
>> @@ -2,19 +2,227 @@
>>    * Copyright(c) 2018 Intel Corporation
>>    */
>>   -#include <rte_log.h>
>> +#include <stdio.h>
>> +#include <string.h>
>> +#include <inttypes.h>
>> +#include <sys/queue.h>
>> +#include <sys/signalfd.h>
>> +#include <sys/ioctl.h>
>> +#include <sys/socket.h>
>> +#include <linux/netlink.h>
>> +#include <sys/epoll.h>
>> +#include <unistd.h>
>> +#include <signal.h>
>
> Some header files are not necessary, the above one for example.
>
>> +#include <stdbool.h>
>> +#include <fcntl.h>
>> +
>> +#include <rte_malloc.h>
>> +#include <rte_bus.h>
>>   #include <rte_dev.h>
>> +#include <rte_devargs.h>
>
> We don't need this one neither.
>
>> +#include <rte_debug.h>
>> +#include <rte_log.h>
>> +#include <rte_interrupts.h>
>> +
>> +#include "eal_private.h"
>> +#include "eal_thread.h"
>
> Ditto.
>
>> +
>> +static struct rte_intr_handle intr_handle = {.fd = -1 };
>
> I don't think we need a static intr_handle, what we need is the 
> monitor fd.
>
>> +static bool monitor_not_started = true;
>> +
>> +#define EAL_UEV_MSG_LEN 4096
>> +#define EAL_UEV_MSG_ELEM_LEN 128
>> +
>> +/* identify the system layer which event exposure from */
>> +enum eal_dev_event_subsystem {
>> +    EAL_DEV_EVENT_SUBSYSTEM_PCI, /* PCI bus device event */
>> +    EAL_DEV_EVENT_SUBSYSTEM_UIO, /* UIO driver device event */
>> +    EAL_DEV_EVENT_SUBSYSTEM_MAX
>> +};
>> +
>> +static int
>> +dev_uev_monitor_fd_new(void)
>> +{
>> +    int uevent_fd;
>> +
>> +    uevent_fd = socket(PF_NETLINK, SOCK_RAW | SOCK_CLOEXEC |
>> +            SOCK_NONBLOCK,
>> +            NETLINK_KOBJECT_UEVENT);
>> +    if (uevent_fd < 0) {
>> +        RTE_LOG(ERR, EAL, "create uevent fd failed\n");
>> +        return -1;
>> +    }
>> +    return uevent_fd;
>> +}
>> +
>> +static int
>> +dev_uev_monitor_create(int netlink_fd)
>
> I think we should merge this function with above function. I don't see 
> a reason to split into two functions.
>
make sense.
>> +{
>> +    struct sockaddr_nl addr;
>> +    int ret;
>> +    int size = 64 * 1024;
>> +    int nonblock = 1;
>> +
>> +    memset(&addr, 0, sizeof(addr));
>> +    addr.nl_family = AF_NETLINK;
>> +    addr.nl_pid = 0;
>> +    addr.nl_groups = 0xffffffff;
>> +
>> +    if (bind(netlink_fd, (struct sockaddr *) &addr, sizeof(addr)) < 
>> 0) {
>> +        RTE_LOG(ERR, EAL, "bind failed\n");
>
> Please print more information here, so that we don't have to check the 
> code if we really encounter such an error.
>
>> +        goto err;
>> +    }
>> +
>> +    setsockopt(netlink_fd, SOL_SOCKET, SO_PASSCRED, &size, 
>> sizeof(size));
>> +
>> +    ret = ioctl(netlink_fd, FIONBIO, &nonblock);
>> +    if (ret != 0) {
>> +        RTE_LOG(ERR, EAL, "ioctl(FIONBIO) failed\n");
>> +        goto err;
>> +    }
>> +    return 0;
>> +err:
>> +    close(netlink_fd);
>> +    return -1;
>> +}
>> +
>> +static void
>> +dev_uev_parse(const char *buf, struct rte_dev_event *event, int length)
>
> We always get an event we care? If no, we need return something so 
> that the caller can skip this event.
>
this function do no filter the event.
>> +{
>> +    char action[EAL_UEV_MSG_ELEM_LEN];
>> +    char subsystem[EAL_UEV_MSG_ELEM_LEN];
>> +    char dev_path[EAL_UEV_MSG_ELEM_LEN];
>> +    char pci_slot_name[EAL_UEV_MSG_ELEM_LEN];
>> +    int i = 0;
>> +
>> +    memset(action, 0, EAL_UEV_MSG_ELEM_LEN);
>> +    memset(subsystem, 0, EAL_UEV_MSG_ELEM_LEN);
>> +    memset(dev_path, 0, EAL_UEV_MSG_ELEM_LEN);
>> +    memset(pci_slot_name, 0, EAL_UEV_MSG_ELEM_LEN);
>
> I did not see you need dev_path, why do we care to parse it?
>
>> +
>> +    while (i < length) {
>> +        for (; i < length; i++) {
>> +            if (*buf)
>> +                break;
>> +            buf++;
>> +        }
>> +        if (!strncmp(buf, "ACTION=", 7)) {
>> +            buf += 7;
>> +            i += 7;
>> +            snprintf(action, sizeof(action), "%s", buf);
>> +        } else if (!strncmp(buf, "DEVPATH=", 8)) {
>> +            buf += 8;
>> +            i += 8;
>> +            snprintf(dev_path, sizeof(dev_path), "%s", buf);
>> +        } else if (!strncmp(buf, "SUBSYSTEM=", 10)) {
>> +            buf += 10;
>> +            i += 10;
>> +            snprintf(subsystem, sizeof(subsystem), "%s", buf);
>> +        } else if (!strncmp(buf, "PCI_SLOT_NAME=", 14)) {
>> +            buf += 14;
>> +            i += 14;
>> +            snprintf(pci_slot_name, sizeof(subsystem), "%s", buf);
>> +            event->devname = pci_slot_name;
>
> You are assigning a stack pointer for the caller to use; this is 
> dangerous, we should never do that.
>
make sense.
>> +        }
>> +        for (; i < length; i++) {
>> +            if (*buf == '\0')
>> +                break;
>> +            buf++;
>> +        }
>> +    }
>> +
>> +    if (!strncmp(subsystem, "uio", 3))
>> +        event->subsystem = EAL_DEV_EVENT_SUBSYSTEM_UIO;
>> +    else if (!strncmp(subsystem, "pci", 3))
>> +        event->subsystem = EAL_DEV_EVENT_SUBSYSTEM_PCI;
>> +    if (!strncmp(action, "add", 3))
>> +        event->type = RTE_DEV_EVENT_ADD;
>> +    if (!strncmp(action, "remove", 6))
>> +        event->type = RTE_DEV_EVENT_REMOVE;
>> +}
>> +
>> +static int
>> +dev_uev_receive(int fd, struct rte_dev_event *uevent)
>> +{
>> +    int ret;
>> +    char buf[EAL_UEV_MSG_LEN];
>> +
>> +    memset(uevent, 0, sizeof(struct rte_dev_event));
>> +    memset(buf, 0, EAL_UEV_MSG_LEN);
>> +
>> +    ret = recv(fd, buf, EAL_UEV_MSG_LEN, MSG_DONTWAIT);
>> +    if (ret < 0) {
>> +        RTE_LOG(ERR, EAL,
>> +        "Socket read error(%d): %s\n",
>> +        errno, strerror(errno));
>
> The above three lines are in bad format.
>
>> +        return -1;
>> +    } else if (ret == 0)
>> +        /* connection closed */
>> +        return -1;
>> +
>> +    dev_uev_parse(buf, uevent, EAL_UEV_MSG_LEN);
>> +
>> +    return 0;
>> +}
>> +
>> +static void
>> +dev_uev_process(__rte_unused void *param)
>> +{
>> +    struct rte_dev_event uevent;
>> +
>> +    if (dev_uev_receive(intr_handle.fd, &uevent))
>> +        return;
>> +
>> +    if (uevent.devname)
>> +        dev_callback_process(uevent.devname, uevent.type, NULL);
>> +}
>>     int __rte_experimental
>>   rte_dev_event_monitor_start(void)
>>   {
>> -    /* TODO: start uevent monitor for linux */
>> +    int ret;
>> +
>> +    if (!monitor_not_started)
>> +        return 0;
>> +
>> +    intr_handle.fd = dev_uev_monitor_fd_new();
>> +    intr_handle.type = RTE_INTR_HANDLE_DEV_EVENT;
>> +
>> +    ret = dev_uev_monitor_create(intr_handle.fd);
>> +
>> +    if (ret) {
>> +        RTE_LOG(ERR, EAL, "error create device event monitor\n");
>> +        return -1;
>> +    }
>> +
>> +    ret = rte_intr_callback_register(&intr_handle, dev_uev_process, 
>> NULL);
>> +
>> +    if (ret) {
>> +        RTE_LOG(ERR, EAL, "fail to register uevent callback\n");
>> +        return -1;
>> +    }
>> +
>> +    monitor_not_started = false;
>> +
>>       return 0;
>>   }
>>     int __rte_experimental
>>   rte_dev_event_monitor_stop(void)
>>   {
>> -    /* TODO: stop uevent monitor for linux */
>> +    int ret;
>> +
>> +    if (monitor_not_started)
>> +        return 0;
>> +
>> +    ret = rte_intr_callback_unregister(&intr_handle, 
>> dev_uev_process, NULL);
>> +    if (ret) {
>> +        RTE_LOG(ERR, EAL, "fail to unregister uevent callback");
>> +        return ret;
>> +    }
>> +
>> +    close(intr_handle.fd);
>> +    intr_handle.fd = -1;
>> +    monitor_not_started = true;
>>       return 0;
>>   }
>
  

Patch

diff --git a/lib/librte_eal/linuxapp/eal/eal_dev.c b/lib/librte_eal/linuxapp/eal/eal_dev.c
index 5ab5830..90094c0 100644
--- a/lib/librte_eal/linuxapp/eal/eal_dev.c
+++ b/lib/librte_eal/linuxapp/eal/eal_dev.c
@@ -2,19 +2,227 @@ 
  * Copyright(c) 2018 Intel Corporation
  */
 
-#include <rte_log.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include <sys/queue.h>
+#include <sys/signalfd.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <linux/netlink.h>
+#include <sys/epoll.h>
+#include <unistd.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <fcntl.h>
+
+#include <rte_malloc.h>
+#include <rte_bus.h>
 #include <rte_dev.h>
+#include <rte_devargs.h>
+#include <rte_debug.h>
+#include <rte_log.h>
+#include <rte_interrupts.h>
+
+#include "eal_private.h"
+#include "eal_thread.h"
+
+static struct rte_intr_handle intr_handle = {.fd = -1 };
+static bool monitor_not_started = true;
+
+#define EAL_UEV_MSG_LEN 4096
+#define EAL_UEV_MSG_ELEM_LEN 128
+
+/* identify the system layer which event exposure from */
+enum eal_dev_event_subsystem {
+	EAL_DEV_EVENT_SUBSYSTEM_PCI, /* PCI bus device event */
+	EAL_DEV_EVENT_SUBSYSTEM_UIO, /* UIO driver device event */
+	EAL_DEV_EVENT_SUBSYSTEM_MAX
+};
+
+static int
+dev_uev_monitor_fd_new(void)
+{
+	int uevent_fd;
+
+	uevent_fd = socket(PF_NETLINK, SOCK_RAW | SOCK_CLOEXEC |
+			SOCK_NONBLOCK,
+			NETLINK_KOBJECT_UEVENT);
+	if (uevent_fd < 0) {
+		RTE_LOG(ERR, EAL, "create uevent fd failed\n");
+		return -1;
+	}
+	return uevent_fd;
+}
+
+static int
+dev_uev_monitor_create(int netlink_fd)
+{
+	struct sockaddr_nl addr;
+	int ret;
+	int size = 64 * 1024;
+	int nonblock = 1;
+
+	memset(&addr, 0, sizeof(addr));
+	addr.nl_family = AF_NETLINK;
+	addr.nl_pid = 0;
+	addr.nl_groups = 0xffffffff;
+
+	if (bind(netlink_fd, (struct sockaddr *) &addr, sizeof(addr)) < 0) {
+		RTE_LOG(ERR, EAL, "bind failed\n");
+		goto err;
+	}
+
+	setsockopt(netlink_fd, SOL_SOCKET, SO_PASSCRED, &size, sizeof(size));
+
+	ret = ioctl(netlink_fd, FIONBIO, &nonblock);
+	if (ret != 0) {
+		RTE_LOG(ERR, EAL, "ioctl(FIONBIO) failed\n");
+		goto err;
+	}
+	return 0;
+err:
+	close(netlink_fd);
+	return -1;
+}
+
+static void
+dev_uev_parse(const char *buf, struct rte_dev_event *event, int length)
+{
+	char action[EAL_UEV_MSG_ELEM_LEN];
+	char subsystem[EAL_UEV_MSG_ELEM_LEN];
+	char dev_path[EAL_UEV_MSG_ELEM_LEN];
+	char pci_slot_name[EAL_UEV_MSG_ELEM_LEN];
+	int i = 0;
+
+	memset(action, 0, EAL_UEV_MSG_ELEM_LEN);
+	memset(subsystem, 0, EAL_UEV_MSG_ELEM_LEN);
+	memset(dev_path, 0, EAL_UEV_MSG_ELEM_LEN);
+	memset(pci_slot_name, 0, EAL_UEV_MSG_ELEM_LEN);
+
+	while (i < length) {
+		for (; i < length; i++) {
+			if (*buf)
+				break;
+			buf++;
+		}
+		if (!strncmp(buf, "ACTION=", 7)) {
+			buf += 7;
+			i += 7;
+			snprintf(action, sizeof(action), "%s", buf);
+		} else if (!strncmp(buf, "DEVPATH=", 8)) {
+			buf += 8;
+			i += 8;
+			snprintf(dev_path, sizeof(dev_path), "%s", buf);
+		} else if (!strncmp(buf, "SUBSYSTEM=", 10)) {
+			buf += 10;
+			i += 10;
+			snprintf(subsystem, sizeof(subsystem), "%s", buf);
+		} else if (!strncmp(buf, "PCI_SLOT_NAME=", 14)) {
+			buf += 14;
+			i += 14;
+			snprintf(pci_slot_name, sizeof(subsystem), "%s", buf);
+			event->devname = pci_slot_name;
+		}
+		for (; i < length; i++) {
+			if (*buf == '\0')
+				break;
+			buf++;
+		}
+	}
+
+	if (!strncmp(subsystem, "uio", 3))
+		event->subsystem = EAL_DEV_EVENT_SUBSYSTEM_UIO;
+	else if (!strncmp(subsystem, "pci", 3))
+		event->subsystem = EAL_DEV_EVENT_SUBSYSTEM_PCI;
+	if (!strncmp(action, "add", 3))
+		event->type = RTE_DEV_EVENT_ADD;
+	if (!strncmp(action, "remove", 6))
+		event->type = RTE_DEV_EVENT_REMOVE;
+}
+
+static int
+dev_uev_receive(int fd, struct rte_dev_event *uevent)
+{
+	int ret;
+	char buf[EAL_UEV_MSG_LEN];
+
+	memset(uevent, 0, sizeof(struct rte_dev_event));
+	memset(buf, 0, EAL_UEV_MSG_LEN);
+
+	ret = recv(fd, buf, EAL_UEV_MSG_LEN, MSG_DONTWAIT);
+	if (ret < 0) {
+		RTE_LOG(ERR, EAL,
+		"Socket read error(%d): %s\n",
+		errno, strerror(errno));
+		return -1;
+	} else if (ret == 0)
+		/* connection closed */
+		return -1;
+
+	dev_uev_parse(buf, uevent, EAL_UEV_MSG_LEN);
+
+	return 0;
+}
+
+static void
+dev_uev_process(__rte_unused void *param)
+{
+	struct rte_dev_event uevent;
+
+	if (dev_uev_receive(intr_handle.fd, &uevent))
+		return;
+
+	if (uevent.devname)
+		dev_callback_process(uevent.devname, uevent.type, NULL);
+}
 
 int __rte_experimental
 rte_dev_event_monitor_start(void)
 {
-	/* TODO: start uevent monitor for linux */
+	int ret;
+
+	if (!monitor_not_started)
+		return 0;
+
+	intr_handle.fd = dev_uev_monitor_fd_new();
+	intr_handle.type = RTE_INTR_HANDLE_DEV_EVENT;
+
+	ret = dev_uev_monitor_create(intr_handle.fd);
+
+	if (ret) {
+		RTE_LOG(ERR, EAL, "error create device event monitor\n");
+		return -1;
+	}
+
+	ret = rte_intr_callback_register(&intr_handle, dev_uev_process, NULL);
+
+	if (ret) {
+		RTE_LOG(ERR, EAL, "fail to register uevent callback\n");
+		return -1;
+	}
+
+	monitor_not_started = false;
+
 	return 0;
 }
 
 int __rte_experimental
 rte_dev_event_monitor_stop(void)
 {
-	/* TODO: stop uevent monitor for linux */
+	int ret;
+
+	if (monitor_not_started)
+		return 0;
+
+	ret = rte_intr_callback_unregister(&intr_handle, dev_uev_process, NULL);
+	if (ret) {
+		RTE_LOG(ERR, EAL, "fail to unregister uevent callback");
+		return ret;
+	}
+
+	close(intr_handle.fd);
+	intr_handle.fd = -1;
+	monitor_not_started = true;
 	return 0;
 }