@@ -472,6 +472,32 @@ pci_find_device(const struct rte_device *start, rte_dev_cmp_t cmp,
return NULL;
}
+/*
+ * Find the PCI device that owns a faulting address.
+ *
+ * Iterates with FOREACH_DEVICE_ON_PCIBUS and compares failure_addr against
+ * every entry of each device's mem_resource[] table, treating an entry as
+ * the half-open range [addr, addr + len).
+ *
+ * Returns the first device with a matching range (the match is logged at
+ * INFO level), or NULL when no range contains the address.
+ */
+static struct rte_pci_device *
+pci_find_device_by_addr(const void *failure_addr)
+{
+	const uint64_t fault = (uint64_t)(uintptr_t)failure_addr;
+	struct rte_pci_device *pdev = NULL;
+	unsigned int idx;
+
+	FOREACH_DEVICE_ON_PCIBUS(pdev) {
+		for (idx = 0; idx < RTE_DIM(pdev->mem_resource); idx++) {
+			uint64_t start =
+				(uint64_t)(uintptr_t)pdev->mem_resource[idx].addr;
+			uint64_t end = start + pdev->mem_resource[idx].len;
+
+			/* address lies outside this resource: try the next */
+			if (fault < start || fault >= end)
+				continue;
+
+			RTE_LOG(INFO, EAL, "Failure address "
+				"%16.16"PRIx64" belongs to "
+				"device %s!\n",
+				fault, pdev->device.name);
+			return pdev;
+		}
+	}
+	return NULL;
+}
+
static int
pci_hotplug_handler(struct rte_device *dev)
{
@@ -506,6 +532,32 @@ pci_hotplug_handler(struct rte_device *dev)
}
+/*
+ * PCI bus implementation of the sigbus_handler bus operation.
+ *
+ * Looks up the PCI device whose memory resources contain failure_addr and,
+ * if one is found, runs the PCI hotplug handler on it.
+ *
+ * Returns:
+ *   1  - no PCI device owns the address (generic sigbus error),
+ *   0  - the hotplug handler succeeded for the owning device,
+ *  -1  - the hotplug handler failed; rte_errno is set to -1.
+ */
static int
+pci_sigbus_handler(const void *failure_addr)
+{
+	struct rte_pci_device *pdev;
+
+	pdev = pci_find_device_by_addr(failure_addr);
+	if (pdev == NULL) {
+		/* The address is not inside any device's MMIO range, so it
+		 * is a generic sigbus error. Return right away: there is no
+		 * device to hand to the hotplug handler.
+		 */
+		return 1;
+	}
+
+	/* handle hotplug when the sigbus error is caused by hot removal */
+	if (pci_hotplug_handler(&pdev->device)) {
+		RTE_LOG(ERR, EAL, "Failed to handle hot plug for device %s\n",
+			pdev->name);
+		rte_errno = -1;
+		return -1;
+	}
+
+	return 0;
+}
+
+static int
pci_plug(struct rte_device *dev)
{
return pci_probe_all_drivers(RTE_DEV_TO_PCI(dev));
@@ -536,6 +588,7 @@ struct rte_pci_bus rte_pci_bus = {
.parse = pci_parse,
.get_iommu_class = rte_pci_get_iommu_class,
.hotplug_handler = pci_hotplug_handler,
+ .sigbus_handler = pci_sigbus_handler,
},
.device_list = TAILQ_HEAD_INITIALIZER(rte_pci_bus.device_list),
.driver_list = TAILQ_HEAD_INITIALIZER(rte_pci_bus.driver_list),
@@ -37,6 +37,7 @@
#include <rte_bus.h>
#include <rte_debug.h>
#include <rte_string_fns.h>
+#include <rte_errno.h>
#include "eal_private.h"
@@ -220,7 +221,6 @@ rte_bus_find_by_device_name(const char *str)
return rte_bus_find(NULL, bus_can_parse, name);
}
-
/*
* Get iommu class of devices on the bus.
*/
@@ -242,3 +242,35 @@ rte_bus_get_iommu_class(void)
}
return mode;
}
+
+/*
+ * Comparison callback handed to rte_bus_find() for sigbus handling.
+ *
+ * Yields 0 when the bus provides a sigbus_handler and that handler returns
+ * a value <= 0 for failure_addr; yields 1 when the bus has no handler or the
+ * handler returns a positive value.
+ */
+static int
+bus_handle_sigbus(const struct rte_bus *bus,
+		  const void *failure_addr)
+{
+	if (bus->sigbus_handler == NULL)
+		return 1;
+
+	return bus->sigbus_handler(failure_addr) > 0;
+}
+
+/*
+ * Offer a sigbus fault address to the registered buses.
+ *
+ * Uses rte_bus_find() with bus_handle_sigbus() as the comparison callback.
+ * rte_errno is cleared beforehand so that a non-zero value afterwards can be
+ * read as "a bus handler reported a failure".
+ *
+ * Returns 0 when a bus was found and rte_errno stayed 0, 1 otherwise. In the
+ * "1" case rte_errno is put back to the value it had on entry.
+ *
+ * NOTE(review): on the success path rte_errno is left at 0 rather than being
+ * restored to its entry value - confirm this is intended.
+ */
+int
+rte_bus_sigbus_handler(const void *failure_addr)
+{
+	struct rte_bus *bus;
+	int old_errno = rte_errno;
+	int no_handle = 0;
+
+	rte_errno = 0;
+
+	bus = rte_bus_find(NULL, bus_handle_sigbus, failure_addr);
+	if (bus == NULL) {
+		RTE_LOG(ERR, EAL, "No bus can handle the sigbus error!\n");
+		no_handle = 1;
+	} else if (rte_errno != 0) {
+		RTE_LOG(ERR, EAL, "Failed to handle the sigbus error!\n");
+		no_handle = 1;
+	}
+
+	/* if the sigbus was not handled, put the old errno back. */
+	if (no_handle)
+		rte_errno = old_errno;
+
+	return no_handle;
+}
@@ -181,6 +181,19 @@ typedef int (*rte_bus_parse_t)(const char *name, void *addr);
typedef int (*rte_bus_hotplug_handler_t)(struct rte_device *dev);
/**
+ * Bus specific sigbus handler. It is responsible for handling a sigbus
+ * error, which is either a generic memory error or a memory error caused
+ * by a device hot unplug.
+ *
+ * @param failure_addr
+ *   The fault address of the sigbus error.
+ *
+ * @return
+ *   0 on success.
+ *   !0 on error.
+ *
+ * NOTE(review): the bus layer (bus_handle_sigbus) treats any return value
+ * <= 0 as "this bus took the error" and only a positive value as "not
+ * handled by this bus" - confirm and tighten the contract above.
+ */
+typedef int (*rte_bus_sigbus_handler_t)(const void *failure_addr);
+
+/**
* Bus scan policies
*/
enum rte_bus_scan_mode {
@@ -225,6 +238,7 @@ struct rte_bus {
struct rte_bus_conf conf; /**< Bus configuration */
rte_bus_get_iommu_class_t get_iommu_class; /**< Get iommu class */
rte_bus_hotplug_handler_t hotplug_handler; /**< handle hot plug on bus */
+ rte_bus_sigbus_handler_t sigbus_handler; /**< handle sigbus error */
};
/**
@@ -335,6 +349,11 @@ struct rte_bus *rte_bus_find_by_name(const char *busname);
enum rte_iova_mode rte_bus_get_iommu_class(void);
/**
+ * Handle the sigbus error on the corresponding bus.
+ *
+ * @param failure_addr
+ *   The fault address of the sigbus error.
+ *
+ * @return
+ *   0 when a bus handled the error.
+ *   1 when no bus could handle it or the bus handler failed.
+ */
+int rte_bus_sigbus_handler(const void* failure_addr);
+
+/**
* Helper for Bus registration.
* The constructor has higher priority than PMD constructors.
*/
@@ -4,6 +4,8 @@
#include <string.h>
#include <unistd.h>
+#include <fcntl.h>
+#include <signal.h>
#include <sys/socket.h>
#include <linux/netlink.h>
@@ -14,15 +16,24 @@
#include <rte_malloc.h>
#include <rte_interrupts.h>
#include <rte_alarm.h>
+#include <rte_bus.h>
+#include <rte_eal.h>
+#include <rte_spinlock.h>
+#include <rte_errno.h>
#include "eal_private.h"
static struct rte_intr_handle intr_handle = {.fd = -1 };
static bool monitor_started;
+extern struct rte_bus_list rte_bus_list;
+
#define EAL_UEV_MSG_LEN 4096
#define EAL_UEV_MSG_ELEM_LEN 128
+/* spinlock serializing device failure handling (hotplug and sigbus paths) */
+static rte_spinlock_t dev_failure_lock = RTE_SPINLOCK_INITIALIZER;
+
static void dev_uev_handler(__rte_unused void *param);
/* identify the system layer which reports this event. */
@@ -33,6 +44,34 @@ enum eal_dev_event_subsystem {
EAL_DEV_EVENT_SUBSYSTEM_MAX
};
+/*
+ * SIGBUS signal handler (sa_sigaction style).
+ *
+ * Passes the faulting address (info->si_addr) to rte_bus_sigbus_handler()
+ * while holding dev_failure_lock. If the bus layer reports the error as
+ * handled (return 0) the handler returns normally; otherwise the process is
+ * terminated through rte_exit().
+ *
+ * NOTE(review): logging and spinlocks are used from signal context here, and
+ * dev_failure_lock is also taken around bus->hotplug_handler() in the uevent
+ * path. A SIGBUS raised on a thread that already holds the lock would spin
+ * forever - confirm this cannot happen.
+ */
+static void sigbus_handler(int signum __rte_unused, siginfo_t *info,
+	void *ctx __rte_unused)
+{
+	int ret;
+
+	RTE_LOG(DEBUG, EAL, "Thread[%d] catch SIGBUS, fault address:%p\n",
+		(int)pthread_self(), info->si_addr);
+
+	rte_spinlock_lock(&dev_failure_lock);
+	ret = rte_bus_sigbus_handler(info->si_addr);
+	rte_spinlock_unlock(&dev_failure_lock);
+	if (!ret)
+		RTE_LOG(INFO, EAL,
+			"Success to handle SIGBUS error for hotplug!\n");
+	else
+		rte_exit(EXIT_FAILURE,
+			"A generic SIGBUS error, (rte_errno: %s)!\n",
+			strerror(rte_errno));
+}
+
+/*
+ * Device comparison callback: compares dev->name with the NUL-terminated
+ * string passed through _name, returning the strcmp() result (0 on equal).
+ */
+static int cmp_dev_name(const struct rte_device *dev,
+	const void *_name)
+{
+	return strcmp(dev->name, (const char *)_name);
+}
+
static int
dev_uev_socket_fd_create(void)
{
@@ -147,6 +186,9 @@ dev_uev_handler(__rte_unused void *param)
struct rte_dev_event uevent;
int ret;
char buf[EAL_UEV_MSG_LEN];
+ struct rte_bus *bus;
+ struct rte_device *dev;
+ const char *busname;
memset(&uevent, 0, sizeof(struct rte_dev_event));
memset(buf, 0, EAL_UEV_MSG_LEN);
@@ -171,13 +213,48 @@ dev_uev_handler(__rte_unused void *param)
RTE_LOG(DEBUG, EAL, "receive uevent(name:%s, type:%d, subsystem:%d)\n",
uevent.devname, uevent.type, uevent.subsystem);
- if (uevent.devname)
+ switch (uevent.subsystem) {
+ case EAL_DEV_EVENT_SUBSYSTEM_PCI:
+ case EAL_DEV_EVENT_SUBSYSTEM_UIO:
+ busname = "pci";
+ break;
+ default:
+ break;
+ }
+
+ if (uevent.devname) {
+ if (uevent.type == RTE_DEV_EVENT_REMOVE) {
+ bus = rte_bus_find_by_name(busname);
+ if (bus == NULL) {
+ RTE_LOG(ERR, EAL, "Cannot find bus (%s)\n",
+ busname);
+ return;
+ }
+ dev = bus->find_device(NULL, cmp_dev_name,
+ uevent.devname);
+ if (dev == NULL) {
+ RTE_LOG(ERR, EAL, "Cannot find device (%s) on "
+ "bus (%s)\n", uevent.devname, busname);
+ return;
+ }
+ rte_spinlock_lock(&dev_failure_lock);
+ ret = bus->hotplug_handler(dev);
+ rte_spinlock_unlock(&dev_failure_lock);
+ if (ret) {
+ RTE_LOG(ERR, EAL, "Can not handle hotplug for "
+ "device (%s)\n", dev->name);
+ return;
+ }
+ }
dev_callback_process(uevent.devname, uevent.type);
+ }
}
int __rte_experimental
rte_dev_event_monitor_start(void)
{
+ sigset_t mask;
+ struct sigaction action;
int ret;
if (monitor_started)
@@ -197,6 +274,14 @@ rte_dev_event_monitor_start(void)
return -1;
}
+ /* register sigbus handler */
+ sigemptyset(&mask);
+ sigaddset(&mask, SIGBUS);
+ action.sa_flags = SA_SIGINFO;
+ action.sa_mask = mask;
+ action.sa_sigaction = sigbus_handler;
+ sigaction(SIGBUS, &action, NULL);
+
monitor_started = true;
return 0;
@@ -220,5 +305,6 @@ rte_dev_event_monitor_stop(void)
close(intr_handle.fd);
intr_handle.fd = -1;
monitor_started = false;
+
return 0;
}