[PATCH v1] raw/ifpga: fix monitor thread issues

Wei Huang wei.huang at intel.com
Tue Mar 1 09:47:03 CET 2022


The monitor thread handles graceful shutdown according to the values of
specific sensors in the device. Two issues were found:
1. The thread is not created when a card is probed.
2. The thread is canceled without checking for the presence of other cards.
To fix them, the thread is created in the PCI device probe function, and a
reference count is checked before the thread is canceled.

Fixes: 9c006c45 ("raw/ifpga: scan PCIe BDF device tree")
Cc: stable at dpdk.org

Signed-off-by: Wei Huang <wei.huang at intel.com>
Acked-by: Tianfei Zhang <tianfei.zhang at intel.com>
---
 drivers/raw/ifpga/ifpga_rawdev.c | 62 ++++++++++++++++++++++++++--------------
 drivers/raw/ifpga/ifpga_rawdev.h |  2 ++
 2 files changed, 43 insertions(+), 21 deletions(-)

diff --git a/drivers/raw/ifpga/ifpga_rawdev.c b/drivers/raw/ifpga/ifpga_rawdev.c
index f341f4a..d36cf61 100644
--- a/drivers/raw/ifpga/ifpga_rawdev.c
+++ b/drivers/raw/ifpga/ifpga_rawdev.c
@@ -68,7 +68,7 @@
 
 static struct ifpga_rawdev ifpga_rawdevices[IFPGA_RAWDEV_NUM];
 
-static int ifpga_monitor_start;
+static int ifpga_monitor_refcnt;
 static pthread_t ifpga_monitor_start_thread;
 
 static struct ifpga_rawdev *
@@ -133,6 +133,7 @@ struct ifpga_rawdev *
 	dev->dev_id = dev_id;
 	for (i = 0; i < IFPGA_MAX_IRQ; i++)
 		dev->intr_handle[i] = NULL;
+	dev->poll_enabled = 0;
 
 	return dev;
 }
@@ -207,10 +208,11 @@ static int ifpga_get_dev_vendor_id(const char *bdf,
 
 	return 0;
 }
-static int ifpga_rawdev_fill_info(struct ifpga_rawdev *ifpga_dev,
-	const char *bdf)
+
+static int ifpga_rawdev_fill_info(struct ifpga_rawdev *ifpga_dev)
 {
-	char path[1024] = "/sys/bus/pci/devices/0000:";
+	struct opae_adapter *adapter = NULL;
+	char path[1024] = "/sys/bus/pci/devices/";
 	char link[1024], link1[1024];
 	char dir[1024] = "/sys/devices/";
 	char *c;
@@ -225,7 +227,11 @@ static int ifpga_rawdev_fill_info(struct ifpga_rawdev *ifpga_dev,
 	int func;
 	uint32_t dev_id, vendor_id;
 
-	strlcat(path, bdf, sizeof(path));
+	adapter = ifpga_dev ? ifpga_rawdev_get_priv(ifpga_dev->rawdev) : NULL;
+	if (!adapter)
+		return -ENODEV;
+
+	strlcat(path, adapter->name, sizeof(path));
 	memset(link, 0, sizeof(link));
 	memset(link1, 0, sizeof(link1));
 	ret = readlink(path, link, (sizeof(link)-1));
@@ -375,7 +381,7 @@ static int ifpga_rawdev_fill_info(struct ifpga_rawdev *ifpga_dev,
 		/* monitor temperature sensors */
 		if (!strcmp(sensor->name, "Board Temperature") ||
 				!strcmp(sensor->name, "FPGA Die Temperature")) {
-			IFPGA_RAWDEV_PMD_INFO("read sensor %s %d %d %d\n",
+			IFPGA_RAWDEV_PMD_DEBUG("read sensor %s %d %d %d\n",
 					sensor->name, value, sensor->high_warn,
 					sensor->high_fatal);
 
@@ -417,7 +423,7 @@ static int set_surprise_link_check_aer(
 	bool enable = 0;
 	uint32_t aer_new0, aer_new1;
 
-	if (!ifpga_rdev) {
+	if (!ifpga_rdev || !ifpga_rdev->rawdev) {
 		printf("\n device does not exist\n");
 		return -EFAULT;
 	}
@@ -496,11 +502,11 @@ static int set_surprise_link_check_aer(
 	int gsd_enable, ret;
 #define MS 1000
 
-	while (__atomic_load_n(&ifpga_monitor_start, __ATOMIC_RELAXED)) {
+	while (__atomic_load_n(&ifpga_monitor_refcnt, __ATOMIC_RELAXED)) {
 		gsd_enable = 0;
 		for (i = 0; i < IFPGA_RAWDEV_NUM; i++) {
 			ifpga_rdev = &ifpga_rawdevices[i];
-			if (ifpga_rdev->rawdev) {
+			if (ifpga_rdev->poll_enabled) {
 				ret = set_surprise_link_check_aer(ifpga_rdev,
 					gsd_enable);
 				if (ret == 1 && !gsd_enable) {
@@ -520,32 +526,46 @@ static int set_surprise_link_check_aer(
 }
 
 static int
-ifpga_monitor_start_func(void)
+ifpga_monitor_start_func(struct ifpga_rawdev *dev)
 {
 	int ret;
 
-	if (!__atomic_load_n(&ifpga_monitor_start, __ATOMIC_RELAXED)) {
+	if (!dev)
+		return -ENODEV;
+
+	ret = ifpga_rawdev_fill_info(dev);
+	if (ret)
+		return ret;
+
+	dev->poll_enabled = 1;
+
+	if (!__atomic_fetch_add(&ifpga_monitor_refcnt, 1, __ATOMIC_RELAXED)) {
 		ret = rte_ctrl_thread_create(&ifpga_monitor_start_thread,
 					     "ifpga-monitor", NULL,
 					     ifpga_rawdev_gsd_handle, NULL);
 		if (ret != 0) {
+			ifpga_monitor_start_thread = 0;
 			IFPGA_RAWDEV_PMD_ERR(
 				"Fail to create ifpga monitor thread");
 			return -1;
 		}
-		__atomic_store_n(&ifpga_monitor_start, 1, __ATOMIC_RELAXED);
 	}
 
 	return 0;
 }
+
 static int
-ifpga_monitor_stop_func(void)
+ifpga_monitor_stop_func(struct ifpga_rawdev *dev)
 {
 	int ret;
 
-	if (__atomic_load_n(&ifpga_monitor_start, __ATOMIC_RELAXED)) {
-		__atomic_store_n(&ifpga_monitor_start, 0, __ATOMIC_RELAXED);
+	if (!dev || !dev->poll_enabled)
+		return 0;
+
+	dev->poll_enabled = 0;
 
+	if (!__atomic_sub_fetch(&ifpga_monitor_refcnt, 1, __ATOMIC_RELAXED) &&
+		ifpga_monitor_start_thread) {
 		ret = pthread_cancel(ifpga_monitor_start_thread);
 		if (ret)
 			IFPGA_RAWDEV_PMD_ERR("Can't cancel the thread");
@@ -718,6 +738,7 @@ static int set_surprise_link_check_aer(
 	struct opae_adapter *adapter;
 
 	if (dev) {
+		ifpga_monitor_stop_func(ifpga_rawdev_get(dev));
 		adapter = ifpga_rawdev_get_priv(dev);
 		if (adapter) {
 			opae_adapter_destroy(adapter);
@@ -1572,6 +1593,10 @@ static int fme_clean_fme_error(struct opae_manager *mgr)
 	if (ret)
 		goto free_adapter_data;
 
+	ret = ifpga_monitor_start_func(dev);
+	if (ret)
+		goto free_adapter_data;
+
 	return ret;
 
 free_adapter_data:
@@ -1647,7 +1672,7 @@ static int fme_clean_fme_error(struct opae_manager *mgr)
 static int
 ifpga_rawdev_pci_remove(struct rte_pci_device *pci_dev)
 {
-	ifpga_monitor_stop_func();
+	IFPGA_RAWDEV_PMD_INFO("remove pci_dev %s", pci_dev->device.name);
 	return ifpga_rawdev_destroy(pci_dev);
 }
 
@@ -1698,7 +1723,6 @@ static int ifpga_rawdev_get_string_arg(const char *key __rte_unused,
 	struct ifpga_rawdev *ifpga_dev;
 	int port;
 	char *name = NULL;
-	const char *bdf;
 	char dev_name[RTE_RAWDEV_NAME_MAX_LEN];
 	int ret = -1;
 
@@ -1747,10 +1771,6 @@ static int ifpga_rawdev_get_string_arg(const char *key __rte_unused,
 	ifpga_dev = ifpga_rawdev_get(rawdev);
 	if (!ifpga_dev)
 		goto end;
-	bdf = name;
-	ifpga_rawdev_fill_info(ifpga_dev, bdf);
-
-	ifpga_monitor_start_func();
 
 	memset(dev_name, 0, sizeof(dev_name));
 	snprintf(dev_name, RTE_RAWDEV_NAME_MAX_LEN, "%d|%s",
diff --git a/drivers/raw/ifpga/ifpga_rawdev.h b/drivers/raw/ifpga/ifpga_rawdev.h
index 6e09afe..857b734 100644
--- a/drivers/raw/ifpga/ifpga_rawdev.h
+++ b/drivers/raw/ifpga/ifpga_rawdev.h
@@ -62,6 +62,8 @@ struct ifpga_rawdev {
 	char parent_bdf[16];
 	/* 0 for FME interrupt, others are reserved for AFU irq */
 	void *intr_handle[IFPGA_MAX_IRQ];
+	/* enable monitor thread poll device's sensors or not */
+	int poll_enabled;
 };
 
 struct ifpga_rawdev *
-- 
1.8.3.1



More information about the stable mailing list