[dpdk-dev,v3,64/68] vfio: enable support for mem event callbacks

Message ID 58d088d72499fb5c7cd994a7522906fb78b62866.1522797505.git.anatoly.burakov@intel.com (mailing list archive)
State Superseded, archived
Checks

Context               Check     Description
ci/checkpatch         success   coding style OK
ci/Intel-compilation  fail      apply issues

Commit Message

Burakov, Anatoly April 3, 2018, 11:22 p.m. UTC
  Enable callbacks on first device attach, disable callbacks
on last device detach.

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---

Notes:
    v3:
    - Moved callbacks to attach/detach as opposed to init

 lib/librte_eal/linuxapp/eal/eal_vfio.c | 83 +++++++++++++++++++++++++++++++---
 1 file changed, 77 insertions(+), 6 deletions(-)
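
For context, the lifecycle this patch implements is: the first successful
device attach bulk-maps all existing DPDK memory into the VFIO container and
registers a memory event callback, so memory allocated or freed afterwards is
mapped or unmapped on the fly; the last device detach unregisters the
callback. A minimal sketch of that pattern, using the callback and
register/unregister signatures from this series (the attach/detach helpers,
the counter, and the callback name are illustrative only, and the callback
body assumes IOVA-as-VA mode; in IOVA-as-PA mode one would walk memsegs as
the patch below does):

#include <rte_memory.h>
#include <rte_vfio.h>

#define MY_MEM_EVENT_CLB_NAME "my_subsys_mem_event_clb"

static int my_active_devices;

/* called by EAL whenever hugepage memory is allocated or freed */
static void
my_mem_event_callback(enum rte_mem_event type, const void *addr, size_t len)
{
	uint64_t va = (uint64_t)(uintptr_t)addr;

	if (type == RTE_MEM_EVENT_ALLOC)
		rte_vfio_dma_map(va, va, len); /* IOVA == VA assumed */
	else
		rte_vfio_dma_unmap(va, va, len);
}

static void
my_attach(void)
{
	/* register on first attach only */
	if (my_active_devices++ == 0)
		rte_mem_event_callback_register(MY_MEM_EVENT_CLB_NAME,
				my_mem_event_callback);
}

static void
my_detach(void)
{
	/* unregister on last detach only */
	if (--my_active_devices == 0)
		rte_mem_event_callback_unregister(MY_MEM_EVENT_CLB_NAME);
}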
  

Patch

diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.c b/lib/librte_eal/linuxapp/eal/eal_vfio.c
index 8b5f8fd..eb1a024 100644
--- a/lib/librte_eal/linuxapp/eal/eal_vfio.c
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio.c
@@ -18,6 +18,8 @@ 
 
 #ifdef VFIO_PRESENT
 
+#define VFIO_MEM_EVENT_CLB_NAME "vfio_mem_event_clb"
+
 /* per-process VFIO config */
 static struct vfio_config vfio_cfg;
 
@@ -250,6 +252,38 @@  vfio_group_device_count(int vfio_group_fd)
 	return vfio_cfg.vfio_groups[i].devices;
 }
 
+static void
+vfio_mem_event_callback(enum rte_mem_event type, const void *addr, size_t len)
+{
+	struct rte_memseg_list *msl;
+	struct rte_memseg *ms;
+	size_t cur_len = 0;
+
+	msl = rte_mem_virt2memseg_list(addr);
+
+	/* for IOVA as VA mode, no need to care for IOVA addresses */
+	if (rte_eal_iova_mode() == RTE_IOVA_VA) {
+		uint64_t vfio_va = (uint64_t)(uintptr_t)addr;
+		if (type == RTE_MEM_EVENT_ALLOC)
+			rte_vfio_dma_map(vfio_va, vfio_va, len);
+		else
+			rte_vfio_dma_unmap(vfio_va, vfio_va, len);
+		return;
+	}
+
+	/* memsegs are contiguous in memory */
+	ms = rte_mem_virt2memseg(addr, msl);
+	while (cur_len < len) {
+		if (type == RTE_MEM_EVENT_ALLOC)
+			rte_vfio_dma_map(ms->addr_64, ms->iova, ms->len);
+		else
+			rte_vfio_dma_unmap(ms->addr_64, ms->iova, ms->len);
+
+		cur_len += ms->len;
+		++ms;
+	}
+}
+
 int
 rte_vfio_clear_group(int vfio_group_fd)
 {
@@ -312,6 +346,8 @@  int
 rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr,
 		int *vfio_dev_fd, struct vfio_device_info *device_info)
 {
+	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+	rte_rwlock_t *mem_lock = &mcfg->memory_hotplug_lock;
 	struct vfio_group_status group_status = {
 			.argsz = sizeof(group_status)
 	};
@@ -399,6 +435,10 @@  rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr,
 				rte_vfio_clear_group(vfio_group_fd);
 				return -1;
 			}
+			/* lock memory hotplug before mapping and release it
+			 * after registering callback, to prevent races
+			 */
+			rte_rwlock_read_lock(mem_lock);
 			ret = t->dma_map_func(vfio_cfg.vfio_container_fd);
 			if (ret) {
 				RTE_LOG(ERR, EAL,
@@ -406,10 +446,17 @@  rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr,
 					dev_addr, errno, strerror(errno));
 				close(vfio_group_fd);
 				rte_vfio_clear_group(vfio_group_fd);
+				rte_rwlock_read_unlock(mem_lock);
 				return -1;
 			}
 
 			vfio_cfg.vfio_iommu_type = t;
+
+			/* register callback for mem events */
+			rte_mem_event_callback_register(VFIO_MEM_EVENT_CLB_NAME,
+					vfio_mem_event_callback);
+			/* unlock memory hotplug */
+			rte_rwlock_read_unlock(mem_lock);
 		}
 	}
 
@@ -447,6 +494,8 @@  int
 rte_vfio_release_device(const char *sysfs_base, const char *dev_addr,
 		    int vfio_dev_fd)
 {
+	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+	rte_rwlock_t *mem_lock = &mcfg->memory_hotplug_lock;
 	struct vfio_group_status group_status = {
 			.argsz = sizeof(group_status)
 	};
@@ -454,13 +503,20 @@  rte_vfio_release_device(const char *sysfs_base, const char *dev_addr,
 	int iommu_group_no;
 	int ret;
 
+	/* we don't want any DMA mapping messages to come while we're detaching
+	 * VFIO device, because this might be the last device and we might need
+	 * to unregister the callback.
+	 */
+	rte_rwlock_read_lock(mem_lock);
+
 	/* get group number */
 	ret = vfio_get_group_no(sysfs_base, dev_addr, &iommu_group_no);
 	if (ret <= 0) {
 		RTE_LOG(WARNING, EAL, "  %s not managed by VFIO driver\n",
 			dev_addr);
 		/* This is an error at this point. */
-		return -1;
+		ret = -1;
+		goto out;
 	}
 
 	/* get the actual group fd */
@@ -468,7 +524,8 @@  rte_vfio_release_device(const char *sysfs_base, const char *dev_addr,
 	if (vfio_group_fd <= 0) {
 		RTE_LOG(INFO, EAL, "vfio_get_group_fd failed for %s\n",
 				   dev_addr);
-		return -1;
+		ret = -1;
+		goto out;
 	}
 
 	/* At this point we got an active group. Closing it will make the
@@ -480,7 +537,8 @@  rte_vfio_release_device(const char *sysfs_base, const char *dev_addr,
 	if (close(vfio_dev_fd) < 0) {
 		RTE_LOG(INFO, EAL, "Error when closing vfio_dev_fd for %s\n",
 				   dev_addr);
-		return -1;
+		ret = -1;
+		goto out;
 	}
 
 	/* An VFIO group can have several devices attached. Just when there is
@@ -492,17 +550,30 @@  rte_vfio_release_device(const char *sysfs_base, const char *dev_addr,
 		if (close(vfio_group_fd) < 0) {
 			RTE_LOG(INFO, EAL, "Error when closing vfio_group_fd for %s\n",
 				dev_addr);
-			return -1;
+			ret = -1;
+			goto out;
 		}
 
 		if (rte_vfio_clear_group(vfio_group_fd) < 0) {
 			RTE_LOG(INFO, EAL, "Error when clearing group for %s\n",
 					   dev_addr);
-			return -1;
+			ret = -1;
+			goto out;
 		}
 	}
 
-	return 0;
+	/* if there are no active device groups, unregister the callback to
+	 * avoid spurious attempts to map/unmap memory from VFIO.
+	 */
+	if (vfio_cfg.vfio_active_groups == 0)
+		rte_mem_event_callback_unregister(VFIO_MEM_EVENT_CLB_NAME);
+
+	/* success */
+	ret = 0;
+
+out:
+	rte_rwlock_read_unlock(mem_lock);
+	return ret;
 }
 
 int
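
A note on the locking: the memory hotplug lock taken read-side here is the
same lock the EAL allocator is expected (per the rest of this series) to take
write-side while it changes the memory map and runs the registered callbacks.
Holding it across the initial dma_map_func() call and the callback
registration closes the window in which memory could be allocated after the
bulk map but before the callback exists, which would leave that memory
unmapped for DMA. A compressed sketch of the ordering, with the allocator
side assumed from the series rather than shown in this patch:

#include <rte_rwlock.h>

/* sketch only: the allocator side below is an assumption about the
 * rest of this series, not code from this patch */

static void
allocator_side(rte_rwlock_t *mem_lock)
{
	rte_rwlock_write_lock(mem_lock);
	/* grow or shrink the heap, then run all registered
	 * mem event callbacks for the affected area */
	rte_rwlock_write_unlock(mem_lock);
}

static void
vfio_attach_side(rte_rwlock_t *mem_lock)
{
	rte_rwlock_read_lock(mem_lock);
	/* 1) map everything that exists right now (dma_map_func) */
	/* 2) register the callback for anything allocated later */
	rte_rwlock_read_unlock(mem_lock);
	/* the write-locked allocator cannot run between steps 1 and 2,
	 * so no segment is missed and none is mapped twice */
}

The detach path takes the same read lock for the symmetric reason: the
callback must not fire between the last group being closed and the
unregister call.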