@@ -749,6 +749,8 @@ int rte_vfio_enable(const char *modname);
int rte_vfio_is_enabled(const char *modname);
int rte_vfio_noiommu_is_enabled(void);
int rte_vfio_clear_group(int vfio_group_fd);
+int rte_vfio_dma_map(uint64_t vaddr, uint64_t iova, uint64_t len);
+int rte_vfio_dma_unmap(uint64_t vaddr, uint64_t iova, uint64_t len);
int rte_vfio_setup_device(__rte_unused const char *sysfs_base,
__rte_unused const char *dev_addr,
@@ -784,3 +786,17 @@ int rte_vfio_clear_group(__rte_unused int vfio_group_fd)
{
return 0;
}
+
+/*
+ * Stub implementation: performs no mapping and always reports failure (-1).
+ */
+int __rte_experimental
+rte_vfio_dma_map(__rte_unused uint64_t vaddr, __rte_unused uint64_t iova,
+		__rte_unused uint64_t len)
+{
+	return -1;
+}
+
+/*
+ * Stub implementation: performs no unmapping and always reports failure (-1).
+ */
+int __rte_experimental
+rte_vfio_dma_unmap(__rte_unused uint64_t vaddr, __rte_unused uint64_t iova,
+		__rte_unused uint64_t len)
+{
+	return -1;
+}
@@ -127,6 +127,45 @@ int rte_vfio_noiommu_is_enabled(void);
int
rte_vfio_clear_group(int vfio_group_fd);
+/**
+ * Map memory region for use with VFIO.
+ *
+ * @param vaddr
+ * Starting virtual address of memory to be mapped.
+ *
+ * @param iova
+ * Starting IOVA address of memory to be mapped.
+ *
+ * @param len
+ * Length of memory segment being mapped.
+ *
+ * @return
+ * 0 if success.
+ * -1 on error.
+ */
+int __rte_experimental
+rte_vfio_dma_map(uint64_t vaddr, uint64_t iova, uint64_t len);
+
+
+/**
+ * Unmap memory region from VFIO.
+ *
+ * @param vaddr
+ * Starting virtual address of memory to be unmapped.
+ *
+ * @param iova
+ * Starting IOVA address of memory to be unmapped.
+ *
+ * @param len
+ * Length of memory segment being unmapped.
+ *
+ * @return
+ * 0 if success.
+ * -1 on error.
+ */
+int __rte_experimental
+rte_vfio_dma_unmap(uint64_t vaddr, uint64_t iova, uint64_t len);
+
#ifdef __cplusplus
}
#endif
@@ -22,17 +22,35 @@
static struct vfio_config vfio_cfg;
static int vfio_type1_dma_map(int);
+static int vfio_type1_dma_mem_map(int, uint64_t, uint64_t, uint64_t, int);
static int vfio_spapr_dma_map(int);
+static int vfio_spapr_dma_mem_map(int, uint64_t, uint64_t, uint64_t, int);
static int vfio_noiommu_dma_map(int);
+static int vfio_noiommu_dma_mem_map(int, uint64_t, uint64_t, uint64_t, int);
/* IOMMU types we support */
static const struct vfio_iommu_type iommu_types[] = {
/* x86 IOMMU, otherwise known as type 1 */
- { RTE_VFIO_TYPE1, "Type 1", &vfio_type1_dma_map},
+ {
+ .type_id = RTE_VFIO_TYPE1,
+ .name = "Type 1",
+ .dma_map_func = &vfio_type1_dma_map,
+ .dma_user_map_func = &vfio_type1_dma_mem_map
+ },
/* ppc64 IOMMU, otherwise known as spapr */
- { RTE_VFIO_SPAPR, "sPAPR", &vfio_spapr_dma_map},
+ {
+ .type_id = RTE_VFIO_SPAPR,
+ .name = "sPAPR",
+ .dma_map_func = &vfio_spapr_dma_map,
+ .dma_user_map_func = &vfio_spapr_dma_mem_map
+ },
/* IOMMU-less mode */
- { RTE_VFIO_NOIOMMU, "No-IOMMU", &vfio_noiommu_dma_map},
+ {
+ .type_id = RTE_VFIO_NOIOMMU,
+ .name = "No-IOMMU",
+ .dma_map_func = &vfio_noiommu_dma_map,
+ .dma_user_map_func = &vfio_noiommu_dma_mem_map
+ },
};
int
@@ -333,9 +351,10 @@ rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr,
*/
if (internal_config.process_type == RTE_PROC_PRIMARY &&
vfio_cfg.vfio_active_groups == 1) {
+ const struct vfio_iommu_type *t;
+
/* select an IOMMU type which we will be using */
- const struct vfio_iommu_type *t =
- vfio_set_iommu_type(vfio_cfg.vfio_container_fd);
+ t = vfio_set_iommu_type(vfio_cfg.vfio_container_fd);
if (!t) {
RTE_LOG(ERR, EAL,
" %s failed to select IOMMU type\n",
@@ -353,6 +372,8 @@ rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr,
rte_vfio_clear_group(vfio_group_fd);
return -1;
}
+
+ vfio_cfg.vfio_iommu_type = t;
}
}
@@ -668,23 +689,49 @@ static int
type1_map(const struct rte_memseg *ms, void *arg)
{
int *vfio_container_fd = arg;
+
+ return vfio_type1_dma_mem_map(*vfio_container_fd, ms->addr_64, ms->iova,
+ ms->len, 1);
+}
+
+static int
+vfio_type1_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
+ uint64_t len, int do_map)
+{
struct vfio_iommu_type1_dma_map dma_map;
+ struct vfio_iommu_type1_dma_unmap dma_unmap;
int ret;
- memset(&dma_map, 0, sizeof(dma_map));
- dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map);
- dma_map.vaddr = ms->addr_64;
- dma_map.size = ms->len;
- dma_map.iova = ms->iova;
- dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE;
-
- ret = ioctl(*vfio_container_fd, VFIO_IOMMU_MAP_DMA, &dma_map);
+ if (do_map != 0) {
+ memset(&dma_map, 0, sizeof(dma_map));
+ dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map);
+ dma_map.vaddr = vaddr;
+ dma_map.size = len;
+ dma_map.iova = iova;
+ dma_map.flags = VFIO_DMA_MAP_FLAG_READ |
+ VFIO_DMA_MAP_FLAG_WRITE;
- if (ret) {
- RTE_LOG(ERR, EAL, " cannot set up DMA remapping, error %i (%s)\n",
+ ret = ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA, &dma_map);
+ if (ret) {
+ RTE_LOG(ERR, EAL, " cannot set up DMA remapping, error %i (%s)\n",
errno, strerror(errno));
- return -1;
+ return -1;
+ }
+ } else {
+ memset(&dma_unmap, 0, sizeof(dma_unmap));
+ dma_unmap.argsz = sizeof(struct vfio_iommu_type1_dma_unmap);
+ dma_unmap.size = len;
+ dma_unmap.iova = iova;
+
+ ret = ioctl(vfio_container_fd, VFIO_IOMMU_UNMAP_DMA,
+ &dma_unmap);
+ if (ret) {
+ RTE_LOG(ERR, EAL, " cannot clear DMA remapping, error %i (%s)\n",
+ errno, strerror(errno));
+ return -1;
+ }
}
+
return 0;
}
@@ -694,12 +741,78 @@ vfio_type1_dma_map(int vfio_container_fd)
return rte_memseg_walk(type1_map, &vfio_container_fd);
}
+/*
+ * Map (do_map != 0) or unmap (do_map == 0) a single memory region for DMA on
+ * an sPAPR IOMMU container.
+ *
+ * Mapping first registers the virtual address range with the IOMMU
+ * (VFIO_IOMMU_SPAPR_REGISTER_MEMORY) and then installs the DMA mapping.
+ * Unmapping undoes those two steps in reverse order: the DMA mapping is
+ * removed first, then the range is unregistered.
+ *
+ * Returns 0 on success, -1 if any ioctl fails (the failure is logged).
+ */
+static int
+vfio_spapr_dma_do_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
+		uint64_t len, int do_map)
+{
+	struct vfio_iommu_type1_dma_map dma_map;
+	struct vfio_iommu_type1_dma_unmap dma_unmap;
+	struct vfio_iommu_spapr_register_memory reg = {
+		.argsz = sizeof(reg),
+		.flags = 0
+	};
+	int ret;
+
+	/* the same descriptor is used for both register and unregister */
+	reg.vaddr = (uintptr_t) vaddr;
+	reg.size = len;
+
+	if (do_map != 0) {
+		/* register the range before asking for it to be mapped */
+		ret = ioctl(vfio_container_fd,
+				VFIO_IOMMU_SPAPR_REGISTER_MEMORY, &reg);
+		if (ret) {
+			RTE_LOG(ERR, EAL, " cannot register vaddr for IOMMU, error %i (%s)\n",
+				errno, strerror(errno));
+			return -1;
+		}
+
+		memset(&dma_map, 0, sizeof(dma_map));
+		dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map);
+		dma_map.vaddr = vaddr;
+		dma_map.size = len;
+		dma_map.iova = iova;
+		dma_map.flags = VFIO_DMA_MAP_FLAG_READ |
+				VFIO_DMA_MAP_FLAG_WRITE;
+
+		ret = ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA, &dma_map);
+		if (ret) {
+			RTE_LOG(ERR, EAL, " cannot set up DMA remapping, error %i (%s)\n",
+				errno, strerror(errno));
+			return -1;
+		}
+	} else {
+		/* drop the DMA mapping before unregistering the memory */
+		memset(&dma_unmap, 0, sizeof(dma_unmap));
+		dma_unmap.argsz = sizeof(struct vfio_iommu_type1_dma_unmap);
+		dma_unmap.size = len;
+		dma_unmap.iova = iova;
+
+		ret = ioctl(vfio_container_fd, VFIO_IOMMU_UNMAP_DMA,
+				&dma_unmap);
+		if (ret) {
+			RTE_LOG(ERR, EAL, " cannot clear DMA remapping, error %i (%s)\n",
+				errno, strerror(errno));
+			return -1;
+		}
+
+		ret = ioctl(vfio_container_fd,
+				VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY, &reg);
+		if (ret) {
+			RTE_LOG(ERR, EAL, " cannot unregister vaddr for IOMMU, error %i (%s)\n",
+				errno, strerror(errno));
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * rte_memseg_walk() callback: map one memory segment for DMA.
+ * 'arg' points to the VFIO container fd.
+ *
+ * The low-level helper is called directly instead of
+ * vfio_spapr_dma_mem_map(): that function performs its own rte_memseg_walk()
+ * (and may start one that uses this very callback), so going through it here
+ * would nest a complete walk inside every step of the current walk.
+ */
+static int
+vfio_spapr_map_walk(const struct rte_memseg *ms, void *arg)
+{
+	int *vfio_container_fd = arg;
+
+	return vfio_spapr_dma_do_map(*vfio_container_fd, ms->addr_64, ms->iova,
+			ms->len, 1);
+}
+
struct spapr_walk_param {
uint64_t window_size;
uint64_t hugepage_sz;
};
static int
-spapr_window_size(const struct rte_memseg *ms, void *arg)
+vfio_spapr_window_size_walk(const struct rte_memseg *ms, void *arg)
{
struct spapr_walk_param *param = arg;
uint64_t max = ms->iova + ms->len;
@@ -713,39 +826,43 @@ spapr_window_size(const struct rte_memseg *ms, void *arg)
}
static int
-spapr_map(const struct rte_memseg *ms, void *arg)
-{
- struct vfio_iommu_type1_dma_map dma_map;
- struct vfio_iommu_spapr_register_memory reg = {
- .argsz = sizeof(reg),
- .flags = 0
+vfio_spapr_create_new_dma_window(int vfio_container_fd,
+ struct vfio_iommu_spapr_tce_create *create) {
+ struct vfio_iommu_spapr_tce_remove remove = {
+ .argsz = sizeof(remove),
+ };
+ struct vfio_iommu_spapr_tce_info info = {
+ .argsz = sizeof(info),
};
- int *vfio_container_fd = arg;
int ret;
- reg.vaddr = (uintptr_t) ms->addr;
- reg.size = ms->len;
- ret = ioctl(*vfio_container_fd,
- VFIO_IOMMU_SPAPR_REGISTER_MEMORY, &reg);
+ /* query spapr iommu info */
+ ret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_GET_INFO, &info);
if (ret) {
- RTE_LOG(ERR, EAL, " cannot register vaddr for IOMMU, error %i (%s)\n",
- errno, strerror(errno));
+ RTE_LOG(ERR, EAL, " cannot get iommu info, "
+ "error %i (%s)\n", errno, strerror(errno));
return -1;
}
- memset(&dma_map, 0, sizeof(dma_map));
- dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map);
- dma_map.vaddr = ms->addr_64;
- dma_map.size = ms->len;
- dma_map.iova = ms->iova;
- dma_map.flags = VFIO_DMA_MAP_FLAG_READ |
- VFIO_DMA_MAP_FLAG_WRITE;
-
- ret = ioctl(*vfio_container_fd, VFIO_IOMMU_MAP_DMA, &dma_map);
+ /* remove default DMA of 32 bit window */
+ remove.start_addr = info.dma32_window_start;
+ ret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_REMOVE, &remove);
+ if (ret) {
+ RTE_LOG(ERR, EAL, " cannot remove default DMA window, "
+ "error %i (%s)\n", errno, strerror(errno));
+ return -1;
+ }
+ /* create new DMA window */
+ ret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_CREATE, create);
if (ret) {
- RTE_LOG(ERR, EAL, " cannot set up DMA remapping, error %i (%s)\n",
- errno, strerror(errno));
+ RTE_LOG(ERR, EAL, " cannot create new DMA window, "
+ "error %i (%s)\n", errno, strerror(errno));
+ return -1;
+ }
+
+ if (create->start_addr != 0) {
+ RTE_LOG(ERR, EAL, " DMA window start address != 0\n");
return -1;
}
@@ -753,61 +870,82 @@ spapr_map(const struct rte_memseg *ms, void *arg)
}
+/*
+ * Map (do_map != 0) or unmap (do_map == 0) a caller-supplied memory region
+ * on an sPAPR IOMMU container.
+ *
+ * The DMA window size is recomputed by walking the memory segments. A map
+ * request whose IOVA lies beyond that window re-creates the window and
+ * re-maps every segment before mapping the requested region; an unmap
+ * request beyond the window is rejected.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
static int
-vfio_spapr_dma_map(int vfio_container_fd)
+vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
+ uint64_t len, int do_map)
{
struct spapr_walk_param param;
- int ret;
- struct vfio_iommu_spapr_tce_info info = {
- .argsz = sizeof(info),
- };
struct vfio_iommu_spapr_tce_create create = {
.argsz = sizeof(create),
};
- struct vfio_iommu_spapr_tce_remove remove = {
- .argsz = sizeof(remove),
- };
+ /* check if window size needs to be adjusted */
memset(&param, 0, sizeof(param));
- /* query spapr iommu info */
- ret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_GET_INFO, &info);
- if (ret) {
- RTE_LOG(ERR, EAL, " cannot get iommu info, "
- "error %i (%s)\n", errno, strerror(errno));
+ if (rte_memseg_walk(vfio_spapr_window_size_walk, &param) < 0) {
+ RTE_LOG(ERR, EAL, "Could not get window size\n");
return -1;
}
- /* remove default DMA of 32 bit window */
- remove.start_addr = info.dma32_window_start;
- ret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_REMOVE, &remove);
- if (ret) {
- RTE_LOG(ERR, EAL, " cannot remove default DMA window, "
- "error %i (%s)\n", errno, strerror(errno));
- return -1;
+ /* sPAPR requires window size to be a power of 2 */
+ create.window_size = rte_align64pow2(param.window_size);
+ create.page_shift = __builtin_ctzll(param.hugepage_sz);
+ create.levels = 1;
+
+ if (do_map) {
+ /* re-create window and remap the entire memory.
+ * NOTE(review): only the start IOVA is compared against the
+ * window; a region that starts inside it but ends beyond it
+ * is not detected here - confirm this is intended.
+ */
+ if (iova > create.window_size) {
+ if (vfio_spapr_create_new_dma_window(vfio_container_fd,
+ &create) < 0) {
+ RTE_LOG(ERR, EAL, "Could not create new DMA window\n");
+ return -1;
+ }
+ if (rte_memseg_walk(vfio_spapr_map_walk,
+ &vfio_container_fd) < 0) {
+ RTE_LOG(ERR, EAL, "Could not recreate DMA maps\n");
+ return -1;
+ }
+ }
+ /* now that we've remapped all of the memory that was present
+ * before, map the segment that we were requested to map.
+ */
+ if (vfio_spapr_dma_do_map(vfio_container_fd,
+ vaddr, iova, len, 1) < 0) {
+ RTE_LOG(ERR, EAL, "Could not map segment\n");
+ return -1;
+ }
+ } else {
+
+ /* for unmap, check if iova within DMA window */
+ if (iova > create.window_size) {
+ RTE_LOG(ERR, EAL, "iova beyond DMA window for unmap\n");
+ return -1;
+ }
+
+ /* report an unmap failure to the caller instead of hiding it */
+ if (vfio_spapr_dma_do_map(vfio_container_fd,
+ vaddr, iova, len, 0) < 0) {
+ RTE_LOG(ERR, EAL, "Could not unmap segment\n");
+ return -1;
+ }
}
+ return 0;
+}
+
+/*
+ * Initial DMA setup for an sPAPR IOMMU container: size a DMA window from the
+ * memory segments, create that window, then map every segment.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static int
+vfio_spapr_dma_map(int vfio_container_fd)
+{
+ struct vfio_iommu_spapr_tce_create create = {
+ .argsz = sizeof(create),
+ };
+ struct spapr_walk_param param;
+
+ memset(&param, 0, sizeof(param));
/* create DMA window from 0 to max(phys_addr + len) */
- rte_memseg_walk(spapr_window_size, &param);
+ rte_memseg_walk(vfio_spapr_window_size_walk, &param);
/* sPAPR requires window size to be a power of 2 */
create.window_size = rte_align64pow2(param.window_size);
create.page_shift = __builtin_ctzll(param.hugepage_sz);
create.levels = 1;
- ret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_CREATE, &create);
- if (ret) {
- RTE_LOG(ERR, EAL, " cannot create new DMA window, "
- "error %i (%s)\n", errno, strerror(errno));
- return -1;
- }
-
- if (create.start_addr != 0) {
- RTE_LOG(ERR, EAL, " DMA window start address != 0\n");
- return -1;
- }
-
+ /* the window must exist before any segment can be mapped into it */
+ if (vfio_spapr_create_new_dma_window(vfio_container_fd, &create) < 0) {
+ RTE_LOG(ERR, EAL, "Could not create new DMA window\n");
+ return -1;
+ }
+
/* map all DPDK segments for DMA. use 1:1 PA to IOVA mapping */
- if (rte_memseg_walk(spapr_map, &vfio_container_fd) < 0)
+ if (rte_memseg_walk(vfio_spapr_map_walk, &vfio_container_fd) < 0)
return -1;
return 0;
@@ -820,6 +958,49 @@ vfio_noiommu_dma_map(int __rte_unused vfio_container_fd)
return 0;
}
+/*
+ * Custom-region DMA hook for No-IOMMU mode.
+ * Every argument is ignored and success (0) is always returned.
+ */
+static int
+vfio_noiommu_dma_mem_map(int __rte_unused vfio_container_fd,
+		uint64_t __rte_unused vaddr,
+		uint64_t __rte_unused iova,
+		uint64_t __rte_unused len,
+		int __rte_unused do_map)
+{
+	/* DMA mapping is not needed in No-IOMMU mode */
+	return 0;
+}
+
+/*
+ * Dispatch a custom-region map (do_map != 0) or unmap (do_map == 0) request
+ * to the handler of the IOMMU type recorded in vfio_cfg.
+ *
+ * Returns the handler's result, or -1 when no IOMMU type has been recorded
+ * or the recorded type provides no custom-region handler.
+ */
+static int
+vfio_dma_mem_map(uint64_t vaddr, uint64_t iova, uint64_t len, int do_map)
+{
+	const struct vfio_iommu_type *iommu = vfio_cfg.vfio_iommu_type;
+	vfio_dma_user_func_t handler;
+
+	if (!iommu) {
+		RTE_LOG(ERR, EAL, " VFIO support not initialized\n");
+		return -1;
+	}
+
+	handler = iommu->dma_user_map_func;
+	if (!handler) {
+		RTE_LOG(ERR, EAL,
+			" VFIO custom DMA region maping not supported by IOMMU %s\n",
+			iommu->name);
+		return -1;
+	}
+
+	return handler(vfio_cfg.vfio_container_fd, vaddr, iova, len, do_map);
+}
+
+/* Public entry point: forwards to vfio_dma_mem_map() as a map request. */
+int __rte_experimental
+rte_vfio_dma_map(uint64_t vaddr, uint64_t iova, uint64_t len)
+{
+	const int do_map = 1;
+
+	return vfio_dma_mem_map(vaddr, iova, len, do_map);
+}
+
+/* Public entry point: forwards to vfio_dma_mem_map() as an unmap request. */
+int __rte_experimental
+rte_vfio_dma_unmap(uint64_t vaddr, uint64_t iova, uint64_t len)
+{
+	const int do_map = 0;
+
+	return vfio_dma_mem_map(vaddr, iova, len, do_map);
+}
+
int
rte_vfio_noiommu_is_enabled(void)
{
@@ -852,4 +1033,20 @@ rte_vfio_noiommu_is_enabled(void)
return c == 'Y';
}
+#else
+
+/*
+ * Stub for the #else branch: performs no mapping and always reports
+ * failure (-1).
+ */
+int __rte_experimental
+rte_vfio_dma_map(__rte_unused uint64_t vaddr, __rte_unused uint64_t iova,
+		__rte_unused uint64_t len)
+{
+	return -1;
+}
+
+/*
+ * Stub for the #else branch: performs no unmapping and always reports
+ * failure (-1).
+ */
+int __rte_experimental
+rte_vfio_dma_unmap(__rte_unused uint64_t vaddr, __rte_unused uint64_t iova,
+		__rte_unused uint64_t len)
+{
+	return -1;
+}
+
#endif
@@ -19,6 +19,7 @@
#ifdef VFIO_PRESENT
+#include <stdint.h>
#include <linux/vfio.h>
#define RTE_VFIO_TYPE1 VFIO_TYPE1_IOMMU
@@ -26,6 +27,7 @@
#ifndef VFIO_SPAPR_TCE_v2_IOMMU
#define RTE_VFIO_SPAPR 7
#define VFIO_IOMMU_SPAPR_REGISTER_MEMORY _IO(VFIO_TYPE, VFIO_BASE + 17)
+#define VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY _IO(VFIO_TYPE, VFIO_BASE + 18)
#define VFIO_IOMMU_SPAPR_TCE_CREATE _IO(VFIO_TYPE, VFIO_BASE + 19)
#define VFIO_IOMMU_SPAPR_TCE_REMOVE _IO(VFIO_TYPE, VFIO_BASE + 20)
@@ -110,6 +112,7 @@ struct vfio_config {
int vfio_enabled;
int vfio_container_fd;
int vfio_active_groups;
+ const struct vfio_iommu_type *vfio_iommu_type;
struct vfio_group vfio_groups[VFIO_MAX_GROUPS];
};
@@ -119,9 +122,18 @@ struct vfio_config {
* */
typedef int (*vfio_dma_func_t)(int);
+/* Custom memory region DMA mapping function prototype.
+ * Takes the VFIO container fd, virtual address, IOVA, length and
+ * operation type (0 to unmap, 1 to map) as parameters.
+ * Returns 0 on success, -1 on error.
+ **/
+typedef int (*vfio_dma_user_func_t)(int fd, uint64_t vaddr, uint64_t iova,
+ uint64_t len, int do_map);
+
struct vfio_iommu_type {
int type_id;
const char *name;
+ vfio_dma_user_func_t dma_user_map_func;
vfio_dma_func_t dma_map_func;
};
@@ -266,5 +266,7 @@ EXPERIMENTAL {
rte_service_start_with_defaults;
rte_socket_count;
rte_socket_id_by_idx;
+ rte_vfio_dma_map;
+ rte_vfio_dma_unmap;
} DPDK_18.02;