[dpdk-dev] [RFC] eal/memory: introducing an option to set iova as va

Santosh Shukla santosh.shukla at caviumnetworks.com
Wed May 24 18:11:01 CEST 2017


Some NPU hardware, such as OCTEONTX, follows a push model to get
packets from the pktio device, where packet allocation and
freeing are done by the HW. Since the HW can operate only on
IOVA with the help of the SMMU/IOMMU, when a packet is received from the
Ethernet device, its address is the IOVA (which is the PA in the existing scheme).

Mapping IOVA as PA is expensive on such HW, because the address of
every packet needs to be converted from PA/IOVA to VA.

This patch proposes a scheme where the user can set IOVA
as VA by using an EAL command line argument (--iova-va). This helps to
avoid the costly lookup for the VA in SW by leveraging the SMMU
translation feature.

Signed-off-by: Santosh Shukla <santosh.shukla at caviumnetworks.com>
---
 lib/librte_eal/bsdapp/eal/eal_memory.c          |  6 ++++++
 lib/librte_eal/bsdapp/eal/rte_eal_version.map   |  7 +++++++
 lib/librte_eal/common/eal_common_options.c      |  6 ++++++
 lib/librte_eal/common/eal_internal_cfg.h        |  1 +
 lib/librte_eal/common/eal_options.h             |  2 ++
 lib/librte_eal/common/include/rte_memory.h      |  3 +++
 lib/librte_eal/linuxapp/eal/eal_memory.c        | 12 ++++++++++++
 lib/librte_eal/linuxapp/eal/rte_eal_version.map |  7 +++++++
 8 files changed, 44 insertions(+)

diff --git a/lib/librte_eal/bsdapp/eal/eal_memory.c b/lib/librte_eal/bsdapp/eal/eal_memory.c
index 3614da8db..6c8c2c96e 100644
--- a/lib/librte_eal/bsdapp/eal/eal_memory.c
+++ b/lib/librte_eal/bsdapp/eal/eal_memory.c
@@ -47,6 +47,12 @@
 
 #define EAL_PAGE_SIZE (sysconf(_SC_PAGESIZE))
 
+int __rte_unused
+rte_mem_is_iova_as_va(void)
+{
+	return internal_config.iova_va;
+}
+
 /*
  * Get physical address of any mapped virtual address in the current process.
  */
diff --git a/lib/librte_eal/bsdapp/eal/rte_eal_version.map b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
index 2e48a7366..6e020ca7f 100644
--- a/lib/librte_eal/bsdapp/eal/rte_eal_version.map
+++ b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
@@ -193,3 +193,10 @@ DPDK_17.05 {
 	vfio_get_group_no;
 
 } DPDK_17.02;
+
+DPDK_17.08 {
+	global:
+
+	rte_mem_is_iova_as_va;
+
+} DPDK_17.05;
diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c
index f470195f3..164123ef0 100644
--- a/lib/librte_eal/common/eal_common_options.c
+++ b/lib/librte_eal/common/eal_common_options.c
@@ -95,6 +95,7 @@ eal_long_options[] = {
 	{OPT_VFIO_INTR,         1, NULL, OPT_VFIO_INTR_NUM        },
 	{OPT_VMWARE_TSC_MAP,    0, NULL, OPT_VMWARE_TSC_MAP_NUM   },
 	{OPT_XEN_DOM0,          0, NULL, OPT_XEN_DOM0_NUM         },
+	{OPT_IOVA_AS_VA,        0, NULL, OPT_IOVA_AS_VA_NUM       },
 	{0,                     0, NULL, 0                        }
 };
 
@@ -876,6 +877,10 @@ eal_parse_common_option(int opt, const char *optarg,
 		conf->no_pci = 1;
 		break;
 
+	case OPT_IOVA_AS_VA_NUM:
+		conf->iova_va = 1;
+		break;
+
 	case OPT_NO_HPET_NUM:
 		conf->no_hpet = 1;
 		break;
@@ -1083,5 +1088,6 @@ eal_common_usage(void)
 	       "  --"OPT_NO_PCI"            Disable PCI\n"
 	       "  --"OPT_NO_HPET"           Disable HPET\n"
 	       "  --"OPT_NO_SHCONF"         No shared config (mmap'd files)\n"
+	       "  --"OPT_IOVA_AS_VA"        Use va addr as iova\n"
 	       "\n", RTE_MAX_LCORE);
 }
diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h
index 7b7e8c887..6293ed0aa 100644
--- a/lib/librte_eal/common/eal_internal_cfg.h
+++ b/lib/librte_eal/common/eal_internal_cfg.h
@@ -84,6 +84,7 @@ struct internal_config {
 	const char *hugepage_dir;         /**< specific hugetlbfs directory to use */
 
 	unsigned num_hugepage_sizes;      /**< how many sizes on this system */
+	volatile unsigned iova_va;	  /**< use va addr as iova */
 	struct hugepage_info hugepage_info[MAX_HUGEPAGE_SIZES];
 };
 extern struct internal_config internal_config; /**< Global EAL configuration. */
diff --git a/lib/librte_eal/common/eal_options.h b/lib/librte_eal/common/eal_options.h
index a881c62e2..50f98edea 100644
--- a/lib/librte_eal/common/eal_options.h
+++ b/lib/librte_eal/common/eal_options.h
@@ -83,6 +83,8 @@ enum {
 	OPT_VMWARE_TSC_MAP_NUM,
 #define OPT_XEN_DOM0          "xen-dom0"
 	OPT_XEN_DOM0_NUM,
+#define OPT_IOVA_AS_VA        "iova-va"
+	OPT_IOVA_AS_VA_NUM,
 	OPT_LONG_MAX_NUM
 };
 
diff --git a/lib/librte_eal/common/include/rte_memory.h b/lib/librte_eal/common/include/rte_memory.h
index 4aa5d1f77..c6c57e138 100644
--- a/lib/librte_eal/common/include/rte_memory.h
+++ b/lib/librte_eal/common/include/rte_memory.h
@@ -258,6 +258,9 @@ rte_mem_phy2mch(int32_t memseg_id __rte_unused, const phys_addr_t phy_addr)
 }
 #endif
 
+int
+rte_mem_is_iova_as_va(void);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c
index ebe068334..d7a373ba2 100644
--- a/lib/librte_eal/linuxapp/eal/eal_memory.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
@@ -109,6 +109,12 @@ int rte_xen_dom0_supported(void)
 }
 #endif
 
+int
+rte_mem_is_iova_as_va(void)
+{
+	return internal_config.iova_va;
+}
+
 /**
  * @file
  * Huge page mapping under linux
@@ -169,6 +175,9 @@ rte_mem_virt2phy(const void *virtaddr)
 	int page_size;
 	off_t offset;
 
+	if (rte_mem_is_iova_as_va())
+		return (uintptr_t)virtaddr;
+
 	/* when using dom0, /proc/self/pagemap always returns 0, check in
 	 * dpdk memory by browsing the memsegs */
 	if (rte_xen_dom0_supported()) {
@@ -480,6 +489,9 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl,
 		}
 		else {
 			hugepg_tbl[i].final_va = virtaddr;
+
+			if (rte_mem_is_iova_as_va())
+				hugepg_tbl[i].physaddr = (uintptr_t)virtaddr;
 		}
 
 		if (orig) {
diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
index 670bab3a5..b0ba2233f 100644
--- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
+++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
@@ -198,3 +198,10 @@ DPDK_17.05 {
 	vfio_get_group_no;
 
 } DPDK_17.02;
+
+DPDK_17.08 {
+	global:
+
+	rte_mem_is_iova_as_va;
+
+} DPDK_17.05;
-- 
2.11.0



More information about the dev mailing list