[dpdk-dev] [PATCH] eal: add request to map reserved physical memory

Ajit Khaparde ajit.khaparde at broadcom.com
Wed Mar 28 06:51:20 CEST 2018


From: Srinath Mannam <srinath.mannam at broadcom.com>

Add an EAL option, --iso-cmem, that requests reserved physical memory
from the kernel (via /dev/cmem) and maps it into user space.
This memory will then be mapped to IOVA using VFIO and provided
to SPDK, which uses it to allocate NVMe completion queues (CQs).

Signed-off-by: Srinath Mannam <srinath.mannam at broadcom.com>
Signed-off-by: Scott Branden <scott.branden at broadcom.com>
Signed-off-by: Ajit Khaparde <ajit.khaparde at broadcom.com>
---
 lib/librte_eal/common/eal_common_options.c |  5 ++
 lib/librte_eal/common/eal_internal_cfg.h   |  1 +
 lib/librte_eal/common/eal_options.h        |  2 +
 lib/librte_eal/common/include/rte_eal.h    |  8 ++++
 lib/librte_eal/common/include/rte_malloc.h |  7 +++
 lib/librte_eal/common/rte_malloc.c         | 17 +++++++
 lib/librte_eal/linuxapp/eal/eal.c          | 75 ++++++++++++++++++++++++++++++
 7 files changed, 115 insertions(+)

diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c
index 8a51adee6..7b929fde3 100644
--- a/lib/librte_eal/common/eal_common_options.c
+++ b/lib/librte_eal/common/eal_common_options.c
@@ -73,6 +73,7 @@ eal_long_options[] = {
 	{OPT_VDEV,              1, NULL, OPT_VDEV_NUM             },
 	{OPT_VFIO_INTR,         1, NULL, OPT_VFIO_INTR_NUM        },
 	{OPT_VMWARE_TSC_MAP,    0, NULL, OPT_VMWARE_TSC_MAP_NUM   },
+	{OPT_ISO_CMEM,          0, NULL, OPT_ISO_CMEM_NUM         },
 	{0,                     0, NULL, 0                        }
 };
 
@@ -1119,6 +1120,10 @@ eal_parse_common_option(int opt, const char *optarg,
 		conf->no_pci = 1;
 		break;
 
+	case OPT_ISO_CMEM_NUM:
+		conf->iso_cmem = 1;
+		break;
+
 	case OPT_NO_HPET_NUM:
 		conf->no_hpet = 1;
 		break;
diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h
index a0082d121..7c06b9918 100644
--- a/lib/librte_eal/common/eal_internal_cfg.h
+++ b/lib/librte_eal/common/eal_internal_cfg.h
@@ -37,6 +37,7 @@ struct internal_config {
 	volatile unsigned no_hugetlbfs;   /**< true to disable hugetlbfs */
 	unsigned hugepage_unlink;         /**< true to unlink backing files */
 	volatile unsigned no_pci;         /**< true to disable PCI */
+	unsigned int iso_cmem;            /**< true to enable isolated cmem */
 	volatile unsigned no_hpet;        /**< true to disable HPET */
 	volatile unsigned vmware_tsc_map; /**< true to use VMware TSC mapping
 										* instead of native TSC */
diff --git a/lib/librte_eal/common/eal_options.h b/lib/librte_eal/common/eal_options.h
index e86c71142..d6fe9ca97 100644
--- a/lib/librte_eal/common/eal_options.h
+++ b/lib/librte_eal/common/eal_options.h
@@ -55,6 +55,8 @@ enum {
 	OPT_VFIO_INTR_NUM,
 #define OPT_VMWARE_TSC_MAP    "vmware-tsc-map"
 	OPT_VMWARE_TSC_MAP_NUM,
+#define OPT_ISO_CMEM          "iso-cmem"
+	OPT_ISO_CMEM_NUM,
 	OPT_LONG_MAX_NUM
 };
 
diff --git a/lib/librte_eal/common/include/rte_eal.h b/lib/librte_eal/common/include/rte_eal.h
index 044474e6c..322e2e3c2 100644
--- a/lib/librte_eal/common/include/rte_eal.h
+++ b/lib/librte_eal/common/include/rte_eal.h
@@ -73,6 +73,14 @@ struct rte_config {
 	struct rte_mem_config *mem_config;
 } __attribute__((__packed__));
 
+/**
+ * Get the global custom memory segment structure.
+ *
+ * @return
+ *   A pointer to the cmemseg structure, or NULL if none is enabled/mapped.
+ */
+struct rte_memseg *rte_eal_get_iso_cmemseg(void);
+
 /**
  * Get the global configuration structure.
  *
diff --git a/lib/librte_eal/common/include/rte_malloc.h b/lib/librte_eal/common/include/rte_malloc.h
index f02a8ba1d..a2ba8be29 100644
--- a/lib/librte_eal/common/include/rte_malloc.h
+++ b/lib/librte_eal/common/include/rte_malloc.h
@@ -156,6 +156,13 @@ rte_realloc(void *ptr, size_t size, unsigned align);
 void *
 rte_malloc_socket(const char *type, size_t size, unsigned align, int socket);
 
+/**
+ * Allocate memory with rte_malloc_socket(), using the socket id of the
+ * isolated cmem segment when one is mapped and the given socket otherwise.
+ */
+void *
+rte_malloc_cmem(const char *type, size_t size, size_t align, int socket);
+
 /**
  * Allocate zero'ed memory from the heap.
  *
diff --git a/lib/librte_eal/common/rte_malloc.c b/lib/librte_eal/common/rte_malloc.c
index e0e0d0b3e..75085be1f 100644
--- a/lib/librte_eal/common/rte_malloc.c
+++ b/lib/librte_eal/common/rte_malloc.c
@@ -33,6 +33,23 @@ void rte_free(void *addr)
 		rte_panic("Fatal error: Invalid memory\n");
 }
 
+/*
+ * Allocate from the socket id of the isolated cmem segment when one is
+ * mapped; otherwise allocate on the caller's socket_id. The type tag is
+ * forwarded to rte_malloc_socket(), whose align parameter is unsigned, so
+ * a wider align value is converted to unsigned at that call.
+ */
+void *
+rte_malloc_cmem(const char *type, size_t size, size_t align, int socket_id)
+{
+	struct rte_memseg *cmemseg = rte_eal_get_iso_cmemseg();
+
+	if (cmemseg)
+		socket_id = cmemseg->socket_id;
+
+	return rte_malloc_socket(type, size, align, socket_id);
+}
+
 /*
  * Allocate memory on specified heap.
  */
diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c
index 2ecd07b95..e8cb0b0f9 100644
--- a/lib/librte_eal/linuxapp/eal/eal.c
+++ b/lib/librte_eal/linuxapp/eal/eal.c
@@ -66,6 +66,7 @@ static rte_usage_hook_t	rte_application_usage_hook = NULL;
 
 /* early configuration structure, when memory config is not mmapped */
 static struct rte_mem_config early_mem_config;
+static struct rte_memseg *iso_cmemseg;
 
 /* define fd variable here, because file needs to be kept open for the
  * duration of the program, as we hold a write lock on it in the primary proc */
@@ -110,12 +111,83 @@ rte_eal_mbuf_default_mempool_ops(void)
 }
 
 /* Return a pointer to the configuration structure */
+struct rte_memseg *
+rte_eal_get_iso_cmemseg(void)
+{
+	if (internal_config.iso_cmem == 1)
+		return iso_cmemseg;
+
+	return NULL;
+}
+
 struct rte_config *
 rte_eal_get_configuration(void)
 {
 	return &rte_config;
 }
 
+/* Map /dev/cmem into the first free memseg slot and tag it with a socket id
+ * that none of the preceding memsegs uses. Returns the slot, NULL on failure. */
+static struct rte_memseg *map_cmem_virtual_area(void)
+{
+	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+	struct rte_memseg *cmemseg = NULL; /* NULL until a free slot is found */
+	unsigned int socket = 0; /* bitmask of socket ids seen before that slot */
+	unsigned int i;
+	off_t filesize;
+	void *addr;
+	int fd;
+
+	if (mcfg == NULL)
+		return NULL;
+
+	for (i = 0; i < RTE_MAX_MEMSEG; i++) {
+		if (mcfg->memseg[i].addr == NULL) {
+			cmemseg = &mcfg->memseg[i];
+			break;
+		}
+		socket |= (1u << mcfg->memseg[i].socket_id);
+	}
+
+	if (cmemseg == NULL)
+		return NULL;
+
+	/* pick the lowest socket id that is not present in the mask */
+	for (i = 0; i < RTE_MAX_NUMA_NODES; i++) {
+		if (!(socket & (1u << i)))
+			break;
+	}
+	if (i == RTE_MAX_NUMA_NODES)
+		return NULL;
+
+	fd = open("/dev/cmem", O_RDWR);
+	if (fd < 0) {
+		RTE_LOG(ERR, EAL, "Cannot open /dev/cmem\n");
+		return NULL;
+	}
+
+	filesize = lseek(fd, 0, SEEK_END);
+	if (filesize < 0) {
+		close(fd);
+		return NULL;
+	}
+
+	addr = mmap(NULL, filesize, (PROT_READ | PROT_WRITE),
+			MAP_SHARED, fd, 0);
+	close(fd);
+	if (addr == MAP_FAILED)
+		return NULL;
+
+	/* fill in the slot only now, so a failure above leaves it untouched */
+	memset(addr, 0, filesize);
+	cmemseg->phys_addr = rte_mem_virt2phy(addr);
+	cmemseg->addr = addr;
+	cmemseg->len = filesize;
+	cmemseg->socket_id = i;
+
+	return cmemseg;
+}
+
 enum rte_iova_mode
 rte_eal_iova_mode(void)
 {
@@ -862,6 +934,9 @@ rte_eal_init(int argc, char **argv)
 	/* the directories are locked during eal_hugepage_info_init */
 	eal_hugedirs_unlock();
 
+	if (internal_config.iso_cmem == 1)
+		iso_cmemseg = map_cmem_virtual_area();
+
 	if (rte_eal_memzone_init() < 0) {
 		rte_eal_init_alert("Cannot init memzone\n");
 		rte_errno = ENODEV;
-- 
2.14.3 (Apple Git-98)



More information about the dev mailing list