[dpdk-dev] [PATCH v3 4/6] ena: allocate coherent memory in node-aware way

Jan Medala jan at semihalf.com
Thu Jun 30 17:04:57 CEST 2016


On multi-node systems try to allocate memory possibly closest to requesting
node.
While allocating (coherent) memory, get information about calling node Id
and basing on it reserve memzone.

Signed-off-by: Alexander Matushevsky <matua at amazon.com>
Signed-off-by: Jakub Palider <jpa at semihalf.com>
Signed-off-by: Jan Medala <jan at semihalf.com>
---
 drivers/net/ena/base/ena_com.c       | 51 ++++++++++++++++++++++++++----------
 drivers/net/ena/base/ena_plat_dpdk.h | 23 ++++++++++++++++
 drivers/net/ena/ena_ethdev.c         | 15 +++++++++++
 3 files changed, 75 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ena/base/ena_com.c b/drivers/net/ena/base/ena_com.c
index b5b8cd9..a3649d8 100644
--- a/drivers/net/ena/base/ena_com.c
+++ b/drivers/net/ena/base/ena_com.c
@@ -329,6 +329,7 @@ static int ena_com_init_io_sq(struct ena_com_dev *ena_dev,
 			      struct ena_com_io_sq *io_sq)
 {
 	size_t size;
+	int dev_node;
 
 	ENA_TOUCH(ctx);
 
@@ -341,15 +342,29 @@ static int ena_com_init_io_sq(struct ena_com_dev *ena_dev,
 
 	size = io_sq->desc_entry_size * io_sq->q_depth;
 
-	if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST)
-		ENA_MEM_ALLOC_COHERENT(ena_dev->dmadev,
-				       size,
-				       io_sq->desc_addr.virt_addr,
-				       io_sq->desc_addr.phys_addr,
-				       io_sq->desc_addr.mem_handle);
-	else
-		io_sq->desc_addr.virt_addr =
-			ENA_MEM_ALLOC(ena_dev->dmadev, size);
+	if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST) {
+		ENA_MEM_ALLOC_COHERENT_NODE(ena_dev->dmadev,
+					    size,
+					    io_sq->desc_addr.virt_addr,
+					    io_sq->desc_addr.phys_addr,
+					    ctx->numa_node,
+					    dev_node);
+		if (!io_sq->desc_addr.virt_addr)
+			ENA_MEM_ALLOC_COHERENT(ena_dev->dmadev,
+					       size,
+					       io_sq->desc_addr.virt_addr,
+					       io_sq->desc_addr.phys_addr,
+					       io_sq->desc_addr.mem_handle);
+	} else {
+		ENA_MEM_ALLOC_NODE(ena_dev->dmadev,
+				   size,
+				   io_sq->desc_addr.virt_addr,
+				   ctx->numa_node,
+				   dev_node);
+		if (!io_sq->desc_addr.virt_addr)
+			io_sq->desc_addr.virt_addr =
+				ENA_MEM_ALLOC(ena_dev->dmadev, size);
+	}
 
 	if (!io_sq->desc_addr.virt_addr) {
 		ena_trc_err("memory allocation failed");
@@ -368,6 +383,7 @@ static int ena_com_init_io_cq(struct ena_com_dev *ena_dev,
 			      struct ena_com_io_cq *io_cq)
 {
 	size_t size;
+	int prev_node;
 
 	ENA_TOUCH(ctx);
 	memset(&io_cq->cdesc_addr, 0x0, sizeof(struct ena_com_io_desc_addr));
@@ -380,11 +396,18 @@ static int ena_com_init_io_cq(struct ena_com_dev *ena_dev,
 
 	size = io_cq->cdesc_entry_size_in_bytes * io_cq->q_depth;
 
-	ENA_MEM_ALLOC_COHERENT(ena_dev->dmadev,
-			       size,
-			       io_cq->cdesc_addr.virt_addr,
-			       io_cq->cdesc_addr.phys_addr,
-			       io_cq->cdesc_addr.mem_handle);
+	ENA_MEM_ALLOC_COHERENT_NODE(ena_dev->dmadev,
+				    size,
+				    io_cq->cdesc_addr.virt_addr,
+				    io_cq->cdesc_addr.phys_addr,
+				    ctx->numa_node,
+				    prev_node);
+	if (!io_cq->cdesc_addr.virt_addr)
+		ENA_MEM_ALLOC_COHERENT(ena_dev->dmadev,
+				       size,
+				       io_cq->cdesc_addr.virt_addr,
+				       io_cq->cdesc_addr.phys_addr,
+				       io_cq->cdesc_addr.mem_handle);
 
 	if (!io_cq->cdesc_addr.virt_addr) {
 		ena_trc_err("memory allocation failed");
diff --git a/drivers/net/ena/base/ena_plat_dpdk.h b/drivers/net/ena/base/ena_plat_dpdk.h
index 3c0203f..b1ed80c 100644
--- a/drivers/net/ena/base/ena_plat_dpdk.h
+++ b/drivers/net/ena/base/ena_plat_dpdk.h
@@ -196,6 +196,29 @@ typedef uint64_t dma_addr_t;
 		   ENA_TOUCH(dmadev);					\
 		   rte_free(virt); })
 
+#define ENA_MEM_ALLOC_COHERENT_NODE(dmadev, size, virt, phys, node, dev_node) \
+	do {								\
+		const struct rte_memzone *mz;				\
+		char z_name[RTE_MEMZONE_NAMESIZE];			\
+		ENA_TOUCH(dmadev); ENA_TOUCH(dev_node);			\
+		snprintf(z_name, sizeof(z_name),			\
+				"ena_alloc_%d", ena_alloc_cnt++);	\
+		mz = rte_memzone_reserve(z_name, size, node, 0); \
+		virt = mz->addr;					\
+		phys = mz->phys_addr;					\
+	} while (0)
+
+#define ENA_MEM_ALLOC_NODE(dmadev, size, virt, node, dev_node) \
+	do {								\
+		const struct rte_memzone *mz;				\
+		char z_name[RTE_MEMZONE_NAMESIZE];			\
+		ENA_TOUCH(dmadev); ENA_TOUCH(dev_node);			\
+		snprintf(z_name, sizeof(z_name),			\
+				"ena_alloc_%d", ena_alloc_cnt++);	\
+		mz = rte_memzone_reserve(z_name, size, node, 0); \
+		virt = mz->addr;					\
+	} while (0)
+
 #define ENA_MEM_ALLOC(dmadev, size) rte_zmalloc(NULL, size, 1)
 #define ENA_MEM_FREE(dmadev, ptr) ({ENA_TOUCH(dmadev); rte_free(ptr); })
 
diff --git a/drivers/net/ena/ena_ethdev.c b/drivers/net/ena/ena_ethdev.c
index 25637a6..f8dbde4 100644
--- a/drivers/net/ena/ena_ethdev.c
+++ b/drivers/net/ena/ena_ethdev.c
@@ -38,6 +38,7 @@
 #include <rte_dev.h>
 #include <rte_errno.h>
 #include <rte_version.h>
+#include <rte_eal_memconfig.h>
 
 #include "ena_ethdev.h"
 #include "ena_logs.h"
@@ -232,6 +233,18 @@ static struct eth_dev_ops ena_dev_ops = {
 	.reta_query           = ena_rss_reta_query,
 };
 
+#define NUMA_NO_NODE	SOCKET_ID_ANY
+
+static inline int ena_cpu_to_node(int cpu)
+{
+	struct rte_config *config = rte_eal_get_configuration();
+
+	if (likely(cpu < RTE_MAX_MEMZONE))
+		return config->mem_config->memzone[cpu].socket_id;
+
+	return NUMA_NO_NODE;
+}
+
 static inline void ena_rx_mbuf_prepare(struct rte_mbuf *mbuf,
 				       struct ena_com_rx_ctx *ena_rx_ctx)
 {
@@ -959,6 +972,7 @@ static int ena_tx_queue_setup(struct rte_eth_dev *dev,
 	ctx.msix_vector = -1; /* admin interrupts not used */
 	ctx.mem_queue_type = ena_dev->tx_mem_queue_type;
 	ctx.queue_size = adapter->tx_ring_size;
+	ctx.numa_node = ena_cpu_to_node(queue_idx);
 
 	rc = ena_com_create_io_queue(ena_dev, &ctx);
 	if (rc) {
@@ -1049,6 +1063,7 @@ static int ena_rx_queue_setup(struct rte_eth_dev *dev,
 	ctx.mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
 	ctx.msix_vector = -1; /* admin interrupts not used */
 	ctx.queue_size = adapter->rx_ring_size;
+	ctx.numa_node = ena_cpu_to_node(queue_idx);
 
 	rc = ena_com_create_io_queue(ena_dev, &ctx);
 	if (rc)
-- 
2.8.2



More information about the dev mailing list