[v7,2/4] kni: add IOVA = VA support in KNI lib
Checks
Commit Message
From: Vamsi Attunuru <vattunuru@marvell.com>
The current KNI implementation only operates in IOVA=PA mode; this patch adds the
required functionality in KNI lib to support IOVA=VA mode.
KNI kernel module requires device info to get iommu domain related
information for IOVA addr related translations. Patch defines device
related info in rte_kni_device_info struct and passes device info to
kernel KNI module when IOVA=VA mode is enabled.
Signed-off-by: Vamsi Attunuru <vattunuru@marvell.com>
Signed-off-by: Kiran Kumar K <kirankumark@marvell.com>
---
lib/librte_eal/linux/eal/include/rte_kni_common.h | 8 ++++
lib/librte_kni/Makefile | 1 +
lib/librte_kni/meson.build | 1 +
lib/librte_kni/rte_kni.c | 54 +++++++++++++++++++++++
4 files changed, 64 insertions(+)
@@ -111,6 +111,13 @@ struct rte_kni_device_info {
void * mbuf_va;
phys_addr_t mbuf_phys;
+ /* PCI info */
+ uint16_t vendor_id; /**< Vendor ID or PCI_ANY_ID. */
+ uint16_t device_id; /**< Device ID or PCI_ANY_ID. */
+ uint8_t bus; /**< Device bus */
+ uint8_t devid; /**< Device ID */
+ uint8_t function; /**< Device function. */
+
uint16_t group_id; /**< Group ID */
uint32_t core_id; /**< core ID to bind for kernel thread */
@@ -121,6 +128,7 @@ struct rte_kni_device_info {
unsigned mbuf_size;
unsigned int mtu;
uint8_t mac_addr[6];
+ uint8_t iova_mode;
};
#define KNI_DEVICE "kni"
@@ -7,6 +7,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
LIB = librte_kni.a
CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 -fno-strict-aliasing
+CFLAGS += -I$(RTE_SDK)/drivers/bus/pci
LDLIBS += -lrte_eal -lrte_mempool -lrte_mbuf -lrte_ethdev
EXPORT_MAP := rte_kni_version.map
@@ -9,3 +9,4 @@ version = 2
sources = files('rte_kni.c')
headers = files('rte_kni.h')
deps += ['ethdev', 'pci']
+includes += include_directories('../../drivers/bus/pci')
@@ -14,6 +14,7 @@
#include <rte_spinlock.h>
#include <rte_string_fns.h>
#include <rte_ethdev.h>
+#include <rte_bus_pci.h>
#include <rte_malloc.h>
#include <rte_log.h>
#include <rte_kni.h>
@@ -199,6 +200,26 @@ kni_release_mz(struct rte_kni *kni)
rte_memzone_free(kni->m_sync_addr);
}
+/*
+ * Look up the PCI address and PCI ID of the device behind an ethdev port.
+ *
+ * addr, id: output locations; written only when the port's device sits on
+ *           a bus whose name is "pci". In every other case (no device
+ *           reported for the port, bus not found, non-PCI bus) they are
+ *           left exactly as the caller passed them in.
+ * port_id:  ethdev port identifier handed to rte_eth_dev_info_get().
+ */
+static void
+kni_dev_pci_addr_get(struct rte_pci_addr *addr,
+		struct rte_pci_id *id, uint16_t port_id)
+{
+	struct rte_eth_dev_info info;
+	const struct rte_pci_device *pdev;
+	const struct rte_bus *dev_bus;
+
+	/*
+	 * Zeroed up front so that info.device reads as NULL if
+	 * rte_eth_dev_info_get() does not fill it in.
+	 */
+	memset(&info, 0, sizeof(info));
+	rte_eth_dev_info_get(port_id, &info);
+
+	/* No backing device reported: nothing to copy out. */
+	if (info.device == NULL)
+		return;
+
+	/* Only devices on the "pci" bus carry a PCI address and ID. */
+	dev_bus = rte_bus_find_by_device(info.device);
+	if (dev_bus == NULL || strcmp(dev_bus->name, "pci") != 0)
+		return;
+
+	pdev = RTE_DEV_TO_PCI(info.device);
+	*addr = pdev->addr;
+	*id = pdev->id;
+}
+
struct rte_kni *
rte_kni_alloc(struct rte_mempool *pktmbuf_pool,
const struct rte_kni_conf *conf,
@@ -247,6 +268,37 @@ rte_kni_alloc(struct rte_mempool *pktmbuf_pool,
kni->ops.port_id = UINT16_MAX;
memset(&dev_info, 0, sizeof(dev_info));
+
+ if (rte_eal_iova_mode() == RTE_IOVA_VA) {
+ uint64_t page_sz = pktmbuf_pool->mz->hugepage_sz;
+ uint16_t port_id = conf->group_id;
+ struct rte_pci_addr addr = { 0 };
+ struct rte_pci_id id = { 0 };
+ size_t buf_sz;
+
+ kni_dev_pci_addr_get(&addr, &id, port_id);
+ dev_info.bus = addr.bus;
+ dev_info.devid = addr.devid;
+ dev_info.function = addr.function;
+ dev_info.vendor_id = id.vendor_id;
+ dev_info.device_id = id.device_id;
+
+ buf_sz = pktmbuf_pool->header_size + pktmbuf_pool->elt_size +
+ pktmbuf_pool->trailer_size;
+
+ /* Return failure when mbuf size is bigger than page size,
+ * because phys address of those mbuf might not be physically
+ * contiguous and KNI kernel module cannot translate those
+ * mbuf's IOVA addresses.
+ */
+ if (buf_sz > page_sz) {
+ RTE_LOG(ERR, KNI,
+ "KNI does not work in IOVA=VA mode when mbuf_sz > page_sz\n");
+ RTE_LOG(ERR, KNI, "buf_sz:0x%" PRIx64 " > ", buf_sz);
+ RTE_LOG(ERR, KNI, "page_sz:0x%" PRIx64 "\n", page_sz);
+ goto kni_fail;
+ }
+ }
dev_info.core_id = conf->core_id;
dev_info.force_bind = conf->force_bind;
dev_info.group_id = conf->group_id;
@@ -300,6 +352,8 @@ rte_kni_alloc(struct rte_mempool *pktmbuf_pool,
kni->group_id = conf->group_id;
kni->mbuf_size = conf->mbuf_size;
+ dev_info.iova_mode = (rte_eal_iova_mode() == RTE_IOVA_VA) ? 1 : 0;
+
ret = ioctl(kni_fd, RTE_KNI_IOCTL_CREATE, &dev_info);
if (ret < 0)
goto ioctl_fail;