@@ -300,6 +300,32 @@ The sk_buff is then freed and the mbuf sent in the tx_q FIFO.
The DPDK TX thread dequeues the mbuf and sends it to the PMD via ``rte_eth_tx_burst()``.
It then puts the mbuf back in the cache.
+IOVA = VA: Support
+------------------
+
+KNI can be operated in IOVA_VA scheme when
+
+- LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0) and
+- eal option `iova-mode=va` is passed or bus IOVA scheme in the DPDK is selected
+ as RTE_IOVA_VA.
+
+Packet Pool APIs for IOVA=VA mode
+---------------------------------
+
+``rte_kni_pktmbuf_pool_create`` and ``rte_kni_pktmbuf_pool_free`` APIs need to
+be used for creating packet pools for running KNI applications in IOVA=VA mode.
+Packet pool's mbuf memory should be physically contiguous for the KNI kernel
+module to work in IOVA=VA mode, this memory requirement was taken care inside
+those KNI packet pool create APIs.
+
+Command-line option for legacy KNI
+----------------------------------
+
+Existing KNI applications can use ``--legacy-kni`` eal command-line option to
+work with DPDK 19.11 and later versions. When this option is selected, IOVA mode
+will be forced to PA mode to enable old KNI applications work with the latest
+DPDK without code changes.
+
Ethtool
-------
@@ -120,6 +120,19 @@ New Features
Added EAL option ``--legacy-kni`` to make existing KNI applications work
with DPDK 19.11 and later.
+* **Added IOVA as VA support for KNI.**
+
+ Added IOVA as VA support for KNI. When KNI needs to operate in IOVA = VA
+ mode, packet pool's mbuf memory should be physically contiguous. This memory
+ requirement taken care using the new ``rte_kni_pktmbuf_pool_create`` and
+ ``rte_kni_pktmbuf_pool_free`` routines.
+
+ The ``examples/kni/`` updated to use this API to enable IOVA as VA or
+ IOVA as PA mode.
+
+ When "--legacy-kni" selected, IOVA mode will be forced to PA mode to enable
+ old KNI application work with the latest DPDK without code changes.
+
Removed Items
-------------
@@ -1017,8 +1017,9 @@ main(int argc, char** argv)
rte_exit(EXIT_FAILURE, "Could not parse input parameters\n");
/* Create the mbuf pool */
- pktmbuf_pool = rte_pktmbuf_pool_create("mbuf_pool", NB_MBUF,
+ pktmbuf_pool = rte_kni_pktmbuf_pool_create("mbuf_pool", NB_MBUF,
MEMPOOL_CACHE_SZ, 0, MBUF_DATA_SZ, rte_socket_id());
+
if (pktmbuf_pool == NULL) {
rte_exit(EXIT_FAILURE, "Could not initialise mbuf pool\n");
return -1;
@@ -1085,6 +1086,9 @@ main(int argc, char** argv)
continue;
kni_free_kni(port);
}
+
+ rte_kni_pktmbuf_pool_free(pktmbuf_pool);
+
for (i = 0; i < RTE_MAX_ETHPORTS; i++)
if (kni_port_params_array[i]) {
rte_free(kni_port_params_array[i]);
@@ -947,6 +947,29 @@ static int rte_eal_vfio_setup(void)
}
#endif
+static enum rte_iova_mode
+rte_eal_kni_get_iova_mode(enum rte_iova_mode iova_mode)
+{
+ if (iova_mode == RTE_IOVA_PA)
+ goto exit;
+
+ if (internal_config.legacy_kni) {
+ iova_mode = RTE_IOVA_PA;
+ RTE_LOG(WARNING, EAL, "Forcing IOVA as 'PA' because legacy KNI is enabled\n");
+ goto exit;
+ }
+
+ if (iova_mode == RTE_IOVA_VA) {
+#if KERNEL_VERSION(4, 8, 0) > LINUX_VERSION_CODE
+ iova_mode = RTE_IOVA_PA;
+ RTE_LOG(WARNING, EAL, "Forcing IOVA as 'PA' because KNI module does not support VA\n");
+#endif
+ }
+
+exit:
+ return iova_mode;
+}
+
static void rte_eal_init_alert(const char *msg)
{
fprintf(stderr, "EAL: FATAL: %s\n", msg);
@@ -1110,24 +1133,16 @@ rte_eal_init(int argc, char **argv)
RTE_LOG(DEBUG, EAL, "IOMMU is not available, selecting IOVA as PA mode.\n");
}
}
-#ifdef RTE_LIBRTE_KNI
- /* Workaround for KNI which requires physical address to work */
- if (iova_mode == RTE_IOVA_VA &&
- rte_eal_check_module("rte_kni") == 1) {
- if (phys_addrs) {
- iova_mode = RTE_IOVA_PA;
- RTE_LOG(WARNING, EAL, "Forcing IOVA as 'PA' because KNI module is loaded\n");
- } else {
- RTE_LOG(DEBUG, EAL, "KNI can not work since physical addresses are unavailable\n");
- }
- }
-#endif
rte_eal_get_configuration()->iova_mode = iova_mode;
} else {
rte_eal_get_configuration()->iova_mode =
internal_config.iova_mode;
}
+ if (rte_eal_check_module("rte_kni") == 1)
+ rte_eal_get_configuration()->iova_mode =
+ rte_eal_kni_get_iova_mode(rte_eal_iova_mode());
+
if (rte_eal_iova_mode() == RTE_IOVA_PA && !phys_addrs) {
rte_eal_init_alert("Cannot use IOVA as 'PA' since physical addresses are not available");
rte_errno = EINVAL;
@@ -123,6 +123,7 @@ struct rte_kni_device_info {
unsigned mbuf_size;
unsigned int mtu;
uint8_t mac_addr[6];
+ uint8_t iova_mode;
};
#define KNI_DEVICE "kni"
@@ -6,6 +6,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
# library name
LIB = librte_kni.a
+CFLAGS += -DALLOW_EXPERIMENTAL_API
CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 -fno-strict-aliasing
LDLIBS += -lrte_eal -lrte_mempool -lrte_mbuf -lrte_ethdev
@@ -1,6 +1,7 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright(c) 2017 Intel Corporation
+allow_experimental_apis = true
if not is_linux or not dpdk_conf.get('RTE_ARCH_64')
build = false
reason = 'only supported on 64-bit linux'
@@ -21,6 +21,7 @@
#include <rte_tailq.h>
#include <rte_rwlock.h>
#include <rte_eal_memconfig.h>
+#include <rte_mbuf_pool_ops.h>
#include <rte_kni_common.h>
#include "rte_kni_fifo.h"
@@ -97,11 +98,6 @@ static volatile int kni_fd = -1;
int
rte_kni_init(unsigned int max_kni_ifaces __rte_unused)
{
- if (rte_eal_iova_mode() != RTE_IOVA_PA) {
- RTE_LOG(ERR, KNI, "KNI requires IOVA as PA\n");
- return -1;
- }
-
/* Check FD and open */
if (kni_fd < 0) {
kni_fd = open("/dev/" KNI_DEVICE, O_RDWR);
@@ -300,6 +296,8 @@ rte_kni_alloc(struct rte_mempool *pktmbuf_pool,
kni->group_id = conf->group_id;
kni->mbuf_size = conf->mbuf_size;
+ dev_info.iova_mode = (rte_eal_iova_mode() == RTE_IOVA_VA) ? 1 : 0;
+
ret = ioctl(kni_fd, RTE_KNI_IOCTL_CREATE, &dev_info);
if (ret < 0)
goto ioctl_fail;
@@ -687,6 +685,65 @@ kni_allocate_mbufs(struct rte_kni *kni)
}
}
+struct rte_mempool *
+rte_kni_pktmbuf_pool_create(const char *name, unsigned int n,
+ unsigned int cache_size, uint16_t priv_size, uint16_t data_room_size,
+ int socket_id)
+{
+ struct rte_pktmbuf_pool_private mbp_priv;
+ const char *mp_ops_name;
+ struct rte_mempool *mp;
+ unsigned int elt_size;
+ int ret;
+
+ if (RTE_ALIGN(priv_size, RTE_MBUF_PRIV_ALIGN) != priv_size) {
+ RTE_LOG(ERR, MBUF, "mbuf priv_size=%u is not aligned\n",
+ priv_size);
+ rte_errno = EINVAL;
+ return NULL;
+ }
+ elt_size = sizeof(struct rte_mbuf) + (unsigned int)priv_size +
+ (unsigned int)data_room_size;
+ mbp_priv.mbuf_data_room_size = data_room_size;
+ mbp_priv.mbuf_priv_size = priv_size;
+
+ mp = rte_mempool_create_empty(name, n, elt_size, cache_size,
+ sizeof(struct rte_pktmbuf_pool_private), socket_id, 0);
+ if (mp == NULL)
+ return NULL;
+
+ mp_ops_name = rte_mbuf_best_mempool_ops();
+ ret = rte_mempool_set_ops_byname(mp, mp_ops_name, NULL);
+ if (ret != 0) {
+ RTE_LOG(ERR, MBUF, "error setting mempool handler\n");
+ rte_mempool_free(mp);
+ rte_errno = -ret;
+ return NULL;
+ }
+ rte_pktmbuf_pool_init(mp, &mbp_priv);
+
+ if (rte_eal_iova_mode() == RTE_IOVA_VA)
+ ret = rte_mempool_populate_from_pg_sz_chunks(mp);
+ else
+ ret = rte_mempool_populate_default(mp);
+
+ if (ret < 0) {
+ rte_mempool_free(mp);
+ rte_errno = -ret;
+ return NULL;
+ }
+
+ rte_mempool_obj_iter(mp, rte_pktmbuf_init, NULL);
+
+ return mp;
+}
+
+void
+rte_kni_pktmbuf_pool_free(struct rte_mempool *mp)
+{
+ rte_mempool_free(mp);
+}
+
struct rte_kni *
rte_kni_get(const char *name)
{
@@ -187,6 +187,54 @@ unsigned rte_kni_tx_burst(struct rte_kni *kni, struct rte_mbuf **mbufs,
unsigned num);
/**
+ * Create a kni packet mbuf pool.
+ *
+ * This function creates and initializes a packet mbuf pool for KNI applications
+ * It calls the required mempool populate routine based on the IOVA mode.
+ *
+ * @param name
+ * The name of the mbuf pool.
+ * @param n
+ * The number of elements in the mbuf pool. The optimum size (in terms
+ * of memory usage) for a mempool is when n is a power of two minus one:
+ * n = (2^q - 1).
+ * @param cache_size
+ * Size of the per-core object cache. See rte_mempool_create() for
+ * details.
+ * @param priv_size
+ * Size of application private are between the rte_mbuf structure
+ * and the data buffer. This value must be aligned to RTE_MBUF_PRIV_ALIGN.
+ * @param data_room_size
+ * Size of data buffer in each mbuf, including RTE_PKTMBUF_HEADROOM.
+ * @param socket_id
+ * The socket identifier where the memory should be allocated. The
+ * value can be *SOCKET_ID_ANY* if there is no NUMA constraint for the
+ * reserved zone.
+ * @return
+ * The pointer to the new allocated mempool, on success. NULL on error
+ * with rte_errno set appropriately. Possible rte_errno values include:
+ * - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure
+ * - E_RTE_SECONDARY - function was called from a secondary process instance
+ * - EINVAL - cache size provided is too large, or priv_size is not aligned.
+ * - ENOSPC - the maximum number of memzones has already been allocated
+ * - EEXIST - a memzone with the same name already exists
+ * - ENOMEM - no appropriate memory area found in which to create memzone
+ */
+__rte_experimental
+struct rte_mempool *rte_kni_pktmbuf_pool_create(const char *name,
+ unsigned int n, unsigned int cache_size, uint16_t priv_size,
+ uint16_t data_room_size, int socket_id);
+
+/**
+ * Free the given packet mempool.
+ *
+ * @param mp
+ * The mempool pointer.
+ */
+__rte_experimental
+void rte_kni_pktmbuf_pool_free(struct rte_mempool *mp);
+
+/**
* Get the KNI context of its name.
*
* @param name
@@ -19,5 +19,8 @@ DPDK_2.0 {
EXPERIMENTAL {
global:
+ # added in 19.11
rte_kni_update_link;
+ rte_kni_pktmbuf_pool_create;
+ rte_kni_pktmbuf_pool_free;
};