[v11,4/4] kni: add IOVA=VA support in kernel module

Message ID 20191021080324.10659-5-vattunuru@marvell.com (mailing list archive)
State Superseded, archived
Delegated to: Ferruh Yigit
Series kni: add IOVA=VA mode support

Checks

Context               Check    Description
ci/checkpatch         success  coding style OK
ci/Intel-compilation  fail     Compilation issues
ci/travis-robot       success  Travis build: passed

Commit Message

Vamsi Krishna Attunuru Oct. 21, 2019, 8:03 a.m. UTC
  From: Vamsi Attunuru <vattunuru@marvell.com>

Patch adds support for the kernel module to work in
IOVA = VA mode by providing address translation
routines that convert the IOVA (i.e. user space VA)
to kernel virtual addresses.

Signed-off-by: Vamsi Attunuru <vattunuru@marvell.com>
Signed-off-by: Kiran Kumar K <kirankumark@marvell.com>
---
 kernel/linux/kni/compat.h   |  4 ++++
 kernel/linux/kni/kni_dev.h  | 31 ++++++++++++++++++++++++
 kernel/linux/kni/kni_misc.c | 39 +++++++++++++++++++++++-------
 kernel/linux/kni/kni_net.c  | 58 +++++++++++++++++++++++++++++++++++----------
 4 files changed, 110 insertions(+), 22 deletions(-)
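
The kernel-side translation only comes into play when the application's EAL
runs with IOVA=VA; a minimal userspace sketch (illustrative, not part of this
patch) that checks the mode through the existing rte_eal_iova_mode() API:

#include <stdio.h>
#include <rte_eal.h>

int main(int argc, char **argv)
{
	/* rte_eal_init() selects the IOVA mode (PA, VA, or "don't care"). */
	if (rte_eal_init(argc, argv) < 0)
		return 1;

	/* Only VA mode requires the new IOVA->KVA path in the module. */
	if (rte_eal_iova_mode() == RTE_IOVA_VA)
		printf("IOVA=VA: kernel module needs IOVA->KVA translation\n");
	else
		printf("IOVA=PA (or DC): module keeps using phys_to_virt()\n");

	return 0;
}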
  

Comments

Ferruh Yigit Oct. 21, 2019, 12:02 p.m. UTC | #1
On 10/21/2019 9:03 AM, vattunuru@marvell.com wrote:
> From: Vamsi Attunuru <vattunuru@marvell.com>
> 
> Patch adds support for the kernel module to work in
> IOVA = VA mode by providing address translation
> routines that convert the IOVA (i.e. user space VA)
> to kernel virtual addresses.
> 
> Signed-off-by: Vamsi Attunuru <vattunuru@marvell.com>
> Signed-off-by: Kiran Kumar K <kirankumark@marvell.com>

<...>

> +static inline phys_addr_t iova_to_phys(struct task_struct *tsk,
> +				       unsigned long iova)
> +{
> +	unsigned int flags = FOLL_TOUCH;
> +	phys_addr_t offset, phys_addr;
> +	struct page *page = NULL;
> +	int ret;
> +
> +	offset = iova & (PAGE_SIZE - 1);
> +
> +	/* Read one page struct info */
> +	ret =  get_user_pages_remote(tsk, tsk->mm, iova, 1,
> +				     flags, &page, 0, 0);
> +	if (ret < 0)
> +		return 0;
> +
> +	phys_addr = page_to_phys(page) | offset;
> +	put_page(page);
> +
> +	return phys_addr;
> +}
> +
> +static inline void *iova_to_kva(struct task_struct *tsk, unsigned long iova)
> +{
> +	return phys_to_virt(iova_to_phys(tsk, iova));
> +}

Do you have any measurements of the performance effect of this change?
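
(For what it's worth, one way to get such a number would be a debug-only
timing probe around the new routine; a sketch, not part of the patch,
assuming it sits next to iova_to_phys() in kni_dev.h:)

/* Sketch: time a single translation with the standard ktime helpers. */
static inline phys_addr_t iova_to_phys_timed(struct task_struct *tsk,
					     unsigned long iova)
{
	u64 t0 = ktime_get_ns();
	phys_addr_t pa = iova_to_phys(tsk, iova);

	pr_debug("iova_to_phys: %llu ns\n", ktime_get_ns() - t0);
	return pa;
}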

> +
>  void kni_net_release_fifo_phy(struct kni_dev *kni);
>  void kni_net_rx(struct kni_dev *kni);
>  void kni_net_init(struct net_device *dev);
> diff --git a/kernel/linux/kni/kni_misc.c b/kernel/linux/kni/kni_misc.c
> index 2b75502..7af7ab4 100644
> --- a/kernel/linux/kni/kni_misc.c
> +++ b/kernel/linux/kni/kni_misc.c
> @@ -348,15 +348,36 @@ kni_ioctl_create(struct net *net, uint32_t ioctl_num,
>  	strncpy(kni->name, dev_info.name, RTE_KNI_NAMESIZE);
>  
>  	/* Translate user space info into kernel space info */
> -	kni->tx_q = phys_to_virt(dev_info.tx_phys);
> -	kni->rx_q = phys_to_virt(dev_info.rx_phys);
> -	kni->alloc_q = phys_to_virt(dev_info.alloc_phys);
> -	kni->free_q = phys_to_virt(dev_info.free_phys);
> -
> -	kni->req_q = phys_to_virt(dev_info.req_phys);
> -	kni->resp_q = phys_to_virt(dev_info.resp_phys);
> -	kni->sync_va = dev_info.sync_va;
> -	kni->sync_kva = phys_to_virt(dev_info.sync_phys);
> +	if (dev_info.iova_mode) {
> +#ifdef HAVE_IOVA_TO_KVA_MAPPING_SUPPORT

Do you think a runtime check is required, for the case where the code has
been compiled on a box with a newer kernel but is run on a box with an older
kernel?
Not sure about it myself either...

<...>

> @@ -62,6 +76,24 @@ kva2data_kva(struct rte_kni_mbuf *m)
>  	return phys_to_virt(m->buf_physaddr + m->data_off);
>  }
>  
> +static inline void *
> +get_kva(struct kni_dev *kni, void *pa)
> +{
> +	if (kni->iova_mode == 1)

This check is done multiple times per packet; I wonder if it can be
prevented. Again, even in the original mode, do you have any numbers on the
performance effect of this check?

Thanks,
ferruh
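
For reference, the per-packet iova_mode branch flagged above could in
principle be hoisted to create time by dispatching through a function
pointer; a minimal sketch reusing the patch's helpers, where the get_kva
member on struct kni_dev is hypothetical:

/* Sketch only: pick the PA vs IOVA translation once, in
 * kni_ioctl_create(), instead of testing kni->iova_mode per packet.
 */
typedef void *(*kni_get_kva_t)(struct kni_dev *kni, void *pa);

static void *kni_get_kva_phys(struct kni_dev *kni, void *pa)
{
	return pa2kva(pa);		/* original IOVA=PA path */
}

static void *kni_get_kva_iova(struct kni_dev *kni, void *pa)
{
	return iova2kva(kni, pa);	/* new IOVA=VA path */
}

/* At create time, once dev_info.iova_mode is known:
 *	kni->get_kva = dev_info.iova_mode ? kni_get_kva_iova
 *					  : kni_get_kva_phys;
 * Per-packet paths then call kni->get_kva(kni, kni->pa[i]) directly.
 */

Whether an indirect call actually beats a well-predicted branch (especially
with retpolines) is exactly the kind of thing the requested numbers would
show.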
  

Patch

diff --git a/kernel/linux/kni/compat.h b/kernel/linux/kni/compat.h
index 562d8bf..b5e8914 100644
--- a/kernel/linux/kni/compat.h
+++ b/kernel/linux/kni/compat.h
@@ -121,3 +121,7 @@ 
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)
 #define HAVE_SIGNAL_FUNCTIONS_OWN_HEADER
 #endif
+
+#if KERNEL_VERSION(4, 8, 0) <= LINUX_VERSION_CODE
+#define HAVE_IOVA_TO_KVA_MAPPING_SUPPORT
+#endif
diff --git a/kernel/linux/kni/kni_dev.h b/kernel/linux/kni/kni_dev.h
index c1ca678..abe9b14 100644
--- a/kernel/linux/kni/kni_dev.h
+++ b/kernel/linux/kni/kni_dev.h
@@ -41,6 +41,8 @@  struct kni_dev {
 	/* kni list */
 	struct list_head list;
 
+	uint8_t iova_mode;
+
 	uint32_t core_id;            /* Core ID to bind */
 	char name[RTE_KNI_NAMESIZE]; /* Network device name */
 	struct task_struct *pthread;
@@ -84,8 +86,37 @@  struct kni_dev {
 	void *va[MBUF_BURST_SZ];
 	void *alloc_pa[MBUF_BURST_SZ];
 	void *alloc_va[MBUF_BURST_SZ];
+
+	struct task_struct *usr_tsk;
 };
 
+static inline phys_addr_t iova_to_phys(struct task_struct *tsk,
+				       unsigned long iova)
+{
+	unsigned int flags = FOLL_TOUCH;
+	phys_addr_t offset, phys_addr;
+	struct page *page = NULL;
+	int ret;
+
+	offset = iova & (PAGE_SIZE - 1);
+
+	/* Read one page struct info */
+	ret =  get_user_pages_remote(tsk, tsk->mm, iova, 1,
+				     flags, &page, 0, 0);
+	if (ret < 0)
+		return 0;
+
+	phys_addr = page_to_phys(page) | offset;
+	put_page(page);
+
+	return phys_addr;
+}
+
+static inline void *iova_to_kva(struct task_struct *tsk, unsigned long iova)
+{
+	return phys_to_virt(iova_to_phys(tsk, iova));
+}
+
 void kni_net_release_fifo_phy(struct kni_dev *kni);
 void kni_net_rx(struct kni_dev *kni);
 void kni_net_init(struct net_device *dev);
diff --git a/kernel/linux/kni/kni_misc.c b/kernel/linux/kni/kni_misc.c
index 2b75502..7af7ab4 100644
--- a/kernel/linux/kni/kni_misc.c
+++ b/kernel/linux/kni/kni_misc.c
@@ -348,15 +348,36 @@  kni_ioctl_create(struct net *net, uint32_t ioctl_num,
 	strncpy(kni->name, dev_info.name, RTE_KNI_NAMESIZE);
 
 	/* Translate user space info into kernel space info */
-	kni->tx_q = phys_to_virt(dev_info.tx_phys);
-	kni->rx_q = phys_to_virt(dev_info.rx_phys);
-	kni->alloc_q = phys_to_virt(dev_info.alloc_phys);
-	kni->free_q = phys_to_virt(dev_info.free_phys);
-
-	kni->req_q = phys_to_virt(dev_info.req_phys);
-	kni->resp_q = phys_to_virt(dev_info.resp_phys);
-	kni->sync_va = dev_info.sync_va;
-	kni->sync_kva = phys_to_virt(dev_info.sync_phys);
+	if (dev_info.iova_mode) {
+#ifdef HAVE_IOVA_TO_KVA_MAPPING_SUPPORT
+		kni->tx_q = iova_to_kva(current, dev_info.tx_phys);
+		kni->rx_q = iova_to_kva(current, dev_info.rx_phys);
+		kni->alloc_q = iova_to_kva(current, dev_info.alloc_phys);
+		kni->free_q = iova_to_kva(current, dev_info.free_phys);
+
+		kni->req_q = iova_to_kva(current, dev_info.req_phys);
+		kni->resp_q = iova_to_kva(current, dev_info.resp_phys);
+		kni->sync_va = dev_info.sync_va;
+		kni->sync_kva = iova_to_kva(current, dev_info.sync_phys);
+		kni->usr_tsk = current;
+		kni->iova_mode = 1;
+#else
+		pr_err("KNI module does not support IOVA to VA translation\n");
+		return -EINVAL;
+#endif
+	} else {
+
+		kni->tx_q = phys_to_virt(dev_info.tx_phys);
+		kni->rx_q = phys_to_virt(dev_info.rx_phys);
+		kni->alloc_q = phys_to_virt(dev_info.alloc_phys);
+		kni->free_q = phys_to_virt(dev_info.free_phys);
+
+		kni->req_q = phys_to_virt(dev_info.req_phys);
+		kni->resp_q = phys_to_virt(dev_info.resp_phys);
+		kni->sync_va = dev_info.sync_va;
+		kni->sync_kva = phys_to_virt(dev_info.sync_phys);
+		kni->iova_mode = 0;
+	}
 
 	kni->mbuf_size = dev_info.mbuf_size;
 
diff --git a/kernel/linux/kni/kni_net.c b/kernel/linux/kni/kni_net.c
index f25b127..e95207b 100644
--- a/kernel/linux/kni/kni_net.c
+++ b/kernel/linux/kni/kni_net.c
@@ -36,6 +36,20 @@  static void kni_net_rx_normal(struct kni_dev *kni);
 /* kni rx function pointer, with default to normal rx */
 static kni_net_rx_t kni_net_rx_func = kni_net_rx_normal;
 
+/* iova to kernel virtual address */
+static inline void *
+iova2kva(struct kni_dev *kni, void *iova)
+{
+	return phys_to_virt(iova_to_phys(kni->usr_tsk, (unsigned long)iova));
+}
+
+static inline void *
+iova2data_kva(struct kni_dev *kni, struct rte_kni_mbuf *m)
+{
+	return phys_to_virt(iova_to_phys(kni->usr_tsk, m->buf_physaddr) +
+			    m->data_off);
+}
+
 /* physical address to kernel virtual address */
 static void *
 pa2kva(void *pa)
@@ -62,6 +76,24 @@  kva2data_kva(struct rte_kni_mbuf *m)
 	return phys_to_virt(m->buf_physaddr + m->data_off);
 }
 
+static inline void *
+get_kva(struct kni_dev *kni, void *pa)
+{
+	if (kni->iova_mode == 1)
+		return iova2kva(kni, pa);
+
+	return pa2kva(pa);
+}
+
+static inline void *
+get_data_kva(struct kni_dev *kni, void *pkt_kva)
+{
+	if (kni->iova_mode == 1)
+		return iova2data_kva(kni, pkt_kva);
+
+	return kva2data_kva(pkt_kva);
+}
+
 /*
  * It can be called to process the request.
  */
@@ -178,7 +210,7 @@  kni_fifo_trans_pa2va(struct kni_dev *kni,
 			return;
 
 		for (i = 0; i < num_rx; i++) {
-			kva = pa2kva(kni->pa[i]);
+			kva = get_kva(kni, kni->pa[i]);
 			kni->va[i] = pa2va(kni->pa[i], kva);
 
 			kva_nb_segs = kva->nb_segs;
@@ -266,8 +298,8 @@  kni_net_tx(struct sk_buff *skb, struct net_device *dev)
 	if (likely(ret == 1)) {
 		void *data_kva;
 
-		pkt_kva = pa2kva(pkt_pa);
-		data_kva = kva2data_kva(pkt_kva);
+		pkt_kva = get_kva(kni, pkt_pa);
+		data_kva = get_data_kva(kni, pkt_kva);
 		pkt_va = pa2va(pkt_pa, pkt_kva);
 
 		len = skb->len;
@@ -338,9 +370,9 @@  kni_net_rx_normal(struct kni_dev *kni)
 
 	/* Transfer received packets to netif */
 	for (i = 0; i < num_rx; i++) {
-		kva = pa2kva(kni->pa[i]);
+		kva = get_kva(kni, kni->pa[i]);
 		len = kva->pkt_len;
-		data_kva = kva2data_kva(kva);
+		data_kva = get_data_kva(kni, kva);
 		kni->va[i] = pa2va(kni->pa[i], kva);
 
 		skb = netdev_alloc_skb(dev, len);
@@ -437,9 +469,9 @@  kni_net_rx_lo_fifo(struct kni_dev *kni)
 		num = ret;
 		/* Copy mbufs */
 		for (i = 0; i < num; i++) {
-			kva = pa2kva(kni->pa[i]);
+			kva = get_kva(kni, kni->pa[i]);
 			len = kva->data_len;
-			data_kva = kva2data_kva(kva);
+			data_kva = get_data_kva(kni, kva);
 			kni->va[i] = pa2va(kni->pa[i], kva);
 
 			while (kva->next) {
@@ -449,8 +481,8 @@  kni_net_rx_lo_fifo(struct kni_dev *kni)
 				kva = next_kva;
 			}
 
-			alloc_kva = pa2kva(kni->alloc_pa[i]);
-			alloc_data_kva = kva2data_kva(alloc_kva);
+			alloc_kva = get_kva(kni, kni->alloc_pa[i]);
+			alloc_data_kva = get_data_kva(kni, alloc_kva);
 			kni->alloc_va[i] = pa2va(kni->alloc_pa[i], alloc_kva);
 
 			memcpy(alloc_data_kva, data_kva, len);
@@ -517,9 +549,9 @@  kni_net_rx_lo_fifo_skb(struct kni_dev *kni)
 
 	/* Copy mbufs to sk buffer and then call tx interface */
 	for (i = 0; i < num; i++) {
-		kva = pa2kva(kni->pa[i]);
+		kva = get_kva(kni, kni->pa[i]);
 		len = kva->pkt_len;
-		data_kva = kva2data_kva(kva);
+		data_kva = get_data_kva(kni, kva);
 		kni->va[i] = pa2va(kni->pa[i], kva);
 
 		skb = netdev_alloc_skb(dev, len);
@@ -550,8 +582,8 @@  kni_net_rx_lo_fifo_skb(struct kni_dev *kni)
 					break;
 
 				prev_kva = kva;
-				kva = pa2kva(kva->next);
-				data_kva = kva2data_kva(kva);
+				kva = get_kva(kni, kva->next);
+				data_kva = get_data_kva(kni, kva);
 				/* Convert physical address to virtual address */
 				prev_kva->next = pa2va(prev_kva->next, kva);
 			}