[dpdk-dev,07/21] vhost: add iotlb helper functions

Message ID 20170831095023.21037-8-maxime.coquelin@redhat.com (mailing list archive)
State Superseded, archived
Delegated to: Yuanhan Liu
Headers

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK

Commit Message

Maxime Coquelin Aug. 31, 2017, 9:50 a.m. UTC
  Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 lib/librte_vhost/Makefile |   4 +-
 lib/librte_vhost/iotlb.c  | 231 ++++++++++++++++++++++++++++++++++++++++++++++
 lib/librte_vhost/iotlb.h  |  46 +++++++++
 lib/librte_vhost/vhost.c  |   1 +
 lib/librte_vhost/vhost.h  |   5 +
 5 files changed, 285 insertions(+), 2 deletions(-)
 create mode 100644 lib/librte_vhost/iotlb.c
 create mode 100644 lib/librte_vhost/iotlb.h
  

Comments

Tiwei Bie Sept. 5, 2017, 6:02 a.m. UTC | #1
On Thu, Aug 31, 2017 at 11:50:09AM +0200, Maxime Coquelin wrote:
[...]
> +
> +#define IOTLB_CACHE_SIZE 1024
> +
> +static void vhost_user_iotlb_cache_remove_all(struct vhost_virtqueue *vq)
> +{
> +	struct vhost_iotlb_entry *node, *temp_node;
> +
> +	rte_rwlock_write_lock(&vq->iotlb_lock);
> +
> +	TAILQ_FOREACH_SAFE(node, &vq->iotlb_list, next, temp_node) {
> +		TAILQ_REMOVE(&vq->iotlb_list, node, next);
> +		rte_mempool_put(vq->iotlb_pool, node);
> +	}
> +
> +	rte_rwlock_write_unlock(&vq->iotlb_lock);
> +}
> +
> +void vhost_user_iotlb_cache_insert(struct vhost_virtqueue *vq, uint64_t iova,
> +				uint64_t uaddr, uint64_t size, uint8_t perm)
> +{
> +	struct vhost_iotlb_entry *node, *new_node;
> +	int ret;
> +
> +	ret = rte_mempool_get(vq->iotlb_pool, (void **)&new_node);
> +	if (ret) {
> +		RTE_LOG(ERR, VHOST_CONFIG, "IOTLB pool empty, invalidate cache\n");

I think the log level should be DEBUG or INFO or the likes, not ERR.

> +		vhost_user_iotlb_cache_remove_all(vq);
> +		ret = rte_mempool_get(vq->iotlb_pool, (void **)&new_node);
> +		if (ret) {
> +			RTE_LOG(ERR, VHOST_CONFIG, "IOTLB pool still empty, failure\n");
> +			return;
> +		}
> +	}
> +
[...]
> +
> +void vhost_user_iotlb_cache_remove(struct vhost_virtqueue *vq,
> +					uint64_t iova, uint64_t size)
> +{
> +	struct vhost_iotlb_entry *node, *temp_node;
> +
> +	if (unlikely(!size))
> +		return;
> +
> +	rte_rwlock_write_lock(&vq->iotlb_lock);
> +
> +	TAILQ_FOREACH_SAFE(node, &vq->iotlb_list, next, temp_node) {
> +		/* Sorted list */
> +		if (unlikely(node->iova >= iova + size)) {
> +			break;
> +		} else if ((node->iova < iova + size) &&
> +					(iova < node->iova + node->size)) {

The `else' can be removed.
And the check of (node->iova < iova + size) can also be removed.

> +			TAILQ_REMOVE(&vq->iotlb_list, node, next);
> +			rte_mempool_put(vq->iotlb_pool, node);
> +			continue;
> +		}
> +	}
> +
> +	rte_rwlock_write_unlock(&vq->iotlb_lock);
> +}
> +
> +

Only one empty line is needed here.

> +uint64_t vhost_user_iotlb_cache_find(struct vhost_virtqueue *vq, uint64_t iova,
> +						uint64_t *size, uint8_t perm)
> +{
[...]
> +#ifndef _VHOST_IOTLB_H_
> +#define _VHOST_IOTLB_H_
> +
> +#include "vhost.h"
> +void vhost_user_iotlb_cache_insert(struct vhost_virtqueue *vq, uint64_t iova,
> +					uint64_t uaddr, uint64_t size,
> +					uint8_t perm);

An empty line should be added after #include "vhost.h".

Best regards,
Tiwei Bie
  
Maxime Coquelin Sept. 5, 2017, 3:16 p.m. UTC | #2
On 09/05/2017 08:02 AM, Tiwei Bie wrote:
> On Thu, Aug 31, 2017 at 11:50:09AM +0200, Maxime Coquelin wrote:
> [...]
>> +
>> +#define IOTLB_CACHE_SIZE 1024
>> +
>> +static void vhost_user_iotlb_cache_remove_all(struct vhost_virtqueue *vq)
>> +{
>> +	struct vhost_iotlb_entry *node, *temp_node;
>> +
>> +	rte_rwlock_write_lock(&vq->iotlb_lock);
>> +
>> +	TAILQ_FOREACH_SAFE(node, &vq->iotlb_list, next, temp_node) {
>> +		TAILQ_REMOVE(&vq->iotlb_list, node, next);
>> +		rte_mempool_put(vq->iotlb_pool, node);
>> +	}
>> +
>> +	rte_rwlock_write_unlock(&vq->iotlb_lock);
>> +}
>> +
>> +void vhost_user_iotlb_cache_insert(struct vhost_virtqueue *vq, uint64_t iova,
>> +				uint64_t uaddr, uint64_t size, uint8_t perm)
>> +{
>> +	struct vhost_iotlb_entry *node, *new_node;
>> +	int ret;
>> +
>> +	ret = rte_mempool_get(vq->iotlb_pool, (void **)&new_node);
>> +	if (ret) {
>> +		RTE_LOG(ERR, VHOST_CONFIG, "IOTLB pool empty, invalidate cache\n");
> 
> I think the log level should be DEBUG or INFO or the likes, not ERR.

I set it to ERR because failing in this case would mean that either the 
IOTLB cache has not been sized large enough and we would like it to be
reported as it would have an impact on performance and drop rate, or
that we have a malicious or buggy guest.

I'm fine with changing it to INFO though.

>> +		vhost_user_iotlb_cache_remove_all(vq);
>> +		ret = rte_mempool_get(vq->iotlb_pool, (void **)&new_node);
>> +		if (ret) {
>> +			RTE_LOG(ERR, VHOST_CONFIG, "IOTLB pool still empty, failure\n");
>> +			return;
>> +		}
>> +	}
>> +
> [...]
>> +
>> +void vhost_user_iotlb_cache_remove(struct vhost_virtqueue *vq,
>> +					uint64_t iova, uint64_t size)
>> +{
>> +	struct vhost_iotlb_entry *node, *temp_node;
>> +
>> +	if (unlikely(!size))
>> +		return;
>> +
>> +	rte_rwlock_write_lock(&vq->iotlb_lock);
>> +
>> +	TAILQ_FOREACH_SAFE(node, &vq->iotlb_list, next, temp_node) {
>> +		/* Sorted list */
>> +		if (unlikely(node->iova >= iova + size)) {
>> +			break;
>> +		} else if ((node->iova < iova + size) &&
>> +					(iova < node->iova + node->size)) {
> 
> The `else' can be removed.
> And the check of (node->iova < iova + size) can also be removed.

Right! Fixed.

>> +			TAILQ_REMOVE(&vq->iotlb_list, node, next);
>> +			rte_mempool_put(vq->iotlb_pool, node);
>> +			continue;
>> +		}
>> +	}
>> +
>> +	rte_rwlock_write_unlock(&vq->iotlb_lock);
>> +}
>> +
>> +
> 
> Only one empty line is needed here.

Fixed.

>> +uint64_t vhost_user_iotlb_cache_find(struct vhost_virtqueue *vq, uint64_t iova,
>> +						uint64_t *size, uint8_t perm)
>> +{
> [...]
>> +#ifndef _VHOST_IOTLB_H_
>> +#define _VHOST_IOTLB_H_
>> +
>> +#include "vhost.h"
>> +void vhost_user_iotlb_cache_insert(struct vhost_virtqueue *vq, uint64_t iova,
>> +					uint64_t uaddr, uint64_t size,
>> +					uint8_t perm);
> 
> An empty line should be added after #include "vhost.h".
Fixed.

Thanks,
Maxime

> Best regards,
> Tiwei Bie
>
  
Yuanhan Liu Sept. 8, 2017, 8:08 a.m. UTC | #3
On Thu, Aug 31, 2017 at 11:50:09AM +0200, Maxime Coquelin wrote:
> diff --git a/lib/librte_vhost/iotlb.c b/lib/librte_vhost/iotlb.c
> new file mode 100644
> index 000000000..1b739dae5
> --- /dev/null
> +++ b/lib/librte_vhost/iotlb.c
> @@ -0,0 +1,231 @@
> +/*-
> + *   BSD LICENSE
> + *
> + *   Copyright (c) 2017 Red Hat, Inc.
> + *   Copyright (c) 2017 Maxime Coquelin <maxime.coquelin@redhat.com>

I'm not a lawer, but I have been told many years before, that you don't
have the copyright for the code you write for open source project, the
company you work for does.

Thus, it's more common to see something like following:
	Copyright , ... the commany ...
	Author:  Some One <...@...>

However, as you may have noticed, it's not common to put the authorship
in the source files. Though I don't object it.

[...]
> +#define IOTLB_CACHE_SIZE 1024
> +
> +static void vhost_user_iotlb_cache_remove_all(struct vhost_virtqueue *vq)
   ^^^^^^^^^^^^
Note that it's not the DPDK coding style to define a function.

> +{
> +	struct vhost_iotlb_entry *node, *temp_node;
> +
> +	rte_rwlock_write_lock(&vq->iotlb_lock);
> +
> +	TAILQ_FOREACH_SAFE(node, &vq->iotlb_list, next, temp_node) {
> +		TAILQ_REMOVE(&vq->iotlb_list, node, next);
> +		rte_mempool_put(vq->iotlb_pool, node);
> +	}
> +
> +	rte_rwlock_write_unlock(&vq->iotlb_lock);
> +}
> +
> +void vhost_user_iotlb_cache_insert(struct vhost_virtqueue *vq, uint64_t iova,
> +				uint64_t uaddr, uint64_t size, uint8_t perm)
> +{
> +	struct vhost_iotlb_entry *node, *new_node;
> +	int ret;
> +
> +	ret = rte_mempool_get(vq->iotlb_pool, (void **)&new_node);
> +	if (ret) {
> +		RTE_LOG(ERR, VHOST_CONFIG, "IOTLB pool empty, invalidate cache\n");

It's a cache, why not considering remove one to get space for new one?

> +		vhost_user_iotlb_cache_remove_all(vq);
> +		ret = rte_mempool_get(vq->iotlb_pool, (void **)&new_node);
> +		if (ret) {
> +			RTE_LOG(ERR, VHOST_CONFIG, "IOTLB pool still empty, failure\n");
> +			return;
> +		}
> +	}
> +
> +	new_node->iova = iova;
> +	new_node->uaddr = uaddr;
> +	new_node->size = size;
> +	new_node->perm = perm;
> +
> +	rte_rwlock_write_lock(&vq->iotlb_lock);
> +
> +	TAILQ_FOREACH(node, &vq->iotlb_list, next) {
> +		/*
> +		 * Entries must be invalidated before being updated.
> +		 * So if iova already in list, assume identical.
> +		 */
> +		if (node->iova == new_node->iova) {
> +			rte_mempool_put(vq->iotlb_pool, new_node);
> +			goto unlock;
> +		} else if (node->iova > new_node->iova) {
> +			TAILQ_INSERT_BEFORE(node, new_node, next);
> +			goto unlock;
> +		}
> +	}
> +
> +	TAILQ_INSERT_TAIL(&vq->iotlb_list, new_node, next);
> +
> +unlock:
> +	rte_rwlock_write_unlock(&vq->iotlb_lock);
> +}
> +
> +void vhost_user_iotlb_cache_remove(struct vhost_virtqueue *vq,
> +					uint64_t iova, uint64_t size)
> +{
> +	struct vhost_iotlb_entry *node, *temp_node;
> +
> +	if (unlikely(!size))
> +		return;
> +
> +	rte_rwlock_write_lock(&vq->iotlb_lock);
> +
> +	TAILQ_FOREACH_SAFE(node, &vq->iotlb_list, next, temp_node) {
> +		/* Sorted list */

I'd like to put such comments at the struct declartion, so that you don't
have to mention it many times that it's a sorted list.

> +		if (unlikely(node->iova >= iova + size)) {
> +			break;
> +		} else if ((node->iova < iova + size) &&
> +					(iova < node->iova + node->size)) {
> +			TAILQ_REMOVE(&vq->iotlb_list, node, next);
> +			rte_mempool_put(vq->iotlb_pool, node);
> +			continue;
> +		}
> +	}
> +
> +	rte_rwlock_write_unlock(&vq->iotlb_lock);
> +}
> +
[...]
> +int vhost_user_iotlb_init(struct virtio_net *dev, int vq_index)
> +{
> +	char pool_name[RTE_MEMPOOL_NAMESIZE];
> +	struct vhost_virtqueue *vq = dev->virtqueue[vq_index];
> +	int ret = -1, socket;
> +
> +	if (vq->iotlb_pool) {
> +		/*
> +		 * The cache has already been initialized,
> +		 * just drop all entries
> +		 */
> +		vhost_user_iotlb_cache_remove_all(vq);
> +		return 0;
> +	}
> +
> +#ifdef RTE_LIBRTE_VHOST_NUMA
> +	ret = get_mempolicy(&socket, NULL, 0, vq, MPOL_F_NODE | MPOL_F_ADDR);
> +#endif
> +	if (ret)
> +		socket = 0;
> +
> +	rte_rwlock_init(&vq->iotlb_lock);
> +
> +	TAILQ_INIT(&vq->iotlb_list);
> +
> +	snprintf(pool_name, sizeof(pool_name), "iotlb_cache_%d_%d",
> +			dev->vid, vq_index);

iotlb_cache is too generic. Adding a "vhost" prefix?

	--yliu
  
Maxime Coquelin Sept. 8, 2017, 8:24 a.m. UTC | #4
On 09/08/2017 10:08 AM, Yuanhan Liu wrote:
> On Thu, Aug 31, 2017 at 11:50:09AM +0200, Maxime Coquelin wrote:
>> diff --git a/lib/librte_vhost/iotlb.c b/lib/librte_vhost/iotlb.c
>> new file mode 100644
>> index 000000000..1b739dae5
>> --- /dev/null
>> +++ b/lib/librte_vhost/iotlb.c
>> @@ -0,0 +1,231 @@
>> +/*-
>> + *   BSD LICENSE
>> + *
>> + *   Copyright (c) 2017 Red Hat, Inc.
>> + *   Copyright (c) 2017 Maxime Coquelin <maxime.coquelin@redhat.com>
> 
> I'm not a lawer, but I have been told many years before, that you don't
> have the copyright for the code you write for open source project, the
> company you work for does.
> 
> Thus, it's more common to see something like following:
> 	Copyright , ... the commany ...
> 	Author:  Some One <...@...>
> 
> However, as you may have noticed, it's not common to put the authorship
> in the source files. Though I don't object it.

I'm not a lawyer too. At least in other projects, it seems common the
author puts his name as copyright owner.

I have no issue to change it to only keep Red Hat's one though.

> [...]
>> +#define IOTLB_CACHE_SIZE 1024
>> +
>> +static void vhost_user_iotlb_cache_remove_all(struct vhost_virtqueue *vq)
>     ^^^^^^^^^^^^
> Note that it's not the DPDK coding style to define a function.

Ok, I guess you mean:
static void
vhost_user_iotlb_cache_remove_all(struct vhost_virtqueue *vq) ?

>> +{
>> +	struct vhost_iotlb_entry *node, *temp_node;
>> +
>> +	rte_rwlock_write_lock(&vq->iotlb_lock);
>> +
>> +	TAILQ_FOREACH_SAFE(node, &vq->iotlb_list, next, temp_node) {
>> +		TAILQ_REMOVE(&vq->iotlb_list, node, next);
>> +		rte_mempool_put(vq->iotlb_pool, node);
>> +	}
>> +
>> +	rte_rwlock_write_unlock(&vq->iotlb_lock);
>> +}
>> +
>> +void vhost_user_iotlb_cache_insert(struct vhost_virtqueue *vq, uint64_t iova,
>> +				uint64_t uaddr, uint64_t size, uint8_t perm)
>> +{
>> +	struct vhost_iotlb_entry *node, *new_node;
>> +	int ret;
>> +
>> +	ret = rte_mempool_get(vq->iotlb_pool, (void **)&new_node);
>> +	if (ret) {
>> +		RTE_LOG(ERR, VHOST_CONFIG, "IOTLB pool empty, invalidate cache\n");
> 
> It's a cache, why not considering remove one to get space for new one?

It would mean having to track every lookups not to remove hot entries,
which would have an impact on performance.

Moreover, the idea is to have the cache large enough, else you could
face packet drops due to random cache misses.

We might consider to improve it, but I consider it an optimization that
could be implemented later if needed.

>> +		vhost_user_iotlb_cache_remove_all(vq);
>> +		ret = rte_mempool_get(vq->iotlb_pool, (void **)&new_node);
>> +		if (ret) {
>> +			RTE_LOG(ERR, VHOST_CONFIG, "IOTLB pool still empty, failure\n");
>> +			return;
>> +		}
>> +	}
>> +
>> +	new_node->iova = iova;
>> +	new_node->uaddr = uaddr;
>> +	new_node->size = size;
>> +	new_node->perm = perm;
>> +
>> +	rte_rwlock_write_lock(&vq->iotlb_lock);
>> +
>> +	TAILQ_FOREACH(node, &vq->iotlb_list, next) {
>> +		/*
>> +		 * Entries must be invalidated before being updated.
>> +		 * So if iova already in list, assume identical.
>> +		 */
>> +		if (node->iova == new_node->iova) {
>> +			rte_mempool_put(vq->iotlb_pool, new_node);
>> +			goto unlock;
>> +		} else if (node->iova > new_node->iova) {
>> +			TAILQ_INSERT_BEFORE(node, new_node, next);
>> +			goto unlock;
>> +		}
>> +	}
>> +
>> +	TAILQ_INSERT_TAIL(&vq->iotlb_list, new_node, next);
>> +
>> +unlock:
>> +	rte_rwlock_write_unlock(&vq->iotlb_lock);
>> +}
>> +
>> +void vhost_user_iotlb_cache_remove(struct vhost_virtqueue *vq,
>> +					uint64_t iova, uint64_t size)
>> +{
>> +	struct vhost_iotlb_entry *node, *temp_node;
>> +
>> +	if (unlikely(!size))
>> +		return;
>> +
>> +	rte_rwlock_write_lock(&vq->iotlb_lock);
>> +
>> +	TAILQ_FOREACH_SAFE(node, &vq->iotlb_list, next, temp_node) {
>> +		/* Sorted list */
> 
> I'd like to put such comments at the struct declartion, so that you don't
> have to mention it many times that it's a sorted list.

Ok, I'll comment directly in struct declaration.

>> +		if (unlikely(node->iova >= iova + size)) {
>> +			break;
>> +		} else if ((node->iova < iova + size) &&
>> +					(iova < node->iova + node->size)) {
>> +			TAILQ_REMOVE(&vq->iotlb_list, node, next);
>> +			rte_mempool_put(vq->iotlb_pool, node);
>> +			continue;
>> +		}
>> +	}
>> +
>> +	rte_rwlock_write_unlock(&vq->iotlb_lock);
>> +}
>> +
> [...]
>> +int vhost_user_iotlb_init(struct virtio_net *dev, int vq_index)
>> +{
>> +	char pool_name[RTE_MEMPOOL_NAMESIZE];
>> +	struct vhost_virtqueue *vq = dev->virtqueue[vq_index];
>> +	int ret = -1, socket;
>> +
>> +	if (vq->iotlb_pool) {
>> +		/*
>> +		 * The cache has already been initialized,
>> +		 * just drop all entries
>> +		 */
>> +		vhost_user_iotlb_cache_remove_all(vq);
>> +		return 0;
>> +	}
>> +
>> +#ifdef RTE_LIBRTE_VHOST_NUMA
>> +	ret = get_mempolicy(&socket, NULL, 0, vq, MPOL_F_NODE | MPOL_F_ADDR);
>> +#endif
>> +	if (ret)
>> +		socket = 0;
>> +
>> +	rte_rwlock_init(&vq->iotlb_lock);
>> +
>> +	TAILQ_INIT(&vq->iotlb_list);
>> +
>> +	snprintf(pool_name, sizeof(pool_name), "iotlb_cache_%d_%d",
>> +			dev->vid, vq_index);
> 
> iotlb_cache is too generic. Adding a "vhost" prefix?

Sure, that would be better.

Thanks,
Maxime

> 	--yliu
>
  
Yuanhan Liu Sept. 8, 2017, 8:36 a.m. UTC | #5
On Fri, Sep 08, 2017 at 10:24:58AM +0200, Maxime Coquelin wrote:
> 
> 
> On 09/08/2017 10:08 AM, Yuanhan Liu wrote:
> >On Thu, Aug 31, 2017 at 11:50:09AM +0200, Maxime Coquelin wrote:
> >>diff --git a/lib/librte_vhost/iotlb.c b/lib/librte_vhost/iotlb.c
> >>new file mode 100644
> >>index 000000000..1b739dae5
> >>--- /dev/null
> >>+++ b/lib/librte_vhost/iotlb.c
> >>@@ -0,0 +1,231 @@
> >>+/*-
> >>+ *   BSD LICENSE
> >>+ *
> >>+ *   Copyright (c) 2017 Red Hat, Inc.
> >>+ *   Copyright (c) 2017 Maxime Coquelin <maxime.coquelin@redhat.com>
> >
> >I'm not a lawer, but I have been told many years before, that you don't
> >have the copyright for the code you write for open source project, the
> >company you work for does.
> >
> >Thus, it's more common to see something like following:
> >	Copyright , ... the commany ...
> >	Author:  Some One <...@...>
> >
> >However, as you may have noticed, it's not common to put the authorship
> >in the source files. Though I don't object it.
> 
> I'm not a lawyer too. At least in other projects, it seems common the
> author puts his name as copyright owner.
> 
> I have no issue to change it to only keep Red Hat's one though.

That's up to you. What I said before was JFYI :)

> >[...]
> >>+#define IOTLB_CACHE_SIZE 1024
> >>+
> >>+static void vhost_user_iotlb_cache_remove_all(struct vhost_virtqueue *vq)
> >    ^^^^^^^^^^^^
> >Note that it's not the DPDK coding style to define a function.
> 
> Ok, I guess you mean:
> static void
> vhost_user_iotlb_cache_remove_all(struct vhost_virtqueue *vq) ?

Yep.

> >>+{
> >>+	struct vhost_iotlb_entry *node, *temp_node;
> >>+
> >>+	rte_rwlock_write_lock(&vq->iotlb_lock);
> >>+
> >>+	TAILQ_FOREACH_SAFE(node, &vq->iotlb_list, next, temp_node) {
> >>+		TAILQ_REMOVE(&vq->iotlb_list, node, next);
> >>+		rte_mempool_put(vq->iotlb_pool, node);
> >>+	}
> >>+
> >>+	rte_rwlock_write_unlock(&vq->iotlb_lock);
> >>+}
> >>+
> >>+void vhost_user_iotlb_cache_insert(struct vhost_virtqueue *vq, uint64_t iova,
> >>+				uint64_t uaddr, uint64_t size, uint8_t perm)
> >>+{
> >>+	struct vhost_iotlb_entry *node, *new_node;
> >>+	int ret;
> >>+
> >>+	ret = rte_mempool_get(vq->iotlb_pool, (void **)&new_node);
> >>+	if (ret) {
> >>+		RTE_LOG(ERR, VHOST_CONFIG, "IOTLB pool empty, invalidate cache\n");
> >
> >It's a cache, why not considering remove one to get space for new one?
> 
> It would mean having to track every lookups not to remove hot entries,
> which would have an impact on performance.

You were removing all caches, how can we do worse than that? Even a
random evict would be better. Or, more simply, just to remove the
head or the tail?

	--yliu

> Moreover, the idea is to have the cache large enough, else you could
> face packet drops due to random cache misses.
> 
> We might consider to improve it, but I consider it an optimization that
> could be implemented later if needed.
> 
> >>+		vhost_user_iotlb_cache_remove_all(vq);
> >>+		ret = rte_mempool_get(vq->iotlb_pool, (void **)&new_node);
> >>+		if (ret) {
> >>+			RTE_LOG(ERR, VHOST_CONFIG, "IOTLB pool still empty, failure\n");
> >>+			return;
> >>+		}
> >>+	}
  
Maxime Coquelin Sept. 8, 2017, 8:50 a.m. UTC | #6
On 09/08/2017 10:36 AM, Yuanhan Liu wrote:
> On Fri, Sep 08, 2017 at 10:24:58AM +0200, Maxime Coquelin wrote:
>>
>>
>> On 09/08/2017 10:08 AM, Yuanhan Liu wrote:
>>> On Thu, Aug 31, 2017 at 11:50:09AM +0200, Maxime Coquelin wrote:
>>>> diff --git a/lib/librte_vhost/iotlb.c b/lib/librte_vhost/iotlb.c
>>>> new file mode 100644
>>>> index 000000000..1b739dae5
>>>> --- /dev/null
>>>> +++ b/lib/librte_vhost/iotlb.c
>>>> @@ -0,0 +1,231 @@
>>>> +/*-
>>>> + *   BSD LICENSE
>>>> + *
>>>> + *   Copyright (c) 2017 Red Hat, Inc.
>>>> + *   Copyright (c) 2017 Maxime Coquelin <maxime.coquelin@redhat.com>
>>>
>>> I'm not a lawer, but I have been told many years before, that you don't
>>> have the copyright for the code you write for open source project, the
>>> company you work for does.
>>>
>>> Thus, it's more common to see something like following:
>>> 	Copyright , ... the commany ...
>>> 	Author:  Some One <...@...>
>>>
>>> However, as you may have noticed, it's not common to put the authorship
>>> in the source files. Though I don't object it.
>>
>> I'm not a lawyer too. At least in other projects, it seems common the
>> author puts his name as copyright owner.
>>
>> I have no issue to change it to only keep Red Hat's one though.
> 
> That's up to you. What I said before was JFYI :)
> 
>>> [...]
>>>> +#define IOTLB_CACHE_SIZE 1024
>>>> +
>>>> +static void vhost_user_iotlb_cache_remove_all(struct vhost_virtqueue *vq)
>>>     ^^^^^^^^^^^^
>>> Note that it's not the DPDK coding style to define a function.
>>
>> Ok, I guess you mean:
>> static void
>> vhost_user_iotlb_cache_remove_all(struct vhost_virtqueue *vq) ?
> 
> Yep.
> 
>>>> +{
>>>> +	struct vhost_iotlb_entry *node, *temp_node;
>>>> +
>>>> +	rte_rwlock_write_lock(&vq->iotlb_lock);
>>>> +
>>>> +	TAILQ_FOREACH_SAFE(node, &vq->iotlb_list, next, temp_node) {
>>>> +		TAILQ_REMOVE(&vq->iotlb_list, node, next);
>>>> +		rte_mempool_put(vq->iotlb_pool, node);
>>>> +	}
>>>> +
>>>> +	rte_rwlock_write_unlock(&vq->iotlb_lock);
>>>> +}
>>>> +
>>>> +void vhost_user_iotlb_cache_insert(struct vhost_virtqueue *vq, uint64_t iova,
>>>> +				uint64_t uaddr, uint64_t size, uint8_t perm)
>>>> +{
>>>> +	struct vhost_iotlb_entry *node, *new_node;
>>>> +	int ret;
>>>> +
>>>> +	ret = rte_mempool_get(vq->iotlb_pool, (void **)&new_node);
>>>> +	if (ret) {
>>>> +		RTE_LOG(ERR, VHOST_CONFIG, "IOTLB pool empty, invalidate cache\n");
>>>
>>> It's a cache, why not considering remove one to get space for new one?
>>
>> It would mean having to track every lookups not to remove hot entries,
>> which would have an impact on performance.
> 
> You were removing all caches, how can we do worse than that? Even a
> random evict would be better. Or, more simply, just to remove the
> head or the tail?

I think removing head or tail could cause deadlocks.
For example it needs to translate from 0x0 to 0x2000, with page size
being 0x1000.

If cache is full, when inserting 0x1000-0x1fff, it will remove
0x0-0xfff, so a miss will be sent for 0x0-0xffff and 0x1000-0x1fff will
be remove at insert time, etc...


Note that we really need to size the cache large enough for performance
purpose, so having the cache full could be cause by either buggy or
malicious guest.


> 	--yliu
> 
>> Moreover, the idea is to have the cache large enough, else you could
>> face packet drops due to random cache misses.
>>
>> We might consider to improve it, but I consider it an optimization that
>> could be implemented later if needed.
>>
>>>> +		vhost_user_iotlb_cache_remove_all(vq);
>>>> +		ret = rte_mempool_get(vq->iotlb_pool, (void **)&new_node);
>>>> +		if (ret) {
>>>> +			RTE_LOG(ERR, VHOST_CONFIG, "IOTLB pool still empty, failure\n");
>>>> +			return;
>>>> +		}
>>>> +	}
  
Yuanhan Liu Sept. 8, 2017, 9:21 a.m. UTC | #7
On Fri, Sep 08, 2017 at 10:50:49AM +0200, Maxime Coquelin wrote:
> >>>>+{
> >>>>+	struct vhost_iotlb_entry *node, *temp_node;
> >>>>+
> >>>>+	rte_rwlock_write_lock(&vq->iotlb_lock);
> >>>>+
> >>>>+	TAILQ_FOREACH_SAFE(node, &vq->iotlb_list, next, temp_node) {
> >>>>+		TAILQ_REMOVE(&vq->iotlb_list, node, next);
> >>>>+		rte_mempool_put(vq->iotlb_pool, node);
> >>>>+	}
> >>>>+
> >>>>+	rte_rwlock_write_unlock(&vq->iotlb_lock);
> >>>>+}
> >>>>+
> >>>>+void vhost_user_iotlb_cache_insert(struct vhost_virtqueue *vq, uint64_t iova,
> >>>>+				uint64_t uaddr, uint64_t size, uint8_t perm)
> >>>>+{
> >>>>+	struct vhost_iotlb_entry *node, *new_node;
> >>>>+	int ret;
> >>>>+
> >>>>+	ret = rte_mempool_get(vq->iotlb_pool, (void **)&new_node);
> >>>>+	if (ret) {
> >>>>+		RTE_LOG(ERR, VHOST_CONFIG, "IOTLB pool empty, invalidate cache\n");
> >>>
> >>>It's a cache, why not considering remove one to get space for new one?
> >>
> >>It would mean having to track every lookups not to remove hot entries,
> >>which would have an impact on performance.
> >
> >You were removing all caches, how can we do worse than that? Even a
> >random evict would be better. Or, more simply, just to remove the
> >head or the tail?
> 
> I think removing head or tail could cause deadlocks.
> For example it needs to translate from 0x0 to 0x2000, with page size
> being 0x1000.
> 
> If cache is full, when inserting 0x1000-0x1fff, it will remove
> 0x0-0xfff, so a miss will be sent for 0x0-0xffff and 0x1000-0x1fff will
> be remove at insert time, etc...

Okay, that means we can't simply remove the head or tail.

> Note that we really need to size the cache large enough for performance
> purpose, so having the cache full could be cause by either buggy or
> malicious guest.

I agree. But for the malicious guest, it could lead to a DDOS attack:
assume it keeps vhost running out of cache and then vhost keeps printing
above message.

What I suggested was to evict one (by some polices) to get a space for
the new one we want to insert. Note that it won't be a performance issue,
IMO, due to we only do that when we run out of caches, which, according
to your sayings, should not happen in normal cases.

	--yliu

> >>Moreover, the idea is to have the cache large enough, else you could
> >>face packet drops due to random cache misses.
> >>
> >>We might consider to improve it, but I consider it an optimization that
> >>could be implemented later if needed.
> >>
> >>>>+		vhost_user_iotlb_cache_remove_all(vq);
> >>>>+		ret = rte_mempool_get(vq->iotlb_pool, (void **)&new_node);
> >>>>+		if (ret) {
> >>>>+			RTE_LOG(ERR, VHOST_CONFIG, "IOTLB pool still empty, failure\n");
> >>>>+			return;
> >>>>+		}
> >>>>+	}
  
Maxime Coquelin Sept. 8, 2017, 9:28 a.m. UTC | #8
On 09/08/2017 11:21 AM, Yuanhan Liu wrote:
> On Fri, Sep 08, 2017 at 10:50:49AM +0200, Maxime Coquelin wrote:
>>>>>> +{
>>>>>> +	struct vhost_iotlb_entry *node, *temp_node;
>>>>>> +
>>>>>> +	rte_rwlock_write_lock(&vq->iotlb_lock);
>>>>>> +
>>>>>> +	TAILQ_FOREACH_SAFE(node, &vq->iotlb_list, next, temp_node) {
>>>>>> +		TAILQ_REMOVE(&vq->iotlb_list, node, next);
>>>>>> +		rte_mempool_put(vq->iotlb_pool, node);
>>>>>> +	}
>>>>>> +
>>>>>> +	rte_rwlock_write_unlock(&vq->iotlb_lock);
>>>>>> +}
>>>>>> +
>>>>>> +void vhost_user_iotlb_cache_insert(struct vhost_virtqueue *vq, uint64_t iova,
>>>>>> +				uint64_t uaddr, uint64_t size, uint8_t perm)
>>>>>> +{
>>>>>> +	struct vhost_iotlb_entry *node, *new_node;
>>>>>> +	int ret;
>>>>>> +
>>>>>> +	ret = rte_mempool_get(vq->iotlb_pool, (void **)&new_node);
>>>>>> +	if (ret) {
>>>>>> +		RTE_LOG(ERR, VHOST_CONFIG, "IOTLB pool empty, invalidate cache\n");
>>>>>
>>>>> It's a cache, why not considering remove one to get space for new one?
>>>>
>>>> It would mean having to track every lookups not to remove hot entries,
>>>> which would have an impact on performance.
>>>
>>> You were removing all caches, how can we do worse than that? Even a
>>> random evict would be better. Or, more simply, just to remove the
>>> head or the tail?
>>
>> I think removing head or tail could cause deadlocks.
>> For example it needs to translate from 0x0 to 0x2000, with page size
>> being 0x1000.
>>
>> If cache is full, when inserting 0x1000-0x1fff, it will remove
>> 0x0-0xfff, so a miss will be sent for 0x0-0xffff and 0x1000-0x1fff will
>> be remove at insert time, etc...
> 
> Okay, that means we can't simply remove the head or tail.
> 
>> Note that we really need to size the cache large enough for performance
>> purpose, so having the cache full could be cause by either buggy or
>> malicious guest.
> 
> I agree. But for the malicious guest, it could lead to a DDOS attack:
> assume it keeps vhost running out of cache and then vhost keeps printing
> above message.
> 
> What I suggested was to evict one (by some polices) to get a space for
> the new one we want to insert. Note that it won't be a performance issue,
> IMO, due to we only do that when we run out of caches, which, according
> to your sayings, should not happen in normal cases.

Ok, so let's randomly remove one entry. What about using something like:
rte_rand() % IOTLB_CACHE_SIZE

> 
> 	--yliu
> 
>>>> Moreover, the idea is to have the cache large enough, else you could
>>>> face packet drops due to random cache misses.
>>>>
>>>> We might consider to improve it, but I consider it an optimization that
>>>> could be implemented later if needed.
>>>>
>>>>>> +		vhost_user_iotlb_cache_remove_all(vq);
>>>>>> +		ret = rte_mempool_get(vq->iotlb_pool, (void **)&new_node);
>>>>>> +		if (ret) {
>>>>>> +			RTE_LOG(ERR, VHOST_CONFIG, "IOTLB pool still empty, failure\n");
>>>>>> +			return;
>>>>>> +		}
>>>>>> +	}
  

Patch

diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile
index 4a116fe31..e1084aba5 100644
--- a/lib/librte_vhost/Makefile
+++ b/lib/librte_vhost/Makefile
@@ -47,8 +47,8 @@  LDLIBS += -lnuma
 endif
 
 # all source are stored in SRCS-y
-SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := fd_man.c socket.c vhost.c vhost_user.c \
-				   virtio_net.c
+SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := fd_man.c iotlb.c socket.c vhost.c \
+					vhost_user.c virtio_net.c
 
 # install includes
 SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_vhost.h
diff --git a/lib/librte_vhost/iotlb.c b/lib/librte_vhost/iotlb.c
new file mode 100644
index 000000000..1b739dae5
--- /dev/null
+++ b/lib/librte_vhost/iotlb.c
@@ -0,0 +1,231 @@ 
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright (c) 2017 Red Hat, Inc.
+ *   Copyright (c) 2017 Maxime Coquelin <maxime.coquelin@redhat.com>
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifdef RTE_LIBRTE_VHOST_NUMA
+#include <numaif.h>
+#endif
+
+#include <rte_tailq.h>
+
+#include "iotlb.h"
+#include "vhost.h"
+
+struct vhost_iotlb_entry {
+	TAILQ_ENTRY(vhost_iotlb_entry) next;
+
+	uint64_t iova;
+	uint64_t uaddr;
+	uint64_t size;
+	uint8_t perm;
+};
+
+#define IOTLB_CACHE_SIZE 1024
+
+static void vhost_user_iotlb_cache_remove_all(struct vhost_virtqueue *vq)
+{
+	struct vhost_iotlb_entry *node, *temp_node;
+
+	rte_rwlock_write_lock(&vq->iotlb_lock);
+
+	TAILQ_FOREACH_SAFE(node, &vq->iotlb_list, next, temp_node) {
+		TAILQ_REMOVE(&vq->iotlb_list, node, next);
+		rte_mempool_put(vq->iotlb_pool, node);
+	}
+
+	rte_rwlock_write_unlock(&vq->iotlb_lock);
+}
+
+void vhost_user_iotlb_cache_insert(struct vhost_virtqueue *vq, uint64_t iova,
+				uint64_t uaddr, uint64_t size, uint8_t perm)
+{
+	struct vhost_iotlb_entry *node, *new_node;
+	int ret;
+
+	ret = rte_mempool_get(vq->iotlb_pool, (void **)&new_node);
+	if (ret) {
+		RTE_LOG(ERR, VHOST_CONFIG, "IOTLB pool empty, invalidate cache\n");
+		vhost_user_iotlb_cache_remove_all(vq);
+		ret = rte_mempool_get(vq->iotlb_pool, (void **)&new_node);
+		if (ret) {
+			RTE_LOG(ERR, VHOST_CONFIG, "IOTLB pool still empty, failure\n");
+			return;
+		}
+	}
+
+	new_node->iova = iova;
+	new_node->uaddr = uaddr;
+	new_node->size = size;
+	new_node->perm = perm;
+
+	rte_rwlock_write_lock(&vq->iotlb_lock);
+
+	TAILQ_FOREACH(node, &vq->iotlb_list, next) {
+		/*
+		 * Entries must be invalidated before being updated.
+		 * So if iova already in list, assume identical.
+		 */
+		if (node->iova == new_node->iova) {
+			rte_mempool_put(vq->iotlb_pool, new_node);
+			goto unlock;
+		} else if (node->iova > new_node->iova) {
+			TAILQ_INSERT_BEFORE(node, new_node, next);
+			goto unlock;
+		}
+	}
+
+	TAILQ_INSERT_TAIL(&vq->iotlb_list, new_node, next);
+
+unlock:
+	rte_rwlock_write_unlock(&vq->iotlb_lock);
+}
+
+void vhost_user_iotlb_cache_remove(struct vhost_virtqueue *vq,
+					uint64_t iova, uint64_t size)
+{
+	struct vhost_iotlb_entry *node, *temp_node;
+
+	if (unlikely(!size))
+		return;
+
+	rte_rwlock_write_lock(&vq->iotlb_lock);
+
+	TAILQ_FOREACH_SAFE(node, &vq->iotlb_list, next, temp_node) {
+		/* Sorted list */
+		if (unlikely(node->iova >= iova + size)) {
+			break;
+		} else if ((node->iova < iova + size) &&
+					(iova < node->iova + node->size)) {
+			TAILQ_REMOVE(&vq->iotlb_list, node, next);
+			rte_mempool_put(vq->iotlb_pool, node);
+			continue;
+		}
+	}
+
+	rte_rwlock_write_unlock(&vq->iotlb_lock);
+}
+
+
+uint64_t vhost_user_iotlb_cache_find(struct vhost_virtqueue *vq, uint64_t iova,
+						uint64_t *size, uint8_t perm)
+{
+	struct vhost_iotlb_entry *node;
+	uint64_t offset, vva = 0, mapped = 0;
+
+	if (unlikely(!*size))
+		goto out;
+
+	rte_rwlock_read_lock(&vq->iotlb_lock);
+
+	TAILQ_FOREACH(node, &vq->iotlb_list, next) {
+		/* List sorted by iova */
+		if (unlikely(iova < node->iova))
+			break;
+
+		if (iova >= node->iova + node->size)
+			continue;
+
+		if (unlikely((perm & node->perm) != perm)) {
+			vva = 0;
+			break;
+		}
+
+		offset = iova - node->iova;
+		if (!vva)
+			vva = node->uaddr + offset;
+
+		mapped += node->size - offset;
+		iova = node->iova + node->size;
+
+		if (mapped >= *size)
+			break;
+	}
+
+	rte_rwlock_read_unlock(&vq->iotlb_lock);
+
+out:
+	/* Only part of the requested chunk is mapped */
+	if (unlikely(mapped < *size))
+		*size = mapped;
+
+	return vva;
+}
+
+int vhost_user_iotlb_init(struct virtio_net *dev, int vq_index)
+{
+	char pool_name[RTE_MEMPOOL_NAMESIZE];
+	struct vhost_virtqueue *vq = dev->virtqueue[vq_index];
+	int ret = -1, socket;
+
+	if (vq->iotlb_pool) {
+		/*
+		 * The cache has already been initialized,
+		 * just drop all entries
+		 */
+		vhost_user_iotlb_cache_remove_all(vq);
+		return 0;
+	}
+
+#ifdef RTE_LIBRTE_VHOST_NUMA
+	ret = get_mempolicy(&socket, NULL, 0, vq, MPOL_F_NODE | MPOL_F_ADDR);
+#endif
+	if (ret)
+		socket = 0;
+
+	rte_rwlock_init(&vq->iotlb_lock);
+
+	TAILQ_INIT(&vq->iotlb_list);
+
+	snprintf(pool_name, sizeof(pool_name), "iotlb_cache_%d_%d",
+			dev->vid, vq_index);
+
+	/* If already created, free it and recreate */
+	vq->iotlb_pool = rte_mempool_lookup(pool_name);
+	if (vq->iotlb_pool)
+		rte_mempool_free(vq->iotlb_pool);
+
+	vq->iotlb_pool = rte_mempool_create(pool_name,
+			IOTLB_CACHE_SIZE, sizeof(struct vhost_iotlb_entry), 0,
+			0, 0, NULL, NULL, NULL, socket,
+			MEMPOOL_F_NO_CACHE_ALIGN |
+			MEMPOOL_F_SP_PUT |
+			MEMPOOL_F_SC_GET);
+	if (!vq->iotlb_pool) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+				"Failed to create IOTLB cache pool (%s)\n",
+				pool_name);
+		return -1;
+	}
+
+	return 0;
+}
+
diff --git a/lib/librte_vhost/iotlb.h b/lib/librte_vhost/iotlb.h
new file mode 100644
index 000000000..459820762
--- /dev/null
+++ b/lib/librte_vhost/iotlb.h
@@ -0,0 +1,46 @@ 
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright (c) 2017 Red Hat, Inc.
+ *   Copyright (c) 2017 Maxime Coquelin <maxime.coquelin@redhat.com>
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef _VHOST_IOTLB_H_
+#define _VHOST_IOTLB_H_
+
+#include "vhost.h"
+void vhost_user_iotlb_cache_insert(struct vhost_virtqueue *vq, uint64_t iova,
+					uint64_t uaddr, uint64_t size,
+					uint8_t perm);
+void vhost_user_iotlb_cache_remove(struct vhost_virtqueue *vq,
+					uint64_t iova, uint64_t size);
+uint64_t vhost_user_iotlb_cache_find(struct vhost_virtqueue *vq, uint64_t iova,
+					uint64_t *size, uint8_t perm);
+int vhost_user_iotlb_init(struct virtio_net *dev, int vq_index);
+
+#endif /* _VHOST_IOTLB_H_ */
diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
index 429983858..f1099753c 100644
--- a/lib/librte_vhost/vhost.c
+++ b/lib/librte_vhost/vhost.c
@@ -188,6 +188,7 @@  free_device(struct virtio_net *dev)
 		vq = dev->virtqueue[i];
 
 		rte_free(vq->shadow_used_ring);
+		rte_mempool_free(vq->iotlb_pool);
 
 		rte_free(vq);
 	}
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index 5c9d9316a..7816a92b5 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -45,6 +45,7 @@ 
 
 #include <rte_log.h>
 #include <rte_ether.h>
+#include <rte_rwlock.h>
 
 #include "rte_vhost.h"
 
@@ -114,6 +115,10 @@  struct vhost_virtqueue {
 
 	struct vring_used_elem  *shadow_used_ring;
 	uint16_t                shadow_used_idx;
+
+	rte_rwlock_t	iotlb_lock;
+	struct rte_mempool *iotlb_pool;
+	TAILQ_HEAD(, vhost_iotlb_entry) iotlb_list;
 } __rte_cache_aligned;
 
 /* Old kernels have no such macros defined */