[dpdk-dev,v4,66/70] bus/fslmc: enable support for mem event callbacks for vfio

Message ID f8a9fe56a365c5cb655c0a22ecc31390c24f3fe9.1523218215.git.anatoly.burakov@intel.com (mailing list archive)
State Superseded, archived
Headers

Checks

Context Check Description
ci/checkpatch warning coding style issues
ci/Intel-compilation fail apply patch file failure

Commit Message

Anatoly Burakov April 8, 2018, 8:18 p.m. UTC
  VFIO needs to map and unmap segments for DMA whenever they
become available or unavailable, so register a callback for
memory events, and provide map/unmap functions.

Signed-off-by: Shreyansh Jain <shreyansh.jain@nxp.com>
Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 drivers/bus/fslmc/fslmc_vfio.c | 150 +++++++++++++++++++++++++++++++++++++----
 1 file changed, 136 insertions(+), 14 deletions(-)
  

Comments

Shreyansh Jain April 9, 2018, 10:01 a.m. UTC | #1
Hi Anatoly,

On Monday 09 April 2018 01:48 AM, Anatoly Burakov wrote:
> VFIO needs to map and unmap segments for DMA whenever they
> become available or unavailable, so register a callback for
> memory events, and provide map/unmap functions.
> 
> Signed-off-by: Shreyansh Jain <shreyansh.jain@nxp.com>
> Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
> ---
>   drivers/bus/fslmc/fslmc_vfio.c | 150 +++++++++++++++++++++++++++++++++++++----
>   1 file changed, 136 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/bus/fslmc/fslmc_vfio.c b/drivers/bus/fslmc/fslmc_vfio.c
> index db3eb61..dfdd2bc 100644
> --- a/drivers/bus/fslmc/fslmc_vfio.c
> +++ b/drivers/bus/fslmc/fslmc_vfio.c
> @@ -30,6 +30,7 @@
>   #include <rte_kvargs.h>
>   #include <rte_dev.h>
>   #include <rte_bus.h>
> +#include <rte_eal_memconfig.h>
>   
>   #include "rte_fslmc.h"
>   #include "fslmc_vfio.h"
> @@ -188,11 +189,57 @@ static int vfio_map_irq_region(struct fslmc_vfio_group *group)
>   	return -errno;
>   }
>   
> +static int fslmc_map_dma(uint64_t vaddr, rte_iova_t iovaddr, size_t len);
> +static int fslmc_unmap_dma(uint64_t vaddr, rte_iova_t iovaddr, size_t len);
> +
> +static void
> +fslmc_memevent_cb(enum rte_mem_event type, const void *addr, size_t len)
> +{
> +	struct rte_memseg_list *msl;
> +	struct rte_memseg *ms;
> +	size_t cur_len = 0, map_len = 0;
> +	uint64_t virt_addr;
> +	rte_iova_t iova_addr;
> +	int ret;
> +
> +	msl = rte_mem_virt2memseg_list(addr);
> +
> +	while (cur_len < len) {
> +		const void *va = RTE_PTR_ADD(addr, cur_len);
> +
> +		ms = rte_mem_virt2memseg(va, msl);
> +		iova_addr = ms->iova;
> +		virt_addr = ms->addr_64;
> +		map_len = ms->len;
> +
> +		DPAA2_BUS_DEBUG("Calling with type=%d, va=%p, virt_addr=0x%" PRIx64 ", iova=0x%" PRIx64 ", map_len=%zu\n",

I would like to correct this message (80char + rewording) - What do you 
suggest? Should I send a new patch to you or just convey what should be 
changed?

> +				type, va, virt_addr, iova_addr, map_len);
> +
> +		if (type == RTE_MEM_EVENT_ALLOC)
> +			ret = fslmc_map_dma(virt_addr, iova_addr, map_len);
> +		else
> +			ret = fslmc_unmap_dma(virt_addr, iova_addr, map_len);
> +
> +		if (ret != 0) {
> +			DPAA2_BUS_ERR("DMA Mapping/Unmapping failed. Map=%d, addr=%p, len=%zu, err:(%d)",
> +					type, va, map_len, ret);

Same as above. 80 Char issue.

> +			return;
> +		}
> +
> +		cur_len += map_len;
> +	}
> +
> +	if (type == RTE_MEM_EVENT_ALLOC)
> +		DPAA2_BUS_DEBUG("Total Mapped: addr=%p, len=%zu\n",
> +				addr, len);
> +	else
> +		DPAA2_BUS_DEBUG("Total Unmapped: addr=%p, len=%zu\n",
> +				addr, len);
> +}
> +
>   static int
> -fslmc_vfio_map(const struct rte_memseg_list *msl __rte_unused,
> -		const struct rte_memseg *ms, void *arg)
> +fslmc_map_dma(uint64_t vaddr, rte_iova_t iovaddr __rte_unused, size_t len)
>   {
> -	int *n_segs = arg;
>   	struct fslmc_vfio_group *group;
>   	struct vfio_iommu_type1_dma_map dma_map = {
>   		.argsz = sizeof(struct vfio_iommu_type1_dma_map),
> @@ -200,10 +247,11 @@ fslmc_vfio_map(const struct rte_memseg_list *msl __rte_unused,
>   	};
>   	int ret;
>   
> -	dma_map.size = ms->len;
> -	dma_map.vaddr = ms->addr_64;
> +	dma_map.size = len;
> +	dma_map.vaddr = vaddr;
> +
>   #ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
> -	dma_map.iova = ms->iova;
> +	dma_map.iova = iovaddr;
>   #else
>   	dma_map.iova = dma_map.vaddr;
>   #endif
> @@ -216,30 +264,99 @@ fslmc_vfio_map(const struct rte_memseg_list *msl __rte_unused,
>   		return -1;
>   	}
>   
> -	DPAA2_BUS_DEBUG("-->Initial SHM Virtual ADDR %llX",
> -			dma_map.vaddr);
> -	DPAA2_BUS_DEBUG("-----> DMA size 0x%llX", dma_map.size);
> -	ret = ioctl(group->container->fd, VFIO_IOMMU_MAP_DMA,
> -			&dma_map);
> +	DPAA2_BUS_DEBUG("--> Map address: %llX, size: 0x%llX\n",
> +			dma_map.vaddr, dma_map.size);
> +	ret = ioctl(group->container->fd, VFIO_IOMMU_MAP_DMA, &dma_map);
>   	if (ret) {
>   		DPAA2_BUS_ERR("VFIO_IOMMU_MAP_DMA API(errno = %d)",
>   				errno);
>   		return -1;
>   	}
> -	(*n_segs)++;
> +
>   	return 0;
>   }
>   
> +static int
> +fslmc_unmap_dma(uint64_t vaddr, uint64_t iovaddr __rte_unused, size_t len)
> +{
> +	struct fslmc_vfio_group *group;
> +	struct vfio_iommu_type1_dma_unmap dma_unmap = {
> +		.argsz = sizeof(struct vfio_iommu_type1_dma_unmap),
> +		.flags = 0,
> +	};
> +	int ret;
> +
> +	dma_unmap.size = len;
> +	dma_unmap.iova = vaddr;
> +
> +	/* SET DMA MAP for IOMMU */
> +	group = &vfio_group;
> +
> +	if (!group->container) {
> +		DPAA2_BUS_ERR("Container is not connected ");
> +		return -1;
> +	}
> +
> +	DPAA2_BUS_DEBUG("--> Unmap address: %llX, size: 0x%llX\n",
> +			dma_unmap.iova, dma_unmap.size);
> +	ret = ioctl(group->container->fd, VFIO_IOMMU_UNMAP_DMA, &dma_unmap);
> +	if (ret) {
> +		DPAA2_BUS_ERR("VFIO_IOMMU_UNMAP_DMA API(errno = %d)",
> +				errno);
> +		return -1;
> +	}
> +
> +	return 0;
> +}
> +
> +static int
> +fslmc_dmamap_seg(const struct rte_memseg_list *msl __rte_unused,
> +		 const struct rte_memseg *ms, void *arg)
> +{
> +	int *n_segs = arg;
> +	int ret;
> +
> +	ret = fslmc_map_dma(ms->addr_64, ms->iova, ms->len);
> +	if (ret)
> +		DPAA2_BUS_ERR("Unable to VFIO map (addr=%p, len=%zu)\n",
> +				ms->addr, ms->len);
> +	else
> +		(*n_segs)++;
> +
> +	return ret;
> +}
> +
>   int rte_fslmc_vfio_dmamap(void)
>   {
> -	int i = 0;
> +	int i = 0, ret;
> +	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
> +	rte_rwlock_t *mem_lock = &mcfg->memory_hotplug_lock;
> +
> +	/* Lock before parsing and registering callback to memory subsystem */
> +	rte_rwlock_read_lock(mem_lock);
>   
> -	if (rte_memseg_walk(fslmc_vfio_map, &i) < 0)
> +	if (rte_memseg_walk(fslmc_dmamap_seg, &i) < 0) {
> +		rte_rwlock_read_unlock(mem_lock);
>   		return -1;
> +	}
> +
> +	ret = rte_mem_event_callback_register("fslmc_memevent_clb",
> +					      fslmc_memevent_cb);
> +	if (ret && rte_errno == ENOTSUP)
> +		DPAA2_BUS_DEBUG("Memory event callbacks not supported");
> +	else if (ret)
> +		DPAA2_BUS_DEBUG("Unable to install memory handler");
> +	else
> +		DPAA2_BUS_DEBUG("Installed memory callback handler");
>   
>   	/* Verifying that at least single segment is available */
>   	if (i <= 0) {
> +		/* TODO: Is this verification required any more? What would
> +		 * happen to non-legacy case where nothing was preallocated
> +		 * thus causing i==0?
> +		 */

And this as well - if call backs are not going to appear in case of 
legacy, this needs to be removed.
let me know how do you want to take these changes.

>   		DPAA2_BUS_ERR("No Segments found for VFIO Mapping");
> +		rte_rwlock_read_unlock(mem_lock);
>   		return -1;
>   	}
>   	DPAA2_BUS_DEBUG("Total %d segments found.", i);
> @@ -250,6 +367,11 @@ int rte_fslmc_vfio_dmamap(void)
>   	 */
>   	vfio_map_irq_region(&vfio_group);
>   
> +	/* Existing segments have been mapped and memory callback for hotplug
> +	 * has been installed.
> +	 */
> +	rte_rwlock_read_unlock(mem_lock);
> +
>   	return 0;
>   }
>   
>
  
Anatoly Burakov April 9, 2018, 10:55 a.m. UTC | #2
On 09-Apr-18 11:01 AM, Shreyansh Jain wrote:
> Hi Anatoly,
> 
> On Monday 09 April 2018 01:48 AM, Anatoly Burakov wrote:
>> VFIO needs to map and unmap segments for DMA whenever they
>> become available or unavailable, so register a callback for
>> memory events, and provide map/unmap functions.
>>
>> Signed-off-by: Shreyansh Jain <shreyansh.jain@nxp.com>
>> Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
>> ---

<...>

>> +        DPAA2_BUS_DEBUG("Calling with type=%d, va=%p, virt_addr=0x%" 
>> PRIx64 ", iova=0x%" PRIx64 ", map_len=%zu\n",
> 
> I would like to correct this message (80char + rewording) - What do you 
> suggest? Should I send a new patch to you or just convey what should be 
> changed?
> 

As far as i know, leaving strings on single line is good for grepping. 
However, perhaps having PRIx64 etc in there breaks it anyway.

>> +                type, va, virt_addr, iova_addr, map_len);
>> +
>> +        if (type == RTE_MEM_EVENT_ALLOC)
>> +            ret = fslmc_map_dma(virt_addr, iova_addr, map_len);
>> +        else
>> +            ret = fslmc_unmap_dma(virt_addr, iova_addr, map_len);
>> +
>> +        if (ret != 0) {
>> +            DPAA2_BUS_ERR("DMA Mapping/Unmapping failed. Map=%d, 
>> addr=%p, len=%zu, err:(%d)",
>> +                    type, va, map_len, ret);
> 
> Same as above. 80 Char issue.

Same reasoning - leaving strings unbroken allows for easier grepping the 
codebase, but i'm not sure what's our policy on having formatted strings 
unbroken.

> 
>> +            return;
>> +        }
>> +
>> +        cur_len += map_len;
>> +    }
>> +
>> +    if (type == RTE_MEM_EVENT_ALLOC)
>> +        DPAA2_BUS_DEBUG("Total Mapped: addr=%p, len=%zu\n",
>> +                addr, len);
>> +    else

<...>

>> +    ret = rte_mem_event_callback_register("fslmc_memevent_clb",
>> +                          fslmc_memevent_cb);
>> +    if (ret && rte_errno == ENOTSUP)
>> +        DPAA2_BUS_DEBUG("Memory event callbacks not supported");
>> +    else if (ret)
>> +        DPAA2_BUS_DEBUG("Unable to install memory handler");
>> +    else
>> +        DPAA2_BUS_DEBUG("Installed memory callback handler");
>>       /* Verifying that at least single segment is available */
>>       if (i <= 0) {
>> +        /* TODO: Is this verification required any more? What would
>> +         * happen to non-legacy case where nothing was preallocated
>> +         * thus causing i==0?
>> +         */
> 
> And this as well - if call backs are not going to appear in case of 
> legacy, this needs to be removed.

Callbacks aren't only not going to appear in legacy mode - they will 
also not appear on FreeBSD. We check this above, with checking rte_errno 
value (if callbacks are not supported, it's set to ENOTSUP, and having 
callbacks unsupported is not an error).

> let me know how do you want to take these changes.
> 

Now that i think of it, this error condition is wrong. This is called in 
both legacy and non-legacy mode. This is bus probe, no? For non-legacy 
mode, it is entirely possible to start without any memory whatsoever. It 
just so happens that rte_service API allocates some on init, and hence 
you always have at least one segment - that may not be the case forever. 
So, non-legacy mode, not having memsegs is not an error, it is expected 
behavior, so maybe we should remove this error check altogether.

>>           DPAA2_BUS_ERR("No Segments found for VFIO Mapping");
>> +        rte_rwlock_read_unlock(mem_lock);
>>           return -1;
>>       }
>>       DPAA2_BUS_DEBUG("Total %d segments found.", i);
>> @@ -250,6 +367,11 @@ int rte_fslmc_vfio_dmamap(void)
>>        */
>>       vfio_map_irq_region(&vfio_group);
>> +    /* Existing segments have been mapped and memory callback for 
>> hotplug
>> +     * has been installed.
>> +     */
>> +    rte_rwlock_read_unlock(mem_lock);
>> +
>>       return 0;
>>   }
>>
> 
>
  
Shreyansh Jain April 9, 2018, 12:09 p.m. UTC | #3
On Monday 09 April 2018 04:25 PM, Burakov, Anatoly wrote:
> On 09-Apr-18 11:01 AM, Shreyansh Jain wrote:
>> Hi Anatoly,
>>
>> On Monday 09 April 2018 01:48 AM, Anatoly Burakov wrote:
>>> VFIO needs to map and unmap segments for DMA whenever they
>>> become available or unavailable, so register a callback for
>>> memory events, and provide map/unmap functions.
>>>
>>> Signed-off-by: Shreyansh Jain <shreyansh.jain@nxp.com>
>>> Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
>>> ---
> 
> <...>
> 
>>> +        DPAA2_BUS_DEBUG("Calling with type=%d, va=%p, virt_addr=0x%" 
>>> PRIx64 ", iova=0x%" PRIx64 ", map_len=%zu\n",
>>
>> I would like to correct this message (80char + rewording) - What do 
>> you suggest? Should I send a new patch to you or just convey what 
>> should be changed?
>>
> 
> As far as i know, leaving strings on single line is good for grepping. 
> However, perhaps having PRIx64 etc in there breaks it anyway.

Yes, that and the debug message was not helpful.
This is what I had in mind. (DPAA2_BUS_DEBUG doesn't require an extra \n)

DPAA2_BUS_DEBUG("Request for %s, va=%p, virt_addr=0x%" PRIx64 ","
		"iova=0x%" PRIx64 ", map_len=%zu",
		type == RTE_MEM_EVENT_ALLOC? "alloc" : "dealloc",
		va, virt_addr, iova_addr, map_len);

> 
>>> +                type, va, virt_addr, iova_addr, map_len);
>>> +
>>> +        if (type == RTE_MEM_EVENT_ALLOC)
>>> +            ret = fslmc_map_dma(virt_addr, iova_addr, map_len);
>>> +        else
>>> +            ret = fslmc_unmap_dma(virt_addr, iova_addr, map_len);
>>> +
>>> +        if (ret != 0) {
>>> +            DPAA2_BUS_ERR("DMA Mapping/Unmapping failed. Map=%d, 
>>> addr=%p, len=%zu, err:(%d)",
>>> +                    type, va, map_len, ret);
>>
>> Same as above. 80 Char issue.
> 
> Same reasoning - leaving strings unbroken allows for easier grepping the 
> codebase, but i'm not sure what's our policy on having formatted strings 
> unbroken.

My policy is not different, but the various variables being dumped 
cannot anyway help in grepping - So, keeping the variables on separate 
lines for 80chars is ok. "DMA Mapping/Unmapping failed." is enough for 
greps.

> 
>>
>>> +            return;
>>> +        }
>>> +
>>> +        cur_len += map_len;
>>> +    }
>>> +
>>> +    if (type == RTE_MEM_EVENT_ALLOC)
>>> +        DPAA2_BUS_DEBUG("Total Mapped: addr=%p, len=%zu\n",
>>> +                addr, len);
>>> +    else
> 
> <...>
> 
>>> +    ret = rte_mem_event_callback_register("fslmc_memevent_clb",
>>> +                          fslmc_memevent_cb);
>>> +    if (ret && rte_errno == ENOTSUP)
>>> +        DPAA2_BUS_DEBUG("Memory event callbacks not supported");
>>> +    else if (ret)
>>> +        DPAA2_BUS_DEBUG("Unable to install memory handler");
>>> +    else
>>> +        DPAA2_BUS_DEBUG("Installed memory callback handler");
>>>       /* Verifying that at least single segment is available */
>>>       if (i <= 0) {
>>> +        /* TODO: Is this verification required any more? What would
>>> +         * happen to non-legacy case where nothing was preallocated
>>> +         * thus causing i==0?
>>> +         */
>>
>> And this as well - if call backs are not going to appear in case of 
>> legacy, this needs to be removed.
> 
> Callbacks aren't only not going to appear in legacy mode - they will 
> also not appear on FreeBSD. We check this above, with checking rte_errno 
> value (if callbacks are not supported, it's set to ENOTSUP, and having 
> callbacks unsupported is not an error).
> 
>> let me know how do you want to take these changes.
>>
> 
> Now that i think of it, this error condition is wrong. This is called in 
> both legacy and non-legacy mode. This is bus probe, no? For non-legacy 
> mode, it is entirely possible to start without any memory whatsoever. It 
> just so happens that rte_service API allocates some on init, and hence 
> you always have at least one segment - that may not be the case forever. 
> So, non-legacy mode, not having memsegs is not an error, it is expected 
> behavior, so maybe we should remove this error check altogether.

Agree - that count was only required in the earlier case. It can be removed.

> 
>>>           DPAA2_BUS_ERR("No Segments found for VFIO Mapping");
>>> +        rte_rwlock_read_unlock(mem_lock);
>>>           return -1;
>>>       }
>>>       DPAA2_BUS_DEBUG("Total %d segments found.", i);
>>> @@ -250,6 +367,11 @@ int rte_fslmc_vfio_dmamap(void)
>>>        */
>>>       vfio_map_irq_region(&vfio_group);
>>> +    /* Existing segments have been mapped and memory callback for 
>>> hotplug
>>> +     * has been installed.
>>> +     */
>>> +    rte_rwlock_read_unlock(mem_lock);
>>> +
>>>       return 0;
>>>   }
>>>
>>
>>
> 
> 

I think there are enough changes, even if trivial. Maybe I can rework 
this patch and send you. If that is inconvenient, just extract from that 
whatever you want.
  
Anatoly Burakov April 9, 2018, 12:35 p.m. UTC | #4
On 09-Apr-18 1:09 PM, Shreyansh Jain wrote:
> On Monday 09 April 2018 04:25 PM, Burakov, Anatoly wrote:
>> On 09-Apr-18 11:01 AM, Shreyansh Jain wrote:
>>> Hi Anatoly,
>>>
>>> On Monday 09 April 2018 01:48 AM, Anatoly Burakov wrote:
>>>> VFIO needs to map and unmap segments for DMA whenever they
>>>> become available or unavailable, so register a callback for
>>>> memory events, and provide map/unmap functions.
>>>>
>>>> Signed-off-by: Shreyansh Jain <shreyansh.jain@nxp.com>
>>>> Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
>>>> ---
>>
>> <...>
>>
>>>> +        DPAA2_BUS_DEBUG("Calling with type=%d, va=%p, 
>>>> virt_addr=0x%" PRIx64 ", iova=0x%" PRIx64 ", map_len=%zu\n",
>>>
>>> I would like to correct this message (80char + rewording) - What do 
>>> you suggest? Should I send a new patch to you or just convey what 
>>> should be changed?
>>>
>>
>> As far as i know, leaving strings on single line is good for grepping. 
>> However, perhaps having PRIx64 etc in there breaks it anyway.
> 
> Yes, that and the debug message was not helpful.
> This is what I had in mind. (DPAA2_BUS_DEBUG doesn't require an extra \n)
> 
> DPAA2_BUS_DEBUG("Request for %s, va=%p, virt_addr=0x%" PRIx64 ","
>          "iova=0x%" PRIx64 ", map_len=%zu",
>          type == RTE_MEM_EVENT_ALLOC? "alloc" : "dealloc",
>          va, virt_addr, iova_addr, map_len);
> 
>>
>>>> +                type, va, virt_addr, iova_addr, map_len);
>>>> +
>>>> +        if (type == RTE_MEM_EVENT_ALLOC)
>>>> +            ret = fslmc_map_dma(virt_addr, iova_addr, map_len);
>>>> +        else
>>>> +            ret = fslmc_unmap_dma(virt_addr, iova_addr, map_len);
>>>> +
>>>> +        if (ret != 0) {
>>>> +            DPAA2_BUS_ERR("DMA Mapping/Unmapping failed. Map=%d, 
>>>> addr=%p, len=%zu, err:(%d)",
>>>> +                    type, va, map_len, ret);
>>>
>>> Same as above. 80 Char issue.
>>
>> Same reasoning - leaving strings unbroken allows for easier grepping 
>> the codebase, but i'm not sure what's our policy on having formatted 
>> strings unbroken.
> 
> My policy is not different, but the various variables being dumped 
> cannot anyway help in grepping - So, keeping the variables on separate 
> lines for 80chars is ok. "DMA Mapping/Unmapping failed." is enough for 
> greps.
> 
>>
>>>
>>>> +            return;
>>>> +        }
>>>> +
>>>> +        cur_len += map_len;
>>>> +    }
>>>> +
>>>> +    if (type == RTE_MEM_EVENT_ALLOC)
>>>> +        DPAA2_BUS_DEBUG("Total Mapped: addr=%p, len=%zu\n",
>>>> +                addr, len);
>>>> +    else
>>
>> <...>
>>
>>>> +    ret = rte_mem_event_callback_register("fslmc_memevent_clb",
>>>> +                          fslmc_memevent_cb);
>>>> +    if (ret && rte_errno == ENOTSUP)
>>>> +        DPAA2_BUS_DEBUG("Memory event callbacks not supported");
>>>> +    else if (ret)
>>>> +        DPAA2_BUS_DEBUG("Unable to install memory handler");
>>>> +    else
>>>> +        DPAA2_BUS_DEBUG("Installed memory callback handler");
>>>>       /* Verifying that at least single segment is available */
>>>>       if (i <= 0) {
>>>> +        /* TODO: Is this verification required any more? What would
>>>> +         * happen to non-legacy case where nothing was preallocated
>>>> +         * thus causing i==0?
>>>> +         */
>>>
>>> And this as well - if call backs are not going to appear in case of 
>>> legacy, this needs to be removed.
>>
>> Callbacks aren't only not going to appear in legacy mode - they will 
>> also not appear on FreeBSD. We check this above, with checking 
>> rte_errno value (if callbacks are not supported, it's set to ENOTSUP, 
>> and having callbacks unsupported is not an error).
>>
>>> let me know how do you want to take these changes.
>>>
>>
>> Now that i think of it, this error condition is wrong. This is called 
>> in both legacy and non-legacy mode. This is bus probe, no? For 
>> non-legacy mode, it is entirely possible to start without any memory 
>> whatsoever. It just so happens that rte_service API allocates some on 
>> init, and hence you always have at least one segment - that may not be 
>> the case forever. So, non-legacy mode, not having memsegs is not an 
>> error, it is expected behavior, so maybe we should remove this error 
>> check altogether.
> 
> Agree - that count was only required in the earlier case. It can be 
> removed.
> 
>>
>>>>           DPAA2_BUS_ERR("No Segments found for VFIO Mapping");
>>>> +        rte_rwlock_read_unlock(mem_lock);
>>>>           return -1;
>>>>       }
>>>>       DPAA2_BUS_DEBUG("Total %d segments found.", i);
>>>> @@ -250,6 +367,11 @@ int rte_fslmc_vfio_dmamap(void)
>>>>        */
>>>>       vfio_map_irq_region(&vfio_group);
>>>> +    /* Existing segments have been mapped and memory callback for 
>>>> hotplug
>>>> +     * has been installed.
>>>> +     */
>>>> +    rte_rwlock_read_unlock(mem_lock);
>>>> +
>>>>       return 0;
>>>>   }
>>>>
>>>
>>>
>>
>>
> 
> I think there are enough changes, even if trivial. Maybe I can rework 
> this patch and send you. If that is inconvenient, just extract from that 
> whatever you want.
> 

There aren't a lot of changes so i'll respin it myself, addressing the 
comments above. Thanks!
  

Patch

diff --git a/drivers/bus/fslmc/fslmc_vfio.c b/drivers/bus/fslmc/fslmc_vfio.c
index db3eb61..dfdd2bc 100644
--- a/drivers/bus/fslmc/fslmc_vfio.c
+++ b/drivers/bus/fslmc/fslmc_vfio.c
@@ -30,6 +30,7 @@ 
 #include <rte_kvargs.h>
 #include <rte_dev.h>
 #include <rte_bus.h>
+#include <rte_eal_memconfig.h>
 
 #include "rte_fslmc.h"
 #include "fslmc_vfio.h"
@@ -188,11 +189,57 @@  static int vfio_map_irq_region(struct fslmc_vfio_group *group)
 	return -errno;
 }
 
+static int fslmc_map_dma(uint64_t vaddr, rte_iova_t iovaddr, size_t len);
+static int fslmc_unmap_dma(uint64_t vaddr, rte_iova_t iovaddr, size_t len);
+
+static void
+fslmc_memevent_cb(enum rte_mem_event type, const void *addr, size_t len)
+{
+	struct rte_memseg_list *msl;
+	struct rte_memseg *ms;
+	size_t cur_len = 0, map_len = 0;
+	uint64_t virt_addr;
+	rte_iova_t iova_addr;
+	int ret;
+
+	msl = rte_mem_virt2memseg_list(addr);
+
+	while (cur_len < len) {
+		const void *va = RTE_PTR_ADD(addr, cur_len);
+
+		ms = rte_mem_virt2memseg(va, msl);
+		iova_addr = ms->iova;
+		virt_addr = ms->addr_64;
+		map_len = ms->len;
+
+		DPAA2_BUS_DEBUG("Calling with type=%d, va=%p, virt_addr=0x%" PRIx64 ", iova=0x%" PRIx64 ", map_len=%zu\n",
+				type, va, virt_addr, iova_addr, map_len);
+
+		if (type == RTE_MEM_EVENT_ALLOC)
+			ret = fslmc_map_dma(virt_addr, iova_addr, map_len);
+		else
+			ret = fslmc_unmap_dma(virt_addr, iova_addr, map_len);
+
+		if (ret != 0) {
+			DPAA2_BUS_ERR("DMA Mapping/Unmapping failed. Map=%d, addr=%p, len=%zu, err:(%d)",
+					type, va, map_len, ret);
+			return;
+		}
+
+		cur_len += map_len;
+	}
+
+	if (type == RTE_MEM_EVENT_ALLOC)
+		DPAA2_BUS_DEBUG("Total Mapped: addr=%p, len=%zu\n",
+				addr, len);
+	else
+		DPAA2_BUS_DEBUG("Total Unmapped: addr=%p, len=%zu\n",
+				addr, len);
+}
+
 static int
-fslmc_vfio_map(const struct rte_memseg_list *msl __rte_unused,
-		const struct rte_memseg *ms, void *arg)
+fslmc_map_dma(uint64_t vaddr, rte_iova_t iovaddr __rte_unused, size_t len)
 {
-	int *n_segs = arg;
 	struct fslmc_vfio_group *group;
 	struct vfio_iommu_type1_dma_map dma_map = {
 		.argsz = sizeof(struct vfio_iommu_type1_dma_map),
@@ -200,10 +247,11 @@  fslmc_vfio_map(const struct rte_memseg_list *msl __rte_unused,
 	};
 	int ret;
 
-	dma_map.size = ms->len;
-	dma_map.vaddr = ms->addr_64;
+	dma_map.size = len;
+	dma_map.vaddr = vaddr;
+
 #ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
-	dma_map.iova = ms->iova;
+	dma_map.iova = iovaddr;
 #else
 	dma_map.iova = dma_map.vaddr;
 #endif
@@ -216,30 +264,99 @@  fslmc_vfio_map(const struct rte_memseg_list *msl __rte_unused,
 		return -1;
 	}
 
-	DPAA2_BUS_DEBUG("-->Initial SHM Virtual ADDR %llX",
-			dma_map.vaddr);
-	DPAA2_BUS_DEBUG("-----> DMA size 0x%llX", dma_map.size);
-	ret = ioctl(group->container->fd, VFIO_IOMMU_MAP_DMA,
-			&dma_map);
+	DPAA2_BUS_DEBUG("--> Map address: %llX, size: 0x%llX\n",
+			dma_map.vaddr, dma_map.size);
+	ret = ioctl(group->container->fd, VFIO_IOMMU_MAP_DMA, &dma_map);
 	if (ret) {
 		DPAA2_BUS_ERR("VFIO_IOMMU_MAP_DMA API(errno = %d)",
 				errno);
 		return -1;
 	}
-	(*n_segs)++;
+
 	return 0;
 }
 
+static int
+fslmc_unmap_dma(uint64_t vaddr, uint64_t iovaddr __rte_unused, size_t len)
+{
+	struct fslmc_vfio_group *group;
+	struct vfio_iommu_type1_dma_unmap dma_unmap = {
+		.argsz = sizeof(struct vfio_iommu_type1_dma_unmap),
+		.flags = 0,
+	};
+	int ret;
+
+	dma_unmap.size = len;
+	dma_unmap.iova = vaddr;
+
+	/* SET DMA MAP for IOMMU */
+	group = &vfio_group;
+
+	if (!group->container) {
+		DPAA2_BUS_ERR("Container is not connected ");
+		return -1;
+	}
+
+	DPAA2_BUS_DEBUG("--> Unmap address: %llX, size: 0x%llX\n",
+			dma_unmap.iova, dma_unmap.size);
+	ret = ioctl(group->container->fd, VFIO_IOMMU_UNMAP_DMA, &dma_unmap);
+	if (ret) {
+		DPAA2_BUS_ERR("VFIO_IOMMU_UNMAP_DMA API(errno = %d)",
+				errno);
+		return -1;
+	}
+
+	return 0;
+}
+
+static int
+fslmc_dmamap_seg(const struct rte_memseg_list *msl __rte_unused,
+		 const struct rte_memseg *ms, void *arg)
+{
+	int *n_segs = arg;
+	int ret;
+
+	ret = fslmc_map_dma(ms->addr_64, ms->iova, ms->len);
+	if (ret)
+		DPAA2_BUS_ERR("Unable to VFIO map (addr=%p, len=%zu)\n",
+				ms->addr, ms->len);
+	else
+		(*n_segs)++;
+
+	return ret;
+}
+
 int rte_fslmc_vfio_dmamap(void)
 {
-	int i = 0;
+	int i = 0, ret;
+	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+	rte_rwlock_t *mem_lock = &mcfg->memory_hotplug_lock;
+
+	/* Lock before parsing and registering callback to memory subsystem */
+	rte_rwlock_read_lock(mem_lock);
 
-	if (rte_memseg_walk(fslmc_vfio_map, &i) < 0)
+	if (rte_memseg_walk(fslmc_dmamap_seg, &i) < 0) {
+		rte_rwlock_read_unlock(mem_lock);
 		return -1;
+	}
+
+	ret = rte_mem_event_callback_register("fslmc_memevent_clb",
+					      fslmc_memevent_cb);
+	if (ret && rte_errno == ENOTSUP)
+		DPAA2_BUS_DEBUG("Memory event callbacks not supported");
+	else if (ret)
+		DPAA2_BUS_DEBUG("Unable to install memory handler");
+	else
+		DPAA2_BUS_DEBUG("Installed memory callback handler");
 
 	/* Verifying that at least single segment is available */
 	if (i <= 0) {
+		/* TODO: Is this verification required any more? What would
+		 * happen to non-legacy case where nothing was preallocated
+		 * thus causing i==0?
+		 */
 		DPAA2_BUS_ERR("No Segments found for VFIO Mapping");
+		rte_rwlock_read_unlock(mem_lock);
 		return -1;
 	}
 	DPAA2_BUS_DEBUG("Total %d segments found.", i);
@@ -250,6 +367,11 @@  int rte_fslmc_vfio_dmamap(void)
 	 */
 	vfio_map_irq_region(&vfio_group);
 
+	/* Existing segments have been mapped and memory callback for hotplug
+	 * has been installed.
+	 */
+	rte_rwlock_read_unlock(mem_lock);
+
 	return 0;
 }