[dpdk-dev,v7,3/9] linuxapp/eal_pci: get iommu class

Message ID 20170831032618.7120-4-santosh.shukla@caviumnetworks.com (mailing list archive)
State Superseded, archived
Headers

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK

Commit Message

Santosh Shukla Aug. 31, 2017, 3:26 a.m. UTC
  Get the iommu class of the PCI devices on the bus and return the preferred
iova mapping mode for that bus.

The patch also introduces the RTE_PCI_DRV_IOVA_AS_VA drv flag.
The flag is used when a driver needs to operate in iova=va mode.

Algorithm for iova scheme selection for PCI bus:
0. If no device bound then return with RTE_IOVA_DC mapping mode,
else goto 1).
1. Look for device attached to vfio kdrv and has .drv_flag set
to RTE_PCI_DRV_IOVA_AS_VA.
2. Look for any device attached to UIO class of driver.
3. Check for vfio-noiommu mode enabled.

If 2) and 3) are both false and 1) is true, then select
RTE_IOVA_VA as the mapping scheme. Otherwise use the default
mapping scheme (RTE_IOVA_PA).

Signed-off-by: Santosh Shukla <santosh.shukla@caviumnetworks.com>
Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
Acked-by: Hemant Agrawal <hemant.agrawal@nxp.com>
---
v6 --> v7:
- squashed v6 series patches [01/12] & [05/12],
  i.e. moved the RTE_PCI_DRV_IOVA_AS_VA flag into this patch (Aaron's comment).

 lib/librte_eal/common/include/rte_pci.h         |  2 +
 lib/librte_eal/linuxapp/eal/eal_pci.c           | 95 +++++++++++++++++++++++++
 lib/librte_eal/linuxapp/eal/eal_vfio.c          | 19 +++++
 lib/librte_eal/linuxapp/eal/eal_vfio.h          |  4 ++
 lib/librte_eal/linuxapp/eal/rte_eal_version.map |  1 +
 5 files changed, 121 insertions(+)
  

Comments

Anatoly Burakov Sept. 4, 2017, 3:08 p.m. UTC | #1
> From: Santosh Shukla [mailto:santosh.shukla@caviumnetworks.com]
> Sent: Thursday, August 31, 2017 4:26 AM
> To: dev@dpdk.org
> Cc: thomas@monjalon.net; jerin.jacob@caviumnetworks.com;
> hemant.agrawal@nxp.com; olivier.matz@6wind.com;
> maxime.coquelin@redhat.com; Gonzalez Monroy, Sergio
> <sergio.gonzalez.monroy@intel.com>; Richardson, Bruce
> <bruce.richardson@intel.com>; shreyansh.jain@nxp.com;
> gaetan.rivet@6wind.com; Burakov, Anatoly <anatoly.burakov@intel.com>;
> stephen@networkplumber.org; aconole@redhat.com; Santosh Shukla
> <santosh.shukla@caviumnetworks.com>
> Subject: [PATCH v7 3/9] linuxapp/eal_pci: get iommu class
> 
> Get iommu class of PCI device on the bus and returns preferred iova
> mapping mode for that bus.
> 
> Patch also introduces RTE_PCI_DRV_IOVA_AS_VA drv flag.
> Flag used when driver needs to operate in iova=va mode.
> 
> Algorithm for iova scheme selection for PCI bus:
> 0. If no device bound then return with RTE_IOVA_DC mapping mode, else
> goto 1).
> 1. Look for device attached to vfio kdrv and has .drv_flag set to
> RTE_PCI_DRV_IOVA_AS_VA.
> 2. Look for any device attached to UIO class of driver.
> 3. Check for vfio-noiommu mode enabled.
> 
> If 2) & 3) is false and 1) is true then select mapping scheme as RTE_IOVA_VA.
> Otherwise use default mapping scheme (RTE_IOVA_PA).
> 
> Signed-off-by: Santosh Shukla <santosh.shukla@caviumnetworks.com>
> Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
> Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> Acked-by: Hemant Agrawal <hemant.agrawal@nxp.com>
> ---
> v6 --> v7:
> - squashed v6 series patch no [01/12] & [05/12]..
>   i.e.. moved RTE_PCI_DRV_IOVA_AS_VA flag into this patch. (Aaron
> comment).
> 
>  lib/librte_eal/common/include/rte_pci.h         |  2 +
>  lib/librte_eal/linuxapp/eal/eal_pci.c           | 95
> +++++++++++++++++++++++++
>  lib/librte_eal/linuxapp/eal/eal_vfio.c          | 19 +++++
>  lib/librte_eal/linuxapp/eal/eal_vfio.h          |  4 ++
>  lib/librte_eal/linuxapp/eal/rte_eal_version.map |  1 +
>  5 files changed, 121 insertions(+)
> 
> diff --git a/lib/librte_eal/common/include/rte_pci.h
> b/lib/librte_eal/common/include/rte_pci.h
> index 0e36de093..a67d77f22 100644
> --- a/lib/librte_eal/common/include/rte_pci.h
> +++ b/lib/librte_eal/common/include/rte_pci.h
> @@ -202,6 +202,8 @@ struct rte_pci_bus {  #define
> RTE_PCI_DRV_INTR_RMV 0x0010
>  /** Device driver needs to keep mapped resources if unsupported dev
> detected */  #define RTE_PCI_DRV_KEEP_MAPPED_RES 0x0020
> +/** Device driver supports iova as va */ #define
> RTE_PCI_DRV_IOVA_AS_VA
> +0X0040
> 
>  /**
>   * A structure describing a PCI mapping.
> diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c
> b/lib/librte_eal/linuxapp/eal/eal_pci.c
> index 8951ce742..9725fd493 100644
> --- a/lib/librte_eal/linuxapp/eal/eal_pci.c
> +++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
> @@ -45,6 +45,7 @@
>  #include "eal_filesystem.h"
>  #include "eal_private.h"
>  #include "eal_pci_init.h"
> +#include "eal_vfio.h"
> 
>  /**
>   * @file
> @@ -487,6 +488,100 @@ rte_pci_scan(void)
>  	return -1;
>  }
> 
> +/*
> + * Is pci device bound to any kdrv
> + */
> +static inline int
> +pci_device_is_bound(void)
> +{
> +	struct rte_pci_device *dev = NULL;
> +	int ret = 0;
> +
> +	FOREACH_DEVICE_ON_PCIBUS(dev) {
> +		if (dev->kdrv == RTE_KDRV_UNKNOWN ||
> +		    dev->kdrv == RTE_KDRV_NONE) {
> +			continue;
> +		} else {
> +			ret = 1;
> +			break;
> +		}
> +	}
> +	return ret;
> +}
> +
> +/*
> + * Any one of the device bound to uio
> + */
> +static inline int
> +pci_device_bound_uio(void)
> +{
> +	struct rte_pci_device *dev = NULL;
> +
> +	FOREACH_DEVICE_ON_PCIBUS(dev) {
> +		if (dev->kdrv == RTE_KDRV_IGB_UIO ||
> +		   dev->kdrv == RTE_KDRV_UIO_GENERIC) {
> +			return 1;
> +		}
> +	}
> +	return 0;
> +}
> +
> +/*
> + * Any one of the device has iova as va  */ static inline int
> +pci_device_has_iova_va(void)
> +{
> +	struct rte_pci_device *dev = NULL;
> +	struct rte_pci_driver *drv = NULL;
> +
> +	FOREACH_DRIVER_ON_PCIBUS(drv) {
> +		if (drv && drv->drv_flags & RTE_PCI_DRV_IOVA_AS_VA) {
> +			FOREACH_DEVICE_ON_PCIBUS(dev) {
> +				if (dev->kdrv == RTE_KDRV_VFIO &&
> +				    rte_pci_match(drv, dev))
> +					return 1;
> +			}
> +		}
> +	}
> +	return 0;
> +}
> +
> +/*
> + * Get iommu class of PCI devices on the bus.
> + */
> +enum rte_iova_mode
> +rte_pci_get_iommu_class(void)
> +{
> +	bool is_bound;
> +	bool is_vfio_noiommu_enabled = true;
> +	bool has_iova_va;
> +	bool is_bound_uio;
> +
> +	is_bound = pci_device_is_bound();
> +	if (!is_bound)
> +		return RTE_IOVA_DC;
> +
> +	has_iova_va = pci_device_has_iova_va();
> +	is_bound_uio = pci_device_bound_uio(); #ifdef VFIO_PRESENT
> +	is_vfio_noiommu_enabled = vfio_noiommu_is_enabled() == 1 ? 1 :
> 0;

If you specify is_vfio_noiommu_enabled as bool, you should probably treat it as such, and assign true/false.

Other than that, I'm curious why it is always set to "true" by default. If we don't have VFIO compiled in, it seems like the error message would always complain about vfio-noiommu mode being enabled, which is confusing.

Thanks,
Anatoly
  
Santosh Shukla Sept. 5, 2017, 8:47 a.m. UTC | #2
Hi Anatoly,


On Monday 04 September 2017 08:38 PM, Burakov, Anatoly wrote:
>> From: Santosh Shukla [mailto:santosh.shukla@caviumnetworks.com]
>> Sent: Thursday, August 31, 2017 4:26 AM
>> To: dev@dpdk.org
>> Cc: thomas@monjalon.net; jerin.jacob@caviumnetworks.com;
>> hemant.agrawal@nxp.com; olivier.matz@6wind.com;
>> maxime.coquelin@redhat.com; Gonzalez Monroy, Sergio
>> <sergio.gonzalez.monroy@intel.com>; Richardson, Bruce
>> <bruce.richardson@intel.com>; shreyansh.jain@nxp.com;
>> gaetan.rivet@6wind.com; Burakov, Anatoly <anatoly.burakov@intel.com>;
>> stephen@networkplumber.org; aconole@redhat.com; Santosh Shukla
>> <santosh.shukla@caviumnetworks.com>
>> Subject: [PATCH v7 3/9] linuxapp/eal_pci: get iommu class
>>
>> Get iommu class of PCI device on the bus and returns preferred iova
>> mapping mode for that bus.
>>
>> Patch also introduces RTE_PCI_DRV_IOVA_AS_VA drv flag.
>> Flag used when driver needs to operate in iova=va mode.
>>
>> Algorithm for iova scheme selection for PCI bus:
>> 0. If no device bound then return with RTE_IOVA_DC mapping mode, else
>> goto 1).
>> 1. Look for device attached to vfio kdrv and has .drv_flag set to
>> RTE_PCI_DRV_IOVA_AS_VA.
>> 2. Look for any device attached to UIO class of driver.
>> 3. Check for vfio-noiommu mode enabled.
>>
>> If 2) & 3) is false and 1) is true then select mapping scheme as RTE_IOVA_VA.
>> Otherwise use default mapping scheme (RTE_IOVA_PA).
>>
>> Signed-off-by: Santosh Shukla <santosh.shukla@caviumnetworks.com>
>> Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
>> Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
>> Acked-by: Hemant Agrawal <hemant.agrawal@nxp.com>
>> ---
>> v6 --> v7:
>> - squashed v6 series patch no [01/12] & [05/12]..
>>   i.e.. moved RTE_PCI_DRV_IOVA_AS_VA flag into this patch. (Aaron
>> comment).
>>
>>  lib/librte_eal/common/include/rte_pci.h         |  2 +
>>  lib/librte_eal/linuxapp/eal/eal_pci.c           | 95
>> +++++++++++++++++++++++++
>>  lib/librte_eal/linuxapp/eal/eal_vfio.c          | 19 +++++
>>  lib/librte_eal/linuxapp/eal/eal_vfio.h          |  4 ++
>>  lib/librte_eal/linuxapp/eal/rte_eal_version.map |  1 +
>>  5 files changed, 121 insertions(+)
>>
>> diff --git a/lib/librte_eal/common/include/rte_pci.h
>> b/lib/librte_eal/common/include/rte_pci.h
>> index 0e36de093..a67d77f22 100644
>> --- a/lib/librte_eal/common/include/rte_pci.h
>> +++ b/lib/librte_eal/common/include/rte_pci.h
>> @@ -202,6 +202,8 @@ struct rte_pci_bus {  #define
>> RTE_PCI_DRV_INTR_RMV 0x0010
>>  /** Device driver needs to keep mapped resources if unsupported dev
>> detected */  #define RTE_PCI_DRV_KEEP_MAPPED_RES 0x0020
>> +/** Device driver supports iova as va */ #define
>> RTE_PCI_DRV_IOVA_AS_VA
>> +0X0040
>>
>>  /**
>>   * A structure describing a PCI mapping.
>> diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c
>> b/lib/librte_eal/linuxapp/eal/eal_pci.c
>> index 8951ce742..9725fd493 100644
>> --- a/lib/librte_eal/linuxapp/eal/eal_pci.c
>> +++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
>> @@ -45,6 +45,7 @@
>>  #include "eal_filesystem.h"
>>  #include "eal_private.h"
>>  #include "eal_pci_init.h"
>> +#include "eal_vfio.h"
>>
>>  /**
>>   * @file
>> @@ -487,6 +488,100 @@ rte_pci_scan(void)
>>  	return -1;
>>  }
>>
>> +/*
>> + * Is pci device bound to any kdrv
>> + */
>> +static inline int
>> +pci_device_is_bound(void)
>> +{
>> +	struct rte_pci_device *dev = NULL;
>> +	int ret = 0;
>> +
>> +	FOREACH_DEVICE_ON_PCIBUS(dev) {
>> +		if (dev->kdrv == RTE_KDRV_UNKNOWN ||
>> +		    dev->kdrv == RTE_KDRV_NONE) {
>> +			continue;
>> +		} else {
>> +			ret = 1;
>> +			break;
>> +		}
>> +	}
>> +	return ret;
>> +}
>> +
>> +/*
>> + * Any one of the device bound to uio
>> + */
>> +static inline int
>> +pci_device_bound_uio(void)
>> +{
>> +	struct rte_pci_device *dev = NULL;
>> +
>> +	FOREACH_DEVICE_ON_PCIBUS(dev) {
>> +		if (dev->kdrv == RTE_KDRV_IGB_UIO ||
>> +		   dev->kdrv == RTE_KDRV_UIO_GENERIC) {
>> +			return 1;
>> +		}
>> +	}
>> +	return 0;
>> +}
>> +
>> +/*
>> + * Any one of the device has iova as va  */ static inline int
>> +pci_device_has_iova_va(void)
>> +{
>> +	struct rte_pci_device *dev = NULL;
>> +	struct rte_pci_driver *drv = NULL;
>> +
>> +	FOREACH_DRIVER_ON_PCIBUS(drv) {
>> +		if (drv && drv->drv_flags & RTE_PCI_DRV_IOVA_AS_VA) {
>> +			FOREACH_DEVICE_ON_PCIBUS(dev) {
>> +				if (dev->kdrv == RTE_KDRV_VFIO &&
>> +				    rte_pci_match(drv, dev))
>> +					return 1;
>> +			}
>> +		}
>> +	}
>> +	return 0;
>> +}
>> +
>> +/*
>> + * Get iommu class of PCI devices on the bus.
>> + */
>> +enum rte_iova_mode
>> +rte_pci_get_iommu_class(void)
>> +{
>> +	bool is_bound;
>> +	bool is_vfio_noiommu_enabled = true;
>> +	bool has_iova_va;
>> +	bool is_bound_uio;
>> +
>> +	is_bound = pci_device_is_bound();
>> +	if (!is_bound)
>> +		return RTE_IOVA_DC;
>> +
>> +	has_iova_va = pci_device_has_iova_va();
>> +	is_bound_uio = pci_device_bound_uio(); #ifdef VFIO_PRESENT
>> +	is_vfio_noiommu_enabled = vfio_noiommu_is_enabled() == 1 ? 1 :
>> 0;
> If you specify is_vfio_noiommu_enabled as bool, you should probably treat it as such, and assign true/false.

queued for v8.

> Other than that, I'm curious why is it always set to "true" by default? If we don't have VFIO compiled, it seems like the error message would always complain about vfio-noiommu mode being enabled, which is confusing.

It is set to 'true' for the case when VFIO_PRESENT is unset, meaning the platform
doesn't support VFIO (Linux version < 3.6),
i.e. it is using UIO. In that case, the flag makes sure the _pa policy is selected.

On the error message: it won't appear in the non-vfio case, as 'has_iova_va' will be set to 0.
The error message will show only in those cases where a few devices out of many are bound to uio;
the message will then pop up and the iova policy will be _pa in that case.

Thanks.

> Thanks,
> Anatoly
  
Anatoly Burakov Sept. 5, 2017, 8:55 a.m. UTC | #3
> From: santosh [mailto:santosh.shukla@caviumnetworks.com]
> Sent: Tuesday, September 5, 2017 9:48 AM
> To: Burakov, Anatoly <anatoly.burakov@intel.com>; dev@dpdk.org
> Cc: thomas@monjalon.net; jerin.jacob@caviumnetworks.com;
> hemant.agrawal@nxp.com; olivier.matz@6wind.com;
> maxime.coquelin@redhat.com; Gonzalez Monroy, Sergio
> <sergio.gonzalez.monroy@intel.com>; Richardson, Bruce
> <bruce.richardson@intel.com>; shreyansh.jain@nxp.com;
> gaetan.rivet@6wind.com; stephen@networkplumber.org;
> aconole@redhat.com
> Subject: Re: [PATCH v7 3/9] linuxapp/eal_pci: get iommu class
> 
> Hi Anatoly,
> 
> 
> On Monday 04 September 2017 08:38 PM, Burakov, Anatoly wrote:
> >> From: Santosh Shukla [mailto:santosh.shukla@caviumnetworks.com]
> >> Sent: Thursday, August 31, 2017 4:26 AM
> >> To: dev@dpdk.org
> >> Cc: thomas@monjalon.net; jerin.jacob@caviumnetworks.com;
> >> hemant.agrawal@nxp.com; olivier.matz@6wind.com;
> >> maxime.coquelin@redhat.com; Gonzalez Monroy, Sergio
> >> <sergio.gonzalez.monroy@intel.com>; Richardson, Bruce
> >> <bruce.richardson@intel.com>; shreyansh.jain@nxp.com;
> >> gaetan.rivet@6wind.com; Burakov, Anatoly
> <anatoly.burakov@intel.com>;
> >> stephen@networkplumber.org; aconole@redhat.com; Santosh Shukla
> >> <santosh.shukla@caviumnetworks.com>
> >> Subject: [PATCH v7 3/9] linuxapp/eal_pci: get iommu class
> >>
> >> Get iommu class of PCI device on the bus and returns preferred iova
> >> mapping mode for that bus.
> >>
> >> Patch also introduces RTE_PCI_DRV_IOVA_AS_VA drv flag.
> >> Flag used when driver needs to operate in iova=va mode.
> >>
> >> Algorithm for iova scheme selection for PCI bus:
> >> 0. If no device bound then return with RTE_IOVA_DC mapping mode, else
> >> goto 1).
> >> 1. Look for device attached to vfio kdrv and has .drv_flag set to
> >> RTE_PCI_DRV_IOVA_AS_VA.
> >> 2. Look for any device attached to UIO class of driver.
> >> 3. Check for vfio-noiommu mode enabled.
> >>
> >> If 2) & 3) is false and 1) is true then select mapping scheme as
> RTE_IOVA_VA.
> >> Otherwise use default mapping scheme (RTE_IOVA_PA).
> >>
> >> Signed-off-by: Santosh Shukla <santosh.shukla@caviumnetworks.com>
> >> Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
> >> Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> >> Acked-by: Hemant Agrawal <hemant.agrawal@nxp.com>
> >> ---
> >> v6 --> v7:
> >> - squashed v6 series patch no [01/12] & [05/12]..
> >>   i.e.. moved RTE_PCI_DRV_IOVA_AS_VA flag into this patch. (Aaron
> >> comment).
> >>
> >>  lib/librte_eal/common/include/rte_pci.h         |  2 +
> >>  lib/librte_eal/linuxapp/eal/eal_pci.c           | 95
> >> +++++++++++++++++++++++++
> >>  lib/librte_eal/linuxapp/eal/eal_vfio.c          | 19 +++++
> >>  lib/librte_eal/linuxapp/eal/eal_vfio.h          |  4 ++
> >>  lib/librte_eal/linuxapp/eal/rte_eal_version.map |  1 +
> >>  5 files changed, 121 insertions(+)
> >>
> >> diff --git a/lib/librte_eal/common/include/rte_pci.h
> >> b/lib/librte_eal/common/include/rte_pci.h
> >> index 0e36de093..a67d77f22 100644
> >> --- a/lib/librte_eal/common/include/rte_pci.h
> >> +++ b/lib/librte_eal/common/include/rte_pci.h
> >> @@ -202,6 +202,8 @@ struct rte_pci_bus {  #define
> >> RTE_PCI_DRV_INTR_RMV 0x0010
> >>  /** Device driver needs to keep mapped resources if unsupported dev
> >> detected */  #define RTE_PCI_DRV_KEEP_MAPPED_RES 0x0020
> >> +/** Device driver supports iova as va */ #define
> >> RTE_PCI_DRV_IOVA_AS_VA
> >> +0X0040
> >>
> >>  /**
> >>   * A structure describing a PCI mapping.
> >> diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c
> >> b/lib/librte_eal/linuxapp/eal/eal_pci.c
> >> index 8951ce742..9725fd493 100644
> >> --- a/lib/librte_eal/linuxapp/eal/eal_pci.c
> >> +++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
> >> @@ -45,6 +45,7 @@
> >>  #include "eal_filesystem.h"
> >>  #include "eal_private.h"
> >>  #include "eal_pci_init.h"
> >> +#include "eal_vfio.h"
> >>
> >>  /**
> >>   * @file
> >> @@ -487,6 +488,100 @@ rte_pci_scan(void)
> >>  	return -1;
> >>  }
> >>
> >> +/*
> >> + * Is pci device bound to any kdrv
> >> + */
> >> +static inline int
> >> +pci_device_is_bound(void)
> >> +{
> >> +	struct rte_pci_device *dev = NULL;
> >> +	int ret = 0;
> >> +
> >> +	FOREACH_DEVICE_ON_PCIBUS(dev) {
> >> +		if (dev->kdrv == RTE_KDRV_UNKNOWN ||
> >> +		    dev->kdrv == RTE_KDRV_NONE) {
> >> +			continue;
> >> +		} else {
> >> +			ret = 1;
> >> +			break;
> >> +		}
> >> +	}
> >> +	return ret;
> >> +}
> >> +
> >> +/*
> >> + * Any one of the device bound to uio  */ static inline int
> >> +pci_device_bound_uio(void)
> >> +{
> >> +	struct rte_pci_device *dev = NULL;
> >> +
> >> +	FOREACH_DEVICE_ON_PCIBUS(dev) {
> >> +		if (dev->kdrv == RTE_KDRV_IGB_UIO ||
> >> +		   dev->kdrv == RTE_KDRV_UIO_GENERIC) {
> >> +			return 1;
> >> +		}
> >> +	}
> >> +	return 0;
> >> +}
> >> +
> >> +/*
> >> + * Any one of the device has iova as va  */ static inline int
> >> +pci_device_has_iova_va(void)
> >> +{
> >> +	struct rte_pci_device *dev = NULL;
> >> +	struct rte_pci_driver *drv = NULL;
> >> +
> >> +	FOREACH_DRIVER_ON_PCIBUS(drv) {
> >> +		if (drv && drv->drv_flags & RTE_PCI_DRV_IOVA_AS_VA) {
> >> +			FOREACH_DEVICE_ON_PCIBUS(dev) {
> >> +				if (dev->kdrv == RTE_KDRV_VFIO &&
> >> +				    rte_pci_match(drv, dev))
> >> +					return 1;
> >> +			}
> >> +		}
> >> +	}
> >> +	return 0;
> >> +}
> >> +
> >> +/*
> >> + * Get iommu class of PCI devices on the bus.
> >> + */
> >> +enum rte_iova_mode
> >> +rte_pci_get_iommu_class(void)
> >> +{
> >> +	bool is_bound;
> >> +	bool is_vfio_noiommu_enabled = true;
> >> +	bool has_iova_va;
> >> +	bool is_bound_uio;
> >> +
> >> +	is_bound = pci_device_is_bound();
> >> +	if (!is_bound)
> >> +		return RTE_IOVA_DC;
> >> +
> >> +	has_iova_va = pci_device_has_iova_va();
> >> +	is_bound_uio = pci_device_bound_uio(); #ifdef VFIO_PRESENT
> >> +	is_vfio_noiommu_enabled = vfio_noiommu_is_enabled() == 1 ? 1 :
> >> 0;
> > If you specify is_vfio_noiommu_enabled as bool, you should probably treat
> it as such, and assign true/false.
> 
> queued for v8.
> 
> > Other than that, I'm curious why is it always set to "true" by default? If we
> don't have VFIO compiled, it seems like the error message would always
> complain about vfio-noiommu mode being enabled, which is confusing.
> 
> Set to 'true' for case when VFIO_PRESENT unset.. meaning platform doesn't
> support VFIO (linux versioned < 3.6) i.e.. using UIO - In that case, flag makes
> sure _pa policy selected.
> 
> On error message: It won't come in non-vfio case, as 'has_iova_va' will set to
> 0.
> Error message will show for those case where few device out of many bind
> to uio, so message will pop-up and iova policy would be _pa in that case.
> 
> Thanks.

Right. My apologies, I misunderstood the meaning of "has_iova_va" flag.

Thanks,
Anatoly

> 
> > Thanks,
> > Anatoly
  
Santosh Shukla Sept. 5, 2017, 8:59 a.m. UTC | #4
Hi Anatoly,


On Tuesday 05 September 2017 02:25 PM, Burakov, Anatoly wrote:
>> From: santosh [mailto:santosh.shukla@caviumnetworks.com]
>> Sent: Tuesday, September 5, 2017 9:48 AM
>> To: Burakov, Anatoly <anatoly.burakov@intel.com>; dev@dpdk.org
>> Cc: thomas@monjalon.net; jerin.jacob@caviumnetworks.com;
>> hemant.agrawal@nxp.com; olivier.matz@6wind.com;
>> maxime.coquelin@redhat.com; Gonzalez Monroy, Sergio
>> <sergio.gonzalez.monroy@intel.com>; Richardson, Bruce
>> <bruce.richardson@intel.com>; shreyansh.jain@nxp.com;
>> gaetan.rivet@6wind.com; stephen@networkplumber.org;
>> aconole@redhat.com
>> Subject: Re: [PATCH v7 3/9] linuxapp/eal_pci: get iommu class
>>
>> Hi Anatoly,
>>
>>
>> On Monday 04 September 2017 08:38 PM, Burakov, Anatoly wrote:
>>>> From: Santosh Shukla [mailto:santosh.shukla@caviumnetworks.com]
>>>> Sent: Thursday, August 31, 2017 4:26 AM
>>>> To: dev@dpdk.org
>>>> Cc: thomas@monjalon.net; jerin.jacob@caviumnetworks.com;
>>>> hemant.agrawal@nxp.com; olivier.matz@6wind.com;
>>>> maxime.coquelin@redhat.com; Gonzalez Monroy, Sergio
>>>> <sergio.gonzalez.monroy@intel.com>; Richardson, Bruce
>>>> <bruce.richardson@intel.com>; shreyansh.jain@nxp.com;
>>>> gaetan.rivet@6wind.com; Burakov, Anatoly
>> <anatoly.burakov@intel.com>;
>>>> stephen@networkplumber.org; aconole@redhat.com; Santosh Shukla
>>>> <santosh.shukla@caviumnetworks.com>
>>>> Subject: [PATCH v7 3/9] linuxapp/eal_pci: get iommu class
>>>>
>>>> Get iommu class of PCI device on the bus and returns preferred iova
>>>> mapping mode for that bus.
>>>>
>>>> Patch also introduces RTE_PCI_DRV_IOVA_AS_VA drv flag.
>>>> Flag used when driver needs to operate in iova=va mode.
>>>>
>>>> Algorithm for iova scheme selection for PCI bus:
>>>> 0. If no device bound then return with RTE_IOVA_DC mapping mode, else
>>>> goto 1).
>>>> 1. Look for device attached to vfio kdrv and has .drv_flag set to
>>>> RTE_PCI_DRV_IOVA_AS_VA.
>>>> 2. Look for any device attached to UIO class of driver.
>>>> 3. Check for vfio-noiommu mode enabled.
>>>>
>>>> If 2) & 3) is false and 1) is true then select mapping scheme as
>> RTE_IOVA_VA.
>>>> Otherwise use default mapping scheme (RTE_IOVA_PA).
>>>>
>>>> Signed-off-by: Santosh Shukla <santosh.shukla@caviumnetworks.com>
>>>> Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
>>>> Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
>>>> Acked-by: Hemant Agrawal <hemant.agrawal@nxp.com>
>>>> ---
>>>> v6 --> v7:
>>>> - squashed v6 series patch no [01/12] & [05/12]..
>>>>   i.e.. moved RTE_PCI_DRV_IOVA_AS_VA flag into this patch. (Aaron
>>>> comment).
>>>>
>>>>  lib/librte_eal/common/include/rte_pci.h         |  2 +
>>>>  lib/librte_eal/linuxapp/eal/eal_pci.c           | 95
>>>> +++++++++++++++++++++++++
>>>>  lib/librte_eal/linuxapp/eal/eal_vfio.c          | 19 +++++
>>>>  lib/librte_eal/linuxapp/eal/eal_vfio.h          |  4 ++
>>>>  lib/librte_eal/linuxapp/eal/rte_eal_version.map |  1 +
>>>>  5 files changed, 121 insertions(+)
>>>>
>>>> diff --git a/lib/librte_eal/common/include/rte_pci.h
>>>> b/lib/librte_eal/common/include/rte_pci.h
>>>> index 0e36de093..a67d77f22 100644
>>>> --- a/lib/librte_eal/common/include/rte_pci.h
>>>> +++ b/lib/librte_eal/common/include/rte_pci.h
>>>> @@ -202,6 +202,8 @@ struct rte_pci_bus {  #define
>>>> RTE_PCI_DRV_INTR_RMV 0x0010
>>>>  /** Device driver needs to keep mapped resources if unsupported dev
>>>> detected */  #define RTE_PCI_DRV_KEEP_MAPPED_RES 0x0020
>>>> +/** Device driver supports iova as va */ #define
>>>> RTE_PCI_DRV_IOVA_AS_VA
>>>> +0X0040
>>>>
>>>>  /**
>>>>   * A structure describing a PCI mapping.
>>>> diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c
>>>> b/lib/librte_eal/linuxapp/eal/eal_pci.c
>>>> index 8951ce742..9725fd493 100644
>>>> --- a/lib/librte_eal/linuxapp/eal/eal_pci.c
>>>> +++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
>>>> @@ -45,6 +45,7 @@
>>>>  #include "eal_filesystem.h"
>>>>  #include "eal_private.h"
>>>>  #include "eal_pci_init.h"
>>>> +#include "eal_vfio.h"
>>>>
>>>>  /**
>>>>   * @file
>>>> @@ -487,6 +488,100 @@ rte_pci_scan(void)
>>>>  	return -1;
>>>>  }
>>>>
>>>> +/*
>>>> + * Is pci device bound to any kdrv
>>>> + */
>>>> +static inline int
>>>> +pci_device_is_bound(void)
>>>> +{
>>>> +	struct rte_pci_device *dev = NULL;
>>>> +	int ret = 0;
>>>> +
>>>> +	FOREACH_DEVICE_ON_PCIBUS(dev) {
>>>> +		if (dev->kdrv == RTE_KDRV_UNKNOWN ||
>>>> +		    dev->kdrv == RTE_KDRV_NONE) {
>>>> +			continue;
>>>> +		} else {
>>>> +			ret = 1;
>>>> +			break;
>>>> +		}
>>>> +	}
>>>> +	return ret;
>>>> +}
>>>> +
>>>> +/*
>>>> + * Any one of the device bound to uio  */ static inline int
>>>> +pci_device_bound_uio(void)
>>>> +{
>>>> +	struct rte_pci_device *dev = NULL;
>>>> +
>>>> +	FOREACH_DEVICE_ON_PCIBUS(dev) {
>>>> +		if (dev->kdrv == RTE_KDRV_IGB_UIO ||
>>>> +		   dev->kdrv == RTE_KDRV_UIO_GENERIC) {
>>>> +			return 1;
>>>> +		}
>>>> +	}
>>>> +	return 0;
>>>> +}
>>>> +
>>>> +/*
>>>> + * Any one of the device has iova as va  */ static inline int
>>>> +pci_device_has_iova_va(void)
>>>> +{
>>>> +	struct rte_pci_device *dev = NULL;
>>>> +	struct rte_pci_driver *drv = NULL;
>>>> +
>>>> +	FOREACH_DRIVER_ON_PCIBUS(drv) {
>>>> +		if (drv && drv->drv_flags & RTE_PCI_DRV_IOVA_AS_VA) {
>>>> +			FOREACH_DEVICE_ON_PCIBUS(dev) {
>>>> +				if (dev->kdrv == RTE_KDRV_VFIO &&
>>>> +				    rte_pci_match(drv, dev))
>>>> +					return 1;
>>>> +			}
>>>> +		}
>>>> +	}
>>>> +	return 0;
>>>> +}
>>>> +
>>>> +/*
>>>> + * Get iommu class of PCI devices on the bus.
>>>> + */
>>>> +enum rte_iova_mode
>>>> +rte_pci_get_iommu_class(void)
>>>> +{
>>>> +	bool is_bound;
>>>> +	bool is_vfio_noiommu_enabled = true;
>>>> +	bool has_iova_va;
>>>> +	bool is_bound_uio;
>>>> +
>>>> +	is_bound = pci_device_is_bound();
>>>> +	if (!is_bound)
>>>> +		return RTE_IOVA_DC;
>>>> +
>>>> +	has_iova_va = pci_device_has_iova_va();
>>>> +	is_bound_uio = pci_device_bound_uio(); #ifdef VFIO_PRESENT
>>>> +	is_vfio_noiommu_enabled = vfio_noiommu_is_enabled() == 1 ? 1 :
>>>> 0;
>>> If you specify is_vfio_noiommu_enabled as bool, you should probably treat
>> it as such, and assign true/false.
>>
>> queued for v8.
>>
>>> Other than that, I'm curious why is it always set to "true" by default? If we
>> don't have VFIO compiled, it seems like the error message would always
>> complain about vfio-noiommu mode being enabled, which is confusing.
>>
>> Set to 'true' for case when VFIO_PRESENT unset.. meaning platform doesn't
>> support VFIO (linux versioned < 3.6) i.e.. using UIO - In that case, flag makes
>> sure _pa policy selected.
>>
>> On error message: It won't come in non-vfio case, as 'has_iova_va' will set to
>> 0.
>> Error message will show for those case where few device out of many bind
>> to uio, so message will pop-up and iova policy would be _pa in that case.
>>
>> Thanks.
> Right. My apologies, I misunderstood the meaning of "has_iova_va" flag.

No worries ;). Thanks for the review feedback and for looking into the v7 series.

Can I collect your reviewed-by: for [3/9]?

Thanks. 

> Thanks,
> Anatoly
>
>>> Thanks,
>>> Anatoly
  
Anatoly Burakov Sept. 5, 2017, 9:01 a.m. UTC | #5
> From: Santosh Shukla [mailto:santosh.shukla@caviumnetworks.com]
> Sent: Thursday, August 31, 2017 4:26 AM
> To: dev@dpdk.org
> Cc: thomas@monjalon.net; jerin.jacob@caviumnetworks.com;
> hemant.agrawal@nxp.com; olivier.matz@6wind.com;
> maxime.coquelin@redhat.com; Gonzalez Monroy, Sergio
> <sergio.gonzalez.monroy@intel.com>; Richardson, Bruce
> <bruce.richardson@intel.com>; shreyansh.jain@nxp.com;
> gaetan.rivet@6wind.com; Burakov, Anatoly <anatoly.burakov@intel.com>;
> stephen@networkplumber.org; aconole@redhat.com; Santosh Shukla
> <santosh.shukla@caviumnetworks.com>
> Subject: [PATCH v7 3/9] linuxapp/eal_pci: get iommu class
> 
> Get iommu class of PCI device on the bus and returns preferred iova
> mapping mode for that bus.
> 
> Patch also introduces RTE_PCI_DRV_IOVA_AS_VA drv flag.
> Flag used when driver needs to operate in iova=va mode.
> 
> Algorithm for iova scheme selection for PCI bus:
> 0. If no device bound then return with RTE_IOVA_DC mapping mode, else
> goto 1).
> 1. Look for device attached to vfio kdrv and has .drv_flag set to
> RTE_PCI_DRV_IOVA_AS_VA.
> 2. Look for any device attached to UIO class of driver.
> 3. Check for vfio-noiommu mode enabled.
> 
> If 2) & 3) is false and 1) is true then select mapping scheme as RTE_IOVA_VA.
> Otherwise use default mapping scheme (RTE_IOVA_PA).
> 
> Signed-off-by: Santosh Shukla <santosh.shukla@caviumnetworks.com>
> Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
> Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> Acked-by: Hemant Agrawal <hemant.agrawal@nxp.com>
> ---

Reviewed-by: Anatoly Burakov <anatoly.burakov@intel.com>
  

Patch

diff --git a/lib/librte_eal/common/include/rte_pci.h b/lib/librte_eal/common/include/rte_pci.h
index 0e36de093..a67d77f22 100644
--- a/lib/librte_eal/common/include/rte_pci.h
+++ b/lib/librte_eal/common/include/rte_pci.h
@@ -202,6 +202,8 @@  struct rte_pci_bus {
 #define RTE_PCI_DRV_INTR_RMV 0x0010
 /** Device driver needs to keep mapped resources if unsupported dev detected */
 #define RTE_PCI_DRV_KEEP_MAPPED_RES 0x0020
+/** Device driver supports iova as va (driver can operate in iova=va mode) */
+#define RTE_PCI_DRV_IOVA_AS_VA 0x0040
 
 /**
  * A structure describing a PCI mapping.
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c
index 8951ce742..9725fd493 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
@@ -45,6 +45,7 @@ 
 #include "eal_filesystem.h"
 #include "eal_private.h"
 #include "eal_pci_init.h"
+#include "eal_vfio.h"
 
 /**
  * @file
@@ -487,6 +488,100 @@  rte_pci_scan(void)
 	return -1;
 }
 
+/*
+ * Tell whether at least one PCI device is bound to a kernel driver.
+ * Returns 1 if so, 0 when every device is unbound or unknown.
+ */
+static inline int
+pci_device_is_bound(void)
+{
+	struct rte_pci_device *pdev = NULL;
+
+	FOREACH_DEVICE_ON_PCIBUS(pdev) {
+		switch (pdev->kdrv) {
+		case RTE_KDRV_UNKNOWN:
+		case RTE_KDRV_NONE:
+			break;
+		default:
+			return 1;
+		}
+	}
+	return 0;
+}
+
+/* Return 1 if any PCI device is bound to a UIO-class kernel driver. */
+static inline int
+pci_device_bound_uio(void)
+{
+	struct rte_pci_device *pdev = NULL;
+	int found = 0;
+
+	FOREACH_DEVICE_ON_PCIBUS(pdev) {
+		if (pdev->kdrv != RTE_KDRV_IGB_UIO &&
+		    pdev->kdrv != RTE_KDRV_UIO_GENERIC)
+			continue;
+		found = 1;
+		break;
+	}
+	return found;
+}
+
+/*
+ * Return 1 if a vfio-bound device matches an IOVA_AS_VA driver, else 0.
+ */
+static inline int
+pci_device_has_iova_va(void)
+{
+	struct rte_pci_driver *pdrv = NULL;
+	struct rte_pci_device *pdev = NULL;
+
+	FOREACH_DRIVER_ON_PCIBUS(pdrv) {
+		if (!pdrv || !(pdrv->drv_flags & RTE_PCI_DRV_IOVA_AS_VA))
+			continue;
+		FOREACH_DEVICE_ON_PCIBUS(pdev) {
+			if (pdev->kdrv == RTE_KDRV_VFIO &&
+			    rte_pci_match(pdrv, pdev))
+				return 1;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Get iommu class of PCI devices on the bus: DC when nothing is bound, VA
+ * when a vfio device wants iova=va and no UIO/noiommu is in use, else PA.
+ */
+enum rte_iova_mode
+rte_pci_get_iommu_class(void)
+{
+	/* Stays true when VFIO_PRESENT is unset, so RTE_IOVA_PA is selected. */
+	bool is_vfio_noiommu_enabled = true;
+	bool has_iova_va;
+	bool is_bound_uio;
+
+	if (!pci_device_is_bound())
+		return RTE_IOVA_DC;
+
+	has_iova_va = pci_device_has_iova_va();
+	is_bound_uio = pci_device_bound_uio();
+#ifdef VFIO_PRESENT
+	is_vfio_noiommu_enabled = (vfio_noiommu_is_enabled() == 1);
+#endif
+
+	if (has_iova_va && !is_bound_uio && !is_vfio_noiommu_enabled)
+		return RTE_IOVA_VA;
+
+	if (has_iova_va) {
+		RTE_LOG(WARNING, EAL, "Some devices want iova as va but pa will be used because.. ");
+		if (is_vfio_noiommu_enabled)
+			RTE_LOG(WARNING, EAL, "vfio-noiommu mode configured\n");
+		if (is_bound_uio)
+			RTE_LOG(WARNING, EAL, "few device bound to UIO\n");
+	}
+
+	return RTE_IOVA_PA;
+}
+
 /* Read PCI config space. */
 int rte_pci_read_config(const struct rte_pci_device *device,
 		void *buf, size_t len, off_t offset)
diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.c b/lib/librte_eal/linuxapp/eal/eal_vfio.c
index 946df7e31..c8a97b7e7 100644
--- a/lib/librte_eal/linuxapp/eal/eal_vfio.c
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio.c
@@ -816,4 +816,23 @@  vfio_noiommu_dma_map(int __rte_unused vfio_container_fd)
 	return 0;
 }
 
+int
+vfio_noiommu_is_enabled(void)
+{
+	int fd, ret, cnt __rte_unused;
+	char c;
+
+	ret = -1;
+	fd = open(VFIO_NOIOMMU_MODE, O_RDONLY);
+	if (fd < 0)
+		return -1;
+
+	cnt = read(fd, &c, 1);
+	if (c == 'Y')
+		ret = 1;
+
+	close(fd);
+	return ret;
+}
+
 #endif
diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.h b/lib/librte_eal/linuxapp/eal/eal_vfio.h
index 5ff63e5d7..26ea8e119 100644
--- a/lib/librte_eal/linuxapp/eal/eal_vfio.h
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio.h
@@ -150,6 +150,8 @@  struct vfio_config {
 #define VFIO_NOIOMMU_GROUP_FMT "/dev/vfio/noiommu-%u"
 #define VFIO_GET_REGION_ADDR(x) ((uint64_t) x << 40ULL)
 #define VFIO_GET_REGION_IDX(x) (x >> 40)
+#define VFIO_NOIOMMU_MODE      \
+	"/sys/module/vfio/parameters/enable_unsafe_noiommu_mode"
 
 /* DMA mapping function prototype.
  * Takes VFIO container fd as a parameter.
@@ -210,6 +212,8 @@  int pci_vfio_is_enabled(void);
 
 int vfio_mp_sync_setup(void);
 
+int vfio_noiommu_is_enabled(void);
+
 #define SOCKET_REQ_CONTAINER 0x100
 #define SOCKET_REQ_GROUP 0x200
 #define SOCKET_CLR_GROUP 0x300
diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
index a15b382ff..40420ded3 100644
--- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
+++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
@@ -247,5 +247,6 @@  DPDK_17.11 {
 	global:
 
 	rte_pci_match;
+	rte_pci_get_iommu_class;
 
 } DPDK_17.08;