[RFC,5/6] bus/mlx5_pci: register a PCI driver

Message ID 20200610171728.89-6-parav@mellanox.com (mailing list archive)
State Superseded, archived
Headers
Series Improve mlx5 PMD common driver framework for multiple classes |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK

Commit Message

Parav Pandit June 10, 2020, 5:17 p.m. UTC
  Create a mlx5 bus driver framework for invoking drivers of
multiple classes who have registered with the mlx5_pci bus
driver.

Validate user class arguments for supported class combinations.

Signed-off-by: Parav Pandit <parav@mellanox.com>
---
 drivers/bus/mlx5_pci/Makefile           |   1 +
 drivers/bus/mlx5_pci/meson.build        |   2 +-
 drivers/bus/mlx5_pci/mlx5_pci_bus.c     | 253 ++++++++++++++++++++++++
 drivers/bus/mlx5_pci/rte_bus_mlx5_pci.h |   1 +
 4 files changed, 256 insertions(+), 1 deletion(-)
  

Comments

Gaëtan Rivet June 15, 2020, 9:46 p.m. UTC | #1
On 10/06/20 17:17 +0000, Parav Pandit wrote:
> Create a mlx5 bus driver framework for invoking drivers of
> multiple classes who have registered with the mlx5_pci bus
> driver.
> 
> Validate user class arguments for supported class combinations.
> 
> Signed-off-by: Parav Pandit <parav@mellanox.com>
> ---
>  drivers/bus/mlx5_pci/Makefile           |   1 +
>  drivers/bus/mlx5_pci/meson.build        |   2 +-
>  drivers/bus/mlx5_pci/mlx5_pci_bus.c     | 253 ++++++++++++++++++++++++
>  drivers/bus/mlx5_pci/rte_bus_mlx5_pci.h |   1 +
>  4 files changed, 256 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/bus/mlx5_pci/Makefile b/drivers/bus/mlx5_pci/Makefile
> index b36916e52..327076fe4 100644
> --- a/drivers/bus/mlx5_pci/Makefile
> +++ b/drivers/bus/mlx5_pci/Makefile
> @@ -15,6 +15,7 @@ CFLAGS += -I$(BUILDDIR)/drivers/common/mlx5
>  CFLAGS += -I$(RTE_SDK)/drivers/bus/pci
>  CFLAGS += -Wno-strict-prototypes
>  LDLIBS += -lrte_eal
> +LDLIBS += -lrte_kvargs
>  LDLIBS += -lrte_common_mlx5
>  LDLIBS += -lrte_pci -lrte_bus_pci
>  
> diff --git a/drivers/bus/mlx5_pci/meson.build b/drivers/bus/mlx5_pci/meson.build
> index cc4a84e23..5111baa4e 100644
> --- a/drivers/bus/mlx5_pci/meson.build
> +++ b/drivers/bus/mlx5_pci/meson.build
> @@ -1,6 +1,6 @@
>  # SPDX-License-Identifier: BSD-3-Clause
>  # Copyright(c) 2020 Mellanox Technologies Ltd
>  
> -deps += ['pci', 'bus_pci', 'common_mlx5']
> +deps += ['pci', 'bus_pci', 'common_mlx5', 'kvargs']
>  install_headers('rte_bus_mlx5_pci.h')
>  sources = files('mlx5_pci_bus.c')
> diff --git a/drivers/bus/mlx5_pci/mlx5_pci_bus.c b/drivers/bus/mlx5_pci/mlx5_pci_bus.c
> index 66db3c7b0..8108e35ea 100644
> --- a/drivers/bus/mlx5_pci/mlx5_pci_bus.c
> +++ b/drivers/bus/mlx5_pci/mlx5_pci_bus.c
> @@ -3,12 +3,265 @@
>   */
>  
>  #include "rte_bus_mlx5_pci.h"
> +#include <mlx5_common_utils.h>
>  
>  static TAILQ_HEAD(mlx5_pci_bus_drv_head, rte_mlx5_pci_driver) drv_list =
>  				TAILQ_HEAD_INITIALIZER(drv_list);
>  
> +struct class_map {
> +	const char *name;
> +	enum mlx5_class dev_class;
> +};

Defining a type here does not seem useful.
You could return an "enum mlx5_class" from the function is_valid_class()
further below, instead of a class_map entry. You have the
MLX5_CLASS_INVALID sentinel value to mark the inexistant mapping.

Making this type anonymous, you should merge it with the array below:

static const struct {
	const char *name;
	enum mlx5_class class; // Remember to change this enum to a
			       // fixed width type by the way.
} mlx5_classes[] = {
	...
};

> +
> +const struct class_map mlx5_classes[] = {

This is not really a class list, but as the type describes,
a mapping between names and binary id.

I think mlx5_class_names would fit better.

> +	{ .name = "vdpa", .dev_class = MLX5_CLASS_VDPA },
> +	{ .name = "net", .dev_class = MLX5_CLASS_NET },
> +};

Globals should be static even if not exposed through header.

> +
> +static const enum mlx5_class mlx5_valid_class_combo[] = {
> +	MLX5_CLASS_NET,
> +	MLX5_CLASS_VDPA,

How do you describe future combos?
Should we expect MLX5_CLASS_NET | MLX5_CLASS_REGEX for example?

> +	/* New class combination should be added here */
> +};
> +
> +static const struct class_map *is_valid_class(const char *class_name)

I don't think the function name conveys its actual use.
mlx5_class_from_name() for example would align with other DPDK APIs.

> +{
> +	unsigned int i;
> +
> +	for (i = 0; i < sizeof(mlx5_classes) / sizeof(struct class_map);

RTE_DIM(mlx5_classes) must be used instead.

> +	     i++) {
> +		if (strcmp(class_name, mlx5_classes[i].name) == 0)
> +			return &mlx5_classes[i];
> +
> +	}
> +	return NULL;
> +}
> +
> +static int is_valid_class_combo(const char *class_names)
> +{
> +	enum mlx5_class user_classes = 0;
> +	char *nstr = strdup(class_names);

Not a fan of assignment with malloc directly at var declaration,
you are missing some checks here.

> +	const struct class_map *entry;
> +	char *copy = nstr;

copy is nondescript and dangerous. I'd use
nstr_orig instead.

> +	int invalid = 0;
> +	unsigned int i;
> +	char *found;

Reading the code below, it seems that the kvlist should only
be defined if the devargs length is > 0, so class_names here should be
defined.

However you do not handle the OOM case explicitly here,
if nstr cannot be allocated on strdup().

> +
> +	while (nstr) {
> +		/* Extract each individual class name */
> +		found = strsep(&nstr, ":");

I have not seen the feature test macros (_DEFAULT_SOURCE) in
the Makefile, it seems required for strsep()?

> +		if (!found)
> +			continue;
> +
> +		/* Check if its a valid class */
> +		entry = is_valid_class(found);

As said earlier, you have no use for the full map entry,
you could return the mlx5_class type instead, as you have
the MLX5_CLASS_INVALID sentinel available to mark the !found case.

> +		if (!entry) {
> +			invalid = EINVAL;

Just toggling invalid = 1; here would be better.
Return EINVAL explicitly if (invalid).

> +			break;
> +		}
> +		user_classes |= entry->dev_class;
> +	}
> +	if (copy)
> +		free(copy);
> +	if (invalid)
> +		return invalid;

You are returning EINVAL here, but -EINVAL below.
It should be aligned, in DPDK usually error return values are negative.
positive errno should only be used for rte_errno.

> +
> +	/* Verify if user specified valid supported combination */
> +	for (i = 0;
> +	     i < sizeof(mlx5_valid_class_combo) / sizeof(enum mlx5_class);

RTE_DIM() here;

> +	     i++) {
> +		if (mlx5_valid_class_combo[i] == user_classes)

return 0; directly?

> +			break;
> +	}
> +	/* Not found any valid class combination */
> +	if (i == sizeof(mlx5_valid_class_combo) / sizeof(enum mlx5_class))

This would simplify this check, where you can directly return -ENODEV
for example to differentiate from invalid class name and invalid combo.

> +		return -EINVAL;
> +	else
> +		return 0;
> +}
> +
> +static int
> +mlx5_bus_opt_handler(__rte_unused const char *key, const char *value,
> +		       void *opaque)
> +{
> +	int *ret = opaque;
> +
> +	*ret = is_valid_class_combo(value);
> +	if (*ret)
> +		DRV_LOG(ERR, "Invalid mlx5 classes %s. Maybe typo in device"
> +			" class argument setting?", value);

Error message should not be cut in half, it makes it difficult to grep.
If you differentiate between typo in name and invalid combo you could
directly warn the user about the proper error.

(You can ignore the warning from checkpatch.sh about the long lines on a
string if there is one.)

> +	return 0;
> +}
> +
> +static int
> +mlx5_bus_options_valid(const struct rte_devargs *devargs)
> +{
> +	struct rte_kvargs *kvlist;
> +	const char *key = MLX5_CLASS_ARG_NAME;
> +	int ret = 0;
> +
> +	if (devargs == NULL)
> +		return 0;
> +	kvlist = rte_kvargs_parse(devargs->args, NULL);
> +	if (kvlist == NULL)
> +		return 0;
> +	if (rte_kvargs_count(kvlist, key))
> +		rte_kvargs_process(kvlist, key, mlx5_bus_opt_handler, &ret);
> +	rte_kvargs_free(kvlist);
> +	return ret;
> +}
> +
>  void
>  rte_mlx5_pci_driver_register(struct rte_mlx5_pci_driver *driver)
>  {
>  	TAILQ_INSERT_TAIL(&drv_list, driver, next);
>  }
> +
> +static bool
> +mlx5_bus_match(const struct rte_mlx5_pci_driver *drv,
> +		 const struct rte_pci_device *pci_dev)
> +{
> +	const struct rte_pci_id *id_table;
> +
> +	for (id_table = drv->id_table; id_table->vendor_id != 0; id_table++) {
> +		/* check if device's ids match the class driver's ones */
> +		if (id_table->vendor_id != pci_dev->id.vendor_id &&
> +				id_table->vendor_id != PCI_ANY_ID)
> +			continue;
> +		if (id_table->device_id != pci_dev->id.device_id &&
> +				id_table->device_id != PCI_ANY_ID)
> +			continue;
> +		if (id_table->subsystem_vendor_id !=
> +		    pci_dev->id.subsystem_vendor_id &&
> +		    id_table->subsystem_vendor_id != PCI_ANY_ID)
> +			continue;
> +		if (id_table->subsystem_device_id !=
> +		    pci_dev->id.subsystem_device_id &&
> +		    id_table->subsystem_device_id != PCI_ANY_ID)
> +			continue;
> +		if (id_table->class_id != pci_dev->id.class_id &&
> +				id_table->class_id != RTE_CLASS_ANY_ID)
> +			continue;
> +
> +		return true;
> +	}
> +	return false;
> +}
> +
> +/**
> + * DPDK callback to register to probe multiple PCI class devices.
> + *
> + * @param[in] pci_drv
> + *   PCI driver structure.
> + * @param[in] dev
> + *   PCI device information.
> + *
> + * @return
> + *   0 on success, 1 to skip this driver, a negative errno value otherwise
> + *   and rte_errno is set.
> + */
> +static int
> +mlx5_bus_pci_probe(struct rte_pci_driver *drv __rte_unused,
> +		     struct rte_pci_device *dev)
> +{
> +	struct rte_mlx5_pci_driver *class;
> +	int ret = 0;
> +
> +	ret = mlx5_bus_options_valid(dev->device.devargs);
> +	if (ret)
> +		return ret;
> +
> +	TAILQ_FOREACH(class, &drv_list, next) {
> +		if (!mlx5_bus_match(class, dev))
> +			continue;
> +
> +		if (!mlx5_class_enabled(dev->device.devargs, class->dev_class))
> +			continue;
> +
> +		ret = class->probe(drv, dev);
> +		if (!ret)
> +			class->loaded = true;
> +	}
> +	return 0;
> +}
> +
> +/**
> + * DPDK callback to remove one or more class devices for a PCI device.
> + *
> + * This function removes all class devices belong to a given PCI device.
> + *
> + * @param[in] pci_dev
> + *   Pointer to the PCI device.
> + *
> + * @return
> + *   0 on success, the function cannot fail.
> + */
> +static int
> +mlx5_bus_pci_remove(struct rte_pci_device *dev)
> +{
> +	struct rte_mlx5_pci_driver *class;
> +
> +	/* Remove each class driver in reverse order */

Why use a revese order specifically?

> +	TAILQ_FOREACH_REVERSE(class, &drv_list, mlx5_pci_bus_drv_head, next) {
> +		if (!mlx5_class_enabled(dev->device.devargs, class->dev_class))

You are parsing the devargs each time to check a single class.
If you return the proper bitmap instead, do not parse the devargs within
this loop, do it beforehand and check only that
(devargs_class_bits & class->dev_class).

Same thing in the probe().

> +			continue;
> +
> +		if (class->loaded)
> +			class->remove(dev);
> +	}
> +	return 0;
> +}
> +
> +static int
> +mlx5_bus_pci_dma_map(struct rte_pci_device *dev, void *addr,
> +		     uint64_t iova, size_t len)
> +{
> +	struct rte_mlx5_pci_driver *class;
> +	int ret = -EINVAL;
> +
> +	TAILQ_FOREACH(class, &drv_list, next) {
> +		if (!class->dma_map)
> +			continue;
> +
> +		return class->dma_map(dev, addr, iova, len);

Is there a specific class that could have priority for the DMA?

> +	}
> +	return ret;
> +}
> +
> +static int
> +mlx5_bus_pci_dma_unmap(struct rte_pci_device *dev, void *addr,
> +		       uint64_t iova, size_t len)
> +{
> +	struct rte_mlx5_pci_driver *class;
> +	int ret = -EINVAL;
> +
> +	TAILQ_FOREACH_REVERSE(class, &drv_list, mlx5_pci_bus_drv_head, next) {
> +		if (!class->dma_unmap)
> +			continue;
> +
> +		return class->dma_unmap(dev, addr, iova, len);

If you have two classes A -> B having dma_map() + dma_unmap(),
you will dma_map() with A then dma_unmap() with B, due to
the _REVERSE() iteration?

Why use reversed iteration at all by the way for dinit? If your ops is
sound any order should be ok.

> +	}
> +	return ret;
> +}
> +
> +static const struct rte_pci_id mlx5_bus_pci_id_map[] = {
> +	{
> +		.vendor_id = 0
> +	}
> +};
> +
> +static struct rte_pci_driver mlx5_bus_driver = {
> +	.driver = {
> +		.name = "mlx5_bus_pci",
> +	},
> +	.id_table = mlx5_bus_pci_id_map,
> +	.probe = mlx5_bus_pci_probe,
> +	.remove = mlx5_bus_pci_remove,
> +	.dma_map = mlx5_bus_pci_dma_map,
> +	.dma_unmap = mlx5_bus_pci_dma_unmap,
> +	.drv_flags = RTE_PCI_DRV_INTR_LSC | RTE_PCI_DRV_INTR_RMV |
> +		     RTE_PCI_DRV_PROBE_AGAIN,
> +};
> +
> +RTE_PMD_REGISTER_PCI(mlx5_bus, mlx5_bus_driver);
> +RTE_PMD_REGISTER_PCI_TABLE(mlx5_bus, mlx5_bus_pci_id_map);
> diff --git a/drivers/bus/mlx5_pci/rte_bus_mlx5_pci.h b/drivers/bus/mlx5_pci/rte_bus_mlx5_pci.h
> index b0423f99e..7be9a15cd 100644
> --- a/drivers/bus/mlx5_pci/rte_bus_mlx5_pci.h
> +++ b/drivers/bus/mlx5_pci/rte_bus_mlx5_pci.h
> @@ -36,6 +36,7 @@ struct rte_mlx5_pci_driver {
>  	pci_dma_unmap_t *dma_unmap;   /**< Class device dma unmap function. */
>  	TAILQ_ENTRY(rte_mlx5_pci_driver) next;
>  	const struct rte_pci_id *id_table; /**< ID table, NULL terminated. */
> +	bool loaded;
>  };
>  
>  /**
> -- 
> 2.25.4
>
  
Thomas Monjalon June 17, 2020, 8:18 a.m. UTC | #2
15/06/2020 23:46, Gaëtan Rivet:
> On 10/06/20 17:17 +0000, Parav Pandit wrote:
> > +		DRV_LOG(ERR, "Invalid mlx5 classes %s. Maybe typo in device"
> > +			" class argument setting?", value);
> 
> Error message should not be cut in half, it makes it difficult to grep.
> If you differentiate between typo in name and invalid combo you could
> directly warn the user about the proper error.
> 
> (You can ignore the warning from checkpatch.sh about the long lines on a
> string if there is one.)

The best is to cut the message after variable placeholder.
Here you can cut at the end of the first sentence "%s. "
  
Parav Pandit June 18, 2020, 9:47 a.m. UTC | #3
> From: Thomas Monjalon <thomas@monjalon.net>
> Sent: Wednesday, June 17, 2020 1:49 PM
> To: Parav Pandit <parav@mellanox.com>; Gaëtan Rivet <grive@u256.net>
> Cc: dev@dpdk.org; ferruh.yigit@intel.com; Ori Kam <orika@mellanox.com>;
> Matan Azrad <matan@mellanox.com>
> Subject: Re: [dpdk-dev] [RFC PATCH 5/6] bus/mlx5_pci: register a PCI driver
> 
> 15/06/2020 23:46, Gaëtan Rivet:
> > On 10/06/20 17:17 +0000, Parav Pandit wrote:
> > > +		DRV_LOG(ERR, "Invalid mlx5 classes %s. Maybe typo in
> device"
> > > +			" class argument setting?", value);
> >
> > Error message should not be cut in half, it makes it difficult to grep.
> > If you differentiate between typo in name and invalid combo you could
> > directly warn the user about the proper error.
> >
> > (You can ignore the warning from checkpatch.sh about the long lines on
> > a string if there is one.)
> 
> The best is to cut the message after variable placeholder.
> Here you can cut at the end of the first sentence "%s. "
> 
o.k.
  
Parav Pandit June 18, 2020, 10:03 a.m. UTC | #4
> From: Gaëtan Rivet <grive@u256.net>
> Sent: Tuesday, June 16, 2020 3:17 AM
> 
> On 10/06/20 17:17 +0000, Parav Pandit wrote:
> > Create a mlx5 bus driver framework for invoking drivers of multiple
> > classes who have registered with the mlx5_pci bus driver.
> >
> > Validate user class arguments for supported class combinations.
> >
> > Signed-off-by: Parav Pandit <parav@mellanox.com>
> > ---
> >  drivers/bus/mlx5_pci/Makefile           |   1 +
> >  drivers/bus/mlx5_pci/meson.build        |   2 +-
> >  drivers/bus/mlx5_pci/mlx5_pci_bus.c     | 253
> ++++++++++++++++++++++++
> >  drivers/bus/mlx5_pci/rte_bus_mlx5_pci.h |   1 +
> >  4 files changed, 256 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/bus/mlx5_pci/Makefile
> > b/drivers/bus/mlx5_pci/Makefile index b36916e52..327076fe4 100644
> > --- a/drivers/bus/mlx5_pci/Makefile
> > +++ b/drivers/bus/mlx5_pci/Makefile
> > @@ -15,6 +15,7 @@ CFLAGS += -I$(BUILDDIR)/drivers/common/mlx5
> CFLAGS
> > += -I$(RTE_SDK)/drivers/bus/pci  CFLAGS += -Wno-strict-prototypes
> > LDLIBS += -lrte_eal
> > +LDLIBS += -lrte_kvargs
> >  LDLIBS += -lrte_common_mlx5
> >  LDLIBS += -lrte_pci -lrte_bus_pci
> >
> > diff --git a/drivers/bus/mlx5_pci/meson.build
> > b/drivers/bus/mlx5_pci/meson.build
> > index cc4a84e23..5111baa4e 100644
> > --- a/drivers/bus/mlx5_pci/meson.build
> > +++ b/drivers/bus/mlx5_pci/meson.build
> > @@ -1,6 +1,6 @@
> >  # SPDX-License-Identifier: BSD-3-Clause  # Copyright(c) 2020 Mellanox
> > Technologies Ltd
> >
> > -deps += ['pci', 'bus_pci', 'common_mlx5']
> > +deps += ['pci', 'bus_pci', 'common_mlx5', 'kvargs']
> >  install_headers('rte_bus_mlx5_pci.h')
> >  sources = files('mlx5_pci_bus.c')
> > diff --git a/drivers/bus/mlx5_pci/mlx5_pci_bus.c
> > b/drivers/bus/mlx5_pci/mlx5_pci_bus.c
> > index 66db3c7b0..8108e35ea 100644
> > --- a/drivers/bus/mlx5_pci/mlx5_pci_bus.c
> > +++ b/drivers/bus/mlx5_pci/mlx5_pci_bus.c
> > @@ -3,12 +3,265 @@
> >   */
> >
> >  #include "rte_bus_mlx5_pci.h"
> > +#include <mlx5_common_utils.h>
> >
> >  static TAILQ_HEAD(mlx5_pci_bus_drv_head, rte_mlx5_pci_driver) drv_list
> =
> >  				TAILQ_HEAD_INITIALIZER(drv_list);
> >
> > +struct class_map {
> > +	const char *name;
> > +	enum mlx5_class dev_class;
> > +};
> 
> Defining a type here does not seem useful.
> You could return an "enum mlx5_class" from the function is_valid_class()
> further below, instead of a class_map entry. You have the
> MLX5_CLASS_INVALID sentinel value to mark the inexistant mapping.
> 
> Making this type anonymous, you should merge it with the array below:
> 
> static const struct {
> 	const char *name;
> 	enum mlx5_class class; // Remember to change this enum to a
> 			       // fixed width type by the way.
Yes, I acked to changed to define, but I forgot that I use the enum here.
So I am going to keep the enum as code reads more clear with enum.

> } mlx5_classes[] = {
> 	...
> };
> 
> > +
> > +const struct class_map mlx5_classes[] = {
> 
> This is not really a class list, but as the type describes, a mapping between
> names and binary id.
> 
> I think mlx5_class_names would fit better.
o.k.
> 
> > +	{ .name = "vdpa", .dev_class = MLX5_CLASS_VDPA },
> > +	{ .name = "net", .dev_class = MLX5_CLASS_NET }, };
> 
> Globals should be static even if not exposed through header.
Yes.
> 
> > +
> > +static const enum mlx5_class mlx5_valid_class_combo[] = {
> > +	MLX5_CLASS_NET,
> > +	MLX5_CLASS_VDPA,
> 
> How do you describe future combos?
> Should we expect MLX5_CLASS_NET | MLX5_CLASS_REGEX for example?
> 
Correct.

> > +	/* New class combination should be added here */ };
> > +
> > +static const struct class_map *is_valid_class(const char *class_name)
> 
> I don't think the function name conveys its actual use.
> mlx5_class_from_name() for example would align with other DPDK APIs.
Make sense. Will change.
> 
> > +{
> > +	unsigned int i;
> > +
> > +	for (i = 0; i < sizeof(mlx5_classes) / sizeof(struct class_map);
> 
> RTE_DIM(mlx5_classes) must be used instead.
> 
> > +	     i++) {
> > +		if (strcmp(class_name, mlx5_classes[i].name) == 0)
> > +			return &mlx5_classes[i];
> > +
> > +	}
> > +	return NULL;
> > +}
> > +
> > +static int is_valid_class_combo(const char *class_names) {
> > +	enum mlx5_class user_classes = 0;
> > +	char *nstr = strdup(class_names);
> 
> Not a fan of assignment with malloc directly at var declaration, you are
> missing some checks here.
> 
Ok. will do assignment explicitly and do the check.

> > +	const struct class_map *entry;
> > +	char *copy = nstr;
> 
> copy is nondescript and dangerous. I'd use nstr_orig instead.
Ok. will change varialble name from copy to nstr_orig.

> 
> > +	int invalid = 0;
> > +	unsigned int i;
> > +	char *found;
> 
> Reading the code below, it seems that the kvlist should only be defined if the
> devargs length is > 0, so class_names here should be defined.
> 
> However you do not handle the OOM case explicitly here, if nstr cannot be
> allocated on strdup().
> 
Will return error on OOM.

> > +
> > +	while (nstr) {
> > +		/* Extract each individual class name */
> > +		found = strsep(&nstr, ":");
> 
> I have not seen the feature test macros (_DEFAULT_SOURCE) in the
> Makefile, it seems required for strsep()?
> 
If its mandatory meson build should have complained?

> > +		if (!found)
> > +			continue;
> > +
> > +		/* Check if its a valid class */
> > +		entry = is_valid_class(found);
> 
> As said earlier, you have no use for the full map entry, you could return the
> mlx5_class type instead, as you have the MLX5_CLASS_INVALID sentinel
> available to mark the !found case.
> 
> > +		if (!entry) {
> > +			invalid = EINVAL;
> 
> Just toggling invalid = 1; here would be better.
> Return EINVAL explicitly if (invalid).
> 
> > +			break;
> > +		}
> > +		user_classes |= entry->dev_class;
> > +	}
> > +	if (copy)
> > +		free(copy);
> > +	if (invalid)
> > +		return invalid;
> 
> You are returning EINVAL here, but -EINVAL below.
> It should be aligned, in DPDK usually error return values are negative.
> positive errno should only be used for rte_errno.
> 
> > +
> > +	/* Verify if user specified valid supported combination */
> > +	for (i = 0;
> > +	     i < sizeof(mlx5_valid_class_combo) / sizeof(enum mlx5_class);
> 
> RTE_DIM() here;
> 
Ack.

> > +	     i++) {
> > +		if (mlx5_valid_class_combo[i] == user_classes)
> 
> return 0; directly?
> 
Yes.

> > +			break;
> > +	}
> > +	/* Not found any valid class combination */
> > +	if (i == sizeof(mlx5_valid_class_combo) / sizeof(enum mlx5_class))
> 
> This would simplify this check, where you can directly return -ENODEV for
> example to differentiate from invalid class name and invalid combo.
> 
> > +		return -EINVAL;
> > +	else
> > +		return 0;
> > +}
> > +
> > +static int
> > +mlx5_bus_opt_handler(__rte_unused const char *key, const char *value,
> > +		       void *opaque)
> > +{
> > +	int *ret = opaque;
> > +
> > +	*ret = is_valid_class_combo(value);
> > +	if (*ret)
> > +		DRV_LOG(ERR, "Invalid mlx5 classes %s. Maybe typo in
> device"
> > +			" class argument setting?", value);
> 
> Error message should not be cut in half, it makes it difficult to grep.
> If you differentiate between typo in name and invalid combo you could
> directly warn the user about the proper error.
> 
Will have it in one line.

> (You can ignore the warning from checkpatch.sh about the long lines on a
> string if there is one.)
> 
> > +	return 0;
> > +}
> > +
> > +static int
> > +mlx5_bus_options_valid(const struct rte_devargs *devargs) {
> > +	struct rte_kvargs *kvlist;
> > +	const char *key = MLX5_CLASS_ARG_NAME;
> > +	int ret = 0;
> > +
> > +	if (devargs == NULL)
> > +		return 0;
> > +	kvlist = rte_kvargs_parse(devargs->args, NULL);
> > +	if (kvlist == NULL)
> > +		return 0;
> > +	if (rte_kvargs_count(kvlist, key))
> > +		rte_kvargs_process(kvlist, key, mlx5_bus_opt_handler,
> &ret);
> > +	rte_kvargs_free(kvlist);
> > +	return ret;
> > +}
> > +
> >  void
> >  rte_mlx5_pci_driver_register(struct rte_mlx5_pci_driver *driver)  {
> >  	TAILQ_INSERT_TAIL(&drv_list, driver, next);  }
> > +
> > +static bool
> > +mlx5_bus_match(const struct rte_mlx5_pci_driver *drv,
> > +		 const struct rte_pci_device *pci_dev) {
> > +	const struct rte_pci_id *id_table;
> > +
> > +	for (id_table = drv->id_table; id_table->vendor_id != 0; id_table++) {
> > +		/* check if device's ids match the class driver's ones */
> > +		if (id_table->vendor_id != pci_dev->id.vendor_id &&
> > +				id_table->vendor_id != PCI_ANY_ID)
> > +			continue;
> > +		if (id_table->device_id != pci_dev->id.device_id &&
> > +				id_table->device_id != PCI_ANY_ID)
> > +			continue;
> > +		if (id_table->subsystem_vendor_id !=
> > +		    pci_dev->id.subsystem_vendor_id &&
> > +		    id_table->subsystem_vendor_id != PCI_ANY_ID)
> > +			continue;
> > +		if (id_table->subsystem_device_id !=
> > +		    pci_dev->id.subsystem_device_id &&
> > +		    id_table->subsystem_device_id != PCI_ANY_ID)
> > +			continue;
> > +		if (id_table->class_id != pci_dev->id.class_id &&
> > +				id_table->class_id != RTE_CLASS_ANY_ID)
> > +			continue;
> > +
> > +		return true;
> > +	}
> > +	return false;
> > +}
> > +
> > +/**
> > + * DPDK callback to register to probe multiple PCI class devices.
> > + *
> > + * @param[in] pci_drv
> > + *   PCI driver structure.
> > + * @param[in] dev
> > + *   PCI device information.
> > + *
> > + * @return
> > + *   0 on success, 1 to skip this driver, a negative errno value otherwise
> > + *   and rte_errno is set.
> > + */
> > +static int
> > +mlx5_bus_pci_probe(struct rte_pci_driver *drv __rte_unused,
> > +		     struct rte_pci_device *dev)
> > +{
> > +	struct rte_mlx5_pci_driver *class;
> > +	int ret = 0;
> > +
> > +	ret = mlx5_bus_options_valid(dev->device.devargs);
> > +	if (ret)
> > +		return ret;
> > +
> > +	TAILQ_FOREACH(class, &drv_list, next) {
> > +		if (!mlx5_bus_match(class, dev))
> > +			continue;
> > +
> > +		if (!mlx5_class_enabled(dev->device.devargs, class-
> >dev_class))
> > +			continue;
> > +
> > +		ret = class->probe(drv, dev);
> > +		if (!ret)
> > +			class->loaded = true;
> > +	}
> > +	return 0;
> > +}
> > +
> > +/**
> > + * DPDK callback to remove one or more class devices for a PCI device.
> > + *
> > + * This function removes all class devices belong to a given PCI device.
> > + *
> > + * @param[in] pci_dev
> > + *   Pointer to the PCI device.
> > + *
> > + * @return
> > + *   0 on success, the function cannot fail.
> > + */
> > +static int
> > +mlx5_bus_pci_remove(struct rte_pci_device *dev) {
> > +	struct rte_mlx5_pci_driver *class;
> > +
> > +	/* Remove each class driver in reverse order */
> 
> Why use a revese order specifically?
> 
> > +	TAILQ_FOREACH_REVERSE(class, &drv_list, mlx5_pci_bus_drv_head,
> next) {
> > +		if (!mlx5_class_enabled(dev->device.devargs, class-
> >dev_class))
> 
> You are parsing the devargs each time to check a single class.
> If you return the proper bitmap instead, do not parse the devargs within this
> loop, do it beforehand and check only that (devargs_class_bits & class-
> >dev_class).
> 
> Same thing in the probe().
> 
> > +			continue;
> > +
> > +		if (class->loaded)
> > +			class->remove(dev);
> > +	}
> > +	return 0;
> > +}
> > +
> > +static int
> > +mlx5_bus_pci_dma_map(struct rte_pci_device *dev, void *addr,
> > +		     uint64_t iova, size_t len)
> > +{
> > +	struct rte_mlx5_pci_driver *class;
> > +	int ret = -EINVAL;
> > +
> > +	TAILQ_FOREACH(class, &drv_list, next) {
> > +		if (!class->dma_map)
> > +			continue;
> > +
> > +		return class->dma_map(dev, addr, iova, len);
> 
> Is there a specific class that could have priority for the DMA?
> 
No.

> > +	}
> > +	return ret;
> > +}
> > +
> > +static int
> > +mlx5_bus_pci_dma_unmap(struct rte_pci_device *dev, void *addr,
> > +		       uint64_t iova, size_t len)
> > +{
> > +	struct rte_mlx5_pci_driver *class;
> > +	int ret = -EINVAL;
> > +
> > +	TAILQ_FOREACH_REVERSE(class, &drv_list, mlx5_pci_bus_drv_head,
> next) {
> > +		if (!class->dma_unmap)
> > +			continue;
> > +
> > +		return class->dma_unmap(dev, addr, iova, len);
> 
> If you have two classes A -> B having dma_map() + dma_unmap(), you will
> dma_map() with A then dma_unmap() with B, due to the _REVERSE()
> iteration?
> 
There isn't plan for two drivers to do so.
If two classes do that its source of an error.
Will enhance the bus when that need arise.

> Why use reversed iteration at all by the way for dinit? If your ops is sound
> any order should be ok.
> 
Because deinit must be always reverse of init() code regardless.

> > +	}
> > +	return ret;
> > +}
  
Gaëtan Rivet June 18, 2020, 2:35 p.m. UTC | #5
On 18/06/20 10:03 +0000, Parav Pandit wrote:
> 
> > From: Gaëtan Rivet <grive@u256.net>
> > Sent: Tuesday, June 16, 2020 3:17 AM
> > 
> > On 10/06/20 17:17 +0000, Parav Pandit wrote:
> > > Create a mlx5 bus driver framework for invoking drivers of multiple
> > > classes who have registered with the mlx5_pci bus driver.
> > >
> > > Validate user class arguments for supported class combinations.
> > >
> > > Signed-off-by: Parav Pandit <parav@mellanox.com>
> > > ---
> > >  drivers/bus/mlx5_pci/Makefile           |   1 +
> > >  drivers/bus/mlx5_pci/meson.build        |   2 +-
> > >  drivers/bus/mlx5_pci/mlx5_pci_bus.c     | 253
> > ++++++++++++++++++++++++
> > >  drivers/bus/mlx5_pci/rte_bus_mlx5_pci.h |   1 +
> > >  4 files changed, 256 insertions(+), 1 deletion(-)
> > >

[...]

> > > +
> > > +	while (nstr) {
> > > +		/* Extract each individual class name */
> > > +		found = strsep(&nstr, ":");
> > 
> > I have not seen the feature test macros (_DEFAULT_SOURCE) in the
> > Makefile, it seems required for strsep()?
> > 
> If its mandatory meson build should have complained?
> 

Invoking the compiler without specific standard conformance will work.
If someone adds for example -std=c11 however then _DEFAULT_SOURCE
becomes necessary.

It all depends on the range of compiler versions targeted by this code.
I don't know the full coverage, but I see -std=c11 + -D_DEFAULT_SOURCE
in most mlx5 code, which is why I'm asking for a double check here.


[...]

> > > +			continue;
> > > +
> > > +		if (class->loaded)
> > > +			class->remove(dev);
> > > +	}
> > > +	return 0;
> > > +}
> > > +
> > > +static int
> > > +mlx5_bus_pci_dma_map(struct rte_pci_device *dev, void *addr,
> > > +		     uint64_t iova, size_t len)
> > > +{
> > > +	struct rte_mlx5_pci_driver *class;
> > > +	int ret = -EINVAL;
> > > +
> > > +	TAILQ_FOREACH(class, &drv_list, next) {
> > > +		if (!class->dma_map)
> > > +			continue;
> > > +
> > > +		return class->dma_map(dev, addr, iova, len);
> > 
> > Is there a specific class that could have priority for the DMA?
> > 
> No.
> 

The code being written this way seems to point to multiple classes being
able to have DMA ops. If that's not the case, you can add a sanity check
to enforce than only the right classes have DMA ops defined.

> > > +	}
> > > +	return ret;
> > > +}
> > > +
> > > +static int
> > > +mlx5_bus_pci_dma_unmap(struct rte_pci_device *dev, void *addr,
> > > +		       uint64_t iova, size_t len)
> > > +{
> > > +	struct rte_mlx5_pci_driver *class;
> > > +	int ret = -EINVAL;
> > > +
> > > +	TAILQ_FOREACH_REVERSE(class, &drv_list, mlx5_pci_bus_drv_head,
> > next) {
> > > +		if (!class->dma_unmap)
> > > +			continue;
> > > +
> > > +		return class->dma_unmap(dev, addr, iova, len);
> > 
> > If you have two classes A -> B having dma_map() + dma_unmap(), you will
> > dma_map() with A then dma_unmap() with B, due to the _REVERSE()
> > iteration?
> > 
> There isn't plan for two drivers to do so.
> If two classes do that its source of an error.
> Will enhance the bus when that need arise.
> 

You have well-defined edge-cases, but they are not apparent reading
the code. Such error could be warned about and / or documented.

> > Why use reversed iteration at all by the way for dinit? If your ops is sound
> > any order should be ok.
> > 
> Because deinit must be always reverse of init() code regardless.
> 

This is a strong statement :)

If this is a requirement for driver inter-dependencies to be properly
implemented, this should be documented as such. Probably also explained
in the high-level design documentation in the header exposing this
driver API.

Best,
  
Parav Pandit June 18, 2020, 3:52 p.m. UTC | #6
> From: Gaëtan Rivet <grive@u256.net>
> Sent: Thursday, June 18, 2020 8:05 PM
> To: Parav Pandit <parav@mellanox.com>
> Cc: dev@dpdk.org; ferruh.yigit@intel.com; Ori Kam <orika@mellanox.com>;
> Matan Azrad <matan@mellanox.com>
> Subject: Re: [dpdk-dev] [RFC PATCH 5/6] bus/mlx5_pci: register a PCI driver
> 
> On 18/06/20 10:03 +0000, Parav Pandit wrote:
> >
> > > From: Gaëtan Rivet <grive@u256.net>
> > > Sent: Tuesday, June 16, 2020 3:17 AM
> > >
> > > On 10/06/20 17:17 +0000, Parav Pandit wrote:
> > > > Create a mlx5 bus driver framework for invoking drivers of
> > > > multiple classes who have registered with the mlx5_pci bus driver.
> > > >
> > > > Validate user class arguments for supported class combinations.
> > > >
> > > > Signed-off-by: Parav Pandit <parav@mellanox.com>
> > > > ---
> > > >  drivers/bus/mlx5_pci/Makefile           |   1 +
> > > >  drivers/bus/mlx5_pci/meson.build        |   2 +-
> > > >  drivers/bus/mlx5_pci/mlx5_pci_bus.c     | 253
> > > ++++++++++++++++++++++++
> > > >  drivers/bus/mlx5_pci/rte_bus_mlx5_pci.h |   1 +
> > > >  4 files changed, 256 insertions(+), 1 deletion(-)
> > > >
> 
> [...]
> 
> > > > +
> > > > +	while (nstr) {
> > > > +		/* Extract each individual class name */
> > > > +		found = strsep(&nstr, ":");
> > >
> > > I have not seen the feature test macros (_DEFAULT_SOURCE) in the
> > > Makefile, it seems required for strsep()?
> > >
> > If its mandatory meson build should have complained?
> >
> 
> Invoking the compiler without specific standard conformance will work.
> If someone adds for example -std=c11 however then _DEFAULT_SOURCE
> becomes necessary.
> 
> It all depends on the range of compiler versions targeted by this code.
> I don't know the full coverage, but I see -std=c11 + -D_DEFAULT_SOURCE in
> most mlx5 code, which is why I'm asking for a double check here.
> 
> 
> [...]
> 
> > > > +			continue;
> > > > +
> > > > +		if (class->loaded)
> > > > +			class->remove(dev);
> > > > +	}
> > > > +	return 0;
> > > > +}
> > > > +
> > > > +static int
> > > > +mlx5_bus_pci_dma_map(struct rte_pci_device *dev, void *addr,
> > > > +		     uint64_t iova, size_t len) {
> > > > +	struct rte_mlx5_pci_driver *class;
> > > > +	int ret = -EINVAL;
> > > > +
> > > > +	TAILQ_FOREACH(class, &drv_list, next) {
> > > > +		if (!class->dma_map)
> > > > +			continue;
> > > > +
> > > > +		return class->dma_map(dev, addr, iova, len);
> > >
> > > Is there a specific class that could have priority for the DMA?
> > >
> > No.
> >
> 
> The code being written this way seems to point to multiple classes being able
> to have DMA ops. If that's not the case, you can add a sanity check to enforce
> than only the right classes have DMA ops defined.
> 
Yes. I will add it. good point.

> > > > +	}
> > > > +	return ret;
> > > > +}
> > > > +
> > > > +static int
> > > > +mlx5_bus_pci_dma_unmap(struct rte_pci_device *dev, void *addr,
> > > > +		       uint64_t iova, size_t len) {
> > > > +	struct rte_mlx5_pci_driver *class;
> > > > +	int ret = -EINVAL;
> > > > +
> > > > +	TAILQ_FOREACH_REVERSE(class, &drv_list, mlx5_pci_bus_drv_head,
> > > next) {
> > > > +		if (!class->dma_unmap)
> > > > +			continue;
> > > > +
> > > > +		return class->dma_unmap(dev, addr, iova, len);
> > >
> > > If you have two classes A -> B having dma_map() + dma_unmap(), you
> > > will
> > > dma_map() with A then dma_unmap() with B, due to the _REVERSE()
> > > iteration?
> > >
> > There isn't plan for two drivers to do so.
> > If two classes do that its source of an error.
> > Will enhance the bus when that need arise.
> >
> 
> You have well-defined edge-cases, but they are not apparent reading the
> code. Such error could be warned about and / or documented.
> 
> > > Why use reversed iteration at all by the way for dinit? If your ops
> > > is sound any order should be ok.
> > >
> > Because deinit must be always reverse of init() code regardless.
> >
> 
> This is a strong statement :)
> 
:-)
Deinit not following reverse is almost an invitation to bugs.
Given there are two different drivers, there shouldn't be much dependency that way.
But it is always a good practice to follow de-init as reverse sequence.
It eliminates plenty of null and other validation checks all over.

> If this is a requirement for driver inter-dependencies to be properly
> implemented, this should be documented as such. Probably also explained in
> the high-level design documentation in the header exposing this driver API.
> 
At the moment we don't have such dependency and will continue to steer the design that way.
Only dependency to have is between bus driver (rte_bus_pci_mlx5) and its upper layer class drivers.

> Best,
> --
> Gaëtan
  

Patch

diff --git a/drivers/bus/mlx5_pci/Makefile b/drivers/bus/mlx5_pci/Makefile
index b36916e52..327076fe4 100644
--- a/drivers/bus/mlx5_pci/Makefile
+++ b/drivers/bus/mlx5_pci/Makefile
@@ -15,6 +15,7 @@  CFLAGS += -I$(BUILDDIR)/drivers/common/mlx5
 CFLAGS += -I$(RTE_SDK)/drivers/bus/pci
 CFLAGS += -Wno-strict-prototypes
 LDLIBS += -lrte_eal
+LDLIBS += -lrte_kvargs
 LDLIBS += -lrte_common_mlx5
 LDLIBS += -lrte_pci -lrte_bus_pci
 
diff --git a/drivers/bus/mlx5_pci/meson.build b/drivers/bus/mlx5_pci/meson.build
index cc4a84e23..5111baa4e 100644
--- a/drivers/bus/mlx5_pci/meson.build
+++ b/drivers/bus/mlx5_pci/meson.build
@@ -1,6 +1,6 @@ 
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright(c) 2020 Mellanox Technologies Ltd
 
-deps += ['pci', 'bus_pci', 'common_mlx5']
+deps += ['pci', 'bus_pci', 'common_mlx5', 'kvargs']
 install_headers('rte_bus_mlx5_pci.h')
 sources = files('mlx5_pci_bus.c')
diff --git a/drivers/bus/mlx5_pci/mlx5_pci_bus.c b/drivers/bus/mlx5_pci/mlx5_pci_bus.c
index 66db3c7b0..8108e35ea 100644
--- a/drivers/bus/mlx5_pci/mlx5_pci_bus.c
+++ b/drivers/bus/mlx5_pci/mlx5_pci_bus.c
@@ -3,12 +3,265 @@ 
  */
 
 #include "rte_bus_mlx5_pci.h"
+#include <mlx5_common_utils.h>
 
 static TAILQ_HEAD(mlx5_pci_bus_drv_head, rte_mlx5_pci_driver) drv_list =
 				TAILQ_HEAD_INITIALIZER(drv_list);
 
+struct class_map {
+	const char *name;
+	enum mlx5_class dev_class;
+};
+
+const struct class_map mlx5_classes[] = {
+	{ .name = "vdpa", .dev_class = MLX5_CLASS_VDPA },
+	{ .name = "net", .dev_class = MLX5_CLASS_NET },
+};
+
+static const enum mlx5_class mlx5_valid_class_combo[] = {
+	MLX5_CLASS_NET,
+	MLX5_CLASS_VDPA,
+	/* New class combination should be added here */
+};
+
+static const struct class_map *is_valid_class(const char *class_name)
+{
+	unsigned int i;
+
+	for (i = 0; i < sizeof(mlx5_classes) / sizeof(struct class_map);
+	     i++) {
+		if (strcmp(class_name, mlx5_classes[i].name) == 0)
+			return &mlx5_classes[i];
+
+	}
+	return NULL;
+}
+
+static int is_valid_class_combo(const char *class_names)
+{
+	enum mlx5_class user_classes = 0;
+	char *nstr = strdup(class_names);
+	const struct class_map *entry;
+	char *copy = nstr;
+	int invalid = 0;
+	unsigned int i;
+	char *found;
+
+	while (nstr) {
+		/* Extract each individual class name */
+		found = strsep(&nstr, ":");
+		if (!found)
+			continue;
+
+		/* Check if its a valid class */
+		entry = is_valid_class(found);
+		if (!entry) {
+			invalid = EINVAL;
+			break;
+		}
+		user_classes |= entry->dev_class;
+	}
+	if (copy)
+		free(copy);
+	if (invalid)
+		return invalid;
+
+	/* Verify if user specified valid supported combination */
+	for (i = 0;
+	     i < sizeof(mlx5_valid_class_combo) / sizeof(enum mlx5_class);
+	     i++) {
+		if (mlx5_valid_class_combo[i] == user_classes)
+			break;
+	}
+	/* Not found any valid class combination */
+	if (i == sizeof(mlx5_valid_class_combo) / sizeof(enum mlx5_class))
+		return -EINVAL;
+	else
+		return 0;
+}
+
+static int
+mlx5_bus_opt_handler(__rte_unused const char *key, const char *value,
+		       void *opaque)
+{
+	int *ret = opaque;
+
+	*ret = is_valid_class_combo(value);
+	if (*ret)
+		DRV_LOG(ERR, "Invalid mlx5 classes %s. Maybe typo in device"
+			" class argument setting?", value);
+	return 0;
+}
+
+static int
+mlx5_bus_options_valid(const struct rte_devargs *devargs)
+{
+	struct rte_kvargs *kvlist;
+	const char *key = MLX5_CLASS_ARG_NAME;
+	int ret = 0;
+
+	if (devargs == NULL)
+		return 0;
+	kvlist = rte_kvargs_parse(devargs->args, NULL);
+	if (kvlist == NULL)
+		return 0;
+	if (rte_kvargs_count(kvlist, key))
+		rte_kvargs_process(kvlist, key, mlx5_bus_opt_handler, &ret);
+	rte_kvargs_free(kvlist);
+	return ret;
+}
+
 void
 rte_mlx5_pci_driver_register(struct rte_mlx5_pci_driver *driver)
 {
 	TAILQ_INSERT_TAIL(&drv_list, driver, next);
 }
+
+static bool
+mlx5_bus_match(const struct rte_mlx5_pci_driver *drv,
+		 const struct rte_pci_device *pci_dev)
+{
+	const struct rte_pci_id *id_table;
+
+	for (id_table = drv->id_table; id_table->vendor_id != 0; id_table++) {
+		/* check if device's ids match the class driver's ones */
+		if (id_table->vendor_id != pci_dev->id.vendor_id &&
+				id_table->vendor_id != PCI_ANY_ID)
+			continue;
+		if (id_table->device_id != pci_dev->id.device_id &&
+				id_table->device_id != PCI_ANY_ID)
+			continue;
+		if (id_table->subsystem_vendor_id !=
+		    pci_dev->id.subsystem_vendor_id &&
+		    id_table->subsystem_vendor_id != PCI_ANY_ID)
+			continue;
+		if (id_table->subsystem_device_id !=
+		    pci_dev->id.subsystem_device_id &&
+		    id_table->subsystem_device_id != PCI_ANY_ID)
+			continue;
+		if (id_table->class_id != pci_dev->id.class_id &&
+				id_table->class_id != RTE_CLASS_ANY_ID)
+			continue;
+
+		return true;
+	}
+	return false;
+}
+
+/**
+ * DPDK callback to register to probe multiple PCI class devices.
+ *
+ * @param[in] pci_drv
+ *   PCI driver structure.
+ * @param[in] dev
+ *   PCI device information.
+ *
+ * @return
+ *   0 on success, 1 to skip this driver, a negative errno value otherwise
+ *   and rte_errno is set.
+ */
+static int
+mlx5_bus_pci_probe(struct rte_pci_driver *drv __rte_unused,
+		     struct rte_pci_device *dev)
+{
+	struct rte_mlx5_pci_driver *class;
+	int ret = 0;
+
+	ret = mlx5_bus_options_valid(dev->device.devargs);
+	if (ret)
+		return ret;
+
+	TAILQ_FOREACH(class, &drv_list, next) {
+		if (!mlx5_bus_match(class, dev))
+			continue;
+
+		if (!mlx5_class_enabled(dev->device.devargs, class->dev_class))
+			continue;
+
+		ret = class->probe(drv, dev);
+		if (!ret)
+			class->loaded = true;
+	}
+	return 0;
+}
+
+/**
+ * DPDK callback to remove one or more class devices for a PCI device.
+ *
+ * This function removes all class devices belong to a given PCI device.
+ *
+ * @param[in] pci_dev
+ *   Pointer to the PCI device.
+ *
+ * @return
+ *   0 on success, the function cannot fail.
+ */
+static int
+mlx5_bus_pci_remove(struct rte_pci_device *dev)
+{
+	struct rte_mlx5_pci_driver *class;
+
+	/* Remove each class driver in reverse order */
+	TAILQ_FOREACH_REVERSE(class, &drv_list, mlx5_pci_bus_drv_head, next) {
+		if (!mlx5_class_enabled(dev->device.devargs, class->dev_class))
+			continue;
+
+		if (class->loaded)
+			class->remove(dev);
+	}
+	return 0;
+}
+
+static int
+mlx5_bus_pci_dma_map(struct rte_pci_device *dev, void *addr,
+		     uint64_t iova, size_t len)
+{
+	struct rte_mlx5_pci_driver *class;
+	int ret = -EINVAL;
+
+	TAILQ_FOREACH(class, &drv_list, next) {
+		if (!class->dma_map)
+			continue;
+
+		return class->dma_map(dev, addr, iova, len);
+	}
+	return ret;
+}
+
+static int
+mlx5_bus_pci_dma_unmap(struct rte_pci_device *dev, void *addr,
+		       uint64_t iova, size_t len)
+{
+	struct rte_mlx5_pci_driver *class;
+	int ret = -EINVAL;
+
+	TAILQ_FOREACH_REVERSE(class, &drv_list, mlx5_pci_bus_drv_head, next) {
+		if (!class->dma_unmap)
+			continue;
+
+		return class->dma_unmap(dev, addr, iova, len);
+	}
+	return ret;
+}
+
+static const struct rte_pci_id mlx5_bus_pci_id_map[] = {
+	{
+		.vendor_id = 0
+	}
+};
+
+static struct rte_pci_driver mlx5_bus_driver = {
+	.driver = {
+		.name = "mlx5_bus_pci",
+	},
+	.id_table = mlx5_bus_pci_id_map,
+	.probe = mlx5_bus_pci_probe,
+	.remove = mlx5_bus_pci_remove,
+	.dma_map = mlx5_bus_pci_dma_map,
+	.dma_unmap = mlx5_bus_pci_dma_unmap,
+	.drv_flags = RTE_PCI_DRV_INTR_LSC | RTE_PCI_DRV_INTR_RMV |
+		     RTE_PCI_DRV_PROBE_AGAIN,
+};
+
+RTE_PMD_REGISTER_PCI(mlx5_bus, mlx5_bus_driver);
+RTE_PMD_REGISTER_PCI_TABLE(mlx5_bus, mlx5_bus_pci_id_map);
diff --git a/drivers/bus/mlx5_pci/rte_bus_mlx5_pci.h b/drivers/bus/mlx5_pci/rte_bus_mlx5_pci.h
index b0423f99e..7be9a15cd 100644
--- a/drivers/bus/mlx5_pci/rte_bus_mlx5_pci.h
+++ b/drivers/bus/mlx5_pci/rte_bus_mlx5_pci.h
@@ -36,6 +36,7 @@  struct rte_mlx5_pci_driver {
 	pci_dma_unmap_t *dma_unmap;   /**< Class device dma unmap function. */
 	TAILQ_ENTRY(rte_mlx5_pci_driver) next;
 	const struct rte_pci_id *id_table; /**< ID table, NULL terminated. */
+	bool loaded;
 };
 
 /**