[dpdk-dev] [PATCHv4] librte_acl make it build/work for 'default' target

Ananyev, Konstantin konstantin.ananyev at intel.com
Fri Aug 29 19:58:58 CEST 2014


> -----Original Message-----
> From: Neil Horman [mailto:nhorman at tuxdriver.com]
> Sent: Thursday, August 28, 2014 9:38 PM
> To: dev at dpdk.org
> Cc: Neil Horman; Ananyev, Konstantin; thomas.monjalon at 6wind.com
> Subject: [PATCHv4] librte_acl make it build/work for 'default' target
> 
> Make ACL library to build/work on 'default' architecture:
> - make rte_acl_classify_scalar really scalar
>  (make sure it wouldn't use sse4 instrincts through resolve_priority()).
> - Provide two versions of rte_acl_classify code path:
>   rte_acl_classify_sse() - could be build and used only on systems with sse4.2
>   and upper, return -ENOTSUP on lower arch.
>   rte_acl_classify_scalar() - a slower version, but could be build and used
>   on all systems.
> - keep common code shared between these two codepaths.
> 
> v2 chages:
>  run-time selection of most appropriate code-path for given ISA.
>  By default the highest supprted one is selected.
>  User can still override that selection by manually assigning new value to
>  the global function pointer rte_acl_default_classify.
>  rte_acl_classify() becomes a macro calling whatever rte_acl_default_classify
>  points to.
> 
> V3 Changes
>  Updated classify pointer to be a function so as to better preserve ABI
>  REmoved macro definitions for match check functions to make them static inline
> 
> V4 Changes
>  Rewrote classification selection mechanim to use a function table, so that we
> can just store the preferred alg in the rte_acl_ctx struct so that multiprocess
> access works.  I understand that leaves us with an extra load instruction, but I
> think thats ok, because it also allows...
> 
>  Addition of a new function rte_acl_classify_alg.  This function lets you
> specify an enum value to override the acl contexts default algorith when doing a
> classification.  This allows an application to specify a classification
> algorithm without needing to pulicize each method.  I know there was concern
> over keeping those methods public, but we don't have a static ABI at the moment,
> so this seems to me a reasonable thing to do, as it gives us less of an ABI
> surface to worry about.

Good way to overcome the problem.
>From what I am seeing it adds a tiny slowdown (as expected) ... 
Though it provides a good flexibility and I don't have any better ideas.
So I'd say let stick with that approach.

Below are few technical comments.

Thanks
Konstantin

> 
>  Fixed misc missed static declarations
> 
>  Removed acl_match_check.h and moved match_check function to acl_run.h
> 
>  typdeffed function pointer to match check.
> 
> Signed-off-by: Neil Horman <nhorman at tuxdriver.com>
> CC: konstantin.ananyev at intel.com
> CC: thomas.monjalon at 6wind.com
> ---
>  app/test-acl/main.c             |  13 +-
>  app/test/test_acl.c             |  10 +-
>  lib/librte_acl/Makefile         |   5 +-
>  lib/librte_acl/acl.h            |   1 +
>  lib/librte_acl/acl_bld.c        |   5 +-
>  lib/librte_acl/acl_run.c        | 944 ----------------------------------------
>  lib/librte_acl/acl_run.h        | 271 ++++++++++++
>  lib/librte_acl/acl_run_scalar.c | 197 +++++++++
>  lib/librte_acl/acl_run_sse.c    | 630 +++++++++++++++++++++++++++
>  lib/librte_acl/rte_acl.c        |  62 +++
>  lib/librte_acl/rte_acl.h        |  66 ++-
>  11 files changed, 1208 insertions(+), 996 deletions(-)
>  delete mode 100644 lib/librte_acl/acl_run.c
>  create mode 100644 lib/librte_acl/acl_run.h
>  create mode 100644 lib/librte_acl/acl_run_scalar.c
>  create mode 100644 lib/librte_acl/acl_run_sse.c
> 


> diff --git a/app/test/test_acl.c b/app/test/test_acl.c
> index 869f6d3..2169f59 100644
> --- a/app/test/test_acl.c
> +++ b/app/test/test_acl.c
> @@ -859,7 +859,7 @@ test_invalid_parameters(void)
>  	}
> 
>  	/* cover invalid but positive categories in classify */
> -	result = rte_acl_classify_scalar(acx, NULL, NULL, 0, 3);
> +	result = rte_acl_classify(acx, NULL, NULL, 0, 3);

Typo, should be:
rte_acl_classify_alg(acx, RTE_ACL_CLASSIFY_SCALAR, NULL, NULL, 0, 3); 

> diff --git a/lib/librte_acl/acl.h b/lib/librte_acl/acl.h
> index b9d63fd..9236b7b 100644
> --- a/lib/librte_acl/acl.h
> +++ b/lib/librte_acl/acl.h
> @@ -168,6 +168,7 @@ struct rte_acl_ctx {
>  	void               *mem;
>  	size_t              mem_sz;
>  	struct rte_acl_config config; /* copy of build config. */
> +	enum rte_acl_classify_alg alg;
>  };

Each rte_acl_build() will reset all fields of rte_acl_ctx starting from num_categories and below.
So we need to move alg somewhere above num_categories:

--- a/lib/librte_acl/acl.h
+++ b/lib/librte_acl/acl.h
@@ -153,6 +153,7 @@ struct rte_acl_ctx {
        /** Name of the ACL context. */
        int32_t             socket_id;
        /** Socket ID to allocate memory from. */
+       enum rte_acl_classify_alg alg;
        void               *rules;
        uint32_t            max_rules;
        uint32_t            rule_sz;
@@ -168,9 +169,11 @@ struct rte_acl_ctx {
        void               *mem;
        size_t              mem_sz;
        struct rte_acl_config config; /* copy of build config. */
-       enum rte_acl_classify_alg alg;
 };

> diff --git a/lib/librte_acl/acl_run_scalar.c b/lib/librte_acl/acl_run_scalar.c
> new file mode 100644
> index 0000000..4bf58c7
> --- /dev/null
> +++ b/lib/librte_acl/acl_run_scalar.c
> @@ -0,0 +1,197 @@
> +
> +#include "acl_run.h"
> +
> +int
> +rte_acl_classify_scalar(const struct rte_acl_ctx *ctx, const uint8_t **data,
> +        uint32_t *results, uint32_t num, uint32_t categories);

No need to put this declaration here.
I think you can put both rte_acl_classify_sse(), rte_acl_classify_scalar() into acl.h (it is internal lib header).
And remove another declarations of these functions from rte_acl.c.

> diff --git a/lib/librte_acl/acl_run_sse.c b/lib/librte_acl/acl_run_sse.c
> new file mode 100644
> index 0000000..7ae63dd
> --- /dev/null
> +++ b/lib/librte_acl/acl_run_sse.c
> +#include "acl_run.h"
> +
+
+int
+rte_acl_classify_sse(const struct rte_acl_ctx *ctx, const uint8_t **data,
+        uint32_t *results, uint32_t num, uint32_t categories);
+

Move to acl.h.

> diff --git a/lib/librte_acl/rte_acl.c b/lib/librte_acl/rte_acl.c
> index 7c288bd..741bed4 100644
> --- a/lib/librte_acl/rte_acl.c
> +++ b/lib/librte_acl/rte_acl.c
> @@ -33,11 +33,72 @@
> 
>  #include <rte_acl.h>
>  #include "acl.h"
> +#include "acl_run.h"

acl_run.h contains defintions for a lot of functions and should be included only by acl_run_*.c. 
I  think it is better to move typedef int (*rte_acl_classify_t) into acl.h and don't include acl_run.h here.

> 
>  #define	BIT_SIZEOF(x)	(sizeof(x) * CHAR_BIT)
> 
>  TAILQ_HEAD(rte_acl_list, rte_tailq_entry);
> 
> +extern int
> +rte_acl_classify_scalar(const struct rte_acl_ctx *ctx, const uint8_t **data,
> +        uint32_t *results, uint32_t num, uint32_t categories);
> +
> +extern int
> +rte_acl_classify_sse(const struct rte_acl_ctx *ctx, const uint8_t **data,
> +        uint32_t *results, uint32_t num, uint32_t categories);
> +

As above: I think it is safe to move these declarations into acl.h.

> +static rte_acl_classify_t classify_fns[] = {
> +	[RTE_ACL_CLASSIFY_DEFAULT] = rte_acl_classify_scalar,
> +	[RTE_ACL_CLASSIFY_SCALAR] = rte_acl_classify_scalar,
> +	[RTE_ACL_CLASSIFY_SSE] = rte_acl_classify_sse,
> +};

static const static rte_acl_classify_t classify_fns[]
?

> +
> +
> +extern int
> +rte_acl_classify_scalar(const struct rte_acl_ctx *ctx, const uint8_t **data,
> +        uint32_t *results, uint32_t num, uint32_t categories);

Duplicate.

> +
> +/* by default, use always avaialbe scalar code path. */
> +static enum rte_acl_classify_alg rte_acl_default_classify = RTE_ACL_CLASSIFY_SCALAR;

Line is longer than 80 chars?

> +
> +void rte_acl_set_default_classify(enum rte_acl_classify_alg alg)
> +{
> +	rte_acl_default_classify = alg;
> +}

void
rte_acls_set_default_classify(...)

Though, I am not sure why do we need it to be public now.
Users can setup ALG per context.

> +
> +void rte_acl_set_ctx_classify(struct rte_acl_ctx *ctx, enum rte_acl_classify_alg alg)
> +{
> +	ctx->alg = alg;
> +}

Same as above:
void
rte_acl_set_ctx_classify(...)
Plus probably add checking that alg is a valid argument:
If ((uint32_t)alg < RTE_DIM(classify_fns)) {ctx->alg=alg; return 0;}
return -EINVAL; 

> +
> +static void __attribute__((constructor))
> +rte_acl_init(void)
> +{
> +	enum rte_acl_classify_alg alg = RTE_ACL_CLASSIFY_DEFAULT;
> +
> +	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE4_1))
> +		alg = RTE_ACL_CLASSIFY_SSE;
> +
> +	rte_acl_set_default_classify(alg);
> +}
> +
> +int rte_acl_classify(const struct rte_acl_ctx *ctx,
> +		     const uint8_t **data,
> +		     uint32_t *results, uint32_t num,
> +		     uint32_t categories)
> +{
> +	return classify_fns[ctx->alg](ctx, data, results, num, categories);
> +}
> +
> +int rte_acl_classify_alg(const struct rte_acl_ctx *ctx,
> +			 enum rte_acl_classify_alg alg,
> +			 const uint8_t **data,
> +			 uint32_t *results, uint32_t num,
> +			 uint32_t categories)
> +{
> +	return classify_fns[alg](ctx, data, results, num, categories);
> +}

Can you move alg argument to be the last one?
That would prevent copying parameters between registers.

Plus same thing with the function definition style. 

> +
>  struct rte_acl_ctx *
>  rte_acl_find_existing(const char *name)
>  {
> @@ -165,6 +226,7 @@ rte_acl_create(const struct rte_acl_param *param)
>  		ctx->max_rules = param->max_rule_num;
>  		ctx->rule_sz = param->rule_size;
>  		ctx->socket_id = param->socket_id;
> +		ctx->alg = rte_acl_default_classify;
>  		snprintf(ctx->name, sizeof(ctx->name), "%s", param->name);
> 
>  		te->data = (void *) ctx;
> diff --git a/lib/librte_acl/rte_acl.h b/lib/librte_acl/rte_acl.h
> index afc0f69..c092a49 100644
> --- a/lib/librte_acl/rte_acl.h
> +++ b/lib/librte_acl/rte_acl.h
> @@ -259,39 +259,6 @@ void
>  rte_acl_reset(struct rte_acl_ctx *ctx);
> 
>  /**
> - * Search for a matching ACL rule for each input data buffer.
> - * Each input data buffer can have up to *categories* matches.
> - * That implies that results array should be big enough to hold
> - * (categories * num) elements.
> - * Also categories parameter should be either one or multiple of
> - * RTE_ACL_RESULTS_MULTIPLIER and can't be bigger than RTE_ACL_MAX_CATEGORIES.
> - * If more than one rule is applicable for given input buffer and
> - * given category, then rule with highest priority will be returned as a match.
> - * Note, that it is a caller responsibility to ensure that input parameters
> - * are valid and point to correct memory locations.
> - *
> - * @param ctx
> - *   ACL context to search with.
> - * @param data
> - *   Array of pointers to input data buffers to perform search.
> - *   Note that all fields in input data buffers supposed to be in network
> - *   byte order (MSB).
> - * @param results
> - *   Array of search results, *categories* results per each input data buffer.
> - * @param num
> - *   Number of elements in the input data buffers array.
> - * @param categories
> - *   Number of maximum possible matches for each input buffer, one possible
> - *   match per category.
> - * @return
> - *   zero on successful completion.
> - *   -EINVAL for incorrect arguments.
> - */
> -int
> -rte_acl_classify(const struct rte_acl_ctx *ctx, const uint8_t **data,
> -	uint32_t *results, uint32_t num, uint32_t categories);
> -
> -/**
>   * Perform scalar search for a matching ACL rule for each input data buffer.
>   * Note, that while the search itself will avoid explicit use of SSE/AVX
>   * intrinsics, code for comparing matching results/priorities sill might use
> @@ -323,9 +290,36 @@ rte_acl_classify(const struct rte_acl_ctx *ctx, const uint8_t **data,
>   *   zero on successful completion.
>   *   -EINVAL for incorrect arguments.
>   */
> -int
> -rte_acl_classify_scalar(const struct rte_acl_ctx *ctx, const uint8_t **data,
> -	uint32_t *results, uint32_t num, uint32_t categories);
> +
> +enum rte_acl_classify_alg {
> +	RTE_ACL_CLASSIFY_DEFAULT = 0,
> +	RTE_ACL_CLASSIFY_SCALAR = 1,
> +	RTE_ACL_CLASSIFY_SSE = 2,
> +};
> +

I think you removed the wrong comment.
All public API function declaration supposed to be preceded by formal doxygen style comment:
Brief explanation, parameters and return value description, etc.
Please restore the proper comment for it.
BTW,  two new functions above - they need a formal comments too.

> +extern int
> +rte_acl_classify(const struct rte_acl_ctx *ctx,
> +		 const uint8_t **data,
> +		 uint32_t *results, uint32_t num,
> +		 uint32_t categories);
> +
> +extern int
> +rte_acl_classify_alg(const struct rte_acl_ctx *ctx,
> +		 enum rte_acl_classify_alg alg,
> +		 const uint8_t **data,
> +		 uint32_t *results, uint32_t num,
> +		 uint32_t categories);
> +/*
> + * Set the default classify algorithm for newly allocated classify contexts
> + */
> +extern void
> +rte_acl_set_default_classify(enum rte_acl_classify_alg alg);
> +
> +/*
> + * Override the default classifier function for a given ctx
> + */
> +extern void
> +rte_acl_set_ctx_classify(struct rte_acl_ctx *ctx, enum rte_acl_classify_alg alg);
> 
>  /**
>   * Dump an ACL context structure to the console.
> --
> 1.9.3

Also, need to update examples/l3fwd-acl/ (remove rte_acl_classify_scalar() calls).
Something like:

diff --git a/examples/l3fwd-acl/main.c b/examples/l3fwd-acl/main.c
index 9b2c21b..8cbf202 100644
--- a/examples/l3fwd-acl/main.c
+++ b/examples/l3fwd-acl/main.c
@@ -278,15 +278,6 @@ send_single_packet(struct rte_mbuf *m, uint8_t port);
        (in) = end + 1;                                         \
 } while (0)

-#define CLASSIFY(context, data, res, num, cat) do {            \
-       if (scalar)                                             \
-               rte_acl_classify_scalar((context), (data),      \
-               (res), (num), (cat));                           \
-       else                                                    \
-               rte_acl_classify((context), (data),             \
-               (res), (num), (cat));                           \
-} while (0)
-
 /*
   * ACL rules should have higher priorities than route ones to ensure ACL rule
   * always be found when input packets have multi-matches in the database.
@@ -1253,6 +1244,9 @@ app_acl_init(void)

        dump_acl_config();

+       if (parm_config.scalar)
+                rte_acl_set_default_classify(RTE_ACL_CLASSIFY_SCALAR);
+
        /* Load  rules from the input file */
        if (add_rules(parm_config.rule_ipv4_name, &route_base_ipv4,
                        &route_num_ipv4, &acl_base_ipv4, &acl_num_ipv4,
@@ -1436,10 +1430,8 @@ main_loop(__attribute__((unused)) void *dummy)
        int socketid;
        const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1)
                        / US_PER_S * BURST_TX_DRAIN_US;
-       int scalar = parm_config.scalar;

        prev_tsc = 0;
-
        lcore_id = rte_lcore_id();
        qconf = &lcore_conf[lcore_id];
        socketid = rte_lcore_to_socket_id(lcore_id);
@@ -1503,7 +1495,8 @@ main_loop(__attribute__((unused)) void *dummy)
                                        nb_rx);

                                if (acl_search.num_ipv4) {
-                                       CLASSIFY(acl_config.acx_ipv4[socketid],
+                                       rte_acl_classify(
+                                               acl_config.acx_ipv4[socketid],
                                                acl_search.data_ipv4,
                                                acl_search.res_ipv4,
                                                acl_search.num_ipv4,
@@ -1515,7 +1508,8 @@ main_loop(__attribute__((unused)) void *dummy)
                                }

                                if (acl_search.num_ipv6) {
-                                       CLASSIFY(acl_config.acx_ipv6[socketid],
+                                       rte_acl_classify(
+                                               acl_config.acx_ipv6[socketid],
                                                acl_search.data_ipv6,
                                                acl_search.res_ipv6,
                                                acl_search.num_ipv6,



More information about the dev mailing list