[EXT] [PATCH v10 14/16] graph: add stats for mcore dispatch model

Pavan Nikhilesh Bhagavatula pbhagavatula at marvell.com
Thu Jun 8 15:11:54 CEST 2023


> Add stats for mcore dispatch model if stats collection is
> enabled.
> 
> Signed-off-by: Haiyue Wang <haiyue.wang at intel.com>
> Signed-off-by: Cunming Liang <cunming.liang at intel.com>
> Signed-off-by: Zhirun Yan <zhirun.yan at intel.com>
> Acked-by: Jerin Jacob <jerinj at marvell.com>

Acked-by: Pavan Nikhilesh <pbhagavatula at marvell.com>

> ---
>  lib/graph/graph_debug.c                    |  6 ++
>  lib/graph/graph_stats.c                    | 76 +++++++++++++++++++---
>  lib/graph/rte_graph.h                      | 10 +++
>  lib/graph/rte_graph_model_mcore_dispatch.c |  3 +
>  lib/graph/rte_graph_worker_common.h        |  2 +
>  5 files changed, 89 insertions(+), 8 deletions(-)
> 
> diff --git a/lib/graph/graph_debug.c b/lib/graph/graph_debug.c
> index b84412f5dd..9def3067ec 100644
> --- a/lib/graph/graph_debug.c
> +++ b/lib/graph/graph_debug.c
> @@ -74,6 +74,12 @@ rte_graph_obj_dump(FILE *f, struct rte_graph *g, bool all)
>  		fprintf(f, "       size=%d\n", n->size);
>  		fprintf(f, "       idx=%d\n", n->idx);
>  		fprintf(f, "       total_objs=%" PRId64 "\n", n->total_objs);
> +		if (rte_graph_worker_model_get(g) == RTE_GRAPH_MODEL_MCORE_DISPATCH) {
> +			fprintf(f, "       total_sched_objs=%" PRId64 "\n",
> +				n->dispatch.total_sched_objs);
> +			fprintf(f, "       total_sched_fail=%" PRId64 "\n",
> +				n->dispatch.total_sched_fail);
> +		}
>  		fprintf(f, "       total_calls=%" PRId64 "\n", n->total_calls);
>  		for (i = 0; i < n->nb_edges; i++)
>  			fprintf(f, "          edge[%d] <%s>\n", i,
> diff --git a/lib/graph/graph_stats.c b/lib/graph/graph_stats.c
> index c0140ba922..cc32245c05 100644
> --- a/lib/graph/graph_stats.c
> +++ b/lib/graph/graph_stats.c
> @@ -40,13 +40,19 @@ struct rte_graph_cluster_stats {
>  	struct cluster_node clusters[];
>  } __rte_cache_aligned;
> 
> +#define boarder_model_dispatch()                                                              \
> +	fprintf(f, "+-------------------------------+---------------+--------" \
> +		   "-------+---------------+---------------+---------------+" \
> +		   "---------------+---------------+-" \
> +		   "----------+\n")
> +
>  #define boarder()                                                              \
>  	fprintf(f, "+-------------------------------+---------------+--------" \
>  		   "-------+---------------+---------------+---------------+-" \
>  		   "----------+\n")
> 
>  static inline void
> -print_banner(FILE *f)
> +print_banner_default(FILE *f)
>  {
>  	boarder();
>  	fprintf(f, "%-32s%-16s%-16s%-16s%-16s%-16s%-16s\n", "|Node", "|calls",
> @@ -55,6 +61,28 @@ print_banner(FILE *f)
>  	boarder();
>  }
> 
> +static inline void
> +print_banner_dispatch(FILE *f)
> +{
> +	boarder_model_dispatch();
> +	fprintf(f, "%-32s%-16s%-16s%-16s%-16s%-16s%-16s%-16s%-16s\n",
> +		"|Node", "|calls",
> +		"|objs", "|sched objs", "|sched fail",
> +		"|realloc_count", "|objs/call", "|objs/sec(10E6)",
> +		"|cycles/call|");
> +	boarder_model_dispatch();
> +}
> +
> +static inline void
> +print_banner(FILE *f)
> +{
> +	if (rte_graph_worker_model_get(STAILQ_FIRST(graph_list_head_get())->graph) ==
> +	    RTE_GRAPH_MODEL_MCORE_DISPATCH)
> +		print_banner_dispatch(f);
> +	else
> +		print_banner_default(f);
> +}
> +
>  static inline void
>  print_node(FILE *f, const struct rte_graph_cluster_node_stats *stat)
>  {
> @@ -76,11 +104,22 @@ print_node(FILE *f, const struct rte_graph_cluster_node_stats *stat)
>  	objs_per_sec = ts_per_hz ? (objs - prev_objs) / ts_per_hz : 0;
>  	objs_per_sec /= 1000000;
> 
> -	fprintf(f,
> -		"|%-31s|%-15" PRIu64 "|%-15" PRIu64 "|%-15" PRIu64
> -		"|%-15.3f|%-15.6f|%-11.4f|\n",
> -		stat->name, calls, objs, stat->realloc_count, objs_per_call,
> -		objs_per_sec, cycles_per_call);
> +	if (rte_graph_worker_model_get(STAILQ_FIRST(graph_list_head_get())->graph) ==
> +	    RTE_GRAPH_MODEL_MCORE_DISPATCH) {
> +		fprintf(f,
> +			"|%-31s|%-15" PRIu64 "|%-15" PRIu64 "|%-15" PRIu64
> +			"|%-15" PRIu64 "|%-15" PRIu64
> +			"|%-15.3f|%-15.6f|%-11.4f|\n",
> +			stat->name, calls, objs, stat->dispatch.sched_objs,
> +			stat->dispatch.sched_fail, stat->realloc_count, objs_per_call,
> +			objs_per_sec, cycles_per_call);
> +	} else {
> +		fprintf(f,
> +			"|%-31s|%-15" PRIu64 "|%-15" PRIu64 "|%-15" PRIu64
> +			"|%-15.3f|%-15.6f|%-11.4f|\n",
> +			stat->name, calls, objs, stat->realloc_count, objs_per_call,
> +			objs_per_sec, cycles_per_call);
> +	}
>  }
> 
>  static int
> @@ -88,13 +127,20 @@ graph_cluster_stats_cb(bool is_first, bool is_last, void *cookie,
>  		       const struct rte_graph_cluster_node_stats *stat)
>  {
>  	FILE *f = cookie;
> +	int model;
> +
> +	model = rte_graph_worker_model_get(STAILQ_FIRST(graph_list_head_get())->graph);
> 
>  	if (unlikely(is_first))
>  		print_banner(f);
>  	if (stat->objs)
>  		print_node(f, stat);
> -	if (unlikely(is_last))
> -		boarder();
> +	if (unlikely(is_last)) {
> +		if (model == RTE_GRAPH_MODEL_MCORE_DISPATCH)
> +			boarder_model_dispatch();
> +		else
> +			boarder();
> +	}
> 
>  	return 0;
>  };
> @@ -333,12 +379,20 @@ cluster_node_arregate_stats(struct cluster_node *cluster)
>  {
>  	uint64_t calls = 0, cycles = 0, objs = 0, realloc_count = 0;
>  	struct rte_graph_cluster_node_stats *stat = &cluster->stat;
> +	uint64_t sched_objs = 0, sched_fail = 0;
>  	struct rte_node *node;
>  	rte_node_t count;
> +	int model;
> 
> +	model = rte_graph_worker_model_get(STAILQ_FIRST(graph_list_head_get())->graph);
>  	for (count = 0; count < cluster->nb_nodes; count++) {
>  		node = cluster->nodes[count];
> 
> +		if (model == RTE_GRAPH_MODEL_MCORE_DISPATCH) {
> +			sched_objs += node->dispatch.total_sched_objs;
> +			sched_fail += node->dispatch.total_sched_fail;
> +		}
> +
>  		calls += node->total_calls;
>  		objs += node->total_objs;
>  		cycles += node->total_cycles;
> @@ -348,6 +402,12 @@ cluster_node_arregate_stats(struct cluster_node *cluster)
>  	stat->calls = calls;
>  	stat->objs = objs;
>  	stat->cycles = cycles;
> +
> +	if (model == RTE_GRAPH_MODEL_MCORE_DISPATCH) {
> +		stat->dispatch.sched_objs = sched_objs;
> +		stat->dispatch.sched_fail = sched_fail;
> +	}
> +
>  	stat->ts = rte_get_timer_cycles();
>  	stat->realloc_count = realloc_count;
>  }
> diff --git a/lib/graph/rte_graph.h b/lib/graph/rte_graph.h
> index 2ffee520b1..28e50e49b8 100644
> --- a/lib/graph/rte_graph.h
> +++ b/lib/graph/rte_graph.h
> @@ -220,6 +220,16 @@ struct rte_graph_cluster_node_stats {
>  	uint64_t prev_objs;	/**< Previous number of processed objs. */
>  	uint64_t prev_cycles;	/**< Previous number of cycles. */
> 
> +	RTE_STD_C11
> +	union {
> +		struct {
> +			uint64_t sched_objs;
> +			/**< Previous number of scheduled objs for dispatch model. */
> +			uint64_t sched_fail;
> +			/**< Previous number of failed schedule objs for dispatch model. */
> +		} dispatch;
> +	};
> +
>  	uint64_t realloc_count; /**< Realloc count. */
> 
>  	rte_node_t id;	/**< Node identifier of stats. */
> diff --git a/lib/graph/rte_graph_model_mcore_dispatch.c b/lib/graph/rte_graph_model_mcore_dispatch.c
> index 8f4bc860ab..d1291b8c57 100644
> --- a/lib/graph/rte_graph_model_mcore_dispatch.c
> +++ b/lib/graph/rte_graph_model_mcore_dispatch.c
> @@ -96,6 +96,7 @@ __graph_sched_node_enqueue(struct rte_node *node, struct rte_graph *graph)
>  		rte_pause();
> 
>  	off += size;
> +	node->dispatch.total_sched_objs += size;
>  	node->idx -= size;
>  	if (node->idx > 0)
>  		goto submit_again;
> @@ -107,6 +108,8 @@ __graph_sched_node_enqueue(struct rte_node *node, struct rte_graph *graph)
>  		memmove(&node->objs[0], &node->objs[off],
>  			node->idx * sizeof(void *));
> 
> +	node->dispatch.total_sched_fail += node->idx;
> +
>  	return false;
>  }
> 
> diff --git a/lib/graph/rte_graph_worker_common.h b/lib/graph/rte_graph_worker_common.h
> index d6a16dc7e3..a6bae4c6a5 100644
> --- a/lib/graph/rte_graph_worker_common.h
> +++ b/lib/graph/rte_graph_worker_common.h
> @@ -110,6 +110,8 @@ struct rte_node {
>  		/* Fast schedule area for mcore dispatch model */
>  		struct {
>  			unsigned int lcore_id;  /**< Node running lcore. */
> +			uint64_t total_sched_objs; /**< Number of objects scheduled. */
> +			uint64_t total_sched_fail; /**< Number of scheduled failure. */
>  		} dispatch;
>  	};
>  	/* Fast path area  */
> --
> 2.37.2



More information about the dev mailing list