[dpdk-dev] [PATCH v7 11/17] test: add perf test for distributor burst mode

David Hunt david.hunt at intel.com
Tue Feb 21 04:17:47 CET 2017


Signed-off-by: David Hunt <david.hunt at intel.com>
---
 app/test/test_distributor_perf.c | 117 +++++++++++++++++++++++++--------------
 1 file changed, 76 insertions(+), 41 deletions(-)

diff --git a/app/test/test_distributor_perf.c b/app/test/test_distributor_perf.c
index a7e4823..30ab1a5 100644
--- a/app/test/test_distributor_perf.c
+++ b/app/test/test_distributor_perf.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -39,10 +39,11 @@
 #include <rte_cycles.h>
 #include <rte_common.h>
 #include <rte_mbuf.h>
-#include <rte_distributor_v20.h>
+#include <rte_distributor.h>
 
-#define ITER_POWER 20 /* log 2 of how many iterations we do when timing. */
-#define BURST 32
+#define ITER_POWER_CL 25 /* log 2 of how many iterations for Cache Line test */
+#define ITER_POWER 21 /* log 2 of how many iterations we do when timing. */
+#define BURST 64
 #define BIG_BATCH 1024
 
 /* static vars - zero initialized by default */
@@ -54,7 +55,8 @@ struct worker_stats {
 } __rte_cache_aligned;
 struct worker_stats worker_stats[RTE_MAX_LCORE];
 
-/* worker thread used for testing the time to do a round-trip of a cache
+/*
+ * worker thread used for testing the time to do a round-trip of a cache
  * line between two cores and back again
  */
 static void
@@ -69,7 +71,8 @@ flip_bit(volatile uint64_t *arg)
 	}
 }
 
-/* test case to time the number of cycles to round-trip a cache line between
+/*
+ * test case to time the number of cycles to round-trip a cache line between
  * two cores and back again.
  */
 static void
@@ -86,7 +89,7 @@ time_cache_line_switch(void)
 		rte_pause();
 
 	const uint64_t start_time = rte_rdtsc();
-	for (i = 0; i < (1 << ITER_POWER); i++) {
+	for (i = 0; i < (1 << ITER_POWER_CL); i++) {
 		while (*pdata)
 			rte_pause();
 		*pdata = 1;
@@ -98,13 +101,14 @@ time_cache_line_switch(void)
 	*pdata = 2;
 	rte_eal_wait_lcore(slaveid);
 	printf("==== Cache line switch test ===\n");
-	printf("Time for %u iterations = %"PRIu64" ticks\n", (1<<ITER_POWER),
+	printf("Time for %u iterations = %"PRIu64" ticks\n", (1<<ITER_POWER_CL),
 			end_time-start_time);
 	printf("Ticks per iteration = %"PRIu64"\n\n",
-			(end_time-start_time) >> ITER_POWER);
+			(end_time-start_time) >> ITER_POWER_CL);
 }
 
-/* returns the total count of the number of packets handled by the worker
+/*
+ * returns the total count of the number of packets handled by the worker
  * functions given below.
  */
 static unsigned
@@ -123,35 +127,44 @@ clear_packet_count(void)
 	memset(&worker_stats, 0, sizeof(worker_stats));
 }
 
-/* this is the basic worker function for performance tests.
+/*
+ * this is the basic worker function for performance tests.
  * it does nothing but return packets and count them.
  */
 static int
 handle_work(void *arg)
 {
-	struct rte_mbuf *pkt = NULL;
-	struct rte_distributor_v20 *d = arg;
-	unsigned count = 0;
-	unsigned id = __sync_fetch_and_add(&worker_idx, 1);
+	struct rte_distributor *d = arg;
+	unsigned int count = 0;
+	unsigned int num = 0;
+	int i;
+	unsigned int id = __sync_fetch_and_add(&worker_idx, 1);
+	struct rte_mbuf *buf[8] __rte_cache_aligned;
 
-	pkt = rte_distributor_get_pkt_v20(d, id, NULL);
+	for (i = 0; i < 8; i++)
+		buf[i] = NULL;
+
+	num = rte_distributor_get_pkt(d, id, buf, buf, num);
 	while (!quit) {
-		worker_stats[id].handled_packets++, count++;
-		pkt = rte_distributor_get_pkt_v20(d, id, pkt);
+		worker_stats[id].handled_packets += num;
+		count += num;
+		num = rte_distributor_get_pkt(d, id, buf, buf, num);
 	}
-	worker_stats[id].handled_packets++, count++;
-	rte_distributor_return_pkt_v20(d, id, pkt);
+	worker_stats[id].handled_packets += num;
+	count += num;
+	rte_distributor_return_pkt(d, id, buf, num);
 	return 0;
 }
 
-/* this basic performance test just repeatedly sends in 32 packets at a time
+/*
+ * this basic performance test just repeatedly sends in BURST packets at a time
  * to the distributor and verifies at the end that we got them all in the worker
  * threads and finally how long per packet the processing took.
  */
 static inline int
-perf_test(struct rte_distributor_v20 *d, struct rte_mempool *p)
+perf_test(struct rte_distributor *d, struct rte_mempool *p)
 {
-	unsigned i;
+	unsigned int i;
 	uint64_t start, end;
 	struct rte_mbuf *bufs[BURST];
 
@@ -166,15 +179,16 @@ perf_test(struct rte_distributor_v20 *d, struct rte_mempool *p)
 
 	start = rte_rdtsc();
 	for (i = 0; i < (1<<ITER_POWER); i++)
-		rte_distributor_process_v20(d, bufs, BURST);
+		rte_distributor_process(d, bufs, BURST);
 	end = rte_rdtsc();
 
 	do {
 		usleep(100);
-		rte_distributor_process_v20(d, NULL, 0);
+		rte_distributor_process(d, NULL, 0);
 	} while (total_packet_count() < (BURST << ITER_POWER));
 
-	printf("=== Performance test of distributor ===\n");
+	rte_distributor_clear_returns(d);
+
 	printf("Time per burst:  %"PRIu64"\n", (end - start) >> ITER_POWER);
 	printf("Time per packet: %"PRIu64"\n\n",
 			((end - start) >> ITER_POWER)/BURST);
@@ -192,21 +206,22 @@ perf_test(struct rte_distributor_v20 *d, struct rte_mempool *p)
 
 /* Useful function which ensures that all worker functions terminate */
 static void
-quit_workers(struct rte_distributor_v20 *d, struct rte_mempool *p)
+quit_workers(struct rte_distributor *d, struct rte_mempool *p)
 {
-	const unsigned num_workers = rte_lcore_count() - 1;
-	unsigned i;
+	const unsigned int num_workers = rte_lcore_count() - 1;
+	unsigned int i;
 	struct rte_mbuf *bufs[RTE_MAX_LCORE];
+
 	rte_mempool_get_bulk(p, (void *)bufs, num_workers);
 
 	quit = 1;
 	for (i = 0; i < num_workers; i++)
 		bufs[i]->hash.usr = i << 1;
-	rte_distributor_process_v20(d, bufs, num_workers);
+	rte_distributor_process(d, bufs, num_workers);
 
 	rte_mempool_put_bulk(p, (void *)bufs, num_workers);
 
-	rte_distributor_process_v20(d, NULL, 0);
+	rte_distributor_process(d, NULL, 0);
 	rte_eal_mp_wait_lcore();
 	quit = 0;
 	worker_idx = 0;
@@ -215,7 +230,8 @@ quit_workers(struct rte_distributor_v20 *d, struct rte_mempool *p)
 static int
 test_distributor_perf(void)
 {
-	static struct rte_distributor_v20 *d;
+	static struct rte_distributor *ds;
+	static struct rte_distributor *db;
 	static struct rte_mempool *p;
 
 	if (rte_lcore_count() < 2) {
@@ -226,16 +242,28 @@ test_distributor_perf(void)
 	/* first time how long it takes to round-trip a cache line */
 	time_cache_line_switch();
 
-	if (d == NULL) {
-		d = rte_distributor_create_v20("Test_perf", rte_socket_id(),
-				rte_lcore_count() - 1);
-		if (d == NULL) {
+	if (ds == NULL) {
+		ds = rte_distributor_create("Test_perf", rte_socket_id(),
+				rte_lcore_count() - 1,
+				RTE_DIST_ALG_SINGLE);
+		if (ds == NULL) {
 			printf("Error creating distributor\n");
 			return -1;
 		}
 	} else {
-		rte_distributor_flush_v20(d);
-		rte_distributor_clear_returns_v20(d);
+		rte_distributor_clear_returns(ds);
+	}
+
+	if (db == NULL) {
+		db = rte_distributor_create("Test_burst", rte_socket_id(),
+				rte_lcore_count() - 1,
+				RTE_DIST_ALG_BURST);
+		if (db == NULL) {
+			printf("Error creating burst distributor\n");
+			return -1;
+		}
+	} else {
+		rte_distributor_clear_returns(db);
 	}
 
 	const unsigned nb_bufs = (511 * rte_lcore_count()) < BIG_BATCH ?
@@ -249,10 +277,17 @@ test_distributor_perf(void)
 		}
 	}
 
-	rte_eal_mp_remote_launch(handle_work, d, SKIP_MASTER);
-	if (perf_test(d, p) < 0)
+	printf("=== Performance test of distributor (single mode) ===\n");
+	rte_eal_mp_remote_launch(handle_work, ds, SKIP_MASTER);
+	if (perf_test(ds, p) < 0)
+		return -1;
+	quit_workers(ds, p);
+
+	printf("=== Performance test of distributor (burst mode) ===\n");
+	rte_eal_mp_remote_launch(handle_work, db, SKIP_MASTER);
+	if (perf_test(db, p) < 0)
 		return -1;
-	quit_workers(d, p);
+	quit_workers(db, p);
 
 	return 0;
 }
-- 
2.7.4



More information about the dev mailing list