[dpdk-dev] [PATCH v1 0/2] rte_memcmp functions

Ravi Kerur rkerur at gmail.com
Mon Mar 7 23:59:43 CET 2016


This patch series provides an AVX/SSE-based memcmp implementation on x86.
For other architectures supported by DPDK, rte_memcmp simply uses the
memcmp function.

The following are preliminary performance numbers on an Intel(R) Xeon(R) CPU E5-2620 v3 @ 2.40GHz:

RTE>>memcmp_perf_autotest 
 *** RTE memcmp equal performance test results ***
 *** Length (bytes), Ticks/Op. ***
 ***          2,     4.8526 ***
 ***          5,     5.4023 ***
 ***          8,     4.5067 ***
 ***          9,     5.4024 ***
 ***         15,     7.2069 ***
 ***         16,     4.5027 ***
 ***         17,     4.5020 ***
 ***         31,     4.5020 ***
 ***         32,     4.5033 ***
 ***         33,     5.1377 ***
 ***         63,     6.9069 ***
 ***         64,     6.9472 ***
 ***         65,     9.6301 ***
 ***        127,    13.5122 ***
 ***        128,    10.8028 ***
 ***        129,    11.7058 ***
 ***        191,    14.4105 ***
 ***        192,    14.4251 ***
 ***        193,    16.2139 ***
 ***        255,    18.0125 ***
 ***        256,    17.1150 ***
 ***        257,    18.9129 ***
 ***        319,    20.7148 ***
 ***        320,    20.7161 ***
 ***        321,    22.5198 ***
 ***        383,    24.3169 ***
 ***        384,    22.5195 ***
 ***        385,    24.3197 ***
 ***        447,    26.1171 ***
 ***        448,    26.1289 ***
 ***        449,    27.9168 ***
 ***        511,    29.7252 ***
 ***        512,    29.7202 ***
 ***        513,    27.9253 ***
 ***        767,    38.7506 ***
 ***        768,    36.9327 ***
 ***        769,    38.7259 ***
 ***       1023,    49.5368 ***
 ***       1024,    49.5347 ***
 ***       1025,    46.8414 ***
 ***       1522,    68.4517 ***
 ***       1536,    68.4522 ***
 ***       1600,    67.5478 ***
 ***       2048,    87.3674 ***
 ***       2560,   106.2776 ***
 ***       3072,   125.1937 ***
 ***       3584,   144.1503 ***
 ***       4096,   163.0243 ***
 ***       4608,   181.9367 ***
 ***       5632,   219.7613 ***
 ***       6144,   238.6745 ***
 ***       6656,   257.6009 ***
 ***       7168,   276.5084 ***
 ***       7680,   295.4162 ***
 ***       8192,   314.3726 ***
 ***      16834,   746.1065 ***
 *** memcmp equal performance test results ***
 *** Length (bytes), Ticks/Op. ***
 ***          2,     9.0100 ***
 ***          5,     8.1065 ***
 ***          8,     9.1944 ***
 ***          9,     9.0044 ***
 ***         15,     9.0084 ***
 ***         16,    10.0695 ***
 ***         17,     9.0109 ***
 ***         31,     9.9111 ***
 ***         32,     9.9085 ***
 ***         33,     9.9112 ***
 ***         63,    12.6098 ***
 ***         64,    12.6106 ***
 ***         65,    12.6060 ***
 ***        127,    19.8160 ***
 ***        128,    19.8145 ***
 ***        129,    20.7260 ***
 ***        191,    26.1214 ***
 ***        192,    26.1195 ***
 ***        193,    26.1158 ***
 ***        255,    30.6222 ***
 ***        256,    30.6267 ***
 ***        257,    31.5270 ***
 ***        319,    36.0264 ***
 ***        320,    36.0497 ***
 ***        321,    36.9247 ***
 ***        383,    40.5290 ***
 ***        384,    40.5265 ***
 ***        385,    41.4331 ***
 ***        447,    45.9317 ***
 ***        448,    45.9324 ***
 ***        449,    45.9302 ***
 ***        511,    50.4652 ***
 ***        512,    50.4379 ***
 ***        513,    51.3361 ***
 ***        767,    67.5552 ***
 ***        768,    67.5464 ***
 ***        769,    67.5462 ***
 ***       1023,    85.5579 ***
 ***       1024,    85.5610 ***
 ***       1025,    85.5582 ***
 ***       1522,   120.6860 ***
 ***       1536,   121.6064 ***
 ***       1600,   126.1075 ***
 ***       2048,   157.6208 ***
 ***       2560,   208.8309 ***
 ***       3072,   241.7587 ***
 ***       3584,   276.1556 ***
 ***       4096,   310.5865 ***
 ***       4608,   343.8918 ***
 ***       5632,   411.2264 ***
 ***       6144,   445.3057 ***
 ***       6656,   480.4620 ***
 ***       7168,   512.5769 ***
 ***       7680,   547.9394 ***
 ***       8192,   582.7687 ***
 ***      16834,  1456.4280 ***
 *** RTE memcmp greater than performance test results ***
 *** Length (bytes), Ticks/Op. ***
 ***          1,    22.5862 ***
 ***          8,    24.9140 ***
 ***         15,    25.3942 ***
 ***         16,    22.1721 ***
 ***         32,    24.1650 ***
 ***         64,    25.0849 ***
 ***        128,    26.5515 ***
 ***        256,    28.7055 ***
 ***        512,    35.2811 ***
 ***       1024,    44.4520 ***
 ***       2048,    64.1331 ***
 ***       4096,   103.9949 ***
 ***       8192,   184.8077 ***
 ***      16384,   345.6785 ***
 *** memcmp greater than performance test results ***
 *** Length (bytes), Ticks/Op. ***
 ***          1,    22.6340 ***
 ***          8,    25.5552 ***
 ***         15,    25.4223 ***
 ***         16,    25.1371 ***
 ***         32,    26.7381 ***
 ***         64,    27.4521 ***
 ***        128,    29.7323 ***
 ***        256,    35.8891 ***
 ***        512,    46.0419 ***
 ***       1024,   101.1564 ***
 ***       2048,   159.8415 ***
 ***       4096,   230.2136 ***
 ***       8192,   366.2912 ***
 ***      16384,   647.0217 ***
 *** RTE memcmp less than performance test results ***
 *** Length (bytes), Ticks/Op. ***
 ***          1,    22.6627 ***
 ***          8,    26.2665 ***
 ***         15,    26.8192 ***
 ***         16,    21.7960 ***
 ***         32,    23.9878 ***
 ***         64,    24.2074 ***
 ***        128,    26.8111 ***
 ***        256,    28.3444 ***
 ***        512,    34.7882 ***
 ***       1024,    44.4824 ***
 ***       2048,    63.4154 ***
 ***       4096,   101.4360 ***
 ***       8192,   179.1029 ***
 ***      16384,   333.9357 ***
 *** memcmp less than performance test results ***
 *** Length (bytes), Ticks/Op. ***
 ***          1,    22.2894 ***
 ***          8,    24.9805 ***
 ***         15,    24.8632 ***
 ***         16,    24.3448 ***
 ***         32,    24.8554 ***
 ***         64,    25.7541 ***
 ***        128,    29.1831 ***
 ***        256,    36.2345 ***
 ***        512,    45.8233 ***
 ***       1024,   103.4597 ***
 ***       2048,   163.5588 ***
 ***       4096,   232.7368 ***
 ***       8192,   368.1143 ***
 ***      16384,   649.0326 ***
Test OK
RTE>>quit


Ravi Kerur (2):
  rte_memcmp functions using Intel AVX and SSE intrinsics
  Test cases for rte_memcmp functions

 app/test/Makefile                                  |  31 +-
 app/test/autotest_data.py                          |  19 +
 app/test/test_memcmp.c                             | 250 +++++++
 app/test/test_memcmp_perf.c                        | 396 +++++++++++
 .../common/include/arch/arm/rte_memcmp.h           |  60 ++
 .../common/include/arch/ppc_64/rte_memcmp.h        |  62 ++
 .../common/include/arch/tile/rte_memcmp.h          |  60 ++
 .../common/include/arch/x86/rte_memcmp.h           | 786 +++++++++++++++++++++
 lib/librte_eal/common/include/generic/rte_memcmp.h | 175 +++++
 9 files changed, 1838 insertions(+), 1 deletion(-)
 create mode 100644 app/test/test_memcmp.c
 create mode 100644 app/test/test_memcmp_perf.c
 create mode 100644 lib/librte_eal/common/include/arch/arm/rte_memcmp.h
 create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_memcmp.h
 create mode 100644 lib/librte_eal/common/include/arch/tile/rte_memcmp.h
 create mode 100644 lib/librte_eal/common/include/arch/x86/rte_memcmp.h
 create mode 100644 lib/librte_eal/common/include/generic/rte_memcmp.h

-- 
1.9.1



More information about the dev mailing list