[dpdk-dev] [PATCH v3 3/6] hash: update jhash function with the latest available
Ananyev, Konstantin
konstantin.ananyev at intel.com
Wed May 6 02:35:58 CEST 2015
Hi Pablo,
> -----Original Message-----
> From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Pablo de Lara
> Sent: Tuesday, May 05, 2015 3:44 PM
> To: dev at dpdk.org
> Subject: [dpdk-dev] [PATCH v3 3/6] hash: update jhash function with the latest available
>
> Jenkins hash function was developed originally in 1996,
> and was integrated in first versions of DPDK.
> The function has been improved in 2006,
> achieving up to 60% better performance, compared to the original one.
>
> This patch integrates that code into the rte_jhash library.
>
> Signed-off-by: Pablo de Lara <pablo.de.lara.guarch at intel.com>
> ---
> lib/librte_hash/rte_jhash.h | 261 +++++++++++++++++++++++++++++++------------
> 1 files changed, 188 insertions(+), 73 deletions(-)
>
> diff --git a/lib/librte_hash/rte_jhash.h b/lib/librte_hash/rte_jhash.h
> index a4bf5a1..0e96b7c 100644
> --- a/lib/librte_hash/rte_jhash.h
> +++ b/lib/librte_hash/rte_jhash.h
> @@ -1,7 +1,7 @@
> /*-
> * BSD LICENSE
> *
> - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
> * All rights reserved.
> *
> * Redistribution and use in source and binary forms, with or without
> @@ -45,38 +45,68 @@ extern "C" {
> #endif
>
> #include <stdint.h>
> +#include <string.h>
> +#include <rte_byteorder.h>
>
> /* jhash.h: Jenkins hash support.
> *
> - * Copyright (C) 1996 Bob Jenkins (bob_jenkins at burtleburtle.net)
> + * Copyright (C) 2006 Bob Jenkins (bob_jenkins at burtleburtle.net)
> *
> * http://burtleburtle.net/bob/hash/
> *
> * These are the credits from Bob's sources:
> *
> - * lookup2.c, by Bob Jenkins, December 1996, Public Domain.
> - * hash(), hash2(), hash3, and mix() are externally useful functions.
> - * Routines to test the hash are included if SELF_TEST is defined.
> - * You can use this free for any purpose. It has no warranty.
> + * lookup3.c, by Bob Jenkins, May 2006, Public Domain.
> + *
> + * These are functions for producing 32-bit hashes for hash table lookup.
> + * hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
> + * are externally useful functions. Routines to test the hash are included
> + * if SELF_TEST is defined. You can use this free for any purpose. It's in
> + * the public domain. It has no warranty.
> *
> * $FreeBSD$
> */
>
> +#define rot(x, k) (((x) << (k)) | ((x) >> (32-(k))))
> +
> /** @internal Internal function. NOTE: Arguments are modified. */
> #define __rte_jhash_mix(a, b, c) do { \
> - a -= b; a -= c; a ^= (c>>13); \
> - b -= c; b -= a; b ^= (a<<8); \
> - c -= a; c -= b; c ^= (b>>13); \
> - a -= b; a -= c; a ^= (c>>12); \
> - b -= c; b -= a; b ^= (a<<16); \
> - c -= a; c -= b; c ^= (b>>5); \
> - a -= b; a -= c; a ^= (c>>3); \
> - b -= c; b -= a; b ^= (a<<10); \
> - c -= a; c -= b; c ^= (b>>15); \
> + a -= c; a ^= rot(c, 4); c += b; \
> + b -= a; b ^= rot(a, 6); a += c; \
> + c -= b; c ^= rot(b, 8); b += a; \
> + a -= c; a ^= rot(c, 16); c += b; \
> + b -= a; b ^= rot(a, 19); a += c; \
> + c -= b; c ^= rot(b, 4); b += a; \
> +} while (0)
> +
> +#define __rte_jhash_final(a, b, c) do { \
> + c ^= b; c -= rot(b, 14); \
> + a ^= c; a -= rot(c, 11); \
> + b ^= a; b -= rot(a, 25); \
> + c ^= b; c -= rot(b, 16); \
> + a ^= c; a -= rot(c, 4); \
> + b ^= a; b -= rot(a, 14); \
> + c ^= b; c -= rot(b, 24); \
> } while (0)
>
> /** The golden ratio: an arbitrary value. */
> -#define RTE_JHASH_GOLDEN_RATIO 0x9e3779b9
> +#define RTE_JHASH_GOLDEN_RATIO 0xdeadbeef
> +
> +#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
> +#define RTE_JHASH_BYTE0_SHIFT 0
> +#define RTE_JHASH_BYTE1_SHIFT 8
> +#define RTE_JHASH_BYTE2_SHIFT 16
> +#define RTE_JHASH_BYTE3_SHIFT 24
> +#else
> +#define RTE_JHASH_BYTE0_SHIFT 24
> +#define RTE_JHASH_BYTE1_SHIFT 16
> +#define RTE_JHASH_BYTE2_SHIFT 8
> +#define RTE_JHASH_BYTE3_SHIFT 0
> +#endif
> +
> +#define LOWER8b_MASK rte_le_to_cpu_32(0xff)
> +#define LOWER16b_MASK rte_le_to_cpu_32(0xffff)
> +#define LOWER24b_MASK rte_le_to_cpu_32(0xffffff)
>
> /**
> * The most generic version, hashes an arbitrary sequence
> @@ -95,42 +125,119 @@ extern "C" {
> static inline uint32_t
> rte_jhash(const void *key, uint32_t length, uint32_t initval)
> {
> - uint32_t a, b, c, len;
> - const uint8_t *k = (const uint8_t *)key;
> - const uint32_t *k32 = (const uint32_t *)key;
> + uint32_t a, b, c;
> + union {
> + const void *ptr;
> + size_t i;
> + } u;
>
> - len = length;
> - a = b = RTE_JHASH_GOLDEN_RATIO;
> - c = initval;
> + /* Set up the internal state */
> + a = b = c = RTE_JHASH_GOLDEN_RATIO + ((uint32_t)length) + initval;
>
> - while (len >= 12) {
> - a += k32[0];
> - b += k32[1];
> - c += k32[2];
> + u.ptr = key;
>
> - __rte_jhash_mix(a,b,c);
> + /* Check key alignment. For x86 architecture, first case is always optimal */
> + if (!strcmp(RTE_ARCH,"x86_64") || !strcmp(RTE_ARCH,"i686") || (u.i & 0x3) == 0) {
I wonder why strcmp() is used here. Why not something like '#if defined(RTE_ARCH_I686) || defined(RTE_ARCH_X86_64)', as in all other places?
Another question: what would happen in the case of RTE_ARCH="x86_x32"?
Konstantin
> + const uint32_t *k = (const uint32_t *)key;
>
> - k += (3 * sizeof(uint32_t)), k32 += 3;
> - len -= (3 * sizeof(uint32_t));
> - }
> + while (length > 12) {
> + a += k[0];
> + b += k[1];
> + c += k[2];
>
> - c += length;
> - switch (len) {
> - case 11: c += ((uint32_t)k[10] << 24);
> - case 10: c += ((uint32_t)k[9] << 16);
> - case 9 : c += ((uint32_t)k[8] << 8);
> - case 8 : b += ((uint32_t)k[7] << 24);
> - case 7 : b += ((uint32_t)k[6] << 16);
> - case 6 : b += ((uint32_t)k[5] << 8);
> - case 5 : b += k[4];
> - case 4 : a += ((uint32_t)k[3] << 24);
> - case 3 : a += ((uint32_t)k[2] << 16);
> - case 2 : a += ((uint32_t)k[1] << 8);
> - case 1 : a += k[0];
> - default: break;
> - };
> + __rte_jhash_mix(a, b, c);
> +
> + k += 3;
> + length -= 12;
> + }
> +
> + switch (length) {
> + case 12:
> + c += k[2]; b += k[1]; a += k[0]; break;
> + case 11:
> + c += k[2] & LOWER24b_MASK; b += k[1]; a += k[0]; break;
> + case 10:
> + c += k[2] & LOWER16b_MASK; b += k[1]; a += k[0]; break;
> + case 9:
> + c += k[2] & LOWER8b_MASK; b += k[1]; a += k[0]; break;
> + case 8:
> + b += k[1]; a += k[0]; break;
> + case 7:
> + b += k[1] & LOWER24b_MASK; a += k[0]; break;
> + case 6:
> + b += k[1] & LOWER16b_MASK; a += k[0]; break;
> + case 5:
> + b += k[1] & LOWER8b_MASK; a += k[0]; break;
> + case 4:
> + a += k[0]; break;
> + case 3:
> + a += k[0] & LOWER24b_MASK; break;
> + case 2:
> + a += k[0] & LOWER16b_MASK; break;
> + case 1:
> + a += k[0] & LOWER8b_MASK; break;
> + /* zero length strings require no mixing */
> + case 0:
> + return c;
> + };
> + } else {
> + const uint8_t *k = (const uint8_t *)key;
> +
> + /* all but the last block: affect some 32 bits of (a, b, c) */
> + while (length > 12) {
> + a += ((uint32_t)k[0]) << RTE_JHASH_BYTE0_SHIFT;
> + a += ((uint32_t)k[1]) << RTE_JHASH_BYTE1_SHIFT;
> + a += ((uint32_t)k[2]) << RTE_JHASH_BYTE2_SHIFT;
> + a += ((uint32_t)k[3]) << RTE_JHASH_BYTE3_SHIFT;
> + b += ((uint32_t)k[4]) << RTE_JHASH_BYTE0_SHIFT;
> + b += ((uint32_t)k[5]) << RTE_JHASH_BYTE1_SHIFT;
> + b += ((uint32_t)k[6]) << RTE_JHASH_BYTE2_SHIFT;
> + b += ((uint32_t)k[7]) << RTE_JHASH_BYTE3_SHIFT;
> + c += ((uint32_t)k[8]) << RTE_JHASH_BYTE0_SHIFT;
> + c += ((uint32_t)k[9]) << RTE_JHASH_BYTE1_SHIFT;
> + c += ((uint32_t)k[10]) << RTE_JHASH_BYTE2_SHIFT;
> + c += ((uint32_t)k[11]) << RTE_JHASH_BYTE3_SHIFT;
> +
> + __rte_jhash_mix(a, b, c);
> +
> + k += 12;
> + length -= 12;
> + }
> +
> + /* last block: affect all 32 bits of (c) */
> + /* all the case statements fall through */
> + switch (length) {
> + case 12:
> + c += ((uint32_t)k[11]) << RTE_JHASH_BYTE3_SHIFT;
> + case 11:
> + c += ((uint32_t)k[10]) << RTE_JHASH_BYTE2_SHIFT;
> + case 10:
> + c += ((uint32_t)k[9]) << RTE_JHASH_BYTE1_SHIFT;
> + case 9:
> + c += ((uint32_t)k[8]) << RTE_JHASH_BYTE0_SHIFT;
> + case 8:
> + b += ((uint32_t)k[7]) << RTE_JHASH_BYTE3_SHIFT;
> + case 7:
> + b += ((uint32_t)k[6]) << RTE_JHASH_BYTE2_SHIFT;
> + case 6:
> + b += ((uint32_t)k[5]) << RTE_JHASH_BYTE1_SHIFT;
> + case 5:
> + b += ((uint32_t)k[4]) << RTE_JHASH_BYTE0_SHIFT;
> + case 4:
> + a += ((uint32_t)k[3]) << RTE_JHASH_BYTE3_SHIFT;
> + case 3:
> + a += ((uint32_t)k[2]) << RTE_JHASH_BYTE2_SHIFT;
> + case 2:
> + a += ((uint32_t)k[1]) << RTE_JHASH_BYTE1_SHIFT;
> + case 1:
> + a += ((uint32_t)k[0]) << RTE_JHASH_BYTE0_SHIFT;
> + break;
> + case 0:
> + return c;
> + }
> + }
>
> - __rte_jhash_mix(a,b,c);
> + __rte_jhash_final(a, b, c);
>
> return c;
> }
> @@ -151,33 +258,51 @@ rte_jhash(const void *key, uint32_t length, uint32_t initval)
> static inline uint32_t
> rte_jhash2(const uint32_t *k, uint32_t length, uint32_t initval)
> {
> - uint32_t a, b, c, len;
> + uint32_t a, b, c;
>
> - a = b = RTE_JHASH_GOLDEN_RATIO;
> - c = initval;
> - len = length;
> + /* Set up the internal state */
> + a = b = c = RTE_JHASH_GOLDEN_RATIO + (((uint32_t)length) << 2) + initval;
>
> - while (len >= 3) {
> + /* Handle most of the key */
> + while (length > 3) {
> a += k[0];
> b += k[1];
> c += k[2];
> +
> __rte_jhash_mix(a, b, c);
> - k += 3; len -= 3;
> - }
>
> - c += length * 4;
> + k += 3;
> + length -= 3;
> + }
>
> - switch (len) {
> - case 2 : b += k[1];
> - case 1 : a += k[0];
> - default: break;
> + /* Handle the last 3 uint32_t's */
> + switch (length) {
> + case 3:
> + c += k[2];
> + case 2:
> + b += k[1];
> + case 1:
> + a += k[0];
> + __rte_jhash_final(a, b, c);
> + /* case 0: nothing left to add */
> + case 0:
> + break;
> };
>
> - __rte_jhash_mix(a,b,c);
> -
> return c;
> }
>
> +static inline uint32_t
> +__rte_jhash_3words(uint32_t a, uint32_t b, uint32_t c, uint32_t initval)
> +{
> + a += RTE_JHASH_GOLDEN_RATIO + initval;
> + b += RTE_JHASH_GOLDEN_RATIO + initval;
> + c += RTE_JHASH_GOLDEN_RATIO + initval;
> +
> + __rte_jhash_final(a, b, c);
> +
> + return c;
> +}
>
> /**
> * A special ultra-optimized versions that knows it is hashing exactly
> @@ -197,17 +322,7 @@ rte_jhash2(const uint32_t *k, uint32_t length, uint32_t initval)
> static inline uint32_t
> rte_jhash_3words(uint32_t a, uint32_t b, uint32_t c, uint32_t initval)
> {
> - a += RTE_JHASH_GOLDEN_RATIO;
> - b += RTE_JHASH_GOLDEN_RATIO;
> - c += initval;
> -
> - __rte_jhash_mix(a, b, c);
> -
> - /*
> - * NOTE: In particular the "c += length; __rte_jhash_mix(a,b,c);"
> - * normally done at the end is not done here.
> - */
> - return c;
> + return __rte_jhash_3words(a + 12, b + 12, c + 12, initval);
> }
>
> /**
> @@ -226,7 +341,7 @@ rte_jhash_3words(uint32_t a, uint32_t b, uint32_t c, uint32_t initval)
> static inline uint32_t
> rte_jhash_2words(uint32_t a, uint32_t b, uint32_t initval)
> {
> - return rte_jhash_3words(a, b, 0, initval);
> + return __rte_jhash_3words(a + 8, b + 8, 8, initval);
> }
>
> /**
> @@ -243,7 +358,7 @@ rte_jhash_2words(uint32_t a, uint32_t b, uint32_t initval)
> static inline uint32_t
> rte_jhash_1word(uint32_t a, uint32_t initval)
> {
> - return rte_jhash_3words(a, 0, 0, initval);
> + return __rte_jhash_3words(a + 4, 4, 4, initval);
> }
>
> #ifdef __cplusplus
> --
> 1.7.4.1
More information about the dev
mailing list