c_jhash.h
/*
 * c_jhash.h - Jenkins Hash
 *
 * Copyright (c) 1997 Bob Jenkins <bob_jenkins@burtleburtle.net>
 *
 * lookup8.c, by Bob Jenkins, January 4 1997, Public Domain.
 * hash(), hash2(), hash3(), and _c_mix() are externally useful functions.
 * Routines to test the hash are included if SELF_TEST is defined.
 * You can use this free for any purpose. It has no warranty.
 *
 * See http://burtleburtle.net/bob/hash/evahash.html
 */

/**
 * @file c_jhash.h
 *
 * @brief Interface of the cynapses jhash implementation
 *
 * @defgroup cynJHashInternals cynapses libc jhash function
 * @ingroup cynLibraryAPI
 *
 * @{
 */
#ifndef _C_JHASH_H
#define _C_JHASH_H

#include <stdint.h>

#define c_hashsize(n) ((uint32_t) 1 << (n))
#define c_hashmask(n) (c_hashsize(n) - 1)

/**
 * _c_mix -- Mix 3 32-bit values reversibly.
 *
 * For every delta with one or two bits set, and the deltas of all three
 * high bits or all three low bits, whether the original value of a,b,c
 * is almost all zero or is uniformly distributed:
 * If _c_mix() is run forward or backward, at least 32 bits in a,b,c
 * have at least 1/4 probability of changing.
 * If _c_mix() is run forward, every bit of c will change between 1/3 and
 * 2/3 of the time. (Well, 22/100 and 78/100 for some 2-bit deltas.)
 *
 * _c_mix() was built out of 36 single-cycle latency instructions in a
 * structure that could support 2x parallelism, like so:
 *   a -= b;
 *   a -= c; x = (c>>13);
 *   b -= c; a ^= x;
 *   b -= a; x = (a<<8);
 *   c -= a; b ^= x;
 *   c -= b; x = (b>>13);
 *   ...
 *
 * Unfortunately, superscalar Pentiums and Sparcs can't take advantage
 * of that parallelism. They've also turned some of those single-cycle
 * latency instructions into multi-cycle latency instructions. Still,
 * this is the fastest good hash I could find. There were about 2^^68
 * to choose from. I only looked at a billion or so.
 */
#define _c_mix(a,b,c) \
{ \
  a -= b; a -= c; a ^= (c>>13); \
  b -= c; b -= a; b ^= (a<<8);  \
  c -= a; c -= b; c ^= (b>>13); \
  a -= b; a -= c; a ^= (c>>12); \
  b -= c; b -= a; b ^= (a<<16); \
  c -= a; c -= b; c ^= (b>>5);  \
  a -= b; a -= c; a ^= (c>>3);  \
  b -= c; b -= a; b ^= (a<<10); \
  c -= a; c -= b; c ^= (b>>15); \
}

/**
 * _c_mix64 -- Mix 3 64-bit values reversibly.
 *
 * _c_mix64() takes 48 machine instructions, but only 24 cycles on a
 * superscalar machine (like Intel's new MMX architecture). It requires
 * 4 64-bit registers for 4::2 parallelism.
 * All 1-bit deltas, all 2-bit deltas, all deltas composed of top bits of
 * (a,b,c), and all deltas of bottom bits were tested. All deltas were
 * tested both on random keys and on keys that were nearly all zero.
 * These deltas all cause every bit of c to change between 1/3 and 2/3
 * of the time (well, only 113/400 to 287/400 of the time for some
 * 2-bit delta). These deltas all cause at least 80 bits to change
 * among (a,b,c) when _c_mix64 is run either forward or backward (yes,
 * it is reversible).
 * This implies that a hash using _c_mix64 has no funnels. There may be
 * characteristics with 3-bit deltas or bigger; I didn't test for
 * those.
 */
#define _c_mix64(a,b,c) \
{ \
  a -= b; a -= c; a ^= (c>>43); \
  b -= c; b -= a; b ^= (a<<9);  \
  c -= a; c -= b; c ^= (b>>8);  \
  a -= b; a -= c; a ^= (c>>38); \
  b -= c; b -= a; b ^= (a<<23); \
  c -= a; c -= b; c ^= (b>>5);  \
  a -= b; a -= c; a ^= (c>>35); \
  b -= c; b -= a; b ^= (a<<49); \
  c -= a; c -= b; c ^= (b>>11); \
  a -= b; a -= c; a ^= (c>>12); \
  b -= c; b -= a; b ^= (a<<18); \
  c -= a; c -= b; c ^= (b>>22); \
}

/**
 * @brief hash a variable-length key into a 32-bit value
 *
 * The best hash table sizes are powers of 2. There is no need to do
 * mod a prime (mod is sooo slow!). If you need less than 32 bits,
 * use a bitmask. For example, if you need only 10 bits, do
 *   h = (h & c_hashmask(10));
 * In which case, the hash table should have c_hashsize(10) elements.
 *
 * Use for hash table lookup, or anything where one collision in 2^32 is
 * acceptable. Do NOT use for cryptographic purposes.
 *
 * @param k        The key (the unaligned variable-length array of bytes).
 * @param length   The length of the key, counting by bytes.
 * @param initval  Initial value, can be any 4-byte value.
 *
 * @return Returns a 32-bit value. Every bit of the key affects every bit
 *         of the return value. Every 1-bit and 2-bit delta achieves
 *         avalanche. About 36+6len instructions.
 */
static inline uint32_t c_jhash(const uint8_t *k, uint32_t length, uint32_t initval) {
  uint32_t a, b, c, len;

  /* Set up the internal state */
  len = length;
  a = b = 0x9e3779b9;  /* the golden ratio; an arbitrary value */
  c = initval;         /* the previous hash value */

  /* handle most of the key */
  while (len >= 12) {
    a += (k[0] + ((uint32_t)k[1]<<8) + ((uint32_t)k[2]<<16)  + ((uint32_t)k[3]<<24));
    b += (k[4] + ((uint32_t)k[5]<<8) + ((uint32_t)k[6]<<16)  + ((uint32_t)k[7]<<24));
    c += (k[8] + ((uint32_t)k[9]<<8) + ((uint32_t)k[10]<<16) + ((uint32_t)k[11]<<24));
    _c_mix(a,b,c);
    k += 12; len -= 12;
  }

  /* handle the last 11 bytes */
  c += length;
  /* all the case statements fall through */
  switch(len) {
  case 11: c += ((uint32_t)k[10]<<24);
  case 10: c += ((uint32_t)k[9]<<16);
  case 9 : c += ((uint32_t)k[8]<<8);
  /* the first byte of c is reserved for the length */
  case 8 : b += ((uint32_t)k[7]<<24);
  case 7 : b += ((uint32_t)k[6]<<16);
  case 6 : b += ((uint32_t)k[5]<<8);
  case 5 : b += k[4];
  case 4 : a += ((uint32_t)k[3]<<24);
  case 3 : a += ((uint32_t)k[2]<<16);
  case 2 : a += ((uint32_t)k[1]<<8);
  case 1 : a += k[0];
  /* case 0: nothing left to add */
  }
  _c_mix(a,b,c);

  return c;
}

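As a usage sketch (the table size, helper name, and string key below are illustrative assumptions, not part of this header): for the hash-table lookup described above, the 32-bit result is masked down with c_hashmask(), so a table of c_hashsize(10) buckets could be indexed like this:

  #include <string.h>
  #include "c_jhash.h"

  #define TABLE_BITS 10  /* hypothetical table size: c_hashsize(10) == 1024 buckets */

  static uint32_t bucket_for(const char *key) {
    /* hash the key bytes, then keep only the low TABLE_BITS bits */
    uint32_t h = c_jhash((const uint8_t *) key, (uint32_t) strlen(key), 0);
    return h & c_hashmask(TABLE_BITS);
  }
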
/**
 * @brief hash a variable-length key into a 64-bit value
 *
 * The best hash table sizes are powers of 2. There is no need to do
 * mod a prime (mod is sooo slow!). If you need less than 64 bits,
 * use a bitmask. For example, if you need only 10 bits, do
 *   h = (h & c_hashmask(10));
 * In which case, the hash table should have c_hashsize(10) elements.
 *
 * Use for hash table lookup, or anything where one collision in 2^64
 * is acceptable. Do NOT use for cryptographic purposes.
 *
 * @param k       The key (the unaligned variable-length array of bytes).
 * @param length  The length of the key, counting by bytes.
 * @param intval  Initial value, can be any 8-byte value.
 *
 * @return A 64-bit value. Every bit of the key affects every bit of
 *         the return value. No funnels. Every 1-bit and 2-bit delta
 *         achieves avalanche. About 41+5len instructions.
 */
static inline uint64_t c_jhash64(const uint8_t *k, uint64_t length, uint64_t intval) {
  uint64_t a, b, c, len;

  /* Set up the internal state */
  len = length;
  a = b = intval;            /* the previous hash value */
  c = 0x9e3779b97f4a7c13LL;  /* the golden ratio; an arbitrary value */

  /* handle most of the key */
  while (len >= 24) {
    a += (k[ 0] + ((uint64_t)k[ 1]<< 8) + ((uint64_t)k[ 2]<<16) + ((uint64_t)k[ 3]<<24)
        + ((uint64_t)k[ 4]<<32) + ((uint64_t)k[ 5]<<40) + ((uint64_t)k[ 6]<<48) + ((uint64_t)k[ 7]<<56));
    b += (k[ 8] + ((uint64_t)k[ 9]<< 8) + ((uint64_t)k[10]<<16) + ((uint64_t)k[11]<<24)
        + ((uint64_t)k[12]<<32) + ((uint64_t)k[13]<<40) + ((uint64_t)k[14]<<48) + ((uint64_t)k[15]<<56));
    c += (k[16] + ((uint64_t)k[17]<< 8) + ((uint64_t)k[18]<<16) + ((uint64_t)k[19]<<24)
        + ((uint64_t)k[20]<<32) + ((uint64_t)k[21]<<40) + ((uint64_t)k[22]<<48) + ((uint64_t)k[23]<<56));
    _c_mix64(a,b,c);
    k += 24; len -= 24;
  }

  /* handle the last 23 bytes */
  c += length;
  /* all the case statements fall through */
  switch(len) {
  case 23: c += ((uint64_t)k[22]<<56);
  case 22: c += ((uint64_t)k[21]<<48);
  case 21: c += ((uint64_t)k[20]<<40);
  case 20: c += ((uint64_t)k[19]<<32);
  case 19: c += ((uint64_t)k[18]<<24);
  case 18: c += ((uint64_t)k[17]<<16);
  case 17: c += ((uint64_t)k[16]<<8);
  /* the first byte of c is reserved for the length */
  case 16: b += ((uint64_t)k[15]<<56);
  case 15: b += ((uint64_t)k[14]<<48);
  case 14: b += ((uint64_t)k[13]<<40);
  case 13: b += ((uint64_t)k[12]<<32);
  case 12: b += ((uint64_t)k[11]<<24);
  case 11: b += ((uint64_t)k[10]<<16);
  case 10: b += ((uint64_t)k[ 9]<<8);
  case 9 : b += ((uint64_t)k[ 8]);
  case 8 : a += ((uint64_t)k[ 7]<<56);
  case 7 : a += ((uint64_t)k[ 6]<<48);
  case 6 : a += ((uint64_t)k[ 5]<<40);
  case 5 : a += ((uint64_t)k[ 4]<<32);
  case 4 : a += ((uint64_t)k[ 3]<<24);
  case 3 : a += ((uint64_t)k[ 2]<<16);
  case 2 : a += ((uint64_t)k[ 1]<<8);
  case 1 : a += ((uint64_t)k[ 0]);
  /* case 0: nothing left to add */
  }
  _c_mix64(a,b,c);

  return c;
}

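A sketch of hashing a record with more than one field (the struct and field names below are hypothetical, not from this header): per the "previous hash value" comments above, the result of one call can be passed as the initial value of the next, so several fields chain into a single 64-bit hash. Hashing the raw bytes of an integer field this way is endianness-dependent, which is fine for an in-memory table:

  #include <string.h>
  #include "c_jhash.h"

  struct entry {
    const char *path;
    uint64_t    size;
  };

  static uint64_t entry_hash(const struct entry *e) {
    /* hash the path, then chain the size by feeding the previous hash back in */
    uint64_t h = c_jhash64((const uint8_t *) e->path, (uint64_t) strlen(e->path), 0);
    return c_jhash64((const uint8_t *) &e->size, (uint64_t) sizeof(e->size), h);
  }
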
/**
 * @}
 */
#endif /* _C_JHASH_H */