00001 /* 00002 * Copyright 2004-2006 Intel Corporation 00003 * 00004 * Licensed under the Apache License, Version 2.0 (the "License"); 00005 * you may not use this file except in compliance with the License. 00006 * You may obtain a copy of the License at 00007 * 00008 * http://www.apache.org/licenses/LICENSE-2.0 00009 * 00010 * Unless required by applicable law or agreed to in writing, software 00011 * distributed under the License is distributed on an "AS IS" BASIS, 00012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00013 * See the License for the specific language governing permissions and 00014 * limitations under the License. 00015 */ 00016 00017 00018 #include "jenkins_hash.h" 00019 00020 namespace oasys { 00021 00022 /* 00023 * Fast general purpose hash function from Bob Jenkins: 00024 * http://burtleburtle.net/bob/hash/doobs.html 00025 * 00026 * Slightly modified by mike demmer to use standard type definitions 00027 * and parameter types inside the declaration. 00028 */ 00029 00030 #define hashsize(n) ((u_int32_t)1<<(n)) 00031 #define hashmask(n) (hashsize(n)-1) 00032 00033 /* 00034 -------------------------------------------------------------------- 00035 mix -- mix 3 32-bit values reversibly. 00036 For every delta with one or two bits set, and the deltas of all three 00037 high bits or all three low bits, whether the original value of a,b,c 00038 is almost all zero or is uniformly distributed, 00039 * If mix() is run forward or backward, at least 32 bits in a,b,c 00040 have at least 1/4 probability of changing. 00041 * If mix() is run forward, every bit of c will change between 1/3 and 00042 2/3 of the time. (Well, 22/100 and 78/100 for some 2-bit deltas.) 00043 mix() was built out of 36 single-cycle latency instructions in a 00044 structure that could supported 2x parallelism, like so: 00045 a -= b; 00046 a -= c; x = (c>>13); 00047 b -= c; a ^= x; 00048 b -= a; x = (a<<8); 00049 c -= a; b ^= x; 00050 c -= b; x = (b>>13); 00051 ... 00052 Unfortunately, superscalar Pentiums and Sparcs can't take advantage 00053 of that parallelism. They've also turned some of those single-cycle 00054 latency instructions into multi-cycle latency instructions. Still, 00055 this is the fastest good hash I could find. There were about 2^^68 00056 to choose from. I only looked at a billion or so. 00057 -------------------------------------------------------------------- 00058 */ 00059 #define mix(a,b,c) \ 00060 { \ 00061 a -= b; a -= c; a ^= (c>>13); \ 00062 b -= c; b -= a; b ^= (a<<8); \ 00063 c -= a; c -= b; c ^= (b>>13); \ 00064 a -= b; a -= c; a ^= (c>>12); \ 00065 b -= c; b -= a; b ^= (a<<16); \ 00066 c -= a; c -= b; c ^= (b>>5); \ 00067 a -= b; a -= c; a ^= (c>>3); \ 00068 b -= c; b -= a; b ^= (a<<10); \ 00069 c -= a; c -= b; c ^= (b>>15); \ 00070 } 00071 00072 /* 00073 -------------------------------------------------------------------- 00074 hash() -- hash a variable-length key into a 32-bit value 00075 k : the key (the unaligned variable-length array of bytes) 00076 len : the length of the key, counting by bytes 00077 initval : can be any 4-byte value 00078 Returns a 32-bit value. Every bit of the key affects every bit of 00079 the return value. Every 1-bit and 2-bit delta achieves avalanche. 00080 About 6*len+35 instructions. 00081 00082 The best hash table sizes are powers of 2. There is no need to do 00083 mod a prime (mod is sooo slow!). If you need less than 32 bits, 00084 use a bitmask. For example, if you need only 10 bits, do 00085 h = (h & hashmask(10)); 00086 In which case, the hash table should have hashsize(10) elements. 00087 00088 If you are hashing n strings (u_int8_t **)k, do it like this: 00089 for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h); 00090 00091 By Bob Jenkins, 1996. bob_jenkins@burtleburtle.net. You may use this 00092 code any way you wish, private, educational, or commercial. It's free. 00093 00094 See http://burtleburtle.net/bob/hash/evahash.html 00095 Use for hash table lookup, or anything where one collision in 2^^32 is 00096 acceptable. Do NOT use for cryptographic purposes. 00097 -------------------------------------------------------------------- 00098 */ 00099 00100 u_int32_t 00101 jenkins_hash(u_int8_t *k, /* the key */ 00102 u_int32_t length, /* the length of the key */ 00103 u_int32_t initval) /* the previous hash, or an arbitrary value */ 00104 { 00105 register u_int32_t a,b,c,len; 00106 00107 /* Set up the internal state */ 00108 len = length; 00109 a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */ 00110 c = initval; /* the previous hash value */ 00111 00112 /*---------------------------------------- handle most of the key */ 00113 while (len >= 12) 00114 { 00115 a += (k[0] +((u_int32_t)k[1]<<8) +((u_int32_t)k[2]<<16) +((u_int32_t)k[3]<<24)); 00116 b += (k[4] +((u_int32_t)k[5]<<8) +((u_int32_t)k[6]<<16) +((u_int32_t)k[7]<<24)); 00117 c += (k[8] +((u_int32_t)k[9]<<8) +((u_int32_t)k[10]<<16)+((u_int32_t)k[11]<<24)); 00118 mix(a,b,c); 00119 k += 12; len -= 12; 00120 } 00121 00122 /*------------------------------------- handle the last 11 bytes */ 00123 c += length; 00124 switch(len) /* all the case statements fall through */ 00125 { 00126 case 11: c+=((u_int32_t)k[10]<<24); 00127 case 10: c+=((u_int32_t)k[9]<<16); 00128 case 9 : c+=((u_int32_t)k[8]<<8); 00129 /* the first byte of c is reserved for the length */ 00130 case 8 : b+=((u_int32_t)k[7]<<24); 00131 case 7 : b+=((u_int32_t)k[6]<<16); 00132 case 6 : b+=((u_int32_t)k[5]<<8); 00133 case 5 : b+=k[4]; 00134 case 4 : a+=((u_int32_t)k[3]<<24); 00135 case 3 : a+=((u_int32_t)k[2]<<16); 00136 case 2 : a+=((u_int32_t)k[1]<<8); 00137 case 1 : a+=k[0]; 00138 /* case 0: nothing left to add */ 00139 } 00140 mix(a,b,c); 00141 /*-------------------------------------------- report the result */ 00142 return c; 00143 } 00144 00145 } // namespace oasys