storage/index/utils/hash.h
| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #pragma once | ||
| 2 | |||
| 3 | #include <stddef.h> | ||
| 4 | |||
| 5 | 244244 | inline size_t standard(const void* _ptr, | |
| 6 | size_t _len, | ||
| 7 | size_t _seed = static_cast<size_t>(0xc70f6907UL)) { | ||
| 8 | 244244 | return std::_Hash_bytes(_ptr, _len, _seed); | |
| 9 | } | ||
| 10 | |||
| 11 | // JENKINS HASH FUNCTION | ||
| 12 | inline size_t | ||
| 13 | ✗ | jenkins(const void* _ptr, size_t _len, size_t _seed = 0xc70f6907UL) { | |
| 14 | ✗ | size_t i = 0; | |
| 15 | ✗ | size_t hash = 0; | |
| 16 | ✗ | const char* key = static_cast<const char*>(_ptr); | |
| 17 | ✗ | while (i != _len) { | |
| 18 | ✗ | hash += key[i++]; | |
| 19 | ✗ | hash += hash << (10); | |
| 20 | ✗ | hash ^= hash >> (6); | |
| 21 | } | ||
| 22 | ✗ | hash += hash << (3); | |
| 23 | ✗ | hash ^= hash >> (11); | |
| 24 | ✗ | hash += hash << (15); | |
| 25 | ✗ | return hash; | |
| 26 | } | ||
| 27 | |||
| 28 | //----------------------------------------------------------------------------- | ||
| 29 | // MurmurHash2, by Austin Appleby | ||
| 30 | |||
| 31 | // Note - This code makes a few assumptions about how your machine behaves - | ||
| 32 | |||
| 33 | // 1. We can read a 4-byte value from any address without crashing | ||
| 34 | // 2. sizeof(int) == 4 | ||
| 35 | |||
| 36 | // And it has a few limitations - | ||
| 37 | |||
| 38 | // 1. It will not work incrementally. | ||
| 39 | // 2. It will not produce the same results on little-endian and big-endian | ||
| 40 | // machines. | ||
| 41 | ✗ | inline size_t murmur2(const void* key, size_t len, size_t seed = 0xc70f6907UL) { | |
| 42 | // 'm' and 'r' are mixing constants generated offline. | ||
| 43 | // They're not really 'magic', they just happen to work well. | ||
| 44 | |||
| 45 | ✗ | const unsigned int m = 0x5bd1e995; | |
| 46 | ✗ | const int r = 24; | |
| 47 | |||
| 48 | // Initialize the hash to a 'random' value | ||
| 49 | |||
| 50 | ✗ | unsigned int h = seed ^ len; | |
| 51 | |||
| 52 | // Mix 4 bytes at a time into the hash | ||
| 53 | |||
| 54 | ✗ | const unsigned char* data = (const unsigned char*)key; | |
| 55 | |||
| 56 | ✗ | while (len >= 4) { | |
| 57 | ✗ | unsigned int k = *(unsigned int*)data; | |
| 58 | |||
| 59 | ✗ | k *= m; | |
| 60 | ✗ | k ^= k >> r; | |
| 61 | ✗ | k *= m; | |
| 62 | |||
| 63 | ✗ | h *= m; | |
| 64 | ✗ | h ^= k; | |
| 65 | |||
| 66 | ✗ | data += 4; | |
| 67 | ✗ | len -= 4; | |
| 68 | } | ||
| 69 | |||
| 70 | // Handle the last few bytes of the input array | ||
| 71 | |||
| 72 | ✗ | switch (len) { | |
| 73 | ✗ | case 3: | |
| 74 | ✗ | h ^= data[2] << 16; | |
| 75 | ✗ | case 2: | |
| 76 | ✗ | h ^= data[1] << 8; | |
| 77 | ✗ | case 1: | |
| 78 | ✗ | h ^= data[0]; | |
| 79 | ✗ | h *= m; | |
| 80 | }; | ||
| 81 | |||
| 82 | // Do a few final mixes of the hash to ensure the last few | ||
| 83 | // bytes are well-incorporated. | ||
| 84 | |||
| 85 | ✗ | h ^= h >> 13; | |
| 86 | ✗ | h *= m; | |
| 87 | ✗ | h ^= h >> 15; | |
| 88 | |||
| 89 | ✗ | return h; | |
| 90 | } | ||
| 91 | |||
| 92 | #define NUMBER64_1 11400714785074694791ULL | ||
| 93 | #define NUMBER64_2 14029467366897019727ULL | ||
| 94 | #define NUMBER64_3 1609587929392839161ULL | ||
| 95 | #define NUMBER64_4 9650029242287828579ULL | ||
| 96 | #define NUMBER64_5 2870177450012600261ULL | ||
| 97 | |||
| 98 | #define hash_get64bits(x) hash_read64_align(x, align) | ||
| 99 | #define hash_get32bits(x) hash_read32_align(x, align) | ||
| 100 | #define shifting_hash(x, r) ((x << r) | (x >> (64 - r))) | ||
| 101 | #define TO64(x) (((U64_INT*)(x))->v) | ||
| 102 | #define TO32(x) (((U32_INT*)(x))->v) | ||
| 103 | |||
| 104 | typedef struct U64_INT { | ||
| 105 | uint64_t v; | ||
| 106 | } U64_INT; | ||
| 107 | |||
| 108 | typedef struct U32_INT { | ||
| 109 | uint32_t v; | ||
| 110 | } U32_INT; | ||
| 111 | |||
| 112 | ✗ | inline uint64_t hash_read64_align(const void* ptr, uint32_t align) { | |
| 113 | ✗ | if (align == 0) { | |
| 114 | ✗ | return TO64(ptr); | |
| 115 | } | ||
| 116 | ✗ | return *(uint64_t*)ptr; | |
| 117 | } | ||
| 118 | |||
| 119 | ✗ | inline uint32_t hash_read32_align(const void* ptr, uint32_t align) { | |
| 120 | ✗ | if (align == 0) { | |
| 121 | ✗ | return TO32(ptr); | |
| 122 | } | ||
| 123 | ✗ | return *(uint32_t*)ptr; | |
| 124 | } | ||
| 125 | |||
| 126 | ✗ | inline uint64_t hash_compute( | |
| 127 | const void* input, uint64_t length, uint64_t seed, uint32_t align) { | ||
| 128 | ✗ | const uint8_t* p = (const uint8_t*)input; | |
| 129 | ✗ | const uint8_t* end = p + length; | |
| 130 | uint64_t hash; | ||
| 131 | |||
| 132 | ✗ | if (length >= 32) { | |
| 133 | ✗ | const uint8_t* const limitation = end - 32; | |
| 134 | ✗ | uint64_t v1 = seed + NUMBER64_1 + NUMBER64_2; | |
| 135 | ✗ | uint64_t v2 = seed + NUMBER64_2; | |
| 136 | ✗ | uint64_t v3 = seed + 0; | |
| 137 | ✗ | uint64_t v4 = seed - NUMBER64_1; | |
| 138 | |||
| 139 | do { | ||
| 140 | ✗ | v1 += hash_get64bits(p) * NUMBER64_2; | |
| 141 | ✗ | p += 8; | |
| 142 | ✗ | v1 = shifting_hash(v1, 31); | |
| 143 | ✗ | v1 *= NUMBER64_1; | |
| 144 | ✗ | v2 += hash_get64bits(p) * NUMBER64_2; | |
| 145 | ✗ | p += 8; | |
| 146 | ✗ | v2 = shifting_hash(v2, 31); | |
| 147 | ✗ | v2 *= NUMBER64_1; | |
| 148 | ✗ | v3 += hash_get64bits(p) * NUMBER64_2; | |
| 149 | ✗ | p += 8; | |
| 150 | ✗ | v3 = shifting_hash(v3, 31); | |
| 151 | ✗ | v3 *= NUMBER64_1; | |
| 152 | ✗ | v4 += hash_get64bits(p) * NUMBER64_2; | |
| 153 | ✗ | p += 8; | |
| 154 | ✗ | v4 = shifting_hash(v4, 31); | |
| 155 | ✗ | v4 *= NUMBER64_1; | |
| 156 | ✗ | } while (p <= limitation); | |
| 157 | |||
| 158 | ✗ | hash = shifting_hash(v1, 1) + shifting_hash(v2, 7) + shifting_hash(v3, 12) + | |
| 159 | ✗ | shifting_hash(v4, 18); | |
| 160 | |||
| 161 | ✗ | v1 *= NUMBER64_2; | |
| 162 | ✗ | v1 = shifting_hash(v1, 31); | |
| 163 | ✗ | v1 *= NUMBER64_1; | |
| 164 | ✗ | hash ^= v1; | |
| 165 | ✗ | hash = hash * NUMBER64_1 + NUMBER64_4; | |
| 166 | |||
| 167 | ✗ | v2 *= NUMBER64_2; | |
| 168 | ✗ | v2 = shifting_hash(v2, 31); | |
| 169 | ✗ | v2 *= NUMBER64_1; | |
| 170 | ✗ | hash ^= v2; | |
| 171 | ✗ | hash = hash * NUMBER64_1 + NUMBER64_4; | |
| 172 | |||
| 173 | ✗ | v3 *= NUMBER64_2; | |
| 174 | ✗ | v3 = shifting_hash(v3, 31); | |
| 175 | ✗ | v3 *= NUMBER64_1; | |
| 176 | ✗ | hash ^= v3; | |
| 177 | ✗ | hash = hash * NUMBER64_1 + NUMBER64_4; | |
| 178 | |||
| 179 | ✗ | v4 *= NUMBER64_2; | |
| 180 | ✗ | v4 = shifting_hash(v4, 31); | |
| 181 | ✗ | v4 *= NUMBER64_1; | |
| 182 | ✗ | hash ^= v4; | |
| 183 | ✗ | hash = hash * NUMBER64_1 + NUMBER64_4; | |
| 184 | } else { | ||
| 185 | ✗ | hash = seed + NUMBER64_5; | |
| 186 | } | ||
| 187 | |||
| 188 | ✗ | hash += (uint64_t)length; | |
| 189 | |||
| 190 | ✗ | while (p + 8 <= end) { | |
| 191 | ✗ | uint64_t k1 = hash_get64bits(p); | |
| 192 | ✗ | k1 *= NUMBER64_2; | |
| 193 | ✗ | k1 = shifting_hash(k1, 31); | |
| 194 | ✗ | k1 *= NUMBER64_1; | |
| 195 | ✗ | hash ^= k1; | |
| 196 | ✗ | hash = shifting_hash(hash, 27) * NUMBER64_1 + NUMBER64_4; | |
| 197 | ✗ | p += 8; | |
| 198 | } | ||
| 199 | |||
| 200 | ✗ | if (p + 4 <= end) { | |
| 201 | ✗ | hash ^= (uint64_t)(hash_get32bits(p)) * NUMBER64_1; | |
| 202 | ✗ | hash = shifting_hash(hash, 23) * NUMBER64_2 + NUMBER64_3; | |
| 203 | ✗ | p += 4; | |
| 204 | } | ||
| 205 | |||
| 206 | ✗ | while (p < end) { | |
| 207 | ✗ | hash ^= (*p) * NUMBER64_5; | |
| 208 | ✗ | hash = shifting_hash(hash, 11) * NUMBER64_1; | |
| 209 | ✗ | p++; | |
| 210 | } | ||
| 211 | |||
| 212 | ✗ | hash ^= hash >> 33; | |
| 213 | ✗ | hash *= NUMBER64_2; | |
| 214 | ✗ | hash ^= hash >> 29; | |
| 215 | ✗ | hash *= NUMBER64_3; | |
| 216 | ✗ | hash ^= hash >> 32; | |
| 217 | |||
| 218 | ✗ | return hash; | |
| 219 | } | ||
| 220 | |||
| 221 | ✗ | inline uint64_t xxhash(const void* data, size_t length, size_t seed) { | |
| 222 | ✗ | if ((((uint64_t)data) & 7) == 0) { | |
| 223 | ✗ | return hash_compute(data, length, seed, 1); | |
| 224 | } | ||
| 225 | ✗ | return hash_compute(data, length, seed, 0); | |
| 226 | } | ||
| 227 | |||
| 228 | static size_t (*hash_funcs[4])(const void* key, size_t len, size_t seed) = { | ||
| 229 | standard, murmur2, jenkins, xxhash}; | ||
| 230 | |||
| 231 | 244244 | inline size_t h(const void* key, size_t len, size_t seed = 0xc70697UL) { | |
| 232 | 244244 | return hash_funcs[0](key, len, seed); | |
| 233 | } | ||
| 234 |