1
0

xhash.hpp 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208
  1. /***************************************************************************
  2. * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht *
  3. * Copyright (c) QuantStack *
  4. * *
  5. * Distributed under the terms of the BSD 3-Clause License. *
  6. * *
  7. * The full license is in the file LICENSE, distributed with this software. *
  8. ****************************************************************************/
  9. #ifndef XTL_HASH_HPP
  10. #define XTL_HASH_HPP
  11. #include <cstddef>
  12. #include <cstdint>
  13. #include <cstring>
  14. #include <type_traits>
  15. namespace xtl
  16. {
  17. std::size_t hash_bytes(const void* buffer, std::size_t length, std::size_t seed);
  18. uint32_t murmur2_x86(const void* buffer, std::size_t length, uint32_t seed);
  19. uint64_t murmur2_x64(const void* buffer, std::size_t length, uint64_t seed);
  20. /******************************
  21. * hash_bytes implementation *
  22. ******************************/
  23. namespace detail
  24. {
  25. // Dummy hash implementation for unusual sizeof(std::size_t)
  26. template <std::size_t N>
  27. std::size_t murmur_hash(const void* buffer, std::size_t length, std::size_t seed)
  28. {
  29. std::size_t hash = seed;
  30. const char* data = static_cast<const char*>(buffer);
  31. for (; length != 0; --length)
  32. {
  33. hash = (hash * 131) + static_cast<std::size_t>(*data++);
  34. }
  35. return hash;
  36. }
  37. // Murmur hash is an algorithm written by Austin Appleby. See https://github.com/aappleby/smhasher/blob/master/src/MurmurHash2.cpp
  38. inline uint32_t murmur2_x86_impl(const void* buffer, std::size_t length, uint32_t seed)
  39. {
  40. const uint32_t m = 0x5bd1e995;
  41. uint32_t len = static_cast<uint32_t>(length);
  42. // Initialize the hash to a 'random' value
  43. uint32_t h = seed ^ len;
  44. // Mix 4 bytes at a time into the hash
  45. const unsigned char * data = (const unsigned char *)buffer;
  46. while(len >= 4)
  47. {
  48. uint32_t k = *(uint32_t*)data;
  49. k *= m;
  50. k ^= k >> 24;
  51. k *= m;
  52. h *= m;
  53. h ^= k;
  54. data += 4;
  55. len -= 4;
  56. }
  57. // Handle the last few bytes of the input array
  58. switch(len)
  59. {
  60. case 3: h ^= static_cast<uint32_t>(data[2] << 16);
  61. case 2: h ^= static_cast<uint32_t>(data[1] << 8);
  62. case 1: h ^= static_cast<uint32_t>(data[0]);
  63. h *= m;
  64. };
  65. // Do a few final mixes of the hash to ensure the last few
  66. // // bytes are well-incorporated.
  67. h ^= h >> 13;
  68. h *= m;
  69. h ^= h >> 15;
  70. return h;
  71. }
  72. template <>
  73. inline std::size_t murmur_hash<4>(const void* buffer, std::size_t length, std::size_t seed)
  74. {
  75. return std::size_t(murmur2_x86_impl(buffer, length, static_cast<uint32_t>(seed)));
  76. }
  77. inline std::size_t load_bytes(const char* p, int n)
  78. {
  79. std::size_t result = 0;
  80. --n;
  81. do
  82. {
  83. result = (result << 8) + static_cast<unsigned char>(p[n]);
  84. } while (--n >= 0);
  85. return result;
  86. }
  87. #if INTPTR_MAX == INT64_MAX
  88. // 64-bits hash for 64-bits platform
  89. template <>
  90. inline std::size_t murmur_hash<8>(const void* buffer, std::size_t length, std::size_t seed)
  91. {
  92. constexpr std::size_t m = (static_cast<std::size_t>(0xc6a4a793UL) << 32UL) +
  93. static_cast<std::size_t>(0x5bd1e995UL);
  94. constexpr int r = 47;
  95. const char* data = static_cast<const char*>(buffer);
  96. const char* end = data + (length & std::size_t(~0x7));
  97. std::size_t hash = seed ^ (length * m);
  98. while (data != end)
  99. {
  100. std::size_t k;
  101. std::memcpy(&k, data, sizeof(k));
  102. k *= m;
  103. k ^= k >> r;
  104. k *= m;
  105. hash ^= k;
  106. hash *= m;
  107. data += 8;
  108. }
  109. if ((length & 0x7) != 0)
  110. {
  111. std::size_t k = load_bytes(end, length & 0x7);
  112. hash ^= k;
  113. hash *= m;
  114. }
  115. hash ^= hash >> r;
  116. hash *= m;
  117. hash ^= hash >> r;
  118. return hash;
  119. }
  120. #elif INTPTR_MAX == INT32_MAX
  121. //64-bits hash for 32-bits platform
  122. inline void mmix(uint32_t& h, uint32_t& k, uint32_t m, int r)
  123. {
  124. k *= m; k ^= k >> r; k *= m; h *= m; h ^= k;
  125. }
  126. template <>
  127. inline std::size_t murmur_hash<8>(const void* buffer, std::size_t length, std::size_t seed)
  128. {
  129. const uint32_t m = 0x5bd1e995;
  130. const int r = 24;
  131. uint32_t l = length;
  132. const auto* data = reinterpret_cast<const unsigned char*>(buffer);
  133. uint32_t h = seed;
  134. while (length >= 4)
  135. {
  136. uint32_t k = *(uint32_t*)data;
  137. mmix(h, k, m, r);
  138. data += 4;
  139. length -= 4;
  140. }
  141. uint32_t t = 0;
  142. switch (length)
  143. {
  144. case 3: t ^= data[2] << 16;
  145. case 2: t ^= data[1] << 8;
  146. case 1: t ^= data[0];
  147. };
  148. mmix(h, t, m, r);
  149. mmix(h, l, m, r);
  150. h ^= h >> 13;
  151. h *= m;
  152. h ^= h >> 15;
  153. return h;
  154. }
  155. #else
  156. #error Unknown pointer size or missing size macros!
  157. #endif
  158. }
  159. inline std::size_t hash_bytes(const void* buffer, std::size_t length, std::size_t seed)
  160. {
  161. return detail::murmur_hash<sizeof(std::size_t)>(buffer, length, seed);
  162. }
  163. inline uint32_t murmur2_x86(const void* buffer, std::size_t length, uint32_t seed)
  164. {
  165. return detail::murmur2_x86_impl(buffer, length, seed);
  166. }
  167. inline uint64_t murmur2_x64(const void* buffer, std::size_t length, uint64_t seed)
  168. {
  169. return detail::murmur_hash<8>(buffer, length, seed);
  170. }
  171. }
  172. #endif