namedict.h 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204
  1. #pragma once
  2. #include "common.h"
  3. #include "memory.h"
  4. #include "str.h"
  5. namespace pkpy{
  6. const std::vector<uint16_t> kHashSeeds = {9629, 43049, 13267, 59509, 39251, 1249, 35803, 54469, 27689, 9719, 34897, 18973, 30661, 19913, 27919, 32143, 3467, 28019, 1051, 39419, 1361, 28547, 48197, 2609, 24317, 22861, 41467, 17623, 52837, 59053, 33589, 32117};
  7. inline uint16_t find_next_power_of_2(uint16_t n){
  8. uint16_t x = 2;
  9. while(x < n) x <<= 1;
  10. return x;
  11. }
  12. #define _hash(key, mask, hash_seed) ( ( (key).index * (hash_seed) >> 8 ) & (mask) )
  13. inline uint16_t find_perfect_hash_seed(uint16_t capacity, const std::vector<StrName>& keys){
  14. if(keys.empty()) return kHashSeeds[0];
  15. std::set<uint16_t> indices;
  16. std::pair<uint16_t, float> best_score = {kHashSeeds[0], 0.0f};
  17. for(int i=0; i<kHashSeeds.size(); i++){
  18. indices.clear();
  19. for(auto key: keys){
  20. uint16_t index = _hash(key, capacity-1, kHashSeeds[i]);
  21. indices.insert(index);
  22. }
  23. float score = indices.size() / (float)keys.size();
  24. if(score > best_score.second) best_score = {kHashSeeds[i], score};
  25. }
  26. return best_score.first;
  27. }
  28. template<typename T>
  29. struct NameDictImpl {
  30. using Item = std::pair<StrName, T>;
  31. static constexpr uint16_t __Capacity = 8;
  32. // ensure the initial capacity is ok for memory pool
  33. static_assert(std::is_pod_v<T>);
  34. static_assert(sizeof(Item) * __Capacity <= 128);
  35. float _load_factor;
  36. uint16_t _capacity;
  37. uint16_t _size;
  38. uint16_t _hash_seed;
  39. uint16_t _mask;
  40. Item* _items;
  41. void _alloc(int cap){
  42. _items = (Item*)pool128.alloc(cap * sizeof(Item));
  43. memset(_items, 0, cap * sizeof(Item));
  44. }
  45. NameDictImpl(float load_factor=0.67, uint16_t capacity=__Capacity, uint16_t hash_seed=kHashSeeds[0]):
  46. _load_factor(load_factor), _capacity(capacity), _size(0),
  47. _hash_seed(hash_seed), _mask(capacity-1) {
  48. _alloc(capacity);
  49. }
  50. NameDictImpl(const NameDictImpl& other) {
  51. memcpy(this, &other, sizeof(NameDictImpl));
  52. _alloc(_capacity);
  53. for(int i=0; i<_capacity; i++){
  54. _items[i] = other._items[i];
  55. }
  56. }
  57. NameDictImpl& operator=(const NameDictImpl& other) {
  58. pool128.dealloc(_items);
  59. memcpy(this, &other, sizeof(NameDictImpl));
  60. _alloc(_capacity);
  61. for(int i=0; i<_capacity; i++){
  62. _items[i] = other._items[i];
  63. }
  64. return *this;
  65. }
  66. ~NameDictImpl(){ pool128.dealloc(_items); }
  67. NameDictImpl(NameDictImpl&&) = delete;
  68. NameDictImpl& operator=(NameDictImpl&&) = delete;
  69. uint16_t size() const { return _size; }
  70. #define HASH_PROBE(key, ok, i) \
  71. ok = false; \
  72. i = _hash(key, _mask, _hash_seed); \
  73. while(!_items[i].first.empty()) { \
  74. if(_items[i].first == (key)) { ok = true; break; } \
  75. i = (i + 1) & _mask; \
  76. }
  77. T operator[](StrName key) const {
  78. bool ok; uint16_t i;
  79. HASH_PROBE(key, ok, i);
  80. if(!ok) throw std::out_of_range(fmt("NameDict key not found: ", key));
  81. return _items[i].second;
  82. }
  83. void set(StrName key, T val){
  84. bool ok; uint16_t i;
  85. HASH_PROBE(key, ok, i);
  86. if(!ok) {
  87. _size++;
  88. if(_size > _capacity*_load_factor){
  89. _rehash(true);
  90. HASH_PROBE(key, ok, i);
  91. }
  92. _items[i].first = key;
  93. }
  94. _items[i].second = val;
  95. }
  96. void _rehash(bool resize){
  97. Item* old_items = _items;
  98. uint16_t old_capacity = _capacity;
  99. if(resize){
  100. _capacity = find_next_power_of_2(_capacity * 2);
  101. _mask = _capacity - 1;
  102. }
  103. _alloc(_capacity);
  104. for(uint16_t i=0; i<old_capacity; i++){
  105. if(old_items[i].first.empty()) continue;
  106. bool ok; uint16_t j;
  107. HASH_PROBE(old_items[i].first, ok, j);
  108. if(ok) FATAL_ERROR();
  109. _items[j] = old_items[i];
  110. }
  111. pool128.dealloc(old_items);
  112. }
  113. void _try_perfect_rehash(){
  114. _hash_seed = find_perfect_hash_seed(_capacity, keys());
  115. _rehash(false); // do not resize
  116. }
  117. T try_get(StrName key) const{
  118. bool ok; uint16_t i;
  119. HASH_PROBE(key, ok, i);
  120. if(!ok){
  121. if constexpr(std::is_pointer_v<T>) return nullptr;
  122. else if constexpr(std::is_same_v<int, T>) return -1;
  123. else return Discarded();
  124. }
  125. return _items[i].second;
  126. }
  127. bool try_set(StrName key, T val){
  128. bool ok; uint16_t i;
  129. HASH_PROBE(key, ok, i);
  130. if(!ok) return false;
  131. _items[i].second = val;
  132. return true;
  133. }
  134. bool contains(StrName key) const {
  135. bool ok; uint16_t i;
  136. HASH_PROBE(key, ok, i);
  137. return ok;
  138. }
  139. void update(const NameDictImpl& other){
  140. for(uint16_t i=0; i<other._capacity; i++){
  141. auto& item = other._items[i];
  142. if(!item.first.empty()) set(item.first, item.second);
  143. }
  144. }
  145. void erase(StrName key){
  146. bool ok; uint16_t i;
  147. HASH_PROBE(key, ok, i);
  148. if(!ok) throw std::out_of_range(fmt("NameDict key not found: ", key));
  149. _items[i].first = StrName();
  150. _items[i].second = nullptr;
  151. _size--;
  152. }
  153. std::vector<Item> items() const {
  154. std::vector<Item> v;
  155. for(uint16_t i=0; i<_capacity; i++){
  156. if(_items[i].first.empty()) continue;
  157. v.push_back(_items[i]);
  158. }
  159. return v;
  160. }
  161. std::vector<StrName> keys() const {
  162. std::vector<StrName> v;
  163. for(uint16_t i=0; i<_capacity; i++){
  164. if(_items[i].first.empty()) continue;
  165. v.push_back(_items[i].first);
  166. }
  167. return v;
  168. }
  169. #undef HASH_PROBE
  170. #undef _hash
  171. };
  172. using NameDict = NameDictImpl<PyObject*>;
  173. using NameDict_ = shared_ptr<NameDict>;
  174. using NameDictInt = NameDictImpl<int>;
  175. using NameDictInt_ = shared_ptr<NameDictInt>;
  176. } // namespace pkpy