namedict.h 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212
  1. #pragma once
  2. #include "common.h"
  3. #include "memory.h"
  4. #include "str.h"
  5. namespace pkpy{
  6. const std::vector<uint16_t> kHashSeeds = {9629, 43049, 13267, 59509, 39251, 1249, 35803, 54469, 27689, 9719, 34897, 18973, 30661, 19913, 27919, 32143};
  7. inline uint16_t find_next_power_of_2(uint16_t n){
  8. uint16_t x = 2;
  9. while(x < n) x <<= 1;
  10. return x;
  11. }
  12. #define _hash(key, mask, hash_seed) ( ( (key).index * (hash_seed) >> 8 ) & (mask) )
  13. inline uint16_t find_perfect_hash_seed(uint16_t capacity, const std::vector<StrName>& keys){
  14. if(keys.empty()) return kHashSeeds[0];
  15. static std::set<uint16_t> indices;
  16. indices.clear();
  17. std::pair<uint16_t, float> best_score = {kHashSeeds[0], 0.0f};
  18. for(int i=0; i<kHashSeeds.size(); i++){
  19. indices.clear();
  20. for(auto key: keys){
  21. uint16_t index = _hash(key, capacity-1, kHashSeeds[i]);
  22. indices.insert(index);
  23. }
  24. float score = indices.size() / (float)keys.size();
  25. if(score > best_score.second) best_score = {kHashSeeds[i], score};
  26. }
  27. return best_score.first;
  28. }
  29. template<typename T>
  30. struct NameDictImpl {
  31. using Item = std::pair<StrName, T>;
  32. static constexpr uint16_t __Capacity = 8;
  33. // ensure the initial capacity is ok for memory pool
  34. static_assert(std::is_pod_v<T>);
  35. static_assert(sizeof(Item) * __Capacity <= 128);
  36. float _load_factor;
  37. uint16_t _capacity;
  38. uint16_t _size;
  39. uint16_t _hash_seed;
  40. uint16_t _mask;
  41. Item* _items;
  42. void _alloc(int cap){
  43. _items = (Item*)pool128.alloc(cap * sizeof(Item));
  44. memset(_items, 0, cap * sizeof(Item));
  45. }
  46. NameDictImpl(float load_factor=0.67, uint16_t capacity=__Capacity, uint16_t hash_seed=kHashSeeds[0]):
  47. _load_factor(load_factor), _capacity(capacity), _size(0),
  48. _hash_seed(hash_seed), _mask(capacity-1) {
  49. _alloc(capacity);
  50. }
  51. NameDictImpl(const NameDictImpl& other) {
  52. memcpy(this, &other, sizeof(NameDictImpl));
  53. _alloc(_capacity);
  54. for(int i=0; i<_capacity; i++){
  55. _items[i] = other._items[i];
  56. }
  57. }
  58. NameDictImpl& operator=(const NameDictImpl& other) {
  59. pool128.dealloc(_items);
  60. memcpy(this, &other, sizeof(NameDictImpl));
  61. _alloc(_capacity);
  62. for(int i=0; i<_capacity; i++){
  63. _items[i] = other._items[i];
  64. }
  65. return *this;
  66. }
  67. ~NameDictImpl(){ pool128.dealloc(_items); }
  68. NameDictImpl(NameDictImpl&&) = delete;
  69. NameDictImpl& operator=(NameDictImpl&&) = delete;
  70. uint16_t size() const { return _size; }
  71. #define HASH_PROBE(key, ok, i) \
  72. ok = false; \
  73. i = _hash(key, _mask, _hash_seed); \
  74. while(!_items[i].first.empty()) { \
  75. if(_items[i].first == (key)) { ok = true; break; } \
  76. i = (i + 1) & _mask; \
  77. }
  78. T operator[](StrName key) const {
  79. bool ok; uint16_t i;
  80. HASH_PROBE(key, ok, i);
  81. if(!ok) throw std::out_of_range(fmt("NameDict key not found: ", key));
  82. return _items[i].second;
  83. }
  84. void set(StrName key, T val){
  85. bool ok; uint16_t i;
  86. HASH_PROBE(key, ok, i);
  87. if(!ok) {
  88. _size++;
  89. if(_size > _capacity*_load_factor){
  90. _rehash(true);
  91. HASH_PROBE(key, ok, i);
  92. }
  93. _items[i].first = key;
  94. }
  95. _items[i].second = val;
  96. }
  97. void _rehash(bool resize){
  98. Item* old_items = _items;
  99. uint16_t old_capacity = _capacity;
  100. if(resize){
  101. _capacity = find_next_power_of_2(_capacity * 2);
  102. _mask = _capacity - 1;
  103. }
  104. _alloc(_capacity);
  105. for(uint16_t i=0; i<old_capacity; i++){
  106. if(old_items[i].first.empty()) continue;
  107. bool ok; uint16_t j;
  108. HASH_PROBE(old_items[i].first, ok, j);
  109. if(ok) FATAL_ERROR();
  110. _items[j] = old_items[i];
  111. }
  112. pool128.dealloc(old_items);
  113. }
  114. void _try_perfect_rehash(){
  115. _hash_seed = find_perfect_hash_seed(_capacity, keys());
  116. _rehash(false); // do not resize
  117. }
  118. T try_get(StrName key) const{
  119. bool ok; uint16_t i;
  120. HASH_PROBE(key, ok, i);
  121. if(!ok){
  122. if constexpr(std::is_pointer_v<T>) return nullptr;
  123. else if constexpr(std::is_same_v<int, T>) return -1;
  124. else return Discarded();
  125. }
  126. return _items[i].second;
  127. }
  128. bool try_set(StrName key, T val){
  129. bool ok; uint16_t i;
  130. HASH_PROBE(key, ok, i);
  131. if(!ok) return false;
  132. _items[i].second = val;
  133. return true;
  134. }
  135. bool contains(StrName key) const {
  136. bool ok; uint16_t i;
  137. HASH_PROBE(key, ok, i);
  138. return ok;
  139. }
  140. void update(const NameDictImpl& other){
  141. for(uint16_t i=0; i<other._capacity; i++){
  142. auto& item = other._items[i];
  143. if(!item.first.empty()) set(item.first, item.second);
  144. }
  145. }
  146. void erase(StrName key){
  147. bool ok; uint16_t i;
  148. HASH_PROBE(key, ok, i);
  149. if(!ok) throw std::out_of_range(fmt("NameDict key not found: ", key));
  150. _items[i].first = StrName();
  151. _items[i].second = nullptr;
  152. _size--;
  153. }
  154. std::vector<Item> items() const {
  155. std::vector<Item> v;
  156. for(uint16_t i=0; i<_capacity; i++){
  157. if(_items[i].first.empty()) continue;
  158. v.push_back(_items[i]);
  159. }
  160. return v;
  161. }
  162. std::vector<StrName> keys() const {
  163. std::vector<StrName> v;
  164. for(uint16_t i=0; i<_capacity; i++){
  165. if(_items[i].first.empty()) continue;
  166. v.push_back(_items[i].first);
  167. }
  168. return v;
  169. }
  170. void clear(){
  171. for(uint16_t i=0; i<_capacity; i++){
  172. _items[i].first = StrName();
  173. _items[i].second = nullptr;
  174. }
  175. _size = 0;
  176. }
  177. #undef HASH_PROBE
  178. #undef _hash
  179. };
  180. using NameDict = NameDictImpl<PyObject*>;
  181. using NameDict_ = shared_ptr<NameDict>;
  182. using NameDictInt = NameDictImpl<int>;
  183. } // namespace pkpy