namedict.h 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203
  1. #pragma once
  2. #include "common.h"
  3. #include "memory.h"
  4. #include "str.h"
  5. namespace pkpy{
  /// Candidate multipliers for `_hash`. find_perfect_hash_seed() tries each of
  /// them and a freshly constructed NameDictImpl starts with the first one.
  /// `inline constexpr` gives the header a single definition of the table that
  /// every translation unit (and the inline function indexing it) shares.
  inline constexpr uint16_t kHashSeeds[] = {9629, 43049, 13267, 59509, 39251, 1249, 27689, 9719, 19913};

  /// Slot of `key` in a table whose slot mask is `mask`:
  /// multiply `key.index` by the seed, drop the low 8 bits, then apply the mask.
  ///
  /// The product is computed in uint32_t on purpose. Operands narrower than int
  /// are promoted to *signed* int before `*`, and e.g. 65535 * 59509 does not
  /// fit in a 32-bit int, which is undefined behaviour. Unsigned arithmetic
  /// yields the same bits for every product that did fit.
  #define _hash(key, mask, hash_seed) ( ( static_cast<uint32_t>((key).index) * static_cast<uint32_t>(hash_seed) >> 8 ) & (mask) )
  8. inline uint16_t find_perfect_hash_seed(uint16_t capacity, const std::vector<StrName>& keys){
  9. if(keys.empty()) return kHashSeeds[0];
  10. static std::set<uint16_t> indices;
  11. indices.clear();
  12. std::pair<uint16_t, float> best_score = {kHashSeeds[0], 0.0f};
  13. const int kHashSeedsSize = sizeof(kHashSeeds) / sizeof(kHashSeeds[0]);
  14. for(int i=0; i<kHashSeedsSize; i++){
  15. indices.clear();
  16. for(auto key: keys){
  17. uint16_t index = _hash(key, capacity-1, kHashSeeds[i]);
  18. indices.insert(index);
  19. }
  20. float score = indices.size() / (float)keys.size();
  21. if(score > best_score.second) best_score = {kHashSeeds[i], score};
  22. }
  23. return best_score.first;
  24. }
  25. template<typename T>
  26. struct NameDictImpl {
  27. using Item = std::pair<StrName, T>;
  28. static constexpr uint16_t __Capacity = 8;
  29. // ensure the initial capacity is ok for memory pool
  30. static_assert(std::is_pod_v<T>);
  31. static_assert(sizeof(Item) * __Capacity <= 128);
  32. float _load_factor;
  33. uint16_t _capacity;
  34. uint16_t _size;
  35. uint16_t _hash_seed;
  36. uint16_t _mask;
  37. Item* _items;
  38. #define HASH_PROBE(key, ok, i) \
  39. ok = false; \
  40. i = _hash(key, _mask, _hash_seed); \
  41. while(!_items[i].first.empty()) { \
  42. if(_items[i].first == (key)) { ok = true; break; } \
  43. i = (i + 1) & _mask; \
  44. }
  45. #define NAMEDICT_ALLOC() \
  46. _items = (Item*)pool128.alloc(_capacity * sizeof(Item)); \
  47. memset(_items, 0, _capacity * sizeof(Item)); \
  48. NameDictImpl(float load_factor=0.67f):
  49. _load_factor(load_factor), _capacity(__Capacity), _size(0),
  50. _hash_seed(kHashSeeds[0]), _mask(__Capacity-1) {
  51. NAMEDICT_ALLOC()
  52. }
  53. NameDictImpl(const NameDictImpl& other) {
  54. memcpy(this, &other, sizeof(NameDictImpl));
  55. NAMEDICT_ALLOC()
  56. for(int i=0; i<_capacity; i++) _items[i] = other._items[i];
  57. }
  58. NameDictImpl& operator=(const NameDictImpl& other) {
  59. pool128.dealloc(_items);
  60. memcpy(this, &other, sizeof(NameDictImpl));
  61. NAMEDICT_ALLOC()
  62. for(int i=0; i<_capacity; i++) _items[i] = other._items[i];
  63. return *this;
  64. }
  65. ~NameDictImpl(){ pool128.dealloc(_items); }
  66. NameDictImpl(NameDictImpl&&) = delete;
  67. NameDictImpl& operator=(NameDictImpl&&) = delete;
  68. uint16_t size() const { return _size; }
  69. T operator[](StrName key) const {
  70. bool ok; uint16_t i;
  71. HASH_PROBE(key, ok, i);
  72. if(!ok) throw std::out_of_range(fmt("NameDict key not found: ", key));
  73. return _items[i].second;
  74. }
  75. void set(StrName key, T val){
  76. bool ok; uint16_t i;
  77. HASH_PROBE(key, ok, i);
  78. if(!ok) {
  79. _size++;
  80. if(_size > _capacity*_load_factor){
  81. _rehash(true);
  82. HASH_PROBE(key, ok, i);
  83. }
  84. _items[i].first = key;
  85. }
  86. _items[i].second = val;
  87. }
  88. void _rehash(bool resize){
  89. Item* old_items = _items;
  90. uint16_t old_capacity = _capacity;
  91. if(resize){
  92. _capacity *= 2;
  93. _mask = _capacity - 1;
  94. }
  95. NAMEDICT_ALLOC()
  96. for(uint16_t i=0; i<old_capacity; i++){
  97. if(old_items[i].first.empty()) continue;
  98. bool ok; uint16_t j;
  99. HASH_PROBE(old_items[i].first, ok, j);
  100. if(ok) FATAL_ERROR();
  101. _items[j] = old_items[i];
  102. }
  103. pool128.dealloc(old_items);
  104. }
  105. void _try_perfect_rehash(){
  106. _hash_seed = find_perfect_hash_seed(_capacity, keys());
  107. _rehash(false); // do not resize
  108. }
  109. T try_get(StrName key) const{
  110. bool ok; uint16_t i;
  111. HASH_PROBE(key, ok, i);
  112. if(!ok){
  113. if constexpr(std::is_pointer_v<T>) return nullptr;
  114. else if constexpr(std::is_same_v<int, T>) return -1;
  115. else return Discarded();
  116. }
  117. return _items[i].second;
  118. }
  119. bool try_set(StrName key, T val){
  120. bool ok; uint16_t i;
  121. HASH_PROBE(key, ok, i);
  122. if(!ok) return false;
  123. _items[i].second = val;
  124. return true;
  125. }
  126. bool contains(StrName key) const {
  127. bool ok; uint16_t i;
  128. HASH_PROBE(key, ok, i);
  129. return ok;
  130. }
  131. void update(const NameDictImpl& other){
  132. for(uint16_t i=0; i<other._capacity; i++){
  133. auto& item = other._items[i];
  134. if(!item.first.empty()) set(item.first, item.second);
  135. }
  136. }
  137. void erase(StrName key){
  138. bool ok; uint16_t i;
  139. HASH_PROBE(key, ok, i);
  140. if(!ok) throw std::out_of_range(fmt("NameDict key not found: ", key));
  141. _items[i].first = StrName();
  142. _items[i].second = nullptr;
  143. _size--;
  144. }
  145. std::vector<Item> items() const {
  146. std::vector<Item> v;
  147. for(uint16_t i=0; i<_capacity; i++){
  148. if(_items[i].first.empty()) continue;
  149. v.push_back(_items[i]);
  150. }
  151. return v;
  152. }
  153. std::vector<StrName> keys() const {
  154. std::vector<StrName> v;
  155. for(uint16_t i=0; i<_capacity; i++){
  156. if(_items[i].first.empty()) continue;
  157. v.push_back(_items[i].first);
  158. }
  159. return v;
  160. }
  161. void clear(){
  162. for(uint16_t i=0; i<_capacity; i++){
  163. _items[i].first = StrName();
  164. _items[i].second = nullptr;
  165. }
  166. _size = 0;
  167. }
  168. #undef HASH_PROBE
  169. #undef NAMEDICT_ALLOC
  170. #undef _hash
  171. };
  // Table mapping names to PyObject pointers; try_get() yields nullptr for a
  // missing key in this instantiation.
  using NameDict = NameDictImpl<PyObject*>;
  // shared_ptr handle to a NameDict (whichever `shared_ptr` is visible inside
  // namespace pkpy).
  using NameDict_ = shared_ptr<NameDict>;
  // Table mapping names to ints; try_get() yields -1 for a missing key in this
  // instantiation.
  using NameDictInt = NameDictImpl<int>;
  175. } // namespace pkpy