// namedict.h (6.8 KB)
  1. #pragma once
#include <new>
#include <stdexcept>

#include "common.h"
#include "memory.h"
#include "str.h"
  5. namespace pkpy{
  6. const int kNameDictNodeSize = sizeof(StrName) + sizeof(PyVar);
  7. template<int __Bucket, int __BucketSize=32>
  8. struct DictArrayPool {
  9. std::vector<StrName*> buckets[__Bucket+1];
  10. StrName* alloc(uint16_t n){
  11. StrName* _keys;
  12. if(n > __Bucket || buckets[n].empty()){
  13. _keys = (StrName*)malloc(kNameDictNodeSize * n);
  14. memset((void*)_keys, 0, kNameDictNodeSize * n);
  15. }else{
  16. _keys = buckets[n].back();
  17. memset((void*)_keys, 0, sizeof(StrName) * n);
  18. buckets[n].pop_back();
  19. }
  20. return _keys;
  21. }
  22. void dealloc(StrName* head, uint16_t n){
  23. PyVar* _values = (PyVar*)(head + n);
  24. if(n > __Bucket || buckets[n].size() >= __BucketSize){
  25. for(int i=0; i<n; i++) _values[i].~PyVar();
  26. free(head);
  27. }else{
  28. buckets[n].push_back(head);
  29. }
  30. }
  31. ~DictArrayPool(){
  32. // let it leak, since this object is static
  33. }
  34. };
  35. const std::vector<uint16_t> kHashSeeds = {9629, 43049, 13267, 59509, 39251, 1249, 35803, 54469, 27689, 9719, 34897, 18973, 30661, 19913, 27919, 32143, 3467, 28019, 1051, 39419, 1361, 28547, 48197, 2609, 24317, 22861, 41467, 17623, 52837, 59053, 33589, 32117};
// Pool that NameDict allocates its key/value arrays from. Arrays of up to 32
// slots are cached; `static` gives each translation unit its own instance.
static DictArrayPool<32> _dict_pool;
  37. uint16_t find_next_capacity(uint16_t n){
  38. uint16_t x = 2;
  39. while(x < n) x <<= 1;
  40. return x;
  41. }
  42. #define _hash(key, mask, hash_seed) ( ( (key).index * (hash_seed) >> 8 ) & (mask) )
  43. uint16_t find_perfect_hash_seed(uint16_t capacity, const std::vector<StrName>& keys){
  44. if(keys.empty()) return kHashSeeds[0];
  45. std::set<uint16_t> indices;
  46. std::pair<uint16_t, float> best_score = {kHashSeeds[0], 0.0f};
  47. for(int i=0; i<kHashSeeds.size(); i++){
  48. indices.clear();
  49. for(auto key: keys){
  50. uint16_t index = _hash(key, capacity-1, kHashSeeds[i]);
  51. indices.insert(index);
  52. }
  53. float score = indices.size() / (float)keys.size();
  54. if(score > best_score.second) best_score = {kHashSeeds[i], score};
  55. }
  56. return best_score.first;
  57. }
  58. struct NameDict {
  59. uint16_t _capacity;
  60. uint16_t _size;
  61. float _load_factor;
  62. uint16_t _hash_seed;
  63. uint16_t _mask;
  64. StrName* _keys;
  65. inline PyVar& value(uint16_t i){
  66. return reinterpret_cast<PyVar*>(_keys + _capacity)[i];
  67. }
  68. inline const PyVar& value(uint16_t i) const {
  69. return reinterpret_cast<const PyVar*>(_keys + _capacity)[i];
  70. }
  71. NameDict(uint16_t capacity=2, float load_factor=0.67, uint16_t hash_seed=kHashSeeds[0]):
  72. _capacity(capacity), _size(0), _load_factor(load_factor),
  73. _hash_seed(hash_seed), _mask(capacity-1) {
  74. _keys = _dict_pool.alloc(capacity);
  75. }
  76. NameDict(const NameDict& other) {
  77. memcpy(this, &other, sizeof(NameDict));
  78. _keys = _dict_pool.alloc(_capacity);
  79. for(int i=0; i<_capacity; i++){
  80. _keys[i] = other._keys[i];
  81. value(i) = other.value(i);
  82. }
  83. }
  84. NameDict& operator=(const NameDict& other) {
  85. _dict_pool.dealloc(_keys, _capacity);
  86. memcpy(this, &other, sizeof(NameDict));
  87. _keys = _dict_pool.alloc(_capacity);
  88. for(int i=0; i<_capacity; i++){
  89. _keys[i] = other._keys[i];
  90. value(i) = other.value(i);
  91. }
  92. return *this;
  93. }
  94. ~NameDict(){ _dict_pool.dealloc(_keys, _capacity); }
  95. NameDict(NameDict&&) = delete;
  96. NameDict& operator=(NameDict&&) = delete;
  97. uint16_t size() const { return _size; }
  98. #define HASH_PROBE(key, ok, i) \
  99. ok = false; \
  100. i = _hash(key, _mask, _hash_seed); \
  101. while(!_keys[i].empty()) { \
  102. if(_keys[i] == (key)) { ok = true; break; } \
  103. i = (i + 1) & _mask; \
  104. }
  105. const PyVar& operator[](StrName key) const {
  106. bool ok; uint16_t i;
  107. HASH_PROBE(key, ok, i);
  108. if(!ok) throw std::out_of_range("NameDict key not found: " + key.str());
  109. return value(i);
  110. }
  111. PyVar& get(StrName key){
  112. bool ok; uint16_t i;
  113. HASH_PROBE(key, ok, i);
  114. if(!ok) throw std::out_of_range("NameDict key not found: " + key.str());
  115. return value(i);
  116. }
  117. template<typename T>
  118. void set(StrName key, T&& val){
  119. bool ok; uint16_t i;
  120. HASH_PROBE(key, ok, i);
  121. if(!ok) {
  122. _size++;
  123. if(_size > _capacity*_load_factor){
  124. _rehash(true);
  125. HASH_PROBE(key, ok, i);
  126. }
  127. _keys[i] = key;
  128. }
  129. value(i) = std::forward<T>(val);
  130. }
  131. void _rehash(bool resize){
  132. StrName* old_keys = _keys;
  133. PyVar* old_values = &value(0);
  134. uint16_t old_capacity = _capacity;
  135. if(resize){
  136. _capacity = find_next_capacity(_capacity * 2);
  137. _mask = _capacity - 1;
  138. }
  139. _keys = _dict_pool.alloc(_capacity);
  140. for(uint16_t i=0; i<old_capacity; i++){
  141. if(old_keys[i].empty()) continue;
  142. bool ok; uint16_t j;
  143. HASH_PROBE(old_keys[i], ok, j);
  144. if(ok) UNREACHABLE();
  145. _keys[j] = old_keys[i];
  146. value(j) = old_values[i]; // std::move makes a segfault
  147. }
  148. _dict_pool.dealloc(old_keys, old_capacity);
  149. }
  150. void _try_perfect_rehash(){
  151. _hash_seed = find_perfect_hash_seed(_capacity, keys());
  152. _rehash(false); // do not resize
  153. }
  154. inline PyVar* try_get(StrName key){
  155. bool ok; uint16_t i;
  156. HASH_PROBE(key, ok, i);
  157. if(!ok) return nullptr;
  158. return &value(i);
  159. }
  160. inline bool try_set(StrName key, PyVar&& val){
  161. bool ok; uint16_t i;
  162. HASH_PROBE(key, ok, i);
  163. if(!ok) return false;
  164. value(i) = std::move(val);
  165. return true;
  166. }
  167. inline bool contains(StrName key) const {
  168. bool ok; uint16_t i;
  169. HASH_PROBE(key, ok, i);
  170. return ok;
  171. }
  172. void update(const NameDict& other){
  173. for(uint16_t i=0; i<other._capacity; i++){
  174. if(other._keys[i].empty()) continue;
  175. set(other._keys[i], other.value(i));
  176. }
  177. }
  178. void erase(StrName key){
  179. bool ok; uint16_t i;
  180. HASH_PROBE(key, ok, i);
  181. if(!ok) throw std::out_of_range("NameDict key not found: " + key.str());
  182. _keys[i] = StrName(); value(i).reset();
  183. _size--;
  184. }
  185. std::vector<std::pair<StrName, PyVar>> items() const {
  186. std::vector<std::pair<StrName, PyVar>> v;
  187. for(uint16_t i=0; i<_capacity; i++){
  188. if(_keys[i].empty()) continue;
  189. v.push_back(std::make_pair(_keys[i], value(i)));
  190. }
  191. return v;
  192. }
  193. std::vector<StrName> keys() const {
  194. std::vector<StrName> v;
  195. for(uint16_t i=0; i<_capacity; i++){
  196. if(_keys[i].empty()) continue;
  197. v.push_back(_keys[i]);
  198. }
  199. return v;
  200. }
  201. #undef HASH_PROBE
  202. #undef _hash
  203. };
  204. } // namespace pkpy