|
|
@@ -5,18 +5,17 @@
|
|
|
#include "str.h"
|
|
|
|
|
|
namespace pkpy{
|
|
|
- const std::vector<uint32_t> kHashSeeds = {0, 3259656564, 3106121857, 2774518055, 4085946151, 4274771677, 4047908201, 2149081045, 4160441109, 4127125901, 3109730425, 2794687362, 2806137727, 2642447290, 4070996945, 3580743775, 3719956858, 2960278187, 3568486238, 3125361093, 2232173865, 4043238260, 3265527710, 2206062780, 3968387223, 3144295694, 3293736932, 3196583945, 3832534010, 3311528523, 4258510773, 4049882022, 3058077580, 2446794117, 2330081744, 2563269634, 3848248775, 2197398712, 2874906918, 3012473024, 3477039876, 2710692860, 2806508231, 3893239503, 3929140074, 3145323261, 3593960112, 2451662716, 2545939029, 2475647797, 2790321726, 4166873680, 3504262692, 3140715282, 3078827310, 3177714229, 3006241931, 3777800785, 3621627818, 3163832382, 2166076714, 3622591406, 3299007679, 2915427082, 3939911590, 4145015468, 2791077264, 3916399405, 3330576709, 2466029172, 3534773842, 2690327419, 2487859383, 3687001303, 2615131117, 3057598651, 2548471802, 3145782646, 3895406770, 2150621965, 2179753887, 2159855306, 2439700132, 2397760304, 3405860607, 4268549710, 2779408554, 2485874456, 3796299954, 4179315997, 2380599704, 3210079474, 3951990603, 3342489194, 2997361581, 3576131817, 3163713423, 2467495451, 4190562029, 2588496185};
|
|
|
- const std::vector<uint32_t> kPrimes = {2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97, 101, 103, 107, 109, 113, 127, 131, 137, 139, 149, 151, 157, 163, 167, 173, 179, 181, 191, 193, 197, 199, 211, 223, 227, 229, 233, 239, 241, 251, 257, 263, 269, 271, 277, 281, 283, 293, 307, 311, 313, 317, 331, 337, 347, 349, 353, 359, 367, 373, 379, 383, 389, 397, 401, 409, 419, 421, 431, 433, 439, 443, 449, 457, 461, 463, 467, 479, 487, 491, 499, 503, 509, 521, 523, 541, 547, 557, 563, 569, 571, 577, 587, 593, 599};
|
|
|
+ const std::vector<uint32_t> kHashSeeds = {0, 2619911537, 3657312521, 3108729646, 3527288759, 3054146033, 3436461329, 3073779540, 2262929533, 3564928174, 2823402058, 4053700272, 3710518398, 2193895465, 3616465673, 2370151435, 3911946797, 2518033560, 4090755824, 2554076140, 2922670102, 2817437464, 3058134240, 4015911568, 2683197236, 3580292421, 2489282276, 2198476149, 3059557576, 3251314740, 2164089808, 3407729628, 4006319879, 3563204365, 2959032770, 3699872774, 3285955347, 2886756578, 2727979131, 3987926730, 3558848942, 3667783180, 3427603538, 2678804156, 3899695574, 3497073252, 4125590716, 3439003736, 3166960007, 2341256547, 3498025667, 2434275284, 2294495502, 2454032734, 2622845447, 2237894924, 4127773463, 2899873446, 3826047724, 2772822995, 4021041972, 3585330008, 3442671856, 4033639492, 4190375370, 3423510541, 3993284300, 3399740404, 2346010479, 2665226039, 3989420676, 2430396952, 4162553639, 3318451871, 2451157282, 3888084520, 4216786107, 3630490447, 3686500437, 4270289137, 2845436680, 3990477872, 3386727112, 2603155603, 2533548133, 2476236382, 2752268515, 2714540624, 3649552071, 2486775129, 3447438497, 2660214659, 3171847655, 2173117107, 2777204947, 3473126570, 2874563719, 3710212439, 3882999260, 3884415651, 3939886653, 2513961523, 3259070705, 4076001992, 3695924943, 2630642728, 2302962913, 3977147010, 4229898948, 3278694988, 3668138471, 4174657761, 2681204139, 2468496171, 3953941369, 4216451258, 3986080889, 3355338704, 3484226746, 3964851958, 4063196140, 3210555673, 3972895759, 2762823957};
|
|
|
+ const std::vector<uint32_t> kPrimes = {2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97, 101, 103, 107, 109, 113, 127, 131, 137, 139, 149, 151, 157, 163, 167, 173, 179, 181, 191, 193, 197, 199, 211, 223, 227, 229, 233, 239, 241, 251, 257, 263, 269, 271, 277, 281, 283, 293, 307, 311, 313, 317, 331, 337, 347, 349, 353};
|
|
|
|
|
|
- uint32_t find_next_prime(uint32_t n){
|
|
|
+ uint32_t find_next_capacity(uint32_t n){
|
|
|
auto it = std::lower_bound(kPrimes.begin(), kPrimes.end(), n);
|
|
|
if(it == kPrimes.end()) return n;
|
|
|
return *it;
|
|
|
}
|
|
|
|
|
|
inline uint32_t _hash(StrName key, uint32_t capacity, uint32_t hash_seed){
|
|
|
- uint32_t i = key.index * (uint32_t)2654435761;
|
|
|
- return (i ^ hash_seed) % capacity;
|
|
|
+ return (key.index ^ hash_seed) % capacity;
|
|
|
}
|
|
|
|
|
|
uint32_t find_perfect_hash_seed(uint32_t capacity, const std::vector<StrName>& keys){
|
|
|
@@ -29,7 +28,15 @@ namespace pkpy{
|
|
|
uint32_t index = _hash(key, capacity, kHashSeeds[i]);
|
|
|
indices.insert(index);
|
|
|
}
|
|
|
- float score = indices.size() / (float)keys.size();
|
|
|
+ float find_hit_score = indices.size() / (float)keys.size();
|
|
|
+ std::vector<uint32_t> indices_vec(indices.begin(), indices.end());
|
|
|
+ std::sort(indices_vec.begin(), indices_vec.end());
|
|
|
+ float find_miss_score = indices.size();
|
|
|
+ for(int j=1; j<indices_vec.size(); j++){
|
|
|
+ int gap = indices_vec[j] - indices_vec[j-1];
|
|
|
+ if(gap == 1) find_miss_score -= 1;
|
|
|
+ }
|
|
|
+ float score = find_hit_score*2 + find_miss_score/indices.size();
|
|
|
scores.push_back({kHashSeeds[i], score});
|
|
|
}
|
|
|
std::sort(scores.begin(), scores.end(), [](auto a, auto b){ return a.second > b.second; });
|
|
|
@@ -51,7 +58,7 @@ namespace pkpy{
|
|
|
|
|
|
NameDict(uint32_t capacity=2, float load_factor=0.67, uint32_t hash_seed=0):
|
|
|
_capacity(capacity), _size(0), _load_factor(load_factor),
|
|
|
- _hash_seed(hash_seed), _a(new NameDictNode[capacity]) {}
|
|
|
+ _hash_seed(hash_seed), _a(new NameDictNode[_capacity]) {}
|
|
|
|
|
|
NameDict(const NameDict& other) {
|
|
|
this->_capacity = other._capacity;
|
|
|
@@ -69,14 +76,6 @@ namespace pkpy{
|
|
|
uint32_t size() const { return _size; }
|
|
|
|
|
|
#define HASH_PROBE(key, ok, i) \
|
|
|
- bool ok = false; uint32_t i; \
|
|
|
- i = _hash(key, _capacity, _hash_seed); \
|
|
|
- while(!_a[i].empty()) { \
|
|
|
- if(_a[i].first == (key)) { ok = true; break; } \
|
|
|
- i = (i + 1) % _capacity; \
|
|
|
- }
|
|
|
-
|
|
|
-#define HASH_PROBE_OVERRIDE(key, ok, i) \
|
|
|
ok = false; \
|
|
|
i = _hash(key, _capacity, _hash_seed); \
|
|
|
while(!_a[i].empty()) { \
|
|
|
@@ -85,58 +84,63 @@ namespace pkpy{
|
|
|
}
|
|
|
|
|
|
const PyVar& operator[](StrName key) const {
|
|
|
+ bool ok; uint32_t i;
|
|
|
HASH_PROBE(key, ok, i);
|
|
|
- if(!ok) throw std::out_of_range("NameDict key not found");
|
|
|
+ if(!ok) throw std::out_of_range("NameDict key not found: " + key.str());
|
|
|
return _a[i].second;
|
|
|
}
|
|
|
|
|
|
- [[nodiscard]] PyVar& operator[](StrName key){
|
|
|
+ PyVar& get(StrName key){
|
|
|
+ bool ok; uint32_t i;
|
|
|
+ HASH_PROBE(key, ok, i);
|
|
|
+ if(!ok) throw std::out_of_range("NameDict key not found: " + key.str());
|
|
|
+ return _a[i].second;
|
|
|
+ }
|
|
|
+
|
|
|
+ template<typename T>
|
|
|
+ void set(StrName key, T&& value){
|
|
|
+ bool ok; uint32_t i;
|
|
|
HASH_PROBE(key, ok, i);
|
|
|
if(!ok) {
|
|
|
- _a[i].first = key;
|
|
|
_size++;
|
|
|
- if(_size > _capacity * _load_factor){
|
|
|
+ if(_size > _capacity*_load_factor){
|
|
|
_rehash(true);
|
|
|
- HASH_PROBE_OVERRIDE(key, ok, i);
|
|
|
+ HASH_PROBE(key, ok, i);
|
|
|
}
|
|
|
+ _a[i].first = key;
|
|
|
}
|
|
|
- return _a[i].second;
|
|
|
+ _a[i].second = std::forward<T>(value);
|
|
|
}
|
|
|
|
|
|
void _rehash(bool resize){
|
|
|
NameDictNode* old_a = _a;
|
|
|
uint32_t old_capacity = _capacity;
|
|
|
- if(resize) _capacity = find_next_prime(_capacity * 2);
|
|
|
- _size = 0;
|
|
|
+ if(resize) _capacity = find_next_capacity(_capacity * 2);
|
|
|
_a = new NameDictNode[_capacity];
|
|
|
for(uint32_t i=0; i<old_capacity; i++){
|
|
|
if(old_a[i].empty()) continue;
|
|
|
+ bool ok; uint32_t j;
|
|
|
HASH_PROBE(old_a[i].first, ok, j);
|
|
|
if(ok) UNREACHABLE();
|
|
|
- _a[j].first = old_a[i].first;
|
|
|
- _a[j].second = std::move(old_a[i].second);
|
|
|
- _size++;
|
|
|
+ _a[j] = std::move(old_a[i]);
|
|
|
}
|
|
|
delete[] old_a;
|
|
|
}
|
|
|
|
|
|
void _try_perfect_rehash(){
|
|
|
- std::vector<StrName> keys;
|
|
|
- for(uint32_t i=0; i<_capacity; i++){
|
|
|
- if(_a[i].empty()) continue;
|
|
|
- keys.push_back(_a[i].first);
|
|
|
- }
|
|
|
- _hash_seed = find_perfect_hash_seed(_capacity, keys);
|
|
|
+ _hash_seed = find_perfect_hash_seed(_capacity, keys());
|
|
|
_rehash(false); // do not resize
|
|
|
}
|
|
|
|
|
|
inline PyVar* try_get(StrName key){
|
|
|
+ bool ok; uint32_t i;
|
|
|
HASH_PROBE(key, ok, i);
|
|
|
if(!ok) return nullptr;
|
|
|
return &_a[i].second;
|
|
|
}
|
|
|
|
|
|
inline bool try_set(StrName key, PyVar&& value){
|
|
|
+ bool ok; uint32_t i;
|
|
|
HASH_PROBE(key, ok, i);
|
|
|
if(!ok) return false;
|
|
|
_a[i].second = std::move(value);
|
|
|
@@ -144,64 +148,70 @@ namespace pkpy{
|
|
|
}
|
|
|
|
|
|
inline bool contains(StrName key) const {
|
|
|
+ bool ok; uint32_t i;
|
|
|
HASH_PROBE(key, ok, i);
|
|
|
return ok;
|
|
|
}
|
|
|
|
|
|
- ~NameDict(){ delete[] _a;}
|
|
|
-
|
|
|
- struct iterator {
|
|
|
- const NameDict* _dict;
|
|
|
- uint32_t i;
|
|
|
- iterator() = default;
|
|
|
- iterator(const NameDict* dict, uint32_t i): _dict(dict), i(i) { _skip_empty(); }
|
|
|
- inline void _skip_empty(){ while(i < _dict->_capacity && _dict->_a[i].empty()) i++;}
|
|
|
- inline iterator& operator++(){ i++; _skip_empty(); return *this;}
|
|
|
+ ~NameDict(){ delete[] _a; }
|
|
|
|
|
|
- inline bool operator!=(const iterator& other) const { return i != other.i; }
|
|
|
- inline bool operator==(const iterator& other) const { return i == other.i; }
|
|
|
-
|
|
|
- inline NameDictNode* operator->() const { return &_dict->_a[i]; }
|
|
|
- };
|
|
|
-
|
|
|
- template<typename T>
|
|
|
- void emplace(StrName key, T&& value){
|
|
|
- HASH_PROBE(key, ok, i);
|
|
|
- if(!ok) {
|
|
|
- _a[i].first = key;
|
|
|
- _size++;
|
|
|
- if(_size > _capacity * _load_factor){
|
|
|
- _rehash(true);
|
|
|
- HASH_PROBE_OVERRIDE(key, ok, i);
|
|
|
- }
|
|
|
+ void update(const NameDict& other){
|
|
|
+ for(uint32_t i=0; i<other._capacity; i++){
|
|
|
+ if(other._a[i].empty()) continue;
|
|
|
+ set(other._a[i].first, other._a[i].second);
|
|
|
}
|
|
|
- _a[i].second = std::forward<T>(value);
|
|
|
- }
|
|
|
-
|
|
|
- void insert(iterator begin, iterator end){
|
|
|
- for(auto it = begin; it != end; ++it){
|
|
|
- emplace(it->first, it->second);
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- iterator find(StrName key) const{
|
|
|
- HASH_PROBE(key, ok, i);
|
|
|
- if(!ok) return end();
|
|
|
- return iterator(this, i);
|
|
|
}
|
|
|
|
|
|
void erase(StrName key){
|
|
|
+ bool ok; uint32_t i;
|
|
|
HASH_PROBE(key, ok, i);
|
|
|
- if(!ok) throw std::out_of_range("NameDict key not found");
|
|
|
+ if(!ok) throw std::out_of_range("NameDict key not found: " + key.str());
|
|
|
_a[i] = NameDictNode();
|
|
|
_size--;
|
|
|
}
|
|
|
|
|
|
- inline iterator begin() const { return iterator(this, 0); }
|
|
|
- inline iterator end() const { return iterator(this, _capacity); }
|
|
|
+ void print_stats(){
|
|
|
+ std::map<StrName, uint32_t> stats;
|
|
|
+ for(uint32_t i=0; i<_capacity; i++){
|
|
|
+ if(_a[i].empty()) continue;
|
|
|
+ stats[_a[i].first] = 1;
|
|
|
+ }
|
|
|
+ for(auto [key, _]: stats){
|
|
|
+ bool ok = false; uint32_t i;
|
|
|
+ i = _hash(key, _capacity, _hash_seed);
|
|
|
+ while(!_a[i].empty()) {
|
|
|
+ if(_a[i].first == (key)) { ok = true; break; }
|
|
|
+ i = (i + 1) % _capacity;
|
|
|
+ stats[key]++;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ for(uint32_t i=0; i<_capacity; i++){
|
|
|
+ if(_a[i].empty()) {
|
|
|
+ std::cout << i << ": <NULL>" << std::endl;
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ std::cout << i << ": <" << _a[i].first.str() << ", " << stats[_a[i].first] << '>' << std::endl;
|
|
|
+ }
|
|
|
+ }
|
|
|
|
|
|
- #undef HASH_PROBE
|
|
|
- #undef HASH_PROBE_OVERRIDE
|
|
|
+ std::vector<NameDictNode> items() const {
|
|
|
+ std::vector<NameDictNode> v;
|
|
|
+ for(uint32_t i=0; i<_capacity; i++){
|
|
|
+ if(_a[i].empty()) continue;
|
|
|
+ v.push_back(_a[i]);
|
|
|
+ }
|
|
|
+ return v;
|
|
|
+ }
|
|
|
+
|
|
|
+ std::vector<StrName> keys() const {
|
|
|
+ std::vector<StrName> v;
|
|
|
+ for(uint32_t i=0; i<_capacity; i++){
|
|
|
+ if(_a[i].empty()) continue;
|
|
|
+ v.push_back(_a[i].first);
|
|
|
+ }
|
|
|
+ return v;
|
|
|
+ }
|
|
|
+#undef HASH_PROBE
|
|
|
};
|
|
|
|
|
|
} // namespace pkpy
|