|
|
@@ -51,8 +51,6 @@ static uint32_t Dict__next_cap(uint32_t cap) {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-
|
|
|
-
|
|
|
typedef struct {
|
|
|
DictEntry* curr;
|
|
|
DictEntry* end;
|
|
|
@@ -61,9 +59,24 @@ typedef struct {
|
|
|
|
|
|
static void Dict__ctor(Dict* self, uint32_t capacity, int entries_capacity) {
|
|
|
self->length = 0;
|
|
|
- self->capacity = capacity;
|
|
|
- self->indices = PK_MALLOC(self->capacity * sizeof(DictIndex));
|
|
|
- memset(self->indices, -1, self->capacity * sizeof(DictIndex));
|
|
|
+ self->capacity = capacity; // the 1st prime
|
|
|
+
|
|
|
+ size_t indices_size;
|
|
|
+ if(self->capacity < UINT16_MAX - 1) {
|
|
|
+ self->index_is_short = true;
|
|
|
+ indices_size = self->capacity * sizeof(uint16_t);
|
|
|
+ self->null_index_value = UINT16_MAX;
|
|
|
+ self->deleted_index_value = UINT16_MAX - 1;
|
|
|
+ } else {
|
|
|
+ self->index_is_short = false;
|
|
|
+ indices_size = self->capacity * sizeof(uint32_t);
|
|
|
+ self->null_index_value = UINT32_MAX;
|
|
|
+ self->deleted_index_value = UINT32_MAX - 1;
|
|
|
+ }
|
|
|
+
|
|
|
+ self->indices = PK_MALLOC(indices_size);
|
|
|
+ memset(self->indices, -1, indices_size);
|
|
|
+
|
|
|
c11_vector__ctor(&self->entries, sizeof(DictEntry));
|
|
|
c11_vector__reserve(&self->entries, entries_capacity);
|
|
|
}
|
|
|
@@ -75,65 +88,105 @@ static void Dict__dtor(Dict* self) {
|
|
|
c11_vector__dtor(&self->entries);
|
|
|
}
|
|
|
|
|
|
-static bool Dict__try_get(Dict* self, py_TValue* key, DictEntry** out) {
|
|
|
- py_i64 hash;
|
|
|
- if(!py_hash(key, &hash)) return false;
|
|
|
- int idx = (uint64_t)hash % self->capacity;
|
|
|
- for(int i = 0; i < PK_DICT_MAX_COLLISION; i++) {
|
|
|
- int idx2 = self->indices[idx]._[i];
|
|
|
- if(idx2 == -1) continue;
|
|
|
+static uint32_t Dict__get_index(Dict* self, uint32_t index) {
|
|
|
+ if(self->index_is_short) {
|
|
|
+ uint16_t* indices = self->indices;
|
|
|
+ return indices[index];
|
|
|
+ } else {
|
|
|
+ uint32_t* indices = self->indices;
|
|
|
+ return indices[index];
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+static void Dict__swap_index(Dict* self, uint32_t x, uint32_t y) {
|
|
|
+ if(self->index_is_short) {
|
|
|
+ uint16_t* indices = self->indices;
|
|
|
+ uint16_t tmp = indices[x];
|
|
|
+ indices[x] = indices[y];
|
|
|
+ indices[y] = tmp;
|
|
|
+ } else {
|
|
|
+ uint32_t* indices = self->indices;
|
|
|
+ uint32_t tmp = indices[x];
|
|
|
+ indices[x] = indices[y];
|
|
|
+ indices[y] = tmp;
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+static void Dict__set_index(Dict* self, uint32_t index, uint32_t value) {
|
|
|
+ if(self->index_is_short) {
|
|
|
+ uint16_t* indices = self->indices;
|
|
|
+ indices[index] = (uint16_t)value;
|
|
|
+ } else {
|
|
|
+ uint32_t* indices = self->indices;
|
|
|
+ indices[index] = value;
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+static bool
|
|
|
+ Dict__probe(Dict* self, py_TValue* key, py_i64* p_hash, uint32_t* p_idx, DictEntry** p_entry) {
|
|
|
+ if(!py_hash(key, p_hash)) return false;
|
|
|
+ py_i64 hash = *p_hash;
|
|
|
+ uint32_t idx = (uint64_t)hash % self->capacity;
|
|
|
+ const uint32_t max_idx = self->capacity - 1;
|
|
|
+ while(true) {
|
|
|
+ uint32_t idx2 = Dict__get_index(self, idx);
|
|
|
+ if(idx2 == self->null_index_value) break;
|
|
|
DictEntry* entry = c11__at(DictEntry, &self->entries, idx2);
|
|
|
if(entry->hash == (uint64_t)hash) {
|
|
|
int res = py_equal(&entry->key, key);
|
|
|
if(res == 1) {
|
|
|
- *out = entry;
|
|
|
+ *p_idx = idx;
|
|
|
+ *p_entry = entry;
|
|
|
return true;
|
|
|
}
|
|
|
if(res == -1) return false; // error
|
|
|
}
|
|
|
+ // try next index
|
|
|
+ idx = idx < max_idx ? idx + 1 : 0;
|
|
|
}
|
|
|
- *out = NULL;
|
|
|
+ // not found
|
|
|
+ *p_idx = idx;
|
|
|
+ *p_entry = NULL;
|
|
|
return true;
|
|
|
}
|
|
|
|
|
|
+static bool Dict__try_get(Dict* self, py_TValue* key, DictEntry** out) {
|
|
|
+ py_i64 hash;
|
|
|
+ uint32_t idx;
|
|
|
+ return Dict__probe(self, key, &hash, &idx, out);
|
|
|
+}
|
|
|
+
|
|
|
static void Dict__clear(Dict* self) {
|
|
|
- memset(self->indices, -1, self->capacity * sizeof(DictIndex));
|
|
|
+ size_t indices_size = self->index_is_short ? self->capacity * sizeof(uint16_t)
|
|
|
+ : self->capacity * sizeof(uint32_t);
|
|
|
+ memset(self->indices, -1, indices_size);
|
|
|
c11_vector__clear(&self->entries);
|
|
|
self->length = 0;
|
|
|
}
|
|
|
|
|
|
static void Dict__rehash_2x(Dict* self) {
|
|
|
Dict old_dict = *self;
|
|
|
- uint32_t new_capacity = self->capacity;
|
|
|
-
|
|
|
-__RETRY:
|
|
|
- // use next capacity
|
|
|
- new_capacity = Dict__next_cap(new_capacity);
|
|
|
+ uint32_t new_capacity = Dict__next_cap(new_capacity);
|
|
|
// create a new dict with new capacity
|
|
|
Dict__ctor(self, new_capacity, old_dict.entries.capacity);
|
|
|
// move entries from old dict to new dict
|
|
|
+ const uint32_t max_idx = new_capacity - 1;
|
|
|
for(int i = 0; i < old_dict.entries.length; i++) {
|
|
|
DictEntry* old_entry = c11__at(DictEntry, &old_dict.entries, i);
|
|
|
if(py_isnil(&old_entry->key)) continue;
|
|
|
- int idx = old_entry->hash % new_capacity;
|
|
|
- bool success = false;
|
|
|
- for(int i = 0; i < PK_DICT_MAX_COLLISION; i++) {
|
|
|
- int idx2 = self->indices[idx]._[i];
|
|
|
- if(idx2 == -1) {
|
|
|
- // insert new entry (empty slot)
|
|
|
+ uint32_t idx = old_entry->hash % new_capacity;
|
|
|
+ while(true) {
|
|
|
+ uint32_t idx2 = Dict__get_index(self, idx);
|
|
|
+ if(idx2 == self->null_index_value) {
|
|
|
c11_vector__push(DictEntry, &self->entries, *old_entry);
|
|
|
- self->indices[idx]._[i] = self->entries.length - 1;
|
|
|
+ Dict__set_index(self, idx, self->entries.length - 1);
|
|
|
self->length++;
|
|
|
- success = true;
|
|
|
break;
|
|
|
}
|
|
|
- }
|
|
|
- if(!success) {
|
|
|
- Dict__dtor(self);
|
|
|
- goto __RETRY;
|
|
|
+ // try next index
|
|
|
+ idx = idx < max_idx ? idx + 1 : 0;
|
|
|
}
|
|
|
}
|
|
|
- // done
|
|
|
Dict__dtor(&old_dict);
|
|
|
}
|
|
|
|
|
|
@@ -153,93 +206,69 @@ static void Dict__compact_entries(Dict* self) {
|
|
|
}
|
|
|
self->entries.length = n;
|
|
|
// update indices
|
|
|
- for(uint32_t i = 0; i < self->capacity; i++) {
|
|
|
- for(int j = 0; j < PK_DICT_MAX_COLLISION; j++) {
|
|
|
- int idx = self->indices[i]._[j];
|
|
|
- if(idx == -1) continue;
|
|
|
- self->indices[i]._[j] = mappings[idx];
|
|
|
- }
|
|
|
+ for(int idx = 0; idx < self->capacity; idx++) {
|
|
|
+ uint32_t idx2 = Dict__get_index(self, idx);
|
|
|
+ if(idx2 == self->null_index_value) continue;
|
|
|
+ Dict__set_index(self, idx, mappings[idx2]);
|
|
|
}
|
|
|
PK_FREE(mappings);
|
|
|
}
|
|
|
|
|
|
static bool Dict__set(Dict* self, py_TValue* key, py_TValue* val) {
|
|
|
py_i64 hash;
|
|
|
- if(!py_hash(key, &hash)) return false;
|
|
|
- int idx = (uint64_t)hash % self->capacity;
|
|
|
- int bad_hash_count = 0;
|
|
|
- for(int i = 0; i < PK_DICT_MAX_COLLISION; i++) {
|
|
|
- int idx2 = self->indices[idx]._[i];
|
|
|
- if(idx2 == -1) {
|
|
|
- // insert new entry
|
|
|
- DictEntry* new_entry = c11_vector__emplace(&self->entries);
|
|
|
- new_entry->hash = (uint64_t)hash;
|
|
|
- new_entry->key = *key;
|
|
|
- new_entry->val = *val;
|
|
|
- self->indices[idx]._[i] = self->entries.length - 1;
|
|
|
- self->length++;
|
|
|
- return true;
|
|
|
- }
|
|
|
+ uint32_t idx;
|
|
|
+ DictEntry* entry;
|
|
|
+ if(!Dict__probe(self, key, &hash, &idx, &entry)) return false;
|
|
|
+ if(entry) {
|
|
|
// update existing entry
|
|
|
- DictEntry* entry = c11__at(DictEntry, &self->entries, idx2);
|
|
|
- // check if they have the same hash
|
|
|
- if(entry->hash == (uint64_t)hash) {
|
|
|
- // check if they are equal
|
|
|
- int res = py_equal(&entry->key, key);
|
|
|
- if(res == 1) {
|
|
|
- entry->val = *val;
|
|
|
- return true;
|
|
|
- }
|
|
|
- if(res == -1) return false; // error
|
|
|
- // res == 0
|
|
|
- bad_hash_count++;
|
|
|
- }
|
|
|
- }
|
|
|
- // no empty slot found
|
|
|
- if(bad_hash_count == PK_DICT_MAX_COLLISION) {
|
|
|
- // all `PK_DICT_MAX_COLLISION` slots have the same hash but different keys
|
|
|
- // we are unable to solve this collision via rehashing
|
|
|
- return RuntimeError("dict: %d/%d/%d: maximum collision reached (hash=%i)",
|
|
|
- self->entries.length,
|
|
|
- self->entries.capacity,
|
|
|
- self->capacity,
|
|
|
- hash);
|
|
|
- }
|
|
|
-
|
|
|
- if(self->capacity >= (uint32_t)self->entries.length * 10) {
|
|
|
- return RuntimeError("dict: %d/%d/%d: minimum load factor reached",
|
|
|
- self->entries.length,
|
|
|
- self->entries.capacity,
|
|
|
- self->capacity);
|
|
|
+ entry->val = *val;
|
|
|
+ return true;
|
|
|
}
|
|
|
- Dict__rehash_2x(self);
|
|
|
- return Dict__set(self, key, val);
|
|
|
+ // insert new entry
|
|
|
+ DictEntry* new_entry = c11_vector__emplace(&self->entries);
|
|
|
+ new_entry->hash = (uint64_t)hash;
|
|
|
+ new_entry->key = *key;
|
|
|
+ new_entry->val = *val;
|
|
|
+ Dict__set_index(self, idx, self->entries.length - 1);
|
|
|
+ self->length++;
|
|
|
+ // check if we need to rehash
|
|
|
+ float load_factor = (float)self->length / self->capacity;
|
|
|
+ if(load_factor > 4 / 7.0f) Dict__rehash_2x(self);
|
|
|
+ return true;
|
|
|
}
|
|
|
|
|
|
/// Delete an entry from the dict.
|
|
|
/// -1: error, 0: not found, 1: found and deleted
|
|
|
static int Dict__pop(Dict* self, py_Ref key) {
|
|
|
py_i64 hash;
|
|
|
- if(!py_hash(key, &hash)) return -1;
|
|
|
- int idx = (uint64_t)hash % self->capacity;
|
|
|
- for(int i = 0; i < PK_DICT_MAX_COLLISION; i++) {
|
|
|
- int idx2 = self->indices[idx]._[i];
|
|
|
- if(idx2 == -1) continue;
|
|
|
- DictEntry* entry = c11__at(DictEntry, &self->entries, idx2);
|
|
|
- if(entry->hash == (uint64_t)hash) {
|
|
|
- int res = py_equal(&entry->key, key);
|
|
|
- if(res == 1) {
|
|
|
- *py_retval() = entry->val;
|
|
|
- py_newnil(&entry->key);
|
|
|
- self->indices[idx]._[i] = -1;
|
|
|
- self->length--;
|
|
|
- if(self->length < self->entries.length / 2) Dict__compact_entries(self);
|
|
|
- return 1;
|
|
|
- }
|
|
|
- if(res == -1) return -1; // error
|
|
|
- }
|
|
|
+ uint32_t idx;
|
|
|
+ DictEntry* entry;
|
|
|
+ if(!Dict__probe(self, key, &hash, &idx, &entry)) return -1;
|
|
|
+ if(!entry) return 0; // not found
|
|
|
+
|
|
|
+ // found the entry, delete and return it
|
|
|
+ py_assign(py_retval(), &entry->val);
|
|
|
+ Dict__set_index(self, idx, self->null_index_value);
|
|
|
+ py_newnil(&entry->key);
|
|
|
+ py_newnil(&entry->val);
|
|
|
+ self->length--;
|
|
|
+ // tidy indices
|
|
|
+ uint32_t pre_z = idx;
|
|
|
+ const uint32_t max_idx = self->capacity - 1;
|
|
|
+ uint32_t z = idx < max_idx ? idx + 1 : 0;
|
|
|
+ while(true) {
|
|
|
+ uint32_t idx2 = Dict__get_index(self, z);
|
|
|
+ if(idx2 == self->null_index_value) break;
|
|
|
+ uint64_t h = c11__at(DictEntry, &self->entries, idx2)->hash;
|
|
|
+ if(h != hash) break;
|
|
|
+ Dict__swap_index(self, pre_z, z);
|
|
|
+ pre_z = z;
|
|
|
+ z = z < max_idx ? z + 1 : 0;
|
|
|
}
|
|
|
- return 0;
|
|
|
+ // compact entries if necessary
|
|
|
+ if(self->entries.length > 16 && self->length < self->entries.length / 2)
|
|
|
+ Dict__compact_entries(self);
|
|
|
+ return 1;
|
|
|
}
|
|
|
|
|
|
static void DictIterator__ctor(DictIterator* self, Dict* dict, int mode) {
|
|
|
@@ -262,13 +291,13 @@ static bool dict__new__(int argc, py_Ref argv) {
|
|
|
py_Type cls = py_totype(argv);
|
|
|
int slots = cls == tp_dict ? 0 : -1;
|
|
|
Dict* ud = py_newobject(py_retval(), cls, slots, sizeof(Dict));
|
|
|
- Dict__ctor(ud, 7, 8);
|
|
|
+ Dict__ctor(ud, 7, 4);
|
|
|
return true;
|
|
|
}
|
|
|
|
|
|
void py_newdict(py_OutRef out) {
|
|
|
Dict* ud = py_newobject(out, tp_dict, 0, sizeof(Dict));
|
|
|
- Dict__ctor(ud, 7, 8);
|
|
|
+ Dict__ctor(ud, 7, 4);
|
|
|
}
|
|
|
|
|
|
static bool dict__init__(int argc, py_Ref argv) {
|